Spaces:
Runtime error
Runtime error
File size: 2,646 Bytes
3fabc11 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 |
import gradio as gr
import cv2
import numpy as np
from PIL import Image
import google.generativeai as genai
from sympy import sympify, solve
import os
from dotenv import load_dotenv
# Load environment variables
load_dotenv()
# Initialize Google Generative AI
def initialize_genai():
    """Configure the Google Generative AI client from the environment.

    Reads ``GOOGLE_API_KEY`` from the process environment and hands it to
    ``genai.configure``.

    Raises:
        ValueError: If the variable is unset or empty.
    """
    key = os.environ.get("GOOGLE_API_KEY")
    if key:
        genai.configure(api_key=key)
        return
    raise ValueError("Google API Key not found in environment variables.")
def create_prompt(image):
    """Return the instruction text that accompanies the image sent to the model.

    The ``image`` argument is accepted but not used: the same fixed prompt
    is returned on every call.
    """
    prompt_text = (
        "Analyze the following image of an equation. "
        "Recognize and solve the equation. Image:"
    )
    return prompt_text
def recognize_equation_with_genai(image, model_name="gemini-1.5-flash"):
    """Ask a Gemini model to read the equation shown in ``image``.

    Args:
        image: Picture of the equation. It is forwarded unchanged to
            ``GenerativeModel.generate_content`` (``process_frame`` passes a
            PIL image).
        model_name: Name of the multimodal model to query.

    Returns:
        The model's text reply with surrounding whitespace removed, or a
        string starting with ``"Error recognizing text: "`` if anything
        fails. Failures are reported as text rather than raised so the UI
        can display them.
    """
    try:
        prompt = create_prompt(image)
        # google.generativeai has no `text_detect` function; an image is sent
        # to a GenerativeModel as one part of the content list, next to the
        # text prompt.
        model = genai.GenerativeModel(model_name)
        response = model.generate_content([prompt, image])
        # Accessing `.text` may itself raise (e.g. a blocked or empty reply);
        # that is handled by the except clause below.
        recognized_text = response.text or ""
        return recognized_text.strip()
    except Exception as e:  # boundary with a remote service: report, don't crash
        return f"Error recognizing text: {str(e)}"
def solve_equation(equation):
    """Solve an expression or equation given as text and return the result as text.

    Args:
        equation: Either an expression such as ``"x**2 - 4"`` (solved as
            ``expression = 0``) or an equation with a single ``=`` such as
            ``"2*x + 3 = 7"``.

    Returns:
        ``str()`` of whatever ``solve`` returns, or a string starting with
        ``"Error solving equation: "`` if parsing or solving fails.
    """
    import re  # local import: only this function needs it

    try:
        if isinstance(equation, str):
            # A lone "=" is not valid sympify input, so split on it and solve
            # lhs - rhs = 0. The look-arounds leave "==", "<=", ">=" and "!="
            # untouched so relational input is still handed over as before.
            sides = re.split(r"(?<![<>!=])=(?!=)", equation)
        else:
            sides = [equation]

        if len(sides) > 2:
            raise ValueError("expected at most one '=' sign")

        # NOTE(security): sympify parses strings with eval(); `equation` here
        # originates from model/OCR output, i.e. untrusted text. Consider a
        # restricted parser before exposing this publicly.
        if len(sides) == 2:
            lhs, rhs = sides
            expr = sympify(lhs) - sympify(rhs)
        else:
            expr = sympify(sides[0])

        solutions = solve(expr)
        return str(solutions)
    except Exception as e:
        return f"Error solving equation: {str(e)}"
def process_frame(frame):
    """Binarize a frame, then recognize and solve the equation it shows.

    Args:
        frame: Image array; it is treated as BGR because it is converted
            with ``cv2.COLOR_BGR2GRAY``.

    Returns:
        A tuple ``(recognized_equation, solutions, image)`` where ``image``
        is the thresholded frame as a PIL image.
    """
    grayscale = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    # Inverse binary threshold at 127 with a maximum value of 255.
    binary = cv2.threshold(grayscale, 127, 255, cv2.THRESH_BINARY_INV)[1]
    pil_image = Image.fromarray(binary)

    equation_text = recognize_equation_with_genai(pil_image)
    return equation_text, solve_equation(equation_text), pil_image
def _read_first_frame(video_path):
    """Return the first frame of the video at ``video_path``, or ``None`` if none can be read."""
    capture = cv2.VideoCapture(video_path)
    try:
        ok, frame = capture.read()
    finally:
        capture.release()
    return frame if ok else None


def main():
    """Build the Gradio interface and launch it.

    Configures the Generative AI client, then wires a webcam video input to
    three outputs: the recognized equation, its solution, and the processed
    frame.

    Raises:
        ValueError: Propagated from ``initialize_genai`` when the API key is
            missing.
    """
    import inspect  # local import: only needed to probe the Gradio API

    # Initialize Google Generative AI
    initialize_genai()

    # Gradio renamed Video's `source` keyword to `sources` (a list) in 4.x.
    # Pick whichever the installed version declares. `type="numpy"` is not a
    # Video parameter at all, so it is no longer passed.
    video_kwargs = {"label": "Record your video"}
    if "sources" in inspect.signature(gr.Video.__init__).parameters:
        video_kwargs["sources"] = ["webcam"]
    else:
        video_kwargs["source"] = "webcam"

    with gr.Blocks() as demo:
        gr.Markdown("## Virtual Math Calculator with Google Generative AI")
        # NOTE(review): the original nesting under gr.Row() was lost; all four
        # widgets are placed in the row here — confirm the intended layout.
        with gr.Row():
            video_input = gr.Video(**video_kwargs)
            output_text = gr.Textbox(label="Recognized Equation")
            output_solutions = gr.Textbox(label="Solution")
            output_image = gr.Image(label="Captured Image")

        def process_video(video):
            """Run recognition on the first frame of the recorded video."""
            # The change event also fires when the input is cleared.
            if not video:
                return "", "", None
            # gr.Video passes a file path, not an array of frames, so the
            # first frame has to be decoded from the file.
            frame = _read_first_frame(video)
            if frame is None:
                return "Error: could not read a frame from the video.", "", None
            return process_frame(frame)

        video_input.change(
            process_video,
            inputs=video_input,
            outputs=[output_text, output_solutions, output_image],
        )

    demo.launch()
# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|