# Spaces: samiee2213 / MathVision
# Runtime error
# File size: 2,646 Bytes
# 3fabc11

import gradio as gr
import cv2
import numpy as np
from PIL import Image
import google.generativeai as genai
from sympy import sympify, solve
import os
from dotenv import load_dotenv

# Load environment variables with python-dotenv (by default from a .env
# file) before anything calls os.getenv(); initialize_genai() below reads
# GOOGLE_API_KEY from the environment.
load_dotenv()

# Initialize Google Generative AI
def initialize_genai():
    """Configure the ``google.generativeai`` client from the environment.

    Looks up ``GOOGLE_API_KEY`` in the process environment and hands it to
    ``genai.configure``.

    Raises:
        ValueError: If ``GOOGLE_API_KEY`` is unset or empty.
    """
    key = os.environ.get("GOOGLE_API_KEY")
    if key:
        genai.configure(api_key=key)
        return
    # Reaching this point means the variable was missing or an empty string.
    raise ValueError("Google API Key not found in environment variables.")

def create_prompt(image):
    """Return the fixed instruction text that accompanies an equation image.

    Args:
        image: Accepted for interface compatibility only; the returned text
            does not depend on it.

    Returns:
        The prompt string asking the model to recognize and solve the
        equation shown in the image.
    """
    instruction = (
        "Analyze the following image of an equation. "
        "Recognize and solve the equation. Image:"
    )
    return instruction

def recognize_equation_with_genai(image, model_name="gemini-1.5-flash"):
    """Ask a Gemini model to read the equation shown in ``image``.

    The previous implementation called ``genai.text_detect``, which is not
    part of the ``google.generativeai`` package, so every call ended in the
    error branch. This version uses the SDK's multimodal entry point,
    ``GenerativeModel.generate_content``.

    Args:
        image: The image to analyze; ``process_frame`` passes a PIL image.
        model_name: Identifier of the model to use. It must be a model that
            accepts image input and that the configured API key can access.
            NOTE(review): the default is an assumption - confirm it is still
            offered for this account.

    Returns:
        The model's text reply with surrounding whitespace removed, or a
        string starting with ``"Error recognizing text: "`` if anything went
        wrong. This function never raises, because its result is shown
        directly in the UI.
    """
    try:
        prompt = create_prompt(image)
        model = genai.GenerativeModel(model_name)
        # generate_content takes a list of parts, so the instruction text and
        # the image travel together in a single request.
        response = model.generate_content([prompt, image])
        # response.text raises when the reply carries no text part (for
        # example a blocked response); the handler below reports that too.
        return (response.text or "").strip()
    except Exception as e:  # UI boundary: report the failure, do not crash
        return f"Error recognizing text: {str(e)}"

def solve_equation(equation):
    """Solve an expression or a single ``lhs = rhs`` equation with SymPy.

    ``sympify`` cannot parse a lone ``=``, so text such as ``"x + 2 = 5"``
    used to fail every time. An input containing exactly one plain ``=``
    (not part of ``==``, ``<=``, ``>=`` or ``!=``) is now rewritten as
    ``lhs - rhs`` before solving. Any other input is handed to ``sympify``
    unchanged, exactly as before.

    Args:
        equation: Text of the expression or equation. Non-string values are
            passed straight to ``sympify``.

    Returns:
        ``str(solve(...))`` on success, e.g. ``"[3]"`` for ``"x + 2 = 5"``,
        or a string starting with ``"Error solving equation: "`` on failure.
        This function never raises.
    """
    if equation is None or (isinstance(equation, str) and not equation.strip()):
        return "Error solving equation: no equation was provided"
    try:
        # SECURITY: sympify() evaluates its input as Python-like code. The
        # text here comes from model output over a user-supplied image, so it
        # is untrusted; consider validating it before it reaches this call.
        if isinstance(equation, str):
            eq_pos = equation.find("=")
            is_plain_equals = (
                equation.count("=") == 1
                and (eq_pos == 0 or equation[eq_pos - 1] not in "<>!")
            )
        else:
            is_plain_equals = False

        if is_plain_equals:
            # Solving lhs - rhs == 0 is equivalent to solving lhs == rhs.
            expr = sympify(equation[:eq_pos]) - sympify(equation[eq_pos + 1:])
        else:
            expr = sympify(equation)
        return str(solve(expr))
    except Exception as e:  # UI boundary: report the failure, do not crash
        return f"Error solving equation: {str(e)}"

def process_frame(frame):
    """Binarize a frame, read the equation in it, and solve that equation.

    Args:
        frame: Image array accepted by ``cv2.cvtColor`` with
            ``COLOR_BGR2GRAY``. NOTE(review): that conversion assumes BGR
            channel order - confirm the caller supplies BGR frames.

    Returns:
        A ``(recognized_equation, solutions, image)`` tuple where ``image``
        is the thresholded PIL image that was sent for recognition.
        ``solutions`` is an empty string when recognition failed or returned
        no text, so the solver is never run on an error message.
    """
    # Convert frame to grayscale
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    # Inverse binary threshold: pixels above 127 become 0, the rest 255.
    _, thresholded = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY_INV)

    # Convert OpenCV image to PIL image
    image = Image.fromarray(thresholded)

    recognized_equation = recognize_equation_with_genai(image)

    # recognize_equation_with_genai reports failures as text with this
    # prefix. Passing that text to the solver would only add a second,
    # misleading error to the Solution box.
    recognition_failed = (
        not recognized_equation
        or recognized_equation.startswith("Error recognizing text:")
    )
    solutions = "" if recognition_failed else solve_equation(recognized_equation)

    return recognized_equation, solutions, image

def _make_webcam_video(label):
    """Create a webcam-backed ``gr.Video`` on either Gradio API generation."""
    try:
        # Older Gradio releases take a single ``source`` string.
        return gr.Video(source="webcam", label=label)
    except TypeError:
        # Newer releases reject ``source`` and take a ``sources`` list.
        return gr.Video(sources=["webcam"], label=label)


def _read_first_frame(video_path):
    """Return the first decodable frame of a video file, or None."""
    capture = cv2.VideoCapture(video_path)
    try:
        ok, frame = capture.read()
    finally:
        # Always release the handle so the recorded file is not kept open.
        capture.release()
    return frame if ok else None


def main():
    """Build the Gradio interface and launch it.

    Configures the Generative AI client first, so a missing API key raises
    ``ValueError`` before any UI is created. The UI takes a webcam video,
    processes its first frame with ``process_frame``, and shows the
    recognized equation, the solution, and the thresholded image.

    The previous version passed ``type="numpy"`` to ``gr.Video`` (not a
    Video parameter) and indexed the callback value with ``video[0]``.
    Gradio gives a Video callback the path of the recorded file, so that
    indexing produced a character rather than a frame.
    """
    # Initialize Google Generative AI
    initialize_genai()

    with gr.Blocks() as demo:
        gr.Markdown("## Virtual Math Calculator with Google Generative AI")

        with gr.Row():
            video_input = _make_webcam_video("Record your video")
            output_text = gr.Textbox(label="Recognized Equation")
            output_solutions = gr.Textbox(label="Solution")
            output_image = gr.Image(label="Captured Image")

        def process_video(video):
            """Run recognition on the first frame of the recorded video."""
            # The change event also fires when the input is cleared.
            if not video:
                return "", "", None
            frame = _read_first_frame(video)
            if frame is None:
                return "Error reading video: no frame could be decoded.", "", None
            return process_frame(frame)

        video_input.change(
            process_video,
            inputs=video_input,
            outputs=[output_text, output_solutions, output_image],
        )

    demo.launch()

# Start the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    main()