"""Virtual math calculator.

Records a short webcam video with Gradio, takes its first frame, asks a
Google Generative AI vision model to transcribe the equation shown in it,
and solves the transcribed equation with SymPy.
"""

import os

import cv2
import google.generativeai as genai
import gradio as gr
import numpy as np
from dotenv import load_dotenv
from PIL import Image
from sympy import solve, sympify

# Load environment variables
load_dotenv()

# Vision-capable model used when the caller does not pick one.
DEFAULT_MODEL_NAME = "gemini-1.5-flash"

# Prefix of the message returned when recognition fails; process_frame uses it
# to avoid feeding an error message to the solver.
RECOGNITION_ERROR_PREFIX = "Error recognizing text: "


def initialize_genai():
    """Configure the Google Generative AI client from ``GOOGLE_API_KEY``.

    Raises:
        ValueError: If the ``GOOGLE_API_KEY`` environment variable is unset
            or empty.
    """
    api_key = os.getenv("GOOGLE_API_KEY")
    if not api_key:
        raise ValueError("Google API Key not found in environment variables.")
    genai.configure(api_key=api_key)


def create_prompt(image):
    """Return the text instruction sent to the model alongside *image*.

    The reply is handed to ``solve_equation``, so the prompt asks for the
    bare equation only; asking the model to also solve it would produce
    prose that SymPy cannot parse.

    Args:
        image: The image that will accompany the prompt. Currently unused;
            kept so the prompt can be tailored per image later.

    Returns:
        The prompt string.
    """
    return (
        "Analyze the following image of an equation. Recognize the equation "
        "and reply with only the equation as plain text using Python-style "
        "operators (for example: 2*x + 3 = 7). Do not solve it and do not "
        "add any explanation."
    )


def recognize_equation_with_genai(image, model_name=DEFAULT_MODEL_NAME):
    """Transcribe the equation shown in *image* using Google Generative AI.

    Args:
        image: A PIL image containing the equation.
        model_name: Name of the vision-capable model to query.

    Returns:
        The recognized text with surrounding whitespace removed, or a
        message starting with ``RECOGNITION_ERROR_PREFIX`` if the request
        failed.
    """
    try:
        model = genai.GenerativeModel(model_name)
        # Multimodal request: the text instruction followed by the image.
        response = model.generate_content([create_prompt(image), image])
        return (response.text or "").strip()
    except Exception as e:
        # Boundary with a remote service: report any failure as text so the
        # UI can display it instead of crashing the event handler.
        return f"{RECOGNITION_ERROR_PREFIX}{e}"


def _to_expression(equation):
    """Convert equation text into a SymPy object that ``solve`` accepts.

    A single ``lhs = rhs`` is rewritten as ``lhs - rhs`` because ``=`` is not
    valid expression syntax for ``sympify``. Anything else (a bare
    expression, or a relation such as ``<=``, ``>=``, ``!=``, ``==``) is
    passed to ``sympify`` unchanged.
    """
    lhs, sep, rhs = equation.partition("=")
    is_plain_equals = (
        bool(sep)
        and not rhs.startswith("=")
        and not lhs.endswith(("<", ">", "!"))
    )
    if not is_plain_equals:
        return sympify(equation)
    return sympify(lhs) - sympify(rhs)


def solve_equation(equation):
    """Solve *equation* with SymPy and return the solutions as a string.

    Args:
        equation: Equation text such as ``"x + 2 = 5"`` or an expression
            that is implicitly equal to zero, such as ``"x**2 - 4"``.

    Returns:
        ``str()`` of the solutions returned by ``sympy.solve``, or a message
        starting with ``"Error solving equation: "`` if parsing or solving
        failed.
    """
    try:
        # SECURITY: sympify() evaluates its input with eval(). The text here
        # is model output derived from a user-supplied image, so do not
        # expose this to untrusted users without sandboxing or a restricted
        # parser.
        expr = _to_expression(equation)
        solutions = solve(expr)
        return str(solutions)
    except Exception as e:
        return f"Error solving equation: {str(e)}"


def process_frame(frame):
    """Recognize and solve the equation shown in a single video frame.

    Args:
        frame: A BGR image array as produced by OpenCV.

    Returns:
        A tuple ``(recognized_equation, solutions, image)`` where *image* is
        the thresholded PIL image that was sent to the model. *solutions* is
        an empty string when nothing was recognized or recognition failed.
    """
    # Convert frame to grayscale and binarize (inverted) before recognition.
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    _, thresholded = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY_INV)

    # Convert OpenCV image to PIL image
    image = Image.fromarray(thresholded)

    recognized_equation = recognize_equation_with_genai(image)
    recognition_failed = (
        not recognized_equation
        or recognized_equation.startswith(RECOGNITION_ERROR_PREFIX)
    )
    if recognition_failed:
        # Don't hand an error message (or nothing) to the solver; that would
        # only produce a second, misleading error.
        solutions = ""
    else:
        solutions = solve_equation(recognized_equation)

    return recognized_equation, solutions, image


def _read_first_frame(video_path):
    """Return the first frame of the video at *video_path*, or ``None``."""
    capture = cv2.VideoCapture(video_path)
    try:
        ok, frame = capture.read()
    finally:
        capture.release()
    return frame if ok else None


def main():
    """Build the Gradio interface and launch the application."""
    # Initialize Google Generative AI
    initialize_genai()

    with gr.Blocks() as demo:
        gr.Markdown("## Virtual Math Calculator with Google Generative AI")

        with gr.Row():
            # NOTE(review): `source=` is the Gradio 3.x spelling; Gradio 4+
            # expects `sources=["webcam"]` - confirm the installed version.
            video_input = gr.Video(source="webcam", label="Record your video")
            output_text = gr.Textbox(label="Recognized Equation")
            output_solutions = gr.Textbox(label="Solution")
            output_image = gr.Image(label="Captured Image")

        def process_video(video):
            """Handle a new recording: analyze the first frame of the video.

            *video* is the path of the recorded file, or ``None`` when the
            input has been cleared.
            """
            if not video:
                return "", "", None

            frame = _read_first_frame(video)
            if frame is None:
                return "Error reading video: could not decode the first frame.", "", None

            return process_frame(frame)

        video_input.change(
            process_video,
            inputs=video_input,
            outputs=[output_text, output_solutions, output_image],
        )

    demo.launch()


if __name__ == "__main__":
    main()