Spaces:
Running
Running
tweak the vision_api prompt, create configuration files, minor tweak to main script
e29216a
import streamlit as st | |
import os, base64, requests | |
# OpenAI API key, read once from the environment at import time (None if unset).
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
def get_transcribed_text(
    base64_image: str,
    *,
    model: str = "gpt-4-vision-preview",
    max_tokens: int = 300,
    timeout: float = 60,
) -> str:
    """Ask the OpenAI chat-completions API to transcribe an image into text.

    The image is embedded in the request as a ``data:image/jpeg;base64,...``
    URL together with a fixed instruction prompt, and the content of the
    first returned choice is handed back to the caller.

    Args:
        base64_image: Base64-encoded image data. It is always labelled as
            JPEG in the data URL, whatever the real format is.
        model: Name of the model to query.
        max_tokens: Upper bound on the number of tokens in the reply; long
            transcriptions are cut off at this limit.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The message content of the first choice in the API response.

    Raises:
        RuntimeError: If ``OPENAI_API_KEY`` is unset or empty.
        requests.HTTPError: If the API answers with a 4xx/5xx status.
        requests.Timeout: If the request exceeds ``timeout``.
    """
    # Fail early with a clear message instead of sending "Bearer None".
    if not OPENAI_API_KEY:
        raise RuntimeError("OPENAI_API_KEY environment variable is not set")

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {OPENAI_API_KEY}",
    }
    image_prompt = (
        "Understand and interpret the image properly, there could be "
        "handwritten notes or scribbles beside the electronic text. "
        "Once you have sufficient understanding of the image, "
        "transcribed them into text. If the content is a question, "
        "convert the question into text."
    )
    payload = {
        "model": model,
        "messages": [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": image_prompt},
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/jpeg;base64,{base64_image}",
                        },
                    },
                ],
            },
        ],
        "max_tokens": max_tokens,
    }

    # Without a timeout, requests waits forever on a stalled connection.
    response = requests.post(
        "https://api.openai.com/v1/chat/completions",
        headers=headers,
        json=payload,
        timeout=timeout,
    )
    # Surface API errors (bad key, quota, unknown model) as HTTPError rather
    # than as a KeyError on the missing "choices" field below.
    response.raise_for_status()

    return response.json()["choices"][0]["message"]["content"]