|
import asyncio
import os
import subprocess
import tempfile

import gradio as gr
import openai
import requests
from gtts import gTTS
|
|
|
# Credentials come from the environment so no secret is stored in the source.
openai.api_key = os.environ.get("OPENAI_API_KEY")
|
|
|
# Hugging Face Inference API endpoint of the speech-to-text model that
# query() posts audio to.
API_URL = "https://api-inference.huggingface.co/models/lyimo/whisper-small-sw2"

# Only send an Authorization header when a token is configured; formatting a
# missing token would otherwise produce the literal value "Bearer None".
_hf_token = os.getenv("HUGGINGFACE_API_KEY")
headers = {"Authorization": f"Bearer {_hf_token}"} if _hf_token else {}
|
|
|
def query(filename, timeout=60):
    """Upload an audio file to the inference endpoint and return its JSON reply.

    Args:
        filename: Path of the audio file whose raw bytes are posted.
        timeout: Seconds to wait for the endpoint before giving up.

    Returns:
        The decoded JSON body of the HTTP response.

    Raises:
        OSError: If the file cannot be opened or read.
        requests.RequestException: If the request fails or times out.
    """
    with open(filename, "rb") as f:
        data = f.read()
    # requests waits indefinitely by default; a timeout keeps a stalled
    # connection from hanging the caller forever.
    response = requests.post(API_URL, headers=headers, data=data, timeout=timeout)
    return response.json()
|
|
|
|
|
def transcribe(audio):
    """Return the transcription of the audio file at path ``audio``.

    Args:
        audio: Path of the audio file, forwarded to ``query``.

    Returns:
        The value stored under the ``"text"`` key of the endpoint's reply.

    Raises:
        RuntimeError: If the reply carries no ``"text"`` entry.
    """
    output = query(audio)
    # The endpoint may answer with something other than a transcription
    # (e.g. an {"error": ...} payload); report that instead of a bare KeyError.
    if not isinstance(output, dict) or "text" not in output:
        detail = output.get("error", output) if isinstance(output, dict) else output
        raise RuntimeError(f"Transcription failed: {detail}")
    return output["text"]
|
|
|
def generate_response(transcribed_text):
    """Ask the chat model to answer ``transcribed_text`` and return its reply.

    The conversation is primed with a system instruction and three example
    exchanges before the user's own text is appended as the final message.

    Args:
        transcribed_text: The user's question as plain text.

    Returns:
        The content of the first choice in the chat completion.
    """
    system_prompt = {
        "role": "system",
        "content": " All your answers should be in swahili only, users undertands swahili only, so here we start... Wewe ni mtaalamu wa viazi lishe na utajibu maswali yote kwa kiswahili tu!",
    }
    # Example question/answer pairs shown to the model ahead of the real question.
    examples = [
        {"role": "user", "content": "Mambo vipi?"},
        {"role": "assistant", "content": "Salama je una swali lolote kuhusu viazi lishe?"},
        {"role": "user", "content": "nini maana ya Viazi lishe?"},
        {
            "role": "assistant",
            "content": " viazi lishe ni Viazi vitamu vyenye rangi ya karoti kwa ndani ambavyo vina vitamin A kwa wingi",
        },
        {"role": "user", "content": "nini matumizi ya viazi lishe?"},
        {
            "role": "assistant",
            "content": (
                " viazi lishe vinaweza kutengenezea chakula kama Keki,\n"
                "Maandazi, Kalimati na tambi: Ukisaga unga wa viazi lishe,\n"
                "unaweza kutumika kupika vyakula ambavyo huwa watu\n"
                "hutumia unga wa ngano kupika, unga wa viazi lishe una\n"
                "virutubisho vingi zaidi kuliko unga wa ngano na\n"
                "ukitumika kupikia vyakula tajwa hapo juu watumiaji\n"
                "watakuwa wanakula vyakula vyenye virutubisho Zaidi."
            ),
        },
    ]
    conversation = [system_prompt, *examples, {"role": "user", "content": transcribed_text}]

    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=conversation,
    )
    first_choice = completion["choices"][0]
    return first_choice["message"]["content"]
|
|
|
def inference(text):
    """Synthesize Swahili speech for ``text`` and return the audio file path.

    Args:
        text: The text to be spoken.

    Returns:
        Path of a newly created audio file containing the synthesized speech.
        The file is not removed by this function.
    """
    # gTTS produces MP3 data, so the file is labelled as such, and a unique
    # temporary name keeps concurrent requests from overwriting each other's
    # audio (a single fixed filename was shared by every call).
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp:
        output_file = tmp.name
    tts = gTTS(text, lang="sw")
    tts.save(output_file)
    return output_file
|
|
|
|
|
def process_audio_and_respond(audio):
    """Run the voice pipeline: transcribe the question, answer it, speak the answer.

    Args:
        audio: Path of the recorded question, or a falsy value (``None`` or
            an empty string) when there is no recording.

    Returns:
        A ``(response_text, output_file)`` tuple: the generated answer and
        the path of its spoken version. ``("", None)`` when there is no
        recording to process.
    """
    # Nothing was recorded: return empty outputs rather than failing while
    # trying to open a non-existent file.
    if not audio:
        return "", None
    text = transcribe(audio)
    response_text = generate_response(text)
    output_file = inference(response_text)
    return response_text, output_file
|
|
|
# Web UI: one microphone recording in, the answer out as both text and audio.
demo = gr.Interface(
    fn=process_audio_and_respond,
    inputs=gr.inputs.Audio(
        source="microphone",
        type="filepath",
        label="Bonyeza kitufe cha kurekodi na uliza swali lako",
    ),
    outputs=[
        gr.outputs.Textbox(label="Jibu (kwa njia ya maandishi)"),
        gr.outputs.Audio(
            type="filepath",
            label="Jibu kwa njia ya sauti (Bofya kusikiliza Jibu)",
        ),
    ],
    title="Mtaalamu wa Viazi Lishe",
    description="Uliza Mtaalamu wetu swali lolote Kuhusu viazi Lishe",
    theme="compact",
    layout="vertical",
    allow_flagging=False,
    live=True,
)

# launch() is an ordinary call, not a coroutine, so it must not be wrapped in
# asyncio.run(), which rejects anything that is not a coroutine.
demo.launch()
|
|