File size: 3,178 Bytes
a40ce12
 
e6c4009
c827f71
5ada59d
 
cffcfb8
e6c4009
49db081
a4cdc2d
5ada59d
 
a4cdc2d
5ada59d
 
 
 
 
49db081
e6c4009
5ada59d
 
 
 
c827f71
49db081
 
 
5ada59d
49db081
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c827f71
5ada59d
 
 
 
c827f71
d62dd0b
cffcfb8
 
 
 
5ada59d
c827f71
d62dd0b
 
 
 
 
 
a40ce12
5ada59d
 
d62dd0b
5ada59d
 
 
 
 
 
a40ce12
cffcfb8
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
import os
import subprocess
import openai
import gradio as gr
import requests
from gtts import gTTS
import asyncio  # Import asyncio for the asynchronous functionality

# Credentials are read from the environment so no secret lives in the source.
# A missing variable yields None here; nothing validates it at import time.
openai.api_key = os.environ.get("OPENAI_API_KEY")

# Hosted inference endpoint of the Swahili Whisper speech-to-text model.
API_URL = "https://api-inference.huggingface.co/models/lyimo/whisper-small-sw2"
# Bearer-token header sent with every request to API_URL.
headers = {"Authorization": "Bearer {}".format(os.environ.get("HUGGINGFACE_API_KEY"))}

def query(filename, timeout=120):
    """Upload an audio file to the speech-to-text endpoint at ``API_URL``.

    Args:
        filename: Path of the audio file; its raw bytes form the request body.
        timeout: Seconds to wait for the endpoint before giving up.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the endpoint answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    with open(filename, "rb") as f:
        data = f.read()
    # requests has no default timeout; without one a stalled connection
    # would block this call (and the UI request behind it) forever.
    response = requests.post(API_URL, headers=headers, data=data, timeout=timeout)
    # Fail here with the HTTP status instead of letting an error payload
    # travel on and blow up later as a missing key.
    response.raise_for_status()
    return response.json()


def transcribe(audio):
    """Return the text transcribed from the audio file at path ``audio``.

    Args:
        audio: Path of the recorded audio file, passed straight to ``query``.

    Returns:
        The value stored under the ``"text"`` key of the endpoint's response.

    Raises:
        RuntimeError: If the response carries no ``"text"`` field, e.g. when
            the endpoint replied with an error payload instead.
    """
    output = query(audio)
    # An error reply has no "text" key; report what the service actually
    # returned rather than failing with a bare KeyError.
    if not isinstance(output, dict) or "text" not in output:
        raise RuntimeError(f"Transcription failed, unexpected response: {output!r}")
    return output["text"]

def generate_response(transcribed_text):
    """Ask the chat model to answer the user's question.

    The request consists of a system instruction, three example
    question/answer exchanges, and finally ``transcribed_text`` as the
    user's message.

    Args:
        transcribed_text: The user's question as text.

    Returns:
        The content of the first choice in the completion response.
    """
    # (role, content) pairs in conversation order. The continuation lines of
    # the multi-line answer keep their leading spaces on purpose: they are
    # part of the string sent to the model.
    turns = [
        ("system", " All your answers should be in swahili only, users undertands swahili only, so here we start... Wewe ni mtaalamu wa viazi lishe na utajibu maswali yote kwa kiswahili tu!"),
        ("user", "Mambo vipi?"),
        ("assistant", "Salama je una swali lolote kuhusu viazi lishe?"),
        ("user", "nini maana ya Viazi lishe?"),
        ("assistant", " viazi lishe ni Viazi vitamu vyenye rangi ya karoti kwa ndani ambavyo vina vitamin A kwa wingi"),
        ("user", "nini matumizi ya viazi lishe?"),
        ("assistant", """ viazi lishe vinaweza kutengenezea chakula kama Keki,
           Maandazi, Kalimati na tambi: Ukisaga unga wa viazi lishe,
           unaweza kutumika kupika vyakula ambavyo huwa watu
           hutumia unga wa ngano kupika, unga wa viazi lishe una
           virutubisho vingi zaidi kuliko unga wa ngano na
           ukitumika kupikia vyakula tajwa hapo juu watumiaji
           watakuwa wanakula vyakula vyenye virutubisho Zaidi."""),
        ("user", transcribed_text),
    ]
    conversation = [{"role": role, "content": content} for role, content in turns]

    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=conversation,
    )
    first_choice = completion["choices"][0]
    return first_choice["message"]["content"]

def inference(text):
    """Synthesize ``text`` as Swahili speech and return the audio file path.

    Each call writes to its own temporary file, so simultaneous requests
    cannot overwrite one another's audio. gTTS produces MP3 data, hence the
    ``.mp3`` suffix. The file is not deleted here; removing it is left to
    the caller or the operating system's temp-directory cleanup.

    Args:
        text: The text to speak.

    Returns:
        Path of the newly written MP3 file.
    """
    import tempfile

    # mkstemp creates the file atomically with a unique name; only the path
    # is needed because gTTS opens the file itself, so close the descriptor.
    fd, output_file = tempfile.mkstemp(prefix="tts_output_", suffix=".mp3")
    os.close(fd)

    tts = gTTS(text, lang="sw")
    tts.save(output_file)
    return output_file


def process_audio_and_respond(audio):
    """Run the full pipeline: speech -> text -> answer -> speech.

    This name used to be defined twice; the earlier ``async`` variant was
    immediately overwritten by this synchronous one and never ran, so only
    the definition that was actually in effect is kept.

    Args:
        audio: Path of the recorded question audio file.

    Returns:
        A ``(response_text, output_file)`` tuple: the model's written answer
        and the path of the audio file that speaks it.
    """
    text = transcribe(audio)
    response_text = generate_response(text)
    output_file = inference(response_text)
    return response_text, output_file

# Web UI: one microphone input, answered with both text and spoken audio.
demo = gr.Interface(
    process_audio_and_respond,
    gr.inputs.Audio(source="microphone", type="filepath", label="Bonyeza kitufe cha kurekodi na uliza swali lako"),
    [gr.outputs.Textbox(label="Jibu (kwa njia ya maandishi)"), gr.outputs.Audio(type="filepath", label="Jibu kwa njia ya sauti (Bofya kusikiliza Jibu)")],
    title="Mtaalamu wa Viazi Lishe",
    description="Uliza Mtaalamu wetu swali lolote Kuhusu viazi Lishe",
    theme="compact",
    layout="vertical",
    allow_flagging=False,
    live=True,
)
# launch() is an ordinary synchronous call, not a coroutine. asyncio.run()
# accepts only coroutines and raises ValueError for anything else, so the
# previous asyncio.run(demo.launch()) crashed as soon as launch() returned.
demo.launch()