Basic working version that uses a random CSV row
Browse files- anki_japanese_english_pairs.csv +1 -0
- app.py +43 -43
anki_japanese_english_pairs.csv
CHANGED
@@ -1,3 +1,4 @@
|
|
|
|
1 |
これは本だ・です。,This is a book.
|
2 |
コンサートは八時からだ・です。,The concert is from eight o'clock.
|
3 |
あの人は先生だ。,That person is a teacher.
|
|
|
1 |
+
Japanese sentence,English sentence
|
2 |
これは本だ・です。,This is a book.
|
3 |
コンサートは八時からだ・です。,The concert is from eight o'clock.
|
4 |
あの人は先生だ。,That person is a teacher.
|
app.py
CHANGED
@@ -16,6 +16,7 @@ import spacy
|
|
16 |
#from langchain.chat_models import ChatOpenAI
|
17 |
from langchain_openai import ChatOpenAI
|
18 |
from langchain.schema import AIMessage, HumanMessage
|
|
|
19 |
|
20 |
# Load environment variables from .env file
|
21 |
load_dotenv()
|
@@ -24,9 +25,9 @@ load_dotenv()
|
|
24 |
HF_TOKEN = os.getenv('HUGGING_FACE_TOKEN')
|
25 |
|
26 |
# openai setup
|
27 |
-
client = OpenAI(
|
28 |
-
|
29 |
-
)
|
30 |
|
31 |
# hugging face setup
|
32 |
#model_name = "mmnga/ELYZA-japanese-Llama-2-7b-instruct-gguf"
|
@@ -47,64 +48,63 @@ def split_sentences_ginza(input_text):
|
|
47 |
sentences = [sent.text for sent in doc.sents]
|
48 |
return sentences
|
49 |
|
50 |
-
def query_hf(payload, model_name):
|
51 |
-
# HTTP POST Request
|
52 |
-
response = requests.post(API_URL+model_name, headers=headers, json=payload)
|
53 |
-
return response.json()
|
54 |
|
55 |
-
|
56 |
-
debug_print("Translating... ", input_text)
|
57 |
|
58 |
-
|
59 |
-
|
|
|
|
|
|
|
60 |
|
61 |
-
|
62 |
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
response = query_hf({
|
67 |
-
"inputs": sentence.strip(),
|
68 |
-
"options": {"wait_for_model": True}
|
69 |
-
}, "Helsinki-NLP/opus-mt-ja-en")
|
70 |
|
71 |
-
|
72 |
-
translated_sentence = response[0]["translation_text"]
|
73 |
-
translated_sentences.append(translated_sentence)
|
74 |
|
75 |
-
|
76 |
-
|
77 |
|
78 |
-
|
|
|
79 |
|
80 |
|
81 |
-
|
82 |
|
83 |
-
prompt = "Translate the following text into Japanese language: " + input_text
|
84 |
|
85 |
-
|
86 |
-
messages=[
|
87 |
-
{
|
88 |
-
"role": "user",
|
89 |
-
"content": prompt,
|
90 |
-
}
|
91 |
-
],
|
92 |
-
model="gpt-3.5-turbo",
|
93 |
-
temperature=0 # should be the same translation every time
|
94 |
-
)
|
95 |
-
translation = response.choices[0].message.content
|
96 |
-
debug_print("GPT translation:", translation)
|
97 |
-
|
98 |
-
return translation
|
99 |
|
100 |
|
101 |
llm = ChatOpenAI(temperature=0.7, model='gpt-3.5-turbo')
|
102 |
|
103 |
def predict(message, history):
|
104 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
105 |
for human, ai in history:
|
106 |
-
history_langchain_format.append(HumanMessage(content=human)) #
|
107 |
history_langchain_format.append(AIMessage(content=ai))
|
|
|
108 |
history_langchain_format.append(HumanMessage(content=message))
|
109 |
gpt_response = llm(history_langchain_format)
|
110 |
return gpt_response.content
|
|
|
16 |
#from langchain.chat_models import ChatOpenAI
|
17 |
from langchain_openai import ChatOpenAI
|
18 |
from langchain.schema import AIMessage, HumanMessage
|
19 |
+
import pandas as pd
|
20 |
|
21 |
# Load environment variables from .env file
|
22 |
load_dotenv()
|
|
|
25 |
HF_TOKEN = os.getenv('HUGGING_FACE_TOKEN')
|
26 |
|
27 |
# openai setup
|
28 |
+
# client = OpenAI(
|
29 |
+
# api_key=os.getenv('OPENAI_API_KEY')
|
30 |
+
# )
|
31 |
|
32 |
# hugging face setup
|
33 |
#model_name = "mmnga/ELYZA-japanese-Llama-2-7b-instruct-gguf"
|
|
|
48 |
sentences = [sent.text for sent in doc.sents]
|
49 |
return sentences
|
50 |
|
|
|
|
|
|
|
|
|
51 |
|
52 |
+
file_path = 'anki_japanese_english_pairs.csv'
|
|
|
53 |
|
54 |
+
def load_csv(file_path):
|
55 |
+
# Load the CSV file into a DataFrame
|
56 |
+
df = pd.read_csv(file_path)
|
57 |
+
|
58 |
+
return df
|
59 |
|
60 |
+
def get_sentence_pair(df):
|
61 |
|
62 |
+
# Get a random row from the DataFrame
|
63 |
+
random_row = df.sample(1)
|
64 |
+
#debug_print("### random_row:", random_row)
|
|
|
|
|
|
|
|
|
65 |
|
66 |
+
#print(random_row.shape)
|
|
|
|
|
67 |
|
68 |
+
japanese_sentence = str(random_row.iloc[0, 0])
|
69 |
+
english_sentence = str(random_row.iloc[0, 1])
|
70 |
|
71 |
+
debug_print("### Japanese sentence:", japanese_sentence)
|
72 |
+
debug_print("### English sentence:", english_sentence)
|
73 |
|
74 |
|
75 |
+
return japanese_sentence, english_sentence
|
76 |
|
|
|
77 |
|
78 |
+
japanese_sentence, english_sentence = get_sentence_pair(load_csv(file_path))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
79 |
|
80 |
|
81 |
llm = ChatOpenAI(temperature=0.7, model='gpt-3.5-turbo')
|
82 |
|
83 |
def predict(message, history):
|
84 |
+
|
85 |
+
# Define your initial setup prompt here
|
86 |
+
initial_setup = f'''
|
87 |
+
|
88 |
+
Japanese students are learning to translate Japanese text to English text. They will be given a Japanese sentence to translate, and will provide an English translation attempt.
|
89 |
+
Based on the feedback you provide, they will revise their translation. This process will continue until their translation is accurate.
|
90 |
+
|
91 |
+
Encourage the student by specifying the strengths of their writing.
|
92 |
+
DO NOT PROVIDE THE CORRECT ENGLISH TRANSLATION. Let the student work it out.
|
93 |
+
The student's translation need not match the provided English translation exactly, but it should be accurate to the Japanese text.
|
94 |
+
Provide your feedback as a list.
|
95 |
+
|
96 |
+
Execute the following tasks step by step:
|
97 |
+
1. Ask the student to translate the following sentence from Japanese to English: {japanese_sentence}. Here is the English translation for reference: {english_sentence}
|
98 |
+
2. Suggest only mechanical corrections (i.e., spelling, grammar, and punctuation) for the student. Ask for another translation attempt.
|
99 |
+
'''
|
100 |
+
# Start your history with a SystemMessage containing the setup prompt
|
101 |
+
history_langchain_format = [AIMessage(content=initial_setup)]
|
102 |
+
|
103 |
+
|
104 |
for human, ai in history:
|
105 |
+
history_langchain_format.append(HumanMessage(content=human)) # convert to str to avoid error; not compatible with multimodal
|
106 |
history_langchain_format.append(AIMessage(content=ai))
|
107 |
+
|
108 |
history_langchain_format.append(HumanMessage(content=message))
|
109 |
gpt_response = llm(history_langchain_format)
|
110 |
return gpt_response.content
|