Basic working version that uses a random CSV row
Browse files- anki_japanese_english_pairs.csv +1 -0
- app.py +43 -43
anki_japanese_english_pairs.csv
CHANGED
@@ -1,3 +1,4 @@
|
|
|
|
1 |
これは本だ・です。,This is a book.
|
2 |
コンサートは八時からだ・です。,The concert is from eight o'clock.
|
3 |
あの人は先生だ。,That person is a teacher.
|
|
|
1 |
+
Japanese sentence,English sentence
|
2 |
これは本だ・です。,This is a book.
|
3 |
コンサートは八時からだ・です。,The concert is from eight o'clock.
|
4 |
あの人は先生だ。,That person is a teacher.
|
app.py
CHANGED
@@ -16,6 +16,7 @@ import spacy
|
|
16 |
#from langchain.chat_models import ChatOpenAI
|
17 |
from langchain_openai import ChatOpenAI
|
18 |
from langchain.schema import AIMessage, HumanMessage
|
|
|
19 |
|
20 |
# Load environment variables from .env file
|
21 |
load_dotenv()
|
@@ -24,9 +25,9 @@ load_dotenv()
|
|
24 |
HF_TOKEN = os.getenv('HUGGING_FACE_TOKEN')
|
25 |
|
26 |
# openai setup
|
27 |
-
client = OpenAI(
|
28 |
-
|
29 |
-
)
|
30 |
|
31 |
# hugging face setup
|
32 |
#model_name = "mmnga/ELYZA-japanese-Llama-2-7b-instruct-gguf"
|
@@ -47,64 +48,63 @@ def split_sentences_ginza(input_text):
|
|
47 |
sentences = [sent.text for sent in doc.sents]
|
48 |
return sentences
|
49 |
|
50 |
-
def query_hf(payload, model_name):
|
51 |
-
# HTTP POST Request
|
52 |
-
response = requests.post(API_URL+model_name, headers=headers, json=payload)
|
53 |
-
return response.json()
|
54 |
|
55 |
-
|
56 |
-
debug_print("Translating... ", input_text)
|
57 |
|
58 |
-
|
59 |
-
|
|
|
|
|
|
|
60 |
|
61 |
-
|
62 |
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
response = query_hf({
|
67 |
-
"inputs": sentence.strip(),
|
68 |
-
"options": {"wait_for_model": True}
|
69 |
-
}, "Helsinki-NLP/opus-mt-ja-en")
|
70 |
|
71 |
-
|
72 |
-
translated_sentence = response[0]["translation_text"]
|
73 |
-
translated_sentences.append(translated_sentence)
|
74 |
|
75 |
-
|
76 |
-
|
77 |
|
78 |
-
|
|
|
79 |
|
80 |
|
81 |
-
|
82 |
|
83 |
-
prompt = "Translate the following text into Japanese language: " + input_text
|
84 |
|
85 |
-
|
86 |
-
messages=[
|
87 |
-
{
|
88 |
-
"role": "user",
|
89 |
-
"content": prompt,
|
90 |
-
}
|
91 |
-
],
|
92 |
-
model="gpt-3.5-turbo",
|
93 |
-
temperature=0 # should be the same translation every time
|
94 |
-
)
|
95 |
-
translation = response.choices[0].message.content
|
96 |
-
debug_print("GPT translation:", translation)
|
97 |
-
|
98 |
-
return translation
|
99 |
|
100 |
|
101 |
llm = ChatOpenAI(temperature=0.7, model='gpt-3.5-turbo')
|
102 |
|
103 |
def predict(message, history):
|
104 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
105 |
for human, ai in history:
|
106 |
-
history_langchain_format.append(HumanMessage(content=human)) #
|
107 |
history_langchain_format.append(AIMessage(content=ai))
|
|
|
108 |
history_langchain_format.append(HumanMessage(content=message))
|
109 |
gpt_response = llm(history_langchain_format)
|
110 |
return gpt_response.content
|
|
|
16 |
#from langchain.chat_models import ChatOpenAI
|
17 |
from langchain_openai import ChatOpenAI
|
18 |
from langchain.schema import AIMessage, HumanMessage
|
19 |
+
import pandas as pd
|
20 |
|
21 |
# Load environment variables from .env file
|
22 |
load_dotenv()
|
|
|
25 |
HF_TOKEN = os.getenv('HUGGING_FACE_TOKEN')
|
26 |
|
27 |
# openai setup
|
28 |
+
# client = OpenAI(
|
29 |
+
# api_key=os.getenv('OPENAI_API_KEY')
|
30 |
+
# )
|
31 |
|
32 |
# hugging face setup
|
33 |
#model_name = "mmnga/ELYZA-japanese-Llama-2-7b-instruct-gguf"
|
|
|
48 |
sentences = [sent.text for sent in doc.sents]
|
49 |
return sentences
|
50 |
|
|
|
|
|
|
|
|
|
51 |
|
52 |
+
file_path = 'anki_japanese_english_pairs.csv'
|
|
|
53 |
|
54 |
+
def load_csv(file_path):
|
55 |
+
# Load the CSV file into a DataFrame
|
56 |
+
df = pd.read_csv(file_path)
|
57 |
+
|
58 |
+
return df
|
59 |
|
60 |
+
def get_sentence_pair(df):
|
61 |
|
62 |
+
# Get a random row from the DataFrame
|
63 |
+
random_row = df.sample(1)
|
64 |
+
#debug_print("### random_row:", random_row)
|
|
|
|
|
|
|
|
|
65 |
|
66 |
+
#print(random_row.shape)
|
|
|
|
|
67 |
|
68 |
+
japanese_sentence = str(random_row.iloc[0, 0])
|
69 |
+
english_sentence = str(random_row.iloc[0, 1])
|
70 |
|
71 |
+
debug_print("### Japanese sentence:", japanese_sentence)
|
72 |
+
debug_print("### English sentence:", english_sentence)
|
73 |
|
74 |
|
75 |
+
return japanese_sentence, english_sentence
|
76 |
|
|
|
77 |
|
78 |
+
japanese_sentence, english_sentence = get_sentence_pair(load_csv(file_path))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
79 |
|
80 |
|
81 |
llm = ChatOpenAI(temperature=0.7, model='gpt-3.5-turbo')
|
82 |
|
83 |
def predict(message, history):
|
84 |
+
|
85 |
+
# Define your initial setup prompt here
|
86 |
+
initial_setup = f'''
|
87 |
+
|
88 |
+
Japanese students are learning to translate Japanese text to English text. They will be given a Japanese sentence to translate, and will provide an English translation attempt.
|
89 |
+
Based on the feedback you provide, they will revise their translation. This process will continue until their translation is accurate.
|
90 |
+
|
91 |
+
Encourage the student by specifying the strengths of their writing.
|
92 |
+
DO NOT PROVIDE THE CORRECT ENGLISH TRANSLATION. Let the student work it out.
|
93 |
+
The student's translation need not match the provided English translation exactly, but it should be accurate to the Japanese text.
|
94 |
+
Provide your feedback as a list.
|
95 |
+
|
96 |
+
Execute the following tasks step by step:
|
97 |
+
1. Ask the student to translate the following sentence from Japanese to English: {japanese_sentence}. Here is the English translation for reference: {english_sentence}
|
98 |
+
2. Suggest only mechanical corrections (i.e., spelling, grammar, and punctuation) for the student. Ask for another translation attempt.
|
99 |
+
'''
|
100 |
+
# Start your history with a SystemMessage containing the setup prompt
|
101 |
+
history_langchain_format = [AIMessage(content=initial_setup)]
|
102 |
+
|
103 |
+
|
104 |
for human, ai in history:
|
105 |
+
history_langchain_format.append(HumanMessage(content=human)) # convert to str to avoid error; not compatible with multimodal
|
106 |
history_langchain_format.append(AIMessage(content=ai))
|
107 |
+
|
108 |
history_langchain_format.append(HumanMessage(content=message))
|
109 |
gpt_response = llm(history_langchain_format)
|
110 |
return gpt_response.content
|