PierreBrunelle committed on
Commit
210dd42
·
verified ·
1 Parent(s): 0e0864e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -27
app.py CHANGED
@@ -45,7 +45,7 @@ def process_files(pdf_files, chunk_limit, chunk_separator):
45
  {'document': pxt.DocumentType(nullable=True),
46
  'question': pxt.StringType(nullable=True)}
47
  )
48
-
49
  # Insert the PDF files into the documents table
50
  t.insert({'document': file.name} for file in pdf_files if file.name.endswith('.pdf'))
51
 
@@ -64,19 +64,16 @@ def process_files(pdf_files, chunk_limit, chunk_separator):
64
  # Add an embedding index to the chunks for similarity search
65
  chunks_t.add_embedding_index('text', string_embed=e5_embed)
66
 
67
- try:
68
- @chunks_t.query
69
- def top_k(query_text: str):
70
- sim = chunks_t.text.similarity(query_text)
71
- return (
72
- chunks_t.order_by(sim, asc=False)
73
- .select(chunks_t.text, sim=sim)
74
- .limit(5)
75
- )
76
- except Exception:
77
- pass
78
-
79
- # Add computed columns to the table for context retrieval and prompt creation
80
  t['question_context'] = chunks_t.top_k(t.question)
81
  t['prompt'] = create_prompt(
82
  t.question_context, t.question
@@ -115,11 +112,16 @@ def get_answer(msg):
115
 
116
  # Insert the question into the table
117
  t.insert([{'question': msg}])
118
-
119
- answer = t.select(t.gpt4omini).tail(1)['gpt4omini'][0]
120
 
121
  return answer
122
 
 
 
 
 
 
123
  # Gradio interface
124
  with gr.Blocks(theme=Monochrome()) as demo:
125
  gr.Markdown(
@@ -139,9 +141,9 @@ with gr.Blocks(theme=Monochrome()) as demo:
139
  )
140
 
141
  with gr.Row():
142
- with gr.Column():
143
  pdf_files = gr.File(label="Upload PDF Documents", file_count="multiple")
144
- chunk_limit = gr.Slider(minimum=100, maximum=500, value=300, step=5, label="Chunk Size Limit (only used when the separator is token_/char_limit)")
145
  chunk_separator = gr.Dropdown(
146
  choices=["token_limit", "char_limit", "sentence", "paragraph", "heading"],
147
  value="token_limit",
@@ -150,18 +152,13 @@ with gr.Blocks(theme=Monochrome()) as demo:
150
  process_button = gr.Button("Process Files")
151
  process_output = gr.Textbox(label="Processing Output")
152
 
153
- with gr.Column():
154
  chatbot = gr.Chatbot(label="Chat History")
155
- msg = gr.Textbox(label="Your Question")
156
  submit = gr.Button("Submit")
157
 
158
- def respond(message, chat_history):
159
- bot_message = get_answer(message)
160
- chat_history.append((message, bot_message))
161
- return "", chat_history
162
-
163
- submit.click(respond, inputs=[msg, chatbot], outputs=[msg, chatbot])
164
  process_button.click(process_files, inputs=[pdf_files, chunk_limit, chunk_separator], outputs=[process_output])
 
165
 
166
  if __name__ == "__main__":
167
- demo.launch(debug=True)
 
45
  {'document': pxt.DocumentType(nullable=True),
46
  'question': pxt.StringType(nullable=True)}
47
  )
48
+
49
  # Insert the PDF files into the documents table
50
  t.insert({'document': file.name} for file in pdf_files if file.name.endswith('.pdf'))
51
 
 
64
  # Add an embedding index to the chunks for similarity search
65
  chunks_t.add_embedding_index('text', string_embed=e5_embed)
66
 
67
+ @chunks_t.query
68
+ def top_k(query_text: str):
69
+ sim = chunks_t.text.similarity(query_text)
70
+ return (
71
+ chunks_t.order_by(sim, asc=False)
72
+ .select(chunks_t.text, sim=sim)
73
+ .limit(5)
74
+ )
75
+
76
+ # Add computed columns to the table for context retrieval and prompt creation
 
 
 
77
  t['question_context'] = chunks_t.top_k(t.question)
78
  t['prompt'] = create_prompt(
79
  t.question_context, t.question
 
112
 
113
  # Insert the question into the table
114
  t.insert([{'question': msg}])
115
+
116
+ answer = t.select(t.gpt4omini).where(t.question == msg).collect()['gpt4omini'][0]
117
 
118
  return answer
119
 
120
+ def respond(message, chat_history):
121
+ bot_message = get_answer(message)
122
+ chat_history.append((message, bot_message))
123
+ return "", chat_history
124
+
125
  # Gradio interface
126
  with gr.Blocks(theme=Monochrome()) as demo:
127
  gr.Markdown(
 
141
  )
142
 
143
  with gr.Row():
144
+ with gr.Column(scale=1):
145
  pdf_files = gr.File(label="Upload PDF Documents", file_count="multiple")
146
+ chunk_limit = gr.Slider(minimum=100, maximum=500, value=300, step=5, label="Chunk Size Limit")
147
  chunk_separator = gr.Dropdown(
148
  choices=["token_limit", "char_limit", "sentence", "paragraph", "heading"],
149
  value="token_limit",
 
152
  process_button = gr.Button("Process Files")
153
  process_output = gr.Textbox(label="Processing Output")
154
 
155
+ with gr.Column(scale=2):
156
  chatbot = gr.Chatbot(label="Chat History")
157
+ msg = gr.Textbox(label="Your Question", placeholder="Ask a question about the uploaded documents")
158
  submit = gr.Button("Submit")
159
 
 
 
 
 
 
 
160
  process_button.click(process_files, inputs=[pdf_files, chunk_limit, chunk_separator], outputs=[process_output])
161
+ submit.click(respond, inputs=[msg, chatbot], outputs=[msg, chatbot])
162
 
163
  if __name__ == "__main__":
164
+ demo.launch()