TomData committed on
Commit
c98215f
·
1 Parent(s): 85df319

bug fix db_input

Browse files
Files changed (3) hide show
  1. Home.py +1 -0
  2. src/chatbot.py +19 -12
  3. src/vectordatabase.py +6 -11
Home.py CHANGED
@@ -5,6 +5,7 @@ from datetime import datetime
5
 
6
 
7
  legislature_periods = [
 
8
  "20. Legislaturperiode",
9
  "19. Legislaturperiode",
10
  "18. Legislaturperiode",
 
5
 
6
 
7
  legislature_periods = [
8
+ "All",
9
  "20. Legislaturperiode",
10
  "19. Legislaturperiode",
11
  "18. Legislaturperiode",
src/chatbot.py CHANGED
@@ -28,18 +28,27 @@ llm = HuggingFaceHub(
28
  #,huggingfacehub_api_token
29
 
30
  )
31
- # To Do: Experiment with different templates replying in german or english depending on the input language
32
- prompt1 = ChatPromptTemplate.from_template("""<s>[INST]
33
  Instruction: Beantworte die folgende Frage auf deutsch und nur auf der Grundlage des angegebenen Kontexts:
34
 
35
  Context: {context}
36
 
37
  Question: {input}
38
  [/INST]"""
39
- # Returns the answer in English!?
40
  )
 
41
 
42
- prompt2 = ChatPromptTemplate.from_template("""Beantworte die folgende Frage auf deutsch und nur auf der Grundlage des angegebenen Kontexts:
 
 
 
 
 
 
 
 
43
 
44
  <context>
45
  {context}
@@ -56,16 +65,14 @@ prompt2 = ChatPromptTemplate.from_template("""Beantworte die folgende Frage auf
56
  #index_name = "legislature20"
57
  #db = get
58
 
59
-
60
-
61
-
62
-
63
-
64
- def chatbot(message, history, db_inputs, llm=llm, prompt=prompt2):
65
  db = get_vectorstore(inputs = db_inputs, embeddings=embeddings)
66
  raw_response = RAG(llm=llm, prompt=prompt, db=db, question=message)
67
- # Only necessary because mistral does not give beautiful outputs
68
- response = raw_response['answer'].split("Antwort: ")[1]
 
 
 
69
  return response
70
 
71
 
 
28
  #,huggingfacehub_api_token
29
 
30
  )
31
+ # To Do: Experiment with different templates
32
+ prompt_test = ChatPromptTemplate.from_template("""<s>[INST]
33
  Instruction: Beantworte die folgende Frage auf deutsch und nur auf der Grundlage des angegebenen Kontexts:
34
 
35
  Context: {context}
36
 
37
  Question: {input}
38
  [/INST]"""
39
+
40
  )
41
+ prompt_de = ChatPromptTemplate.from_template("""Beantworte die folgende Frage auf deutsch und nur auf der Grundlage des angegebenen Kontexts:
42
 
43
+ <context>
44
+ {context}
45
+ </context>
46
+
47
+ Frage: {input}
48
+ """
49
+ # Returns the answer in German
50
+ )
51
+ prompt_en = ChatPromptTemplate.from_template("""Beantworte die folgende Frage auf deutsch und nur auf der Grundlage des angegebenen Kontexts:
52
 
53
  <context>
54
  {context}
 
65
  #index_name = "legislature20"
66
  #db = get
67
 
68
def chatbot(message, history, db_inputs, llm=llm, prompt=prompt_de):
    """Answer a user question via RAG over the selected vector store(s).

    Parameters
    ----------
    message : str
        The user's question.
    history : list
        Chat history; unused here but required by the chat-UI callback
        signature — TODO confirm against the caller.
    db_inputs : list
        Legislature-period selection forwarded to ``get_vectorstore``.
    llm :
        Language model used to generate the answer (module-level default).
    prompt :
        Chat prompt template for the RAG chain (German template by default).

    Returns
    -------
    str
        The model's answer, with the leading "Antwort: " scaffold removed
        when present.
    """
    db = get_vectorstore(inputs=db_inputs, embeddings=embeddings)
    raw_response = RAG(llm=llm, prompt=prompt, db=db, question=message)
    # Mistral echoes its prompt scaffold in the output; keep only the text
    # after the "Antwort: " marker when the marker is present.
    try:
        response = raw_response['answer'].split("Antwort: ")[1]
    except IndexError:
        # Narrowed from a bare `except:`: the only expected failure of this
        # post-processing is the marker being absent (split yields one part).
        response = raw_response['answer']
    return response
77
 
78
 
src/vectordatabase.py CHANGED
@@ -9,25 +9,20 @@ from langchain.chains import create_retrieval_chain
9
  from faiss import IndexFlatL2
10
  from langchain_community.docstore.in_memory import InMemoryDocstore
11
  from langchain.embeddings import SentenceTransformerEmbeddings
12
- import functools
13
-
14
-
15
-
16
 
17
  import pandas as pd
18
-
19
  import os
20
- #from dotenv import load_dotenv
21
 
22
- #Load environmental variables from .env-file
23
- #load_dotenv()
 
24
 
25
 
26
  # Load documents to create a vectorstore later
27
  def load_documents(df):
28
- # To Do: Create one initial vectore store loading all the documents with this function
29
- #loader = CSVLoader(index_name, source_column="speech_content") #unprocessed csv file
30
- loader = DataFrameLoader(data_frame=df, page_content_column='speech_content') #df
31
  data = loader.load()
32
  splitter = RecursiveCharacterTextSplitter(
33
  chunk_size=1024,
 
9
  from faiss import IndexFlatL2
10
  from langchain_community.docstore.in_memory import InMemoryDocstore
11
  from langchain.embeddings import SentenceTransformerEmbeddings
12
+ #import functools
 
 
 
13
 
14
  import pandas as pd
 
15
  import os
 
16
 
17
+ # For local run load environmental variables from .env-file
18
+ # from dotenv import load_dotenv
19
+ # load_dotenv()
20
 
21
 
22
  # Load documents to create a vectorstore later
23
  def load_documents(df):
24
+
25
+ loader = DataFrameLoader(data_frame=df, page_content_column='speech_content')
 
26
  data = loader.load()
27
  splitter = RecursiveCharacterTextSplitter(
28
  chunk_size=1024,