cnstvariable committed
Commit c34fe27
1 Parent(s): ccb7d99

Upload 13 files

.gitattributes CHANGED
@@ -1,34 +1,8 @@
- *.7z filter=lfs diff=lfs merge=lfs -text
- *.arrow filter=lfs diff=lfs merge=lfs -text
- *.bin filter=lfs diff=lfs merge=lfs -text
- *.bz2 filter=lfs diff=lfs merge=lfs -text
- *.ckpt filter=lfs diff=lfs merge=lfs -text
- *.ftz filter=lfs diff=lfs merge=lfs -text
- *.gz filter=lfs diff=lfs merge=lfs -text
  *.h5 filter=lfs diff=lfs merge=lfs -text
- *.joblib filter=lfs diff=lfs merge=lfs -text
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
- *.model filter=lfs diff=lfs merge=lfs -text
- *.msgpack filter=lfs diff=lfs merge=lfs -text
- *.npy filter=lfs diff=lfs merge=lfs -text
- *.npz filter=lfs diff=lfs merge=lfs -text
- *.onnx filter=lfs diff=lfs merge=lfs -text
- *.ot filter=lfs diff=lfs merge=lfs -text
- *.parquet filter=lfs diff=lfs merge=lfs -text
- *.pb filter=lfs diff=lfs merge=lfs -text
- *.pickle filter=lfs diff=lfs merge=lfs -text
- *.pkl filter=lfs diff=lfs merge=lfs -text
- *.pt filter=lfs diff=lfs merge=lfs -text
- *.pth filter=lfs diff=lfs merge=lfs -text
- *.rar filter=lfs diff=lfs merge=lfs -text
- *.safetensors filter=lfs diff=lfs merge=lfs -text
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
- *.tar.* filter=lfs diff=lfs merge=lfs -text
- *.tflite filter=lfs diff=lfs merge=lfs -text
- *.tgz filter=lfs diff=lfs merge=lfs -text
- *.wasm filter=lfs diff=lfs merge=lfs -text
- *.xz filter=lfs diff=lfs merge=lfs -text
- *.zip filter=lfs diff=lfs merge=lfs -text
- *.zst filter=lfs diff=lfs merge=lfs -text
- *tfevents* filter=lfs diff=lfs merge=lfs -text
+ # Auto detect text files and perform LF normalization
+ * text=auto
  *.h5 filter=lfs diff=lfs merge=lfs -text
+ *..data-00000-of-00001 filter=lfs diff=lfs merge=lfs -text
+ *.data-00000-of-00001 filter=lfs diff=lfs merge=lfs -text
+ question_extractor_model_2_11/keras_metadata.pb filter=lfs diff=lfs merge=lfs -text
+ question_extractor_model_2_11/saved_model.pb filter=lfs diff=lfs merge=lfs -text
+ train_gpt_data.pkl filter=lfs diff=lfs merge=lfs -text
Dockerfile ADDED
@@ -0,0 +1,22 @@
+ # app/Dockerfile
+
+ FROM python:3.9-slim
+
+ WORKDIR /app
+
+ RUN apt-get update && apt-get install -y \
+     build-essential \
+     curl \
+     software-properties-common \
+     git \
+     && rm -rf /var/lib/apt/lists/*
+
+ RUN git clone https://github.com/streamlit/streamlit-example.git .
+
+ RUN pip3 install -r requirements.txt
+
+ EXPOSE 8501
+
+ HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health
+
+ ENTRYPOINT ["streamlit", "run", "streamlit_app.py", "--server.port=8501", "--server.address=0.0.0.0"]
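The `HEALTHCHECK` above probes Streamlit's built-in `/_stcore/health` endpoint with curl. For reference, a minimal sketch of the same probe in Python (a hypothetical helper, not part of the commit; assumes the container is running and mapped to localhost:8501):

```python
# health_probe.py - hypothetical equivalent of the Dockerfile's HEALTHCHECK:
# hit Streamlit's health endpoint and exit non-zero on any failure.
import sys
import urllib.request

try:
    with urllib.request.urlopen("http://localhost:8501/_stcore/health", timeout=5) as resp:
        sys.exit(0 if resp.status == 200 else 1)
except OSError:
    sys.exit(1)
```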
README.md CHANGED
@@ -1,12 +1 @@
- ---
- title: Med Bot
- emoji: 💻
- colorFrom: green
- colorTo: pink
- sdk: streamlit
- sdk_version: 1.17.0
- app_file: app.py
- pinned: false
- ---
-
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ # med-bot-gpt
deployment.py ADDED
@@ -0,0 +1,180 @@
+ # -*- coding: utf-8 -*-
+ """Untitled0.ipynb
+
+ Automatically generated by Colaboratory.
+
+ Original file is located at
+     https://colab.research.google.com/drive/13kE5uGoL2gfzSwTJli-WZolqCNBZXxNV
+ """
+
+ import tensorflow as tf
+ import numpy as np
+ import pandas as pd
+ import streamlit as st
+ import re
+ import os
+ import csv
+ from tqdm import tqdm
+ import faiss
+ from nltk.translate.bleu_score import sentence_bleu
+ from datetime import datetime
+
+ def decontractions(phrase):
+     """Takes text and converts contractions into their expanded, natural form.
+     ref: https://stackoverflow.com/questions/19790188/expanding-english-language-contractions-in-python/47091490#47091490"""
+     # specific
+     phrase = re.sub(r"won\'t", "will not", phrase)
+     phrase = re.sub(r"can\'t", "can not", phrase)
+     phrase = re.sub(r"won\’t", "will not", phrase)
+     phrase = re.sub(r"can\’t", "can not", phrase)
+
+     # general
+     phrase = re.sub(r"n\'t", " not", phrase)
+     phrase = re.sub(r"\'re", " are", phrase)
+     phrase = re.sub(r"\'s", " is", phrase)
+     phrase = re.sub(r"\'d", " would", phrase)
+     phrase = re.sub(r"\'ll", " will", phrase)
+     phrase = re.sub(r"\'t", " not", phrase)
+     phrase = re.sub(r"\'ve", " have", phrase)
+     phrase = re.sub(r"\'m", " am", phrase)
+
+     phrase = re.sub(r"n\’t", " not", phrase)
+     phrase = re.sub(r"\’re", " are", phrase)
+     phrase = re.sub(r"\’s", " is", phrase)
+     phrase = re.sub(r"\’d", " would", phrase)
+     phrase = re.sub(r"\’ll", " will", phrase)
+     phrase = re.sub(r"\’t", " not", phrase)
+     phrase = re.sub(r"\’ve", " have", phrase)
+     phrase = re.sub(r"\’m", " am", phrase)
+
+     return phrase
+
+
+ def preprocess(text):
+     # convert all the text into lower-case letters
+     # remove the words between brackets ()
+     # remove these characters: {'$', ')', '?', '"', '’', '.', '°', '!', ';', '/', "'", '€', '%', ':', ',', '('}
+     # replace these special characters with a space: '\u200b', '\xa0', '-', '/'
+
+     text = text.lower()
+     text = decontractions(text)
+     text = re.sub('[$)\?"’.°!;\'€%:,(/]', '', text)
+     text = re.sub('\u200b', ' ', text)
+     text = re.sub('\xa0', ' ', text)
+     text = re.sub('-', ' ', text)
+     return text
+
+
+ # importing the BioBERT tokenizer and loading the trained question-embedding extractor model
+
+ from transformers import AutoTokenizer, TFGPT2Model
+ @st.cache(allow_output_mutation=True)
+ def return_biobert_tokenizer_model():
+     '''returns the pretrained BioBERT tokenizer and question extractor model'''
+     biobert_tokenizer = AutoTokenizer.from_pretrained("cambridgeltl/BioRedditBERT-uncased")
+     question_extractor_model1 = tf.keras.models.load_model('question_extractor_model_2_11')
+     return biobert_tokenizer, question_extractor_model1
+
+
+ # importing the GPT-2 tokenizer and loading the trained GPT-2 model
+ from transformers import GPT2Tokenizer, TFGPT2LMHeadModel
+ @st.cache(allow_output_mutation=True)
+ def return_gpt2_tokenizer_model():
+     '''returns the pretrained GPT-2 tokenizer and GPT-2 model'''
+     gpt2_tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
+     tf_gpt2_model = TFGPT2LMHeadModel.from_pretrained("tf_gpt2_model_2_118_50000")
+     return gpt2_tokenizer, tf_gpt2_model
+
+ # preparing the FAISS search indexes
+ qa = pd.read_pickle('train_gpt_data.pkl')
+ question_bert = qa["Q_FFNN_embeds"].tolist()
+ answer_bert = qa["A_FFNN_embeds"].tolist()
+ question_bert = np.array(question_bert)
+ answer_bert = np.array(answer_bert)
+
+ question_bert = question_bert.astype('float32')
+ answer_bert = answer_bert.astype('float32')
+
+ answer_index = faiss.IndexFlatIP(answer_bert.shape[-1])
+
+ question_index = faiss.IndexFlatIP(question_bert.shape[-1])
+ answer_index.add(answer_bert)
+ question_index.add(question_bert)
+
+
+ print('finished initializing')
+
+ # defining a function to prepare the data for GPT-2 inference
+ # https://github.com/ash3n/DocProduct
+
+ def preparing_gpt_inference_data(gpt2_tokenizer, question, question_embedding):
+     topk = 20
+     scores, indices = answer_index.search(
+         question_embedding.astype('float32'), topk)
+     q_sub = qa.iloc[indices.reshape(20)]
+
+     line = '`QUESTION: %s `ANSWER: ' % (question)
+     encoded_len = len(gpt2_tokenizer.encode(line))
+     for i in q_sub.iterrows():
+         line = '`QUESTION: %s `ANSWER: %s ' % (i[1]['question'], i[1]['answer']) + line
+         line = line.replace('\n', '')
+         encoded_len = len(gpt2_tokenizer.encode(line))
+         if encoded_len >= 1024:
+             break
+     return gpt2_tokenizer.encode(line)[-1024:]
+
+
+
+ # function to generate an answer given a question and the required answer length
+
+ def give_answer(question, answer_len):
+     preprocessed_question = preprocess(question)
+     question_len = len(preprocessed_question.split(' '))
+     truncated_question = preprocessed_question
+     if question_len > 500:
+         truncated_question = ' '.join(preprocessed_question.split(' ')[:500])
+     biobert_tokenizer, question_extractor_model1 = return_biobert_tokenizer_model()
+     gpt2_tokenizer, tf_gpt2_model = return_gpt2_tokenizer_model()
+     encoded_question = biobert_tokenizer.encode(truncated_question)
+     max_length = 512
+     padded_question = tf.keras.preprocessing.sequence.pad_sequences(
+         [encoded_question], maxlen=max_length, padding='post')
+     question_mask = [[1 if token != 0 else 0 for token in question] for question in padded_question]
+     embeddings = question_extractor_model1({'question': np.array(padded_question), 'question_mask': np.array(question_mask)})
+     gpt_input = preparing_gpt_inference_data(gpt2_tokenizer, truncated_question, embeddings.numpy())
+     # locate the last occurrence of token id 4600 and cut the prompt just after it
+     mask_start = len(gpt_input) - list(gpt_input[::-1]).index(4600) + 1
+     input = gpt_input[:mask_start + 1]
+     if len(input) > (1024 - answer_len):
+         input = input[-(1024 - answer_len):]
+     gpt2_output = gpt2_tokenizer.decode(tf_gpt2_model.generate(input_ids=tf.constant([np.array(input)]), max_length=1024, temperature=0.7)[0])
+     answer = gpt2_output.rindex('`ANSWER: ')
+     return gpt2_output[answer + len('`ANSWER: '):]
+
+
+
+ # defining the final function to generate an answer, with a default answer length of 25
+ def final_func_1(question):
+     answer_len = 25
+     return give_answer(question, answer_len)
+
+
+ def main():
+     st.title('Medical Chatbot')
+     question = st.text_input('Question', "Type Here")
+     result = ""
+     if st.button('ask'):
+         # with st.spinner("You Know! an apple a day keeps the doctor away!"):
+         start = datetime.now()
+         result = final_func_1(question)
+         end_time = datetime.now()
+         st.success("Here is the answer")
+         st.text(result)
+         st.text("result received in " + str((end_time - start).total_seconds()) + " seconds")
+
+
+ if __name__ == '__main__':
+     main()
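The retrieval step in `preparing_gpt_inference_data` is plain FAISS inner-product search over precomputed embeddings. A self-contained sketch of that step, with random vectors standing in for the pickled `Q_FFNN_embeds`/`A_FFNN_embeds` columns (the dimension and data here are illustrative, not the real ones):

```python
import faiss
import numpy as np

dim, n_answers, topk = 768, 1000, 20

# stand-ins for the answer embeddings loaded from train_gpt_data.pkl
answer_embeds = np.random.rand(n_answers, dim).astype('float32')

# exact (non-quantized) inner-product index, as in the script above
answer_index = faiss.IndexFlatIP(dim)
answer_index.add(answer_embeds)

# one query embedding -> top-20 most similar answers, like the script
# does with the BioBERT question embedding
query = np.random.rand(1, dim).astype('float32')
scores, indices = answer_index.search(query, topk)
print(indices.shape)  # (1, 20); indices.reshape(20) indexes back into the dataframe
```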
question_extractor_model_2_11/keras_metadata.pb ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2eadb8131f377ce917571a19da0e644ebb369921e2a94178c208b76937f350ea
+ size 150810
question_extractor_model_2_11/saved_model.pb ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0158efad4ac4618241e29c652d5d24c5c7a641328af6d1d9e1cd993a3274c60f
+ size 6893930
question_extractor_model_2_11/variables/variables.data-00000-of-00001 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:50c81fe8ad9b3813d279bab35d6c029029183e9f5585f9bd2edc674133113cb6
+ size 435721428
question_extractor_model_2_11/variables/variables.index ADDED
Binary file (11.8 kB)
 
requirements.txt ADDED
@@ -0,0 +1,8 @@
+ tensorflow
+ Keras
+ opencv-python-headless
+ streamlit
+ transformers
+ faiss-cpu
+ nltk
+
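None of these dependencies are version-pinned. As a quick sanity check after installing them, a hypothetical smoke test mapping each package to its import name (note that `opencv-python-headless` imports as `cv2` and `faiss-cpu` as `faiss`):

```python
# hypothetical smoke test: PyPI package name != import name for two of these
import importlib

for package, module in [
    ("tensorflow", "tensorflow"),
    ("Keras", "keras"),
    ("opencv-python-headless", "cv2"),
    ("streamlit", "streamlit"),
    ("transformers", "transformers"),
    ("faiss-cpu", "faiss"),
    ("nltk", "nltk"),
]:
    importlib.import_module(module)
    print(f"{package}: ok")
```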
streamlit_app.py ADDED
@@ -0,0 +1,180 @@
+ # -*- coding: utf-8 -*-
+ """Untitled0.ipynb
+
+ Automatically generated by Colaboratory.
+
+ Original file is located at
+     https://colab.research.google.com/drive/13kE5uGoL2gfzSwTJli-WZolqCNBZXxNV
+ """
+
+ import tensorflow as tf
+ import numpy as np
+ import pandas as pd
+ import streamlit as st
+ import re
+ import os
+ import csv
+ from tqdm import tqdm
+ import faiss
+ from nltk.translate.bleu_score import sentence_bleu
+ from datetime import datetime
+
+ def decontractions(phrase):
+     """Takes text and converts contractions into their expanded, natural form.
+     ref: https://stackoverflow.com/questions/19790188/expanding-english-language-contractions-in-python/47091490#47091490"""
+     # specific
+     phrase = re.sub(r"won\'t", "will not", phrase)
+     phrase = re.sub(r"can\'t", "can not", phrase)
+     phrase = re.sub(r"won\’t", "will not", phrase)
+     phrase = re.sub(r"can\’t", "can not", phrase)
+
+     # general
+     phrase = re.sub(r"n\'t", " not", phrase)
+     phrase = re.sub(r"\'re", " are", phrase)
+     phrase = re.sub(r"\'s", " is", phrase)
+     phrase = re.sub(r"\'d", " would", phrase)
+     phrase = re.sub(r"\'ll", " will", phrase)
+     phrase = re.sub(r"\'t", " not", phrase)
+     phrase = re.sub(r"\'ve", " have", phrase)
+     phrase = re.sub(r"\'m", " am", phrase)
+
+     phrase = re.sub(r"n\’t", " not", phrase)
+     phrase = re.sub(r"\’re", " are", phrase)
+     phrase = re.sub(r"\’s", " is", phrase)
+     phrase = re.sub(r"\’d", " would", phrase)
+     phrase = re.sub(r"\’ll", " will", phrase)
+     phrase = re.sub(r"\’t", " not", phrase)
+     phrase = re.sub(r"\’ve", " have", phrase)
+     phrase = re.sub(r"\’m", " am", phrase)
+
+     return phrase
+
+
+ def preprocess(text):
+     # convert all the text into lower-case letters
+     # remove the words between brackets ()
+     # remove these characters: {'$', ')', '?', '"', '’', '.', '°', '!', ';', '/', "'", '€', '%', ':', ',', '('}
+     # replace these special characters with a space: '\u200b', '\xa0', '-', '/'
+
+     text = text.lower()
+     text = decontractions(text)
+     text = re.sub('[$)\?"’.°!;\'€%:,(/]', '', text)
+     text = re.sub('\u200b', ' ', text)
+     text = re.sub('\xa0', ' ', text)
+     text = re.sub('-', ' ', text)
+     return text
+
+
+ # importing the BioBERT tokenizer and loading the trained question-embedding extractor model
+
+ from transformers import AutoTokenizer, TFGPT2Model
+ @st.cache(allow_output_mutation=True)
+ def return_biobert_tokenizer_model():
+     '''returns the pretrained BioBERT tokenizer and question extractor model'''
+     biobert_tokenizer = AutoTokenizer.from_pretrained("cambridgeltl/BioRedditBERT-uncased")
+     question_extractor_model1 = tf.keras.models.load_model('question_extractor_model_2_11')
+     return biobert_tokenizer, question_extractor_model1
+
+
+ # importing the GPT-2 tokenizer and loading the trained GPT-2 model
+ from transformers import GPT2Tokenizer, TFGPT2LMHeadModel
+ @st.cache(allow_output_mutation=True)
+ def return_gpt2_tokenizer_model():
+     '''returns the pretrained GPT-2 tokenizer and GPT-2 model'''
+     gpt2_tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
+     tf_gpt2_model = TFGPT2LMHeadModel.from_pretrained("tf_gpt2_model_2_118_50000")
+     return gpt2_tokenizer, tf_gpt2_model
+
+ # preparing the FAISS search indexes
+ qa = pd.read_pickle('train_gpt_data.pkl')
+ question_bert = qa["Q_FFNN_embeds"].tolist()
+ answer_bert = qa["A_FFNN_embeds"].tolist()
+ question_bert = np.array(question_bert)
+ answer_bert = np.array(answer_bert)
+
+ question_bert = question_bert.astype('float32')
+ answer_bert = answer_bert.astype('float32')
+
+ answer_index = faiss.IndexFlatIP(answer_bert.shape[-1])
+
+ question_index = faiss.IndexFlatIP(question_bert.shape[-1])
+ answer_index.add(answer_bert)
+ question_index.add(question_bert)
+
+
+ print('finished initializing')
+
+ # defining a function to prepare the data for GPT-2 inference
+ # https://github.com/ash3n/DocProduct
+
+ def preparing_gpt_inference_data(gpt2_tokenizer, question, question_embedding):
+     topk = 20
+     scores, indices = answer_index.search(
+         question_embedding.astype('float32'), topk)
+     q_sub = qa.iloc[indices.reshape(20)]
+
+     line = '`QUESTION: %s `ANSWER: ' % (question)
+     encoded_len = len(gpt2_tokenizer.encode(line))
+     for i in q_sub.iterrows():
+         line = '`QUESTION: %s `ANSWER: %s ' % (i[1]['question'], i[1]['answer']) + line
+         line = line.replace('\n', '')
+         encoded_len = len(gpt2_tokenizer.encode(line))
+         if encoded_len >= 1024:
+             break
+     return gpt2_tokenizer.encode(line)[-1024:]
+
+
+
+ # function to generate an answer given a question and the required answer length
+
+ def give_answer(question, answer_len):
+     preprocessed_question = preprocess(question)
+     question_len = len(preprocessed_question.split(' '))
+     truncated_question = preprocessed_question
+     if question_len > 500:
+         truncated_question = ' '.join(preprocessed_question.split(' ')[:500])
+     biobert_tokenizer, question_extractor_model1 = return_biobert_tokenizer_model()
+     gpt2_tokenizer, tf_gpt2_model = return_gpt2_tokenizer_model()
+     encoded_question = biobert_tokenizer.encode(truncated_question)
+     max_length = 512
+     padded_question = tf.keras.preprocessing.sequence.pad_sequences(
+         [encoded_question], maxlen=max_length, padding='post')
+     question_mask = [[1 if token != 0 else 0 for token in question] for question in padded_question]
+     embeddings = question_extractor_model1({'question': np.array(padded_question), 'question_mask': np.array(question_mask)})
+     gpt_input = preparing_gpt_inference_data(gpt2_tokenizer, truncated_question, embeddings.numpy())
+     # locate the last occurrence of token id 4600 and cut the prompt just after it
+     mask_start = len(gpt_input) - list(gpt_input[::-1]).index(4600) + 1
+     input = gpt_input[:mask_start + 1]
+     if len(input) > (1024 - answer_len):
+         input = input[-(1024 - answer_len):]
+     gpt2_output = gpt2_tokenizer.decode(tf_gpt2_model.generate(input_ids=tf.constant([np.array(input)]), max_length=1024, temperature=0.7)[0])
+     answer = gpt2_output.rindex('`ANSWER: ')
+     return gpt2_output[answer + len('`ANSWER: '):]
+
+
+
+ # defining the final function to generate an answer, with a default answer length of 25
+ def final_func_1(question):
+     answer_len = 25
+     return give_answer(question, answer_len)
+
+
+ def main():
+     st.title('Medical Chatbot')
+     question = st.text_input('Question', "Type Here")
+     result = ""
+     if st.button('ask'):
+         # with st.spinner("You Know! an apple a day keeps the doctor away!"):
+         start = datetime.now()
+         result = final_func_1(question)
+         end_time = datetime.now()
+         st.success("Here is the answer")
+         st.text(result)
+         st.text("result received in " + str((end_time - start).total_seconds()) + " seconds")
+
+
+ if __name__ == '__main__':
+     main()
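The prompt both scripts assemble for GPT-2 is a run of retrieved Q/A pairs prepended to the user's question, truncated to the model's 1024-token context. A string-only sketch of that assembly (the retrieved pairs here are made up, and the whitespace split is a crude stand-in for the GPT-2 token count used in the real code):

```python
# illustrative stand-ins for rows retrieved via the FAISS answer index
retrieved = [
    ("what helps a sore throat", "warm fluids and rest usually help"),
    ("is fever dangerous", "most fevers are self-limiting"),
]
user_question = "how do i treat a mild headache"

# same template as preparing_gpt_inference_data: pairs are *prepended*,
# so the user's question always sits right before the final `ANSWER: slot
line = '`QUESTION: %s `ANSWER: ' % user_question
for q, a in retrieved:
    line = '`QUESTION: %s `ANSWER: %s ' % (q, a) + line
    if len(line.split()) >= 1024:  # crude proxy for len(gpt2_tokenizer.encode(line))
        break
print(line)
```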
tf_gpt2_model_2_118_50000/config.json ADDED
@@ -0,0 +1,38 @@
+ {
+   "_name_or_path": "/kaggle/input/data45",
+   "activation_function": "gelu_new",
+   "architectures": [
+     "GPT2LMHeadModel"
+   ],
+   "attn_pdrop": 0.1,
+   "bos_token_id": 50256,
+   "embd_pdrop": 0.1,
+   "eos_token_id": 50256,
+   "initializer_range": 0.02,
+   "layer_norm_epsilon": 1e-05,
+   "model_type": "gpt2",
+   "n_ctx": 1024,
+   "n_embd": 768,
+   "n_head": 12,
+   "n_inner": null,
+   "n_layer": 12,
+   "n_positions": 1024,
+   "reorder_and_upcast_attn": false,
+   "resid_pdrop": 0.1,
+   "scale_attn_by_inverse_layer_idx": false,
+   "scale_attn_weights": true,
+   "summary_activation": null,
+   "summary_first_dropout": 0.1,
+   "summary_proj_to_labels": true,
+   "summary_type": "cls_index",
+   "summary_use_proj": true,
+   "task_specific_params": {
+     "text-generation": {
+       "do_sample": true,
+       "max_length": 50
+     }
+   },
+   "transformers_version": "4.20.1",
+   "use_cache": true,
+   "vocab_size": 50257
+ }
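This is a stock GPT-2-small configuration: 12 layers, 12 heads, 768-dim embeddings, and the 1024-token context the prompt assembly truncates to. A minimal sketch of inspecting it with transformers, assuming the directory sits next to the script as laid out in this repo:

```python
from transformers import GPT2Config

# load the committed config file directly from the repo layout above
config = GPT2Config.from_json_file("tf_gpt2_model_2_118_50000/config.json")
print(config.n_layer, config.n_head, config.n_embd)  # 12 12 768
print(config.n_positions)  # 1024, the context limit used in the prompt assembly
```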
tf_gpt2_model_2_118_50000/tf_model.h5 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:acb1d9c174d87de89ebb18e21b1c9aea878a2aefb49135e387e3a5fdd4abe776
+ size 497934896
train_gpt_data.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:944f5a6e1822cbbe49c3d4658faaa417f8207bc94cabb43c7018779c26abaee2
+ size 86799441