liujch1998 committed on
Commit
4641d03
•
1 Parent(s): 40c12a7

Improve description and logging

Browse files
Files changed (2) hide show
  1. app.py +16 -4
  2. constants.py +3 -3
app.py CHANGED
@@ -1,4 +1,5 @@
1
  import gradio as gr
 
2
  import json
3
  import os
4
  import requests
@@ -8,12 +9,13 @@ API_IPADDR = os.environ.get('API_IPADDR', None)
8
  default_concurrency_limit = os.environ.get('default_concurrency_limit', 10)
9
  max_size = os.environ.get('max_size', 100)
10
  max_threads = os.environ.get('max_threads', 40)
11
- debug = os.environ.get('debug', False)
12
 
13
  def process(corpus_desc, query_desc, query):
14
  corpus = CORPUS_BY_DESC[corpus_desc]
15
  query_type = QUERY_TYPE_BY_DESC[query_desc]
16
- print(json.dumps({'corpus': corpus, 'query_type': query_type, 'query': query}))
 
17
  data = {
18
  'corpus': corpus,
19
  'query_type': query_type,
@@ -26,7 +28,8 @@ def process(corpus_desc, query_desc, query):
26
  result = response.json()
27
  else:
28
  raise ValueError(f'Invalid response: {response.status_code}')
29
- # print(result)
 
30
  return result
31
 
32
  with gr.Blocks() as demo:
@@ -35,12 +38,13 @@ with gr.Blocks() as demo:
35
  '''<h1 text-align="center">Infini-gram: An Engine for n-gram / ∞-gram Language Models with Trillion-Token Corpora</h1>
36
 
37
  <p style='font-size: 16px;'>This is an engine that processes n-gram / ∞-gram queries on a text corpus. Please first select the corpus and the type of query, then enter your query and submit.</p>
 
38
  '''
39
  )
40
  with gr.Row():
41
  with gr.Column(scale=1):
42
  corpus_desc = gr.Radio(choices=CORPUS_DESCS, label='Corpus', value=CORPUS_DESCS[0])
43
- with gr.Column(scale=4):
44
  query_desc = gr.Radio(
45
  choices=QUERY_DESCS, label='Query Type', value=QUERY_DESCS[0],
46
  )
@@ -171,6 +175,14 @@ with gr.Blocks() as demo:
171
  with gr.Column(scale=1):
172
  doc_analysis_output = gr.HTML(value='', label='Analysis')
173
 
 
 
 
 
 
 
 
 
174
  count_clear.add([count_input, count_output, count_output_tokens])
175
  ngram_clear.add([ngram_input, ngram_output, ngram_output_tokens])
176
  a_ntd_clear.add([a_ntd_input, a_ntd_output, a_ntd_output_tokens])
 
1
  import gradio as gr
2
+ import datetime
3
  import json
4
  import os
5
  import requests
 
9
  default_concurrency_limit = os.environ.get('default_concurrency_limit', 10)
10
  max_size = os.environ.get('max_size', 100)
11
  max_threads = os.environ.get('max_threads', 40)
12
+ debug = (os.environ.get('debug', 'False') != 'False')
13
 
14
  def process(corpus_desc, query_desc, query):
15
  corpus = CORPUS_BY_DESC[corpus_desc]
16
  query_type = QUERY_TYPE_BY_DESC[query_desc]
17
+ timestamp = datetime.datetime.now().strftime('%Y%m%d-%H%M%S')
18
+ print(json.dumps({'timestamp': timestamp, 'corpus': corpus, 'query_type': query_type, 'query': query}))
19
  data = {
20
  'corpus': corpus,
21
  'query_type': query_type,
 
28
  result = response.json()
29
  else:
30
  raise ValueError(f'Invalid response: {response.status_code}')
31
+ if debug:
32
+ print(result)
33
  return result
34
 
35
  with gr.Blocks() as demo:
 
38
  '''<h1 text-align="center">Infini-gram: An Engine for n-gram / ∞-gram Language Models with Trillion-Token Corpora</h1>
39
 
40
  <p style='font-size: 16px;'>This is an engine that processes n-gram / ∞-gram queries on a text corpus. Please first select the corpus and the type of query, then enter your query and submit.</p>
41
+ <p style='font-size: 16px;'>The engine is documented in our paper: <a href="">Infini-gram: Scaling Unbounded n-gram Language Models to a Trillion Tokens</a></p>
42
  '''
43
  )
44
  with gr.Row():
45
  with gr.Column(scale=1):
46
  corpus_desc = gr.Radio(choices=CORPUS_DESCS, label='Corpus', value=CORPUS_DESCS[0])
47
+ with gr.Column(scale=3):
48
  query_desc = gr.Radio(
49
  choices=QUERY_DESCS, label='Query Type', value=QUERY_DESCS[0],
50
  )
 
175
  with gr.Column(scale=1):
176
  doc_analysis_output = gr.HTML(value='', label='Analysis')
177
 
178
+ with gr.Row():
179
+ gr.Markdown('''
180
+ If you find this tool useful, please kindly cite our paper:
181
+ ```
182
+ (coming soon)
183
+ ```
184
+ ''')
185
+
186
  count_clear.add([count_input, count_output, count_output_tokens])
187
  ngram_clear.add([ngram_input, ngram_output, ngram_output_tokens])
188
  a_ntd_clear.add([a_ntd_input, a_ntd_output, a_ntd_output_tokens])
constants.py CHANGED
@@ -1,9 +1,9 @@
1
  import os
2
 
3
  CORPUS_BY_DESC = {
4
- 'RedPajama (LLaMA tokenizer)': 'v3_rpj_llama_c4',
5
- 'Pile-val (LLaMA tokenizer)': 'v3_pileval_llama',
6
- 'Pile-val (GPT-2 tokenizer)': 'v3_pileval',
7
  }
8
  CORPUS_DESCS = list(CORPUS_BY_DESC.keys())
9
  QUERY_TYPE_BY_DESC = {
 
1
  import os
2
 
3
  CORPUS_BY_DESC = {
4
+ 'RedPajama (LLaMA tokenizer), 1.4T tokens': 'v3_rpj_llama_c4',
5
+ 'Pile-val (LLaMA tokenizer), 790M tokens': 'v3_pileval_llama',
6
+ 'Pile-val (GPT-2 tokenizer) 770M tokens': 'v3_pileval',
7
  }
8
  CORPUS_DESCS = list(CORPUS_BY_DESC.keys())
9
  QUERY_TYPE_BY_DESC = {