Oscar Wang committed on
Commit 8fb98cd · verified · 1 Parent(s): 591799e

Update app.py

Files changed (1)
  1. app.py +31 -28
app.py CHANGED
@@ -1,28 +1,32 @@
  import pandas as pd
- from groq import Groq
  import os
  import gradio as gr
  import threading
  import time
+ from groq import Groq

+ # Initialize Groq client
  client = Groq()
- max_size = 1.1 * 1024 * 1024 * 1024 # 1.1GB in bytes
+
+ # Constants
+ MAX_SIZE = 1.1 * 1024 * 1024 * 1024 # 1.1GB in bytes
+ DATA_DIRECTORY = 'data'
+ UPDATE_INTERVAL = 1 # Update interval in seconds
+
+ # Ensure the data directory exists
+ os.makedirs(DATA_DIRECTORY, exist_ok=True)
+
+ # Initialize variables
  file_index = 1
- data_directory = 'data'
- current_file = os.path.join(data_directory, f'data{file_index}.csv')
+ current_file = os.path.join(DATA_DIRECTORY, f'data{file_index}.csv')
  file_paths = [current_file]
  combined_tokens = 0
- update_interval = 1 # Update interval in seconds
-
- # Ensure the data directory exists
- if not os.path.exists(data_directory):
-     os.makedirs(data_directory)

+ # Helper function to get file size
  def get_file_size(filename):
-     if os.path.isfile(filename):
-         return os.path.getsize(filename)
-     return 0
+     return os.path.getsize(filename) if os.path.isfile(filename) else 0

+ # Data generation and saving function
  def generate_and_save_data():
      global file_index, current_file, file_paths, combined_tokens
      while True:
@@ -47,9 +51,9 @@ def generate_and_save_data():
              prompt_tokens = 0
              for chunk in completion:
                  content = chunk.choices[0].delta.content
-                 if content is not None:
+                 if content:
                      prompt += content
-                     prompt_tokens += len(content.split()) # Assuming tokens are words for simplicity
+                     prompt_tokens += len(content.split())

              # Use the generated prompt to query the model again
              second_completion = client.chat.completions.create(
@@ -61,7 +65,7 @@ def generate_and_save_data():
                      }
                  ],
                  temperature=1,
-                 max_tokens=1024,
+                 max_tokens=5000,
                  top_p=1,
                  stream=True,
                  stop=None,
@@ -71,9 +75,9 @@ def generate_and_save_data():
              response_tokens = 0
              for chunk in second_completion:
                  content = chunk.choices[0].delta.content
-                 if content is not None:
+                 if content:
                      response += content
-                     response_tokens += len(content.split()) # Assuming tokens are words for simplicity
+                     response_tokens += len(content.split())

              # Update the combined token count
              combined_tokens += (prompt_tokens + response_tokens)
@@ -86,33 +90,32 @@ def generate_and_save_data():
              data = pd.DataFrame({"prompt": [prompt], "response": [response]})

              # Check the size of the current file
-             if get_file_size(current_file) >= max_size:
+             if get_file_size(current_file) >= MAX_SIZE:
                  file_index += 1
-                 current_file = os.path.join(data_directory, f'data{file_index}.csv')
+                 current_file = os.path.join(DATA_DIRECTORY, f'data{file_index}.csv')
                  file_paths.append(current_file)

-             # Check if the current file exists
-             file_exists = os.path.isfile(current_file)
-
-             # If the file exists, append without overwriting
-             if file_exists:
-                 data.to_csv(current_file, mode='a', header=False, index=False)
-             else:
-                 data.to_csv(current_file, mode='w', header=True, index=False)
+             # Append data to the current file
+             mode = 'a' if os.path.isfile(current_file) else 'w'
+             header = not os.path.isfile(current_file)
+             data.to_csv(current_file, mode=mode, header=header, index=False)

              # Wait for the next update interval
-             time.sleep(update_interval)
+             time.sleep(UPDATE_INTERVAL)

          except Exception as e:
              print(f"An error occurred: {e}. Retrying in 5 seconds...")
              time.sleep(5)

+ # Get available files
  def get_available_files():
      return [f for f in file_paths if os.path.isfile(f)]

+ # Update file list
  def update_file_list():
      return gr.update(choices=get_available_files())

+ # Update token count
  def update_token_count():
      return combined_tokens
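The hunks above only touch the data-generation half of app.py; the Gradio interface and the thread that drives generate_and_save_data() sit outside the diff. For orientation, here is a minimal sketch of how those pieces could be wired together. The daemon-thread start, the gr.Blocks layout, and the component names (file_dropdown, token_box, the refresh buttons) are illustrative assumptions, not code from this commit.

# Hypothetical wiring for the rest of app.py (not part of this diff).
# Assumes generate_and_save_data, get_available_files, update_file_list and
# update_token_count are defined above, as in the file being patched.
import threading

import gradio as gr

# Run the generation loop in a daemon thread so it stops with the server process.
threading.Thread(target=generate_and_save_data, daemon=True).start()

with gr.Blocks() as demo:
    gr.Markdown("## Synthetic prompt/response generator")

    # Dropdown listing the CSV files written so far; refreshed on demand via
    # update_file_list(), which returns a gr.update(choices=...) payload.
    file_dropdown = gr.Dropdown(choices=get_available_files(), label="Generated CSV files")
    refresh_files = gr.Button("Refresh file list")
    refresh_files.click(fn=update_file_list, outputs=file_dropdown)

    # Approximate running token count (whitespace-split words, per the code above).
    token_box = gr.Number(value=update_token_count(), label="Combined tokens")
    refresh_tokens = gr.Button("Refresh token count")
    refresh_tokens.click(fn=update_token_count, outputs=token_box)

demo.launch()

Because combined_tokens and file_paths are plain module-level globals mutated by the worker thread, reads from the Gradio callbacks are only approximately consistent, which matches the simple counter semantics in the diff.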