Oscar Wang committed: Update app.py
app.py CHANGED
@@ -1,28 +1,32 @@
 import pandas as pd
-from groq import Groq
 import os
 import gradio as gr
 import threading
 import time
+from groq import Groq

+# Initialize Groq client
 client = Groq()
-
+
+# Constants
+MAX_SIZE = 1.1 * 1024 * 1024 * 1024  # 1.1GB in bytes
+DATA_DIRECTORY = 'data'
+UPDATE_INTERVAL = 1  # Update interval in seconds
+
+# Ensure the data directory exists
+os.makedirs(DATA_DIRECTORY, exist_ok=True)
+
+# Initialize variables
 file_index = 1
-
-current_file = os.path.join(data_directory, f'data{file_index}.csv')
+current_file = os.path.join(DATA_DIRECTORY, f'data{file_index}.csv')
 file_paths = [current_file]
 combined_tokens = 0
-update_interval = 1  # Update interval in seconds
-
-# Ensure the data directory exists
-if not os.path.exists(data_directory):
-    os.makedirs(data_directory)

+# Helper function to get file size
 def get_file_size(filename):
-    if os.path.isfile(filename)
-        return os.path.getsize(filename)
-    return 0
+    return os.path.getsize(filename) if os.path.isfile(filename) else 0

+# Data generation and saving function
 def generate_and_save_data():
     global file_index, current_file, file_paths, combined_tokens
     while True:
@@ -47,9 +51,9 @@ def generate_and_save_data():
             prompt_tokens = 0
             for chunk in completion:
                 content = chunk.choices[0].delta.content
-                if content
+                if content:
                     prompt += content
-                prompt_tokens += len(content.split())
+                    prompt_tokens += len(content.split())

             # Use the generated prompt to query the model again
             second_completion = client.chat.completions.create(
@@ -61,7 +65,7 @@ def generate_and_save_data():
                     }
                 ],
                 temperature=1,
-                max_tokens=
+                max_tokens=5000,
                 top_p=1,
                 stream=True,
                 stop=None,
@@ -71,9 +75,9 @@ def generate_and_save_data():
             response_tokens = 0
             for chunk in second_completion:
                 content = chunk.choices[0].delta.content
-                if content
+                if content:
                     response += content
-                response_tokens += len(content.split())
+                    response_tokens += len(content.split())

             # Update the combined token count
             combined_tokens += (prompt_tokens + response_tokens)
@@ -86,33 +90,32 @@ def generate_and_save_data():
             data = pd.DataFrame({"prompt": [prompt], "response": [response]})

             # Check the size of the current file
-            if get_file_size(current_file) >=
+            if get_file_size(current_file) >= MAX_SIZE:
                 file_index += 1
-                current_file = os.path.join(data_directory, f'data{file_index}.csv')
+                current_file = os.path.join(DATA_DIRECTORY, f'data{file_index}.csv')
                 file_paths.append(current_file)

-            #
-
-
-
-            if file_exists:
-                data.to_csv(current_file, mode='a', header=False, index=False)
-            else:
-                data.to_csv(current_file, mode='w', header=True, index=False)
+            # Append data to the current file
+            mode = 'a' if os.path.isfile(current_file) else 'w'
+            header = not os.path.isfile(current_file)
+            data.to_csv(current_file, mode=mode, header=header, index=False)

             # Wait for the next update interval
-            time.sleep(update_interval)
+            time.sleep(UPDATE_INTERVAL)

         except Exception as e:
             print(f"An error occurred: {e}. Retrying in 5 seconds...")
             time.sleep(5)

+# Get available files
 def get_available_files():
     return [f for f in file_paths if os.path.isfile(f)]

+# Update file list
 def update_file_list():
     return gr.update(choices=get_available_files())

+# Update token count
 def update_token_count():
     return combined_tokens
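The two streaming calls in generate_and_save_data appear only in fragments above, because the unchanged model and messages arguments fall outside the hunks. As a point of reference, here is a minimal, self-contained sketch of the streaming pattern the app relies on; the model name and the example prompt are placeholders I am assuming, not values taken from this commit.

import os
from groq import Groq

# The client reads GROQ_API_KEY from the environment.
client = Groq()

def stream_completion(user_prompt, model="llama3-8b-8192"):
    """Stream a chat completion and return (text, rough_token_count),
    mirroring the counting logic in generate_and_save_data()."""
    completion = client.chat.completions.create(
        model=model,  # placeholder model name, not taken from the diff
        messages=[{"role": "user", "content": user_prompt}],
        temperature=1,
        max_tokens=5000,
        top_p=1,
        stream=True,
        stop=None,
    )
    text, tokens = "", 0
    for chunk in completion:
        content = chunk.choices[0].delta.content
        if content:
            text += content
            tokens += len(content.split())  # whitespace split, not real tokens
    return text, tokens

if __name__ == "__main__":
    reply, count = stream_completion("Write a short prompt about space travel.")
    print(count, reply[:80])

Note that len(content.split()) counts whitespace-separated words, so combined_tokens in the app is a rough proxy for usage rather than the provider's actual token count.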
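The code that starts the generator thread and builds the Gradio interface is not part of this diff, so the wiring below is only an assumption about how get_available_files, update_file_list, and update_token_count might be hooked up; the component choices and the one-second polling interval are illustrative.

import threading
import gradio as gr

# Assumed wiring; the actual UI code in app.py is outside the hunks shown above.
threading.Thread(target=generate_and_save_data, daemon=True).start()

with gr.Blocks() as demo:
    token_box = gr.Number(value=update_token_count, label="Combined tokens", every=1)
    file_dropdown = gr.Dropdown(choices=get_available_files(), label="Generated CSV files")
    refresh_btn = gr.Button("Refresh file list")
    refresh_btn.click(fn=update_file_list, outputs=file_dropdown)

demo.launch()

Running the generator as a daemon thread leaves demo.launch() as the blocking call that keeps the Space alive, while the UI callbacks only read file_paths and combined_tokens.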