Update app.py
app.py
CHANGED
@@ -14,45 +14,57 @@ import spaces
 from huggingface_hub import login
 import os
 
+# Add quota management constants
+MAX_GPU_TIME_PER_REQUEST = 30  # seconds
+COOLDOWN_PERIOD = 300  # 5 minutes in seconds
+
 # Add login function at the start
 def init_huggingface_auth():
     # Get token from environment variable or set it directly
     token = os.getenv("HUGGINGFACE_TOKEN")
     if token:
         login(token=token)
+        print("Successfully authenticated with Hugging Face")
     else:
-        … (1 removed line, not shown in the source view)
+        raise ValueError("HUGGINGFACE_TOKEN not found in environment variables")
 
 # Load both models and their processors/tokenizers
 def load_models():
-    … (28 removed lines, not shown in the source view)
+    try:
+        # Initialize HF auth before loading models
+        init_huggingface_auth()
+
+        # Vision model
+        vision_model = Qwen2VLForConditionalGeneration.from_pretrained(
+            "Qwen/Qwen2-VL-2B-Instruct",
+            torch_dtype=torch.float16,
+            device_map="auto",
+            use_auth_token=True  # Add auth token usage
+        )
+        vision_processor = AutoProcessor.from_pretrained(
+            "Qwen/Qwen2-VL-2B-Instruct",
+            use_auth_token=True  # Add auth token usage
+        )
+
+        # Code model
+        code_model = AutoModelForCausalLM.from_pretrained(
+            "Qwen/Qwen2.5-Coder-1.5B-Instruct",
+            torch_dtype=torch.float16,
+            device_map="auto",
+            use_auth_token=True  # Add auth token usage
+        )
+        code_tokenizer = AutoTokenizer.from_pretrained(
+            "Qwen/Qwen2.5-Coder-1.5B-Instruct",
+            use_auth_token=True  # Add auth token usage
+        )
+
+        # Free up CUDA memory after loading
+        torch.cuda.empty_cache()
+
+        return vision_model, vision_processor, code_model, code_tokenizer
+    except Exception as e:
+        print(f"Error loading models: {str(e)}")
+        raise
 
 vision_model, vision_processor, code_model, code_tokenizer = load_models()
 
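Note: MAX_GPU_TIME_PER_REQUEST and COOLDOWN_PERIOD are introduced in this hunk but nothing else in the diff references them yet. A minimal sketch of the kind of per-user cooldown gate the constants suggest; the helper name and the in-memory timestamp store are hypothetical, not part of this commit:

import time

_last_request_at = {}  # hypothetical store: user id -> time of last request

def check_cooldown(user_id):
    """Reject requests arriving within COOLDOWN_PERIOD of the previous one."""
    now = time.time()
    last = _last_request_at.get(user_id)
    if last is not None and now - last < COOLDOWN_PERIOD:
        remaining = int(COOLDOWN_PERIOD - (now - last))
        raise RuntimeError(f"Cooldown active; retry in {remaining}s")
    _last_request_at[user_id] = now

Note also: recent transformers releases deprecate use_auth_token= in favor of token=, and after huggingface_hub.login() the cached credential is picked up automatically, so the flag can usually be dropped or replaced, e.g.:

vision_processor = AutoProcessor.from_pretrained(
    "Qwen/Qwen2-VL-2B-Instruct",
    token=True,  # or omit entirely once login() has run
)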
@@ -189,11 +201,38 @@ def process_for_code(vision_description):
 
 @spaces.GPU
 def process_content(video, transcribed_text):
-    … (2 removed lines, not shown in the source view)
+    try:
+        if video is None:
+            return "Please upload a video file of code with errors.", ""
 
-    … (2 removed lines, not shown in the source view)
+        # Add GPU memory management
+        torch.cuda.empty_cache()
+
+        # Check available GPU memory
+        if torch.cuda.is_available():
+            available_memory, _ = torch.cuda.mem_get_info()  # free bytes; total_memory would report capacity, not headroom
+            if available_memory < 1e9:  # Less than 1GB available
+                raise RuntimeError("Insufficient GPU memory available")
+
+        vision_output, code_output = process_video_for_code(
+            video.name,
+            transcribed_text,
+            max_frames=8  # Reduced from 16 to lower GPU usage
+        )
+
+        return vision_output, code_output
+
+    except spaces.zero.gradio.HTMLError as e:
+        if "exceeded your GPU quota" in str(e):
+            return (
+                "GPU quota exceeded. Please try again later or consider upgrading to a paid plan.",
+                ""
+            )
+    except Exception as e:
+        return f"Error processing content: {str(e)}", ""
+    finally:
+        # Clean up GPU memory
+        torch.cuda.empty_cache()
 
 # Gradio interface
 iface = gr.Interface(
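Note: MAX_GPU_TIME_PER_REQUEST pairs naturally with the ZeroGPU decorator, which accepts a per-call duration in seconds, and the spaces.zero.gradio.HTMLError branch above falls through with no return value when the message does not mention the quota. A sketch of both tweaks; the wiring and the explicit re-raise are assumptions about intent, not part of the commit:

@spaces.GPU(duration=MAX_GPU_TIME_PER_REQUEST)  # cap GPU time per request
def process_content(video, transcribed_text):
    try:
        ...  # body as in the hunk above
    except spaces.zero.gradio.HTMLError as e:
        if "exceeded your GPU quota" in str(e):
            return "GPU quota exceeded. Please try again later.", ""
        raise  # surface other HTML errors instead of returning None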
@@ -207,7 +246,9 @@ iface = gr.Interface(
         gr.Code(label="Fixed Code", language="python")
     ],
     title="Vision Code Debugger",
-    description="Upload a video of code with errors and provide transcribed audio, and the AI will analyze and fix the issues."
+    description="Upload a video of code with errors and provide transcribed audio, and the AI will analyze and fix the issues.",
+    allow_flagging="never",  # Disable flagging to reduce overhead
+    cache_examples=True  # Enable caching to reduce GPU usage
 )
 
 if __name__ == "__main__":
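Note: cache_examples=True only takes effect when examples= is also passed to gr.Interface, so as committed it is a no-op; caching also runs the GPU function once per example up front, which itself consumes quota. A hypothetical wiring, with made-up input components, first output, and example values, since those parts of the interface are outside this diff:

iface = gr.Interface(
    fn=process_content,
    inputs=[gr.Video(), gr.Textbox(label="Transcribed Audio")],  # assumed inputs
    outputs=[gr.Textbox(label="Analysis"),  # assumed first output
             gr.Code(label="Fixed Code", language="python")],
    title="Vision Code Debugger",
    examples=[["sample_clip.mp4", "the loop crashes with an IndexError"]],  # hypothetical
    cache_examples=True,
)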