whyumesh committed (verified)
Commit f68cd65 · Parent: 753217c

Update app.py

Files changed (1):
  1. app.py +75 -34
app.py CHANGED
@@ -14,45 +14,57 @@ import spaces
 from huggingface_hub import login
 import os
 
+# Add quota management constants
+MAX_GPU_TIME_PER_REQUEST = 30  # seconds
+COOLDOWN_PERIOD = 300  # 5 minutes in seconds
+
 # Add login function at the start
 def init_huggingface_auth():
     # Get token from environment variable or set it directly
     token = os.getenv("HUGGINGFACE_TOKEN")
     if token:
         login(token=token)
+        print("Successfully authenticated with Hugging Face")
     else:
-        print("Warning: HUGGINGFACE_TOKEN not found in environment variables")
+        raise ValueError("HUGGINGFACE_TOKEN not found in environment variables")
 
 # Load both models and their processors/tokenizers
 def load_models():
-    # Initialize HF auth before loading models
-    init_huggingface_auth()
-
-    # Vision model
-    vision_model = Qwen2VLForConditionalGeneration.from_pretrained(
-        "Qwen/Qwen2-VL-2B-Instruct",
-        torch_dtype=torch.float16,
-        device_map="auto",
-        use_auth_token=True  # Add auth token usage
-    )
-    vision_processor = AutoProcessor.from_pretrained(
-        "Qwen/Qwen2-VL-2B-Instruct",
-        use_auth_token=True  # Add auth token usage
-    )
-
-    # Code model
-    code_model = AutoModelForCausalLM.from_pretrained(
-        "Qwen/Qwen2.5-Coder-1.5B-Instruct",
-        torch_dtype=torch.float16,
-        device_map="auto",
-        use_auth_token=True  # Add auth token usage
-    )
-    code_tokenizer = AutoTokenizer.from_pretrained(
-        "Qwen/Qwen2.5-Coder-1.5B-Instruct",
-        use_auth_token=True  # Add auth token usage
-    )
-
-    return vision_model, vision_processor, code_model, code_tokenizer
+    try:
+        # Initialize HF auth before loading models
+        init_huggingface_auth()
+
+        # Vision model
+        vision_model = Qwen2VLForConditionalGeneration.from_pretrained(
+            "Qwen/Qwen2-VL-2B-Instruct",
+            torch_dtype=torch.float16,
+            device_map="auto",
+            use_auth_token=True  # Add auth token usage
+        )
+        vision_processor = AutoProcessor.from_pretrained(
+            "Qwen/Qwen2-VL-2B-Instruct",
+            use_auth_token=True  # Add auth token usage
+        )
+
+        # Code model
+        code_model = AutoModelForCausalLM.from_pretrained(
+            "Qwen/Qwen2.5-Coder-1.5B-Instruct",
+            torch_dtype=torch.float16,
+            device_map="auto",
+            use_auth_token=True  # Add auth token usage
+        )
+        code_tokenizer = AutoTokenizer.from_pretrained(
+            "Qwen/Qwen2.5-Coder-1.5B-Instruct",
+            use_auth_token=True  # Add auth token usage
+        )
+
+        # Free up CUDA memory after loading
+        torch.cuda.empty_cache()
+
+        return vision_model, vision_processor, code_model, code_tokenizer
+    except Exception as e:
+        print(f"Error loading models: {str(e)}")
+        raise
 
 vision_model, vision_processor, code_model, code_tokenizer = load_models()
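
Note on the loader above: use_auth_token is deprecated in recent transformers releases in favor of token, and once login() has run, from_pretrained() picks up the cached credentials automatically, so the explicit flag is arguably redundant. A minimal sketch of the same call with the newer argument, assuming a current transformers version:

    # Sketch: vision-model load with the non-deprecated token argument.
    # token=True reuses the credentials cached by login() in
    # init_huggingface_auth() above.
    vision_model = Qwen2VLForConditionalGeneration.from_pretrained(
        "Qwen/Qwen2-VL-2B-Instruct",
        torch_dtype=torch.float16,
        device_map="auto",
        token=True,
    )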
@@ -189,11 +201,38 @@ def process_for_code(vision_description):
 
 @spaces.GPU
 def process_content(video, transcribed_text):
-    if video is None:
-        return "Please upload a video file of code with errors.", ""
+    try:
+        if video is None:
+            return "Please upload a video file of code with errors.", ""
 
-    vision_output, code_output = process_video_for_code(video.name, transcribed_text)
-    return vision_output, code_output
+        # Add GPU memory management
+        torch.cuda.empty_cache()
+
+        # Check available GPU memory
+        if torch.cuda.is_available():
+            available_memory = torch.cuda.get_device_properties(0).total_memory
+            if available_memory < 1e9:  # Less than 1GB available
+                raise RuntimeError("Insufficient GPU memory available")
+
+        vision_output, code_output = process_video_for_code(
+            video.name,
+            transcribed_text,
+            max_frames=8  # Reduced from 16 to lower GPU usage
+        )
+
+        return vision_output, code_output
+
+    except spaces.zero.gradio.HTMLError as e:
+        if "exceeded your GPU quota" in str(e):
+            return (
+                "GPU quota exceeded. Please try again later or consider upgrading to a paid plan.",
+                ""
+            )
+    except Exception as e:
+        return f"Error processing content: {str(e)}", ""
+    finally:
+        # Clean up GPU memory
+        torch.cuda.empty_cache()
 
 # Gradio interface
 iface = gr.Interface(
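
The MAX_GPU_TIME_PER_REQUEST and COOLDOWN_PERIOD constants added in the first hunk are never referenced anywhere in this diff, so as committed they have no effect. On ZeroGPU Spaces the per-request budget is normally declared on the decorator itself; one hypothetical way to wire the first constant in:

    # Sketch (not in the commit): let ZeroGPU reserve only the declared
    # number of seconds of quota per request.
    @spaces.GPU(duration=MAX_GPU_TIME_PER_REQUEST)
    def process_content(video, transcribed_text):
        ...

COOLDOWN_PERIOD would likewise need explicit bookkeeping, e.g. a module-level timestamp checked at the top of process_content, before it limits anything.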
@@ -207,7 +246,9 @@ iface = gr.Interface(
         gr.Code(label="Fixed Code", language="python")
     ],
     title="Vision Code Debugger",
-    description="Upload a video of code with errors and provide transcribed audio, and the AI will analyze and fix the issues."
+    description="Upload a video of code with errors and provide transcribed audio, and the AI will analyze and fix the issues.",
+    allow_flagging="never",  # Disable flagging to reduce overhead
+    cache_examples=True  # Enable caching to reduce GPU usage
 )
 
 if __name__ == "__main__":
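
Two caveats on the new gr.Interface keywords: cache_examples only does anything when an examples list is also supplied, which this app does not do, so the flag is a no-op as committed; and caching runs the function on every example at startup, which itself spends GPU quota rather than saving it. A sketch of the pairing the flag expects, with the widget list assumed from the code above and the example assets purely hypothetical:

    # Sketch (hypothetical example assets): cache_examples pre-computes
    # outputs for the listed examples when the app starts.
    iface = gr.Interface(
        fn=process_content,
        inputs=[gr.Video(), gr.Textbox(label="Transcribed Audio")],
        outputs=[gr.Textbox(), gr.Code(label="Fixed Code", language="python")],
        examples=[["sample_bug.mp4", "the loop crashes with an index error"]],
        cache_examples=True,
    )

Newer Gradio releases also rename allow_flagging to flagging_mode, so that keyword may warn or fail depending on the pinned version.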
 
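One fix worth flagging in process_content: get_device_properties(0).total_memory reports the card's total capacity, not what is currently free, so the "less than 1GB available" guard only ever fires on a GPU with under 1 GB installed. torch.cuda.mem_get_info() returns the actual free/total pair; a sketch of the check presumably intended:

    # Sketch: guard on memory that is actually free right now.
    if torch.cuda.is_available():
        free_bytes, _total_bytes = torch.cuda.mem_get_info()
        if free_bytes < 1e9:  # less than 1 GB free
            raise RuntimeError("Insufficient GPU memory available")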
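
Finally, the new error handling has a silent path: if spaces.zero.gradio.HTMLError (as the commit spells it) is raised without "exceeded your GPU quota" in its message, its handler returns nothing and process_content yields None, which Gradio cannot unpack into the two declared outputs. A minimal sketch of that handler with a fallback return, as a fragment of the try/except shown above:

    # Sketch: every path returns a two-tuple for the two Gradio outputs.
    except spaces.zero.gradio.HTMLError as e:
        if "exceeded your GPU quota" in str(e):
            return (
                "GPU quota exceeded. Please try again later or consider upgrading to a paid plan.",
                "",
            )
        return f"Error processing content: {str(e)}", ""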
 