Rollback for better performance

#15 opened by dzmltzack
Files changed (3)
  1. README.md +1 -1
  2. app.py +2 -2
  3. requirements.txt +5 -7
README.md CHANGED
```diff
@@ -4,7 +4,7 @@ emoji: 🍮
 colorFrom: gray
 colorTo: purple
 sdk: gradio
-sdk_version: 4.25.0
+sdk_version: 3.6
 app_file: app.py
 pinned: false
 ---
```
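Rolling `sdk_version` back from Gradio 4.25.0 to 3.6 means the Space runtime supplies gradio itself at that version, which is presumably also why the `gradio==4.14.0` pin drops out of requirements.txt below.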
app.py CHANGED
```diff
@@ -3,9 +3,10 @@ import gradio as gr
 import torch
 import numpy as np
 from transformers import pipeline
-import spaces
 
 import torch
+print(f"Is CUDA available: {torch.cuda.is_available()}")
+print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")
 
 pipe_flan = pipeline("text2text-generation", model="philschmid/flan-t5-xxl-sharded-fp16", model_kwargs={"load_in_8bit":True, "device_map": "auto"})
 pipe_vanilla = pipeline("text2text-generation", model="t5-large", device="cuda:0", model_kwargs={"torch_dtype":torch.bfloat16})
@@ -29,7 +30,6 @@ Q: A juggler can juggle 16 balls. Half of the balls are golf balls, and half of
 title = "Flan T5 and Vanilla T5"
 description = "This demo compares [T5-large](https://huggingface.co/t5-large) and [Flan-T5-XX-large](https://huggingface.co/google/flan-t5-xxl). Note that T5 expects a very specific format of the prompts, so the examples below are not necessarily the best prompts to compare."
 
-@spaces.GPU
 def inference(text):
     output_flan = pipe_flan(text, max_length=100)[0]["generated_text"]
     output_vanilla = pipe_vanilla(text, max_length=100)[0]["generated_text"]
```
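For context, here is a minimal sketch of what app.py looks like after this rollback. Everything outside the two hunks above is assumed: in particular the `gr.Interface` wiring at the bottom and the `inference` return value are illustrative guesses, not part of the diff, and loading both models needs a GPU with substantial memory.

```python
import gradio as gr
import torch
import numpy as np
from transformers import pipeline

print(f"Is CUDA available: {torch.cuda.is_available()}")
print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")

# Flan-T5-XXL loaded in 8-bit via bitsandbytes; vanilla T5-large in bfloat16 on GPU 0.
pipe_flan = pipeline("text2text-generation",
                     model="philschmid/flan-t5-xxl-sharded-fp16",
                     model_kwargs={"load_in_8bit": True, "device_map": "auto"})
pipe_vanilla = pipeline("text2text-generation", model="t5-large",
                        device="cuda:0", model_kwargs={"torch_dtype": torch.bfloat16})

title = "Flan T5 and Vanilla T5"
description = "This demo compares T5-large and Flan-T5-XX-large."

def inference(text):
    output_flan = pipe_flan(text, max_length=100)[0]["generated_text"]
    output_vanilla = pipe_vanilla(text, max_length=100)[0]["generated_text"]
    return output_flan, output_vanilla  # assumed: both generations are returned

# Assumed wiring; the actual demo layout lies outside the diff hunks.
demo = gr.Interface(fn=inference,
                    inputs=gr.Textbox(lines=3, label="Prompt"),
                    outputs=[gr.Textbox(label="Flan T5"),
                             gr.Textbox(label="Vanilla T5")],
                    title=title, description=description)
demo.launch()
```

Note that removing `import spaces` and the `@spaces.GPU` decorator drops the ZeroGPU on-demand allocation path, so the pipelines now claim the GPU eagerly at startup, which is consistent with the new CUDA availability prints.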
requirements.txt CHANGED
```diff
@@ -1,7 +1,5 @@
-accelerate==0.25.0
-bitsandbytes==0.41.1
-gradio==4.14.0
-scipy==1.11.2
-spaces==0.20.0
-torch==2.0.0
-transformers==4.36.2
+--extra-index-url https://download.pytorch.org/whl/cu113
+torch
+transformers==4.26.0
+accelerate
+bitsandbytes
```
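The `--extra-index-url` line adds the PyTorch CUDA 11.3 wheel index alongside PyPI, so the now-unpinned `torch` can resolve to a cu113 build. A minimal post-install sanity check, assuming it runs after `pip install -r requirements.txt`:

```python
# Sanity check for the rolled-back environment (assumption: run after
# `pip install -r requirements.txt`). The cu113 extra index should yield
# a CUDA 11.3 build of torch; transformers is pinned to 4.26.0.
import torch
import transformers

print(torch.__version__)         # e.g. "1.12.1+cu113" from the extra index
print(torch.version.cuda)        # expected: "11.3"
print(transformers.__version__)  # expected: "4.26.0"
```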