Rollback for better performance

#15 opened by dzmltzack
Files changed (3)
  1. README.md +1 -1
  2. app.py +2 -2
  3. requirements.txt +5 -7
README.md CHANGED
```diff
@@ -4,7 +4,7 @@ emoji: 🍮
 colorFrom: gray
 colorTo: purple
 sdk: gradio
-sdk_version: 4.25.0
+sdk_version: 3.6
 app_file: app.py
 pinned: false
 ---
```
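Rolling `sdk_version` back from Gradio 4.25.0 to 3.6 means the Space runtime supplies gradio itself at that version, which is presumably also why the `gradio==4.14.0` pin drops out of requirements.txt below.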
app.py CHANGED
```diff
@@ -3,9 +3,10 @@ import gradio as gr
 import torch
 import numpy as np
 from transformers import pipeline
-import spaces
 
 import torch
+print(f"Is CUDA available: {torch.cuda.is_available()}")
+print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")
 
 pipe_flan = pipeline("text2text-generation", model="philschmid/flan-t5-xxl-sharded-fp16", model_kwargs={"load_in_8bit":True, "device_map": "auto"})
 pipe_vanilla = pipeline("text2text-generation", model="t5-large", device="cuda:0", model_kwargs={"torch_dtype":torch.bfloat16})
@@ -29,7 +30,6 @@ Q: A juggler can juggle 16 balls. Half of the balls are golf balls, and half of
 title = "Flan T5 and Vanilla T5"
 description = "This demo compares [T5-large](https://huggingface.co/t5-large) and [Flan-T5-XX-large](https://huggingface.co/google/flan-t5-xxl). Note that T5 expects a very specific format of the prompts, so the examples below are not necessarily the best prompts to compare."
 
-@spaces.GPU
 def inference(text):
     output_flan = pipe_flan(text, max_length=100)[0]["generated_text"]
     output_vanilla = pipe_vanilla(text, max_length=100)[0]["generated_text"]
```
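For context, here is a minimal sketch of what app.py looks like after this rollback. Everything outside the two hunks above is assumed: in particular the `gr.Interface` wiring at the bottom and the `inference` return value are illustrative guesses, not part of the diff, and loading both models needs a GPU with substantial memory.

```python
import gradio as gr
import torch
import numpy as np
from transformers import pipeline

print(f"Is CUDA available: {torch.cuda.is_available()}")
print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")

# Flan-T5-XXL loaded in 8-bit via bitsandbytes; vanilla T5-large in bfloat16 on GPU 0.
pipe_flan = pipeline("text2text-generation",
                     model="philschmid/flan-t5-xxl-sharded-fp16",
                     model_kwargs={"load_in_8bit": True, "device_map": "auto"})
pipe_vanilla = pipeline("text2text-generation", model="t5-large",
                        device="cuda:0", model_kwargs={"torch_dtype": torch.bfloat16})

title = "Flan T5 and Vanilla T5"
description = "This demo compares T5-large and Flan-T5-XX-large."

def inference(text):
    output_flan = pipe_flan(text, max_length=100)[0]["generated_text"]
    output_vanilla = pipe_vanilla(text, max_length=100)[0]["generated_text"]
    return output_flan, output_vanilla  # assumed: both generations are returned

# Assumed wiring; the actual demo layout lies outside the diff hunks.
demo = gr.Interface(fn=inference,
                    inputs=gr.Textbox(lines=3, label="Prompt"),
                    outputs=[gr.Textbox(label="Flan T5"),
                             gr.Textbox(label="Vanilla T5")],
                    title=title, description=description)
demo.launch()
```

Note that removing `import spaces` and the `@spaces.GPU` decorator drops the ZeroGPU on-demand allocation path, so the pipelines now claim the GPU eagerly at startup, which is consistent with the new CUDA availability prints.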
requirements.txt CHANGED
```diff
@@ -1,7 +1,5 @@
-accelerate==0.25.0
-bitsandbytes==0.41.1
-gradio==4.14.0
-scipy==1.11.2
-spaces==0.20.0
-torch==2.0.0
-transformers==4.36.2
+--extra-index-url https://download.pytorch.org/whl/cu113
+torch
+transformers==4.26.0
+accelerate
+bitsandbytes
```
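The `--extra-index-url` line adds the PyTorch CUDA 11.3 wheel index alongside PyPI, so the now-unpinned `torch` can resolve to a cu113 build. A minimal post-install sanity check, assuming it runs after `pip install -r requirements.txt`:

```python
# Sanity check for the rolled-back environment (assumption: run after
# `pip install -r requirements.txt`). The cu113 extra index should yield
# a CUDA 11.3 build of torch; transformers is pinned to 4.26.0.
import torch
import transformers

print(torch.__version__)         # e.g. "1.12.1+cu113" from the extra index
print(torch.version.cuda)        # expected: "11.3"
print(transformers.__version__)  # expected: "4.26.0"
```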