Spaces: Running on A10G
Rollback for better performance #15
by dzmltzack - opened
- README.md +1 -1
- app.py +2 -2
- requirements.txt +5 -7
README.md CHANGED

@@ -4,7 +4,7 @@ emoji: 🍮
 colorFrom: gray
 colorTo: purple
 sdk: gradio
-sdk_version:
+sdk_version: 3.6
 app_file: app.py
 pinned: false
 ---
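The sdk_version field in the README front matter pins the Gradio SDK the Space is built with; pinning 3.6 rolls it back to a release that predates the ZeroGPU-era SDK used before this change. A quick runtime check (a hypothetical snippet, not part of the Space) would confirm the pin took effect:

# Hypothetical sanity check: confirm the Space booted with the pinned Gradio SDK.
import gradio

print(gradio.__version__)  # expect "3.6" after this change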
app.py CHANGED

@@ -3,9 +3,10 @@ import gradio as gr
 import torch
 import numpy as np
 from transformers import pipeline
-import spaces
 
 import torch
+print(f"Is CUDA available: {torch.cuda.is_available()}")
+print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")
 
 pipe_flan = pipeline("text2text-generation", model="philschmid/flan-t5-xxl-sharded-fp16", model_kwargs={"load_in_8bit":True, "device_map": "auto"})
 pipe_vanilla = pipeline("text2text-generation", model="t5-large", device="cuda:0", model_kwargs={"torch_dtype":torch.bfloat16})
@@ -29,7 +30,6 @@ Q: A juggler can juggle 16 balls. Half of the balls are golf balls, and half of
 title = "Flan T5 and Vanilla T5"
 description = "This demo compares [T5-large](https://huggingface.co/t5-large) and [Flan-T5-XX-large](https://huggingface.co/google/flan-t5-xxl). Note that T5 expects a very specific format of the prompts, so the examples below are not necessarily the best prompts to compare."
 
-@spaces.GPU
 def inference(text):
     output_flan = pipe_flan(text, max_length=100)[0]["generated_text"]
     output_vanilla = pipe_vanilla(text, max_length=100)[0]["generated_text"]
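The removed import spaces / @spaces.GPU pair is the ZeroGPU decorator pattern; dropping it is consistent with running on a dedicated A10G, where the pipelines can hold the GPU for the lifetime of the process. For readability, here is a minimal sketch of app.py after the rollback. Only the imports, the CUDA prints, the two pipelines, and inference() appear in the diff; the gr.Interface wiring at the bottom is an assumption added to make the sketch self-contained.

# Sketch of the rolled-back app.py. The gr.Interface wiring is assumed;
# everything above it is taken from the diff.
import gradio as gr
import torch
from transformers import pipeline

print(f"Is CUDA available: {torch.cuda.is_available()}")
print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")

# Flan-T5-XXL loaded in 8-bit via bitsandbytes; accelerate's device_map
# places the shards on the GPU automatically.
pipe_flan = pipeline("text2text-generation",
                     model="philschmid/flan-t5-xxl-sharded-fp16",
                     model_kwargs={"load_in_8bit": True, "device_map": "auto"})
# Vanilla T5-large in bfloat16, pinned to the first CUDA device.
pipe_vanilla = pipeline("text2text-generation", model="t5-large", device="cuda:0",
                        model_kwargs={"torch_dtype": torch.bfloat16})

title = "Flan T5 and Vanilla T5"

def inference(text):
    output_flan = pipe_flan(text, max_length=100)[0]["generated_text"]
    output_vanilla = pipe_vanilla(text, max_length=100)[0]["generated_text"]
    return output_flan, output_vanilla

# Assumed wiring (not shown in the diff): one input box, one output box per model.
demo = gr.Interface(fn=inference,
                    inputs=gr.Textbox(lines=3, label="Input"),
                    outputs=[gr.Textbox(label="Flan T5"), gr.Textbox(label="Vanilla T5")],
                    title=title)
demo.launch()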
requirements.txt CHANGED

@@ -1,7 +1,5 @@
-
-
-
-
-
-torch==2.0.0
-transformers==4.36.2
+--extra-index-url https://download.pytorch.org/whl/cu113
+torch
+transformers==4.26.0
+accelerate
+bitsandbytes
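Two notes on the new pins: --extra-index-url points pip at PyTorch's CUDA 11.3 wheel index, so the unpinned torch resolves to a cu113 build, and accelerate plus bitsandbytes are what load_in_8bit=True and device_map="auto" in app.py require. A hedged smoke test for the resulting environment (hypothetical, not part of the Space):

# Hypothetical smoke test: verify the CUDA wheel and 8-bit dependencies resolved.
import torch
import transformers
import accelerate

print(torch.__version__, torch.version.cuda)  # expect a cu113 build, i.e. CUDA "11.3"
print(transformers.__version__)               # expect "4.26.0"
print(torch.cuda.is_available())              # must be True for the cuda:0 pipeline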