Commit 06f233f
Parent: afda370
Update README.md
README.md CHANGED

@@ -390,6 +390,7 @@ model = AutoModelForSpeechSeq2Seq.from_pretrained(
 
 # Enable static cache and compile the forward pass
 model.generation_config.cache_implementation = "static"
+model.generation_config.max_new_tokens = 256
 model.forward = torch.compile(model.forward, mode="reduce-overhead", fullgraph=True)
 
 processor = AutoProcessor.from_pretrained(model_id)
@@ -409,7 +410,7 @@ sample = dataset[0]["audio"]
 # 2 warmup steps
 for _ in tqdm(range(2), desc="Warm-up step"):
     with sdpa_kernel(SDPBackend.MATH):
-        result = pipe(sample.copy())
+        result = pipe(sample.copy(), generate_kwargs={"min_new_tokens": 256, "max_new_tokens": 256})
 
 # fast run
 with sdpa_kernel(SDPBackend.MATH):
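
For reference, a minimal sketch of how the compiled-generation snippet reads once this commit is applied. The checkpoint (`openai/whisper-large-v3`) and the `distil-whisper/librispeech_long` dataset are assumptions about the surrounding README context that the diff does not show; only the two lines marked `+` above come from this commit. Pinning the warm-up calls to exactly 256 new tokens presumably keeps the compiled forward pass running with a fixed token budget across both warm-up iterations.

```python
import torch
from datasets import load_dataset
from torch.nn.attention import SDPBackend, sdpa_kernel
from tqdm import tqdm
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline

# Checkpoint and dataset are illustrative assumptions; the actual values are
# defined earlier in the README, outside the lines touched by this commit.
model_id = "openai/whisper-large-v3"
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
device = "cuda:0" if torch.cuda.is_available() else "cpu"

model = AutoModelForSpeechSeq2Seq.from_pretrained(
    model_id, torch_dtype=torch_dtype, low_cpu_mem_usage=True
).to(device)

# Enable static cache and compile the forward pass
model.generation_config.cache_implementation = "static"
model.generation_config.max_new_tokens = 256  # added by this commit
model.forward = torch.compile(model.forward, mode="reduce-overhead", fullgraph=True)

processor = AutoProcessor.from_pretrained(model_id)

pipe = pipeline(
    "automatic-speech-recognition",
    model=model,
    tokenizer=processor.tokenizer,
    feature_extractor=processor.feature_extractor,
    torch_dtype=torch_dtype,
    device=device,
)

dataset = load_dataset("distil-whisper/librispeech_long", "clean", split="validation")
sample = dataset[0]["audio"]

# 2 warmup steps: pin generation to exactly 256 new tokens (added by this commit)
# so each warm-up pass exercises the compiled forward with the same token budget
for _ in tqdm(range(2), desc="Warm-up step"):
    with sdpa_kernel(SDPBackend.MATH):
        result = pipe(sample.copy(), generate_kwargs={"min_new_tokens": 256, "max_new_tokens": 256})

# fast run
with sdpa_kernel(SDPBackend.MATH):
    result = pipe(sample.copy())

print(result["text"])
```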