rwkv-v5-1b5-cpu

Runtime error

BlinkDL committed on Mar 3, 2023

Commit

58784a4

•

1 Parent(s): 7797c56

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -5,9 +5,9 @@ from huggingface_hub import hf_hub_download
 from pynvml import *
 nvmlInit()
 gpu_h = nvmlDeviceGetHandleByIndex(0)
 title = "RWKV-4 14B fp16"
-desc = '''DEMO limited to ctxlen 512, and slow because A10g does not have enough VRAM (some layers are computed on CPU instead). Links:
 <a href='https://github.com/BlinkDL/ChatRWKV' target="_blank" style="margin:0 0.5em">ChatRWKV</a>
 <a href='https://github.com/BlinkDL/RWKV-LM' target="_blank" style="margin:0 0.5em">RWKV-LM</a>
 <a href="https://pypi.org/project/rwkv/" target="_blank" style="margin:0 0.5em">RWKV pip package</a>
@@ -18,7 +18,7 @@ os.environ["RWKV_CUDA_ON"] = '1' # if '1' then use CUDA kernel for seq mode (muc
 from rwkv.model import RWKV
 model_path = hf_hub_download(repo_id="BlinkDL/rwkv-4-pile-14b", filename="RWKV-4-Pile-14B-20230213-8019.pth")
-model = RWKV(model=model_path, strategy='cuda fp16 *32 -> cpu fp32')
 # model_path = hf_hub_download(repo_id="BlinkDL/rwkv-4-pile-169m", filename="RWKV-4-Pile-169M-20220807-8023.pth")
 # model = RWKV(model=model_path, strategy='cuda fp16')
 from rwkv.utils import PIPELINE, PIPELINE_ARGS
@@ -53,7 +53,7 @@ def infer(
     occurrence = {}
     state = None
     for i in range(int(token_count)):
-        out, state = model.forward(pipeline.encode(ctx)[:512] if i == 0 else [token], state)
         for n in args.token_ban:
             out[n] = -float('inf')
         for n in occurrence:

 from pynvml import *
 nvmlInit()
 gpu_h = nvmlDeviceGetHandleByIndex(0)
+ctx_limit = 512
 title = "RWKV-4 14B fp16"
+desc = f'''DEMO limited to ctxlen {ctx_limit}, and slow because A10g does not have enough VRAM (some layers are computed on CPU instead). Links:
 <a href='https://github.com/BlinkDL/ChatRWKV' target="_blank" style="margin:0 0.5em">ChatRWKV</a>
 <a href='https://github.com/BlinkDL/RWKV-LM' target="_blank" style="margin:0 0.5em">RWKV-LM</a>
 <a href="https://pypi.org/project/rwkv/" target="_blank" style="margin:0 0.5em">RWKV pip package</a>
 from rwkv.model import RWKV
 model_path = hf_hub_download(repo_id="BlinkDL/rwkv-4-pile-14b", filename="RWKV-4-Pile-14B-20230213-8019.pth")
+model = RWKV(model=model_path, strategy='cuda fp16 *33 -> cpu fp32')
 # model_path = hf_hub_download(repo_id="BlinkDL/rwkv-4-pile-169m", filename="RWKV-4-Pile-169M-20220807-8023.pth")
 # model = RWKV(model=model_path, strategy='cuda fp16')
 from rwkv.utils import PIPELINE, PIPELINE_ARGS
     occurrence = {}
     state = None
     for i in range(int(token_count)):
+        out, state = model.forward(pipeline.encode(ctx)[:ctx_limit] if i == 0 else [token], state)
         for n in args.token_ban:
             out[n] = -float('inf')
         for n in occurrence: