ys-2020 committed
Commit 9845231 · 1 parent: 834b257
sync
Note: this view is limited to 50 files because the commit contains too many changes.
- runs/llama-2-7b-instruct-together-32k/w.16-x.16-y.16/w.fp16-x.fp16-y.fp16/w.tnsr.fp16-x.tnsr.fp16-y.tnsr.fp16/default-pileval.128x1024.[0-0]/run-241119.183703/config-241119.183703.yaml +85 -0
- runs/llama-2-7b-instruct-together-32k/w.16-x.16-y.16/w.fp16-x.fp16-y.fp16/w.tnsr.fp16-x.tnsr.fp16-y.tnsr.fp16/default-pileval.128x1024.[0-0]/run-241119.183703/results-241119.183703.json +32 -0
- runs/llama-2-7b-instruct-together-32k/w.16-x.16-y.16/w.fp16-x.fp16-y.fp16/w.tnsr.fp16-x.tnsr.fp16-y.tnsr.fp16/default-pileval.128x1024.[0-0]/run-241119.183703/run-241119.183703.log +219 -0
- runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.GridSearch.bn2.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200548/config-241119.200548.yaml +146 -0
- runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.GridSearch.bn2.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200548/model/acts.pt +3 -0
- runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.GridSearch.bn2.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200548/model/model.pt +3 -0
- runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.GridSearch.bn2.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200548/model/rotation.pt +3 -0
- runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.GridSearch.bn2.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200548/model/scale.pt +3 -0
- runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.GridSearch.bn2.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200548/model/smooth.pt +3 -0
- runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.GridSearch.bn2.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200548/model/wgts.pt +3 -0
- runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.GridSearch.bn2.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200548/results-241119.200548.json +32 -0
- runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.GridSearch.bn2.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200548/run-241119.185856.log +0 -0
- runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.GridSearch.bn2.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200548/run-241119.200548.log +0 -0
- runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.a0p5.b0.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0].RUNNING/run-241119.200727.RUNNING/config-241119.200727.yaml +146 -0
- runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.a0p5.b0.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0].RUNNING/run-241119.200727.RUNNING/model/model.pt +3 -0
- runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.a0p5.b0.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0].RUNNING/run-241119.200727.RUNNING/model/rotation.pt +3 -0
- runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.a0p5.b0.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0].RUNNING/run-241119.200727.RUNNING/model/scale.pt +3 -0
- runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.a0p5.b0.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0].RUNNING/run-241119.200727.RUNNING/model/smooth.pt +3 -0
- runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.a0p5.b0.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0].RUNNING/run-241119.200727.RUNNING/model/wgts.pt +3 -0
- runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.a0p5.b0.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0].RUNNING/run-241119.200727.RUNNING/run-241119.200727.log +0 -0
- runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.201608/config-241119.201608.yaml +129 -0
- runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.201608/model/acts.pt +3 -0
- runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.201608/model/model.pt +3 -0
- runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.201608/model/rotation.pt +3 -0
- runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.201608/model/scale.pt +3 -0
- runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.201608/model/wgts.pt +3 -0
- runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.201608/results-241119.201608.json +32 -0
- runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.201608/run-241119.201608.log +0 -0
- runs/llama-3-8b-instruct-gradient-1048k/w.16-x.16-y.16/w.fp16-x.fp16-y.fp16/w.tnsr.fp16-x.tnsr.fp16-y.tnsr.fp16/default-pileval.128x1024.[0-0]/run-241119.172947/config-241119.172947.yaml +85 -0
- runs/llama-3-8b-instruct-gradient-1048k/w.16-x.16-y.16/w.fp16-x.fp16-y.fp16/w.tnsr.fp16-x.tnsr.fp16-y.tnsr.fp16/default-pileval.128x1024.[0-0]/run-241119.172947/results-241119.172947.json +32 -0
- runs/llama-3-8b-instruct-gradient-1048k/w.16-x.16-y.16/w.fp16-x.fp16-y.fp16/w.tnsr.fp16-x.tnsr.fp16-y.tnsr.fp16/default-pileval.128x1024.[0-0]/run-241119.172947/run-241119.172947.log +219 -0
- runs/llama-3-8b-instruct-gradient-1048k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.GridSearch.bn2.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200545/config-241119.200545.yaml +146 -0
- runs/llama-3-8b-instruct-gradient-1048k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.GridSearch.bn2.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200545/model/acts.pt +3 -0
- runs/llama-3-8b-instruct-gradient-1048k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.GridSearch.bn2.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200545/model/model.pt +3 -0
- runs/llama-3-8b-instruct-gradient-1048k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.GridSearch.bn2.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200545/model/rotation.pt +3 -0
- runs/llama-3-8b-instruct-gradient-1048k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.GridSearch.bn2.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200545/model/scale.pt +3 -0
- runs/llama-3-8b-instruct-gradient-1048k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.GridSearch.bn2.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200545/model/smooth.pt +3 -0
- runs/llama-3-8b-instruct-gradient-1048k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.GridSearch.bn2.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200545/model/wgts.pt +3 -0
- runs/llama-3-8b-instruct-gradient-1048k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.GridSearch.bn2.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200545/results-241119.200545.json +32 -0
- runs/llama-3-8b-instruct-gradient-1048k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.GridSearch.bn2.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200545/run-241119.183745.log +0 -0
- runs/llama-3-8b-instruct-gradient-1048k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.GridSearch.bn2.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200545/run-241119.200545.log +0 -0
- runs/llama-3-8b-instruct-gradient-1048k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.a0p5.b0.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200729/config-241119.200729.yaml +146 -0
- runs/llama-3-8b-instruct-gradient-1048k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.a0p5.b0.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200729/model/acts.pt +3 -0
- runs/llama-3-8b-instruct-gradient-1048k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.a0p5.b0.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200729/model/model.pt +3 -0
- runs/llama-3-8b-instruct-gradient-1048k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.a0p5.b0.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200729/model/rotation.pt +3 -0
- runs/llama-3-8b-instruct-gradient-1048k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.a0p5.b0.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200729/model/scale.pt +3 -0
- runs/llama-3-8b-instruct-gradient-1048k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.a0p5.b0.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200729/model/smooth.pt +3 -0
- runs/llama-3-8b-instruct-gradient-1048k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.a0p5.b0.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200729/model/wgts.pt +3 -0
- runs/llama-3-8b-instruct-gradient-1048k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.a0p5.b0.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200729/results-241119.200729.json +32 -0
- runs/llama-3-8b-instruct-gradient-1048k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.a0p5.b0.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200729/run-241119.200729.log +0 -0
runs/llama-2-7b-instruct-together-32k/w.16-x.16-y.16/w.fp16-x.fp16-y.fp16/w.tnsr.fp16-x.tnsr.fp16-y.tnsr.fp16/default-pileval.128x1024.[0-0]/run-241119.183703/config-241119.183703.yaml
ADDED
@@ -0,0 +1,85 @@
+cache:
+  root: runs/shang
+  path:
+    rotation: ''
+    reorder: ''
+    smooth: ''
+    wgts: ''
+    acts: ''
+output:
+  root: runs/shang
+  dirname: default-pileval.128x1024.[0-0]
+  job: run
+model:
+  name: llama-2-7b-instruct-together-32k
+  family: llama-2
+  path: /home/yujunlin/models/llama-2/llama-2-7b-instruct-together-32k
+  root: ''
+  local_path: /home/yujunlin/models/llama-2/llama-2-7b-instruct-together-32k
+  local_root: /home/yujunlin/models
+  dtype: torch.float16
+eval:
+  num_gpus: 1
+  batch_size: 8
+  tasks:
+  - wikitext
+  max_seq_length: -4096
+  evaluators:
+  - gptq
+quant:
+  wgts:
+    dtype: null
+    zero_point: null
+    group_shapes:
+    - - -1
+      - -1
+      - -1
+    scale_dtypes:
+    - null
+    intermediate_dtypes: []
+    intermediate_levels: []
+    needs_dequant_saturation: false
+    skips: []
+    enable_kernel_gptq: false
+    enable_calib_range: false
+  ipts:
+    dtype: null
+    zero_point: null
+    group_shapes:
+    - - -1
+      - -1
+      - -1
+    scale_dtypes:
+    - null
+    skips: []
+    static: false
+    enable_calib_range: false
+  opts:
+    dtype: null
+    zero_point: null
+    group_shapes:
+    - - -1
+      - -1
+      - -1
+    scale_dtypes:
+    - null
+    skips: []
+    static: false
+    enable_calib_range: false
+  calib:
+    data: pileval
+    num_samples: 128
+    path: mit-han-lab/pile-val-backup
+    seq_length: 1024
+    min_seq_length: 0
+    max_seq_length: 0
+    local_path: ''
+  enable_rotation: false
+  enable_reorder: false
+  enable_smooth: false
+  develop_dtype: torch.float32
+seed: 12345
+skip_eval: false
+load_from: ''
+save_model: false
+copy_on_save: false
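These config-*.yaml files are plain YAML and can be inspected directly. A minimal sketch of reading the baseline config above (assuming PyYAML is installed; the filename is the one added in this commit):

```python
import yaml  # PyYAML, assumed installed

# Read the fp16 baseline config added above.
with open("config-241119.183703.yaml") as f:
    cfg = yaml.safe_load(f)

print(cfg["model"]["name"])           # llama-2-7b-instruct-together-32k
print(cfg["eval"]["tasks"])           # ['wikitext']
print(cfg["quant"]["wgts"]["dtype"])  # None: the baseline quantizes nothing
```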
runs/llama-2-7b-instruct-together-32k/w.16-x.16-y.16/w.fp16-x.fp16-y.fp16/w.tnsr.fp16-x.tnsr.fp16-y.tnsr.fp16/default-pileval.128x1024.[0-0]/run-241119.183703/results-241119.183703.json
ADDED
@@ -0,0 +1,32 @@
+{
+  "gptq": {
+    "2048": {
+      "results": {
+        "wikitext": {
+          "word_perplexity": 6.443161573358209
+        }
+      },
+      "versions": {
+        "wikitext": 1
+      },
+      "config": {
+        "model": "llama-2-7b-instruct-together-32k"
+      },
+      "model": "llama-2-7b-instruct-together-32k"
+    },
+    "4096": {
+      "results": {
+        "wikitext": {
+          "word_perplexity": 5.964906855443073
+        }
+      },
+      "versions": {
+        "wikitext": 1
+      },
+      "config": {
+        "model": "llama-2-7b-instruct-together-32k"
+      },
+      "model": "llama-2-7b-instruct-together-32k"
+    }
+  }
+}
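The results-*.json files nest one evaluation run per max sequence length. A short sketch of pulling the perplexities back out with the standard library:

```python
import json

with open("results-241119.183703.json") as f:
    results = json.load(f)

# Runs are keyed by evaluation max sequence length ("2048", "4096").
for seq_len, run in results["gptq"].items():
    ppl = run["results"]["wikitext"]["word_perplexity"]
    print(f"max_seq_length={seq_len}: wikitext word_perplexity={ppl:.4f}")
```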
runs/llama-2-7b-instruct-together-32k/w.16-x.16-y.16/w.fp16-x.fp16-y.fp16/w.tnsr.fp16-x.tnsr.fp16-y.tnsr.fp16/default-pileval.128x1024.[0-0]/run-241119.183703/run-241119.183703.log
ADDED
@@ -0,0 +1,219 @@
+24-11-19 18:37:03 | I | === Configurations ===
+24-11-19 18:37:03 | I | LlmPtqRunConfig(
+24-11-19 18:37:03 | I |   cache=LlmCacheConfig(
+24-11-19 18:37:03 | I |     root=runs/shang,
+24-11-19 18:37:03 | I |     dirpath=LlmQuantCacheConfig(
+24-11-19 18:37:03 | I |       rotation=,
+24-11-19 18:37:03 | I |       reorder=,
+24-11-19 18:37:03 | I |       smooth=,
+24-11-19 18:37:03 | I |       wgts=,
+24-11-19 18:37:03 | I |       acts=),
+24-11-19 18:37:03 | I |     path=LlmQuantCacheConfig(
+24-11-19 18:37:03 | I |       rotation=,
+24-11-19 18:37:03 | I |       reorder=,
+24-11-19 18:37:03 | I |       smooth=,
+24-11-19 18:37:03 | I |       wgts=,
+24-11-19 18:37:03 | I |       acts=)),
+24-11-19 18:37:03 | I |   output=OutputConfig(
+24-11-19 18:37:03 | I |     root=runs/shang,
+24-11-19 18:37:03 | I |     dirname=default-pileval.128x1024.[0-0],
+24-11-19 18:37:03 | I |     job=run,
+24-11-19 18:37:03 | I |     dirpath=runs/shang/llm/llama-2/llama-2-7b-instruct-together-32k/w.16-x.16-y.16/w.fp16-x.fp16-y.fp16/w.tnsr.fp16-x.tnsr.fp16-y.tnsr.fp16/default-pileval.128x1024.[0-0],
+24-11-19 18:37:03 | I |     timestamp=241119.183703),
+24-11-19 18:37:03 | I |   model=LlmModelConfig(
+24-11-19 18:37:03 | I |     name=llama-2-7b-instruct-together-32k,
+24-11-19 18:37:03 | I |     family=llama-2,
+24-11-19 18:37:03 | I |     path=/home/yujunlin/models/llama-2/llama-2-7b-instruct-together-32k,
+24-11-19 18:37:03 | I |     root=,
+24-11-19 18:37:03 | I |     local_path=/home/yujunlin/models/llama-2/llama-2-7b-instruct-together-32k,
+24-11-19 18:37:03 | I |     local_root=/home/yujunlin/models,
+24-11-19 18:37:03 | I |     size=7.0,
+24-11-19 18:37:03 | I |     variant=instruct-together-32k,
+24-11-19 18:37:03 | I |     dtype=torch.float16,
+24-11-19 18:37:03 | I |     orig_dtype=torch.float16),
+24-11-19 18:37:03 | I |   eval=LlmEvalConfig(
+24-11-19 18:37:03 | I |     num_gpus=1,
+24-11-19 18:37:03 | I |     batch_size=8,
+24-11-19 18:37:03 | I |     tasks=['wikitext'],
+24-11-19 18:37:03 | I |     max_seq_length=-4096,
+24-11-19 18:37:03 | I |     evaluators=['gptq']),
+24-11-19 18:37:03 | I |   quant=LlmQuantConfig(
+24-11-19 18:37:03 | I |     wgts=LlmWeightQuantizerConfig(
+24-11-19 18:37:03 | I |       dtype=None,
+24-11-19 18:37:03 | I |       zero_point=None,
+24-11-19 18:37:03 | I |       group_shapes=((-1, -1, -1),),
+24-11-19 18:37:03 | I |       scale_dtypes=(None,),
+24-11-19 18:37:03 | I |       intermediate_dtypes=(),
+24-11-19 18:37:03 | I |       intermediate_levels=(),
+24-11-19 18:37:03 | I |       needs_dequant_saturation=False,
+24-11-19 18:37:03 | I |       skips=[],
+24-11-19 18:37:03 | I |       static=False,
+24-11-19 18:37:03 | I |       kernel_gptq=None,
+24-11-19 18:37:03 | I |       calib_range=None),
+24-11-19 18:37:03 | I |     ipts=LlmActivationQuantizerConfig(
+24-11-19 18:37:03 | I |       dtype=None,
+24-11-19 18:37:03 | I |       zero_point=None,
+24-11-19 18:37:03 | I |       group_shapes=((-1, -1, -1),),
+24-11-19 18:37:03 | I |       scale_dtypes=(None,),
+24-11-19 18:37:03 | I |       intermediate_dtypes=(),
+24-11-19 18:37:03 | I |       intermediate_levels=(),
+24-11-19 18:37:03 | I |       needs_dequant_saturation=False,
+24-11-19 18:37:03 | I |       skips=[],
+24-11-19 18:37:03 | I |       static=False,
+24-11-19 18:37:03 | I |       kernel_gptq=None,
+24-11-19 18:37:03 | I |       calib_range=None),
+24-11-19 18:37:03 | I |     opts=LlmActivationQuantizerConfig(
+24-11-19 18:37:03 | I |       dtype=None,
+24-11-19 18:37:03 | I |       zero_point=None,
+24-11-19 18:37:03 | I |       group_shapes=((-1, -1, -1),),
+24-11-19 18:37:03 | I |       scale_dtypes=(None,),
+24-11-19 18:37:03 | I |       intermediate_dtypes=(),
+24-11-19 18:37:03 | I |       intermediate_levels=(),
+24-11-19 18:37:03 | I |       needs_dequant_saturation=False,
+24-11-19 18:37:03 | I |       skips=[],
+24-11-19 18:37:03 | I |       static=False,
+24-11-19 18:37:03 | I |       kernel_gptq=None,
+24-11-19 18:37:03 | I |       calib_range=None),
+24-11-19 18:37:03 | I |     calib=LlmCalibDataLoaderConfig(
+24-11-19 18:37:03 | I |       data=pileval,
+24-11-19 18:37:03 | I |       num_samples=128,
+24-11-19 18:37:03 | I |       batch_size=1,
+24-11-19 18:37:03 | I |       path=mit-han-lab/pile-val-backup,
+24-11-19 18:37:03 | I |       seq_length=1024,
+24-11-19 18:37:03 | I |       min_seq_length=0,
+24-11-19 18:37:03 | I |       max_seq_length=0,
+24-11-19 18:37:03 | I |       local_path=),
+24-11-19 18:37:03 | I |     rotation=None,
+24-11-19 18:37:03 | I |     reorder=None,
+24-11-19 18:37:03 | I |     smooth=None,
+24-11-19 18:37:03 | I |     develop_dtype=torch.float32),
+24-11-19 18:37:03 | I |   seed=12345,
+24-11-19 18:37:03 | I |   skip_eval=False,
+24-11-19 18:37:03 | I |   load_from=,
+24-11-19 18:37:03 | I |   save_model=False,
+24-11-19 18:37:03 | I |   copy_on_save=False)
+24-11-19 18:37:03 | I | === Dumped Configurations ===
+24-11-19 18:37:03 | I | { 'cache': {'path': {'acts': '', 'reorder': '', 'rotation': '', 'smooth': '', 'wgts': ''}, 'root': 'runs/shang'},
+24-11-19 18:37:03 | I |   'copy_on_save': False,
+24-11-19 18:37:03 | I |   'eval': {'batch_size': 8, 'evaluators': ['gptq'], 'max_seq_length': -4096, 'num_gpus': 1, 'tasks': ['wikitext']},
+24-11-19 18:37:03 | I |   'load_from': '',
+24-11-19 18:37:03 | I |   'model': { 'dtype': 'torch.float16',
+24-11-19 18:37:03 | I |              'family': 'llama-2',
+24-11-19 18:37:03 | I |              'local_path': '/home/yujunlin/models/llama-2/llama-2-7b-instruct-together-32k',
+24-11-19 18:37:03 | I |              'local_root': '/home/yujunlin/models',
+24-11-19 18:37:03 | I |              'name': 'llama-2-7b-instruct-together-32k',
+24-11-19 18:37:03 | I |              'path': '/home/yujunlin/models/llama-2/llama-2-7b-instruct-together-32k',
+24-11-19 18:37:03 | I |              'root': ''},
+24-11-19 18:37:03 | I |   'output': {'dirname': 'default-pileval.128x1024.[0-0]', 'job': 'run', 'root': 'runs/shang'},
+24-11-19 18:37:03 | I |   'quant': { 'calib': { 'data': 'pileval',
+24-11-19 18:37:03 | I |                         'local_path': '',
+24-11-19 18:37:03 | I |                         'max_seq_length': 0,
+24-11-19 18:37:03 | I |                         'min_seq_length': 0,
+24-11-19 18:37:03 | I |                         'num_samples': 128,
+24-11-19 18:37:03 | I |                         'path': 'mit-han-lab/pile-val-backup',
+24-11-19 18:37:03 | I |                         'seq_length': 1024},
+24-11-19 18:37:03 | I |              'develop_dtype': 'torch.float32',
+24-11-19 18:37:03 | I |              'enable_reorder': False,
+24-11-19 18:37:03 | I |              'enable_rotation': False,
+24-11-19 18:37:03 | I |              'enable_smooth': False,
+24-11-19 18:37:03 | I |              'ipts': { 'dtype': None,
+24-11-19 18:37:03 | I |                        'enable_calib_range': False,
+24-11-19 18:37:03 | I |                        'group_shapes': [[-1, -1, -1]],
+24-11-19 18:37:03 | I |                        'scale_dtypes': [None],
+24-11-19 18:37:03 | I |                        'skips': [],
+24-11-19 18:37:03 | I |                        'static': False,
+24-11-19 18:37:03 | I |                        'zero_point': None},
+24-11-19 18:37:03 | I |              'opts': { 'dtype': None,
+24-11-19 18:37:03 | I |                        'enable_calib_range': False,
+24-11-19 18:37:03 | I |                        'group_shapes': [[-1, -1, -1]],
+24-11-19 18:37:03 | I |                        'scale_dtypes': [None],
+24-11-19 18:37:03 | I |                        'skips': [],
+24-11-19 18:37:03 | I |                        'static': False,
+24-11-19 18:37:03 | I |                        'zero_point': None},
+24-11-19 18:37:03 | I |              'wgts': { 'dtype': None,
+24-11-19 18:37:03 | I |                        'enable_calib_range': False,
+24-11-19 18:37:03 | I |                        'enable_kernel_gptq': False,
+24-11-19 18:37:03 | I |                        'group_shapes': [[-1, -1, -1]],
+24-11-19 18:37:03 | I |                        'intermediate_dtypes': [],
+24-11-19 18:37:03 | I |                        'intermediate_levels': [],
+24-11-19 18:37:03 | I |                        'needs_dequant_saturation': False,
+24-11-19 18:37:03 | I |                        'scale_dtypes': [None],
+24-11-19 18:37:03 | I |                        'skips': [],
+24-11-19 18:37:03 | I |                        'zero_point': None}},
+24-11-19 18:37:03 | I |   'save_model': False,
+24-11-19 18:37:03 | I |   'seed': 12345,
+24-11-19 18:37:03 | I |   'skip_eval': False}
+24-11-19 18:37:03 | I | === Output Directory ===
+24-11-19 18:37:03 | I | runs/shang/llm/llama-2/llama-2-7b-instruct-together-32k/w.16-x.16-y.16/w.fp16-x.fp16-y.fp16/w.tnsr.fp16-x.tnsr.fp16-y.tnsr.fp16/default-pileval.128x1024.[0-0]/run-241119.183703
+24-11-19 18:37:03 | I | === Start Evaluating ===
+24-11-19 18:37:03 | I | * Building model llama-2-7b-instruct-together-32k from /home/yujunlin/models/llama-2/llama-2-7b-instruct-together-32k
+24-11-19 18:37:03 | I | We will use 90% of the memory on device 0 for storing the model, and 10% for the buffer to avoid OOM. You can set `max_memory` in to a higher value to use more memory (at your own risk).
+24-11-19 18:37:12 | I | - Patching LlamaSdpaAttention.forward in model.layers.0.self_attn
+24-11-19 18:37:12 | I | - Patching LlamaSdpaAttention.forward in model.layers.1.self_attn
+24-11-19 18:37:12 | I | - Patching LlamaSdpaAttention.forward in model.layers.2.self_attn
+24-11-19 18:37:12 | I | - Patching LlamaSdpaAttention.forward in model.layers.3.self_attn
+24-11-19 18:37:12 | I | - Patching LlamaSdpaAttention.forward in model.layers.4.self_attn
+24-11-19 18:37:12 | I | - Patching LlamaSdpaAttention.forward in model.layers.5.self_attn
+24-11-19 18:37:12 | I | - Patching LlamaSdpaAttention.forward in model.layers.6.self_attn
+24-11-19 18:37:12 | I | - Patching LlamaSdpaAttention.forward in model.layers.7.self_attn
+24-11-19 18:37:12 | I | - Patching LlamaSdpaAttention.forward in model.layers.8.self_attn
+24-11-19 18:37:12 | I | - Patching LlamaSdpaAttention.forward in model.layers.9.self_attn
+24-11-19 18:37:12 | I | - Patching LlamaSdpaAttention.forward in model.layers.10.self_attn
+24-11-19 18:37:12 | I | - Patching LlamaSdpaAttention.forward in model.layers.11.self_attn
+24-11-19 18:37:12 | I | - Patching LlamaSdpaAttention.forward in model.layers.12.self_attn
+24-11-19 18:37:12 | I | - Patching LlamaSdpaAttention.forward in model.layers.13.self_attn
+24-11-19 18:37:12 | I | - Patching LlamaSdpaAttention.forward in model.layers.14.self_attn
+24-11-19 18:37:12 | I | - Patching LlamaSdpaAttention.forward in model.layers.15.self_attn
+24-11-19 18:37:12 | I | - Patching LlamaSdpaAttention.forward in model.layers.16.self_attn
+24-11-19 18:37:12 | I | - Patching LlamaSdpaAttention.forward in model.layers.17.self_attn
+24-11-19 18:37:12 | I | - Patching LlamaSdpaAttention.forward in model.layers.18.self_attn
+24-11-19 18:37:12 | I | - Patching LlamaSdpaAttention.forward in model.layers.19.self_attn
+24-11-19 18:37:12 | I | - Patching LlamaSdpaAttention.forward in model.layers.20.self_attn
+24-11-19 18:37:12 | I | - Patching LlamaSdpaAttention.forward in model.layers.21.self_attn
+24-11-19 18:37:12 | I | - Patching LlamaSdpaAttention.forward in model.layers.22.self_attn
+24-11-19 18:37:12 | I | - Patching LlamaSdpaAttention.forward in model.layers.23.self_attn
+24-11-19 18:37:12 | I | - Patching LlamaSdpaAttention.forward in model.layers.24.self_attn
+24-11-19 18:37:12 | I | - Patching LlamaSdpaAttention.forward in model.layers.25.self_attn
+24-11-19 18:37:12 | I | - Patching LlamaSdpaAttention.forward in model.layers.26.self_attn
+24-11-19 18:37:12 | I | - Patching LlamaSdpaAttention.forward in model.layers.27.self_attn
+24-11-19 18:37:12 | I | - Patching LlamaSdpaAttention.forward in model.layers.28.self_attn
+24-11-19 18:37:12 | I | - Patching LlamaSdpaAttention.forward in model.layers.29.self_attn
+24-11-19 18:37:12 | I | - Patching LlamaSdpaAttention.forward in model.layers.30.self_attn
+24-11-19 18:37:12 | I | - Patching LlamaSdpaAttention.forward in model.layers.31.self_attn
+24-11-19 18:37:12 | I | * Development dtype is torch.float32
+24-11-19 18:37:12 | I | * Evaluating model
+24-11-19 18:37:12 | W | `pretrained` model kwarg is not of type `str`. Many other model arguments may be ignored. Please do not launch via accelerate or use `parallelize=True` if passing an existing model this way.
+24-11-19 18:37:12 | I | Using model type 'default'
+24-11-19 18:37:12 | W | Passed an already-initialized model through `pretrained`, assuming single-process call to evaluate() or custom distributed integration
+24-11-19 18:37:12 | I | - Evaluator: gptq
+24-11-19 18:37:12 | I | - Tasks: ['wikitext']
+24-11-19 18:37:12 | I | - Batch_size: 8
+24-11-19 18:37:12 | I | + Max_seq_length: 2048
+24-11-19 18:37:12 | D | Starting new HTTPS connection (1): huggingface.co:443
+24-11-19 18:37:18 | W | Using the latest cached version of the dataset since wikitext couldn't be found on the Hugging Face Hub
+24-11-19 18:37:18 | W | Found the latest cached dataset configuration 'wikitext-2-raw-v1' at /home/yujunlin/.cache/huggingface/datasets/wikitext/wikitext-2-raw-v1/0.0.0/b08601e04326c79dfdd32d625aee71d232d685c3 (last modified on Tue Oct 8 19:51:38 2024).
+24-11-19 18:37:18 | D | Attempting to acquire lock 23438952619984 on /home/yujunlin/.cache/huggingface/datasets/_home_yujunlin_.cache_huggingface_datasets_wikitext_wikitext-2-raw-v1_0.0.0_b08601e04326c79dfdd32d625aee71d232d685c3.lock
+24-11-19 18:37:18 | D | Lock 23438952619984 acquired on /home/yujunlin/.cache/huggingface/datasets/_home_yujunlin_.cache_huggingface_datasets_wikitext_wikitext-2-raw-v1_0.0.0_b08601e04326c79dfdd32d625aee71d232d685c3.lock
+24-11-19 18:37:18 | D | open file: /home/yujunlin/.cache/huggingface/datasets/wikitext/wikitext-2-raw-v1/0.0.0/b08601e04326c79dfdd32d625aee71d232d685c3/dataset_info.json
+24-11-19 18:37:18 | D | Attempting to release lock 23438952619984 on /home/yujunlin/.cache/huggingface/datasets/_home_yujunlin_.cache_huggingface_datasets_wikitext_wikitext-2-raw-v1_0.0.0_b08601e04326c79dfdd32d625aee71d232d685c3.lock
+24-11-19 18:37:18 | D | Lock 23438952619984 released on /home/yujunlin/.cache/huggingface/datasets/_home_yujunlin_.cache_huggingface_datasets_wikitext_wikitext-2-raw-v1_0.0.0_b08601e04326c79dfdd32d625aee71d232d685c3.lock
+24-11-19 18:37:31 | I | - Results:
+24-11-19 18:37:32 | I | | Task |Version| Metric |Value | |Stderr|
+24-11-19 18:37:32 | I | |--------|------:|---------------|-----:|---|-----:|
+24-11-19 18:37:32 | I | |wikitext| 1|word_perplexity|6.4432|± |6.4432|
+24-11-19 18:37:32 | I |
+24-11-19 18:37:32 | I | + Max_seq_length: 4096
+24-11-19 18:37:32 | D | Starting new HTTPS connection (2): huggingface.co:443
+24-11-19 18:37:38 | W | Using the latest cached version of the dataset since wikitext couldn't be found on the Hugging Face Hub
+24-11-19 18:37:38 | W | Found the latest cached dataset configuration 'wikitext-2-raw-v1' at /home/yujunlin/.cache/huggingface/datasets/wikitext/wikitext-2-raw-v1/0.0.0/b08601e04326c79dfdd32d625aee71d232d685c3 (last modified on Tue Oct 8 19:51:38 2024).
+24-11-19 18:37:38 | D | Attempting to acquire lock 23438952626944 on /home/yujunlin/.cache/huggingface/datasets/_home_yujunlin_.cache_huggingface_datasets_wikitext_wikitext-2-raw-v1_0.0.0_b08601e04326c79dfdd32d625aee71d232d685c3.lock
+24-11-19 18:37:38 | D | Lock 23438952626944 acquired on /home/yujunlin/.cache/huggingface/datasets/_home_yujunlin_.cache_huggingface_datasets_wikitext_wikitext-2-raw-v1_0.0.0_b08601e04326c79dfdd32d625aee71d232d685c3.lock
+24-11-19 18:37:38 | D | open file: /home/yujunlin/.cache/huggingface/datasets/wikitext/wikitext-2-raw-v1/0.0.0/b08601e04326c79dfdd32d625aee71d232d685c3/dataset_info.json
+24-11-19 18:37:38 | D | Attempting to release lock 23438952626944 on /home/yujunlin/.cache/huggingface/datasets/_home_yujunlin_.cache_huggingface_datasets_wikitext_wikitext-2-raw-v1_0.0.0_b08601e04326c79dfdd32d625aee71d232d685c3.lock
+24-11-19 18:37:38 | D | Lock 23438952626944 released on /home/yujunlin/.cache/huggingface/datasets/_home_yujunlin_.cache_huggingface_datasets_wikitext_wikitext-2-raw-v1_0.0.0_b08601e04326c79dfdd32d625aee71d232d685c3.lock
+24-11-19 18:37:50 | I | - Results:
+24-11-19 18:37:50 | I | | Task |Version| Metric |Value | |Stderr|
+24-11-19 18:37:50 | I | |--------|------:|---------------|-----:|---|-----:|
+24-11-19 18:37:50 | I | |wikitext| 1|word_perplexity|5.9649|± |5.9649|
+24-11-19 18:37:50 | I |
+24-11-19 18:37:50 | I | * Saving results to runs/shang/llm/llama-2/llama-2-7b-instruct-together-32k/w.16-x.16-y.16/w.fp16-x.fp16-y.fp16/w.tnsr.fp16-x.tnsr.fp16-y.tnsr.fp16/default-pileval.128x1024.[0-0]/run-241119.183703
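One detail worth noting in this log: the config sets `max_seq_length: -4096`, and the run then evaluates at both 2048 and 4096. The negative value appears to mean "evaluate at power-of-two lengths up to the absolute value"; the sketch below encodes that inferred convention (an inference from this log, not a documented contract of the tool):

```python
def expand_max_seq_length(max_seq_length: int, start: int = 2048) -> list[int]:
    """Inferred: negative values fan out to power-of-two lengths up to
    abs(max_seq_length); non-negative values are used as-is."""
    if max_seq_length >= 0:
        return [max_seq_length]
    lengths, n = [], start
    while n <= -max_seq_length:
        lengths.append(n)
        n *= 2
    return lengths

print(expand_max_seq_length(-4096))  # [2048, 4096], matching the log above
```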
runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.GridSearch.bn2.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200548/config-241119.200548.yaml
ADDED
@@ -0,0 +1,146 @@
+cache:
+  root: runs/shang
+  path:
+    rotation: runs/shang/llm/cache/quant/rotation/hadamard/llama-2-7b-instruct-together-32k.pt
+    reorder: ''
+    smooth: runs/shang/llm/cache/quant/pileval.128x1024.[0-0]/smooth/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/w.skip.[]-x.skip.[]-y.skip.[attn_q]/rotate.hadamard.[out_proj]/smooth.attn.OutputsError.GridSearch.Layer.d2.en1.sn1/smooth.attn.[a.AbsMax.b.AbsMax]/smooth.attn.g20.bn2/llama-2-7b-instruct-together-32k.pt
+    wgts: runs/shang/llm/cache/quant/pileval.128x1024.[0-0]/wgts/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/w.skip.[]-x.skip.[]-y.skip.[attn_q]/rotate.hadamard.[out_proj]/smooth.attn.OutputsError.GridSearch.Layer.d2.en1.sn1/smooth.attn.[a.AbsMax.b.AbsMax]/smooth.attn.g20.bn2/w.kernel.gptq.d0p01.b128/w.range.OutputsError.GridSearch.Group.d2.e512.sn1/w.range.r.[0p2.1].g80/w.range.skip.[]/llama-2-7b-instruct-together-32k.pt
+    acts: runs/shang/llm/cache/quant/pileval.128x1024.[0-0]/acts/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/w.skip.[]-x.skip.[]-y.skip.[attn_q]/rotate.hadamard.[out_proj]/smooth.attn.OutputsError.GridSearch.Layer.d2.en1.sn1/smooth.attn.[a.AbsMax.b.AbsMax]/smooth.attn.g20.bn2/w.kernel.gptq.d0p01.b128/w.range.OutputsError.GridSearch.Group.d2.e512.sn1/w.range.r.[0p2.1].g80/w.range.skip.[]/y.range.OutputsError.Manual.Layer.d2.en1.sn1/y.range.r.[1]/y.range.skip.[]/llama-2-7b-instruct-together-32k.pt
+output:
+  root: runs/shang
+  dirname: skip.y.[q]-gptq-rot.[+out]-smth.attn.GridSearch.bn2.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]
+  job: run
+model:
+  name: llama-2-7b-instruct-together-32k
+  family: llama-2
+  path: /home/yujunlin/models/llama-2/llama-2-7b-instruct-together-32k
+  root: ''
+  local_path: /home/yujunlin/models/llama-2/llama-2-7b-instruct-together-32k
+  local_root: /home/yujunlin/models
+  dtype: torch.float16
+eval:
+  num_gpus: 1
+  batch_size: 8
+  tasks:
+  - wikitext
+  max_seq_length: -4096
+  evaluators:
+  - gptq
+quant:
+  wgts:
+    dtype: sint8
+    zero_point: null
+    group_shapes:
+    - - 1
+      - -1
+      - -1
+    scale_dtypes:
+    - torch.float16
+    intermediate_dtypes: []
+    intermediate_levels: []
+    needs_dequant_saturation: false
+    skips: []
+    enable_kernel_gptq: true
+    kernel_gptq:
+      damp_percentage: 0.01
+      block_size: 128
+      num_inv_tries: 250
+      hessian_block_size: 512
+    enable_calib_range: true
+    calib_range:
+      degree: 2
+      objective: OutputsError
+      strategy: GridSearch
+      granularity: Group
+      element_batch_size: 64
+      sample_batch_size: -1
+      element_size: 512
+      sample_size: -1
+      pre_reshape: true
+      outputs_device: cpu
+      ratio: 1.0
+      max_shrink: 0.2
+      max_expand: 1.0
+      num_grids: 80
+      allow_scale: false
+      skips: []
+  ipts:
+    dtype: sint8
+    zero_point: null
+    group_shapes:
+    - - 1
+      - -1
+      - -1
+    scale_dtypes:
+    - torch.float16
+    skips: []
+    static: false
+    enable_calib_range: false
+  opts:
+    dtype: sint8
+    zero_point: null
+    group_shapes:
+    - - -1
+      - -1
+      - -1
+    scale_dtypes:
+    - torch.float16
+    skips:
+    - attn_q
+    static: true
+    enable_calib_range: true
+    calib_range:
+      degree: 2
+      objective: OutputsError
+      strategy: Manual
+      granularity: Layer
+      element_batch_size: -1
+      sample_batch_size: -1
+      element_size: -1
+      sample_size: -1
+      pre_reshape: true
+      outputs_device: cpu
+      ratio: 1.0
+      max_shrink: 0.2
+      max_expand: 1.0
+      num_grids: 80
+      allow_scale: false
+      skips: []
+  calib:
+    data: pileval
+    num_samples: 128
+    path: mit-han-lab/pile-val-backup
+    seq_length: 1024
+    min_seq_length: 0
+    max_seq_length: 0
+    local_path: ''
+  enable_rotation: true
+  rotation:
+    random: false
+    transforms:
+    - out_proj
+  enable_reorder: false
+  enable_smooth: true
+  smooth:
+    enable_proj: false
+    enable_attn: true
+    attn:
+      degree: 2
+      strategy: GridSearch
+      sample_batch_size: -1
+      sample_size: -1
+      outputs_device: cpu
+      allow_a_quant: true
+      allow_b_quant: true
+      spans:
+      - - AbsMax
+        - AbsMax
+      alpha: 0.5
+      beta: -2
+      num_grids: 20
+  develop_dtype: torch.float32
+seed: 12345
+skip_eval: false
+load_from: ''
+save_model: 'true'
+copy_on_save: false
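Relative to the fp16 baseline config earlier in this commit, this file flips many quantization knobs at once (sint8 dtypes, GPTQ, rotation, attention smoothing, calibrated ranges). A small sketch for diffing any two of these configs side by side (assuming PyYAML and both files on disk):

```python
import yaml  # PyYAML, assumed installed

def flatten(d, prefix=""):
    """Yield (dotted_key, value) pairs for every leaf of a nested dict."""
    for k, v in d.items():
        key = f"{prefix}.{k}" if prefix else str(k)
        if isinstance(v, dict):
            yield from flatten(v, key)
        else:
            yield key, v

with open("config-241119.183703.yaml") as f:
    baseline = dict(flatten(yaml.safe_load(f)))
with open("config-241119.200548.yaml") as f:
    quantized = dict(flatten(yaml.safe_load(f)))

for key in sorted(set(baseline) | set(quantized)):
    if baseline.get(key) != quantized.get(key):
        print(f"{key}: {baseline.get(key)!r} -> {quantized.get(key)!r}")
```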
runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.GridSearch.bn2.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200548/model/acts.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:452d684e7ed9bbcde200ac9e61316bd873246869750aa79ffd3cb0aececc9b99
+size 35898
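The *.pt entries in this commit are Git LFS pointer files, not the tensors themselves: the oid line records the SHA-256 of the real payload and size its byte count. After `git lfs pull`, a download can be verified locally, e.g.:

```python
import hashlib

def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    """Stream a file through SHA-256 without loading it all into memory."""
    h = hashlib.sha256()
    with open(path, "rb") as f:
        while block := f.read(chunk_size):
            h.update(block)
    return h.hexdigest()

# Should print the oid from the pointer above:
# 452d684e7ed9bbcde200ac9e61316bd873246869750aa79ffd3cb0aececc9b99
print(sha256_of("model/acts.pt"))
```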
runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.GridSearch.bn2.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200548/model/model.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bc1ac15dda8c29eb2ebd22eee524e1bb0c696f2dcba86f840af18da8c8434b12
+size 13476951926
runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.GridSearch.bn2.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200548/model/rotation.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f916dabab0b940f3f0130e5acabbb6d02e7c557bf8608b430e001e02d4a75ab2
+size 134219097
runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.GridSearch.bn2.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200548/model/scale.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5e4b445b99650db18a04ac7d3ec3214937f362749e437fafd2d124b993fdb60f
+size 5566058
runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.GridSearch.bn2.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200548/model/smooth.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4e46a0206c5c7210660e08615fba714402bcd532050747f0fd9b134f77cb9fba
+size 535162
runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.GridSearch.bn2.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200548/model/wgts.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3454af85d02208d593d736d00fa756e45f909f2f6f1bbefdcef52f1e3e6cbbaf
+size 5527158
runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.GridSearch.bn2.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200548/results-241119.200548.json
ADDED
@@ -0,0 +1,32 @@
+{
+  "gptq": {
+    "2048": {
+      "results": {
+        "wikitext": {
+          "word_perplexity": 6.503797370202683
+        }
+      },
+      "versions": {
+        "wikitext": 1
+      },
+      "config": {
+        "model": "llama-2-7b-instruct-together-32k"
+      },
+      "model": "llama-2-7b-instruct-together-32k"
+    },
+    "4096": {
+      "results": {
+        "wikitext": {
+          "word_perplexity": 6.014449215881915
+        }
+      },
+      "versions": {
+        "wikitext": 1
+      },
+      "config": {
+        "model": "llama-2-7b-instruct-together-32k"
+      },
+      "model": "llama-2-7b-instruct-together-32k"
+    }
+  }
+}
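Set against the fp16 baseline results earlier in this commit, this file gives the perplexity cost of the W8A8 recipe directly (all four values copied from the two results files):

```python
baseline  = {"2048": 6.443161573358209, "4096": 5.964906855443073}  # fp16
quantized = {"2048": 6.503797370202683, "4096": 6.014449215881915}  # w8a8 run above

for seq_len in baseline:
    delta = quantized[seq_len] - baseline[seq_len]
    print(f"max_seq_length={seq_len}: +{delta:.4f} word_perplexity vs. fp16")
# max_seq_length=2048: +0.0606 word_perplexity vs. fp16
# max_seq_length=4096: +0.0495 word_perplexity vs. fp16
```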
runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.GridSearch.bn2.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200548/run-241119.185856.log
ADDED
The diff for this file is too large to render.
runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.GridSearch.bn2.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200548/run-241119.200548.log
ADDED
The diff for this file is too large to render.
runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.a0p5.b0.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0].RUNNING/run-241119.200727.RUNNING/config-241119.200727.yaml
ADDED
@@ -0,0 +1,146 @@
+cache:
+  root: runs/shang
+  path:
+    rotation: runs/shang/llm/cache/quant/rotation/hadamard/llama-2-7b-instruct-together-32k.pt
+    reorder: ''
+    smooth: runs/shang/llm/cache/quant/pileval.128x1024.[0-0]/smooth/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/w.skip.[]-x.skip.[]-y.skip.[attn_q]/rotate.hadamard.[out_proj]/smooth.attn.OutputsError.Manual.Layer.d2.en1.sn1/smooth.attn.[a.AbsMax.b.AbsMax]/smooth.attn.a0p5.b0/llama-2-7b-instruct-together-32k.pt
+    wgts: runs/shang/llm/cache/quant/pileval.128x1024.[0-0]/wgts/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/w.skip.[]-x.skip.[]-y.skip.[attn_q]/rotate.hadamard.[out_proj]/smooth.attn.OutputsError.Manual.Layer.d2.en1.sn1/smooth.attn.[a.AbsMax.b.AbsMax]/smooth.attn.a0p5.b0/w.kernel.gptq.d0p01.b128/w.range.OutputsError.GridSearch.Group.d2.e512.sn1/w.range.r.[0p2.1].g80/w.range.skip.[]/llama-2-7b-instruct-together-32k.pt
+    acts: runs/shang/llm/cache/quant/pileval.128x1024.[0-0]/acts/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/w.skip.[]-x.skip.[]-y.skip.[attn_q]/rotate.hadamard.[out_proj]/smooth.attn.OutputsError.Manual.Layer.d2.en1.sn1/smooth.attn.[a.AbsMax.b.AbsMax]/smooth.attn.a0p5.b0/w.kernel.gptq.d0p01.b128/w.range.OutputsError.GridSearch.Group.d2.e512.sn1/w.range.r.[0p2.1].g80/w.range.skip.[]/y.range.OutputsError.Manual.Layer.d2.en1.sn1/y.range.r.[1]/y.range.skip.[]/llama-2-7b-instruct-together-32k.pt
+output:
+  root: runs/shang
+  dirname: skip.y.[q]-gptq-rot.[+out]-smth.attn.a0p5.b0.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]
+  job: run
+model:
+  name: llama-2-7b-instruct-together-32k
+  family: llama-2
+  path: /home/yujunlin/models/llama-2/llama-2-7b-instruct-together-32k
+  root: ''
+  local_path: /home/yujunlin/models/llama-2/llama-2-7b-instruct-together-32k
+  local_root: /home/yujunlin/models
+  dtype: torch.float16
+eval:
+  num_gpus: 1
+  batch_size: 8
+  tasks:
+  - wikitext
+  max_seq_length: -4096
+  evaluators:
+  - gptq
+quant:
+  wgts:
+    dtype: sint8
+    zero_point: null
+    group_shapes:
+    - - 1
+      - -1
+      - -1
+    scale_dtypes:
+    - torch.float16
+    intermediate_dtypes: []
+    intermediate_levels: []
+    needs_dequant_saturation: false
+    skips: []
+    enable_kernel_gptq: true
+    kernel_gptq:
+      damp_percentage: 0.01
+      block_size: 128
+      num_inv_tries: 250
+      hessian_block_size: 512
+    enable_calib_range: true
+    calib_range:
+      degree: 2
+      objective: OutputsError
+      strategy: GridSearch
+      granularity: Group
+      element_batch_size: 64
+      sample_batch_size: -1
+      element_size: 512
+      sample_size: -1
+      pre_reshape: true
+      outputs_device: cpu
+      ratio: 1.0
+      max_shrink: 0.2
+      max_expand: 1.0
+      num_grids: 80
+      allow_scale: false
+      skips: []
+  ipts:
+    dtype: sint8
+    zero_point: null
+    group_shapes:
+    - - 1
+      - -1
+      - -1
+    scale_dtypes:
+    - torch.float16
+    skips: []
+    static: false
+    enable_calib_range: false
+  opts:
+    dtype: sint8
+    zero_point: null
+    group_shapes:
+    - - -1
+      - -1
+      - -1
+    scale_dtypes:
+    - torch.float16
+    skips:
+    - attn_q
+    static: true
+    enable_calib_range: true
+    calib_range:
+      degree: 2
+      objective: OutputsError
+      strategy: Manual
+      granularity: Layer
+      element_batch_size: -1
+      sample_batch_size: -1
+      element_size: -1
+      sample_size: -1
+      pre_reshape: true
+      outputs_device: cpu
+      ratio: 1.0
+      max_shrink: 0.2
+      max_expand: 1.0
+      num_grids: 80
+      allow_scale: false
+      skips: []
+  calib:
+    data: pileval
+    num_samples: 128
+    path: mit-han-lab/pile-val-backup
+    seq_length: 1024
+    min_seq_length: 0
+    max_seq_length: 0
+    local_path: ''
+  enable_rotation: true
+  rotation:
+    random: false
+    transforms:
+    - out_proj
+  enable_reorder: false
+  enable_smooth: true
+  smooth:
+    enable_proj: false
+    enable_attn: true
+    attn:
+      degree: 2
+      strategy: Manual
+      sample_batch_size: -1
+      sample_size: -1
+      outputs_device: cpu
+      allow_a_quant: true
+      allow_b_quant: true
+      spans:
+      - - AbsMax
+        - AbsMax
+      alpha: 0.5
+      beta: 0
+      num_grids: 20
+  develop_dtype: torch.float32
+seed: 12345
+skip_eval: false
+load_from: ''
+save_model: 'true'
+copy_on_save: false
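The only substantive differences from the GridSearch config above are `strategy: Manual`, `beta: 0`, and the matching cache paths and dirname: this run fixes the smoothing exponents instead of searching for them. For orientation, alpha/beta knobs like these typically parameterize a SmoothQuant-style per-channel balancing scale; the sketch below is an analogy to that published formulation, not code from this repository:

```python
import torch

def smooth_scale(a_absmax: torch.Tensor, b_absmax: torch.Tensor,
                 alpha: float = 0.5, beta: float = 0.0) -> torch.Tensor:
    """SmoothQuant-style scale s_j = max|a_j|^alpha / max|b_j|^beta per
    channel j; with beta = 0 the denominator drops out entirely."""
    return a_absmax.pow(alpha) / b_absmax.pow(beta)

a = torch.rand(8) * 10   # per-channel AbsMax statistics of one operand
b = torch.rand(8) + 0.1  # per-channel AbsMax statistics of the other
print(smooth_scale(a, b, alpha=0.5, beta=0.0))
```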
runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.a0p5.b0.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0].RUNNING/run-241119.200727.RUNNING/model/model.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c7707587af5ec67ddc0f3f7dbfdbd9c86f9ded33e10c5bf4b0246518c228199f
+size 13476951926
runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.a0p5.b0.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0].RUNNING/run-241119.200727.RUNNING/model/rotation.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f916dabab0b940f3f0130e5acabbb6d02e7c557bf8608b430e001e02d4a75ab2
+size 134219097
runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.a0p5.b0.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0].RUNNING/run-241119.200727.RUNNING/model/scale.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6c32ea151ff1a6e987ec8866aae01d07225acf388d05417668928a96d3fe6c6a
+size 5566058
runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.a0p5.b0.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0].RUNNING/run-241119.200727.RUNNING/model/smooth.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:385f0d71a9d0cca3df4fd946a21df2de62d089964597e2e1704c7a39cfee0711
+size 535162
runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.a0p5.b0.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0].RUNNING/run-241119.200727.RUNNING/model/wgts.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:90a45af762722116d586b0e7011d9acfcc8950086c0470898c078c05af779d0e
+size 5527158
runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.a0p5.b0.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0].RUNNING/run-241119.200727.RUNNING/run-241119.200727.log
ADDED
The diff for this file is too large to render.
runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.201608/config-241119.201608.yaml
ADDED
@@ -0,0 +1,129 @@
+cache:
+  root: runs/shang
+  path:
+    rotation: runs/shang/llm/cache/quant/rotation/hadamard/llama-2-7b-instruct-together-32k.pt
+    reorder: ''
+    smooth: ''
+    wgts: runs/shang/llm/cache/quant/pileval.128x1024.[0-0]/wgts/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/w.skip.[]-x.skip.[]-y.skip.[attn_q]/rotate.hadamard.[out_proj]/w.kernel.gptq.d0p01.b128/w.range.OutputsError.GridSearch.Group.d2.e512.sn1/w.range.r.[0p2.1].g80/w.range.skip.[]/llama-2-7b-instruct-together-32k.pt
+    acts: runs/shang/llm/cache/quant/pileval.128x1024.[0-0]/acts/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/w.skip.[]-x.skip.[]-y.skip.[attn_q]/rotate.hadamard.[out_proj]/w.kernel.gptq.d0p01.b128/w.range.OutputsError.GridSearch.Group.d2.e512.sn1/w.range.r.[0p2.1].g80/w.range.skip.[]/y.range.OutputsError.Manual.Layer.d2.en1.sn1/y.range.r.[1]/y.range.skip.[]/llama-2-7b-instruct-together-32k.pt
+output:
+  root: runs/shang
+  dirname: skip.y.[q]-gptq-rot.[+out]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]
+  job: run
+model:
+  name: llama-2-7b-instruct-together-32k
+  family: llama-2
+  path: /home/yujunlin/models/llama-2/llama-2-7b-instruct-together-32k
+  root: ''
+  local_path: /home/yujunlin/models/llama-2/llama-2-7b-instruct-together-32k
+  local_root: /home/yujunlin/models
+  dtype: torch.float16
+eval:
+  num_gpus: 1
+  batch_size: 8
+  tasks:
+  - wikitext
+  max_seq_length: -4096
+  evaluators:
+  - gptq
+quant:
+  wgts:
+    dtype: sint8
+    zero_point: null
+    group_shapes:
+    - - 1
+      - -1
+      - -1
+    scale_dtypes:
+    - torch.float16
+    intermediate_dtypes: []
+    intermediate_levels: []
+    needs_dequant_saturation: false
+    skips: []
+    enable_kernel_gptq: true
+    kernel_gptq:
+      damp_percentage: 0.01
+      block_size: 128
+      num_inv_tries: 250
+      hessian_block_size: 512
+    enable_calib_range: true
+    calib_range:
+      degree: 2
+      objective: OutputsError
+      strategy: GridSearch
+      granularity: Group
+      element_batch_size: 64
+      sample_batch_size: -1
+      element_size: 512
+      sample_size: -1
+      pre_reshape: true
+      outputs_device: cpu
+      ratio: 1.0
+      max_shrink: 0.2
+      max_expand: 1.0
+      num_grids: 80
+      allow_scale: false
+      skips: []
+  ipts:
+    dtype: sint8
+    zero_point: null
+    group_shapes:
+    - - 1
+      - -1
+      - -1
+    scale_dtypes:
+    - torch.float16
+    skips: []
+    static: false
+    enable_calib_range: false
+  opts:
+    dtype: sint8
+    zero_point: null
+    group_shapes:
+    - - -1
+      - -1
+      - -1
+    scale_dtypes:
+    - torch.float16
+    skips:
+    - attn_q
+    static: true
+    enable_calib_range: true
+    calib_range:
+      degree: 2
+      objective: OutputsError
+      strategy: Manual
+      granularity: Layer
+      element_batch_size: -1
+      sample_batch_size: -1
+      element_size: -1
+      sample_size: -1
+      pre_reshape: true
+      outputs_device: cpu
+      ratio: 1.0
+      max_shrink: 0.2
+      max_expand: 1.0
+      num_grids: 80
+      allow_scale: false
+      skips: []
+  calib:
+    data: pileval
+    num_samples: 128
+    path: mit-han-lab/pile-val-backup
+    seq_length: 1024
+    min_seq_length: 0
+    max_seq_length: 0
+    local_path: ''
+  enable_rotation: true
+  rotation:
+    random: false
+    transforms:
+    - out_proj
+  enable_reorder: false
+  enable_smooth: false
+  develop_dtype: torch.float32
+seed: 12345
+skip_eval: false
+load_from: ''
+save_model: 'true'
+copy_on_save: false
runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.201608/model/acts.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:38385b12cbc35439f6e635f9e60e69e2490cb32f65a90e85b921587f19973ca6
+size 35898
runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.201608/model/model.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e05ab74d7e1f68b9ae327e1afb40528b281d90dbf814760976dd74e97a54beae
+size 13476951926
runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.201608/model/rotation.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f916dabab0b940f3f0130e5acabbb6d02e7c557bf8608b430e001e02d4a75ab2
+size 134219097
runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.201608/model/scale.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:527a5ec6f0ba29bd2579c7fc5a8221231449f5dcb35223f3036ef6e87166bf60
+size 5566058
runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.201608/model/wgts.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e6a8e12842f3c488159d02404a8885723a3c096ac3e63aedcc515c13da27b1a2
+size 5527158
runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.201608/results-241119.201608.json
ADDED
@@ -0,0 +1,32 @@
+{
+    "gptq": {
+        "2048": {
+            "results": {
+                "wikitext": {
+                    "word_perplexity": 6.509843744356094
+                }
+            },
+            "versions": {
+                "wikitext": 1
+            },
+            "config": {
+                "model": "llama-2-7b-instruct-together-32k"
+            },
+            "model": "llama-2-7b-instruct-together-32k"
+        },
+        "4096": {
+            "results": {
+                "wikitext": {
+                    "word_perplexity": 6.017616942577096
+                }
+            },
+            "versions": {
+                "wikitext": 1
+            },
+            "config": {
+                "model": "llama-2-7b-instruct-together-32k"
+            },
+            "model": "llama-2-7b-instruct-together-32k"
+        }
+    }
+}
runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.201608/run-241119.201608.log
ADDED
The diff for this file is too large to render.
See raw diff
runs/llama-3-8b-instruct-gradient-1048k/w.16-x.16-y.16/w.fp16-x.fp16-y.fp16/w.tnsr.fp16-x.tnsr.fp16-y.tnsr.fp16/default-pileval.128x1024.[0-0]/run-241119.172947/config-241119.172947.yaml
ADDED
@@ -0,0 +1,85 @@
+cache:
+  root: runs/shang
+  path:
+    rotation: ''
+    reorder: ''
+    smooth: ''
+    wgts: ''
+    acts: ''
+output:
+  root: runs/shang
+  dirname: default-pileval.128x1024.[0-0]
+  job: run
+model:
+  name: llama-3-8b-instruct-gradient-1048k
+  family: llama-3
+  path: /home/yujunlin/models/llama-3/llama-3-8b-instruct-gradient-1048k
+  root: ''
+  local_path: /home/yujunlin/models/llama-3/llama-3-8b-instruct-gradient-1048k
+  local_root: /home/yujunlin/models
+  dtype: torch.float16
+eval:
+  num_gpus: 1
+  batch_size: 8
+  tasks:
+  - wikitext
+  max_seq_length: -4096
+  evaluators:
+  - gptq
+quant:
+  wgts:
+    dtype: null
+    zero_point: null
+    group_shapes:
+    - - -1
+      - -1
+      - -1
+    scale_dtypes:
+    - null
+    intermediate_dtypes: []
+    intermediate_levels: []
+    needs_dequant_saturation: false
+    skips: []
+    enable_kernel_gptq: false
+    enable_calib_range: false
+  ipts:
+    dtype: null
+    zero_point: null
+    group_shapes:
+    - - -1
+      - -1
+      - -1
+    scale_dtypes:
+    - null
+    skips: []
+    static: false
+    enable_calib_range: false
+  opts:
+    dtype: null
+    zero_point: null
+    group_shapes:
+    - - -1
+      - -1
+      - -1
+    scale_dtypes:
+    - null
+    skips: []
+    static: false
+    enable_calib_range: false
+  calib:
+    data: pileval
+    num_samples: 128
+    path: mit-han-lab/pile-val-backup
+    seq_length: 1024
+    min_seq_length: 0
+    max_seq_length: 0
+    local_path: ''
+  enable_rotation: false
+  enable_reorder: false
+  enable_smooth: false
+  develop_dtype: torch.float32
+seed: 12345
+skip_eval: false
+load_from: ''
+save_model: false
+copy_on_save: false
runs/llama-3-8b-instruct-gradient-1048k/w.16-x.16-y.16/w.fp16-x.fp16-y.fp16/w.tnsr.fp16-x.tnsr.fp16-y.tnsr.fp16/default-pileval.128x1024.[0-0]/run-241119.172947/results-241119.172947.json
ADDED
@@ -0,0 +1,32 @@
+{
+    "gptq": {
+        "2048": {
+            "results": {
+                "wikitext": {
+                    "word_perplexity": 7.833065190604292
+                }
+            },
+            "versions": {
+                "wikitext": 1
+            },
+            "config": {
+                "model": "llama-3-8b-instruct-gradient-1048k"
+            },
+            "model": "llama-3-8b-instruct-gradient-1048k"
+        },
+        "4096": {
+            "results": {
+                "wikitext": {
+                    "word_perplexity": 7.261800992411218
+                }
+            },
+            "versions": {
+                "wikitext": 1
+            },
+            "config": {
+                "model": "llama-3-8b-instruct-gradient-1048k"
+            },
+            "model": "llama-3-8b-instruct-gradient-1048k"
+        }
+    }
+}
runs/llama-3-8b-instruct-gradient-1048k/w.16-x.16-y.16/w.fp16-x.fp16-y.fp16/w.tnsr.fp16-x.tnsr.fp16-y.tnsr.fp16/default-pileval.128x1024.[0-0]/run-241119.172947/run-241119.172947.log
ADDED
@@ -0,0 +1,219 @@
+24-11-19 17:29:47 | I | === Configurations ===
+24-11-19 17:29:47 | I | LlmPtqRunConfig(
+24-11-19 17:29:47 | I | cache=LlmCacheConfig(
+24-11-19 17:29:47 | I | root=runs/shang,
+24-11-19 17:29:47 | I | dirpath=LlmQuantCacheConfig(
+24-11-19 17:29:47 | I | rotation=,
+24-11-19 17:29:47 | I | reorder=,
+24-11-19 17:29:47 | I | smooth=,
+24-11-19 17:29:47 | I | wgts=,
+24-11-19 17:29:47 | I | acts=),
+24-11-19 17:29:47 | I | path=LlmQuantCacheConfig(
+24-11-19 17:29:47 | I | rotation=,
+24-11-19 17:29:47 | I | reorder=,
+24-11-19 17:29:47 | I | smooth=,
+24-11-19 17:29:47 | I | wgts=,
+24-11-19 17:29:47 | I | acts=)),
+24-11-19 17:29:47 | I | output=OutputConfig(
+24-11-19 17:29:47 | I | root=runs/shang,
+24-11-19 17:29:47 | I | dirname=default-pileval.128x1024.[0-0],
+24-11-19 17:29:47 | I | job=run,
+24-11-19 17:29:47 | I | dirpath=runs/shang/llm/llama-3/llama-3-8b-instruct-gradient-1048k/w.16-x.16-y.16/w.fp16-x.fp16-y.fp16/w.tnsr.fp16-x.tnsr.fp16-y.tnsr.fp16/default-pileval.128x1024.[0-0],
+24-11-19 17:29:47 | I | timestamp=241119.172947),
+24-11-19 17:29:47 | I | model=LlmModelConfig(
+24-11-19 17:29:47 | I | name=llama-3-8b-instruct-gradient-1048k,
+24-11-19 17:29:47 | I | family=llama-3,
+24-11-19 17:29:47 | I | path=/home/yujunlin/models/llama-3/llama-3-8b-instruct-gradient-1048k,
+24-11-19 17:29:47 | I | root=,
+24-11-19 17:29:47 | I | local_path=/home/yujunlin/models/llama-3/llama-3-8b-instruct-gradient-1048k,
+24-11-19 17:29:47 | I | local_root=/home/yujunlin/models,
+24-11-19 17:29:47 | I | size=8.0,
+24-11-19 17:29:47 | I | variant=instruct-gradient-1048k,
+24-11-19 17:29:47 | I | dtype=torch.float16,
+24-11-19 17:29:47 | I | orig_dtype=torch.bfloat16),
+24-11-19 17:29:47 | I | eval=LlmEvalConfig(
+24-11-19 17:29:47 | I | num_gpus=1,
+24-11-19 17:29:47 | I | batch_size=8,
+24-11-19 17:29:47 | I | tasks=['wikitext'],
+24-11-19 17:29:47 | I | max_seq_length=-4096,
+24-11-19 17:29:47 | I | evaluators=['gptq']),
+24-11-19 17:29:47 | I | quant=LlmQuantConfig(
+24-11-19 17:29:47 | I | wgts=LlmWeightQuantizerConfig(
+24-11-19 17:29:47 | I | dtype=None,
+24-11-19 17:29:47 | I | zero_point=None,
+24-11-19 17:29:47 | I | group_shapes=((-1, -1, -1),),
+24-11-19 17:29:47 | I | scale_dtypes=(None,),
+24-11-19 17:29:47 | I | intermediate_dtypes=(),
+24-11-19 17:29:47 | I | intermediate_levels=(),
+24-11-19 17:29:47 | I | needs_dequant_saturation=False,
+24-11-19 17:29:47 | I | skips=[],
+24-11-19 17:29:47 | I | static=False,
+24-11-19 17:29:47 | I | kernel_gptq=None,
+24-11-19 17:29:47 | I | calib_range=None),
+24-11-19 17:29:47 | I | ipts=LlmActivationQuantizerConfig(
+24-11-19 17:29:47 | I | dtype=None,
+24-11-19 17:29:47 | I | zero_point=None,
+24-11-19 17:29:47 | I | group_shapes=((-1, -1, -1),),
+24-11-19 17:29:47 | I | scale_dtypes=(None,),
+24-11-19 17:29:47 | I | intermediate_dtypes=(),
+24-11-19 17:29:47 | I | intermediate_levels=(),
+24-11-19 17:29:47 | I | needs_dequant_saturation=False,
+24-11-19 17:29:47 | I | skips=[],
+24-11-19 17:29:47 | I | static=False,
+24-11-19 17:29:47 | I | kernel_gptq=None,
+24-11-19 17:29:47 | I | calib_range=None),
+24-11-19 17:29:47 | I | opts=LlmActivationQuantizerConfig(
+24-11-19 17:29:47 | I | dtype=None,
+24-11-19 17:29:47 | I | zero_point=None,
+24-11-19 17:29:47 | I | group_shapes=((-1, -1, -1),),
+24-11-19 17:29:47 | I | scale_dtypes=(None,),
+24-11-19 17:29:47 | I | intermediate_dtypes=(),
+24-11-19 17:29:47 | I | intermediate_levels=(),
+24-11-19 17:29:47 | I | needs_dequant_saturation=False,
+24-11-19 17:29:47 | I | skips=[],
+24-11-19 17:29:47 | I | static=False,
+24-11-19 17:29:47 | I | kernel_gptq=None,
+24-11-19 17:29:47 | I | calib_range=None),
+24-11-19 17:29:47 | I | calib=LlmCalibDataLoaderConfig(
+24-11-19 17:29:47 | I | data=pileval,
+24-11-19 17:29:47 | I | num_samples=128,
+24-11-19 17:29:47 | I | batch_size=1,
+24-11-19 17:29:47 | I | path=mit-han-lab/pile-val-backup,
+24-11-19 17:29:47 | I | seq_length=1024,
+24-11-19 17:29:47 | I | min_seq_length=0,
+24-11-19 17:29:47 | I | max_seq_length=0,
+24-11-19 17:29:47 | I | local_path=),
+24-11-19 17:29:47 | I | rotation=None,
+24-11-19 17:29:47 | I | reorder=None,
+24-11-19 17:29:47 | I | smooth=None,
+24-11-19 17:29:47 | I | develop_dtype=torch.float32),
+24-11-19 17:29:47 | I | seed=12345,
+24-11-19 17:29:47 | I | skip_eval=False,
+24-11-19 17:29:47 | I | load_from=,
+24-11-19 17:29:47 | I | save_model=False,
+24-11-19 17:29:47 | I | copy_on_save=False)
+24-11-19 17:29:47 | I | === Dumped Configurations ===
+24-11-19 17:29:47 | I | { 'cache': {'path': {'acts': '', 'reorder': '', 'rotation': '', 'smooth': '', 'wgts': ''}, 'root': 'runs/shang'},
+24-11-19 17:29:47 | I | 'copy_on_save': False,
+24-11-19 17:29:47 | I | 'eval': {'batch_size': 8, 'evaluators': ['gptq'], 'max_seq_length': -4096, 'num_gpus': 1, 'tasks': ['wikitext']},
+24-11-19 17:29:47 | I | 'load_from': '',
+24-11-19 17:29:47 | I | 'model': { 'dtype': 'torch.float16',
+24-11-19 17:29:47 | I | 'family': 'llama-3',
+24-11-19 17:29:47 | I | 'local_path': '/home/yujunlin/models/llama-3/llama-3-8b-instruct-gradient-1048k',
+24-11-19 17:29:47 | I | 'local_root': '/home/yujunlin/models',
+24-11-19 17:29:47 | I | 'name': 'llama-3-8b-instruct-gradient-1048k',
+24-11-19 17:29:47 | I | 'path': '/home/yujunlin/models/llama-3/llama-3-8b-instruct-gradient-1048k',
+24-11-19 17:29:47 | I | 'root': ''},
+24-11-19 17:29:47 | I | 'output': {'dirname': 'default-pileval.128x1024.[0-0]', 'job': 'run', 'root': 'runs/shang'},
+24-11-19 17:29:47 | I | 'quant': { 'calib': { 'data': 'pileval',
+24-11-19 17:29:47 | I | 'local_path': '',
+24-11-19 17:29:47 | I | 'max_seq_length': 0,
+24-11-19 17:29:47 | I | 'min_seq_length': 0,
+24-11-19 17:29:47 | I | 'num_samples': 128,
+24-11-19 17:29:47 | I | 'path': 'mit-han-lab/pile-val-backup',
+24-11-19 17:29:47 | I | 'seq_length': 1024},
+24-11-19 17:29:47 | I | 'develop_dtype': 'torch.float32',
+24-11-19 17:29:47 | I | 'enable_reorder': False,
+24-11-19 17:29:47 | I | 'enable_rotation': False,
+24-11-19 17:29:47 | I | 'enable_smooth': False,
+24-11-19 17:29:47 | I | 'ipts': { 'dtype': None,
+24-11-19 17:29:47 | I | 'enable_calib_range': False,
+24-11-19 17:29:47 | I | 'group_shapes': [[-1, -1, -1]],
+24-11-19 17:29:47 | I | 'scale_dtypes': [None],
+24-11-19 17:29:47 | I | 'skips': [],
+24-11-19 17:29:47 | I | 'static': False,
+24-11-19 17:29:47 | I | 'zero_point': None},
+24-11-19 17:29:47 | I | 'opts': { 'dtype': None,
+24-11-19 17:29:47 | I | 'enable_calib_range': False,
+24-11-19 17:29:47 | I | 'group_shapes': [[-1, -1, -1]],
+24-11-19 17:29:47 | I | 'scale_dtypes': [None],
+24-11-19 17:29:47 | I | 'skips': [],
+24-11-19 17:29:47 | I | 'static': False,
+24-11-19 17:29:47 | I | 'zero_point': None},
+24-11-19 17:29:47 | I | 'wgts': { 'dtype': None,
+24-11-19 17:29:47 | I | 'enable_calib_range': False,
+24-11-19 17:29:47 | I | 'enable_kernel_gptq': False,
+24-11-19 17:29:47 | I | 'group_shapes': [[-1, -1, -1]],
+24-11-19 17:29:47 | I | 'intermediate_dtypes': [],
+24-11-19 17:29:47 | I | 'intermediate_levels': [],
+24-11-19 17:29:47 | I | 'needs_dequant_saturation': False,
+24-11-19 17:29:47 | I | 'scale_dtypes': [None],
+24-11-19 17:29:47 | I | 'skips': [],
+24-11-19 17:29:47 | I | 'zero_point': None}},
+24-11-19 17:29:47 | I | 'save_model': False,
+24-11-19 17:29:47 | I | 'seed': 12345,
+24-11-19 17:29:47 | I | 'skip_eval': False}
+24-11-19 17:29:47 | I | === Output Directory ===
+24-11-19 17:29:47 | I | runs/shang/llm/llama-3/llama-3-8b-instruct-gradient-1048k/w.16-x.16-y.16/w.fp16-x.fp16-y.fp16/w.tnsr.fp16-x.tnsr.fp16-y.tnsr.fp16/default-pileval.128x1024.[0-0]/run-241119.172947
+24-11-19 17:29:47 | I | === Start Evaluating ===
+24-11-19 17:29:47 | I | * Building model llama-3-8b-instruct-gradient-1048k from /home/yujunlin/models/llama-3/llama-3-8b-instruct-gradient-1048k
+24-11-19 17:29:48 | I | We will use 90% of the memory on device 0 for storing the model, and 10% for the buffer to avoid OOM. You can set `max_memory` in to a higher value to use more memory (at your own risk).
+24-11-19 17:29:56 | I | - Patching LlamaSdpaAttention.forward in model.layers.0.self_attn
+24-11-19 17:29:56 | I | - Patching LlamaSdpaAttention.forward in model.layers.1.self_attn
+24-11-19 17:29:56 | I | - Patching LlamaSdpaAttention.forward in model.layers.2.self_attn
+24-11-19 17:29:56 | I | - Patching LlamaSdpaAttention.forward in model.layers.3.self_attn
+24-11-19 17:29:56 | I | - Patching LlamaSdpaAttention.forward in model.layers.4.self_attn
+24-11-19 17:29:56 | I | - Patching LlamaSdpaAttention.forward in model.layers.5.self_attn
+24-11-19 17:29:56 | I | - Patching LlamaSdpaAttention.forward in model.layers.6.self_attn
+24-11-19 17:29:56 | I | - Patching LlamaSdpaAttention.forward in model.layers.7.self_attn
+24-11-19 17:29:56 | I | - Patching LlamaSdpaAttention.forward in model.layers.8.self_attn
+24-11-19 17:29:56 | I | - Patching LlamaSdpaAttention.forward in model.layers.9.self_attn
+24-11-19 17:29:56 | I | - Patching LlamaSdpaAttention.forward in model.layers.10.self_attn
+24-11-19 17:29:56 | I | - Patching LlamaSdpaAttention.forward in model.layers.11.self_attn
+24-11-19 17:29:56 | I | - Patching LlamaSdpaAttention.forward in model.layers.12.self_attn
+24-11-19 17:29:56 | I | - Patching LlamaSdpaAttention.forward in model.layers.13.self_attn
+24-11-19 17:29:56 | I | - Patching LlamaSdpaAttention.forward in model.layers.14.self_attn
+24-11-19 17:29:56 | I | - Patching LlamaSdpaAttention.forward in model.layers.15.self_attn
+24-11-19 17:29:56 | I | - Patching LlamaSdpaAttention.forward in model.layers.16.self_attn
+24-11-19 17:29:56 | I | - Patching LlamaSdpaAttention.forward in model.layers.17.self_attn
+24-11-19 17:29:56 | I | - Patching LlamaSdpaAttention.forward in model.layers.18.self_attn
+24-11-19 17:29:56 | I | - Patching LlamaSdpaAttention.forward in model.layers.19.self_attn
+24-11-19 17:29:56 | I | - Patching LlamaSdpaAttention.forward in model.layers.20.self_attn
+24-11-19 17:29:56 | I | - Patching LlamaSdpaAttention.forward in model.layers.21.self_attn
+24-11-19 17:29:56 | I | - Patching LlamaSdpaAttention.forward in model.layers.22.self_attn
+24-11-19 17:29:56 | I | - Patching LlamaSdpaAttention.forward in model.layers.23.self_attn
+24-11-19 17:29:56 | I | - Patching LlamaSdpaAttention.forward in model.layers.24.self_attn
+24-11-19 17:29:56 | I | - Patching LlamaSdpaAttention.forward in model.layers.25.self_attn
+24-11-19 17:29:56 | I | - Patching LlamaSdpaAttention.forward in model.layers.26.self_attn
+24-11-19 17:29:56 | I | - Patching LlamaSdpaAttention.forward in model.layers.27.self_attn
+24-11-19 17:29:56 | I | - Patching LlamaSdpaAttention.forward in model.layers.28.self_attn
+24-11-19 17:29:56 | I | - Patching LlamaSdpaAttention.forward in model.layers.29.self_attn
+24-11-19 17:29:56 | I | - Patching LlamaSdpaAttention.forward in model.layers.30.self_attn
+24-11-19 17:29:56 | I | - Patching LlamaSdpaAttention.forward in model.layers.31.self_attn
+24-11-19 17:29:56 | I | * Development dtype is torch.float32
+24-11-19 17:29:56 | I | * Evaluating model
+24-11-19 17:29:56 | W | `pretrained` model kwarg is not of type `str`. Many other model arguments may be ignored. Please do not launch via accelerate or use `parallelize=True` if passing an existing model this way.
+24-11-19 17:29:56 | I | Using model type 'default'
+24-11-19 17:29:56 | W | Passed an already-initialized model through `pretrained`, assuming single-process call to evaluate() or custom distributed integration
+24-11-19 17:29:56 | I | - Evaluator: gptq
+24-11-19 17:29:56 | I | - Tasks: ['wikitext']
+24-11-19 17:29:56 | I | - Batch_size: 8
+24-11-19 17:29:56 | I | + Max_seq_length: 2048
+24-11-19 17:29:56 | D | Starting new HTTPS connection (1): huggingface.co:443
+24-11-19 17:30:03 | W | Using the latest cached version of the dataset since wikitext couldn't be found on the Hugging Face Hub
+24-11-19 17:30:03 | W | Found the latest cached dataset configuration 'wikitext-2-raw-v1' at /home/yujunlin/.cache/huggingface/datasets/wikitext/wikitext-2-raw-v1/0.0.0/b08601e04326c79dfdd32d625aee71d232d685c3 (last modified on Tue Oct 8 19:51:38 2024).
+24-11-19 17:30:03 | D | Attempting to acquire lock 23438954666640 on /home/yujunlin/.cache/huggingface/datasets/_home_yujunlin_.cache_huggingface_datasets_wikitext_wikitext-2-raw-v1_0.0.0_b08601e04326c79dfdd32d625aee71d232d685c3.lock
+24-11-19 17:30:03 | D | Lock 23438954666640 acquired on /home/yujunlin/.cache/huggingface/datasets/_home_yujunlin_.cache_huggingface_datasets_wikitext_wikitext-2-raw-v1_0.0.0_b08601e04326c79dfdd32d625aee71d232d685c3.lock
+24-11-19 17:30:03 | D | open file: /home/yujunlin/.cache/huggingface/datasets/wikitext/wikitext-2-raw-v1/0.0.0/b08601e04326c79dfdd32d625aee71d232d685c3/dataset_info.json
+24-11-19 17:30:03 | D | Attempting to release lock 23438954666640 on /home/yujunlin/.cache/huggingface/datasets/_home_yujunlin_.cache_huggingface_datasets_wikitext_wikitext-2-raw-v1_0.0.0_b08601e04326c79dfdd32d625aee71d232d685c3.lock
+24-11-19 17:30:03 | D | Lock 23438954666640 released on /home/yujunlin/.cache/huggingface/datasets/_home_yujunlin_.cache_huggingface_datasets_wikitext_wikitext-2-raw-v1_0.0.0_b08601e04326c79dfdd32d625aee71d232d685c3.lock
+24-11-19 17:30:15 | I | - Results:
+24-11-19 17:30:15 | I | | Task |Version| Metric |Value | |Stderr|
+24-11-19 17:30:15 | I | |--------|------:|---------------|-----:|---|-----:|
+24-11-19 17:30:15 | I | |wikitext| 1|word_perplexity|7.8331|± |7.8331|
+24-11-19 17:30:15 | I |
+24-11-19 17:30:15 | I | + Max_seq_length: 4096
+24-11-19 17:30:15 | D | Starting new HTTPS connection (2): huggingface.co:443
+24-11-19 17:30:21 | W | Using the latest cached version of the dataset since wikitext couldn't be found on the Hugging Face Hub
+24-11-19 17:30:21 | W | Found the latest cached dataset configuration 'wikitext-2-raw-v1' at /home/yujunlin/.cache/huggingface/datasets/wikitext/wikitext-2-raw-v1/0.0.0/b08601e04326c79dfdd32d625aee71d232d685c3 (last modified on Tue Oct 8 19:51:38 2024).
+24-11-19 17:30:21 | D | Attempting to acquire lock 23438952840800 on /home/yujunlin/.cache/huggingface/datasets/_home_yujunlin_.cache_huggingface_datasets_wikitext_wikitext-2-raw-v1_0.0.0_b08601e04326c79dfdd32d625aee71d232d685c3.lock
+24-11-19 17:30:21 | D | Lock 23438952840800 acquired on /home/yujunlin/.cache/huggingface/datasets/_home_yujunlin_.cache_huggingface_datasets_wikitext_wikitext-2-raw-v1_0.0.0_b08601e04326c79dfdd32d625aee71d232d685c3.lock
+24-11-19 17:30:21 | D | open file: /home/yujunlin/.cache/huggingface/datasets/wikitext/wikitext-2-raw-v1/0.0.0/b08601e04326c79dfdd32d625aee71d232d685c3/dataset_info.json
+24-11-19 17:30:21 | D | Attempting to release lock 23438952840800 on /home/yujunlin/.cache/huggingface/datasets/_home_yujunlin_.cache_huggingface_datasets_wikitext_wikitext-2-raw-v1_0.0.0_b08601e04326c79dfdd32d625aee71d232d685c3.lock
+24-11-19 17:30:21 | D | Lock 23438952840800 released on /home/yujunlin/.cache/huggingface/datasets/_home_yujunlin_.cache_huggingface_datasets_wikitext_wikitext-2-raw-v1_0.0.0_b08601e04326c79dfdd32d625aee71d232d685c3.lock
+24-11-19 17:30:32 | I | - Results:
+24-11-19 17:30:32 | I | | Task |Version| Metric |Value | |Stderr|
+24-11-19 17:30:32 | I | |--------|------:|---------------|-----:|---|-----:|
+24-11-19 17:30:32 | I | |wikitext| 1|word_perplexity|7.2618|± |7.2618|
+24-11-19 17:30:32 | I |
+24-11-19 17:30:32 | I | * Saving results to runs/shang/llm/llama-3/llama-3-8b-instruct-gradient-1048k/w.16-x.16-y.16/w.fp16-x.fp16-y.fp16/w.tnsr.fp16-x.tnsr.fp16-y.tnsr.fp16/default-pileval.128x1024.[0-0]/run-241119.172947
runs/llama-3-8b-instruct-gradient-1048k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.GridSearch.bn2.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200545/config-241119.200545.yaml
ADDED
@@ -0,0 +1,146 @@
+cache:
+  root: runs/shang
+  path:
+    rotation: runs/shang/llm/cache/quant/rotation/hadamard/llama-3-8b-instruct-gradient-1048k.pt
+    reorder: ''
+    smooth: runs/shang/llm/cache/quant/pileval.128x1024.[0-0]/smooth/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/w.skip.[]-x.skip.[]-y.skip.[attn_q]/rotate.hadamard.[out_proj]/smooth.attn.OutputsError.GridSearch.Layer.d2.en1.sn1/smooth.attn.[a.AbsMax.b.AbsMax]/smooth.attn.g20.bn2/llama-3-8b-instruct-gradient-1048k.pt
+    wgts: runs/shang/llm/cache/quant/pileval.128x1024.[0-0]/wgts/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/w.skip.[]-x.skip.[]-y.skip.[attn_q]/rotate.hadamard.[out_proj]/smooth.attn.OutputsError.GridSearch.Layer.d2.en1.sn1/smooth.attn.[a.AbsMax.b.AbsMax]/smooth.attn.g20.bn2/w.kernel.gptq.d0p01.b128/w.range.OutputsError.GridSearch.Group.d2.e512.sn1/w.range.r.[0p2.1].g80/w.range.skip.[]/llama-3-8b-instruct-gradient-1048k.pt
+    acts: runs/shang/llm/cache/quant/pileval.128x1024.[0-0]/acts/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/w.skip.[]-x.skip.[]-y.skip.[attn_q]/rotate.hadamard.[out_proj]/smooth.attn.OutputsError.GridSearch.Layer.d2.en1.sn1/smooth.attn.[a.AbsMax.b.AbsMax]/smooth.attn.g20.bn2/w.kernel.gptq.d0p01.b128/w.range.OutputsError.GridSearch.Group.d2.e512.sn1/w.range.r.[0p2.1].g80/w.range.skip.[]/y.range.OutputsError.Manual.Layer.d2.en1.sn1/y.range.r.[1]/y.range.skip.[]/llama-3-8b-instruct-gradient-1048k.pt
+output:
+  root: runs/shang
+  dirname: skip.y.[q]-gptq-rot.[+out]-smth.attn.GridSearch.bn2.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]
+  job: run
+model:
+  name: llama-3-8b-instruct-gradient-1048k
+  family: llama-3
+  path: /home/yujunlin/models/llama-3/llama-3-8b-instruct-gradient-1048k
+  root: ''
+  local_path: /home/yujunlin/models/llama-3/llama-3-8b-instruct-gradient-1048k
+  local_root: /home/yujunlin/models
+  dtype: torch.float16
+eval:
+  num_gpus: 1
+  batch_size: 8
+  tasks:
+  - wikitext
+  max_seq_length: -4096
+  evaluators:
+  - gptq
+quant:
+  wgts:
+    dtype: sint8
+    zero_point: null
+    group_shapes:
+    - - 1
+      - -1
+      - -1
+    scale_dtypes:
+    - torch.float16
+    intermediate_dtypes: []
+    intermediate_levels: []
+    needs_dequant_saturation: false
+    skips: []
+    enable_kernel_gptq: true
+    kernel_gptq:
+      damp_percentage: 0.01
+      block_size: 128
+      num_inv_tries: 250
+      hessian_block_size: 512
+    enable_calib_range: true
+    calib_range:
+      degree: 2
+      objective: OutputsError
+      strategy: GridSearch
+      granularity: Group
+      element_batch_size: 64
+      sample_batch_size: -1
+      element_size: 512
+      sample_size: -1
+      pre_reshape: true
+      outputs_device: cpu
+      ratio: 1.0
+      max_shrink: 0.2
+      max_expand: 1.0
+      num_grids: 80
+      allow_scale: false
+      skips: []
+  ipts:
+    dtype: sint8
+    zero_point: null
+    group_shapes:
+    - - 1
+      - -1
+      - -1
+    scale_dtypes:
+    - torch.float16
+    skips: []
+    static: false
+    enable_calib_range: false
+  opts:
+    dtype: sint8
+    zero_point: null
+    group_shapes:
+    - - -1
+      - -1
+      - -1
+    scale_dtypes:
+    - torch.float16
+    skips:
+    - attn_q
+    static: true
+    enable_calib_range: true
+    calib_range:
+      degree: 2
+      objective: OutputsError
+      strategy: Manual
+      granularity: Layer
+      element_batch_size: -1
+      sample_batch_size: -1
+      element_size: -1
+      sample_size: -1
+      pre_reshape: true
+      outputs_device: cpu
+      ratio: 1.0
+      max_shrink: 0.2
+      max_expand: 1.0
+      num_grids: 80
+      allow_scale: false
+      skips: []
+  calib:
+    data: pileval
+    num_samples: 128
+    path: mit-han-lab/pile-val-backup
+    seq_length: 1024
+    min_seq_length: 0
+    max_seq_length: 0
+    local_path: ''
+  enable_rotation: true
+  rotation:
+    random: false
+    transforms:
+    - out_proj
+  enable_reorder: false
+  enable_smooth: true
+  smooth:
+    enable_proj: false
+    enable_attn: true
+    attn:
+      degree: 2
+      strategy: GridSearch
+      sample_batch_size: -1
+      sample_size: -1
+      outputs_device: cpu
+      allow_a_quant: true
+      allow_b_quant: true
+      spans:
+      - - AbsMax
+        - AbsMax
+      alpha: 0.5
+      beta: -2
+      num_grids: 20
+  develop_dtype: torch.float32
+seed: 12345
+skip_eval: false
+load_from: ''
+save_model: 'true'
+copy_on_save: false
runs/llama-3-8b-instruct-gradient-1048k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.GridSearch.bn2.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200545/model/acts.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ee8c1a641aeb1fd1a3376f43e71fb6e4d46ed629e0117cc990b225cc4521eeab
+size 36034
runs/llama-3-8b-instruct-gradient-1048k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.GridSearch.bn2.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200545/model/model.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:86a0a4420a3ab6b037c08654751c88818251fea6062d3041237ff4f2e3b00907
+size 16060644786
runs/llama-3-8b-instruct-gradient-1048k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.GridSearch.bn2.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200545/model/rotation.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:50249bca461c76fdeb0f51b07f019025e975e67b47eb49f2a69e5d5dee80e195
+size 134219107
runs/llama-3-8b-instruct-gradient-1048k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.GridSearch.bn2.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200545/model/scale.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5a8f507ae04bfb9f02a9d7dc98d60b7dd69a9d6ed4b654ed8d64a71f7f52c487
+size 5631594
runs/llama-3-8b-instruct-gradient-1048k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.GridSearch.bn2.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200545/model/smooth.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:64eef470c18d4efd8590cc9d25734e3313da1181fc3f4282ab6d09cfd58ecd93
+size 535234
runs/llama-3-8b-instruct-gradient-1048k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.GridSearch.bn2.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200545/model/wgts.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e479d958e3157b658360082fd2c6699cc38833c35d198babf006616a1564d95a
+size 5593150
runs/llama-3-8b-instruct-gradient-1048k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.GridSearch.bn2.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200545/results-241119.200545.json
ADDED
@@ -0,0 +1,32 @@
+{
+    "gptq": {
+        "2048": {
+            "results": {
+                "wikitext": {
+                    "word_perplexity": 7.989659368184836
+                }
+            },
+            "versions": {
+                "wikitext": 1
+            },
+            "config": {
+                "model": "llama-3-8b-instruct-gradient-1048k"
+            },
+            "model": "llama-3-8b-instruct-gradient-1048k"
+        },
+        "4096": {
+            "results": {
+                "wikitext": {
+                    "word_perplexity": 7.396661695312681
+                }
+            },
+            "versions": {
+                "wikitext": 1
+            },
+            "config": {
+                "model": "llama-3-8b-instruct-gradient-1048k"
+            },
+            "model": "llama-3-8b-instruct-gradient-1048k"
+        }
+    }
+}
runs/llama-3-8b-instruct-gradient-1048k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.GridSearch.bn2.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200545/run-241119.183745.log
ADDED
The diff for this file is too large to render.
See raw diff
runs/llama-3-8b-instruct-gradient-1048k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.GridSearch.bn2.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200545/run-241119.200545.log
ADDED
The diff for this file is too large to render.
See raw diff
runs/llama-3-8b-instruct-gradient-1048k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.a0p5.b0.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200729/config-241119.200729.yaml
ADDED
@@ -0,0 +1,146 @@
+cache:
+  root: runs/shang
+  path:
+    rotation: runs/shang/llm/cache/quant/rotation/hadamard/llama-3-8b-instruct-gradient-1048k.pt
+    reorder: ''
+    smooth: runs/shang/llm/cache/quant/pileval.128x1024.[0-0]/smooth/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/w.skip.[]-x.skip.[]-y.skip.[attn_q]/rotate.hadamard.[out_proj]/smooth.attn.OutputsError.Manual.Layer.d2.en1.sn1/smooth.attn.[a.AbsMax.b.AbsMax]/smooth.attn.a0p5.b0/llama-3-8b-instruct-gradient-1048k.pt
+    wgts: runs/shang/llm/cache/quant/pileval.128x1024.[0-0]/wgts/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/w.skip.[]-x.skip.[]-y.skip.[attn_q]/rotate.hadamard.[out_proj]/smooth.attn.OutputsError.Manual.Layer.d2.en1.sn1/smooth.attn.[a.AbsMax.b.AbsMax]/smooth.attn.a0p5.b0/w.kernel.gptq.d0p01.b128/w.range.OutputsError.GridSearch.Group.d2.e512.sn1/w.range.r.[0p2.1].g80/w.range.skip.[]/llama-3-8b-instruct-gradient-1048k.pt
+    acts: runs/shang/llm/cache/quant/pileval.128x1024.[0-0]/acts/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/w.skip.[]-x.skip.[]-y.skip.[attn_q]/rotate.hadamard.[out_proj]/smooth.attn.OutputsError.Manual.Layer.d2.en1.sn1/smooth.attn.[a.AbsMax.b.AbsMax]/smooth.attn.a0p5.b0/w.kernel.gptq.d0p01.b128/w.range.OutputsError.GridSearch.Group.d2.e512.sn1/w.range.r.[0p2.1].g80/w.range.skip.[]/y.range.OutputsError.Manual.Layer.d2.en1.sn1/y.range.r.[1]/y.range.skip.[]/llama-3-8b-instruct-gradient-1048k.pt
+output:
+  root: runs/shang
+  dirname: skip.y.[q]-gptq-rot.[+out]-smth.attn.a0p5.b0.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]
+  job: run
+model:
+  name: llama-3-8b-instruct-gradient-1048k
+  family: llama-3
+  path: /home/yujunlin/models/llama-3/llama-3-8b-instruct-gradient-1048k
+  root: ''
+  local_path: /home/yujunlin/models/llama-3/llama-3-8b-instruct-gradient-1048k
+  local_root: /home/yujunlin/models
+  dtype: torch.float16
+eval:
+  num_gpus: 1
+  batch_size: 8
+  tasks:
+  - wikitext
+  max_seq_length: -4096
+  evaluators:
+  - gptq
+quant:
+  wgts:
+    dtype: sint8
+    zero_point: null
+    group_shapes:
+    - - 1
+      - -1
+      - -1
+    scale_dtypes:
+    - torch.float16
+    intermediate_dtypes: []
+    intermediate_levels: []
+    needs_dequant_saturation: false
+    skips: []
+    enable_kernel_gptq: true
+    kernel_gptq:
+      damp_percentage: 0.01
+      block_size: 128
+      num_inv_tries: 250
+      hessian_block_size: 512
+    enable_calib_range: true
+    calib_range:
+      degree: 2
+      objective: OutputsError
+      strategy: GridSearch
+      granularity: Group
+      element_batch_size: 64
+      sample_batch_size: -1
+      element_size: 512
+      sample_size: -1
+      pre_reshape: true
+      outputs_device: cpu
+      ratio: 1.0
+      max_shrink: 0.2
+      max_expand: 1.0
+      num_grids: 80
+      allow_scale: false
+      skips: []
+  ipts:
+    dtype: sint8
+    zero_point: null
+    group_shapes:
+    - - 1
+      - -1
+      - -1
+    scale_dtypes:
+    - torch.float16
+    skips: []
+    static: false
+    enable_calib_range: false
+  opts:
+    dtype: sint8
+    zero_point: null
+    group_shapes:
+    - - -1
+      - -1
+      - -1
+    scale_dtypes:
+    - torch.float16
+    skips:
+    - attn_q
+    static: true
+    enable_calib_range: true
+    calib_range:
+      degree: 2
+      objective: OutputsError
+      strategy: Manual
+      granularity: Layer
+      element_batch_size: -1
+      sample_batch_size: -1
+      element_size: -1
+      sample_size: -1
+      pre_reshape: true
+      outputs_device: cpu
+      ratio: 1.0
+      max_shrink: 0.2
+      max_expand: 1.0
+      num_grids: 80
+      allow_scale: false
+      skips: []
+  calib:
+    data: pileval
+    num_samples: 128
+    path: mit-han-lab/pile-val-backup
+    seq_length: 1024
+    min_seq_length: 0
+    max_seq_length: 0
+    local_path: ''
+  enable_rotation: true
+  rotation:
+    random: false
+    transforms:
+    - out_proj
+  enable_reorder: false
+  enable_smooth: true
+  smooth:
+    enable_proj: false
+    enable_attn: true
+    attn:
+      degree: 2
+      strategy: Manual
+      sample_batch_size: -1
+      sample_size: -1
+      outputs_device: cpu
+      allow_a_quant: true
+      allow_b_quant: true
+      spans:
+      - - AbsMax
+        - AbsMax
+      alpha: 0.5
+      beta: 0
+      num_grids: 20
+  develop_dtype: torch.float32
+seed: 12345
+skip_eval: false
+load_from: ''
+save_model: 'true'
+copy_on_save: false
runs/llama-3-8b-instruct-gradient-1048k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.a0p5.b0.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200729/model/acts.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ac70125cfcf7096842127bff88c84a458b44f2fc66bf8fb1970940c922e9e805
+size 36034
runs/llama-3-8b-instruct-gradient-1048k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.a0p5.b0.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200729/model/model.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:125f19e7dc83de3adaf1c1b7c9c20d58c72f075b630c568116fe3e6da7c90719
+size 16060644786
runs/llama-3-8b-instruct-gradient-1048k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.a0p5.b0.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200729/model/rotation.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:50249bca461c76fdeb0f51b07f019025e975e67b47eb49f2a69e5d5dee80e195
+size 134219107
runs/llama-3-8b-instruct-gradient-1048k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.a0p5.b0.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200729/model/scale.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8e586371fed1a42757c5fdf37cca9febf4f2fbd8a7c4a5dd97902d01fc95c931
+size 5631594
runs/llama-3-8b-instruct-gradient-1048k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.a0p5.b0.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200729/model/smooth.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a3f26f4aed70f6838fd246750c38631eed6601e521a12ba700c6c065f27e37dd
+size 535234
runs/llama-3-8b-instruct-gradient-1048k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.a0p5.b0.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200729/model/wgts.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b399ac3cecb57039c93754780228596a35530dee066aee45312a98c3933a7f4b
+size 5593150
runs/llama-3-8b-instruct-gradient-1048k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.a0p5.b0.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200729/results-241119.200729.json
ADDED
@@ -0,0 +1,32 @@
+{
+    "gptq": {
+        "2048": {
+            "results": {
+                "wikitext": {
+                    "word_perplexity": 7.989206018837591
+                }
+            },
+            "versions": {
+                "wikitext": 1
+            },
+            "config": {
+                "model": "llama-3-8b-instruct-gradient-1048k"
+            },
+            "model": "llama-3-8b-instruct-gradient-1048k"
+        },
+        "4096": {
+            "results": {
+                "wikitext": {
+                    "word_perplexity": 7.399224506977849
+                }
+            },
+            "versions": {
+                "wikitext": 1
+            },
+            "config": {
+                "model": "llama-3-8b-instruct-gradient-1048k"
+            },
+            "model": "llama-3-8b-instruct-gradient-1048k"
+        }
+    }
+}
runs/llama-3-8b-instruct-gradient-1048k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.a0p5.b0.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200729/run-241119.200729.log
ADDED
The diff for this file is too large to render.
See raw diff