Shangy
/

trans

Model card Files and versions Community

ys-2020 committed on Jan 24

Commit

9845231

1 Parent(s): 834b257

sync

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

runs/llama-2-7b-instruct-together-32k/w.16-x.16-y.16/w.fp16-x.fp16-y.fp16/w.tnsr.fp16-x.tnsr.fp16-y.tnsr.fp16/default-pileval.128x1024.[0-0]/run-241119.183703/config-241119.183703.yaml +85 -0
runs/llama-2-7b-instruct-together-32k/w.16-x.16-y.16/w.fp16-x.fp16-y.fp16/w.tnsr.fp16-x.tnsr.fp16-y.tnsr.fp16/default-pileval.128x1024.[0-0]/run-241119.183703/results-241119.183703.json +32 -0
runs/llama-2-7b-instruct-together-32k/w.16-x.16-y.16/w.fp16-x.fp16-y.fp16/w.tnsr.fp16-x.tnsr.fp16-y.tnsr.fp16/default-pileval.128x1024.[0-0]/run-241119.183703/run-241119.183703.log +219 -0
runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.GridSearch.bn2.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200548/config-241119.200548.yaml +146 -0
runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.GridSearch.bn2.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200548/model/acts.pt +3 -0
runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.GridSearch.bn2.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200548/model/model.pt +3 -0
runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.GridSearch.bn2.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200548/model/rotation.pt +3 -0
runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.GridSearch.bn2.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200548/model/scale.pt +3 -0
runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.GridSearch.bn2.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200548/model/smooth.pt +3 -0
runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.GridSearch.bn2.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200548/model/wgts.pt +3 -0
runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.GridSearch.bn2.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200548/results-241119.200548.json +32 -0
runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.GridSearch.bn2.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200548/run-241119.185856.log +0 -0
runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.GridSearch.bn2.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200548/run-241119.200548.log +0 -0
runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.a0p5.b0.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0].RUNNING/run-241119.200727.RUNNING/config-241119.200727.yaml +146 -0
runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.a0p5.b0.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0].RUNNING/run-241119.200727.RUNNING/model/model.pt +3 -0
runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.a0p5.b0.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0].RUNNING/run-241119.200727.RUNNING/model/rotation.pt +3 -0
runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.a0p5.b0.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0].RUNNING/run-241119.200727.RUNNING/model/scale.pt +3 -0
runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.a0p5.b0.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0].RUNNING/run-241119.200727.RUNNING/model/smooth.pt +3 -0
runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.a0p5.b0.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0].RUNNING/run-241119.200727.RUNNING/model/wgts.pt +3 -0
runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.a0p5.b0.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0].RUNNING/run-241119.200727.RUNNING/run-241119.200727.log +0 -0
runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.201608/config-241119.201608.yaml +129 -0
runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.201608/model/acts.pt +3 -0
runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.201608/model/model.pt +3 -0
runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.201608/model/rotation.pt +3 -0
runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.201608/model/scale.pt +3 -0
runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.201608/model/wgts.pt +3 -0
runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.201608/results-241119.201608.json +32 -0
runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.201608/run-241119.201608.log +0 -0
runs/llama-3-8b-instruct-gradient-1048k/w.16-x.16-y.16/w.fp16-x.fp16-y.fp16/w.tnsr.fp16-x.tnsr.fp16-y.tnsr.fp16/default-pileval.128x1024.[0-0]/run-241119.172947/config-241119.172947.yaml +85 -0
runs/llama-3-8b-instruct-gradient-1048k/w.16-x.16-y.16/w.fp16-x.fp16-y.fp16/w.tnsr.fp16-x.tnsr.fp16-y.tnsr.fp16/default-pileval.128x1024.[0-0]/run-241119.172947/results-241119.172947.json +32 -0
runs/llama-3-8b-instruct-gradient-1048k/w.16-x.16-y.16/w.fp16-x.fp16-y.fp16/w.tnsr.fp16-x.tnsr.fp16-y.tnsr.fp16/default-pileval.128x1024.[0-0]/run-241119.172947/run-241119.172947.log +219 -0
runs/llama-3-8b-instruct-gradient-1048k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.GridSearch.bn2.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200545/config-241119.200545.yaml +146 -0
runs/llama-3-8b-instruct-gradient-1048k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.GridSearch.bn2.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200545/model/acts.pt +3 -0
runs/llama-3-8b-instruct-gradient-1048k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.GridSearch.bn2.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200545/model/model.pt +3 -0
runs/llama-3-8b-instruct-gradient-1048k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.GridSearch.bn2.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200545/model/rotation.pt +3 -0
runs/llama-3-8b-instruct-gradient-1048k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.GridSearch.bn2.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200545/model/scale.pt +3 -0
runs/llama-3-8b-instruct-gradient-1048k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.GridSearch.bn2.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200545/model/smooth.pt +3 -0
runs/llama-3-8b-instruct-gradient-1048k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.GridSearch.bn2.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200545/model/wgts.pt +3 -0
runs/llama-3-8b-instruct-gradient-1048k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.GridSearch.bn2.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200545/results-241119.200545.json +32 -0
runs/llama-3-8b-instruct-gradient-1048k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.GridSearch.bn2.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200545/run-241119.183745.log +0 -0
runs/llama-3-8b-instruct-gradient-1048k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.GridSearch.bn2.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200545/run-241119.200545.log +0 -0
runs/llama-3-8b-instruct-gradient-1048k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.a0p5.b0.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200729/config-241119.200729.yaml +146 -0
runs/llama-3-8b-instruct-gradient-1048k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.a0p5.b0.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200729/model/acts.pt +3 -0
runs/llama-3-8b-instruct-gradient-1048k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.a0p5.b0.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200729/model/model.pt +3 -0
runs/llama-3-8b-instruct-gradient-1048k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.a0p5.b0.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200729/model/rotation.pt +3 -0
runs/llama-3-8b-instruct-gradient-1048k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.a0p5.b0.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200729/model/scale.pt +3 -0
runs/llama-3-8b-instruct-gradient-1048k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.a0p5.b0.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200729/model/smooth.pt +3 -0
runs/llama-3-8b-instruct-gradient-1048k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.a0p5.b0.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200729/model/wgts.pt +3 -0
runs/llama-3-8b-instruct-gradient-1048k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.a0p5.b0.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200729/results-241119.200729.json +32 -0
runs/llama-3-8b-instruct-gradient-1048k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.a0p5.b0.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200729/run-241119.200729.log +0 -0

runs/llama-2-7b-instruct-together-32k/w.16-x.16-y.16/w.fp16-x.fp16-y.fp16/w.tnsr.fp16-x.tnsr.fp16-y.tnsr.fp16/default-pileval.128x1024.[0-0]/run-241119.183703/config-241119.183703.yaml ADDED Viewed

	@@ -0,0 +1,85 @@

+cache:
+  root: runs/shang
+  path:
+    rotation: ''
+    reorder: ''
+    smooth: ''
+    wgts: ''
+    acts: ''
+output:
+  root: runs/shang
+  dirname: default-pileval.128x1024.[0-0]
+  job: run
+model:
+  name: llama-2-7b-instruct-together-32k
+  family: llama-2
+  path: /home/yujunlin/models/llama-2/llama-2-7b-instruct-together-32k
+  root: ''
+  local_path: /home/yujunlin/models/llama-2/llama-2-7b-instruct-together-32k
+  local_root: /home/yujunlin/models
+  dtype: torch.float16
+eval:
+  num_gpus: 1
+  batch_size: 8
+  tasks:
+  - wikitext
+  max_seq_length: -4096
+  evaluators:
+  - gptq
+quant:
+  wgts:
+    dtype: null
+    zero_point: null
+    group_shapes:
+    - - -1
+      - -1
+      - -1
+    scale_dtypes:
+    - null
+    intermediate_dtypes: []
+    intermediate_levels: []
+    needs_dequant_saturation: false
+    skips: []
+    enable_kernel_gptq: false
+    enable_calib_range: false
+  ipts:
+    dtype: null
+    zero_point: null
+    group_shapes:
+    - - -1
+      - -1
+      - -1
+    scale_dtypes:
+    - null
+    skips: []
+    static: false
+    enable_calib_range: false
+  opts:
+    dtype: null
+    zero_point: null
+    group_shapes:
+    - - -1
+      - -1
+      - -1
+    scale_dtypes:
+    - null
+    skips: []
+    static: false
+    enable_calib_range: false
+  calib:
+    data: pileval
+    num_samples: 128
+    path: mit-han-lab/pile-val-backup
+    seq_length: 1024
+    min_seq_length: 0
+    max_seq_length: 0
+    local_path: ''
+  enable_rotation: false
+  enable_reorder: false
+  enable_smooth: false
+  develop_dtype: torch.float32
+seed: 12345
+skip_eval: false
+load_from: ''
+save_model: false
+copy_on_save: false

runs/llama-2-7b-instruct-together-32k/w.16-x.16-y.16/w.fp16-x.fp16-y.fp16/w.tnsr.fp16-x.tnsr.fp16-y.tnsr.fp16/default-pileval.128x1024.[0-0]/run-241119.183703/results-241119.183703.json ADDED Viewed

	@@ -0,0 +1,32 @@

+{
+  "gptq": {
+    "2048": {
+      "results": {
+        "wikitext": {
+          "word_perplexity": 6.443161573358209
+        }
+      },
+      "versions": {
+        "wikitext": 1
+      },
+      "config": {
+        "model": "llama-2-7b-instruct-together-32k"
+      },
+      "model": "llama-2-7b-instruct-together-32k"
+    },
+    "4096": {
+      "results": {
+        "wikitext": {
+          "word_perplexity": 5.964906855443073
+        }
+      },
+      "versions": {
+        "wikitext": 1
+      },
+      "config": {
+        "model": "llama-2-7b-instruct-together-32k"
+      },
+      "model": "llama-2-7b-instruct-together-32k"
+    }
+  }
+}

runs/llama-2-7b-instruct-together-32k/w.16-x.16-y.16/w.fp16-x.fp16-y.fp16/w.tnsr.fp16-x.tnsr.fp16-y.tnsr.fp16/default-pileval.128x1024.[0-0]/run-241119.183703/run-241119.183703.log ADDED Viewed

	@@ -0,0 +1,219 @@

+24-11-19 18:37:03 | I | === Configurations ===
+24-11-19 18:37:03 | I | LlmPtqRunConfig(
+24-11-19 18:37:03 | I |   cache=LlmCacheConfig(
+24-11-19 18:37:03 | I |     root=runs/shang,
+24-11-19 18:37:03 | I |     dirpath=LlmQuantCacheConfig(
+24-11-19 18:37:03 | I |       rotation=,
+24-11-19 18:37:03 | I |       reorder=,
+24-11-19 18:37:03 | I |       smooth=,
+24-11-19 18:37:03 | I |       wgts=,
+24-11-19 18:37:03 | I |       acts=),
+24-11-19 18:37:03 | I |     path=LlmQuantCacheConfig(
+24-11-19 18:37:03 | I |       rotation=,
+24-11-19 18:37:03 | I |       reorder=,
+24-11-19 18:37:03 | I |       smooth=,
+24-11-19 18:37:03 | I |       wgts=,
+24-11-19 18:37:03 | I |       acts=)),
+24-11-19 18:37:03 | I |   output=OutputConfig(
+24-11-19 18:37:03 | I |     root=runs/shang,
+24-11-19 18:37:03 | I |     dirname=default-pileval.128x1024.[0-0],
+24-11-19 18:37:03 | I |     job=run,
+24-11-19 18:37:03 | I |     dirpath=runs/shang/llm/llama-2/llama-2-7b-instruct-together-32k/w.16-x.16-y.16/w.fp16-x.fp16-y.fp16/w.tnsr.fp16-x.tnsr.fp16-y.tnsr.fp16/default-pileval.128x1024.[0-0],
+24-11-19 18:37:03 | I |     timestamp=241119.183703),
+24-11-19 18:37:03 | I |   model=LlmModelConfig(
+24-11-19 18:37:03 | I |     name=llama-2-7b-instruct-together-32k,
+24-11-19 18:37:03 | I |     family=llama-2,
+24-11-19 18:37:03 | I |     path=/home/yujunlin/models/llama-2/llama-2-7b-instruct-together-32k,
+24-11-19 18:37:03 | I |     root=,
+24-11-19 18:37:03 | I |     local_path=/home/yujunlin/models/llama-2/llama-2-7b-instruct-together-32k,
+24-11-19 18:37:03 | I |     local_root=/home/yujunlin/models,
+24-11-19 18:37:03 | I |     size=7.0,
+24-11-19 18:37:03 | I |     variant=instruct-together-32k,
+24-11-19 18:37:03 | I |     dtype=torch.float16,
+24-11-19 18:37:03 | I |     orig_dtype=torch.float16),
+24-11-19 18:37:03 | I |   eval=LlmEvalConfig(
+24-11-19 18:37:03 | I |     num_gpus=1,
+24-11-19 18:37:03 | I |     batch_size=8,
+24-11-19 18:37:03 | I |     tasks=['wikitext'],
+24-11-19 18:37:03 | I |     max_seq_length=-4096,
+24-11-19 18:37:03 | I |     evaluators=['gptq']),
+24-11-19 18:37:03 | I |   quant=LlmQuantConfig(
+24-11-19 18:37:03 | I |     wgts=LlmWeightQuantizerConfig(
+24-11-19 18:37:03 | I |       dtype=None,
+24-11-19 18:37:03 | I |       zero_point=None,
+24-11-19 18:37:03 | I |       group_shapes=((-1, -1, -1),),
+24-11-19 18:37:03 | I |       scale_dtypes=(None,),
+24-11-19 18:37:03 | I |       intermediate_dtypes=(),
+24-11-19 18:37:03 | I |       intermediate_levels=(),
+24-11-19 18:37:03 | I |       needs_dequant_saturation=False,
+24-11-19 18:37:03 | I |       skips=[],
+24-11-19 18:37:03 | I |       static=False,
+24-11-19 18:37:03 | I |       kernel_gptq=None,
+24-11-19 18:37:03 | I |       calib_range=None),
+24-11-19 18:37:03 | I |     ipts=LlmActivationQuantizerConfig(
+24-11-19 18:37:03 | I |       dtype=None,
+24-11-19 18:37:03 | I |       zero_point=None,
+24-11-19 18:37:03 | I |       group_shapes=((-1, -1, -1),),
+24-11-19 18:37:03 | I |       scale_dtypes=(None,),
+24-11-19 18:37:03 | I |       intermediate_dtypes=(),
+24-11-19 18:37:03 | I |       intermediate_levels=(),
+24-11-19 18:37:03 | I |       needs_dequant_saturation=False,
+24-11-19 18:37:03 | I |       skips=[],
+24-11-19 18:37:03 | I |       static=False,
+24-11-19 18:37:03 | I |       kernel_gptq=None,
+24-11-19 18:37:03 | I |       calib_range=None),
+24-11-19 18:37:03 | I |     opts=LlmActivationQuantizerConfig(
+24-11-19 18:37:03 | I |       dtype=None,
+24-11-19 18:37:03 | I |       zero_point=None,
+24-11-19 18:37:03 | I |       group_shapes=((-1, -1, -1),),
+24-11-19 18:37:03 | I |       scale_dtypes=(None,),
+24-11-19 18:37:03 | I |       intermediate_dtypes=(),
+24-11-19 18:37:03 | I |       intermediate_levels=(),
+24-11-19 18:37:03 | I |       needs_dequant_saturation=False,
+24-11-19 18:37:03 | I |       skips=[],
+24-11-19 18:37:03 | I |       static=False,
+24-11-19 18:37:03 | I |       kernel_gptq=None,
+24-11-19 18:37:03 | I |       calib_range=None),
+24-11-19 18:37:03 | I |     calib=LlmCalibDataLoaderConfig(
+24-11-19 18:37:03 | I |       data=pileval,
+24-11-19 18:37:03 | I |       num_samples=128,
+24-11-19 18:37:03 | I |       batch_size=1,
+24-11-19 18:37:03 | I |       path=mit-han-lab/pile-val-backup,
+24-11-19 18:37:03 | I |       seq_length=1024,
+24-11-19 18:37:03 | I |       min_seq_length=0,
+24-11-19 18:37:03 | I |       max_seq_length=0,
+24-11-19 18:37:03 | I |       local_path=),
+24-11-19 18:37:03 | I |     rotation=None,
+24-11-19 18:37:03 | I |     reorder=None,
+24-11-19 18:37:03 | I |     smooth=None,
+24-11-19 18:37:03 | I |     develop_dtype=torch.float32),
+24-11-19 18:37:03 | I |   seed=12345,
+24-11-19 18:37:03 | I |   skip_eval=False,
+24-11-19 18:37:03 | I |   load_from=,
+24-11-19 18:37:03 | I |   save_model=False,
+24-11-19 18:37:03 | I |   copy_on_save=False)
+24-11-19 18:37:03 | I | === Dumped Configurations ===
+24-11-19 18:37:03 | I | { 'cache': {'path': {'acts': '', 'reorder': '', 'rotation': '', 'smooth': '', 'wgts': ''}, 'root': 'runs/shang'},
+24-11-19 18:37:03 | I |   'copy_on_save': False,
+24-11-19 18:37:03 | I |   'eval': {'batch_size': 8, 'evaluators': ['gptq'], 'max_seq_length': -4096, 'num_gpus': 1, 'tasks': ['wikitext']},
+24-11-19 18:37:03 | I |   'load_from': '',
+24-11-19 18:37:03 | I |   'model': { 'dtype': 'torch.float16',
+24-11-19 18:37:03 | I |              'family': 'llama-2',
+24-11-19 18:37:03 | I |              'local_path': '/home/yujunlin/models/llama-2/llama-2-7b-instruct-together-32k',
+24-11-19 18:37:03 | I |              'local_root': '/home/yujunlin/models',
+24-11-19 18:37:03 | I |              'name': 'llama-2-7b-instruct-together-32k',
+24-11-19 18:37:03 | I |              'path': '/home/yujunlin/models/llama-2/llama-2-7b-instruct-together-32k',
+24-11-19 18:37:03 | I |              'root': ''},
+24-11-19 18:37:03 | I |   'output': {'dirname': 'default-pileval.128x1024.[0-0]', 'job': 'run', 'root': 'runs/shang'},
+24-11-19 18:37:03 | I |   'quant': { 'calib': { 'data': 'pileval',
+24-11-19 18:37:03 | I |                         'local_path': '',
+24-11-19 18:37:03 | I |                         'max_seq_length': 0,
+24-11-19 18:37:03 | I |                         'min_seq_length': 0,
+24-11-19 18:37:03 | I |                         'num_samples': 128,
+24-11-19 18:37:03 | I |                         'path': 'mit-han-lab/pile-val-backup',
+24-11-19 18:37:03 | I |                         'seq_length': 1024},
+24-11-19 18:37:03 | I |              'develop_dtype': 'torch.float32',
+24-11-19 18:37:03 | I |              'enable_reorder': False,
+24-11-19 18:37:03 | I |              'enable_rotation': False,
+24-11-19 18:37:03 | I |              'enable_smooth': False,
+24-11-19 18:37:03 | I |              'ipts': { 'dtype': None,
+24-11-19 18:37:03 | I |                        'enable_calib_range': False,
+24-11-19 18:37:03 | I |                        'group_shapes': [[-1, -1, -1]],
+24-11-19 18:37:03 | I |                        'scale_dtypes': [None],
+24-11-19 18:37:03 | I |                        'skips': [],
+24-11-19 18:37:03 | I |                        'static': False,
+24-11-19 18:37:03 | I |                        'zero_point': None},
+24-11-19 18:37:03 | I |              'opts': { 'dtype': None,
+24-11-19 18:37:03 | I |                        'enable_calib_range': False,
+24-11-19 18:37:03 | I |                        'group_shapes': [[-1, -1, -1]],
+24-11-19 18:37:03 | I |                        'scale_dtypes': [None],
+24-11-19 18:37:03 | I |                        'skips': [],
+24-11-19 18:37:03 | I |                        'static': False,
+24-11-19 18:37:03 | I |                        'zero_point': None},
+24-11-19 18:37:03 | I |              'wgts': { 'dtype': None,
+24-11-19 18:37:03 | I |                        'enable_calib_range': False,
+24-11-19 18:37:03 | I |                        'enable_kernel_gptq': False,
+24-11-19 18:37:03 | I |                        'group_shapes': [[-1, -1, -1]],
+24-11-19 18:37:03 | I |                        'intermediate_dtypes': [],
+24-11-19 18:37:03 | I |                        'intermediate_levels': [],
+24-11-19 18:37:03 | I |                        'needs_dequant_saturation': False,
+24-11-19 18:37:03 | I |                        'scale_dtypes': [None],
+24-11-19 18:37:03 | I |                        'skips': [],
+24-11-19 18:37:03 | I |                        'zero_point': None}},
+24-11-19 18:37:03 | I |   'save_model': False,
+24-11-19 18:37:03 | I |   'seed': 12345,
+24-11-19 18:37:03 | I |   'skip_eval': False}
+24-11-19 18:37:03 | I | === Output Directory ===
+24-11-19 18:37:03 | I | runs/shang/llm/llama-2/llama-2-7b-instruct-together-32k/w.16-x.16-y.16/w.fp16-x.fp16-y.fp16/w.tnsr.fp16-x.tnsr.fp16-y.tnsr.fp16/default-pileval.128x1024.[0-0]/run-241119.183703
+24-11-19 18:37:03 | I | === Start Evaluating ===
+24-11-19 18:37:03 | I | * Building model llama-2-7b-instruct-together-32k from /home/yujunlin/models/llama-2/llama-2-7b-instruct-together-32k
+24-11-19 18:37:03 | I |   We will use 90% of the memory on device 0 for storing the model, and 10% for the buffer to avoid OOM. You can set `max_memory` in to a higher value to use more memory (at your own risk).
+24-11-19 18:37:12 | I |   - Patching LlamaSdpaAttention.forward in model.layers.0.self_attn
+24-11-19 18:37:12 | I |   - Patching LlamaSdpaAttention.forward in model.layers.1.self_attn
+24-11-19 18:37:12 | I |   - Patching LlamaSdpaAttention.forward in model.layers.2.self_attn
+24-11-19 18:37:12 | I |   - Patching LlamaSdpaAttention.forward in model.layers.3.self_attn
+24-11-19 18:37:12 | I |   - Patching LlamaSdpaAttention.forward in model.layers.4.self_attn
+24-11-19 18:37:12 | I |   - Patching LlamaSdpaAttention.forward in model.layers.5.self_attn
+24-11-19 18:37:12 | I |   - Patching LlamaSdpaAttention.forward in model.layers.6.self_attn
+24-11-19 18:37:12 | I |   - Patching LlamaSdpaAttention.forward in model.layers.7.self_attn
+24-11-19 18:37:12 | I |   - Patching LlamaSdpaAttention.forward in model.layers.8.self_attn
+24-11-19 18:37:12 | I |   - Patching LlamaSdpaAttention.forward in model.layers.9.self_attn
+24-11-19 18:37:12 | I |   - Patching LlamaSdpaAttention.forward in model.layers.10.self_attn
+24-11-19 18:37:12 | I |   - Patching LlamaSdpaAttention.forward in model.layers.11.self_attn
+24-11-19 18:37:12 | I |   - Patching LlamaSdpaAttention.forward in model.layers.12.self_attn
+24-11-19 18:37:12 | I |   - Patching LlamaSdpaAttention.forward in model.layers.13.self_attn
+24-11-19 18:37:12 | I |   - Patching LlamaSdpaAttention.forward in model.layers.14.self_attn
+24-11-19 18:37:12 | I |   - Patching LlamaSdpaAttention.forward in model.layers.15.self_attn
+24-11-19 18:37:12 | I |   - Patching LlamaSdpaAttention.forward in model.layers.16.self_attn
+24-11-19 18:37:12 | I |   - Patching LlamaSdpaAttention.forward in model.layers.17.self_attn
+24-11-19 18:37:12 | I |   - Patching LlamaSdpaAttention.forward in model.layers.18.self_attn
+24-11-19 18:37:12 | I |   - Patching LlamaSdpaAttention.forward in model.layers.19.self_attn
+24-11-19 18:37:12 | I |   - Patching LlamaSdpaAttention.forward in model.layers.20.self_attn
+24-11-19 18:37:12 | I |   - Patching LlamaSdpaAttention.forward in model.layers.21.self_attn
+24-11-19 18:37:12 | I |   - Patching LlamaSdpaAttention.forward in model.layers.22.self_attn
+24-11-19 18:37:12 | I |   - Patching LlamaSdpaAttention.forward in model.layers.23.self_attn
+24-11-19 18:37:12 | I |   - Patching LlamaSdpaAttention.forward in model.layers.24.self_attn
+24-11-19 18:37:12 | I |   - Patching LlamaSdpaAttention.forward in model.layers.25.self_attn
+24-11-19 18:37:12 | I |   - Patching LlamaSdpaAttention.forward in model.layers.26.self_attn
+24-11-19 18:37:12 | I |   - Patching LlamaSdpaAttention.forward in model.layers.27.self_attn
+24-11-19 18:37:12 | I |   - Patching LlamaSdpaAttention.forward in model.layers.28.self_attn
+24-11-19 18:37:12 | I |   - Patching LlamaSdpaAttention.forward in model.layers.29.self_attn
+24-11-19 18:37:12 | I |   - Patching LlamaSdpaAttention.forward in model.layers.30.self_attn
+24-11-19 18:37:12 | I |   - Patching LlamaSdpaAttention.forward in model.layers.31.self_attn
+24-11-19 18:37:12 | I | * Development dtype is torch.float32
+24-11-19 18:37:12 | I | * Evaluating model
+24-11-19 18:37:12 | W |     `pretrained` model kwarg is not of type `str`. Many other model arguments may be ignored. Please do not launch via accelerate or use `parallelize=True` if passing an existing model this way.
+24-11-19 18:37:12 | I |     Using model type 'default'
+24-11-19 18:37:12 | W |     Passed an already-initialized model through `pretrained`, assuming single-process call to evaluate() or custom distributed integration
+24-11-19 18:37:12 | I |   - Evaluator: gptq
+24-11-19 18:37:12 | I |   - Tasks: ['wikitext']
+24-11-19 18:37:12 | I |   - Batch_size: 8
+24-11-19 18:37:12 | I |     + Max_seq_length: 2048
+24-11-19 18:37:12 | D |         Starting new HTTPS connection (1): huggingface.co:443
+24-11-19 18:37:18 | W |         Using the latest cached version of the dataset since wikitext couldn't be found on the Hugging Face Hub
+24-11-19 18:37:18 | W |         Found the latest cached dataset configuration 'wikitext-2-raw-v1' at /home/yujunlin/.cache/huggingface/datasets/wikitext/wikitext-2-raw-v1/0.0.0/b08601e04326c79dfdd32d625aee71d232d685c3 (last modified on Tue Oct  8 19:51:38 2024).
+24-11-19 18:37:18 | D |         Attempting to acquire lock 23438952619984 on /home/yujunlin/.cache/huggingface/datasets/_home_yujunlin_.cache_huggingface_datasets_wikitext_wikitext-2-raw-v1_0.0.0_b08601e04326c79dfdd32d625aee71d232d685c3.lock
+24-11-19 18:37:18 | D |         Lock 23438952619984 acquired on /home/yujunlin/.cache/huggingface/datasets/_home_yujunlin_.cache_huggingface_datasets_wikitext_wikitext-2-raw-v1_0.0.0_b08601e04326c79dfdd32d625aee71d232d685c3.lock
+24-11-19 18:37:18 | D |         open file: /home/yujunlin/.cache/huggingface/datasets/wikitext/wikitext-2-raw-v1/0.0.0/b08601e04326c79dfdd32d625aee71d232d685c3/dataset_info.json
+24-11-19 18:37:18 | D |         Attempting to release lock 23438952619984 on /home/yujunlin/.cache/huggingface/datasets/_home_yujunlin_.cache_huggingface_datasets_wikitext_wikitext-2-raw-v1_0.0.0_b08601e04326c79dfdd32d625aee71d232d685c3.lock
+24-11-19 18:37:18 | D |         Lock 23438952619984 released on /home/yujunlin/.cache/huggingface/datasets/_home_yujunlin_.cache_huggingface_datasets_wikitext_wikitext-2-raw-v1_0.0.0_b08601e04326c79dfdd32d625aee71d232d685c3.lock
+24-11-19 18:37:31 | I |       - Results:
+24-11-19 18:37:32 | I |         |  Task  |Version|    Metric     |Value |   |Stderr|
+24-11-19 18:37:32 | I |         |--------|------:|---------------|-----:|---|-----:|
+24-11-19 18:37:32 | I |         |wikitext|      1|word_perplexity|6.4432|±  |6.4432|
+24-11-19 18:37:32 | I |
+24-11-19 18:37:32 | I |     + Max_seq_length: 4096
+24-11-19 18:37:32 | D |         Starting new HTTPS connection (2): huggingface.co:443
+24-11-19 18:37:38 | W |         Using the latest cached version of the dataset since wikitext couldn't be found on the Hugging Face Hub
+24-11-19 18:37:38 | W |         Found the latest cached dataset configuration 'wikitext-2-raw-v1' at /home/yujunlin/.cache/huggingface/datasets/wikitext/wikitext-2-raw-v1/0.0.0/b08601e04326c79dfdd32d625aee71d232d685c3 (last modified on Tue Oct  8 19:51:38 2024).
+24-11-19 18:37:38 | D |         Attempting to acquire lock 23438952626944 on /home/yujunlin/.cache/huggingface/datasets/_home_yujunlin_.cache_huggingface_datasets_wikitext_wikitext-2-raw-v1_0.0.0_b08601e04326c79dfdd32d625aee71d232d685c3.lock
+24-11-19 18:37:38 | D |         Lock 23438952626944 acquired on /home/yujunlin/.cache/huggingface/datasets/_home_yujunlin_.cache_huggingface_datasets_wikitext_wikitext-2-raw-v1_0.0.0_b08601e04326c79dfdd32d625aee71d232d685c3.lock
+24-11-19 18:37:38 | D |         open file: /home/yujunlin/.cache/huggingface/datasets/wikitext/wikitext-2-raw-v1/0.0.0/b08601e04326c79dfdd32d625aee71d232d685c3/dataset_info.json
+24-11-19 18:37:38 | D |         Attempting to release lock 23438952626944 on /home/yujunlin/.cache/huggingface/datasets/_home_yujunlin_.cache_huggingface_datasets_wikitext_wikitext-2-raw-v1_0.0.0_b08601e04326c79dfdd32d625aee71d232d685c3.lock
+24-11-19 18:37:38 | D |         Lock 23438952626944 released on /home/yujunlin/.cache/huggingface/datasets/_home_yujunlin_.cache_huggingface_datasets_wikitext_wikitext-2-raw-v1_0.0.0_b08601e04326c79dfdd32d625aee71d232d685c3.lock
+24-11-19 18:37:50 | I |       - Results:
+24-11-19 18:37:50 | I |         |  Task  |Version|    Metric     |Value |   |Stderr|
+24-11-19 18:37:50 | I |         |--------|------:|---------------|-----:|---|-----:|
+24-11-19 18:37:50 | I |         |wikitext|      1|word_perplexity|5.9649|±  |5.9649|
+24-11-19 18:37:50 | I |
+24-11-19 18:37:50 | I | * Saving results to runs/shang/llm/llama-2/llama-2-7b-instruct-together-32k/w.16-x.16-y.16/w.fp16-x.fp16-y.fp16/w.tnsr.fp16-x.tnsr.fp16-y.tnsr.fp16/default-pileval.128x1024.[0-0]/run-241119.183703

runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.GridSearch.bn2.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200548/config-241119.200548.yaml ADDED Viewed

	@@ -0,0 +1,146 @@

+cache:
+  root: runs/shang
+  path:
+    rotation: runs/shang/llm/cache/quant/rotation/hadamard/llama-2-7b-instruct-together-32k.pt
+    reorder: ''
+    smooth: runs/shang/llm/cache/quant/pileval.128x1024.[0-0]/smooth/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/w.skip.[]-x.skip.[]-y.skip.[attn_q]/rotate.hadamard.[out_proj]/smooth.attn.OutputsError.GridSearch.Layer.d2.en1.sn1/smooth.attn.[a.AbsMax.b.AbsMax]/smooth.attn.g20.bn2/llama-2-7b-instruct-together-32k.pt
+    wgts: runs/shang/llm/cache/quant/pileval.128x1024.[0-0]/wgts/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/w.skip.[]-x.skip.[]-y.skip.[attn_q]/rotate.hadamard.[out_proj]/smooth.attn.OutputsError.GridSearch.Layer.d2.en1.sn1/smooth.attn.[a.AbsMax.b.AbsMax]/smooth.attn.g20.bn2/w.kernel.gptq.d0p01.b128/w.range.OutputsError.GridSearch.Group.d2.e512.sn1/w.range.r.[0p2.1].g80/w.range.skip.[]/llama-2-7b-instruct-together-32k.pt
+    acts: runs/shang/llm/cache/quant/pileval.128x1024.[0-0]/acts/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/w.skip.[]-x.skip.[]-y.skip.[attn_q]/rotate.hadamard.[out_proj]/smooth.attn.OutputsError.GridSearch.Layer.d2.en1.sn1/smooth.attn.[a.AbsMax.b.AbsMax]/smooth.attn.g20.bn2/w.kernel.gptq.d0p01.b128/w.range.OutputsError.GridSearch.Group.d2.e512.sn1/w.range.r.[0p2.1].g80/w.range.skip.[]/y.range.OutputsError.Manual.Layer.d2.en1.sn1/y.range.r.[1]/y.range.skip.[]/llama-2-7b-instruct-together-32k.pt
+output:
+  root: runs/shang
+  dirname: skip.y.[q]-gptq-rot.[+out]-smth.attn.GridSearch.bn2.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]
+  job: run
+model:
+  name: llama-2-7b-instruct-together-32k
+  family: llama-2
+  path: /home/yujunlin/models/llama-2/llama-2-7b-instruct-together-32k
+  root: ''
+  local_path: /home/yujunlin/models/llama-2/llama-2-7b-instruct-together-32k
+  local_root: /home/yujunlin/models
+  dtype: torch.float16
+eval:
+  num_gpus: 1
+  batch_size: 8
+  tasks:
+  - wikitext
+  max_seq_length: -4096
+  evaluators:
+  - gptq
+quant:
+  wgts:
+    dtype: sint8
+    zero_point: null
+    group_shapes:
+    - - 1
+      - -1
+      - -1
+    scale_dtypes:
+    - torch.float16
+    intermediate_dtypes: []
+    intermediate_levels: []
+    needs_dequant_saturation: false
+    skips: []
+    enable_kernel_gptq: true
+    kernel_gptq:
+      damp_percentage: 0.01
+      block_size: 128
+      num_inv_tries: 250
+      hessian_block_size: 512
+    enable_calib_range: true
+    calib_range:
+      degree: 2
+      objective: OutputsError
+      strategy: GridSearch
+      granularity: Group
+      element_batch_size: 64
+      sample_batch_size: -1
+      element_size: 512
+      sample_size: -1
+      pre_reshape: true
+      outputs_device: cpu
+      ratio: 1.0
+      max_shrink: 0.2
+      max_expand: 1.0
+      num_grids: 80
+      allow_scale: false
+      skips: []
+  ipts:
+    dtype: sint8
+    zero_point: null
+    group_shapes:
+    - - 1
+      - -1
+      - -1
+    scale_dtypes:
+    - torch.float16
+    skips: []
+    static: false
+    enable_calib_range: false
+  opts:
+    dtype: sint8
+    zero_point: null
+    group_shapes:
+    - - -1
+      - -1
+      - -1
+    scale_dtypes:
+    - torch.float16
+    skips:
+    - attn_q
+    static: true
+    enable_calib_range: true
+    calib_range:
+      degree: 2
+      objective: OutputsError
+      strategy: Manual
+      granularity: Layer
+      element_batch_size: -1
+      sample_batch_size: -1
+      element_size: -1
+      sample_size: -1
+      pre_reshape: true
+      outputs_device: cpu
+      ratio: 1.0
+      max_shrink: 0.2
+      max_expand: 1.0
+      num_grids: 80
+      allow_scale: false
+      skips: []
+  calib:
+    data: pileval
+    num_samples: 128
+    path: mit-han-lab/pile-val-backup
+    seq_length: 1024
+    min_seq_length: 0
+    max_seq_length: 0
+    local_path: ''
+  enable_rotation: true
+  rotation:
+    random: false
+    transforms:
+    - out_proj
+  enable_reorder: false
+  enable_smooth: true
+  smooth:
+    enable_proj: false
+    enable_attn: true
+    attn:
+      degree: 2
+      strategy: GridSearch
+      sample_batch_size: -1
+      sample_size: -1
+      outputs_device: cpu
+      allow_a_quant: true
+      allow_b_quant: true
+      spans:
+      - - AbsMax
+        - AbsMax
+      alpha: 0.5
+      beta: -2
+      num_grids: 20
+  develop_dtype: torch.float32
+seed: 12345
+skip_eval: false
+load_from: ''
+save_model: 'true'
+copy_on_save: false

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:452d684e7ed9bbcde200ac9e61316bd873246869750aa79ffd3cb0aececc9b99
+size 35898

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:bc1ac15dda8c29eb2ebd22eee524e1bb0c696f2dcba86f840af18da8c8434b12
+size 13476951926

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f916dabab0b940f3f0130e5acabbb6d02e7c557bf8608b430e001e02d4a75ab2
+size 134219097

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5e4b445b99650db18a04ac7d3ec3214937f362749e437fafd2d124b993fdb60f
+size 5566058

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4e46a0206c5c7210660e08615fba714402bcd532050747f0fd9b134f77cb9fba
+size 535162

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3454af85d02208d593d736d00fa756e45f909f2f6f1bbefdcef52f1e3e6cbbaf
+size 5527158

	@@ -0,0 +1,32 @@

+{
+  "gptq": {
+    "2048": {
+      "results": {
+        "wikitext": {
+          "word_perplexity": 6.503797370202683
+        }
+      },
+      "versions": {
+        "wikitext": 1
+      },
+      "config": {
+        "model": "llama-2-7b-instruct-together-32k"
+      },
+      "model": "llama-2-7b-instruct-together-32k"
+    },
+    "4096": {
+      "results": {
+        "wikitext": {
+          "word_perplexity": 6.014449215881915
+        }
+      },
+      "versions": {
+        "wikitext": 1
+      },
+      "config": {
+        "model": "llama-2-7b-instruct-together-32k"
+      },
+      "model": "llama-2-7b-instruct-together-32k"
+    }
+  }
+}

The diff for this file is too large to render. See raw diff

runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.a0p5.b0.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0].RUNNING/run-241119.200727.RUNNING/config-241119.200727.yaml ADDED Viewed

	@@ -0,0 +1,146 @@

+cache:
+  root: runs/shang
+  path:
+    rotation: runs/shang/llm/cache/quant/rotation/hadamard/llama-2-7b-instruct-together-32k.pt
+    reorder: ''
+    smooth: runs/shang/llm/cache/quant/pileval.128x1024.[0-0]/smooth/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/w.skip.[]-x.skip.[]-y.skip.[attn_q]/rotate.hadamard.[out_proj]/smooth.attn.OutputsError.Manual.Layer.d2.en1.sn1/smooth.attn.[a.AbsMax.b.AbsMax]/smooth.attn.a0p5.b0/llama-2-7b-instruct-together-32k.pt
+    wgts: runs/shang/llm/cache/quant/pileval.128x1024.[0-0]/wgts/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/w.skip.[]-x.skip.[]-y.skip.[attn_q]/rotate.hadamard.[out_proj]/smooth.attn.OutputsError.Manual.Layer.d2.en1.sn1/smooth.attn.[a.AbsMax.b.AbsMax]/smooth.attn.a0p5.b0/w.kernel.gptq.d0p01.b128/w.range.OutputsError.GridSearch.Group.d2.e512.sn1/w.range.r.[0p2.1].g80/w.range.skip.[]/llama-2-7b-instruct-together-32k.pt
+    acts: runs/shang/llm/cache/quant/pileval.128x1024.[0-0]/acts/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/w.skip.[]-x.skip.[]-y.skip.[attn_q]/rotate.hadamard.[out_proj]/smooth.attn.OutputsError.Manual.Layer.d2.en1.sn1/smooth.attn.[a.AbsMax.b.AbsMax]/smooth.attn.a0p5.b0/w.kernel.gptq.d0p01.b128/w.range.OutputsError.GridSearch.Group.d2.e512.sn1/w.range.r.[0p2.1].g80/w.range.skip.[]/y.range.OutputsError.Manual.Layer.d2.en1.sn1/y.range.r.[1]/y.range.skip.[]/llama-2-7b-instruct-together-32k.pt
+output:
+  root: runs/shang
+  dirname: skip.y.[q]-gptq-rot.[+out]-smth.attn.a0p5.b0.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]
+  job: run
+model:
+  name: llama-2-7b-instruct-together-32k
+  family: llama-2
+  path: /home/yujunlin/models/llama-2/llama-2-7b-instruct-together-32k
+  root: ''
+  local_path: /home/yujunlin/models/llama-2/llama-2-7b-instruct-together-32k
+  local_root: /home/yujunlin/models
+  dtype: torch.float16
+eval:
+  num_gpus: 1
+  batch_size: 8
+  tasks:
+  - wikitext
+  max_seq_length: -4096
+  evaluators:
+  - gptq
+quant:
+  wgts:
+    dtype: sint8
+    zero_point: null
+    group_shapes:
+    - - 1
+      - -1
+      - -1
+    scale_dtypes:
+    - torch.float16
+    intermediate_dtypes: []
+    intermediate_levels: []
+    needs_dequant_saturation: false
+    skips: []
+    enable_kernel_gptq: true
+    kernel_gptq:
+      damp_percentage: 0.01
+      block_size: 128
+      num_inv_tries: 250
+      hessian_block_size: 512
+    enable_calib_range: true
+    calib_range:
+      degree: 2
+      objective: OutputsError
+      strategy: GridSearch
+      granularity: Group
+      element_batch_size: 64
+      sample_batch_size: -1
+      element_size: 512
+      sample_size: -1
+      pre_reshape: true
+      outputs_device: cpu
+      ratio: 1.0
+      max_shrink: 0.2
+      max_expand: 1.0
+      num_grids: 80
+      allow_scale: false
+      skips: []
+  ipts:
+    dtype: sint8
+    zero_point: null
+    group_shapes:
+    - - 1
+      - -1
+      - -1
+    scale_dtypes:
+    - torch.float16
+    skips: []
+    static: false
+    enable_calib_range: false
+  opts:
+    dtype: sint8
+    zero_point: null
+    group_shapes:
+    - - -1
+      - -1
+      - -1
+    scale_dtypes:
+    - torch.float16
+    skips:
+    - attn_q
+    static: true
+    enable_calib_range: true
+    calib_range:
+      degree: 2
+      objective: OutputsError
+      strategy: Manual
+      granularity: Layer
+      element_batch_size: -1
+      sample_batch_size: -1
+      element_size: -1
+      sample_size: -1
+      pre_reshape: true
+      outputs_device: cpu
+      ratio: 1.0
+      max_shrink: 0.2
+      max_expand: 1.0
+      num_grids: 80
+      allow_scale: false
+      skips: []
+  calib:
+    data: pileval
+    num_samples: 128
+    path: mit-han-lab/pile-val-backup
+    seq_length: 1024
+    min_seq_length: 0
+    max_seq_length: 0
+    local_path: ''
+  enable_rotation: true
+  rotation:
+    random: false
+    transforms:
+    - out_proj
+  enable_reorder: false
+  enable_smooth: true
+  smooth:
+    enable_proj: false
+    enable_attn: true
+    attn:
+      degree: 2
+      strategy: Manual
+      sample_batch_size: -1
+      sample_size: -1
+      outputs_device: cpu
+      allow_a_quant: true
+      allow_b_quant: true
+      spans:
+      - - AbsMax
+        - AbsMax
+      alpha: 0.5
+      beta: 0
+      num_grids: 20
+  develop_dtype: torch.float32
+seed: 12345
+skip_eval: false
+load_from: ''
+save_model: 'true'
+copy_on_save: false

runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.a0p5.b0.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0].RUNNING/run-241119.200727.RUNNING/model/model.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c7707587af5ec67ddc0f3f7dbfdbd9c86f9ded33e10c5bf4b0246518c228199f
+size 13476951926

runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.a0p5.b0.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0].RUNNING/run-241119.200727.RUNNING/model/rotation.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f916dabab0b940f3f0130e5acabbb6d02e7c557bf8608b430e001e02d4a75ab2
+size 134219097

runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.a0p5.b0.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0].RUNNING/run-241119.200727.RUNNING/model/scale.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6c32ea151ff1a6e987ec8866aae01d07225acf388d05417668928a96d3fe6c6a
+size 5566058

runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.a0p5.b0.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0].RUNNING/run-241119.200727.RUNNING/model/smooth.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:385f0d71a9d0cca3df4fd946a21df2de62d089964597e2e1704c7a39cfee0711
+size 535162

runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.a0p5.b0.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0].RUNNING/run-241119.200727.RUNNING/model/wgts.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:90a45af762722116d586b0e7011d9acfcc8950086c0470898c078c05af779d0e
+size 5527158

runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.a0p5.b0.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0].RUNNING/run-241119.200727.RUNNING/run-241119.200727.log ADDED Viewed

The diff for this file is too large to render. See raw diff

runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.201608/config-241119.201608.yaml ADDED Viewed

	@@ -0,0 +1,129 @@

+cache:
+  root: runs/shang
+  path:
+    rotation: runs/shang/llm/cache/quant/rotation/hadamard/llama-2-7b-instruct-together-32k.pt
+    reorder: ''
+    smooth: ''
+    wgts: runs/shang/llm/cache/quant/pileval.128x1024.[0-0]/wgts/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/w.skip.[]-x.skip.[]-y.skip.[attn_q]/rotate.hadamard.[out_proj]/w.kernel.gptq.d0p01.b128/w.range.OutputsError.GridSearch.Group.d2.e512.sn1/w.range.r.[0p2.1].g80/w.range.skip.[]/llama-2-7b-instruct-together-32k.pt
+    acts: runs/shang/llm/cache/quant/pileval.128x1024.[0-0]/acts/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/w.skip.[]-x.skip.[]-y.skip.[attn_q]/rotate.hadamard.[out_proj]/w.kernel.gptq.d0p01.b128/w.range.OutputsError.GridSearch.Group.d2.e512.sn1/w.range.r.[0p2.1].g80/w.range.skip.[]/y.range.OutputsError.Manual.Layer.d2.en1.sn1/y.range.r.[1]/y.range.skip.[]/llama-2-7b-instruct-together-32k.pt
+output:
+  root: runs/shang
+  dirname: skip.y.[q]-gptq-rot.[+out]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]
+  job: run
+model:
+  name: llama-2-7b-instruct-together-32k
+  family: llama-2
+  path: /home/yujunlin/models/llama-2/llama-2-7b-instruct-together-32k
+  root: ''
+  local_path: /home/yujunlin/models/llama-2/llama-2-7b-instruct-together-32k
+  local_root: /home/yujunlin/models
+  dtype: torch.float16
+eval:
+  num_gpus: 1
+  batch_size: 8
+  tasks:
+  - wikitext
+  max_seq_length: -4096
+  evaluators:
+  - gptq
+quant:
+  wgts:
+    dtype: sint8
+    zero_point: null
+    group_shapes:
+    - - 1
+      - -1
+      - -1
+    scale_dtypes:
+    - torch.float16
+    intermediate_dtypes: []
+    intermediate_levels: []
+    needs_dequant_saturation: false
+    skips: []
+    enable_kernel_gptq: true
+    kernel_gptq:
+      damp_percentage: 0.01
+      block_size: 128
+      num_inv_tries: 250
+      hessian_block_size: 512
+    enable_calib_range: true
+    calib_range:
+      degree: 2
+      objective: OutputsError
+      strategy: GridSearch
+      granularity: Group
+      element_batch_size: 64
+      sample_batch_size: -1
+      element_size: 512
+      sample_size: -1
+      pre_reshape: true
+      outputs_device: cpu
+      ratio: 1.0
+      max_shrink: 0.2
+      max_expand: 1.0
+      num_grids: 80
+      allow_scale: false
+      skips: []
+  ipts:
+    dtype: sint8
+    zero_point: null
+    group_shapes:
+    - - 1
+      - -1
+      - -1
+    scale_dtypes:
+    - torch.float16
+    skips: []
+    static: false
+    enable_calib_range: false
+  opts:
+    dtype: sint8
+    zero_point: null
+    group_shapes:
+    - - -1
+      - -1
+      - -1
+    scale_dtypes:
+    - torch.float16
+    skips:
+    - attn_q
+    static: true
+    enable_calib_range: true
+    calib_range:
+      degree: 2
+      objective: OutputsError
+      strategy: Manual
+      granularity: Layer
+      element_batch_size: -1
+      sample_batch_size: -1
+      element_size: -1
+      sample_size: -1
+      pre_reshape: true
+      outputs_device: cpu
+      ratio: 1.0
+      max_shrink: 0.2
+      max_expand: 1.0
+      num_grids: 80
+      allow_scale: false
+      skips: []
+  calib:
+    data: pileval
+    num_samples: 128
+    path: mit-han-lab/pile-val-backup
+    seq_length: 1024
+    min_seq_length: 0
+    max_seq_length: 0
+    local_path: ''
+  enable_rotation: true
+  rotation:
+    random: false
+    transforms:
+    - out_proj
+  enable_reorder: false
+  enable_smooth: false
+  develop_dtype: torch.float32
+seed: 12345
+skip_eval: false
+load_from: ''
+save_model: 'true'
+copy_on_save: false

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:38385b12cbc35439f6e635f9e60e69e2490cb32f65a90e85b921587f19973ca6
+size 35898

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e05ab74d7e1f68b9ae327e1afb40528b281d90dbf814760976dd74e97a54beae
+size 13476951926

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f916dabab0b940f3f0130e5acabbb6d02e7c557bf8608b430e001e02d4a75ab2
+size 134219097

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:527a5ec6f0ba29bd2579c7fc5a8221231449f5dcb35223f3036ef6e87166bf60
+size 5566058

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e6a8e12842f3c488159d02404a8885723a3c096ac3e63aedcc515c13da27b1a2
+size 5527158

	@@ -0,0 +1,32 @@

+{
+  "gptq": {
+    "2048": {
+      "results": {
+        "wikitext": {
+          "word_perplexity": 6.509843744356094
+        }
+      },
+      "versions": {
+        "wikitext": 1
+      },
+      "config": {
+        "model": "llama-2-7b-instruct-together-32k"
+      },
+      "model": "llama-2-7b-instruct-together-32k"
+    },
+    "4096": {
+      "results": {
+        "wikitext": {
+          "word_perplexity": 6.017616942577096
+        }
+      },
+      "versions": {
+        "wikitext": 1
+      },
+      "config": {
+        "model": "llama-2-7b-instruct-together-32k"
+      },
+      "model": "llama-2-7b-instruct-together-32k"
+    }
+  }
+}

The diff for this file is too large to render. See raw diff

runs/llama-3-8b-instruct-gradient-1048k/w.16-x.16-y.16/w.fp16-x.fp16-y.fp16/w.tnsr.fp16-x.tnsr.fp16-y.tnsr.fp16/default-pileval.128x1024.[0-0]/run-241119.172947/config-241119.172947.yaml ADDED Viewed

	@@ -0,0 +1,85 @@

+cache:
+  root: runs/shang
+  path:
+    rotation: ''
+    reorder: ''
+    smooth: ''
+    wgts: ''
+    acts: ''
+output:
+  root: runs/shang
+  dirname: default-pileval.128x1024.[0-0]
+  job: run
+model:
+  name: llama-3-8b-instruct-gradient-1048k
+  family: llama-3
+  path: /home/yujunlin/models/llama-3/llama-3-8b-instruct-gradient-1048k
+  root: ''
+  local_path: /home/yujunlin/models/llama-3/llama-3-8b-instruct-gradient-1048k
+  local_root: /home/yujunlin/models
+  dtype: torch.float16
+eval:
+  num_gpus: 1
+  batch_size: 8
+  tasks:
+  - wikitext
+  max_seq_length: -4096
+  evaluators:
+  - gptq
+quant:
+  wgts:
+    dtype: null
+    zero_point: null
+    group_shapes:
+    - - -1
+      - -1
+      - -1
+    scale_dtypes:
+    - null
+    intermediate_dtypes: []
+    intermediate_levels: []
+    needs_dequant_saturation: false
+    skips: []
+    enable_kernel_gptq: false
+    enable_calib_range: false
+  ipts:
+    dtype: null
+    zero_point: null
+    group_shapes:
+    - - -1
+      - -1
+      - -1
+    scale_dtypes:
+    - null
+    skips: []
+    static: false
+    enable_calib_range: false
+  opts:
+    dtype: null
+    zero_point: null
+    group_shapes:
+    - - -1
+      - -1
+      - -1
+    scale_dtypes:
+    - null
+    skips: []
+    static: false
+    enable_calib_range: false
+  calib:
+    data: pileval
+    num_samples: 128
+    path: mit-han-lab/pile-val-backup
+    seq_length: 1024
+    min_seq_length: 0
+    max_seq_length: 0
+    local_path: ''
+  enable_rotation: false
+  enable_reorder: false
+  enable_smooth: false
+  develop_dtype: torch.float32
+seed: 12345
+skip_eval: false
+load_from: ''
+save_model: false
+copy_on_save: false

runs/llama-3-8b-instruct-gradient-1048k/w.16-x.16-y.16/w.fp16-x.fp16-y.fp16/w.tnsr.fp16-x.tnsr.fp16-y.tnsr.fp16/default-pileval.128x1024.[0-0]/run-241119.172947/results-241119.172947.json ADDED Viewed

	@@ -0,0 +1,32 @@

+{
+  "gptq": {
+    "2048": {
+      "results": {
+        "wikitext": {
+          "word_perplexity": 7.833065190604292
+        }
+      },
+      "versions": {
+        "wikitext": 1
+      },
+      "config": {
+        "model": "llama-3-8b-instruct-gradient-1048k"
+      },
+      "model": "llama-3-8b-instruct-gradient-1048k"
+    },
+    "4096": {
+      "results": {
+        "wikitext": {
+          "word_perplexity": 7.261800992411218
+        }
+      },
+      "versions": {
+        "wikitext": 1
+      },
+      "config": {
+        "model": "llama-3-8b-instruct-gradient-1048k"
+      },
+      "model": "llama-3-8b-instruct-gradient-1048k"
+    }
+  }
+}

runs/llama-3-8b-instruct-gradient-1048k/w.16-x.16-y.16/w.fp16-x.fp16-y.fp16/w.tnsr.fp16-x.tnsr.fp16-y.tnsr.fp16/default-pileval.128x1024.[0-0]/run-241119.172947/run-241119.172947.log ADDED Viewed

	@@ -0,0 +1,219 @@

+24-11-19 17:29:47 | I | === Configurations ===
+24-11-19 17:29:47 | I | LlmPtqRunConfig(
+24-11-19 17:29:47 | I |   cache=LlmCacheConfig(
+24-11-19 17:29:47 | I |     root=runs/shang,
+24-11-19 17:29:47 | I |     dirpath=LlmQuantCacheConfig(
+24-11-19 17:29:47 | I |       rotation=,
+24-11-19 17:29:47 | I |       reorder=,
+24-11-19 17:29:47 | I |       smooth=,
+24-11-19 17:29:47 | I |       wgts=,
+24-11-19 17:29:47 | I |       acts=),
+24-11-19 17:29:47 | I |     path=LlmQuantCacheConfig(
+24-11-19 17:29:47 | I |       rotation=,
+24-11-19 17:29:47 | I |       reorder=,
+24-11-19 17:29:47 | I |       smooth=,
+24-11-19 17:29:47 | I |       wgts=,
+24-11-19 17:29:47 | I |       acts=)),
+24-11-19 17:29:47 | I |   output=OutputConfig(
+24-11-19 17:29:47 | I |     root=runs/shang,
+24-11-19 17:29:47 | I |     dirname=default-pileval.128x1024.[0-0],
+24-11-19 17:29:47 | I |     job=run,
+24-11-19 17:29:47 | I |     dirpath=runs/shang/llm/llama-3/llama-3-8b-instruct-gradient-1048k/w.16-x.16-y.16/w.fp16-x.fp16-y.fp16/w.tnsr.fp16-x.tnsr.fp16-y.tnsr.fp16/default-pileval.128x1024.[0-0],
+24-11-19 17:29:47 | I |     timestamp=241119.172947),
+24-11-19 17:29:47 | I |   model=LlmModelConfig(
+24-11-19 17:29:47 | I |     name=llama-3-8b-instruct-gradient-1048k,
+24-11-19 17:29:47 | I |     family=llama-3,
+24-11-19 17:29:47 | I |     path=/home/yujunlin/models/llama-3/llama-3-8b-instruct-gradient-1048k,
+24-11-19 17:29:47 | I |     root=,
+24-11-19 17:29:47 | I |     local_path=/home/yujunlin/models/llama-3/llama-3-8b-instruct-gradient-1048k,
+24-11-19 17:29:47 | I |     local_root=/home/yujunlin/models,
+24-11-19 17:29:47 | I |     size=8.0,
+24-11-19 17:29:47 | I |     variant=instruct-gradient-1048k,
+24-11-19 17:29:47 | I |     dtype=torch.float16,
+24-11-19 17:29:47 | I |     orig_dtype=torch.bfloat16),
+24-11-19 17:29:47 | I |   eval=LlmEvalConfig(
+24-11-19 17:29:47 | I |     num_gpus=1,
+24-11-19 17:29:47 | I |     batch_size=8,
+24-11-19 17:29:47 | I |     tasks=['wikitext'],
+24-11-19 17:29:47 | I |     max_seq_length=-4096,
+24-11-19 17:29:47 | I |     evaluators=['gptq']),
+24-11-19 17:29:47 | I |   quant=LlmQuantConfig(
+24-11-19 17:29:47 | I |     wgts=LlmWeightQuantizerConfig(
+24-11-19 17:29:47 | I |       dtype=None,
+24-11-19 17:29:47 | I |       zero_point=None,
+24-11-19 17:29:47 | I |       group_shapes=((-1, -1, -1),),
+24-11-19 17:29:47 | I |       scale_dtypes=(None,),
+24-11-19 17:29:47 | I |       intermediate_dtypes=(),
+24-11-19 17:29:47 | I |       intermediate_levels=(),
+24-11-19 17:29:47 | I |       needs_dequant_saturation=False,
+24-11-19 17:29:47 | I |       skips=[],
+24-11-19 17:29:47 | I |       static=False,
+24-11-19 17:29:47 | I |       kernel_gptq=None,
+24-11-19 17:29:47 | I |       calib_range=None),
+24-11-19 17:29:47 | I |     ipts=LlmActivationQuantizerConfig(
+24-11-19 17:29:47 | I |       dtype=None,
+24-11-19 17:29:47 | I |       zero_point=None,
+24-11-19 17:29:47 | I |       group_shapes=((-1, -1, -1),),
+24-11-19 17:29:47 | I |       scale_dtypes=(None,),
+24-11-19 17:29:47 | I |       intermediate_dtypes=(),
+24-11-19 17:29:47 | I |       intermediate_levels=(),
+24-11-19 17:29:47 | I |       needs_dequant_saturation=False,
+24-11-19 17:29:47 | I |       skips=[],
+24-11-19 17:29:47 | I |       static=False,
+24-11-19 17:29:47 | I |       kernel_gptq=None,
+24-11-19 17:29:47 | I |       calib_range=None),
+24-11-19 17:29:47 | I |     opts=LlmActivationQuantizerConfig(
+24-11-19 17:29:47 | I |       dtype=None,
+24-11-19 17:29:47 | I |       zero_point=None,
+24-11-19 17:29:47 | I |       group_shapes=((-1, -1, -1),),
+24-11-19 17:29:47 | I |       scale_dtypes=(None,),
+24-11-19 17:29:47 | I |       intermediate_dtypes=(),
+24-11-19 17:29:47 | I |       intermediate_levels=(),
+24-11-19 17:29:47 | I |       needs_dequant_saturation=False,
+24-11-19 17:29:47 | I |       skips=[],
+24-11-19 17:29:47 | I |       static=False,
+24-11-19 17:29:47 | I |       kernel_gptq=None,
+24-11-19 17:29:47 | I |       calib_range=None),
+24-11-19 17:29:47 | I |     calib=LlmCalibDataLoaderConfig(
+24-11-19 17:29:47 | I |       data=pileval,
+24-11-19 17:29:47 | I |       num_samples=128,
+24-11-19 17:29:47 | I |       batch_size=1,
+24-11-19 17:29:47 | I |       path=mit-han-lab/pile-val-backup,
+24-11-19 17:29:47 | I |       seq_length=1024,
+24-11-19 17:29:47 | I |       min_seq_length=0,
+24-11-19 17:29:47 | I |       max_seq_length=0,
+24-11-19 17:29:47 | I |       local_path=),
+24-11-19 17:29:47 | I |     rotation=None,
+24-11-19 17:29:47 | I |     reorder=None,
+24-11-19 17:29:47 | I |     smooth=None,
+24-11-19 17:29:47 | I |     develop_dtype=torch.float32),
+24-11-19 17:29:47 | I |   seed=12345,
+24-11-19 17:29:47 | I |   skip_eval=False,
+24-11-19 17:29:47 | I |   load_from=,
+24-11-19 17:29:47 | I |   save_model=False,
+24-11-19 17:29:47 | I |   copy_on_save=False)
+24-11-19 17:29:47 | I | === Dumped Configurations ===
+24-11-19 17:29:47 | I | { 'cache': {'path': {'acts': '', 'reorder': '', 'rotation': '', 'smooth': '', 'wgts': ''}, 'root': 'runs/shang'},
+24-11-19 17:29:47 | I |   'copy_on_save': False,
+24-11-19 17:29:47 | I |   'eval': {'batch_size': 8, 'evaluators': ['gptq'], 'max_seq_length': -4096, 'num_gpus': 1, 'tasks': ['wikitext']},
+24-11-19 17:29:47 | I |   'load_from': '',
+24-11-19 17:29:47 | I |   'model': { 'dtype': 'torch.float16',
+24-11-19 17:29:47 | I |              'family': 'llama-3',
+24-11-19 17:29:47 | I |              'local_path': '/home/yujunlin/models/llama-3/llama-3-8b-instruct-gradient-1048k',
+24-11-19 17:29:47 | I |              'local_root': '/home/yujunlin/models',
+24-11-19 17:29:47 | I |              'name': 'llama-3-8b-instruct-gradient-1048k',
+24-11-19 17:29:47 | I |              'path': '/home/yujunlin/models/llama-3/llama-3-8b-instruct-gradient-1048k',
+24-11-19 17:29:47 | I |              'root': ''},
+24-11-19 17:29:47 | I |   'output': {'dirname': 'default-pileval.128x1024.[0-0]', 'job': 'run', 'root': 'runs/shang'},
+24-11-19 17:29:47 | I |   'quant': { 'calib': { 'data': 'pileval',
+24-11-19 17:29:47 | I |                         'local_path': '',
+24-11-19 17:29:47 | I |                         'max_seq_length': 0,
+24-11-19 17:29:47 | I |                         'min_seq_length': 0,
+24-11-19 17:29:47 | I |                         'num_samples': 128,
+24-11-19 17:29:47 | I |                         'path': 'mit-han-lab/pile-val-backup',
+24-11-19 17:29:47 | I |                         'seq_length': 1024},
+24-11-19 17:29:47 | I |              'develop_dtype': 'torch.float32',
+24-11-19 17:29:47 | I |              'enable_reorder': False,
+24-11-19 17:29:47 | I |              'enable_rotation': False,
+24-11-19 17:29:47 | I |              'enable_smooth': False,
+24-11-19 17:29:47 | I |              'ipts': { 'dtype': None,
+24-11-19 17:29:47 | I |                        'enable_calib_range': False,
+24-11-19 17:29:47 | I |                        'group_shapes': [[-1, -1, -1]],
+24-11-19 17:29:47 | I |                        'scale_dtypes': [None],
+24-11-19 17:29:47 | I |                        'skips': [],
+24-11-19 17:29:47 | I |                        'static': False,
+24-11-19 17:29:47 | I |                        'zero_point': None},
+24-11-19 17:29:47 | I |              'opts': { 'dtype': None,
+24-11-19 17:29:47 | I |                        'enable_calib_range': False,
+24-11-19 17:29:47 | I |                        'group_shapes': [[-1, -1, -1]],
+24-11-19 17:29:47 | I |                        'scale_dtypes': [None],
+24-11-19 17:29:47 | I |                        'skips': [],
+24-11-19 17:29:47 | I |                        'static': False,
+24-11-19 17:29:47 | I |                        'zero_point': None},
+24-11-19 17:29:47 | I |              'wgts': { 'dtype': None,
+24-11-19 17:29:47 | I |                        'enable_calib_range': False,
+24-11-19 17:29:47 | I |                        'enable_kernel_gptq': False,
+24-11-19 17:29:47 | I |                        'group_shapes': [[-1, -1, -1]],
+24-11-19 17:29:47 | I |                        'intermediate_dtypes': [],
+24-11-19 17:29:47 | I |                        'intermediate_levels': [],
+24-11-19 17:29:47 | I |                        'needs_dequant_saturation': False,
+24-11-19 17:29:47 | I |                        'scale_dtypes': [None],
+24-11-19 17:29:47 | I |                        'skips': [],
+24-11-19 17:29:47 | I |                        'zero_point': None}},
+24-11-19 17:29:47 | I |   'save_model': False,
+24-11-19 17:29:47 | I |   'seed': 12345,
+24-11-19 17:29:47 | I |   'skip_eval': False}
+24-11-19 17:29:47 | I | === Output Directory ===
+24-11-19 17:29:47 | I | runs/shang/llm/llama-3/llama-3-8b-instruct-gradient-1048k/w.16-x.16-y.16/w.fp16-x.fp16-y.fp16/w.tnsr.fp16-x.tnsr.fp16-y.tnsr.fp16/default-pileval.128x1024.[0-0]/run-241119.172947
+24-11-19 17:29:47 | I | === Start Evaluating ===
+24-11-19 17:29:47 | I | * Building model llama-3-8b-instruct-gradient-1048k from /home/yujunlin/models/llama-3/llama-3-8b-instruct-gradient-1048k
+24-11-19 17:29:48 | I |   We will use 90% of the memory on device 0 for storing the model, and 10% for the buffer to avoid OOM. You can set `max_memory` in to a higher value to use more memory (at your own risk).
+24-11-19 17:29:56 | I |   - Patching LlamaSdpaAttention.forward in model.layers.0.self_attn
+24-11-19 17:29:56 | I |   - Patching LlamaSdpaAttention.forward in model.layers.1.self_attn
+24-11-19 17:29:56 | I |   - Patching LlamaSdpaAttention.forward in model.layers.2.self_attn
+24-11-19 17:29:56 | I |   - Patching LlamaSdpaAttention.forward in model.layers.3.self_attn
+24-11-19 17:29:56 | I |   - Patching LlamaSdpaAttention.forward in model.layers.4.self_attn
+24-11-19 17:29:56 | I |   - Patching LlamaSdpaAttention.forward in model.layers.5.self_attn
+24-11-19 17:29:56 | I |   - Patching LlamaSdpaAttention.forward in model.layers.6.self_attn
+24-11-19 17:29:56 | I |   - Patching LlamaSdpaAttention.forward in model.layers.7.self_attn
+24-11-19 17:29:56 | I |   - Patching LlamaSdpaAttention.forward in model.layers.8.self_attn
+24-11-19 17:29:56 | I |   - Patching LlamaSdpaAttention.forward in model.layers.9.self_attn
+24-11-19 17:29:56 | I |   - Patching LlamaSdpaAttention.forward in model.layers.10.self_attn
+24-11-19 17:29:56 | I |   - Patching LlamaSdpaAttention.forward in model.layers.11.self_attn
+24-11-19 17:29:56 | I |   - Patching LlamaSdpaAttention.forward in model.layers.12.self_attn
+24-11-19 17:29:56 | I |   - Patching LlamaSdpaAttention.forward in model.layers.13.self_attn
+24-11-19 17:29:56 | I |   - Patching LlamaSdpaAttention.forward in model.layers.14.self_attn
+24-11-19 17:29:56 | I |   - Patching LlamaSdpaAttention.forward in model.layers.15.self_attn
+24-11-19 17:29:56 | I |   - Patching LlamaSdpaAttention.forward in model.layers.16.self_attn
+24-11-19 17:29:56 | I |   - Patching LlamaSdpaAttention.forward in model.layers.17.self_attn
+24-11-19 17:29:56 | I |   - Patching LlamaSdpaAttention.forward in model.layers.18.self_attn
+24-11-19 17:29:56 | I |   - Patching LlamaSdpaAttention.forward in model.layers.19.self_attn
+24-11-19 17:29:56 | I |   - Patching LlamaSdpaAttention.forward in model.layers.20.self_attn
+24-11-19 17:29:56 | I |   - Patching LlamaSdpaAttention.forward in model.layers.21.self_attn
+24-11-19 17:29:56 | I |   - Patching LlamaSdpaAttention.forward in model.layers.22.self_attn
+24-11-19 17:29:56 | I |   - Patching LlamaSdpaAttention.forward in model.layers.23.self_attn
+24-11-19 17:29:56 | I |   - Patching LlamaSdpaAttention.forward in model.layers.24.self_attn
+24-11-19 17:29:56 | I |   - Patching LlamaSdpaAttention.forward in model.layers.25.self_attn
+24-11-19 17:29:56 | I |   - Patching LlamaSdpaAttention.forward in model.layers.26.self_attn
+24-11-19 17:29:56 | I |   - Patching LlamaSdpaAttention.forward in model.layers.27.self_attn
+24-11-19 17:29:56 | I |   - Patching LlamaSdpaAttention.forward in model.layers.28.self_attn
+24-11-19 17:29:56 | I |   - Patching LlamaSdpaAttention.forward in model.layers.29.self_attn
+24-11-19 17:29:56 | I |   - Patching LlamaSdpaAttention.forward in model.layers.30.self_attn
+24-11-19 17:29:56 | I |   - Patching LlamaSdpaAttention.forward in model.layers.31.self_attn
+24-11-19 17:29:56 | I | * Development dtype is torch.float32
+24-11-19 17:29:56 | I | * Evaluating model
+24-11-19 17:29:56 | W |     `pretrained` model kwarg is not of type `str`. Many other model arguments may be ignored. Please do not launch via accelerate or use `parallelize=True` if passing an existing model this way.
+24-11-19 17:29:56 | I |     Using model type 'default'
+24-11-19 17:29:56 | W |     Passed an already-initialized model through `pretrained`, assuming single-process call to evaluate() or custom distributed integration
+24-11-19 17:29:56 | I |   - Evaluator: gptq
+24-11-19 17:29:56 | I |   - Tasks: ['wikitext']
+24-11-19 17:29:56 | I |   - Batch_size: 8
+24-11-19 17:29:56 | I |     + Max_seq_length: 2048
+24-11-19 17:29:56 | D |         Starting new HTTPS connection (1): huggingface.co:443
+24-11-19 17:30:03 | W |         Using the latest cached version of the dataset since wikitext couldn't be found on the Hugging Face Hub
+24-11-19 17:30:03 | W |         Found the latest cached dataset configuration 'wikitext-2-raw-v1' at /home/yujunlin/.cache/huggingface/datasets/wikitext/wikitext-2-raw-v1/0.0.0/b08601e04326c79dfdd32d625aee71d232d685c3 (last modified on Tue Oct  8 19:51:38 2024).
+24-11-19 17:30:03 | D |         Attempting to acquire lock 23438954666640 on /home/yujunlin/.cache/huggingface/datasets/_home_yujunlin_.cache_huggingface_datasets_wikitext_wikitext-2-raw-v1_0.0.0_b08601e04326c79dfdd32d625aee71d232d685c3.lock
+24-11-19 17:30:03 | D |         Lock 23438954666640 acquired on /home/yujunlin/.cache/huggingface/datasets/_home_yujunlin_.cache_huggingface_datasets_wikitext_wikitext-2-raw-v1_0.0.0_b08601e04326c79dfdd32d625aee71d232d685c3.lock
+24-11-19 17:30:03 | D |         open file: /home/yujunlin/.cache/huggingface/datasets/wikitext/wikitext-2-raw-v1/0.0.0/b08601e04326c79dfdd32d625aee71d232d685c3/dataset_info.json
+24-11-19 17:30:03 | D |         Attempting to release lock 23438954666640 on /home/yujunlin/.cache/huggingface/datasets/_home_yujunlin_.cache_huggingface_datasets_wikitext_wikitext-2-raw-v1_0.0.0_b08601e04326c79dfdd32d625aee71d232d685c3.lock
+24-11-19 17:30:03 | D |         Lock 23438954666640 released on /home/yujunlin/.cache/huggingface/datasets/_home_yujunlin_.cache_huggingface_datasets_wikitext_wikitext-2-raw-v1_0.0.0_b08601e04326c79dfdd32d625aee71d232d685c3.lock
+24-11-19 17:30:15 | I |       - Results:
+24-11-19 17:30:15 | I |         |  Task  |Version|    Metric     |Value |   |Stderr|
+24-11-19 17:30:15 | I |         |--------|------:|---------------|-----:|---|-----:|
+24-11-19 17:30:15 | I |         |wikitext|      1|word_perplexity|7.8331|±  |7.8331|
+24-11-19 17:30:15 | I |
+24-11-19 17:30:15 | I |     + Max_seq_length: 4096
+24-11-19 17:30:15 | D |         Starting new HTTPS connection (2): huggingface.co:443
+24-11-19 17:30:21 | W |         Using the latest cached version of the dataset since wikitext couldn't be found on the Hugging Face Hub
+24-11-19 17:30:21 | W |         Found the latest cached dataset configuration 'wikitext-2-raw-v1' at /home/yujunlin/.cache/huggingface/datasets/wikitext/wikitext-2-raw-v1/0.0.0/b08601e04326c79dfdd32d625aee71d232d685c3 (last modified on Tue Oct  8 19:51:38 2024).
+24-11-19 17:30:21 | D |         Attempting to acquire lock 23438952840800 on /home/yujunlin/.cache/huggingface/datasets/_home_yujunlin_.cache_huggingface_datasets_wikitext_wikitext-2-raw-v1_0.0.0_b08601e04326c79dfdd32d625aee71d232d685c3.lock
+24-11-19 17:30:21 | D |         Lock 23438952840800 acquired on /home/yujunlin/.cache/huggingface/datasets/_home_yujunlin_.cache_huggingface_datasets_wikitext_wikitext-2-raw-v1_0.0.0_b08601e04326c79dfdd32d625aee71d232d685c3.lock
+24-11-19 17:30:21 | D |         open file: /home/yujunlin/.cache/huggingface/datasets/wikitext/wikitext-2-raw-v1/0.0.0/b08601e04326c79dfdd32d625aee71d232d685c3/dataset_info.json
+24-11-19 17:30:21 | D |         Attempting to release lock 23438952840800 on /home/yujunlin/.cache/huggingface/datasets/_home_yujunlin_.cache_huggingface_datasets_wikitext_wikitext-2-raw-v1_0.0.0_b08601e04326c79dfdd32d625aee71d232d685c3.lock
+24-11-19 17:30:21 | D |         Lock 23438952840800 released on /home/yujunlin/.cache/huggingface/datasets/_home_yujunlin_.cache_huggingface_datasets_wikitext_wikitext-2-raw-v1_0.0.0_b08601e04326c79dfdd32d625aee71d232d685c3.lock
+24-11-19 17:30:32 | I |       - Results:
+24-11-19 17:30:32 | I |         |  Task  |Version|    Metric     |Value |   |Stderr|
+24-11-19 17:30:32 | I |         |--------|------:|---------------|-----:|---|-----:|
+24-11-19 17:30:32 | I |         |wikitext|      1|word_perplexity|7.2618|±  |7.2618|
+24-11-19 17:30:32 | I |
+24-11-19 17:30:32 | I | * Saving results to runs/shang/llm/llama-3/llama-3-8b-instruct-gradient-1048k/w.16-x.16-y.16/w.fp16-x.fp16-y.fp16/w.tnsr.fp16-x.tnsr.fp16-y.tnsr.fp16/default-pileval.128x1024.[0-0]/run-241119.172947

runs/llama-3-8b-instruct-gradient-1048k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.GridSearch.bn2.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200545/config-241119.200545.yaml ADDED Viewed

	@@ -0,0 +1,146 @@

+cache:
+  root: runs/shang
+  path:
+    rotation: runs/shang/llm/cache/quant/rotation/hadamard/llama-3-8b-instruct-gradient-1048k.pt
+    reorder: ''
+    smooth: runs/shang/llm/cache/quant/pileval.128x1024.[0-0]/smooth/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/w.skip.[]-x.skip.[]-y.skip.[attn_q]/rotate.hadamard.[out_proj]/smooth.attn.OutputsError.GridSearch.Layer.d2.en1.sn1/smooth.attn.[a.AbsMax.b.AbsMax]/smooth.attn.g20.bn2/llama-3-8b-instruct-gradient-1048k.pt
+    wgts: runs/shang/llm/cache/quant/pileval.128x1024.[0-0]/wgts/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/w.skip.[]-x.skip.[]-y.skip.[attn_q]/rotate.hadamard.[out_proj]/smooth.attn.OutputsError.GridSearch.Layer.d2.en1.sn1/smooth.attn.[a.AbsMax.b.AbsMax]/smooth.attn.g20.bn2/w.kernel.gptq.d0p01.b128/w.range.OutputsError.GridSearch.Group.d2.e512.sn1/w.range.r.[0p2.1].g80/w.range.skip.[]/llama-3-8b-instruct-gradient-1048k.pt
+    acts: runs/shang/llm/cache/quant/pileval.128x1024.[0-0]/acts/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/w.skip.[]-x.skip.[]-y.skip.[attn_q]/rotate.hadamard.[out_proj]/smooth.attn.OutputsError.GridSearch.Layer.d2.en1.sn1/smooth.attn.[a.AbsMax.b.AbsMax]/smooth.attn.g20.bn2/w.kernel.gptq.d0p01.b128/w.range.OutputsError.GridSearch.Group.d2.e512.sn1/w.range.r.[0p2.1].g80/w.range.skip.[]/y.range.OutputsError.Manual.Layer.d2.en1.sn1/y.range.r.[1]/y.range.skip.[]/llama-3-8b-instruct-gradient-1048k.pt
+output:
+  root: runs/shang
+  dirname: skip.y.[q]-gptq-rot.[+out]-smth.attn.GridSearch.bn2.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]
+  job: run
+model:
+  name: llama-3-8b-instruct-gradient-1048k
+  family: llama-3
+  path: /home/yujunlin/models/llama-3/llama-3-8b-instruct-gradient-1048k
+  root: ''
+  local_path: /home/yujunlin/models/llama-3/llama-3-8b-instruct-gradient-1048k
+  local_root: /home/yujunlin/models
+  dtype: torch.float16
+eval:
+  num_gpus: 1
+  batch_size: 8
+  tasks:
+  - wikitext
+  max_seq_length: -4096
+  evaluators:
+  - gptq
+quant:
+  wgts:
+    dtype: sint8
+    zero_point: null
+    group_shapes:
+    - - 1
+      - -1
+      - -1
+    scale_dtypes:
+    - torch.float16
+    intermediate_dtypes: []
+    intermediate_levels: []
+    needs_dequant_saturation: false
+    skips: []
+    enable_kernel_gptq: true
+    kernel_gptq:
+      damp_percentage: 0.01
+      block_size: 128
+      num_inv_tries: 250
+      hessian_block_size: 512
+    enable_calib_range: true
+    calib_range:
+      degree: 2
+      objective: OutputsError
+      strategy: GridSearch
+      granularity: Group
+      element_batch_size: 64
+      sample_batch_size: -1
+      element_size: 512
+      sample_size: -1
+      pre_reshape: true
+      outputs_device: cpu
+      ratio: 1.0
+      max_shrink: 0.2
+      max_expand: 1.0
+      num_grids: 80
+      allow_scale: false
+      skips: []
+  ipts:
+    dtype: sint8
+    zero_point: null
+    group_shapes:
+    - - 1
+      - -1
+      - -1
+    scale_dtypes:
+    - torch.float16
+    skips: []
+    static: false
+    enable_calib_range: false
+  opts:
+    dtype: sint8
+    zero_point: null
+    group_shapes:
+    - - -1
+      - -1
+      - -1
+    scale_dtypes:
+    - torch.float16
+    skips:
+    - attn_q
+    static: true
+    enable_calib_range: true
+    calib_range:
+      degree: 2
+      objective: OutputsError
+      strategy: Manual
+      granularity: Layer
+      element_batch_size: -1
+      sample_batch_size: -1
+      element_size: -1
+      sample_size: -1
+      pre_reshape: true
+      outputs_device: cpu
+      ratio: 1.0
+      max_shrink: 0.2
+      max_expand: 1.0
+      num_grids: 80
+      allow_scale: false
+      skips: []
+  calib:
+    data: pileval
+    num_samples: 128
+    path: mit-han-lab/pile-val-backup
+    seq_length: 1024
+    min_seq_length: 0
+    max_seq_length: 0
+    local_path: ''
+  enable_rotation: true
+  rotation:
+    random: false
+    transforms:
+    - out_proj
+  enable_reorder: false
+  enable_smooth: true
+  smooth:
+    enable_proj: false
+    enable_attn: true
+    attn:
+      degree: 2
+      strategy: GridSearch
+      sample_batch_size: -1
+      sample_size: -1
+      outputs_device: cpu
+      allow_a_quant: true
+      allow_b_quant: true
+      spans:
+      - - AbsMax
+        - AbsMax
+      alpha: 0.5
+      beta: -2
+      num_grids: 20
+  develop_dtype: torch.float32
+seed: 12345
+skip_eval: false
+load_from: ''
+save_model: 'true'
+copy_on_save: false

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ee8c1a641aeb1fd1a3376f43e71fb6e4d46ed629e0117cc990b225cc4521eeab
+size 36034

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:86a0a4420a3ab6b037c08654751c88818251fea6062d3041237ff4f2e3b00907
+size 16060644786

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:50249bca461c76fdeb0f51b07f019025e975e67b47eb49f2a69e5d5dee80e195
+size 134219107

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5a8f507ae04bfb9f02a9d7dc98d60b7dd69a9d6ed4b654ed8d64a71f7f52c487
+size 5631594

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:64eef470c18d4efd8590cc9d25734e3313da1181fc3f4282ab6d09cfd58ecd93
+size 535234

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e479d958e3157b658360082fd2c6699cc38833c35d198babf006616a1564d95a
+size 5593150

	@@ -0,0 +1,32 @@

+{
+  "gptq": {
+    "2048": {
+      "results": {
+        "wikitext": {
+          "word_perplexity": 7.989659368184836
+        }
+      },
+      "versions": {
+        "wikitext": 1
+      },
+      "config": {
+        "model": "llama-3-8b-instruct-gradient-1048k"
+      },
+      "model": "llama-3-8b-instruct-gradient-1048k"
+    },
+    "4096": {
+      "results": {
+        "wikitext": {
+          "word_perplexity": 7.396661695312681
+        }
+      },
+      "versions": {
+        "wikitext": 1
+      },
+      "config": {
+        "model": "llama-3-8b-instruct-gradient-1048k"
+      },
+      "model": "llama-3-8b-instruct-gradient-1048k"
+    }
+  }
+}

The diff for this file is too large to render. See raw diff

runs/llama-3-8b-instruct-gradient-1048k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.a0p5.b0.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200729/config-241119.200729.yaml ADDED Viewed

	@@ -0,0 +1,146 @@

+cache:
+  root: runs/shang
+  path:
+    rotation: runs/shang/llm/cache/quant/rotation/hadamard/llama-3-8b-instruct-gradient-1048k.pt
+    reorder: ''
+    smooth: runs/shang/llm/cache/quant/pileval.128x1024.[0-0]/smooth/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/w.skip.[]-x.skip.[]-y.skip.[attn_q]/rotate.hadamard.[out_proj]/smooth.attn.OutputsError.Manual.Layer.d2.en1.sn1/smooth.attn.[a.AbsMax.b.AbsMax]/smooth.attn.a0p5.b0/llama-3-8b-instruct-gradient-1048k.pt
+    wgts: runs/shang/llm/cache/quant/pileval.128x1024.[0-0]/wgts/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/w.skip.[]-x.skip.[]-y.skip.[attn_q]/rotate.hadamard.[out_proj]/smooth.attn.OutputsError.Manual.Layer.d2.en1.sn1/smooth.attn.[a.AbsMax.b.AbsMax]/smooth.attn.a0p5.b0/w.kernel.gptq.d0p01.b128/w.range.OutputsError.GridSearch.Group.d2.e512.sn1/w.range.r.[0p2.1].g80/w.range.skip.[]/llama-3-8b-instruct-gradient-1048k.pt
+    acts: runs/shang/llm/cache/quant/pileval.128x1024.[0-0]/acts/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/w.skip.[]-x.skip.[]-y.skip.[attn_q]/rotate.hadamard.[out_proj]/smooth.attn.OutputsError.Manual.Layer.d2.en1.sn1/smooth.attn.[a.AbsMax.b.AbsMax]/smooth.attn.a0p5.b0/w.kernel.gptq.d0p01.b128/w.range.OutputsError.GridSearch.Group.d2.e512.sn1/w.range.r.[0p2.1].g80/w.range.skip.[]/y.range.OutputsError.Manual.Layer.d2.en1.sn1/y.range.r.[1]/y.range.skip.[]/llama-3-8b-instruct-gradient-1048k.pt
+output:
+  root: runs/shang
+  dirname: skip.y.[q]-gptq-rot.[+out]-smth.attn.a0p5.b0.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]
+  job: run
+model:
+  name: llama-3-8b-instruct-gradient-1048k
+  family: llama-3
+  path: /home/yujunlin/models/llama-3/llama-3-8b-instruct-gradient-1048k
+  root: ''
+  local_path: /home/yujunlin/models/llama-3/llama-3-8b-instruct-gradient-1048k
+  local_root: /home/yujunlin/models
+  dtype: torch.float16
+eval:
+  num_gpus: 1
+  batch_size: 8
+  tasks:
+  - wikitext
+  max_seq_length: -4096
+  evaluators:
+  - gptq
+quant:
+  wgts:
+    dtype: sint8
+    zero_point: null
+    group_shapes:
+    - - 1
+      - -1
+      - -1
+    scale_dtypes:
+    - torch.float16
+    intermediate_dtypes: []
+    intermediate_levels: []
+    needs_dequant_saturation: false
+    skips: []
+    enable_kernel_gptq: true
+    kernel_gptq:
+      damp_percentage: 0.01
+      block_size: 128
+      num_inv_tries: 250
+      hessian_block_size: 512
+    enable_calib_range: true
+    calib_range:
+      degree: 2
+      objective: OutputsError
+      strategy: GridSearch
+      granularity: Group
+      element_batch_size: 64
+      sample_batch_size: -1
+      element_size: 512
+      sample_size: -1
+      pre_reshape: true
+      outputs_device: cpu
+      ratio: 1.0
+      max_shrink: 0.2
+      max_expand: 1.0
+      num_grids: 80
+      allow_scale: false
+      skips: []
+  ipts:
+    dtype: sint8
+    zero_point: null
+    group_shapes:
+    - - 1
+      - -1
+      - -1
+    scale_dtypes:
+    - torch.float16
+    skips: []
+    static: false
+    enable_calib_range: false
+  opts:
+    dtype: sint8
+    zero_point: null
+    group_shapes:
+    - - -1
+      - -1
+      - -1
+    scale_dtypes:
+    - torch.float16
+    skips:
+    - attn_q
+    static: true
+    enable_calib_range: true
+    calib_range:
+      degree: 2
+      objective: OutputsError
+      strategy: Manual
+      granularity: Layer
+      element_batch_size: -1
+      sample_batch_size: -1
+      element_size: -1
+      sample_size: -1
+      pre_reshape: true
+      outputs_device: cpu
+      ratio: 1.0
+      max_shrink: 0.2
+      max_expand: 1.0
+      num_grids: 80
+      allow_scale: false
+      skips: []
+  calib:
+    data: pileval
+    num_samples: 128
+    path: mit-han-lab/pile-val-backup
+    seq_length: 1024
+    min_seq_length: 0
+    max_seq_length: 0
+    local_path: ''
+  enable_rotation: true
+  rotation:
+    random: false
+    transforms:
+    - out_proj
+  enable_reorder: false
+  enable_smooth: true
+  smooth:
+    enable_proj: false
+    enable_attn: true
+    attn:
+      degree: 2
+      strategy: Manual
+      sample_batch_size: -1
+      sample_size: -1
+      outputs_device: cpu
+      allow_a_quant: true
+      allow_b_quant: true
+      spans:
+      - - AbsMax
+        - AbsMax
+      alpha: 0.5
+      beta: 0
+      num_grids: 20
+  develop_dtype: torch.float32
+seed: 12345
+skip_eval: false
+load_from: ''
+save_model: 'true'
+copy_on_save: false

runs/llama-3-8b-instruct-gradient-1048k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.a0p5.b0.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200729/model/acts.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ac70125cfcf7096842127bff88c84a458b44f2fc66bf8fb1970940c922e9e805
+size 36034

runs/llama-3-8b-instruct-gradient-1048k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.a0p5.b0.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200729/model/model.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:125f19e7dc83de3adaf1c1b7c9c20d58c72f075b630c568116fe3e6da7c90719
+size 16060644786

runs/llama-3-8b-instruct-gradient-1048k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.a0p5.b0.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200729/model/rotation.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:50249bca461c76fdeb0f51b07f019025e975e67b47eb49f2a69e5d5dee80e195
+size 134219107

runs/llama-3-8b-instruct-gradient-1048k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.a0p5.b0.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200729/model/scale.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8e586371fed1a42757c5fdf37cca9febf4f2fbd8a7c4a5dd97902d01fc95c931
+size 5631594

runs/llama-3-8b-instruct-gradient-1048k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.a0p5.b0.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200729/model/smooth.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a3f26f4aed70f6838fd246750c38631eed6601e521a12ba700c6c065f27e37dd
+size 535234

runs/llama-3-8b-instruct-gradient-1048k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.a0p5.b0.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200729/model/wgts.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b399ac3cecb57039c93754780228596a35530dee066aee45312a98c3933a7f4b
+size 5593150

runs/llama-3-8b-instruct-gradient-1048k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.a0p5.b0.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200729/results-241119.200729.json ADDED Viewed

	@@ -0,0 +1,32 @@

+{
+  "gptq": {
+    "2048": {
+      "results": {
+        "wikitext": {
+          "word_perplexity": 7.989206018837591
+        }
+      },
+      "versions": {
+        "wikitext": 1
+      },
+      "config": {
+        "model": "llama-3-8b-instruct-gradient-1048k"
+      },
+      "model": "llama-3-8b-instruct-gradient-1048k"
+    },
+    "4096": {
+      "results": {
+        "wikitext": {
+          "word_perplexity": 7.399224506977849
+        }
+      },
+      "versions": {
+        "wikitext": 1
+      },
+      "config": {
+        "model": "llama-3-8b-instruct-gradient-1048k"
+      },
+      "model": "llama-3-8b-instruct-gradient-1048k"
+    }
+  }
+}

runs/llama-3-8b-instruct-gradient-1048k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.a0p5.b0.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200729/run-241119.200729.log ADDED Viewed

The diff for this file is too large to render. See raw diff