ys-2020 commited on
Commit
9845231
·
1 Parent(s): 834b257
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. runs/llama-2-7b-instruct-together-32k/w.16-x.16-y.16/w.fp16-x.fp16-y.fp16/w.tnsr.fp16-x.tnsr.fp16-y.tnsr.fp16/default-pileval.128x1024.[0-0]/run-241119.183703/config-241119.183703.yaml +85 -0
  2. runs/llama-2-7b-instruct-together-32k/w.16-x.16-y.16/w.fp16-x.fp16-y.fp16/w.tnsr.fp16-x.tnsr.fp16-y.tnsr.fp16/default-pileval.128x1024.[0-0]/run-241119.183703/results-241119.183703.json +32 -0
  3. runs/llama-2-7b-instruct-together-32k/w.16-x.16-y.16/w.fp16-x.fp16-y.fp16/w.tnsr.fp16-x.tnsr.fp16-y.tnsr.fp16/default-pileval.128x1024.[0-0]/run-241119.183703/run-241119.183703.log +219 -0
  4. runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.GridSearch.bn2.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200548/config-241119.200548.yaml +146 -0
  5. runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.GridSearch.bn2.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200548/model/acts.pt +3 -0
  6. runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.GridSearch.bn2.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200548/model/model.pt +3 -0
  7. runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.GridSearch.bn2.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200548/model/rotation.pt +3 -0
  8. runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.GridSearch.bn2.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200548/model/scale.pt +3 -0
  9. runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.GridSearch.bn2.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200548/model/smooth.pt +3 -0
  10. runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.GridSearch.bn2.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200548/model/wgts.pt +3 -0
  11. runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.GridSearch.bn2.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200548/results-241119.200548.json +32 -0
  12. runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.GridSearch.bn2.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200548/run-241119.185856.log +0 -0
  13. runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.GridSearch.bn2.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200548/run-241119.200548.log +0 -0
  14. runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.a0p5.b0.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0].RUNNING/run-241119.200727.RUNNING/config-241119.200727.yaml +146 -0
  15. runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.a0p5.b0.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0].RUNNING/run-241119.200727.RUNNING/model/model.pt +3 -0
  16. runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.a0p5.b0.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0].RUNNING/run-241119.200727.RUNNING/model/rotation.pt +3 -0
  17. runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.a0p5.b0.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0].RUNNING/run-241119.200727.RUNNING/model/scale.pt +3 -0
  18. runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.a0p5.b0.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0].RUNNING/run-241119.200727.RUNNING/model/smooth.pt +3 -0
  19. runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.a0p5.b0.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0].RUNNING/run-241119.200727.RUNNING/model/wgts.pt +3 -0
  20. runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.a0p5.b0.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0].RUNNING/run-241119.200727.RUNNING/run-241119.200727.log +0 -0
  21. runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.201608/config-241119.201608.yaml +129 -0
  22. runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.201608/model/acts.pt +3 -0
  23. runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.201608/model/model.pt +3 -0
  24. runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.201608/model/rotation.pt +3 -0
  25. runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.201608/model/scale.pt +3 -0
  26. runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.201608/model/wgts.pt +3 -0
  27. runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.201608/results-241119.201608.json +32 -0
  28. runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.201608/run-241119.201608.log +0 -0
  29. runs/llama-3-8b-instruct-gradient-1048k/w.16-x.16-y.16/w.fp16-x.fp16-y.fp16/w.tnsr.fp16-x.tnsr.fp16-y.tnsr.fp16/default-pileval.128x1024.[0-0]/run-241119.172947/config-241119.172947.yaml +85 -0
  30. runs/llama-3-8b-instruct-gradient-1048k/w.16-x.16-y.16/w.fp16-x.fp16-y.fp16/w.tnsr.fp16-x.tnsr.fp16-y.tnsr.fp16/default-pileval.128x1024.[0-0]/run-241119.172947/results-241119.172947.json +32 -0
  31. runs/llama-3-8b-instruct-gradient-1048k/w.16-x.16-y.16/w.fp16-x.fp16-y.fp16/w.tnsr.fp16-x.tnsr.fp16-y.tnsr.fp16/default-pileval.128x1024.[0-0]/run-241119.172947/run-241119.172947.log +219 -0
  32. runs/llama-3-8b-instruct-gradient-1048k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.GridSearch.bn2.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200545/config-241119.200545.yaml +146 -0
  33. runs/llama-3-8b-instruct-gradient-1048k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.GridSearch.bn2.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200545/model/acts.pt +3 -0
  34. runs/llama-3-8b-instruct-gradient-1048k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.GridSearch.bn2.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200545/model/model.pt +3 -0
  35. runs/llama-3-8b-instruct-gradient-1048k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.GridSearch.bn2.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200545/model/rotation.pt +3 -0
  36. runs/llama-3-8b-instruct-gradient-1048k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.GridSearch.bn2.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200545/model/scale.pt +3 -0
  37. runs/llama-3-8b-instruct-gradient-1048k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.GridSearch.bn2.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200545/model/smooth.pt +3 -0
  38. runs/llama-3-8b-instruct-gradient-1048k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.GridSearch.bn2.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200545/model/wgts.pt +3 -0
  39. runs/llama-3-8b-instruct-gradient-1048k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.GridSearch.bn2.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200545/results-241119.200545.json +32 -0
  40. runs/llama-3-8b-instruct-gradient-1048k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.GridSearch.bn2.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200545/run-241119.183745.log +0 -0
  41. runs/llama-3-8b-instruct-gradient-1048k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.GridSearch.bn2.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200545/run-241119.200545.log +0 -0
  42. runs/llama-3-8b-instruct-gradient-1048k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.a0p5.b0.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200729/config-241119.200729.yaml +146 -0
  43. runs/llama-3-8b-instruct-gradient-1048k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.a0p5.b0.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200729/model/acts.pt +3 -0
  44. runs/llama-3-8b-instruct-gradient-1048k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.a0p5.b0.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200729/model/model.pt +3 -0
  45. runs/llama-3-8b-instruct-gradient-1048k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.a0p5.b0.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200729/model/rotation.pt +3 -0
  46. runs/llama-3-8b-instruct-gradient-1048k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.a0p5.b0.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200729/model/scale.pt +3 -0
  47. runs/llama-3-8b-instruct-gradient-1048k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.a0p5.b0.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200729/model/smooth.pt +3 -0
  48. runs/llama-3-8b-instruct-gradient-1048k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.a0p5.b0.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200729/model/wgts.pt +3 -0
  49. runs/llama-3-8b-instruct-gradient-1048k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.a0p5.b0.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200729/results-241119.200729.json +32 -0
  50. runs/llama-3-8b-instruct-gradient-1048k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.a0p5.b0.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200729/run-241119.200729.log +0 -0
runs/llama-2-7b-instruct-together-32k/w.16-x.16-y.16/w.fp16-x.fp16-y.fp16/w.tnsr.fp16-x.tnsr.fp16-y.tnsr.fp16/default-pileval.128x1024.[0-0]/run-241119.183703/config-241119.183703.yaml ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ cache:
2
+ root: runs/shang
3
+ path:
4
+ rotation: ''
5
+ reorder: ''
6
+ smooth: ''
7
+ wgts: ''
8
+ acts: ''
9
+ output:
10
+ root: runs/shang
11
+ dirname: default-pileval.128x1024.[0-0]
12
+ job: run
13
+ model:
14
+ name: llama-2-7b-instruct-together-32k
15
+ family: llama-2
16
+ path: /home/yujunlin/models/llama-2/llama-2-7b-instruct-together-32k
17
+ root: ''
18
+ local_path: /home/yujunlin/models/llama-2/llama-2-7b-instruct-together-32k
19
+ local_root: /home/yujunlin/models
20
+ dtype: torch.float16
21
+ eval:
22
+ num_gpus: 1
23
+ batch_size: 8
24
+ tasks:
25
+ - wikitext
26
+ max_seq_length: -4096
27
+ evaluators:
28
+ - gptq
29
+ quant:
30
+ wgts:
31
+ dtype: null
32
+ zero_point: null
33
+ group_shapes:
34
+ - - -1
35
+ - -1
36
+ - -1
37
+ scale_dtypes:
38
+ - null
39
+ intermediate_dtypes: []
40
+ intermediate_levels: []
41
+ needs_dequant_saturation: false
42
+ skips: []
43
+ enable_kernel_gptq: false
44
+ enable_calib_range: false
45
+ ipts:
46
+ dtype: null
47
+ zero_point: null
48
+ group_shapes:
49
+ - - -1
50
+ - -1
51
+ - -1
52
+ scale_dtypes:
53
+ - null
54
+ skips: []
55
+ static: false
56
+ enable_calib_range: false
57
+ opts:
58
+ dtype: null
59
+ zero_point: null
60
+ group_shapes:
61
+ - - -1
62
+ - -1
63
+ - -1
64
+ scale_dtypes:
65
+ - null
66
+ skips: []
67
+ static: false
68
+ enable_calib_range: false
69
+ calib:
70
+ data: pileval
71
+ num_samples: 128
72
+ path: mit-han-lab/pile-val-backup
73
+ seq_length: 1024
74
+ min_seq_length: 0
75
+ max_seq_length: 0
76
+ local_path: ''
77
+ enable_rotation: false
78
+ enable_reorder: false
79
+ enable_smooth: false
80
+ develop_dtype: torch.float32
81
+ seed: 12345
82
+ skip_eval: false
83
+ load_from: ''
84
+ save_model: false
85
+ copy_on_save: false
runs/llama-2-7b-instruct-together-32k/w.16-x.16-y.16/w.fp16-x.fp16-y.fp16/w.tnsr.fp16-x.tnsr.fp16-y.tnsr.fp16/default-pileval.128x1024.[0-0]/run-241119.183703/results-241119.183703.json ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "gptq": {
3
+ "2048": {
4
+ "results": {
5
+ "wikitext": {
6
+ "word_perplexity": 6.443161573358209
7
+ }
8
+ },
9
+ "versions": {
10
+ "wikitext": 1
11
+ },
12
+ "config": {
13
+ "model": "llama-2-7b-instruct-together-32k"
14
+ },
15
+ "model": "llama-2-7b-instruct-together-32k"
16
+ },
17
+ "4096": {
18
+ "results": {
19
+ "wikitext": {
20
+ "word_perplexity": 5.964906855443073
21
+ }
22
+ },
23
+ "versions": {
24
+ "wikitext": 1
25
+ },
26
+ "config": {
27
+ "model": "llama-2-7b-instruct-together-32k"
28
+ },
29
+ "model": "llama-2-7b-instruct-together-32k"
30
+ }
31
+ }
32
+ }
runs/llama-2-7b-instruct-together-32k/w.16-x.16-y.16/w.fp16-x.fp16-y.fp16/w.tnsr.fp16-x.tnsr.fp16-y.tnsr.fp16/default-pileval.128x1024.[0-0]/run-241119.183703/run-241119.183703.log ADDED
@@ -0,0 +1,219 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 24-11-19 18:37:03 | I | === Configurations ===
2
+ 24-11-19 18:37:03 | I | LlmPtqRunConfig(
3
+ 24-11-19 18:37:03 | I | cache=LlmCacheConfig(
4
+ 24-11-19 18:37:03 | I | root=runs/shang,
5
+ 24-11-19 18:37:03 | I | dirpath=LlmQuantCacheConfig(
6
+ 24-11-19 18:37:03 | I | rotation=,
7
+ 24-11-19 18:37:03 | I | reorder=,
8
+ 24-11-19 18:37:03 | I | smooth=,
9
+ 24-11-19 18:37:03 | I | wgts=,
10
+ 24-11-19 18:37:03 | I | acts=),
11
+ 24-11-19 18:37:03 | I | path=LlmQuantCacheConfig(
12
+ 24-11-19 18:37:03 | I | rotation=,
13
+ 24-11-19 18:37:03 | I | reorder=,
14
+ 24-11-19 18:37:03 | I | smooth=,
15
+ 24-11-19 18:37:03 | I | wgts=,
16
+ 24-11-19 18:37:03 | I | acts=)),
17
+ 24-11-19 18:37:03 | I | output=OutputConfig(
18
+ 24-11-19 18:37:03 | I | root=runs/shang,
19
+ 24-11-19 18:37:03 | I | dirname=default-pileval.128x1024.[0-0],
20
+ 24-11-19 18:37:03 | I | job=run,
21
+ 24-11-19 18:37:03 | I | dirpath=runs/shang/llm/llama-2/llama-2-7b-instruct-together-32k/w.16-x.16-y.16/w.fp16-x.fp16-y.fp16/w.tnsr.fp16-x.tnsr.fp16-y.tnsr.fp16/default-pileval.128x1024.[0-0],
22
+ 24-11-19 18:37:03 | I | timestamp=241119.183703),
23
+ 24-11-19 18:37:03 | I | model=LlmModelConfig(
24
+ 24-11-19 18:37:03 | I | name=llama-2-7b-instruct-together-32k,
25
+ 24-11-19 18:37:03 | I | family=llama-2,
26
+ 24-11-19 18:37:03 | I | path=/home/yujunlin/models/llama-2/llama-2-7b-instruct-together-32k,
27
+ 24-11-19 18:37:03 | I | root=,
28
+ 24-11-19 18:37:03 | I | local_path=/home/yujunlin/models/llama-2/llama-2-7b-instruct-together-32k,
29
+ 24-11-19 18:37:03 | I | local_root=/home/yujunlin/models,
30
+ 24-11-19 18:37:03 | I | size=7.0,
31
+ 24-11-19 18:37:03 | I | variant=instruct-together-32k,
32
+ 24-11-19 18:37:03 | I | dtype=torch.float16,
33
+ 24-11-19 18:37:03 | I | orig_dtype=torch.float16),
34
+ 24-11-19 18:37:03 | I | eval=LlmEvalConfig(
35
+ 24-11-19 18:37:03 | I | num_gpus=1,
36
+ 24-11-19 18:37:03 | I | batch_size=8,
37
+ 24-11-19 18:37:03 | I | tasks=['wikitext'],
38
+ 24-11-19 18:37:03 | I | max_seq_length=-4096,
39
+ 24-11-19 18:37:03 | I | evaluators=['gptq']),
40
+ 24-11-19 18:37:03 | I | quant=LlmQuantConfig(
41
+ 24-11-19 18:37:03 | I | wgts=LlmWeightQuantizerConfig(
42
+ 24-11-19 18:37:03 | I | dtype=None,
43
+ 24-11-19 18:37:03 | I | zero_point=None,
44
+ 24-11-19 18:37:03 | I | group_shapes=((-1, -1, -1),),
45
+ 24-11-19 18:37:03 | I | scale_dtypes=(None,),
46
+ 24-11-19 18:37:03 | I | intermediate_dtypes=(),
47
+ 24-11-19 18:37:03 | I | intermediate_levels=(),
48
+ 24-11-19 18:37:03 | I | needs_dequant_saturation=False,
49
+ 24-11-19 18:37:03 | I | skips=[],
50
+ 24-11-19 18:37:03 | I | static=False,
51
+ 24-11-19 18:37:03 | I | kernel_gptq=None,
52
+ 24-11-19 18:37:03 | I | calib_range=None),
53
+ 24-11-19 18:37:03 | I | ipts=LlmActivationQuantizerConfig(
54
+ 24-11-19 18:37:03 | I | dtype=None,
55
+ 24-11-19 18:37:03 | I | zero_point=None,
56
+ 24-11-19 18:37:03 | I | group_shapes=((-1, -1, -1),),
57
+ 24-11-19 18:37:03 | I | scale_dtypes=(None,),
58
+ 24-11-19 18:37:03 | I | intermediate_dtypes=(),
59
+ 24-11-19 18:37:03 | I | intermediate_levels=(),
60
+ 24-11-19 18:37:03 | I | needs_dequant_saturation=False,
61
+ 24-11-19 18:37:03 | I | skips=[],
62
+ 24-11-19 18:37:03 | I | static=False,
63
+ 24-11-19 18:37:03 | I | kernel_gptq=None,
64
+ 24-11-19 18:37:03 | I | calib_range=None),
65
+ 24-11-19 18:37:03 | I | opts=LlmActivationQuantizerConfig(
66
+ 24-11-19 18:37:03 | I | dtype=None,
67
+ 24-11-19 18:37:03 | I | zero_point=None,
68
+ 24-11-19 18:37:03 | I | group_shapes=((-1, -1, -1),),
69
+ 24-11-19 18:37:03 | I | scale_dtypes=(None,),
70
+ 24-11-19 18:37:03 | I | intermediate_dtypes=(),
71
+ 24-11-19 18:37:03 | I | intermediate_levels=(),
72
+ 24-11-19 18:37:03 | I | needs_dequant_saturation=False,
73
+ 24-11-19 18:37:03 | I | skips=[],
74
+ 24-11-19 18:37:03 | I | static=False,
75
+ 24-11-19 18:37:03 | I | kernel_gptq=None,
76
+ 24-11-19 18:37:03 | I | calib_range=None),
77
+ 24-11-19 18:37:03 | I | calib=LlmCalibDataLoaderConfig(
78
+ 24-11-19 18:37:03 | I | data=pileval,
79
+ 24-11-19 18:37:03 | I | num_samples=128,
80
+ 24-11-19 18:37:03 | I | batch_size=1,
81
+ 24-11-19 18:37:03 | I | path=mit-han-lab/pile-val-backup,
82
+ 24-11-19 18:37:03 | I | seq_length=1024,
83
+ 24-11-19 18:37:03 | I | min_seq_length=0,
84
+ 24-11-19 18:37:03 | I | max_seq_length=0,
85
+ 24-11-19 18:37:03 | I | local_path=),
86
+ 24-11-19 18:37:03 | I | rotation=None,
87
+ 24-11-19 18:37:03 | I | reorder=None,
88
+ 24-11-19 18:37:03 | I | smooth=None,
89
+ 24-11-19 18:37:03 | I | develop_dtype=torch.float32),
90
+ 24-11-19 18:37:03 | I | seed=12345,
91
+ 24-11-19 18:37:03 | I | skip_eval=False,
92
+ 24-11-19 18:37:03 | I | load_from=,
93
+ 24-11-19 18:37:03 | I | save_model=False,
94
+ 24-11-19 18:37:03 | I | copy_on_save=False)
95
+ 24-11-19 18:37:03 | I | === Dumped Configurations ===
96
+ 24-11-19 18:37:03 | I | { 'cache': {'path': {'acts': '', 'reorder': '', 'rotation': '', 'smooth': '', 'wgts': ''}, 'root': 'runs/shang'},
97
+ 24-11-19 18:37:03 | I | 'copy_on_save': False,
98
+ 24-11-19 18:37:03 | I | 'eval': {'batch_size': 8, 'evaluators': ['gptq'], 'max_seq_length': -4096, 'num_gpus': 1, 'tasks': ['wikitext']},
99
+ 24-11-19 18:37:03 | I | 'load_from': '',
100
+ 24-11-19 18:37:03 | I | 'model': { 'dtype': 'torch.float16',
101
+ 24-11-19 18:37:03 | I | 'family': 'llama-2',
102
+ 24-11-19 18:37:03 | I | 'local_path': '/home/yujunlin/models/llama-2/llama-2-7b-instruct-together-32k',
103
+ 24-11-19 18:37:03 | I | 'local_root': '/home/yujunlin/models',
104
+ 24-11-19 18:37:03 | I | 'name': 'llama-2-7b-instruct-together-32k',
105
+ 24-11-19 18:37:03 | I | 'path': '/home/yujunlin/models/llama-2/llama-2-7b-instruct-together-32k',
106
+ 24-11-19 18:37:03 | I | 'root': ''},
107
+ 24-11-19 18:37:03 | I | 'output': {'dirname': 'default-pileval.128x1024.[0-0]', 'job': 'run', 'root': 'runs/shang'},
108
+ 24-11-19 18:37:03 | I | 'quant': { 'calib': { 'data': 'pileval',
109
+ 24-11-19 18:37:03 | I | 'local_path': '',
110
+ 24-11-19 18:37:03 | I | 'max_seq_length': 0,
111
+ 24-11-19 18:37:03 | I | 'min_seq_length': 0,
112
+ 24-11-19 18:37:03 | I | 'num_samples': 128,
113
+ 24-11-19 18:37:03 | I | 'path': 'mit-han-lab/pile-val-backup',
114
+ 24-11-19 18:37:03 | I | 'seq_length': 1024},
115
+ 24-11-19 18:37:03 | I | 'develop_dtype': 'torch.float32',
116
+ 24-11-19 18:37:03 | I | 'enable_reorder': False,
117
+ 24-11-19 18:37:03 | I | 'enable_rotation': False,
118
+ 24-11-19 18:37:03 | I | 'enable_smooth': False,
119
+ 24-11-19 18:37:03 | I | 'ipts': { 'dtype': None,
120
+ 24-11-19 18:37:03 | I | 'enable_calib_range': False,
121
+ 24-11-19 18:37:03 | I | 'group_shapes': [[-1, -1, -1]],
122
+ 24-11-19 18:37:03 | I | 'scale_dtypes': [None],
123
+ 24-11-19 18:37:03 | I | 'skips': [],
124
+ 24-11-19 18:37:03 | I | 'static': False,
125
+ 24-11-19 18:37:03 | I | 'zero_point': None},
126
+ 24-11-19 18:37:03 | I | 'opts': { 'dtype': None,
127
+ 24-11-19 18:37:03 | I | 'enable_calib_range': False,
128
+ 24-11-19 18:37:03 | I | 'group_shapes': [[-1, -1, -1]],
129
+ 24-11-19 18:37:03 | I | 'scale_dtypes': [None],
130
+ 24-11-19 18:37:03 | I | 'skips': [],
131
+ 24-11-19 18:37:03 | I | 'static': False,
132
+ 24-11-19 18:37:03 | I | 'zero_point': None},
133
+ 24-11-19 18:37:03 | I | 'wgts': { 'dtype': None,
134
+ 24-11-19 18:37:03 | I | 'enable_calib_range': False,
135
+ 24-11-19 18:37:03 | I | 'enable_kernel_gptq': False,
136
+ 24-11-19 18:37:03 | I | 'group_shapes': [[-1, -1, -1]],
137
+ 24-11-19 18:37:03 | I | 'intermediate_dtypes': [],
138
+ 24-11-19 18:37:03 | I | 'intermediate_levels': [],
139
+ 24-11-19 18:37:03 | I | 'needs_dequant_saturation': False,
140
+ 24-11-19 18:37:03 | I | 'scale_dtypes': [None],
141
+ 24-11-19 18:37:03 | I | 'skips': [],
142
+ 24-11-19 18:37:03 | I | 'zero_point': None}},
143
+ 24-11-19 18:37:03 | I | 'save_model': False,
144
+ 24-11-19 18:37:03 | I | 'seed': 12345,
145
+ 24-11-19 18:37:03 | I | 'skip_eval': False}
146
+ 24-11-19 18:37:03 | I | === Output Directory ===
147
+ 24-11-19 18:37:03 | I | runs/shang/llm/llama-2/llama-2-7b-instruct-together-32k/w.16-x.16-y.16/w.fp16-x.fp16-y.fp16/w.tnsr.fp16-x.tnsr.fp16-y.tnsr.fp16/default-pileval.128x1024.[0-0]/run-241119.183703
148
+ 24-11-19 18:37:03 | I | === Start Evaluating ===
149
+ 24-11-19 18:37:03 | I | * Building model llama-2-7b-instruct-together-32k from /home/yujunlin/models/llama-2/llama-2-7b-instruct-together-32k
150
+ 24-11-19 18:37:03 | I | We will use 90% of the memory on device 0 for storing the model, and 10% for the buffer to avoid OOM. You can set `max_memory` in to a higher value to use more memory (at your own risk).
151
+ 24-11-19 18:37:12 | I | - Patching LlamaSdpaAttention.forward in model.layers.0.self_attn
152
+ 24-11-19 18:37:12 | I | - Patching LlamaSdpaAttention.forward in model.layers.1.self_attn
153
+ 24-11-19 18:37:12 | I | - Patching LlamaSdpaAttention.forward in model.layers.2.self_attn
154
+ 24-11-19 18:37:12 | I | - Patching LlamaSdpaAttention.forward in model.layers.3.self_attn
155
+ 24-11-19 18:37:12 | I | - Patching LlamaSdpaAttention.forward in model.layers.4.self_attn
156
+ 24-11-19 18:37:12 | I | - Patching LlamaSdpaAttention.forward in model.layers.5.self_attn
157
+ 24-11-19 18:37:12 | I | - Patching LlamaSdpaAttention.forward in model.layers.6.self_attn
158
+ 24-11-19 18:37:12 | I | - Patching LlamaSdpaAttention.forward in model.layers.7.self_attn
159
+ 24-11-19 18:37:12 | I | - Patching LlamaSdpaAttention.forward in model.layers.8.self_attn
160
+ 24-11-19 18:37:12 | I | - Patching LlamaSdpaAttention.forward in model.layers.9.self_attn
161
+ 24-11-19 18:37:12 | I | - Patching LlamaSdpaAttention.forward in model.layers.10.self_attn
162
+ 24-11-19 18:37:12 | I | - Patching LlamaSdpaAttention.forward in model.layers.11.self_attn
163
+ 24-11-19 18:37:12 | I | - Patching LlamaSdpaAttention.forward in model.layers.12.self_attn
164
+ 24-11-19 18:37:12 | I | - Patching LlamaSdpaAttention.forward in model.layers.13.self_attn
165
+ 24-11-19 18:37:12 | I | - Patching LlamaSdpaAttention.forward in model.layers.14.self_attn
166
+ 24-11-19 18:37:12 | I | - Patching LlamaSdpaAttention.forward in model.layers.15.self_attn
167
+ 24-11-19 18:37:12 | I | - Patching LlamaSdpaAttention.forward in model.layers.16.self_attn
168
+ 24-11-19 18:37:12 | I | - Patching LlamaSdpaAttention.forward in model.layers.17.self_attn
169
+ 24-11-19 18:37:12 | I | - Patching LlamaSdpaAttention.forward in model.layers.18.self_attn
170
+ 24-11-19 18:37:12 | I | - Patching LlamaSdpaAttention.forward in model.layers.19.self_attn
171
+ 24-11-19 18:37:12 | I | - Patching LlamaSdpaAttention.forward in model.layers.20.self_attn
172
+ 24-11-19 18:37:12 | I | - Patching LlamaSdpaAttention.forward in model.layers.21.self_attn
173
+ 24-11-19 18:37:12 | I | - Patching LlamaSdpaAttention.forward in model.layers.22.self_attn
174
+ 24-11-19 18:37:12 | I | - Patching LlamaSdpaAttention.forward in model.layers.23.self_attn
175
+ 24-11-19 18:37:12 | I | - Patching LlamaSdpaAttention.forward in model.layers.24.self_attn
176
+ 24-11-19 18:37:12 | I | - Patching LlamaSdpaAttention.forward in model.layers.25.self_attn
177
+ 24-11-19 18:37:12 | I | - Patching LlamaSdpaAttention.forward in model.layers.26.self_attn
178
+ 24-11-19 18:37:12 | I | - Patching LlamaSdpaAttention.forward in model.layers.27.self_attn
179
+ 24-11-19 18:37:12 | I | - Patching LlamaSdpaAttention.forward in model.layers.28.self_attn
180
+ 24-11-19 18:37:12 | I | - Patching LlamaSdpaAttention.forward in model.layers.29.self_attn
181
+ 24-11-19 18:37:12 | I | - Patching LlamaSdpaAttention.forward in model.layers.30.self_attn
182
+ 24-11-19 18:37:12 | I | - Patching LlamaSdpaAttention.forward in model.layers.31.self_attn
183
+ 24-11-19 18:37:12 | I | * Development dtype is torch.float32
184
+ 24-11-19 18:37:12 | I | * Evaluating model
185
+ 24-11-19 18:37:12 | W | `pretrained` model kwarg is not of type `str`. Many other model arguments may be ignored. Please do not launch via accelerate or use `parallelize=True` if passing an existing model this way.
186
+ 24-11-19 18:37:12 | I | Using model type 'default'
187
+ 24-11-19 18:37:12 | W | Passed an already-initialized model through `pretrained`, assuming single-process call to evaluate() or custom distributed integration
188
+ 24-11-19 18:37:12 | I | - Evaluator: gptq
189
+ 24-11-19 18:37:12 | I | - Tasks: ['wikitext']
190
+ 24-11-19 18:37:12 | I | - Batch_size: 8
191
+ 24-11-19 18:37:12 | I | + Max_seq_length: 2048
192
+ 24-11-19 18:37:12 | D | Starting new HTTPS connection (1): huggingface.co:443
193
+ 24-11-19 18:37:18 | W | Using the latest cached version of the dataset since wikitext couldn't be found on the Hugging Face Hub
194
+ 24-11-19 18:37:18 | W | Found the latest cached dataset configuration 'wikitext-2-raw-v1' at /home/yujunlin/.cache/huggingface/datasets/wikitext/wikitext-2-raw-v1/0.0.0/b08601e04326c79dfdd32d625aee71d232d685c3 (last modified on Tue Oct 8 19:51:38 2024).
195
+ 24-11-19 18:37:18 | D | Attempting to acquire lock 23438952619984 on /home/yujunlin/.cache/huggingface/datasets/_home_yujunlin_.cache_huggingface_datasets_wikitext_wikitext-2-raw-v1_0.0.0_b08601e04326c79dfdd32d625aee71d232d685c3.lock
196
+ 24-11-19 18:37:18 | D | Lock 23438952619984 acquired on /home/yujunlin/.cache/huggingface/datasets/_home_yujunlin_.cache_huggingface_datasets_wikitext_wikitext-2-raw-v1_0.0.0_b08601e04326c79dfdd32d625aee71d232d685c3.lock
197
+ 24-11-19 18:37:18 | D | open file: /home/yujunlin/.cache/huggingface/datasets/wikitext/wikitext-2-raw-v1/0.0.0/b08601e04326c79dfdd32d625aee71d232d685c3/dataset_info.json
198
+ 24-11-19 18:37:18 | D | Attempting to release lock 23438952619984 on /home/yujunlin/.cache/huggingface/datasets/_home_yujunlin_.cache_huggingface_datasets_wikitext_wikitext-2-raw-v1_0.0.0_b08601e04326c79dfdd32d625aee71d232d685c3.lock
199
+ 24-11-19 18:37:18 | D | Lock 23438952619984 released on /home/yujunlin/.cache/huggingface/datasets/_home_yujunlin_.cache_huggingface_datasets_wikitext_wikitext-2-raw-v1_0.0.0_b08601e04326c79dfdd32d625aee71d232d685c3.lock
200
+ 24-11-19 18:37:31 | I | - Results:
201
+ 24-11-19 18:37:32 | I | | Task |Version| Metric |Value | |Stderr|
202
+ 24-11-19 18:37:32 | I | |--------|------:|---------------|-----:|---|-----:|
203
+ 24-11-19 18:37:32 | I | |wikitext| 1|word_perplexity|6.4432|± |6.4432|
204
+ 24-11-19 18:37:32 | I |
205
+ 24-11-19 18:37:32 | I | + Max_seq_length: 4096
206
+ 24-11-19 18:37:32 | D | Starting new HTTPS connection (2): huggingface.co:443
207
+ 24-11-19 18:37:38 | W | Using the latest cached version of the dataset since wikitext couldn't be found on the Hugging Face Hub
208
+ 24-11-19 18:37:38 | W | Found the latest cached dataset configuration 'wikitext-2-raw-v1' at /home/yujunlin/.cache/huggingface/datasets/wikitext/wikitext-2-raw-v1/0.0.0/b08601e04326c79dfdd32d625aee71d232d685c3 (last modified on Tue Oct 8 19:51:38 2024).
209
+ 24-11-19 18:37:38 | D | Attempting to acquire lock 23438952626944 on /home/yujunlin/.cache/huggingface/datasets/_home_yujunlin_.cache_huggingface_datasets_wikitext_wikitext-2-raw-v1_0.0.0_b08601e04326c79dfdd32d625aee71d232d685c3.lock
210
+ 24-11-19 18:37:38 | D | Lock 23438952626944 acquired on /home/yujunlin/.cache/huggingface/datasets/_home_yujunlin_.cache_huggingface_datasets_wikitext_wikitext-2-raw-v1_0.0.0_b08601e04326c79dfdd32d625aee71d232d685c3.lock
211
+ 24-11-19 18:37:38 | D | open file: /home/yujunlin/.cache/huggingface/datasets/wikitext/wikitext-2-raw-v1/0.0.0/b08601e04326c79dfdd32d625aee71d232d685c3/dataset_info.json
212
+ 24-11-19 18:37:38 | D | Attempting to release lock 23438952626944 on /home/yujunlin/.cache/huggingface/datasets/_home_yujunlin_.cache_huggingface_datasets_wikitext_wikitext-2-raw-v1_0.0.0_b08601e04326c79dfdd32d625aee71d232d685c3.lock
213
+ 24-11-19 18:37:38 | D | Lock 23438952626944 released on /home/yujunlin/.cache/huggingface/datasets/_home_yujunlin_.cache_huggingface_datasets_wikitext_wikitext-2-raw-v1_0.0.0_b08601e04326c79dfdd32d625aee71d232d685c3.lock
214
+ 24-11-19 18:37:50 | I | - Results:
215
+ 24-11-19 18:37:50 | I | | Task |Version| Metric |Value | |Stderr|
216
+ 24-11-19 18:37:50 | I | |--------|------:|---------------|-----:|---|-----:|
217
+ 24-11-19 18:37:50 | I | |wikitext| 1|word_perplexity|5.9649|± |5.9649|
218
+ 24-11-19 18:37:50 | I |
219
+ 24-11-19 18:37:50 | I | * Saving results to runs/shang/llm/llama-2/llama-2-7b-instruct-together-32k/w.16-x.16-y.16/w.fp16-x.fp16-y.fp16/w.tnsr.fp16-x.tnsr.fp16-y.tnsr.fp16/default-pileval.128x1024.[0-0]/run-241119.183703
runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.GridSearch.bn2.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200548/config-241119.200548.yaml ADDED
@@ -0,0 +1,146 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ cache:
2
+ root: runs/shang
3
+ path:
4
+ rotation: runs/shang/llm/cache/quant/rotation/hadamard/llama-2-7b-instruct-together-32k.pt
5
+ reorder: ''
6
+ smooth: runs/shang/llm/cache/quant/pileval.128x1024.[0-0]/smooth/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/w.skip.[]-x.skip.[]-y.skip.[attn_q]/rotate.hadamard.[out_proj]/smooth.attn.OutputsError.GridSearch.Layer.d2.en1.sn1/smooth.attn.[a.AbsMax.b.AbsMax]/smooth.attn.g20.bn2/llama-2-7b-instruct-together-32k.pt
7
+ wgts: runs/shang/llm/cache/quant/pileval.128x1024.[0-0]/wgts/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/w.skip.[]-x.skip.[]-y.skip.[attn_q]/rotate.hadamard.[out_proj]/smooth.attn.OutputsError.GridSearch.Layer.d2.en1.sn1/smooth.attn.[a.AbsMax.b.AbsMax]/smooth.attn.g20.bn2/w.kernel.gptq.d0p01.b128/w.range.OutputsError.GridSearch.Group.d2.e512.sn1/w.range.r.[0p2.1].g80/w.range.skip.[]/llama-2-7b-instruct-together-32k.pt
8
+ acts: runs/shang/llm/cache/quant/pileval.128x1024.[0-0]/acts/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/w.skip.[]-x.skip.[]-y.skip.[attn_q]/rotate.hadamard.[out_proj]/smooth.attn.OutputsError.GridSearch.Layer.d2.en1.sn1/smooth.attn.[a.AbsMax.b.AbsMax]/smooth.attn.g20.bn2/w.kernel.gptq.d0p01.b128/w.range.OutputsError.GridSearch.Group.d2.e512.sn1/w.range.r.[0p2.1].g80/w.range.skip.[]/y.range.OutputsError.Manual.Layer.d2.en1.sn1/y.range.r.[1]/y.range.skip.[]/llama-2-7b-instruct-together-32k.pt
9
+ output:
10
+ root: runs/shang
11
+ dirname: skip.y.[q]-gptq-rot.[+out]-smth.attn.GridSearch.bn2.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]
12
+ job: run
13
+ model:
14
+ name: llama-2-7b-instruct-together-32k
15
+ family: llama-2
16
+ path: /home/yujunlin/models/llama-2/llama-2-7b-instruct-together-32k
17
+ root: ''
18
+ local_path: /home/yujunlin/models/llama-2/llama-2-7b-instruct-together-32k
19
+ local_root: /home/yujunlin/models
20
+ dtype: torch.float16
21
+ eval:
22
+ num_gpus: 1
23
+ batch_size: 8
24
+ tasks:
25
+ - wikitext
26
+ max_seq_length: -4096
27
+ evaluators:
28
+ - gptq
29
+ quant:
30
+ wgts:
31
+ dtype: sint8
32
+ zero_point: null
33
+ group_shapes:
34
+ - - 1
35
+ - -1
36
+ - -1
37
+ scale_dtypes:
38
+ - torch.float16
39
+ intermediate_dtypes: []
40
+ intermediate_levels: []
41
+ needs_dequant_saturation: false
42
+ skips: []
43
+ enable_kernel_gptq: true
44
+ kernel_gptq:
45
+ damp_percentage: 0.01
46
+ block_size: 128
47
+ num_inv_tries: 250
48
+ hessian_block_size: 512
49
+ enable_calib_range: true
50
+ calib_range:
51
+ degree: 2
52
+ objective: OutputsError
53
+ strategy: GridSearch
54
+ granularity: Group
55
+ element_batch_size: 64
56
+ sample_batch_size: -1
57
+ element_size: 512
58
+ sample_size: -1
59
+ pre_reshape: true
60
+ outputs_device: cpu
61
+ ratio: 1.0
62
+ max_shrink: 0.2
63
+ max_expand: 1.0
64
+ num_grids: 80
65
+ allow_scale: false
66
+ skips: []
67
+ ipts:
68
+ dtype: sint8
69
+ zero_point: null
70
+ group_shapes:
71
+ - - 1
72
+ - -1
73
+ - -1
74
+ scale_dtypes:
75
+ - torch.float16
76
+ skips: []
77
+ static: false
78
+ enable_calib_range: false
79
+ opts:
80
+ dtype: sint8
81
+ zero_point: null
82
+ group_shapes:
83
+ - - -1
84
+ - -1
85
+ - -1
86
+ scale_dtypes:
87
+ - torch.float16
88
+ skips:
89
+ - attn_q
90
+ static: true
91
+ enable_calib_range: true
92
+ calib_range:
93
+ degree: 2
94
+ objective: OutputsError
95
+ strategy: Manual
96
+ granularity: Layer
97
+ element_batch_size: -1
98
+ sample_batch_size: -1
99
+ element_size: -1
100
+ sample_size: -1
101
+ pre_reshape: true
102
+ outputs_device: cpu
103
+ ratio: 1.0
104
+ max_shrink: 0.2
105
+ max_expand: 1.0
106
+ num_grids: 80
107
+ allow_scale: false
108
+ skips: []
109
+ calib:
110
+ data: pileval
111
+ num_samples: 128
112
+ path: mit-han-lab/pile-val-backup
113
+ seq_length: 1024
114
+ min_seq_length: 0
115
+ max_seq_length: 0
116
+ local_path: ''
117
+ enable_rotation: true
118
+ rotation:
119
+ random: false
120
+ transforms:
121
+ - out_proj
122
+ enable_reorder: false
123
+ enable_smooth: true
124
+ smooth:
125
+ enable_proj: false
126
+ enable_attn: true
127
+ attn:
128
+ degree: 2
129
+ strategy: GridSearch
130
+ sample_batch_size: -1
131
+ sample_size: -1
132
+ outputs_device: cpu
133
+ allow_a_quant: true
134
+ allow_b_quant: true
135
+ spans:
136
+ - - AbsMax
137
+ - AbsMax
138
+ alpha: 0.5
139
+ beta: -2
140
+ num_grids: 20
141
+ develop_dtype: torch.float32
142
+ seed: 12345
143
+ skip_eval: false
144
+ load_from: ''
145
+ save_model: 'true'
146
+ copy_on_save: false
runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.GridSearch.bn2.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200548/model/acts.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:452d684e7ed9bbcde200ac9e61316bd873246869750aa79ffd3cb0aececc9b99
3
+ size 35898
runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.GridSearch.bn2.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200548/model/model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc1ac15dda8c29eb2ebd22eee524e1bb0c696f2dcba86f840af18da8c8434b12
3
+ size 13476951926
runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.GridSearch.bn2.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200548/model/rotation.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f916dabab0b940f3f0130e5acabbb6d02e7c557bf8608b430e001e02d4a75ab2
3
+ size 134219097
runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.GridSearch.bn2.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200548/model/scale.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5e4b445b99650db18a04ac7d3ec3214937f362749e437fafd2d124b993fdb60f
3
+ size 5566058
runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.GridSearch.bn2.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200548/model/smooth.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e46a0206c5c7210660e08615fba714402bcd532050747f0fd9b134f77cb9fba
3
+ size 535162
runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.GridSearch.bn2.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200548/model/wgts.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3454af85d02208d593d736d00fa756e45f909f2f6f1bbefdcef52f1e3e6cbbaf
3
+ size 5527158
runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.GridSearch.bn2.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200548/results-241119.200548.json ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "gptq": {
3
+ "2048": {
4
+ "results": {
5
+ "wikitext": {
6
+ "word_perplexity": 6.503797370202683
7
+ }
8
+ },
9
+ "versions": {
10
+ "wikitext": 1
11
+ },
12
+ "config": {
13
+ "model": "llama-2-7b-instruct-together-32k"
14
+ },
15
+ "model": "llama-2-7b-instruct-together-32k"
16
+ },
17
+ "4096": {
18
+ "results": {
19
+ "wikitext": {
20
+ "word_perplexity": 6.014449215881915
21
+ }
22
+ },
23
+ "versions": {
24
+ "wikitext": 1
25
+ },
26
+ "config": {
27
+ "model": "llama-2-7b-instruct-together-32k"
28
+ },
29
+ "model": "llama-2-7b-instruct-together-32k"
30
+ }
31
+ }
32
+ }
runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.GridSearch.bn2.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200548/run-241119.185856.log ADDED
The diff for this file is too large to render. See raw diff
 
runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.GridSearch.bn2.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200548/run-241119.200548.log ADDED
The diff for this file is too large to render. See raw diff
 
runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.a0p5.b0.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0].RUNNING/run-241119.200727.RUNNING/config-241119.200727.yaml ADDED
@@ -0,0 +1,146 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ cache:
2
+ root: runs/shang
3
+ path:
4
+ rotation: runs/shang/llm/cache/quant/rotation/hadamard/llama-2-7b-instruct-together-32k.pt
5
+ reorder: ''
6
+ smooth: runs/shang/llm/cache/quant/pileval.128x1024.[0-0]/smooth/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/w.skip.[]-x.skip.[]-y.skip.[attn_q]/rotate.hadamard.[out_proj]/smooth.attn.OutputsError.Manual.Layer.d2.en1.sn1/smooth.attn.[a.AbsMax.b.AbsMax]/smooth.attn.a0p5.b0/llama-2-7b-instruct-together-32k.pt
7
+ wgts: runs/shang/llm/cache/quant/pileval.128x1024.[0-0]/wgts/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/w.skip.[]-x.skip.[]-y.skip.[attn_q]/rotate.hadamard.[out_proj]/smooth.attn.OutputsError.Manual.Layer.d2.en1.sn1/smooth.attn.[a.AbsMax.b.AbsMax]/smooth.attn.a0p5.b0/w.kernel.gptq.d0p01.b128/w.range.OutputsError.GridSearch.Group.d2.e512.sn1/w.range.r.[0p2.1].g80/w.range.skip.[]/llama-2-7b-instruct-together-32k.pt
8
+ acts: runs/shang/llm/cache/quant/pileval.128x1024.[0-0]/acts/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/w.skip.[]-x.skip.[]-y.skip.[attn_q]/rotate.hadamard.[out_proj]/smooth.attn.OutputsError.Manual.Layer.d2.en1.sn1/smooth.attn.[a.AbsMax.b.AbsMax]/smooth.attn.a0p5.b0/w.kernel.gptq.d0p01.b128/w.range.OutputsError.GridSearch.Group.d2.e512.sn1/w.range.r.[0p2.1].g80/w.range.skip.[]/y.range.OutputsError.Manual.Layer.d2.en1.sn1/y.range.r.[1]/y.range.skip.[]/llama-2-7b-instruct-together-32k.pt
9
+ output:
10
+ root: runs/shang
11
+ dirname: skip.y.[q]-gptq-rot.[+out]-smth.attn.a0p5.b0.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]
12
+ job: run
13
+ model:
14
+ name: llama-2-7b-instruct-together-32k
15
+ family: llama-2
16
+ path: /home/yujunlin/models/llama-2/llama-2-7b-instruct-together-32k
17
+ root: ''
18
+ local_path: /home/yujunlin/models/llama-2/llama-2-7b-instruct-together-32k
19
+ local_root: /home/yujunlin/models
20
+ dtype: torch.float16
21
+ eval:
22
+ num_gpus: 1
23
+ batch_size: 8
24
+ tasks:
25
+ - wikitext
26
+ max_seq_length: -4096
27
+ evaluators:
28
+ - gptq
29
+ quant:
30
+ wgts:
31
+ dtype: sint8
32
+ zero_point: null
33
+ group_shapes:
34
+ - - 1
35
+ - -1
36
+ - -1
37
+ scale_dtypes:
38
+ - torch.float16
39
+ intermediate_dtypes: []
40
+ intermediate_levels: []
41
+ needs_dequant_saturation: false
42
+ skips: []
43
+ enable_kernel_gptq: true
44
+ kernel_gptq:
45
+ damp_percentage: 0.01
46
+ block_size: 128
47
+ num_inv_tries: 250
48
+ hessian_block_size: 512
49
+ enable_calib_range: true
50
+ calib_range:
51
+ degree: 2
52
+ objective: OutputsError
53
+ strategy: GridSearch
54
+ granularity: Group
55
+ element_batch_size: 64
56
+ sample_batch_size: -1
57
+ element_size: 512
58
+ sample_size: -1
59
+ pre_reshape: true
60
+ outputs_device: cpu
61
+ ratio: 1.0
62
+ max_shrink: 0.2
63
+ max_expand: 1.0
64
+ num_grids: 80
65
+ allow_scale: false
66
+ skips: []
67
+ ipts:
68
+ dtype: sint8
69
+ zero_point: null
70
+ group_shapes:
71
+ - - 1
72
+ - -1
73
+ - -1
74
+ scale_dtypes:
75
+ - torch.float16
76
+ skips: []
77
+ static: false
78
+ enable_calib_range: false
79
+ opts:
80
+ dtype: sint8
81
+ zero_point: null
82
+ group_shapes:
83
+ - - -1
84
+ - -1
85
+ - -1
86
+ scale_dtypes:
87
+ - torch.float16
88
+ skips:
89
+ - attn_q
90
+ static: true
91
+ enable_calib_range: true
92
+ calib_range:
93
+ degree: 2
94
+ objective: OutputsError
95
+ strategy: Manual
96
+ granularity: Layer
97
+ element_batch_size: -1
98
+ sample_batch_size: -1
99
+ element_size: -1
100
+ sample_size: -1
101
+ pre_reshape: true
102
+ outputs_device: cpu
103
+ ratio: 1.0
104
+ max_shrink: 0.2
105
+ max_expand: 1.0
106
+ num_grids: 80
107
+ allow_scale: false
108
+ skips: []
109
+ calib:
110
+ data: pileval
111
+ num_samples: 128
112
+ path: mit-han-lab/pile-val-backup
113
+ seq_length: 1024
114
+ min_seq_length: 0
115
+ max_seq_length: 0
116
+ local_path: ''
117
+ enable_rotation: true
118
+ rotation:
119
+ random: false
120
+ transforms:
121
+ - out_proj
122
+ enable_reorder: false
123
+ enable_smooth: true
124
+ smooth:
125
+ enable_proj: false
126
+ enable_attn: true
127
+ attn:
128
+ degree: 2
129
+ strategy: Manual
130
+ sample_batch_size: -1
131
+ sample_size: -1
132
+ outputs_device: cpu
133
+ allow_a_quant: true
134
+ allow_b_quant: true
135
+ spans:
136
+ - - AbsMax
137
+ - AbsMax
138
+ alpha: 0.5
139
+ beta: 0
140
+ num_grids: 20
141
+ develop_dtype: torch.float32
142
+ seed: 12345
143
+ skip_eval: false
144
+ load_from: ''
145
+ save_model: 'true'
146
+ copy_on_save: false
runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.a0p5.b0.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0].RUNNING/run-241119.200727.RUNNING/model/model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c7707587af5ec67ddc0f3f7dbfdbd9c86f9ded33e10c5bf4b0246518c228199f
3
+ size 13476951926
runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.a0p5.b0.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0].RUNNING/run-241119.200727.RUNNING/model/rotation.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f916dabab0b940f3f0130e5acabbb6d02e7c557bf8608b430e001e02d4a75ab2
3
+ size 134219097
runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.a0p5.b0.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0].RUNNING/run-241119.200727.RUNNING/model/scale.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6c32ea151ff1a6e987ec8866aae01d07225acf388d05417668928a96d3fe6c6a
3
+ size 5566058
runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.a0p5.b0.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0].RUNNING/run-241119.200727.RUNNING/model/smooth.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:385f0d71a9d0cca3df4fd946a21df2de62d089964597e2e1704c7a39cfee0711
3
+ size 535162
runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.a0p5.b0.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0].RUNNING/run-241119.200727.RUNNING/model/wgts.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:90a45af762722116d586b0e7011d9acfcc8950086c0470898c078c05af779d0e
3
+ size 5527158
runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.a0p5.b0.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0].RUNNING/run-241119.200727.RUNNING/run-241119.200727.log ADDED
The diff for this file is too large to render. See raw diff
 
runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.201608/config-241119.201608.yaml ADDED
@@ -0,0 +1,129 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ cache:
2
+ root: runs/shang
3
+ path:
4
+ rotation: runs/shang/llm/cache/quant/rotation/hadamard/llama-2-7b-instruct-together-32k.pt
5
+ reorder: ''
6
+ smooth: ''
7
+ wgts: runs/shang/llm/cache/quant/pileval.128x1024.[0-0]/wgts/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/w.skip.[]-x.skip.[]-y.skip.[attn_q]/rotate.hadamard.[out_proj]/w.kernel.gptq.d0p01.b128/w.range.OutputsError.GridSearch.Group.d2.e512.sn1/w.range.r.[0p2.1].g80/w.range.skip.[]/llama-2-7b-instruct-together-32k.pt
8
+ acts: runs/shang/llm/cache/quant/pileval.128x1024.[0-0]/acts/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/w.skip.[]-x.skip.[]-y.skip.[attn_q]/rotate.hadamard.[out_proj]/w.kernel.gptq.d0p01.b128/w.range.OutputsError.GridSearch.Group.d2.e512.sn1/w.range.r.[0p2.1].g80/w.range.skip.[]/y.range.OutputsError.Manual.Layer.d2.en1.sn1/y.range.r.[1]/y.range.skip.[]/llama-2-7b-instruct-together-32k.pt
9
+ output:
10
+ root: runs/shang
11
+ dirname: skip.y.[q]-gptq-rot.[+out]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]
12
+ job: run
13
+ model:
14
+ name: llama-2-7b-instruct-together-32k
15
+ family: llama-2
16
+ path: /home/yujunlin/models/llama-2/llama-2-7b-instruct-together-32k
17
+ root: ''
18
+ local_path: /home/yujunlin/models/llama-2/llama-2-7b-instruct-together-32k
19
+ local_root: /home/yujunlin/models
20
+ dtype: torch.float16
21
+ eval:
22
+ num_gpus: 1
23
+ batch_size: 8
24
+ tasks:
25
+ - wikitext
26
+ max_seq_length: -4096
27
+ evaluators:
28
+ - gptq
29
+ quant:
30
+ wgts:
31
+ dtype: sint8
32
+ zero_point: null
33
+ group_shapes:
34
+ - - 1
35
+ - -1
36
+ - -1
37
+ scale_dtypes:
38
+ - torch.float16
39
+ intermediate_dtypes: []
40
+ intermediate_levels: []
41
+ needs_dequant_saturation: false
42
+ skips: []
43
+ enable_kernel_gptq: true
44
+ kernel_gptq:
45
+ damp_percentage: 0.01
46
+ block_size: 128
47
+ num_inv_tries: 250
48
+ hessian_block_size: 512
49
+ enable_calib_range: true
50
+ calib_range:
51
+ degree: 2
52
+ objective: OutputsError
53
+ strategy: GridSearch
54
+ granularity: Group
55
+ element_batch_size: 64
56
+ sample_batch_size: -1
57
+ element_size: 512
58
+ sample_size: -1
59
+ pre_reshape: true
60
+ outputs_device: cpu
61
+ ratio: 1.0
62
+ max_shrink: 0.2
63
+ max_expand: 1.0
64
+ num_grids: 80
65
+ allow_scale: false
66
+ skips: []
67
+ ipts:
68
+ dtype: sint8
69
+ zero_point: null
70
+ group_shapes:
71
+ - - 1
72
+ - -1
73
+ - -1
74
+ scale_dtypes:
75
+ - torch.float16
76
+ skips: []
77
+ static: false
78
+ enable_calib_range: false
79
+ opts:
80
+ dtype: sint8
81
+ zero_point: null
82
+ group_shapes:
83
+ - - -1
84
+ - -1
85
+ - -1
86
+ scale_dtypes:
87
+ - torch.float16
88
+ skips:
89
+ - attn_q
90
+ static: true
91
+ enable_calib_range: true
92
+ calib_range:
93
+ degree: 2
94
+ objective: OutputsError
95
+ strategy: Manual
96
+ granularity: Layer
97
+ element_batch_size: -1
98
+ sample_batch_size: -1
99
+ element_size: -1
100
+ sample_size: -1
101
+ pre_reshape: true
102
+ outputs_device: cpu
103
+ ratio: 1.0
104
+ max_shrink: 0.2
105
+ max_expand: 1.0
106
+ num_grids: 80
107
+ allow_scale: false
108
+ skips: []
109
+ calib:
110
+ data: pileval
111
+ num_samples: 128
112
+ path: mit-han-lab/pile-val-backup
113
+ seq_length: 1024
114
+ min_seq_length: 0
115
+ max_seq_length: 0
116
+ local_path: ''
117
+ enable_rotation: true
118
+ rotation:
119
+ random: false
120
+ transforms:
121
+ - out_proj
122
+ enable_reorder: false
123
+ enable_smooth: false
124
+ develop_dtype: torch.float32
125
+ seed: 12345
126
+ skip_eval: false
127
+ load_from: ''
128
+ save_model: 'true'
129
+ copy_on_save: false
runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.201608/model/acts.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:38385b12cbc35439f6e635f9e60e69e2490cb32f65a90e85b921587f19973ca6
3
+ size 35898
runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.201608/model/model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e05ab74d7e1f68b9ae327e1afb40528b281d90dbf814760976dd74e97a54beae
3
+ size 13476951926
runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.201608/model/rotation.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f916dabab0b940f3f0130e5acabbb6d02e7c557bf8608b430e001e02d4a75ab2
3
+ size 134219097
runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.201608/model/scale.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:527a5ec6f0ba29bd2579c7fc5a8221231449f5dcb35223f3036ef6e87166bf60
3
+ size 5566058
runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.201608/model/wgts.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e6a8e12842f3c488159d02404a8885723a3c096ac3e63aedcc515c13da27b1a2
3
+ size 5527158
runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.201608/results-241119.201608.json ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "gptq": {
3
+ "2048": {
4
+ "results": {
5
+ "wikitext": {
6
+ "word_perplexity": 6.509843744356094
7
+ }
8
+ },
9
+ "versions": {
10
+ "wikitext": 1
11
+ },
12
+ "config": {
13
+ "model": "llama-2-7b-instruct-together-32k"
14
+ },
15
+ "model": "llama-2-7b-instruct-together-32k"
16
+ },
17
+ "4096": {
18
+ "results": {
19
+ "wikitext": {
20
+ "word_perplexity": 6.017616942577096
21
+ }
22
+ },
23
+ "versions": {
24
+ "wikitext": 1
25
+ },
26
+ "config": {
27
+ "model": "llama-2-7b-instruct-together-32k"
28
+ },
29
+ "model": "llama-2-7b-instruct-together-32k"
30
+ }
31
+ }
32
+ }
runs/llama-2-7b-instruct-together-32k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.201608/run-241119.201608.log ADDED
The diff for this file is too large to render. See raw diff
 
runs/llama-3-8b-instruct-gradient-1048k/w.16-x.16-y.16/w.fp16-x.fp16-y.fp16/w.tnsr.fp16-x.tnsr.fp16-y.tnsr.fp16/default-pileval.128x1024.[0-0]/run-241119.172947/config-241119.172947.yaml ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ cache:
2
+ root: runs/shang
3
+ path:
4
+ rotation: ''
5
+ reorder: ''
6
+ smooth: ''
7
+ wgts: ''
8
+ acts: ''
9
+ output:
10
+ root: runs/shang
11
+ dirname: default-pileval.128x1024.[0-0]
12
+ job: run
13
+ model:
14
+ name: llama-3-8b-instruct-gradient-1048k
15
+ family: llama-3
16
+ path: /home/yujunlin/models/llama-3/llama-3-8b-instruct-gradient-1048k
17
+ root: ''
18
+ local_path: /home/yujunlin/models/llama-3/llama-3-8b-instruct-gradient-1048k
19
+ local_root: /home/yujunlin/models
20
+ dtype: torch.float16
21
+ eval:
22
+ num_gpus: 1
23
+ batch_size: 8
24
+ tasks:
25
+ - wikitext
26
+ max_seq_length: -4096
27
+ evaluators:
28
+ - gptq
29
+ quant:
30
+ wgts:
31
+ dtype: null
32
+ zero_point: null
33
+ group_shapes:
34
+ - - -1
35
+ - -1
36
+ - -1
37
+ scale_dtypes:
38
+ - null
39
+ intermediate_dtypes: []
40
+ intermediate_levels: []
41
+ needs_dequant_saturation: false
42
+ skips: []
43
+ enable_kernel_gptq: false
44
+ enable_calib_range: false
45
+ ipts:
46
+ dtype: null
47
+ zero_point: null
48
+ group_shapes:
49
+ - - -1
50
+ - -1
51
+ - -1
52
+ scale_dtypes:
53
+ - null
54
+ skips: []
55
+ static: false
56
+ enable_calib_range: false
57
+ opts:
58
+ dtype: null
59
+ zero_point: null
60
+ group_shapes:
61
+ - - -1
62
+ - -1
63
+ - -1
64
+ scale_dtypes:
65
+ - null
66
+ skips: []
67
+ static: false
68
+ enable_calib_range: false
69
+ calib:
70
+ data: pileval
71
+ num_samples: 128
72
+ path: mit-han-lab/pile-val-backup
73
+ seq_length: 1024
74
+ min_seq_length: 0
75
+ max_seq_length: 0
76
+ local_path: ''
77
+ enable_rotation: false
78
+ enable_reorder: false
79
+ enable_smooth: false
80
+ develop_dtype: torch.float32
81
+ seed: 12345
82
+ skip_eval: false
83
+ load_from: ''
84
+ save_model: false
85
+ copy_on_save: false
runs/llama-3-8b-instruct-gradient-1048k/w.16-x.16-y.16/w.fp16-x.fp16-y.fp16/w.tnsr.fp16-x.tnsr.fp16-y.tnsr.fp16/default-pileval.128x1024.[0-0]/run-241119.172947/results-241119.172947.json ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "gptq": {
3
+ "2048": {
4
+ "results": {
5
+ "wikitext": {
6
+ "word_perplexity": 7.833065190604292
7
+ }
8
+ },
9
+ "versions": {
10
+ "wikitext": 1
11
+ },
12
+ "config": {
13
+ "model": "llama-3-8b-instruct-gradient-1048k"
14
+ },
15
+ "model": "llama-3-8b-instruct-gradient-1048k"
16
+ },
17
+ "4096": {
18
+ "results": {
19
+ "wikitext": {
20
+ "word_perplexity": 7.261800992411218
21
+ }
22
+ },
23
+ "versions": {
24
+ "wikitext": 1
25
+ },
26
+ "config": {
27
+ "model": "llama-3-8b-instruct-gradient-1048k"
28
+ },
29
+ "model": "llama-3-8b-instruct-gradient-1048k"
30
+ }
31
+ }
32
+ }
runs/llama-3-8b-instruct-gradient-1048k/w.16-x.16-y.16/w.fp16-x.fp16-y.fp16/w.tnsr.fp16-x.tnsr.fp16-y.tnsr.fp16/default-pileval.128x1024.[0-0]/run-241119.172947/run-241119.172947.log ADDED
@@ -0,0 +1,219 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 24-11-19 17:29:47 | I | === Configurations ===
2
+ 24-11-19 17:29:47 | I | LlmPtqRunConfig(
3
+ 24-11-19 17:29:47 | I | cache=LlmCacheConfig(
4
+ 24-11-19 17:29:47 | I | root=runs/shang,
5
+ 24-11-19 17:29:47 | I | dirpath=LlmQuantCacheConfig(
6
+ 24-11-19 17:29:47 | I | rotation=,
7
+ 24-11-19 17:29:47 | I | reorder=,
8
+ 24-11-19 17:29:47 | I | smooth=,
9
+ 24-11-19 17:29:47 | I | wgts=,
10
+ 24-11-19 17:29:47 | I | acts=),
11
+ 24-11-19 17:29:47 | I | path=LlmQuantCacheConfig(
12
+ 24-11-19 17:29:47 | I | rotation=,
13
+ 24-11-19 17:29:47 | I | reorder=,
14
+ 24-11-19 17:29:47 | I | smooth=,
15
+ 24-11-19 17:29:47 | I | wgts=,
16
+ 24-11-19 17:29:47 | I | acts=)),
17
+ 24-11-19 17:29:47 | I | output=OutputConfig(
18
+ 24-11-19 17:29:47 | I | root=runs/shang,
19
+ 24-11-19 17:29:47 | I | dirname=default-pileval.128x1024.[0-0],
20
+ 24-11-19 17:29:47 | I | job=run,
21
+ 24-11-19 17:29:47 | I | dirpath=runs/shang/llm/llama-3/llama-3-8b-instruct-gradient-1048k/w.16-x.16-y.16/w.fp16-x.fp16-y.fp16/w.tnsr.fp16-x.tnsr.fp16-y.tnsr.fp16/default-pileval.128x1024.[0-0],
22
+ 24-11-19 17:29:47 | I | timestamp=241119.172947),
23
+ 24-11-19 17:29:47 | I | model=LlmModelConfig(
24
+ 24-11-19 17:29:47 | I | name=llama-3-8b-instruct-gradient-1048k,
25
+ 24-11-19 17:29:47 | I | family=llama-3,
26
+ 24-11-19 17:29:47 | I | path=/home/yujunlin/models/llama-3/llama-3-8b-instruct-gradient-1048k,
27
+ 24-11-19 17:29:47 | I | root=,
28
+ 24-11-19 17:29:47 | I | local_path=/home/yujunlin/models/llama-3/llama-3-8b-instruct-gradient-1048k,
29
+ 24-11-19 17:29:47 | I | local_root=/home/yujunlin/models,
30
+ 24-11-19 17:29:47 | I | size=8.0,
31
+ 24-11-19 17:29:47 | I | variant=instruct-gradient-1048k,
32
+ 24-11-19 17:29:47 | I | dtype=torch.float16,
33
+ 24-11-19 17:29:47 | I | orig_dtype=torch.bfloat16),
34
+ 24-11-19 17:29:47 | I | eval=LlmEvalConfig(
35
+ 24-11-19 17:29:47 | I | num_gpus=1,
36
+ 24-11-19 17:29:47 | I | batch_size=8,
37
+ 24-11-19 17:29:47 | I | tasks=['wikitext'],
38
+ 24-11-19 17:29:47 | I | max_seq_length=-4096,
39
+ 24-11-19 17:29:47 | I | evaluators=['gptq']),
40
+ 24-11-19 17:29:47 | I | quant=LlmQuantConfig(
41
+ 24-11-19 17:29:47 | I | wgts=LlmWeightQuantizerConfig(
42
+ 24-11-19 17:29:47 | I | dtype=None,
43
+ 24-11-19 17:29:47 | I | zero_point=None,
44
+ 24-11-19 17:29:47 | I | group_shapes=((-1, -1, -1),),
45
+ 24-11-19 17:29:47 | I | scale_dtypes=(None,),
46
+ 24-11-19 17:29:47 | I | intermediate_dtypes=(),
47
+ 24-11-19 17:29:47 | I | intermediate_levels=(),
48
+ 24-11-19 17:29:47 | I | needs_dequant_saturation=False,
49
+ 24-11-19 17:29:47 | I | skips=[],
50
+ 24-11-19 17:29:47 | I | static=False,
51
+ 24-11-19 17:29:47 | I | kernel_gptq=None,
52
+ 24-11-19 17:29:47 | I | calib_range=None),
53
+ 24-11-19 17:29:47 | I | ipts=LlmActivationQuantizerConfig(
54
+ 24-11-19 17:29:47 | I | dtype=None,
55
+ 24-11-19 17:29:47 | I | zero_point=None,
56
+ 24-11-19 17:29:47 | I | group_shapes=((-1, -1, -1),),
57
+ 24-11-19 17:29:47 | I | scale_dtypes=(None,),
58
+ 24-11-19 17:29:47 | I | intermediate_dtypes=(),
59
+ 24-11-19 17:29:47 | I | intermediate_levels=(),
60
+ 24-11-19 17:29:47 | I | needs_dequant_saturation=False,
61
+ 24-11-19 17:29:47 | I | skips=[],
62
+ 24-11-19 17:29:47 | I | static=False,
63
+ 24-11-19 17:29:47 | I | kernel_gptq=None,
64
+ 24-11-19 17:29:47 | I | calib_range=None),
65
+ 24-11-19 17:29:47 | I | opts=LlmActivationQuantizerConfig(
66
+ 24-11-19 17:29:47 | I | dtype=None,
67
+ 24-11-19 17:29:47 | I | zero_point=None,
68
+ 24-11-19 17:29:47 | I | group_shapes=((-1, -1, -1),),
69
+ 24-11-19 17:29:47 | I | scale_dtypes=(None,),
70
+ 24-11-19 17:29:47 | I | intermediate_dtypes=(),
71
+ 24-11-19 17:29:47 | I | intermediate_levels=(),
72
+ 24-11-19 17:29:47 | I | needs_dequant_saturation=False,
73
+ 24-11-19 17:29:47 | I | skips=[],
74
+ 24-11-19 17:29:47 | I | static=False,
75
+ 24-11-19 17:29:47 | I | kernel_gptq=None,
76
+ 24-11-19 17:29:47 | I | calib_range=None),
77
+ 24-11-19 17:29:47 | I | calib=LlmCalibDataLoaderConfig(
78
+ 24-11-19 17:29:47 | I | data=pileval,
79
+ 24-11-19 17:29:47 | I | num_samples=128,
80
+ 24-11-19 17:29:47 | I | batch_size=1,
81
+ 24-11-19 17:29:47 | I | path=mit-han-lab/pile-val-backup,
82
+ 24-11-19 17:29:47 | I | seq_length=1024,
83
+ 24-11-19 17:29:47 | I | min_seq_length=0,
84
+ 24-11-19 17:29:47 | I | max_seq_length=0,
85
+ 24-11-19 17:29:47 | I | local_path=),
86
+ 24-11-19 17:29:47 | I | rotation=None,
87
+ 24-11-19 17:29:47 | I | reorder=None,
88
+ 24-11-19 17:29:47 | I | smooth=None,
89
+ 24-11-19 17:29:47 | I | develop_dtype=torch.float32),
90
+ 24-11-19 17:29:47 | I | seed=12345,
91
+ 24-11-19 17:29:47 | I | skip_eval=False,
92
+ 24-11-19 17:29:47 | I | load_from=,
93
+ 24-11-19 17:29:47 | I | save_model=False,
94
+ 24-11-19 17:29:47 | I | copy_on_save=False)
95
+ 24-11-19 17:29:47 | I | === Dumped Configurations ===
96
+ 24-11-19 17:29:47 | I | { 'cache': {'path': {'acts': '', 'reorder': '', 'rotation': '', 'smooth': '', 'wgts': ''}, 'root': 'runs/shang'},
97
+ 24-11-19 17:29:47 | I | 'copy_on_save': False,
98
+ 24-11-19 17:29:47 | I | 'eval': {'batch_size': 8, 'evaluators': ['gptq'], 'max_seq_length': -4096, 'num_gpus': 1, 'tasks': ['wikitext']},
99
+ 24-11-19 17:29:47 | I | 'load_from': '',
100
+ 24-11-19 17:29:47 | I | 'model': { 'dtype': 'torch.float16',
101
+ 24-11-19 17:29:47 | I | 'family': 'llama-3',
102
+ 24-11-19 17:29:47 | I | 'local_path': '/home/yujunlin/models/llama-3/llama-3-8b-instruct-gradient-1048k',
103
+ 24-11-19 17:29:47 | I | 'local_root': '/home/yujunlin/models',
104
+ 24-11-19 17:29:47 | I | 'name': 'llama-3-8b-instruct-gradient-1048k',
105
+ 24-11-19 17:29:47 | I | 'path': '/home/yujunlin/models/llama-3/llama-3-8b-instruct-gradient-1048k',
106
+ 24-11-19 17:29:47 | I | 'root': ''},
107
+ 24-11-19 17:29:47 | I | 'output': {'dirname': 'default-pileval.128x1024.[0-0]', 'job': 'run', 'root': 'runs/shang'},
108
+ 24-11-19 17:29:47 | I | 'quant': { 'calib': { 'data': 'pileval',
109
+ 24-11-19 17:29:47 | I | 'local_path': '',
110
+ 24-11-19 17:29:47 | I | 'max_seq_length': 0,
111
+ 24-11-19 17:29:47 | I | 'min_seq_length': 0,
112
+ 24-11-19 17:29:47 | I | 'num_samples': 128,
113
+ 24-11-19 17:29:47 | I | 'path': 'mit-han-lab/pile-val-backup',
114
+ 24-11-19 17:29:47 | I | 'seq_length': 1024},
115
+ 24-11-19 17:29:47 | I | 'develop_dtype': 'torch.float32',
116
+ 24-11-19 17:29:47 | I | 'enable_reorder': False,
117
+ 24-11-19 17:29:47 | I | 'enable_rotation': False,
118
+ 24-11-19 17:29:47 | I | 'enable_smooth': False,
119
+ 24-11-19 17:29:47 | I | 'ipts': { 'dtype': None,
120
+ 24-11-19 17:29:47 | I | 'enable_calib_range': False,
121
+ 24-11-19 17:29:47 | I | 'group_shapes': [[-1, -1, -1]],
122
+ 24-11-19 17:29:47 | I | 'scale_dtypes': [None],
123
+ 24-11-19 17:29:47 | I | 'skips': [],
124
+ 24-11-19 17:29:47 | I | 'static': False,
125
+ 24-11-19 17:29:47 | I | 'zero_point': None},
126
+ 24-11-19 17:29:47 | I | 'opts': { 'dtype': None,
127
+ 24-11-19 17:29:47 | I | 'enable_calib_range': False,
128
+ 24-11-19 17:29:47 | I | 'group_shapes': [[-1, -1, -1]],
129
+ 24-11-19 17:29:47 | I | 'scale_dtypes': [None],
130
+ 24-11-19 17:29:47 | I | 'skips': [],
131
+ 24-11-19 17:29:47 | I | 'static': False,
132
+ 24-11-19 17:29:47 | I | 'zero_point': None},
133
+ 24-11-19 17:29:47 | I | 'wgts': { 'dtype': None,
134
+ 24-11-19 17:29:47 | I | 'enable_calib_range': False,
135
+ 24-11-19 17:29:47 | I | 'enable_kernel_gptq': False,
136
+ 24-11-19 17:29:47 | I | 'group_shapes': [[-1, -1, -1]],
137
+ 24-11-19 17:29:47 | I | 'intermediate_dtypes': [],
138
+ 24-11-19 17:29:47 | I | 'intermediate_levels': [],
139
+ 24-11-19 17:29:47 | I | 'needs_dequant_saturation': False,
140
+ 24-11-19 17:29:47 | I | 'scale_dtypes': [None],
141
+ 24-11-19 17:29:47 | I | 'skips': [],
142
+ 24-11-19 17:29:47 | I | 'zero_point': None}},
143
+ 24-11-19 17:29:47 | I | 'save_model': False,
144
+ 24-11-19 17:29:47 | I | 'seed': 12345,
145
+ 24-11-19 17:29:47 | I | 'skip_eval': False}
146
+ 24-11-19 17:29:47 | I | === Output Directory ===
147
+ 24-11-19 17:29:47 | I | runs/shang/llm/llama-3/llama-3-8b-instruct-gradient-1048k/w.16-x.16-y.16/w.fp16-x.fp16-y.fp16/w.tnsr.fp16-x.tnsr.fp16-y.tnsr.fp16/default-pileval.128x1024.[0-0]/run-241119.172947
148
+ 24-11-19 17:29:47 | I | === Start Evaluating ===
149
+ 24-11-19 17:29:47 | I | * Building model llama-3-8b-instruct-gradient-1048k from /home/yujunlin/models/llama-3/llama-3-8b-instruct-gradient-1048k
150
+ 24-11-19 17:29:48 | I | We will use 90% of the memory on device 0 for storing the model, and 10% for the buffer to avoid OOM. You can set `max_memory` in to a higher value to use more memory (at your own risk).
151
+ 24-11-19 17:29:56 | I | - Patching LlamaSdpaAttention.forward in model.layers.0.self_attn
152
+ 24-11-19 17:29:56 | I | - Patching LlamaSdpaAttention.forward in model.layers.1.self_attn
153
+ 24-11-19 17:29:56 | I | - Patching LlamaSdpaAttention.forward in model.layers.2.self_attn
154
+ 24-11-19 17:29:56 | I | - Patching LlamaSdpaAttention.forward in model.layers.3.self_attn
155
+ 24-11-19 17:29:56 | I | - Patching LlamaSdpaAttention.forward in model.layers.4.self_attn
156
+ 24-11-19 17:29:56 | I | - Patching LlamaSdpaAttention.forward in model.layers.5.self_attn
157
+ 24-11-19 17:29:56 | I | - Patching LlamaSdpaAttention.forward in model.layers.6.self_attn
158
+ 24-11-19 17:29:56 | I | - Patching LlamaSdpaAttention.forward in model.layers.7.self_attn
159
+ 24-11-19 17:29:56 | I | - Patching LlamaSdpaAttention.forward in model.layers.8.self_attn
160
+ 24-11-19 17:29:56 | I | - Patching LlamaSdpaAttention.forward in model.layers.9.self_attn
161
+ 24-11-19 17:29:56 | I | - Patching LlamaSdpaAttention.forward in model.layers.10.self_attn
162
+ 24-11-19 17:29:56 | I | - Patching LlamaSdpaAttention.forward in model.layers.11.self_attn
163
+ 24-11-19 17:29:56 | I | - Patching LlamaSdpaAttention.forward in model.layers.12.self_attn
164
+ 24-11-19 17:29:56 | I | - Patching LlamaSdpaAttention.forward in model.layers.13.self_attn
165
+ 24-11-19 17:29:56 | I | - Patching LlamaSdpaAttention.forward in model.layers.14.self_attn
166
+ 24-11-19 17:29:56 | I | - Patching LlamaSdpaAttention.forward in model.layers.15.self_attn
167
+ 24-11-19 17:29:56 | I | - Patching LlamaSdpaAttention.forward in model.layers.16.self_attn
168
+ 24-11-19 17:29:56 | I | - Patching LlamaSdpaAttention.forward in model.layers.17.self_attn
169
+ 24-11-19 17:29:56 | I | - Patching LlamaSdpaAttention.forward in model.layers.18.self_attn
170
+ 24-11-19 17:29:56 | I | - Patching LlamaSdpaAttention.forward in model.layers.19.self_attn
171
+ 24-11-19 17:29:56 | I | - Patching LlamaSdpaAttention.forward in model.layers.20.self_attn
172
+ 24-11-19 17:29:56 | I | - Patching LlamaSdpaAttention.forward in model.layers.21.self_attn
173
+ 24-11-19 17:29:56 | I | - Patching LlamaSdpaAttention.forward in model.layers.22.self_attn
174
+ 24-11-19 17:29:56 | I | - Patching LlamaSdpaAttention.forward in model.layers.23.self_attn
175
+ 24-11-19 17:29:56 | I | - Patching LlamaSdpaAttention.forward in model.layers.24.self_attn
176
+ 24-11-19 17:29:56 | I | - Patching LlamaSdpaAttention.forward in model.layers.25.self_attn
177
+ 24-11-19 17:29:56 | I | - Patching LlamaSdpaAttention.forward in model.layers.26.self_attn
178
+ 24-11-19 17:29:56 | I | - Patching LlamaSdpaAttention.forward in model.layers.27.self_attn
179
+ 24-11-19 17:29:56 | I | - Patching LlamaSdpaAttention.forward in model.layers.28.self_attn
180
+ 24-11-19 17:29:56 | I | - Patching LlamaSdpaAttention.forward in model.layers.29.self_attn
181
+ 24-11-19 17:29:56 | I | - Patching LlamaSdpaAttention.forward in model.layers.30.self_attn
182
+ 24-11-19 17:29:56 | I | - Patching LlamaSdpaAttention.forward in model.layers.31.self_attn
183
+ 24-11-19 17:29:56 | I | * Development dtype is torch.float32
184
+ 24-11-19 17:29:56 | I | * Evaluating model
185
+ 24-11-19 17:29:56 | W | `pretrained` model kwarg is not of type `str`. Many other model arguments may be ignored. Please do not launch via accelerate or use `parallelize=True` if passing an existing model this way.
186
+ 24-11-19 17:29:56 | I | Using model type 'default'
187
+ 24-11-19 17:29:56 | W | Passed an already-initialized model through `pretrained`, assuming single-process call to evaluate() or custom distributed integration
188
+ 24-11-19 17:29:56 | I | - Evaluator: gptq
189
+ 24-11-19 17:29:56 | I | - Tasks: ['wikitext']
190
+ 24-11-19 17:29:56 | I | - Batch_size: 8
191
+ 24-11-19 17:29:56 | I | + Max_seq_length: 2048
192
+ 24-11-19 17:29:56 | D | Starting new HTTPS connection (1): huggingface.co:443
193
+ 24-11-19 17:30:03 | W | Using the latest cached version of the dataset since wikitext couldn't be found on the Hugging Face Hub
194
+ 24-11-19 17:30:03 | W | Found the latest cached dataset configuration 'wikitext-2-raw-v1' at /home/yujunlin/.cache/huggingface/datasets/wikitext/wikitext-2-raw-v1/0.0.0/b08601e04326c79dfdd32d625aee71d232d685c3 (last modified on Tue Oct 8 19:51:38 2024).
195
+ 24-11-19 17:30:03 | D | Attempting to acquire lock 23438954666640 on /home/yujunlin/.cache/huggingface/datasets/_home_yujunlin_.cache_huggingface_datasets_wikitext_wikitext-2-raw-v1_0.0.0_b08601e04326c79dfdd32d625aee71d232d685c3.lock
196
+ 24-11-19 17:30:03 | D | Lock 23438954666640 acquired on /home/yujunlin/.cache/huggingface/datasets/_home_yujunlin_.cache_huggingface_datasets_wikitext_wikitext-2-raw-v1_0.0.0_b08601e04326c79dfdd32d625aee71d232d685c3.lock
197
+ 24-11-19 17:30:03 | D | open file: /home/yujunlin/.cache/huggingface/datasets/wikitext/wikitext-2-raw-v1/0.0.0/b08601e04326c79dfdd32d625aee71d232d685c3/dataset_info.json
198
+ 24-11-19 17:30:03 | D | Attempting to release lock 23438954666640 on /home/yujunlin/.cache/huggingface/datasets/_home_yujunlin_.cache_huggingface_datasets_wikitext_wikitext-2-raw-v1_0.0.0_b08601e04326c79dfdd32d625aee71d232d685c3.lock
199
+ 24-11-19 17:30:03 | D | Lock 23438954666640 released on /home/yujunlin/.cache/huggingface/datasets/_home_yujunlin_.cache_huggingface_datasets_wikitext_wikitext-2-raw-v1_0.0.0_b08601e04326c79dfdd32d625aee71d232d685c3.lock
200
+ 24-11-19 17:30:15 | I | - Results:
201
+ 24-11-19 17:30:15 | I | | Task |Version| Metric |Value | |Stderr|
202
+ 24-11-19 17:30:15 | I | |--------|------:|---------------|-----:|---|-----:|
203
+ 24-11-19 17:30:15 | I | |wikitext| 1|word_perplexity|7.8331|± |7.8331|
204
+ 24-11-19 17:30:15 | I |
205
+ 24-11-19 17:30:15 | I | + Max_seq_length: 4096
206
+ 24-11-19 17:30:15 | D | Starting new HTTPS connection (2): huggingface.co:443
207
+ 24-11-19 17:30:21 | W | Using the latest cached version of the dataset since wikitext couldn't be found on the Hugging Face Hub
208
+ 24-11-19 17:30:21 | W | Found the latest cached dataset configuration 'wikitext-2-raw-v1' at /home/yujunlin/.cache/huggingface/datasets/wikitext/wikitext-2-raw-v1/0.0.0/b08601e04326c79dfdd32d625aee71d232d685c3 (last modified on Tue Oct 8 19:51:38 2024).
209
+ 24-11-19 17:30:21 | D | Attempting to acquire lock 23438952840800 on /home/yujunlin/.cache/huggingface/datasets/_home_yujunlin_.cache_huggingface_datasets_wikitext_wikitext-2-raw-v1_0.0.0_b08601e04326c79dfdd32d625aee71d232d685c3.lock
210
+ 24-11-19 17:30:21 | D | Lock 23438952840800 acquired on /home/yujunlin/.cache/huggingface/datasets/_home_yujunlin_.cache_huggingface_datasets_wikitext_wikitext-2-raw-v1_0.0.0_b08601e04326c79dfdd32d625aee71d232d685c3.lock
211
+ 24-11-19 17:30:21 | D | open file: /home/yujunlin/.cache/huggingface/datasets/wikitext/wikitext-2-raw-v1/0.0.0/b08601e04326c79dfdd32d625aee71d232d685c3/dataset_info.json
212
+ 24-11-19 17:30:21 | D | Attempting to release lock 23438952840800 on /home/yujunlin/.cache/huggingface/datasets/_home_yujunlin_.cache_huggingface_datasets_wikitext_wikitext-2-raw-v1_0.0.0_b08601e04326c79dfdd32d625aee71d232d685c3.lock
213
+ 24-11-19 17:30:21 | D | Lock 23438952840800 released on /home/yujunlin/.cache/huggingface/datasets/_home_yujunlin_.cache_huggingface_datasets_wikitext_wikitext-2-raw-v1_0.0.0_b08601e04326c79dfdd32d625aee71d232d685c3.lock
214
+ 24-11-19 17:30:32 | I | - Results:
215
+ 24-11-19 17:30:32 | I | | Task |Version| Metric |Value | |Stderr|
216
+ 24-11-19 17:30:32 | I | |--------|------:|---------------|-----:|---|-----:|
217
+ 24-11-19 17:30:32 | I | |wikitext| 1|word_perplexity|7.2618|± |7.2618|
218
+ 24-11-19 17:30:32 | I |
219
+ 24-11-19 17:30:32 | I | * Saving results to runs/shang/llm/llama-3/llama-3-8b-instruct-gradient-1048k/w.16-x.16-y.16/w.fp16-x.fp16-y.fp16/w.tnsr.fp16-x.tnsr.fp16-y.tnsr.fp16/default-pileval.128x1024.[0-0]/run-241119.172947
runs/llama-3-8b-instruct-gradient-1048k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.GridSearch.bn2.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200545/config-241119.200545.yaml ADDED
@@ -0,0 +1,146 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ cache:
2
+ root: runs/shang
3
+ path:
4
+ rotation: runs/shang/llm/cache/quant/rotation/hadamard/llama-3-8b-instruct-gradient-1048k.pt
5
+ reorder: ''
6
+ smooth: runs/shang/llm/cache/quant/pileval.128x1024.[0-0]/smooth/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/w.skip.[]-x.skip.[]-y.skip.[attn_q]/rotate.hadamard.[out_proj]/smooth.attn.OutputsError.GridSearch.Layer.d2.en1.sn1/smooth.attn.[a.AbsMax.b.AbsMax]/smooth.attn.g20.bn2/llama-3-8b-instruct-gradient-1048k.pt
7
+ wgts: runs/shang/llm/cache/quant/pileval.128x1024.[0-0]/wgts/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/w.skip.[]-x.skip.[]-y.skip.[attn_q]/rotate.hadamard.[out_proj]/smooth.attn.OutputsError.GridSearch.Layer.d2.en1.sn1/smooth.attn.[a.AbsMax.b.AbsMax]/smooth.attn.g20.bn2/w.kernel.gptq.d0p01.b128/w.range.OutputsError.GridSearch.Group.d2.e512.sn1/w.range.r.[0p2.1].g80/w.range.skip.[]/llama-3-8b-instruct-gradient-1048k.pt
8
+ acts: runs/shang/llm/cache/quant/pileval.128x1024.[0-0]/acts/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/w.skip.[]-x.skip.[]-y.skip.[attn_q]/rotate.hadamard.[out_proj]/smooth.attn.OutputsError.GridSearch.Layer.d2.en1.sn1/smooth.attn.[a.AbsMax.b.AbsMax]/smooth.attn.g20.bn2/w.kernel.gptq.d0p01.b128/w.range.OutputsError.GridSearch.Group.d2.e512.sn1/w.range.r.[0p2.1].g80/w.range.skip.[]/y.range.OutputsError.Manual.Layer.d2.en1.sn1/y.range.r.[1]/y.range.skip.[]/llama-3-8b-instruct-gradient-1048k.pt
9
+ output:
10
+ root: runs/shang
11
+ dirname: skip.y.[q]-gptq-rot.[+out]-smth.attn.GridSearch.bn2.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]
12
+ job: run
13
+ model:
14
+ name: llama-3-8b-instruct-gradient-1048k
15
+ family: llama-3
16
+ path: /home/yujunlin/models/llama-3/llama-3-8b-instruct-gradient-1048k
17
+ root: ''
18
+ local_path: /home/yujunlin/models/llama-3/llama-3-8b-instruct-gradient-1048k
19
+ local_root: /home/yujunlin/models
20
+ dtype: torch.float16
21
+ eval:
22
+ num_gpus: 1
23
+ batch_size: 8
24
+ tasks:
25
+ - wikitext
26
+ max_seq_length: -4096
27
+ evaluators:
28
+ - gptq
29
+ quant:
30
+ wgts:
31
+ dtype: sint8
32
+ zero_point: null
33
+ group_shapes:
34
+ - - 1
35
+ - -1
36
+ - -1
37
+ scale_dtypes:
38
+ - torch.float16
39
+ intermediate_dtypes: []
40
+ intermediate_levels: []
41
+ needs_dequant_saturation: false
42
+ skips: []
43
+ enable_kernel_gptq: true
44
+ kernel_gptq:
45
+ damp_percentage: 0.01
46
+ block_size: 128
47
+ num_inv_tries: 250
48
+ hessian_block_size: 512
49
+ enable_calib_range: true
50
+ calib_range:
51
+ degree: 2
52
+ objective: OutputsError
53
+ strategy: GridSearch
54
+ granularity: Group
55
+ element_batch_size: 64
56
+ sample_batch_size: -1
57
+ element_size: 512
58
+ sample_size: -1
59
+ pre_reshape: true
60
+ outputs_device: cpu
61
+ ratio: 1.0
62
+ max_shrink: 0.2
63
+ max_expand: 1.0
64
+ num_grids: 80
65
+ allow_scale: false
66
+ skips: []
67
+ ipts:
68
+ dtype: sint8
69
+ zero_point: null
70
+ group_shapes:
71
+ - - 1
72
+ - -1
73
+ - -1
74
+ scale_dtypes:
75
+ - torch.float16
76
+ skips: []
77
+ static: false
78
+ enable_calib_range: false
79
+ opts:
80
+ dtype: sint8
81
+ zero_point: null
82
+ group_shapes:
83
+ - - -1
84
+ - -1
85
+ - -1
86
+ scale_dtypes:
87
+ - torch.float16
88
+ skips:
89
+ - attn_q
90
+ static: true
91
+ enable_calib_range: true
92
+ calib_range:
93
+ degree: 2
94
+ objective: OutputsError
95
+ strategy: Manual
96
+ granularity: Layer
97
+ element_batch_size: -1
98
+ sample_batch_size: -1
99
+ element_size: -1
100
+ sample_size: -1
101
+ pre_reshape: true
102
+ outputs_device: cpu
103
+ ratio: 1.0
104
+ max_shrink: 0.2
105
+ max_expand: 1.0
106
+ num_grids: 80
107
+ allow_scale: false
108
+ skips: []
109
+ calib:
110
+ data: pileval
111
+ num_samples: 128
112
+ path: mit-han-lab/pile-val-backup
113
+ seq_length: 1024
114
+ min_seq_length: 0
115
+ max_seq_length: 0
116
+ local_path: ''
117
+ enable_rotation: true
118
+ rotation:
119
+ random: false
120
+ transforms:
121
+ - out_proj
122
+ enable_reorder: false
123
+ enable_smooth: true
124
+ smooth:
125
+ enable_proj: false
126
+ enable_attn: true
127
+ attn:
128
+ degree: 2
129
+ strategy: GridSearch
130
+ sample_batch_size: -1
131
+ sample_size: -1
132
+ outputs_device: cpu
133
+ allow_a_quant: true
134
+ allow_b_quant: true
135
+ spans:
136
+ - - AbsMax
137
+ - AbsMax
138
+ alpha: 0.5
139
+ beta: -2
140
+ num_grids: 20
141
+ develop_dtype: torch.float32
142
+ seed: 12345
143
+ skip_eval: false
144
+ load_from: ''
145
+ save_model: 'true'
146
+ copy_on_save: false
runs/llama-3-8b-instruct-gradient-1048k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.GridSearch.bn2.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200545/model/acts.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ee8c1a641aeb1fd1a3376f43e71fb6e4d46ed629e0117cc990b225cc4521eeab
3
+ size 36034
runs/llama-3-8b-instruct-gradient-1048k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.GridSearch.bn2.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200545/model/model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:86a0a4420a3ab6b037c08654751c88818251fea6062d3041237ff4f2e3b00907
3
+ size 16060644786
runs/llama-3-8b-instruct-gradient-1048k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.GridSearch.bn2.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200545/model/rotation.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:50249bca461c76fdeb0f51b07f019025e975e67b47eb49f2a69e5d5dee80e195
3
+ size 134219107
runs/llama-3-8b-instruct-gradient-1048k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.GridSearch.bn2.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200545/model/scale.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5a8f507ae04bfb9f02a9d7dc98d60b7dd69a9d6ed4b654ed8d64a71f7f52c487
3
+ size 5631594
runs/llama-3-8b-instruct-gradient-1048k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.GridSearch.bn2.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200545/model/smooth.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:64eef470c18d4efd8590cc9d25734e3313da1181fc3f4282ab6d09cfd58ecd93
3
+ size 535234
runs/llama-3-8b-instruct-gradient-1048k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.GridSearch.bn2.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200545/model/wgts.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e479d958e3157b658360082fd2c6699cc38833c35d198babf006616a1564d95a
3
+ size 5593150
runs/llama-3-8b-instruct-gradient-1048k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.GridSearch.bn2.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200545/results-241119.200545.json ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "gptq": {
3
+ "2048": {
4
+ "results": {
5
+ "wikitext": {
6
+ "word_perplexity": 7.989659368184836
7
+ }
8
+ },
9
+ "versions": {
10
+ "wikitext": 1
11
+ },
12
+ "config": {
13
+ "model": "llama-3-8b-instruct-gradient-1048k"
14
+ },
15
+ "model": "llama-3-8b-instruct-gradient-1048k"
16
+ },
17
+ "4096": {
18
+ "results": {
19
+ "wikitext": {
20
+ "word_perplexity": 7.396661695312681
21
+ }
22
+ },
23
+ "versions": {
24
+ "wikitext": 1
25
+ },
26
+ "config": {
27
+ "model": "llama-3-8b-instruct-gradient-1048k"
28
+ },
29
+ "model": "llama-3-8b-instruct-gradient-1048k"
30
+ }
31
+ }
32
+ }
runs/llama-3-8b-instruct-gradient-1048k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.GridSearch.bn2.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200545/run-241119.183745.log ADDED
The diff for this file is too large to render. See raw diff
 
runs/llama-3-8b-instruct-gradient-1048k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.GridSearch.bn2.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200545/run-241119.200545.log ADDED
The diff for this file is too large to render. See raw diff
 
runs/llama-3-8b-instruct-gradient-1048k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.a0p5.b0.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200729/config-241119.200729.yaml ADDED
@@ -0,0 +1,146 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ cache:
2
+ root: runs/shang
3
+ path:
4
+ rotation: runs/shang/llm/cache/quant/rotation/hadamard/llama-3-8b-instruct-gradient-1048k.pt
5
+ reorder: ''
6
+ smooth: runs/shang/llm/cache/quant/pileval.128x1024.[0-0]/smooth/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/w.skip.[]-x.skip.[]-y.skip.[attn_q]/rotate.hadamard.[out_proj]/smooth.attn.OutputsError.Manual.Layer.d2.en1.sn1/smooth.attn.[a.AbsMax.b.AbsMax]/smooth.attn.a0p5.b0/llama-3-8b-instruct-gradient-1048k.pt
7
+ wgts: runs/shang/llm/cache/quant/pileval.128x1024.[0-0]/wgts/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/w.skip.[]-x.skip.[]-y.skip.[attn_q]/rotate.hadamard.[out_proj]/smooth.attn.OutputsError.Manual.Layer.d2.en1.sn1/smooth.attn.[a.AbsMax.b.AbsMax]/smooth.attn.a0p5.b0/w.kernel.gptq.d0p01.b128/w.range.OutputsError.GridSearch.Group.d2.e512.sn1/w.range.r.[0p2.1].g80/w.range.skip.[]/llama-3-8b-instruct-gradient-1048k.pt
8
+ acts: runs/shang/llm/cache/quant/pileval.128x1024.[0-0]/acts/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/w.skip.[]-x.skip.[]-y.skip.[attn_q]/rotate.hadamard.[out_proj]/smooth.attn.OutputsError.Manual.Layer.d2.en1.sn1/smooth.attn.[a.AbsMax.b.AbsMax]/smooth.attn.a0p5.b0/w.kernel.gptq.d0p01.b128/w.range.OutputsError.GridSearch.Group.d2.e512.sn1/w.range.r.[0p2.1].g80/w.range.skip.[]/y.range.OutputsError.Manual.Layer.d2.en1.sn1/y.range.r.[1]/y.range.skip.[]/llama-3-8b-instruct-gradient-1048k.pt
9
+ output:
10
+ root: runs/shang
11
+ dirname: skip.y.[q]-gptq-rot.[+out]-smth.attn.a0p5.b0.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]
12
+ job: run
13
+ model:
14
+ name: llama-3-8b-instruct-gradient-1048k
15
+ family: llama-3
16
+ path: /home/yujunlin/models/llama-3/llama-3-8b-instruct-gradient-1048k
17
+ root: ''
18
+ local_path: /home/yujunlin/models/llama-3/llama-3-8b-instruct-gradient-1048k
19
+ local_root: /home/yujunlin/models
20
+ dtype: torch.float16
21
+ eval:
22
+ num_gpus: 1
23
+ batch_size: 8
24
+ tasks:
25
+ - wikitext
26
+ max_seq_length: -4096
27
+ evaluators:
28
+ - gptq
29
+ quant:
30
+ wgts:
31
+ dtype: sint8
32
+ zero_point: null
33
+ group_shapes:
34
+ - - 1
35
+ - -1
36
+ - -1
37
+ scale_dtypes:
38
+ - torch.float16
39
+ intermediate_dtypes: []
40
+ intermediate_levels: []
41
+ needs_dequant_saturation: false
42
+ skips: []
43
+ enable_kernel_gptq: true
44
+ kernel_gptq:
45
+ damp_percentage: 0.01
46
+ block_size: 128
47
+ num_inv_tries: 250
48
+ hessian_block_size: 512
49
+ enable_calib_range: true
50
+ calib_range:
51
+ degree: 2
52
+ objective: OutputsError
53
+ strategy: GridSearch
54
+ granularity: Group
55
+ element_batch_size: 64
56
+ sample_batch_size: -1
57
+ element_size: 512
58
+ sample_size: -1
59
+ pre_reshape: true
60
+ outputs_device: cpu
61
+ ratio: 1.0
62
+ max_shrink: 0.2
63
+ max_expand: 1.0
64
+ num_grids: 80
65
+ allow_scale: false
66
+ skips: []
67
+ ipts:
68
+ dtype: sint8
69
+ zero_point: null
70
+ group_shapes:
71
+ - - 1
72
+ - -1
73
+ - -1
74
+ scale_dtypes:
75
+ - torch.float16
76
+ skips: []
77
+ static: false
78
+ enable_calib_range: false
79
+ opts:
80
+ dtype: sint8
81
+ zero_point: null
82
+ group_shapes:
83
+ - - -1
84
+ - -1
85
+ - -1
86
+ scale_dtypes:
87
+ - torch.float16
88
+ skips:
89
+ - attn_q
90
+ static: true
91
+ enable_calib_range: true
92
+ calib_range:
93
+ degree: 2
94
+ objective: OutputsError
95
+ strategy: Manual
96
+ granularity: Layer
97
+ element_batch_size: -1
98
+ sample_batch_size: -1
99
+ element_size: -1
100
+ sample_size: -1
101
+ pre_reshape: true
102
+ outputs_device: cpu
103
+ ratio: 1.0
104
+ max_shrink: 0.2
105
+ max_expand: 1.0
106
+ num_grids: 80
107
+ allow_scale: false
108
+ skips: []
109
+ calib:
110
+ data: pileval
111
+ num_samples: 128
112
+ path: mit-han-lab/pile-val-backup
113
+ seq_length: 1024
114
+ min_seq_length: 0
115
+ max_seq_length: 0
116
+ local_path: ''
117
+ enable_rotation: true
118
+ rotation:
119
+ random: false
120
+ transforms:
121
+ - out_proj
122
+ enable_reorder: false
123
+ enable_smooth: true
124
+ smooth:
125
+ enable_proj: false
126
+ enable_attn: true
127
+ attn:
128
+ degree: 2
129
+ strategy: Manual
130
+ sample_batch_size: -1
131
+ sample_size: -1
132
+ outputs_device: cpu
133
+ allow_a_quant: true
134
+ allow_b_quant: true
135
+ spans:
136
+ - - AbsMax
137
+ - AbsMax
138
+ alpha: 0.5
139
+ beta: 0
140
+ num_grids: 20
141
+ develop_dtype: torch.float32
142
+ seed: 12345
143
+ skip_eval: false
144
+ load_from: ''
145
+ save_model: 'true'
146
+ copy_on_save: false
runs/llama-3-8b-instruct-gradient-1048k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.a0p5.b0.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200729/model/acts.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac70125cfcf7096842127bff88c84a458b44f2fc66bf8fb1970940c922e9e805
3
+ size 36034
runs/llama-3-8b-instruct-gradient-1048k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.a0p5.b0.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200729/model/model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:125f19e7dc83de3adaf1c1b7c9c20d58c72f075b630c568116fe3e6da7c90719
3
+ size 16060644786
runs/llama-3-8b-instruct-gradient-1048k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.a0p5.b0.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200729/model/rotation.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:50249bca461c76fdeb0f51b07f019025e975e67b47eb49f2a69e5d5dee80e195
3
+ size 134219107
runs/llama-3-8b-instruct-gradient-1048k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.a0p5.b0.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200729/model/scale.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8e586371fed1a42757c5fdf37cca9febf4f2fbd8a7c4a5dd97902d01fc95c931
3
+ size 5631594
runs/llama-3-8b-instruct-gradient-1048k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.a0p5.b0.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200729/model/smooth.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a3f26f4aed70f6838fd246750c38631eed6601e521a12ba700c6c065f27e37dd
3
+ size 535234
runs/llama-3-8b-instruct-gradient-1048k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.a0p5.b0.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200729/model/wgts.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b399ac3cecb57039c93754780228596a35530dee066aee45312a98c3933a7f4b
3
+ size 5593150
runs/llama-3-8b-instruct-gradient-1048k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.a0p5.b0.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200729/results-241119.200729.json ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "gptq": {
3
+ "2048": {
4
+ "results": {
5
+ "wikitext": {
6
+ "word_perplexity": 7.989206018837591
7
+ }
8
+ },
9
+ "versions": {
10
+ "wikitext": 1
11
+ },
12
+ "config": {
13
+ "model": "llama-3-8b-instruct-gradient-1048k"
14
+ },
15
+ "model": "llama-3-8b-instruct-gradient-1048k"
16
+ },
17
+ "4096": {
18
+ "results": {
19
+ "wikitext": {
20
+ "word_perplexity": 7.399224506977849
21
+ }
22
+ },
23
+ "versions": {
24
+ "wikitext": 1
25
+ },
26
+ "config": {
27
+ "model": "llama-3-8b-instruct-gradient-1048k"
28
+ },
29
+ "model": "llama-3-8b-instruct-gradient-1048k"
30
+ }
31
+ }
32
+ }
runs/llama-3-8b-instruct-gradient-1048k/w.8-x.8-y.8/w.sint8-x.sint8-y.sint8/w.gchn.fp16-x.gchn.fp16-y.tsnr.fp16/rotate-smooth.attn-w.static.gptq.range-y.static/skip.y.[q]-gptq-rot.[+out]-smth.attn.a0p5.b0.[AbsMax]-w.range.[0p2.1.g80]-pileval.128x1024.[0-0]/run-241119.200729/run-241119.200729.log ADDED
The diff for this file is too large to render. See raw diff