pingnie committed on
Commit
17162c6
·
1 Parent(s): 07fa1fd

add debug info

Browse files
backend-cli.py CHANGED
@@ -172,6 +172,7 @@ def process_evaluation(task: Task, eval_request: EvalRequest, limit: Optional[in
172
  results['results'][task_name][f"{key},none"] = value
173
 
174
  results['results'][task_name]['batch_size,none'] = batch_size
 
175
  print(f"gpu_stats_list: {gpu_stats_list}")
176
  print("GPU Usage:", gpu_info)
177
 
 
172
  results['results'][task_name][f"{key},none"] = value
173
 
174
  results['results'][task_name]['batch_size,none'] = batch_size
175
+ results['results'][task_name]['precision,none'] = eval_request.precision
176
  print(f"gpu_stats_list: {gpu_stats_list}")
177
  print("GPU Usage:", gpu_info)
178
 
src/display/utils.py CHANGED
@@ -15,11 +15,11 @@ MULTIPLE_CHOICEs = ["mmlu"]
15
 
16
  GPU_TEMP = 'Temp(C)'
17
  GPU_Power = 'Power(W)'
18
- GPU_Mem = 'Mem(M)'
19
  GPU_Name = "GPU"
20
  GPU_Util = 'Util(%)'
21
  BATCH_SIZE = 'bs'
22
-
23
  system_metrics_to_name_map = {
24
  "end_to_end_time": f"{E2Es}",
25
  "prefilling_time": f"{PREs}",
@@ -32,6 +32,7 @@ gpu_metrics_to_name_map = {
32
  GPU_Power: GPU_Power,
33
  GPU_Mem: GPU_Mem,
34
  "batch_size": BATCH_SIZE,
 
35
  GPU_Name: GPU_Name,
36
  }
37
 
@@ -105,6 +106,7 @@ for task in Tasks:
105
  # System performance metrics
106
  auto_eval_column_dict.append([f"{task.name}_end_to_end_time", ColumnContent, ColumnContent(f"{task.value.col_name} {E2Es}", "number", True)])
107
  auto_eval_column_dict.append([f"{task.name}_batch_size", ColumnContent, ColumnContent(f"{task.value.col_name} {BATCH_SIZE}", "number", True)])
 
108
  auto_eval_column_dict.append([f"{task.name}_gpu_mem", ColumnContent, ColumnContent(f"{task.value.col_name} {GPU_Mem}", "number", True)])
109
  auto_eval_column_dict.append([f"{task.name}_gpu", ColumnContent, ColumnContent(f"{task.value.col_name} {GPU_Name}", "str", True)])
110
  auto_eval_column_dict.append([f"{task.name}_gpu_util", ColumnContent, ColumnContent(f"{task.value.col_name} {GPU_Util}", "number", True)])
 
15
 
16
  GPU_TEMP = 'Temp(C)'
17
  GPU_Power = 'Power(W)'
18
+ GPU_Mem = 'Mem(G)'
19
  GPU_Name = "GPU"
20
  GPU_Util = 'Util(%)'
21
  BATCH_SIZE = 'bs'
22
+ PRECISION = "Precision"
23
  system_metrics_to_name_map = {
24
  "end_to_end_time": f"{E2Es}",
25
  "prefilling_time": f"{PREs}",
 
32
  GPU_Power: GPU_Power,
33
  GPU_Mem: GPU_Mem,
34
  "batch_size": BATCH_SIZE,
35
+ "precision": PRECISION,
36
  GPU_Name: GPU_Name,
37
  }
38
 
 
106
  # System performance metrics
107
  auto_eval_column_dict.append([f"{task.name}_end_to_end_time", ColumnContent, ColumnContent(f"{task.value.col_name} {E2Es}", "number", True)])
108
  auto_eval_column_dict.append([f"{task.name}_batch_size", ColumnContent, ColumnContent(f"{task.value.col_name} {BATCH_SIZE}", "number", True)])
109
+ # auto_eval_column_dict.append([f"{task.name}_precision", ColumnContent, ColumnContent(f"{task.value.col_name} {PRECISION}", "str", True)])
110
  auto_eval_column_dict.append([f"{task.name}_gpu_mem", ColumnContent, ColumnContent(f"{task.value.col_name} {GPU_Mem}", "number", True)])
111
  auto_eval_column_dict.append([f"{task.name}_gpu", ColumnContent, ColumnContent(f"{task.value.col_name} {GPU_Name}", "str", True)])
112
  auto_eval_column_dict.append([f"{task.name}_gpu_util", ColumnContent, ColumnContent(f"{task.value.col_name} {GPU_Util}", "number", True)])
src/leaderboard/read_evals.py CHANGED
@@ -106,6 +106,9 @@ class EvalResult:
106
  if "GPU" in metric:
107
  results[benchmark][metric] = value
108
  continue
 
 
 
109
 
110
  if "rouge" in metric and "truthful" not in benchmark:
111
  multiplier = 1.0
 
106
  if "GPU" in metric:
107
  results[benchmark][metric] = value
108
  continue
109
+ if "precision" in metric:
110
+ results[benchmark][metric] = value
111
+ continue
112
 
113
  if "rouge" in metric and "truthful" not in benchmark:
114
  multiplier = 1.0
src/utils.py CHANGED
@@ -74,13 +74,13 @@ def parse_nvidia_smi():
74
  gpu_info.update({
75
  GPU_TEMP: temp,
76
  GPU_Power: power_usage,
77
- GPU_Mem: mem_usage,
78
  GPU_Util: gpu_util
79
  })
80
 
81
  if len(gpu_info) >= 4:
82
  gpu_stats.append(gpu_info)
83
- print(f"len(gpu_stats): {len(gpu_stats)}")
84
  gpu_name = f"{len(gpu_stats)}x{gpu_name}"
85
  gpu_stats_total = {
86
  GPU_TEMP: 0,
@@ -94,7 +94,7 @@ def parse_nvidia_smi():
94
  gpu_stats_total[GPU_Power] += gpu_stat[GPU_Power]
95
  gpu_stats_total[GPU_Mem] += gpu_stat[GPU_Mem]
96
  gpu_stats_total[GPU_Util] += gpu_stat[GPU_Util]
97
-
98
  gpu_stats_total[GPU_TEMP] /= len(gpu_stats)
99
  gpu_stats_total[GPU_Power] /= len(gpu_stats)
100
  gpu_stats_total[GPU_Util] /= len(gpu_stats)
 
74
  gpu_info.update({
75
  GPU_TEMP: temp,
76
  GPU_Power: power_usage,
77
+ GPU_Mem: round(mem_usage / 1024, 2),
78
  GPU_Util: gpu_util
79
  })
80
 
81
  if len(gpu_info) >= 4:
82
  gpu_stats.append(gpu_info)
83
+ print(f"gpu_stats: {gpu_stats}")
84
  gpu_name = f"{len(gpu_stats)}x{gpu_name}"
85
  gpu_stats_total = {
86
  GPU_TEMP: 0,
 
94
  gpu_stats_total[GPU_Power] += gpu_stat[GPU_Power]
95
  gpu_stats_total[GPU_Mem] += gpu_stat[GPU_Mem]
96
  gpu_stats_total[GPU_Util] += gpu_stat[GPU_Util]
97
+ gpu_stats_total[GPU_Mem] = gpu_stats_total[GPU_Mem] # G
98
  gpu_stats_total[GPU_TEMP] /= len(gpu_stats)
99
  gpu_stats_total[GPU_Power] /= len(gpu_stats)
100
  gpu_stats_total[GPU_Util] /= len(gpu_stats)