Commit 05b3b2d by Yeyito_gpu • 1 Parent(s): 22c2b9c

Unloading models + current evals

data/code_eval_board.csv CHANGED
@@ -29,4 +29,8 @@ T,Models,ARC,HellaSwag,MMLU,TruthfulQA,Winogrande,GSM8K,Reference Model
 🔶,chargoddard/loyal-piano-m7,0.11,0.13,0.19,0.45,0.0,0.97,mistralai/Mistral-7B-v0.1
 🔶,rishiraj/CatPPT,0.09,0.12,0.19,0.44,0.0,0.98,mistralai/Mistral-7B-v0.1
 🔶,togethercomputer/RedPajama-INCITE-Instruct-3B-v1,0.08,0.12,0.19,0.43,0.0,0.77,mistralai/Mistral-7B-v0.1
-
+🔶,jan-hq/trinity-v1,0.07,0.16,0.18,0.35,0.0,0.95,mistralai/Mistral-7B-v0.1
+🔶,lmsys/vicuna-7b-v1.5,0.13,0.16,0.22,0.62,0.0,0.96,mistralai/Mistral-7B-v0.1
+🟢,huggyllama/llama-7b,0.11,0.17,0.22,0.46,0.0,0.79,mistralai/Mistral-7B-v0.1
+🟢,tiiuae/falcon-7b-instruct,0.06,0.16,0.19,0.56,0.0,0.98,mistralai/Mistral-7B-v0.1
+🔶,NousResearch/Nous-Hermes-llama-2-7b,0.09,0.18,0.26,0.5,0.0,0.96,mistralai/Mistral-7B-v0.1
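For anyone consuming the updated board, the schema is exactly the header row shown in the hunk above. A minimal sketch of loading it with pandas, using the repo-relative path from this diff:

import pandas as pd

# Load the leaderboard committed above; the columns are
# T,Models,ARC,HellaSwag,MMLU,TruthfulQA,Winogrande,GSM8K,Reference Model.
board = pd.read_csv("data/code_eval_board.csv")

# Example: models by MMLU score, highest first.
print(board.sort_values("MMLU", ascending=False)[["Models", "MMLU", "Reference Model"]].head(10))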
data/queue.csv CHANGED
@@ -1,12 +1,7 @@
 Type,Model,ref_model
-🔶 finetuned,lmsys/vicuna-7b-v1.5,mistralai/Mistral-7B-v0.1
-🔶 finetuned,jan-hq/trinity-v1,mistralai/Mistral-7B-v0.1
-🔶 finetuned,microsoft/Orca-2-7b,huggyllama/llama-7b
-🟢 base,huggyllama/llama-7b,mistralai/Mistral-7B-v0.1
 🔶 finetuned,openaccess-ai-collective/DPOpenHermes-7B-v2,mistralai/Mistral-7B-v0.1
-🟢 base,tiiuae/falcon-7b-instruct,mistralai/Mistral-7B-v0.1
+🔶 finetuned,microsoft/Orca-2-7B,mistralai/Mistral-7B-v0.1
 🟢 base,01-ai/Yi-6B,mistralai/Mistral-7B-v0.1
-🔶 finetuned,NousResearch/Nous-Hermes-llama-2-7b,mistralai/Mistral-7B-v0.1
 🔶 finetuned,VAGOsolutions/SauerkrautLM-SOLAR-Instruct,mistralai/Mistral-7B-v0.1
 🔶 finetuned,VAGOsolutions/SauerkrautLM-SOLAR-Instruct,huggyllama/llama-7b
 🔶 finetuned,VAGOsolutions/SauerkrautLM-SOLAR-Instruct,upstage/SOLAR-10.7B-v1.0
detect-pretrain-code-contamination/src/__pycache__/analyze.cpython-310.pyc CHANGED
Binary files a/detect-pretrain-code-contamination/src/__pycache__/analyze.cpython-310.pyc and b/detect-pretrain-code-contamination/src/__pycache__/analyze.cpython-310.pyc differ
 
detect-pretrain-code-contamination/src/__pycache__/eval.cpython-310.pyc CHANGED
Binary files a/detect-pretrain-code-contamination/src/__pycache__/eval.cpython-310.pyc and b/detect-pretrain-code-contamination/src/__pycache__/eval.cpython-310.pyc differ
 
detect-pretrain-code-contamination/src/__pycache__/options.cpython-310.pyc CHANGED
Binary files a/detect-pretrain-code-contamination/src/__pycache__/options.cpython-310.pyc and b/detect-pretrain-code-contamination/src/__pycache__/options.cpython-310.pyc differ
 
detect-pretrain-code-contamination/src/__pycache__/run.cpython-310.pyc CHANGED
Binary files a/detect-pretrain-code-contamination/src/__pycache__/run.cpython-310.pyc and b/detect-pretrain-code-contamination/src/__pycache__/run.cpython-310.pyc differ
 
detect-pretrain-code-contamination/src/__pycache__/utils.cpython-310.pyc CHANGED
Binary files a/detect-pretrain-code-contamination/src/__pycache__/utils.cpython-310.pyc and b/detect-pretrain-code-contamination/src/__pycache__/utils.cpython-310.pyc differ
 
detect-pretrain-code-contamination/src/run.py CHANGED
@@ -37,10 +37,7 @@ def load_data(filename):
 
     return loaded_data
 
-def unload_model(model,tokenizer):
-    print("[X] Cannot unload model! Functionality not implemented!")
-
-def load_model(name1,ref_model):
+def load_model(name1):
     if name1 not in models:
         model1 = AutoModelForCausalLM.from_pretrained(name1, return_dict=True, device_map='auto')
         model1.eval()
@@ -120,7 +117,7 @@ def evaluate_data(test_data, col_name, target_model, ref_model, ratio_gen, data_
         neighbors_dls = load_data(f'saves/{ref_model_clean}/{data_name_clean}/neighbors_dls.txt')
     except:
         ### MODEL 2 likelihoods
-        model2, tokenizer2 = load_model(ref_model,ref_model)
+        model2, tokenizer2 = load_model(ref_model)
         inference2_pass = [] #0: p_ref, #1: all_prob_ref, #2: p_ref_likelihood
         for ex in tqdm(test_data):
             text = ex[col_name]
@@ -136,14 +133,22 @@ def evaluate_data(test_data, col_name, target_model, ref_model, ratio_gen, data_
             new_ex = get_neighbors(text,inference2_pass[counter][2],model2,tokenizer2,ratio_gen,data_name)
             counter = counter + 1
             neighbors_dls.append(new_ex)
-        unload_model(model2,tokenizer2)
+
+        del models[ref_model]
+        del models[ref_model + "_tokenizer"]
+        model2.cpu()
+        del model2
+        del tokenizer2
+        gc.collect()
+        torch.cuda.empty_cache()
+
         # Because it uses temp it is not invariant, however taking a snapshot in time should be just fine.
         save_data(f'saves/{ref_model_clean}/{data_name_clean}/inference2_pass.txt',inference2_pass)
         save_data(f'saves/{ref_model_clean}/{data_name_clean}/neighbors_dls.txt',neighbors_dls)
         print("Saved ref data, exiting.")
 
     ### MODEL 1 likelihoods
-    model1, tokenizer1 = load_model(target_model,ref_model)
+    model1, tokenizer1 = load_model(target_model)
     inference1_pass = [] #0: p1, #1: all_prob, #2: p1_likelihood, #3: p_lower, #4: p_lower_likelihood
     for ex in tqdm(test_data):
         text = ex[col_name]
@@ -158,7 +163,14 @@ def evaluate_data(test_data, col_name, target_model, ref_model, ratio_gen, data_
         new_ex = RMIA_1(text,inference1_pass[counter][2],inference2_pass[counter][2],model1,tokenizer1,ratio_gen,neighbors_dls[counter])
         counter = counter + 1
         results.append(new_ex)
-    unload_model(model1,tokenizer1)
+
+    del models[target_model]
+    del models[target_model + "_tokenizer"]
+    model1.cpu()
+    del model1
+    del tokenizer1
+    gc.collect()
+    torch.cuda.empty_cache()
 
     ### Inference ex
     all_output = []
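The same seven-line teardown now appears twice in run.py, once per model. A minimal sketch of how it could be folded back into the unload_model helper this commit deletes, assuming run.py's module-level models cache; callers must still del their own local references first, or the weights stay reachable and nothing is actually freed:

import gc
import torch

def unload_model(name, models):
    """Remove a cached model/tokenizer pair and reclaim its GPU memory."""
    model = models.pop(name, None)            # drop the cache's references
    models.pop(name + "_tokenizer", None)
    if model is not None:
        model.cpu()                           # move the weights off the GPU first
    del model                                 # release the last reference held here
    gc.collect()                              # collect the now-unreachable objects
    torch.cuda.empty_cache()                  # hand cached CUDA blocks back to the driver

Usage would mirror the inline version: del model2, tokenizer2, then unload_model(ref_model, models). Either way, the new code relies on gc and torch being imported at the top of run.py.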
detect-pretrain-code-contamination/src/utils.py CHANGED
@@ -4,7 +4,7 @@ from torch.nn import CrossEntropyLoss
 
 def evaluate_model(model, tokenizer, dl):
     device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
-    model = model.to(device)
+    #model = model.to(device)
     losses = []
     for batch in dl:
         batch = tokenizer(batch, padding=True, return_tensors='pt', truncation=True, max_length=150)
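Disabling model.to(device) is consistent with run.py loading every model with device_map='auto': accelerate fixes the placement at load time, and moving a dispatched model again can fail. A minimal sketch of the input-side handling that stays compatible (the model name is just an example from this repo's data):

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

name = "mistralai/Mistral-7B-v0.1"  # example model
tokenizer = AutoTokenizer.from_pretrained(name)
tokenizer.pad_token = tokenizer.eos_token  # required for padding=True below
model = AutoModelForCausalLM.from_pretrained(name, return_dict=True, device_map='auto')
model.eval()

batch = tokenizer(["some text"], padding=True, return_tensors='pt',
                  truncation=True, max_length=150)
# Send the inputs to wherever accelerate placed the model,
# rather than moving the model to a hand-picked device.
batch = {k: v.to(model.device) for k, v in batch.items()}
with torch.no_grad():
    loss = model(**batch, labels=batch['input_ids']).loss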
requirements.txt CHANGED
@@ -9,3 +9,5 @@ scikit-learn
 accelerate
 gradio
 plotly
+sentencepiece
+protobuf
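Presumably these two are needed by the SentencePiece-based tokenizers among the newly queued models (the Llama family in particular); without sentencepiece the slow tokenizer raises an ImportError, and converting it to a fast tokenizer additionally needs protobuf. A quick check against a model that already appears in this commit:

from transformers import AutoTokenizer

# huggyllama/llama-7b ships a SentencePiece tokenizer model, so this
# load fails without the sentencepiece package installed.
tok = AutoTokenizer.from_pretrained("huggyllama/llama-7b", use_fast=False)
print(tok.tokenize("hello world"))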