Yeyito committed on
Commit 2a135fe
1 Parent(s): 5b3849a

Upload 16 files
detect-pretrain-code-contamination DELETED
@@ -1 +0,0 @@
- Subproject commit 616114e2334dc8dc8b7b538f6dbcc639cc42cb2c
 
 
detect-pretrain-code-contamination/README.md ADDED
@@ -0,0 +1,17 @@
+ # Detect-Pretrain-Code-Contamination
+
+ This repository contains scripts for detecting pretraining data contamination in datasets.
+
+ ## Datasets
+ You can specify the dataset for analysis. Example datasets include `truthful_qa` and `cais/mmlu`.
+
+ ## Usage
+ Run the script with the desired target model, reference model, and dataset. Below is an example using the `truthful_qa` dataset; a second invocation with a different target model appears in `src/scripts/run.sh`.
+
+ ### Example 1:
+ ```bash
+ DATASET=truthful_qa
+ python src/run.py --target_model Fredithefish/ReasonixPajama-3B-HF --ref_model huggyllama/llama-7b --data $DATASET --output_dir out/$DATASET --ratio_gen 0.4
+ ```
+
+ The script outputs a contamination metric: the fraction of evaluated examples whose score satisfies `result < 0.1`. If this fraction is greater than 0.85, it is highly likely that the model was trained on the dataset.
detect-pretrain-code-contamination/src/__pycache__/analyze.cpython-311.pyc ADDED
Binary file (2.16 kB)

detect-pretrain-code-contamination/src/__pycache__/analyze.cpython-39.pyc ADDED
Binary file (1.27 kB)

detect-pretrain-code-contamination/src/__pycache__/eval.cpython-311.pyc ADDED
Binary file (9.99 kB)

detect-pretrain-code-contamination/src/__pycache__/eval.cpython-39.pyc ADDED
Binary file (4.68 kB)

detect-pretrain-code-contamination/src/__pycache__/options.cpython-311.pyc ADDED
Binary file (2.46 kB)

detect-pretrain-code-contamination/src/__pycache__/options.cpython-39.pyc ADDED
Binary file (1.45 kB)

detect-pretrain-code-contamination/src/__pycache__/run.cpython-311.pyc ADDED
Binary file (13.5 kB)

detect-pretrain-code-contamination/src/__pycache__/utils.cpython-311.pyc ADDED
Binary file (3.49 kB)

detect-pretrain-code-contamination/src/__pycache__/utils.cpython-39.pyc ADDED
Binary file (1.53 kB)
detect-pretrain-code-contamination/src/analyze.py ADDED
@@ -0,0 +1,47 @@
+ import json
+ import statistics
+
+ def load_jsonl(path):
+     with open(path) as f:
+         data = [json.loads(line) for line in f]
+     return data
+
+ def analyze_data(data):
+     all_rmia = []
+     all_large_1 = []
+     for ex in data:
+         # Min_20.0% Prob
+         score = ex["pred"]["minkprob_w/_ref"]  # minkprob_w/_ref
+         all_rmia.append(score)
+         if score < 0.1:
+             all_large_1.append(score)
+     # result is a tuple: (label, fraction of examples whose score is below 0.1)
+     result = "result < 0.1, %: ", len(all_large_1) / len(all_rmia)
+     print(result)
+     return result
+     # print(f"RMIA mean: {statistics.mean(all_rmia)}")
+     # print(f"RMIA std: {statistics.stdev(all_rmia)}")
+     # print(f"RMIA min: {min(all_rmia)}")
+     # print(f"RMIA max: {max(all_rmia)}")
+     # # 25% percentile
+     # print(f"RMIA 25%: {statistics.quantiles(all_rmia)[0]}")
+     # # 50% percentile
+     # print(f"RMIA 50%: {statistics.quantiles(all_rmia)[1]}")
+     # # 75% percentile
+     # print(f"RMIA 75%: {statistics.quantiles(all_rmia)[2]}")
+
+
+ if __name__ == "__main__":
+     print("contaminated model")
+     task = "ai2_arc"  # ai2_arc cais/mmlu truthful_qa
+     # /fsx-onellm/swj0419/attack/test_contamination/detect-pretrain-code/out/ai2_arc/Fredithefish/ReasonixPajama-3B-HF_togethercomputer/RedPajama-INCITE-Chat-3B-v1/input/all_output.jsonl
+     path = f"/fsx-onellm/swj0419/attack/test_contamination/detect-pretrain-code/out/{task}/Fredithefish/ReasonixPajama-3B-HF_huggyllama/llama-7b/input/all_output.jsonl"
+     data = load_jsonl(path)
+     analyze_data(data)
+
+     print("raw model")
+     path = f"/fsx-onellm/swj0419/attack/test_contamination/detect-pretrain-code/out/{task}/togethercomputer/RedPajama-INCITE-Chat-3B-v1_huggyllama/llama-7b/input/all_output.jsonl"
+     data = load_jsonl(path)
+     analyze_data(data)
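For orientation, here is a minimal sketch of consuming this module from Python against an `all_output.jsonl` produced by `src/run.py`; the path below is an assumed example, not a file in this commit.

```python
# Hypothetical usage; the output path is an assumption for illustration.
from analyze import load_jsonl, analyze_data

records = load_jsonl("out/truthful_qa/target_model_ref_model/input/all_output.jsonl")
_, fraction = analyze_data(records)  # analyze_data returns (label, fraction of scores below 0.1)
if fraction > 0.85:                  # threshold suggested in the README
    print("Dataset was very likely seen during training.")
```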
detect-pretrain-code-contamination/src/eval.py ADDED
@@ -0,0 +1,178 @@
+ import logging
+ logging.basicConfig(level='ERROR')
+ import numpy as np
+ from tqdm import tqdm
+ import json
+ from collections import defaultdict
+ import matplotlib.pyplot as plt
+ from sklearn.metrics import auc, roc_curve
+ import matplotlib
+ import random
+ from ipdb import set_trace as bp
+ import time
+
+ matplotlib.rcParams['pdf.fonttype'] = 42
+ matplotlib.rcParams['ps.fonttype'] = 42
+
+
+ # plot data
+ def sweep(score, x):
+     """
+     Compute a ROC curve and then return the FPR, TPR, AUC, and ACC.
+     """
+     fpr, tpr, _ = roc_curve(x, -score)
+     acc = np.max(1 - (fpr + (1 - tpr)) / 2)
+     return fpr, tpr, auc(fpr, tpr), acc
+
+
+ def do_plot(prediction, answers, sweep_fn=sweep, metric='auc', legend="", output_dir=None):
+     """
+     Generate the ROC curves by using ntest models as test models and the rest to train.
+     """
+     fpr, tpr, auc, acc = sweep_fn(np.array(prediction), np.array(answers, dtype=bool))
+
+     low = tpr[np.where(fpr < .05)[0][-1]]
+     # bp()
+     print('Attack %s AUC %.4f, Accuracy %.4f, TPR@5%%FPR of %.4f\n' % (legend, auc, acc, low))
+
+     metric_text = ''
+     if metric == 'auc':
+         metric_text = 'auc=%.3f' % auc
+     elif metric == 'acc':
+         metric_text = 'acc=%.3f' % acc
+
+     plt.plot(fpr, tpr, label=legend + metric_text)
+     return legend, auc, acc, low
+
+
+ def fig_fpr_tpr(all_output, output_dir):
+     print("output_dir", output_dir)
+     answers = []
+     metric2predictions = defaultdict(list)
+     for ex in all_output:
+         answers.append(ex["label"])
+         for metric in ex["pred"].keys():
+             if ("raw" in metric) and ("clf" not in metric):
+                 continue
+             metric2predictions[metric].append(ex["pred"][metric])
+
+     plt.figure(figsize=(4, 3))
+     with open(f"{output_dir}/auc.txt", "w") as f:
+         for metric, predictions in metric2predictions.items():
+             legend, auc, acc, low = do_plot(predictions, answers, legend=metric, metric='auc', output_dir=output_dir)
+             f.write('%s AUC %.4f, Accuracy %.4f, TPR@0.1%%FPR of %.4f\n' % (legend, auc, acc, low))
+
+     plt.semilogx()
+     plt.semilogy()
+     plt.xlim(1e-5, 1)
+     plt.ylim(1e-5, 1)
+     plt.xlabel("False Positive Rate")
+     plt.ylabel("True Positive Rate")
+     plt.plot([0, 1], [0, 1], ls='--', color='gray')
+     plt.subplots_adjust(bottom=.18, left=.18, top=.96, right=.96)
+     plt.legend(fontsize=8)
+     plt.savefig(f"{output_dir}/auc.png")
+
+
+ def load_jsonl(input_path):
+     with open(input_path, 'r') as f:
+         data = [json.loads(line) for line in tqdm(f)]
+     random.seed(0)
+     random.shuffle(data)
+     return data
+
+ def dump_jsonl(data, path):
+     with open(path, 'w') as f:
+         for line in tqdm(data):
+             f.write(json.dumps(line) + "\n")
+
+ def read_jsonl(path):
+     with open(path, 'r') as f:
+         return [json.loads(line) for line in tqdm(f)]
+
+ def convert_huggingface_data_to_list_dic(dataset):
+     all_data = []
+     for i in range(len(dataset)):
+         ex = dataset[i]
+         all_data.append(ex)
+     return all_data
+
+
+ def process_truthful_qa(data):
+     new_data = []
+     for ex in data:
+         new_ex = {}
+         label = ex["mc2_targets"]["labels"].index(1)
+         output = ex["mc2_targets"]["choices"][label]
+         # We use mc2 instead of mc1, as that is what the Open LLM Leaderboard uses (to verify).
+         new_ex["output"] = output
+         new_ex["input"] = ex["question"] + " " + output
+         new_data.append(new_ex)
+     return new_data
+
+
+ def process_mmlu(data):
+     new_data = []
+     for ex in data:
+         new_ex = {}
+         label = ex["choices"][ex["answer"]]
+         output = label
+         new_ex["output"] = output
+         new_ex["input"] = ex["question"] + " " + output
+         new_data.append(new_ex)
+     return new_data
+
+
+ def process_arc(data):
+     new_data = []
+     choice2label = {"A": 0, "B": 1, "C": 2, "D": 3}
+     for ex in data:
+         new_ex = {}
+         # bp()
+         # print(ex["answerKey"])
+         if ex["answerKey"] not in choice2label:
+             continue
+         label = choice2label[ex["answerKey"]]
+         output = ex["choices"]["text"][label]
+         new_ex["output"] = output
+         new_ex["input"] = ex["question"] + " " + output
+         new_data.append(new_ex)
+     return new_data
+
+ def process_gsm8k(data):
+     new_data = []
+     for ex in data:
+         new_ex = {}
+         output = ex["answer"]
+         new_ex["output"] = output
+         new_ex["input"] = ex["question"] + " " + output
+         new_data.append(new_ex)
+     return new_data
+
+ def process_winogrande(data):
+     new_data = []
+     for ex in data:
+         new_ex = {}
+         label = int(ex["answer"])
+         output = ex[f"option{label}"]
+         new_ex["output"] = output
+         new_ex["input"] = ex["sentence"] + " " + output
+         new_data.append(new_ex)
+     return new_data
+ # I'm not sure if this is the correct format for winogrande, given how the dataset works.
+
+ def process_hellaswag(data):
+     new_data = []
+     for ex in data:
+         new_ex = {}
+         label = int(ex["label"])  # the label field is a string, so cast it to int
+         output = ex["endings"][label]
+         new_ex["output"] = output
+         new_ex["input"] = ex["ctx"] + " " + output
+         new_data.append(new_ex)
+     return new_data
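To make the record shape produced by these `process_*` helpers concrete, here is a small illustration of `process_truthful_qa` on a hand-made row; the values are invented for the example and only mirror the shape of the `multiple_choice` config.

```python
# Illustration only; the row below is fabricated to mirror truthful_qa's schema.
row = {
    "question": "What is the capital of France?",
    "mc2_targets": {"choices": ["Paris.", "Lyon."], "labels": [1, 0]},
}
processed = process_truthful_qa([row])
assert processed[0]["output"] == "Paris."
assert processed[0]["input"] == "What is the capital of France? Paris."
```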
detect-pretrain-code-contamination/src/options.py ADDED
@@ -0,0 +1,23 @@
+ import argparse
+ import os
+ from pathlib import Path
+ import logging
+
+ logger = logging.getLogger(__name__)
+
+ class Options():
+     def __init__(self):
+         self.parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+         self.initialize_parser()
+
+     def initialize_parser(self):
+         self.parser.add_argument('--target_model', type=str, default="text-davinci-003", help="the model to attack: huggyllama/llama-65b, text-davinci-003")
+         self.parser.add_argument('--ref_model', type=str, default="huggyllama/llama-7b")
+         self.parser.add_argument('--output_dir', type=str, default="out")
+         self.parser.add_argument('--data', type=str, default="swj0419/WikiMIA", help="the dataset to evaluate: default is WikiMIA")
+         self.parser.add_argument('--length', type=int, default=64, help="the length of the input text to evaluate. Choose from 32, 64, 128, 256")
+         self.parser.add_argument('--key_name', type=str, default="input", help="the key name corresponding to the input text. Select from: input, paraphrase")
+         self.parser.add_argument('--ratio_gen', type=float, default=0.4)
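For reference, a minimal sketch of how this parser might be wired up by a caller; this glue code is an assumption for illustration and is not part of the hunks shown in this commit.

```python
# Hypothetical wiring; not part of this commit's diff.
from options import Options

args = Options().parser.parse_args()
print(args.target_model, args.ref_model, args.data, args.output_dir, args.ratio_gen)
```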
detect-pretrain-code-contamination/src/run.py ADDED
@@ -0,0 +1,230 @@
+ import logging
+ logging.basicConfig(level='ERROR')
+ import numpy as np
+ from pathlib import Path
+ import openai
+ import torch
+ import zlib
+ import statistics
+ import time
+ from torch.utils.data import DataLoader
+ from transformers import AutoTokenizer, AutoModelForCausalLM
+ from tqdm import tqdm
+ import math
+ from datasets import load_dataset
+ from options import Options
+ from ipdb import set_trace as bp
+ from eval import *
+ from utils import evaluate_model
+ from analyze import analyze_data
+ import argparse
+ import os
+ import sys
+ import gc
+ import pickle
+
+ def save_data(filename, data):
+     with open(filename, 'wb') as filehandle:
+         # store the data as a binary data stream
+         pickle.dump(data, filehandle)
+
+ def load_data(filename):
+     with open(filename, 'rb') as filehandle:
+         # read the data as a binary data stream
+         loaded_data = pickle.load(filehandle)
+     return loaded_data
+
+ def unload_model(model, tokenizer):
+     model = model.cpu()
+     del model
+     del tokenizer
+     time.sleep(0.5)
+     gc.collect()
+     torch.cuda.empty_cache()
+
+ def load_model(name1):
+     model1 = AutoModelForCausalLM.from_pretrained(name1, return_dict=True, device_map='auto')
+     model1.eval()
+     tokenizer1 = AutoTokenizer.from_pretrained(name1)
+     tokenizer1.pad_token = tokenizer1.eos_token
+     return model1, tokenizer1
+
+ def calculatePerplexity(sentence, model, tokenizer, gpu):
+     """
+     exp(loss)
+     """
+     input_ids = torch.tensor(tokenizer.encode(sentence)).unsqueeze(0)
+     input_ids = input_ids.to(gpu)
+     with torch.no_grad():
+         outputs = model(input_ids, labels=input_ids)
+     loss, logits = outputs[:2]
+
+     '''
+     extract logits:
+     '''
+     # Apply log-softmax to the logits to get per-token log-probabilities
+     probabilities = torch.nn.functional.log_softmax(logits, dim=-1)
+     # probabilities = torch.nn.functional.softmax(logits, dim=-1)
+     all_prob = []
+     input_ids_processed = input_ids[0][1:]
+     for i, token_id in enumerate(input_ids_processed):
+         probability = probabilities[0, i, token_id].item()
+         all_prob.append(probability)
+     return torch.exp(loss).item(), all_prob, loss.item()
+
+ def sample_generation(sentence, model, tokenizer, args):
+     half_sentence_index = math.ceil(len(sentence.split()) * args['prefix_length'])
+
+     if half_sentence_index > 0:
+         prefix = " ".join(sentence.split()[:half_sentence_index])
+     else:
+         prefix = '<|startoftext|> '
+
+     input_ids = torch.tensor(tokenizer.encode(prefix)).unsqueeze(0)
+     input_ids = input_ids.to(model.device)
+
+     output = model.generate(input_ids, max_new_tokens=len(sentence.split())-half_sentence_index, min_new_tokens=1, num_return_sequences=args['num_z'], pad_token_id=tokenizer.eos_token_id, **args['generate_args'])
+     # print(output)
+     complete_generated_text = tokenizer.batch_decode(output, skip_special_tokens=True)
+     return complete_generated_text
+
+
+ def RMIA_1(text, target_loss, ref_loss, model1, tokenizer1, ratio_gen, neighbors_dl):
+     target_losses_z = evaluate_model(model1, tokenizer1, neighbors_dl)
+     result = torch.count_nonzero(target_losses_z < target_loss).item() / len(target_losses_z)
+     return result
+
+ def get_neighbors(text, ref_loss, model2, tokenizer2, ratio_gen):
+     cur_args = {'prefix_length': ratio_gen, 'num_z': 100, 'generate_args': {'do_sample': True}}
+     neighbors = sample_generation(text, model2, tokenizer2, cur_args)
+     neighbors_dl = DataLoader(neighbors, batch_size=32, shuffle=False)
+     return neighbors_dl
+
+ def evaluate_data(test_data, col_name, target_model, ref_model, ratio_gen, data_name):
+     print(f"all data size: {len(test_data)}")
+     random.seed(0)
+     random.shuffle(test_data)
+     test_data = test_data[:100]
+
+     inference2_pass = None
+     neighbors_dls = None
+     ref_model_clean = ref_model.replace("/", "-")
+     data_name_clean = data_name.replace("/", "-")
+     os.makedirs(os.path.join(f"saves/{ref_model_clean}", f"{data_name_clean}"), exist_ok=True)
+     try:
+         inference2_pass = load_data(f'saves/{ref_model_clean}/{data_name_clean}/inference2_pass.txt')
+         neighbors_dls = load_data(f'saves/{ref_model_clean}/{data_name_clean}/neighbors_dls.txt')
+     except:
+         ### MODEL 2 likelihoods
+         model2, tokenizer2 = load_model(ref_model)
+         inference2_pass = []  # 0: p_ref, 1: all_prob_ref, 2: p_ref_likelihood
+         for ex in tqdm(test_data):
+             text = ex[col_name]
+             new_ex = inference_model2(model2, tokenizer2, text)
+             inference2_pass.append(new_ex)
+             # Invariant. Doesn't take in model1 so I'm good
+
+         ### Neighbors:
+         neighbors_dls = []
+         counter = 0
+         for ex in tqdm(test_data):
+             text = ex[col_name]
+             new_ex = get_neighbors(text, inference2_pass[counter][2], model2, tokenizer2, ratio_gen)
+             counter = counter + 1
+             neighbors_dls.append(new_ex)
+         unload_model(model2, tokenizer2)
+         # Because it uses temp it is not invariant; however, taking a snapshot in time should be just fine.
+         save_data(f'saves/{ref_model_clean}/{data_name_clean}/inference2_pass.txt', inference2_pass)
+         save_data(f'saves/{ref_model_clean}/{data_name_clean}/neighbors_dls.txt', neighbors_dls)
+         print("Saved ref data.")
+
+     ### MODEL 1 likelihoods
+     model1, tokenizer1 = load_model(target_model)
+     inference1_pass = []  # 0: p1, 1: all_prob, 2: p1_likelihood, 3: p_lower, 4: p_lower_likelihood
+     for ex in tqdm(test_data):
+         text = ex[col_name]
+         new_ex = inference_model1(model1, tokenizer1, text)
+         inference1_pass.append(new_ex)
+
+     ### RMIA results (the target model loaded above is reused here)
+     counter = 0
+     results = []
+     for ex in tqdm(test_data):
+         text = ex[col_name]
+         new_ex = RMIA_1(text, inference1_pass[counter][2], inference2_pass[counter][2], model1, tokenizer1, ratio_gen, neighbors_dls[counter])
+         counter = counter + 1
+         results.append(new_ex)
+     unload_model(model1, tokenizer1)
+
+     ### Inference ex
+     all_output = []
+     counter = 0
+     for ex in tqdm(test_data):
+         text = ex[col_name]
+         pred = {}
+         pred["minkprob_w/_ref"] = results[counter]
+         pred["ppl"] = inference1_pass[counter][0]
+         pred["ppl/Ref_ppl (calibrate PPL to the reference model)"] = inference1_pass[counter][2] - inference2_pass[counter][2]
+         pred["ppl/lowercase_ppl"] = -(np.log(inference1_pass[counter][3]) / np.log(inference1_pass[counter][0])).item()
+         zlib_entropy = len(zlib.compress(bytes(text, 'utf-8')))
+         pred["ppl/zlib"] = np.log(inference1_pass[counter][0]) / zlib_entropy
+         ex["pred"] = pred
+         counter = counter + 1
+         all_output.append(ex)
+     return all_output
+
+ def inference_model1(model1, tokenizer1, text):
+     p1, all_prob, p1_likelihood = calculatePerplexity(text, model1, tokenizer1, gpu=model1.device)
+     p_lower, _, p_lower_likelihood = calculatePerplexity(text.lower(), model1, tokenizer1, gpu=model1.device)
+     return [p1, all_prob, p1_likelihood, p_lower, p_lower_likelihood]
+
+ def inference_model2(model2, tokenizer2, text):
+     p_ref, all_prob_ref, p_ref_likelihood = calculatePerplexity(text, model2, tokenizer2, gpu=model2.device)
+     return [p_ref, all_prob_ref, p_ref_likelihood]
+
+ def main(target_model, ref_model, output_dir, data, length, key_name, ratio_gen):
+     output_dir = f"{output_dir}/{target_model}_{ref_model}/{key_name}"
+     Path(output_dir).mkdir(parents=True, exist_ok=True)
+     # load model and data
+     data_name = data
+     if "jsonl" in data:
+         data = load_jsonl(f"{data}")
+     elif data == "truthful_qa":
+         # bp()
+         dataset = load_dataset(data, "multiple_choice", split="validation")
+         data = convert_huggingface_data_to_list_dic(dataset)
+         data = process_truthful_qa(data)
+     elif data == "cais/mmlu":
+         dataset = load_dataset(data, "all", split="test")
+         data = convert_huggingface_data_to_list_dic(dataset)
+         data = process_mmlu(data)
+     elif data == "ai2_arc":
+         dataset = load_dataset(data, "ARC-Challenge", split="test")
+         data = convert_huggingface_data_to_list_dic(dataset)
+         data = process_arc(data)
+     elif data == "gsm8k":
+         dataset = load_dataset(data, "main", split="test")
+         data = convert_huggingface_data_to_list_dic(dataset)
+         data = process_gsm8k(data)
+     elif data == "Rowan/hellaswag":
+         dataset = load_dataset(data, "default", split="validation")
+         # We use validation since labels for the test set are not available.
+         data = convert_huggingface_data_to_list_dic(dataset)
+         data = process_hellaswag(data)
+     elif data == "winogrande":
+         dataset = load_dataset(data, "winogrande_debiased", split="validation")
+         data = convert_huggingface_data_to_list_dic(dataset)
+         data = process_winogrande(data)
+
+     # model1, model2, tokenizer1, tokenizer2 = load_model(target_model, ref_model)
+
+     all_output = evaluate_data(data, key_name, target_model, ref_model, ratio_gen, data_name)
+     dump_jsonl(all_output, f"{output_dir}/all_output.jsonl")
+     return analyze_data(all_output)
+     # fig_fpr_tpr(all_output, output_dir)
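Since the hunk above ends without a CLI entry point, here is a hedged sketch of driving the pipeline programmatically; the argument values mirror `scripts/run.sh` and the defaults in `options.py`, and this glue is an assumption rather than part of the commit.

```python
# Hypothetical driver; equivalent in spirit to the shell command in scripts/run.sh.
from run import main

result = main(
    target_model="Fredithefish/ReasonixPajama-3B-HF",
    ref_model="huggyllama/llama-7b",
    output_dir="out/truthful_qa",
    data="truthful_qa",
    length=64,         # default from options.py (not used inside main itself)
    key_name="input",  # default from options.py
    ratio_gen=0.4,
)
print(result)  # ("result < 0.1, %: ", fraction of examples below 0.1)
```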
detect-pretrain-code-contamination/src/scripts/run.sh ADDED
@@ -0,0 +1,8 @@
+
+ DATASET=truthful_qa #cais/mmlu #truthful_qa
+ python src/run.py --target_model Fredithefish/ReasonixPajama-3B-HF --ref_model huggyllama/llama-7b --data $DATASET --output_dir out/$DATASET --ratio_gen 0.4
+
+
+ # DATASET=cais/mmlu #cais/mmlu #truthful_qa
+ DATASET=truthful_qa #cais/mmlu #truthful_qa
+ python src/run.py --target_model togethercomputer/RedPajama-INCITE-Chat-3B-v1 --ref_model huggyllama/llama-7b --data $DATASET --output_dir out/$DATASET --ratio_gen 0.4
detect-pretrain-code-contamination/src/utils.py ADDED
@@ -0,0 +1,28 @@
+ from tqdm import tqdm
+ import torch
+ from torch.nn import CrossEntropyLoss
+
+ def evaluate_model(model, tokenizer, dl):
+     device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+     model = model.to(device)
+     losses = []
+     for batch in dl:
+         batch = tokenizer(batch, padding=True, return_tensors='pt', truncation=True, max_length=150)
+         # Mask padding positions with -100 so CrossEntropyLoss ignores them.
+         labels = torch.tensor([
+             [-100 if mask == 0 else token for mask, token in mask_and_tokens] for mask_and_tokens in [zip(masks, labels) for masks, labels in zip(batch['attention_mask'], batch['input_ids'])]
+         ])
+         batch['labels'] = labels
+         batch = {k: v.to(device) for k, v in batch.items()}
+
+         with torch.no_grad():
+             outputs = model(batch['input_ids'], attention_mask=batch['attention_mask'], labels=batch['labels'])
+         shift_logits = outputs.logits[..., :-1, :].contiguous()
+         shift_labels = batch['labels'][..., 1:].contiguous()
+         loss_fct = CrossEntropyLoss(reduction='none')
+         loss = loss_fct(shift_logits.transpose(1, 2), shift_labels)
+         num_tokens = torch.sum(shift_labels != -100, dim=1)
+         loss_sum = torch.sum(loss, dim=1)
+         loss = loss_sum / num_tokens
+         losses.append(loss)
+     losses = torch.cat(losses)
+     return losses
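A small usage sketch for `evaluate_model`: it expects an iterable of string batches, such as the `DataLoader` built by `get_neighbors` in `src/run.py`, and returns one average token-level loss per input string. The model name and sentences below are placeholders; loading mirrors `load_model` in `src/run.py`.

```python
# Illustration only; any causal LM with a tokenizer works the same way.
from torch.utils.data import DataLoader
from transformers import AutoTokenizer, AutoModelForCausalLM
from utils import evaluate_model

name = "huggyllama/llama-7b"  # placeholder model name
tokenizer = AutoTokenizer.from_pretrained(name)
tokenizer.pad_token = tokenizer.eos_token  # llama tokenizers ship without a pad token
model = AutoModelForCausalLM.from_pretrained(name)

texts = ["The quick brown fox jumps over the lazy dog.",
         "Paris is the capital of France."]
losses = evaluate_model(model, tokenizer, DataLoader(texts, batch_size=2))
print(losses)  # one mean negative log-likelihood per sentence
```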