Commit
·
ec0d012
1
Parent(s):
ccb418b
model improved
Browse files
- config.json +1 -1
- maker.py +1 -1
- pytorch_model.bin +2 -2
- tokenizer_config.json +1 -0
config.json
CHANGED
@@ -9347,7 +9347,7 @@
|
|
9347 |
"position_embedding_type": "absolute",
|
9348 |
"tokenizer_class": "BertTokenizer",
|
9349 |
"torch_dtype": "float32",
|
9350 |
-
"transformers_version": "4.
|
9351 |
"type_vocab_size": 2,
|
9352 |
"use_cache": true,
|
9353 |
"vocab_size": 30000
|
|
|
9347 |
"position_embedding_type": "absolute",
|
9348 |
"tokenizer_class": "BertTokenizer",
|
9349 |
"torch_dtype": "float32",
|
9350 |
+
"transformers_version": "4.48.3",
|
9351 |
"type_vocab_size": 2,
|
9352 |
"use_cache": true,
|
9353 |
"vocab_size": 30000
|
maker.py
CHANGED
@@ -52,7 +52,7 @@ devDS=UDgoeswithDataset("dev.conllu",tkz)
|
|
52 |
testDS=UDgoeswithDataset("test.conllu",tkz)
|
53 |
lid=trainDS(devDS,testDS)
|
54 |
cfg=AutoConfig.from_pretrained(src,num_labels=len(lid),label2id=lid,id2label={i:l for l,i in lid.items()},ignore_mismatched_sizes=True)
|
55 |
-
arg=TrainingArguments(num_train_epochs=
|
56 |
trn=Trainer(args=arg,data_collator=DataCollatorForTokenClassification(tkz),model=AutoModelForTokenClassification.from_pretrained(src,config=cfg,ignore_mismatched_sizes=True),train_dataset=trainDS,eval_dataset=devDS)
|
57 |
trn.train()
|
58 |
trn.save_model(tgt)
|
|
|
52 |
testDS=UDgoeswithDataset("test.conllu",tkz)
|
53 |
lid=trainDS(devDS,testDS)
|
54 |
cfg=AutoConfig.from_pretrained(src,num_labels=len(lid),label2id=lid,id2label={i:l for l,i in lid.items()},ignore_mismatched_sizes=True)
|
55 |
+
arg=TrainingArguments(num_train_epochs=10,per_device_train_batch_size=16,output_dir="/tmp",overwrite_output_dir=True,save_total_limit=2,evaluation_strategy="epoch",learning_rate=5e-05,warmup_ratio=0.1,save_safetensors=False)
|
56 |
trn=Trainer(args=arg,data_collator=DataCollatorForTokenClassification(tkz),model=AutoModelForTokenClassification.from_pretrained(src,config=cfg,ignore_mismatched_sizes=True),train_dataset=trainDS,eval_dataset=devDS)
|
57 |
trn.train()
|
58 |
trn.save_model(tgt)
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fd5442dafd851a2e56f7e097d4ef367e80c572ffd731fcce7f030f96eab55dc5
|
3 |
+
size 67318118
|
tokenizer_config.json
CHANGED
@@ -46,6 +46,7 @@
|
|
46 |
"do_basic_tokenize": true,
|
47 |
"do_lower_case": true,
|
48 |
"do_lowercase": false,
|
|
|
49 |
"mask_token": "[MASK]",
|
50 |
"model_max_length": 128,
|
51 |
"never_split": [
|
|
|
46 |
"do_basic_tokenize": true,
|
47 |
"do_lower_case": true,
|
48 |
"do_lowercase": false,
|
49 |
+
"extra_special_tokens": {},
|
50 |
"mask_token": "[MASK]",
|
51 |
"model_max_length": 128,
|
52 |
"never_split": [
|