KoichiYasuoka committed on
Commit
ec0d012
·
1 Parent(s): ccb418b

model improved

Browse files
Files changed (4) hide show
  1. config.json +1 -1
  2. maker.py +1 -1
  3. pytorch_model.bin +2 -2
  4. tokenizer_config.json +1 -0
config.json CHANGED
@@ -9347,7 +9347,7 @@
9347
  "position_embedding_type": "absolute",
9348
  "tokenizer_class": "BertTokenizer",
9349
  "torch_dtype": "float32",
9350
- "transformers_version": "4.40.1",
9351
  "type_vocab_size": 2,
9352
  "use_cache": true,
9353
  "vocab_size": 30000
 
9347
  "position_embedding_type": "absolute",
9348
  "tokenizer_class": "BertTokenizer",
9349
  "torch_dtype": "float32",
9350
+ "transformers_version": "4.48.3",
9351
  "type_vocab_size": 2,
9352
  "use_cache": true,
9353
  "vocab_size": 30000
maker.py CHANGED
@@ -52,7 +52,7 @@ devDS=UDgoeswithDataset("dev.conllu",tkz)
52
  testDS=UDgoeswithDataset("test.conllu",tkz)
53
  lid=trainDS(devDS,testDS)
54
  cfg=AutoConfig.from_pretrained(src,num_labels=len(lid),label2id=lid,id2label={i:l for l,i in lid.items()},ignore_mismatched_sizes=True)
55
- arg=TrainingArguments(num_train_epochs=3,per_device_train_batch_size=48,output_dir="/tmp",overwrite_output_dir=True,save_total_limit=2,evaluation_strategy="epoch",learning_rate=5e-05,warmup_ratio=0.1,save_safetensors=False)
56
  trn=Trainer(args=arg,data_collator=DataCollatorForTokenClassification(tkz),model=AutoModelForTokenClassification.from_pretrained(src,config=cfg,ignore_mismatched_sizes=True),train_dataset=trainDS,eval_dataset=devDS)
57
  trn.train()
58
  trn.save_model(tgt)
 
52
  testDS=UDgoeswithDataset("test.conllu",tkz)
53
  lid=trainDS(devDS,testDS)
54
  cfg=AutoConfig.from_pretrained(src,num_labels=len(lid),label2id=lid,id2label={i:l for l,i in lid.items()},ignore_mismatched_sizes=True)
55
+ arg=TrainingArguments(num_train_epochs=10,per_device_train_batch_size=16,output_dir="/tmp",overwrite_output_dir=True,save_total_limit=2,evaluation_strategy="epoch",learning_rate=5e-05,warmup_ratio=0.1,save_safetensors=False)
56
  trn=Trainer(args=arg,data_collator=DataCollatorForTokenClassification(tkz),model=AutoModelForTokenClassification.from_pretrained(src,config=cfg,ignore_mismatched_sizes=True),train_dataset=trainDS,eval_dataset=devDS)
57
  trn.train()
58
  trn.save_model(tgt)
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4b0827682edbb5050271e666f0e0686c9b092a1f0bc51545c7c492d0678f9f1f
3
- size 67315955
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fd5442dafd851a2e56f7e097d4ef367e80c572ffd731fcce7f030f96eab55dc5
3
+ size 67318118
tokenizer_config.json CHANGED
@@ -46,6 +46,7 @@
46
  "do_basic_tokenize": true,
47
  "do_lower_case": true,
48
  "do_lowercase": false,
 
49
  "mask_token": "[MASK]",
50
  "model_max_length": 128,
51
  "never_split": [
 
46
  "do_basic_tokenize": true,
47
  "do_lower_case": true,
48
  "do_lowercase": false,
49
+ "extra_special_tokens": {},
50
  "mask_token": "[MASK]",
51
  "model_max_length": 128,
52
  "never_split": [