AbrahamSanders committed
Commit 2d4b4f5 · 1 parent: 9805884

First model version

added_tokens.json ADDED
@@ -0,0 +1 @@
+ {"<|response|>": 50260, "<|author|>": 50258, "<|message|>": 50259, "<|pad|>": 50257}
config.json ADDED
@@ -0,0 +1,39 @@
+ {
+ "_name_or_path": "gpt2-large",
+ "activation_function": "gelu_new",
+ "architectures": [
+ "GPT2LMHeadModel"
+ ],
+ "attn_pdrop": 0.1,
+ "bos_token_id": 50256,
+ "embd_pdrop": 0.1,
+ "eos_token_id": 50256,
+ "initializer_range": 0.02,
+ "layer_norm_epsilon": 1e-05,
+ "model_type": "gpt2",
+ "n_ctx": 1024,
+ "n_embd": 1280,
+ "n_head": 20,
+ "n_inner": null,
+ "n_layer": 36,
+ "n_positions": 1024,
+ "reorder_and_upcast_attn": false,
+ "resid_pdrop": 0.1,
+ "scale_attn_by_inverse_layer_idx": false,
+ "scale_attn_weights": true,
+ "summary_activation": null,
+ "summary_first_dropout": 0.1,
+ "summary_proj_to_labels": true,
+ "summary_type": "cls_index",
+ "summary_use_proj": true,
+ "task_specific_params": {
+ "text-generation": {
+ "do_sample": true,
+ "max_length": 50
+ }
+ },
+ "torch_dtype": "float32",
+ "transformers_version": "4.13.0",
+ "use_cache": true,
+ "vocab_size": 50261
+ }
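Apart from "vocab_size", this is the stock gpt2-large configuration (36 layers, 1280-dim embeddings, 20 attention heads, 1024-token context). The vocabulary grows from 50257 to 50261 to accommodate the four added tokens, which requires resizing the embedding matrix; a sketch, assuming the standard resize call:

    from transformers import GPT2LMHeadModel

    model = GPT2LMHeadModel.from_pretrained("gpt2-large")  # config.vocab_size == 50257
    model.resize_token_embeddings(50257 + 4)               # matches "vocab_size": 50261 above
    print(model.config.vocab_size)                         # 50261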
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2bc9226e612703dea0b4c7104030872ef9e603ff260d2fe0383dfb9f416acf3f
+ size 3134078283
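pytorch_model.bin is tracked with Git LFS, so the commit stores only this three-line pointer: the spec version, the SHA-256 of the actual blob, and its size in bytes (about 3.1 GB, consistent with gpt2-large weights in float32). A downloaded copy can be checked against the pointer's oid; the local path below assumes a checkout after `git lfs pull`:

    import hashlib

    sha256 = hashlib.sha256()
    with open("pytorch_model.bin", "rb") as f:            # local path after `git lfs pull`
        for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
            sha256.update(chunk)
    assert sha256.hexdigest() == "2bc9226e612703dea0b4c7104030872ef9e603ff260d2fe0383dfb9f416acf3f"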
runs/version_0/events.out.tfevents.1643396122.idea-node-05.89614.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f8e0badce3282b038f1628d70f35622d5a86ccfb2f31b073c13da6e2cc5720b2
+ size 34178
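This is a TensorBoard event file written under runs/version_0/, the default directory layout of PyTorch Lightning's TensorBoardLogger (also LFS-tracked, hence the pointer). After pulling it, the logged scalars can be read with the tensorboard CLI or programmatically; which tag names it contains is not visible from the pointer:

    from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

    acc = EventAccumulator("runs/version_0")  # accepts a run directory or an event file path
    acc.Reload()
    print(acc.Tags()["scalars"])              # whatever scalar tags were logged during training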
runs/version_0/hparams.yaml ADDED
@@ -0,0 +1,74 @@
+ early_stopping_patience: 12
+ checkpoint_save_top_k: -1
+ checkpoint_save_weights_only: true
+ dataset_file: tweet_response_training.csv
+ base_modelpath: gpt2-large
+ max_sequence_length: 512
+ batch_size: 3
+ learning_rate: 5.0e-05
+ weight_decay: 0.0
+ adam_epsilon: 1.0e-08
+ warmup_steps: 0
+ random_state: 42
+ overwrite_prepared_data: false
+ val_split: 0.1
+ logger: true
+ checkpoint_callback: null
+ enable_checkpointing: true
+ default_root_dir: null
+ gradient_clip_val: 1.0
+ gradient_clip_algorithm: null
+ process_position: 0
+ num_nodes: 1
+ num_processes: 1
+ devices: null
+ gpus: 1
+ auto_select_gpus: false
+ tpu_cores: null
+ ipus: null
+ log_gpu_memory: null
+ progress_bar_refresh_rate: null
+ enable_progress_bar: true
+ overfit_batches: 0.0
+ track_grad_norm: -1
+ check_val_every_n_epoch: 1
+ fast_dev_run: false
+ accumulate_grad_batches: 4
+ max_epochs: 15
+ min_epochs: null
+ max_steps: -1
+ min_steps: null
+ max_time: null
+ limit_train_batches: 1.0
+ limit_val_batches: 1.0
+ limit_test_batches: 1.0
+ limit_predict_batches: 1.0
+ val_check_interval: 0.25
+ flush_logs_every_n_steps: null
+ log_every_n_steps: 50
+ accelerator: null
+ strategy: null
+ sync_batchnorm: false
+ precision: 32
+ enable_model_summary: true
+ weights_summary: top
+ weights_save_path: null
+ num_sanity_val_steps: 2
+ resume_from_checkpoint: null
+ profiler: null
+ benchmark: false
+ deterministic: false
+ reload_dataloaders_every_n_epochs: 0
+ reload_dataloaders_every_epoch: false
+ auto_lr_find: false
+ replace_sampler_ddp: true
+ detect_anomaly: false
+ auto_scale_batch_size: false
+ prepare_data_per_node: null
+ plugins: null
+ amp_backend: native
+ amp_level: null
+ move_metrics_to_cpu: false
+ multiple_trainloader_mode: max_size_cycle
+ stochastic_weight_avg: false
+ terminate_on_nan: null
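hparams.yaml mixes script-level settings (the first block: early stopping with patience 12, the tweet_response_training.csv dataset, batch size 3 with 4-step gradient accumulation for an effective batch size of 12, a 10% validation split) with the full PyTorch Lightning Trainer argument set; flag names such as weights_summary and terminate_on_nan match Lightning ~1.5, contemporaneous with transformers 4.13.0. A sketch of a Trainer wired up from these values (the LightningModule and the monitored metric are not in this commit, so "val_loss" is an assumption):

    import pytorch_lightning as pl
    from pytorch_lightning.callbacks import EarlyStopping, ModelCheckpoint

    trainer = pl.Trainer(
        gpus=1,
        max_epochs=15,
        accumulate_grad_batches=4,   # effective batch size: 3 * 4 = 12
        gradient_clip_val=1.0,
        val_check_interval=0.25,     # validate four times per training epoch
        callbacks=[
            EarlyStopping(monitor="val_loss", patience=12),   # metric name assumed
            ModelCheckpoint(save_top_k=-1, save_weights_only=True),
        ],
    )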
special_tokens_map.json ADDED
@@ -0,0 +1 @@
+ {"bos_token": "<|endoftext|>", "eos_token": "<|endoftext|>", "unk_token": "<|endoftext|>", "pad_token": "<|pad|>", "additional_special_tokens": ["<|author|>", "<|message|>", "<|response|>"]}
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1 @@
+ {"unk_token": "<|endoftext|>", "bos_token": "<|endoftext|>", "eos_token": "<|endoftext|>", "add_prefix_space": false, "model_max_length": 1024, "special_tokens_map_file": null, "name_or_path": "gpt2-large", "tokenizer_class": "GPT2Tokenizer"}
vocab.json ADDED
The diff for this file is too large to render. See raw diff