Training in progress epoch 0

Browse files

Files changed (7) hide show

README.md +55 -0
config.json +479 -0
special_tokens_map.json +7 -0
tf_model.h5 +3 -0
tokenizer.json +0 -0
tokenizer_config.json +55 -0
vocab.txt +0 -0

README.md ADDED Viewed

	@@ -0,0 +1,55 @@

+---
+license: apache-2.0
+base_model: bert-base-uncased
+tags:
+- generated_from_keras_callback
+model-index:
+- name: vladjr/bert-full-competicao
+  results: []
+---
+<!-- This model card has been generated automatically according to the information Keras had access to. You should
+probably proofread and complete it, then remove this comment. -->
+# vladjr/bert-full-competicao
+This model is a fine-tuned version of [bert-base-uncased](https://huggingface.co/bert-base-uncased) on an unknown dataset.
+It achieves the following results on the training and validation sets after the first epoch (epoch 0):
+- Train Loss: 3.1264
+- Validation Loss: 1.3286
+- Train Accuracy: 0.9194
+- Epoch: 0
+## Model description
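+More information needed. According to the accompanying `config.json`, this is a `BertForSequenceClassification` model built on `bert-base-uncased` with 225 output labels (ids 0–224).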
+## Intended uses & limitations
+More information needed
+## Training and evaluation data
+More information needed
+## Training procedure
+### Training hyperparameters
+The following hyperparameters were used during training:
+- optimizer: {'name': 'Adam', 'weight_decay': None, 'clipnorm': None, 'global_clipnorm': None, 'clipvalue': None, 'use_ema': False, 'ema_momentum': 0.99, 'ema_overwrite_frequency': None, 'jit_compile': True, 'is_legacy_optimizer': False, 'learning_rate': {'module': 'keras.optimizers.schedules', 'class_name': 'PolynomialDecay', 'config': {'initial_learning_rate': 2e-05, 'decay_steps': 2900, 'end_learning_rate': 0.0, 'power': 1.0, 'cycle': False, 'name': None}, 'registered_name': None}, 'beta_1': 0.9, 'beta_2': 0.999, 'epsilon': 1e-08, 'amsgrad': False}
+- training_precision: float32
+### Training results
+| Train Loss | Validation Loss | Train Accuracy | Epoch |
+|:----------:|:---------------:|:--------------:|:-----:|
+| 3.1264     | 1.3286          | 0.9194         | 0     |
+### Framework versions
+- Transformers 4.34.1
+- TensorFlow 2.14.0
+- Datasets 2.14.6
+- Tokenizers 0.14.1

config.json ADDED Viewed

	@@ -0,0 +1,479 @@

+{
+  "_name_or_path": "bert-base-uncased",
+  "architectures": [
+    "BertForSequenceClassification"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "gradient_checkpointing": false,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "id2label": {
+    "0": "secrecy rate",
+    "1": "markov geographic model",
+    "2": "graph convolution networks",
+    "3": "convolutional neural network",
+    "4": "computed tomography",
+    "5": "betweenness centrality",
+    "6": "forward error correction",
+    "7": "fusion center",
+    "8": "random vaccination",
+    "9": "adversarial risk analysis",
+    "10": "nash equilibrium",
+    "11": "maximum likelihood",
+    "12": "synthetic aperture radar",
+    "13": "sound pressure level",
+    "14": "support vector machine",
+    "15": "high performance computing",
+    "16": "access point",
+    "17": "downlink",
+    "18": "strictly piecewise",
+    "19": "atomic , independent , declarative , and absolute",
+    "20": "shortest dependency path",
+    "21": "multi - layer same - resolution compressed",
+    "22": "marginal contribution",
+    "23": "spectral angle distance",
+    "24": "information retrieval",
+    "25": "resource description framework",
+    "26": "atomic function computation",
+    "27": "part of speech",
+    "28": "long term evolution",
+    "29": "mean squared error",
+    "30": "permutation invariant training",
+    "31": "minimum generation error",
+    "32": "alternating least squares",
+    "33": "reinforcement learning",
+    "34": "machine learning",
+    "35": "recurrent neural network",
+    "36": "recurrent weighted average",
+    "37": "question answering",
+    "38": "multiple parallel instances",
+    "39": "gaussian process",
+    "40": "base station",
+    "41": "receiver operating characteristic",
+    "42": "threshold algorithm",
+    "43": "click through rates",
+    "44": "virtual machine",
+    "45": "test case prioritization",
+    "46": "neural network",
+    "47": "belief propagation",
+    "48": "contention adaptions",
+    "49": "dynamic induction control",
+    "50": "information embedding cost",
+    "51": "lifelong metric learning",
+    "52": "linear programming",
+    "53": "multiple description coding",
+    "54": "latent dirichlet allocation",
+    "55": "collaborative filtering",
+    "56": "medium access control",
+    "57": "description logics",
+    "58": "radio frequency",
+    "59": "adaptive radix tree",
+    "60": "integer linear programming",
+    "61": "minimum risk training",
+    "62": "constructive interference",
+    "63": "line of sight",
+    "64": "deep belief network",
+    "65": "average precision",
+    "66": "dropped pronoun",
+    "67": "rate distortion function",
+    "68": "intellectual property",
+    "69": "geometric programming",
+    "70": "gaussian mixture model",
+    "71": "language model",
+    "72": "adversarially robust distillation",
+    "73": "controlled natural language",
+    "74": "federated learning",
+    "75": "augmented reality",
+    "76": "matrix factorization",
+    "77": "principal component analysis",
+    "78": "node classification",
+    "79": "smart object",
+    "80": "poisson point process",
+    "81": "attention network",
+    "82": "constrained least squares",
+    "83": "global positioning system",
+    "84": "prepositional phrase",
+    "85": "artificial neural network",
+    "86": "directed belief net",
+    "87": "false positive rate",
+    "88": "latent semantic analysis",
+    "89": "artificial intelligence",
+    "90": "model predictive control",
+    "91": "genetic algorithm",
+    "92": "access part'",
+    "93": "sensing application recently",
+    "94": "mutual information",
+    "95": "universal dependencies",
+    "96": "secrecy outage probability",
+    "97": "statistical compressed sensing",
+    "98": "information bottleneck",
+    "99": "ergodic sum capacity",
+    "100": "image signal processor",
+    "101": "particle swarm optimization",
+    "102": "differential rectifier",
+    "103": "technical debt",
+    "104": "deep learning",
+    "105": "hybrid monte carlo",
+    "106": "ordinary differential equation",
+    "107": "scalar multiplication",
+    "108": "inductive logic programming",
+    "109": "simulated annealing",
+    "110": "entity set expansion",
+    "111": "autism spectrum disorders",
+    "112": "artificial bee colony",
+    "113": "property graph",
+    "114": "centralized solution",
+    "115": "social status",
+    "116": "taint dependency sequences",
+    "117": "expectation maximization",
+    "118": "machine translation",
+    "119": "dynamic vision sensor",
+    "120": "automatic speech recognition",
+    "121": "user equipment",
+    "122": "random neural networks",
+    "123": "mean absolute error",
+    "124": "bayesian network",
+    "125": "singular value decomposition",
+    "126": "multimedia event detection",
+    "127": "median recovery error",
+    "128": "nearest neighbor",
+    "129": "friendly jamming",
+    "130": "formal methods",
+    "131": "intraclass correlation coefficient",
+    "132": "central cloud",
+    "133": "cumulative activation",
+    "134": "mitral valve",
+    "135": "discriminative correlation filter",
+    "136": "transformation error",
+    "137": "relation extraction",
+    "138": "linear discriminant analysis",
+    "139": "integrated circuit",
+    "140": "stochastic block model",
+    "141": "information extraction",
+    "142": "socially assistive robots",
+    "143": "hierarchical attention network",
+    "144": "deep reinforcement learning",
+    "145": "logistic regression",
+    "146": "message passing interface",
+    "147": "bug reports",
+    "148": "alzheimer 's disease",
+    "149": "data science and analytics",
+    "150": "automatic differentiation",
+    "151": "conditional random field",
+    "152": "false negatives",
+    "153": "sequential monte carlo",
+    "154": "basic question",
+    "155": "physical access",
+    "156": "point multiplication",
+    "157": "leicester scientific corpus",
+    "158": "transformation encoder",
+    "159": "deep convolutional neural network",
+    "160": "thompson sampling",
+    "161": "orthogonal least square",
+    "162": "acquaintance vaccination",
+    "163": "rate - selective",
+    "164": "dynamic assignment ratio",
+    "165": "multiple description",
+    "166": "million song dataset",
+    "167": "machine type communications",
+    "168": "self attention network",
+    "169": "term frequency",
+    "170": "portable document format",
+    "171": "parameter server",
+    "172": "physical machines",
+    "173": "exponential moving average",
+    "174": "matrix pair beamformer",
+    "175": "optimal transport",
+    "176": "finite element method",
+    "177": "differential evolution",
+    "178": "product - based neural network",
+    "179": "mean average conceptual similarity",
+    "180": "power splitting",
+    "181": "parkinson 's disease",
+    "182": "new persian",
+    "183": "artifact disentanglement network",
+    "184": "statistical machine translation",
+    "185": "manifold geometry matching",
+    "186": "batch normalization",
+    "187": "rank residual constraint",
+    "188": "oblivious transfer",
+    "189": "positive pointwise mutual information",
+    "190": "triad significance profile",
+    "191": "reverse classification accuracy",
+    "192": "fully connected",
+    "193": "corresponding arcs",
+    "194": "maximum a posteriori",
+    "195": "false positive",
+    "196": "certain natural language",
+    "197": "strategic dependency",
+    "198": "strictly local",
+    "199": "internet protocol",
+    "200": "foveal tilt effects",
+    "201": "dynamic cluster",
+    "202": "domain name system",
+    "203": "mean average precision",
+    "204": "semantic role labeling",
+    "205": "recurrent convolution",
+    "206": "optical character recognition",
+    "207": "charging current",
+    "208": "low resolution",
+    "209": "power system operations",
+    "210": "compressive sensing",
+    "211": "optimal power flow",
+    "212": "deep context prediction",
+    "213": "secondary users",
+    "214": "o - d demand estimation",
+    "215": "fully convolutional neural network",
+    "216": "maximal ratio combining",
+    "217": "quantile random forest",
+    "218": "adaptive threshold",
+    "219": "situation entity",
+    "220": "relay station",
+    "221": "discrete choice models",
+    "222": "random forest",
+    "223": "left ventricle",
+    "224": "artificial noise"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "label2id": {
+    "access part'": 92,
+    "access point": 16,
+    "acquaintance vaccination": 162,
+    "adaptive radix tree": 59,
+    "adaptive threshold": 218,
+    "adversarial risk analysis": 9,
+    "adversarially robust distillation": 72,
+    "alternating least squares": 32,
+    "alzheimer 's disease": 148,
+    "artifact disentanglement network": 183,
+    "artificial bee colony": 112,
+    "artificial intelligence": 89,
+    "artificial neural network": 85,
+    "artificial noise": 224,
+    "atomic , independent , declarative , and absolute": 19,
+    "atomic function computation": 26,
+    "attention network": 81,
+    "augmented reality": 75,
+    "autism spectrum disorders": 111,
+    "automatic differentiation": 150,
+    "automatic speech recognition": 120,
+    "average precision": 65,
+    "base station": 40,
+    "basic question": 154,
+    "batch normalization": 186,
+    "bayesian network": 124,
+    "belief propagation": 47,
+    "betweenness centrality": 5,
+    "bug reports": 147,
+    "central cloud": 132,
+    "centralized solution": 114,
+    "certain natural language": 196,
+    "charging current": 207,
+    "click through rates": 43,
+    "collaborative filtering": 55,
+    "compressive sensing": 210,
+    "computed tomography": 4,
+    "conditional random field": 151,
+    "constrained least squares": 82,
+    "constructive interference": 62,
+    "contention adaptions": 48,
+    "controlled natural language": 73,
+    "convolutional neural network": 3,
+    "corresponding arcs": 193,
+    "cumulative activation": 133,
+    "data science and analytics": 149,
+    "deep belief network": 64,
+    "deep context prediction": 212,
+    "deep convolutional neural network": 159,
+    "deep learning": 104,
+    "deep reinforcement learning": 144,
+    "description logics": 57,
+    "differential evolution": 177,
+    "differential rectifier": 102,
+    "directed belief net": 86,
+    "discrete choice models": 221,
+    "discriminative correlation filter": 135,
+    "domain name system": 202,
+    "downlink": 17,
+    "dropped pronoun": 66,
+    "dynamic assignment ratio": 164,
+    "dynamic cluster": 201,
+    "dynamic induction control": 49,
+    "dynamic vision sensor": 119,
+    "entity set expansion": 110,
+    "ergodic sum capacity": 99,
+    "expectation maximization": 117,
+    "exponential moving average": 173,
+    "false negatives": 152,
+    "false positive": 195,
+    "false positive rate": 87,
+    "federated learning": 74,
+    "finite element method": 176,
+    "formal methods": 130,
+    "forward error correction": 6,
+    "foveal tilt effects": 200,
+    "friendly jamming": 129,
+    "fully connected": 192,
+    "fully convolutional neural network": 215,
+    "fusion center": 7,
+    "gaussian mixture model": 70,
+    "gaussian process": 39,
+    "genetic algorithm": 91,
+    "geometric programming": 69,
+    "global positioning system": 83,
+    "graph convolution networks": 2,
+    "hierarchical attention network": 143,
+    "high performance computing": 15,
+    "hybrid monte carlo": 105,
+    "image signal processor": 100,
+    "inductive logic programming": 108,
+    "information bottleneck": 98,
+    "information embedding cost": 50,
+    "information extraction": 141,
+    "information retrieval": 24,
+    "integer linear programming": 60,
+    "integrated circuit": 139,
+    "intellectual property": 68,
+    "internet protocol": 199,
+    "intraclass correlation coefficient": 131,
+    "language model": 71,
+    "latent dirichlet allocation": 54,
+    "latent semantic analysis": 88,
+    "left ventricle": 223,
+    "leicester scientific corpus": 157,
+    "lifelong metric learning": 51,
+    "line of sight": 63,
+    "linear discriminant analysis": 138,
+    "linear programming": 52,
+    "logistic regression": 145,
+    "long term evolution": 28,
+    "low resolution": 208,
+    "machine learning": 34,
+    "machine translation": 118,
+    "machine type communications": 167,
+    "manifold geometry matching": 185,
+    "marginal contribution": 22,
+    "markov geographic model": 1,
+    "matrix factorization": 76,
+    "matrix pair beamformer": 174,
+    "maximal ratio combining": 216,
+    "maximum a posteriori": 194,
+    "maximum likelihood": 11,
+    "mean absolute error": 123,
+    "mean average conceptual similarity": 179,
+    "mean average precision": 203,
+    "mean squared error": 29,
+    "median recovery error": 127,
+    "medium access control": 56,
+    "message passing interface": 146,
+    "million song dataset": 166,
+    "minimum generation error": 31,
+    "minimum risk training": 61,
+    "mitral valve": 134,
+    "model predictive control": 90,
+    "multi - layer same - resolution compressed": 21,
+    "multimedia event detection": 126,
+    "multiple description": 165,
+    "multiple description coding": 53,
+    "multiple parallel instances": 38,
+    "mutual information": 94,
+    "nash equilibrium": 10,
+    "nearest neighbor": 128,
+    "neural network": 46,
+    "new persian": 182,
+    "node classification": 78,
+    "o - d demand estimation": 214,
+    "oblivious transfer": 188,
+    "optical character recognition": 206,
+    "optimal power flow": 211,
+    "optimal transport": 175,
+    "ordinary differential equation": 106,
+    "orthogonal least square": 161,
+    "parameter server": 171,
+    "parkinson 's disease": 181,
+    "part of speech": 27,
+    "particle swarm optimization": 101,
+    "permutation invariant training": 30,
+    "physical access": 155,
+    "physical machines": 172,
+    "point multiplication": 156,
+    "poisson point process": 80,
+    "portable document format": 170,
+    "positive pointwise mutual information": 189,
+    "power splitting": 180,
+    "power system operations": 209,
+    "prepositional phrase": 84,
+    "principal component analysis": 77,
+    "product - based neural network": 178,
+    "property graph": 113,
+    "quantile random forest": 217,
+    "question answering": 37,
+    "radio frequency": 58,
+    "random forest": 222,
+    "random neural networks": 122,
+    "random vaccination": 8,
+    "rank residual constraint": 187,
+    "rate - selective": 163,
+    "rate distortion function": 67,
+    "receiver operating characteristic": 41,
+    "recurrent convolution": 205,
+    "recurrent neural network": 35,
+    "recurrent weighted average": 36,
+    "reinforcement learning": 33,
+    "relation extraction": 137,
+    "relay station": 220,
+    "resource description framework": 25,
+    "reverse classification accuracy": 191,
+    "scalar multiplication": 107,
+    "secondary users": 213,
+    "secrecy outage probability": 96,
+    "secrecy rate": 0,
+    "self attention network": 168,
+    "semantic role labeling": 204,
+    "sensing application recently": 93,
+    "sequential monte carlo": 153,
+    "shortest dependency path": 20,
+    "simulated annealing": 109,
+    "singular value decomposition": 125,
+    "situation entity": 219,
+    "smart object": 79,
+    "social status": 115,
+    "socially assistive robots": 142,
+    "sound pressure level": 13,
+    "spectral angle distance": 23,
+    "statistical compressed sensing": 97,
+    "statistical machine translation": 184,
+    "stochastic block model": 140,
+    "strategic dependency": 197,
+    "strictly local": 198,
+    "strictly piecewise": 18,
+    "support vector machine": 14,
+    "synthetic aperture radar": 12,
+    "taint dependency sequences": 116,
+    "technical debt": 103,
+    "term frequency": 169,
+    "test case prioritization": 45,
+    "thompson sampling": 160,
+    "threshold algorithm": 42,
+    "transformation encoder": 158,
+    "transformation error": 136,
+    "triad significance profile": 190,
+    "universal dependencies": 95,
+    "user equipment": 121,
+    "virtual machine": 44
+  },
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "pad_token_id": 0,
+  "position_embedding_type": "absolute",
+  "transformers_version": "4.34.1",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 30522
+}

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

tf_model.h5 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:02120eb976c87ea82775449de53b1dc855f52acbe11c4dd93f09919f61ebdf25
+size 438909076

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,55 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "unk_token": "[UNK]"
+}

vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff