|
{ |
|
"api_key": null, |
|
"verify_url": "http://johnrachwan.pythonanywhere.com", |
|
"smash_config": { |
|
"pruners": "None", |
|
"factorizers": "None", |
|
"quantizers": "['llm-int8']", |
|
"compilers": "None", |
|
"task": "text_text_generation", |
|
"device": "cuda", |
|
"cache_dir": "/ceph/hdd/staff/charpent/.cache/modelsvm71r2d8", |
|
"batch_size": 1, |
|
"n_quantization_bits": 8, |
|
"tokenizer": "LlamaTokenizerFast(name_or_path='vj1148/llama-2-7b-legal', vocab_size=32000, model_max_length=1000000000000000019884624838656, is_fast=True, padding_side='left', truncation_side='right', special_tokens={'bos_token': '<s>', 'eos_token': '</s>', 'unk_token': '<unk>', 'pad_token': '</s>'}, clean_up_tokenization_spaces=False), added_tokens_decoder={\n\t0: AddedToken(\"<unk>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n\t1: AddedToken(\"<s>\", rstrip=False, lstrip=False, single_word=False, normalized=True, special=True),\n\t2: AddedToken(\"</s>\", rstrip=False, lstrip=False, single_word=False, normalized=True, special=True),\n\t32000: AddedToken(\"<pad>\", rstrip=False, lstrip=False, single_word=False, normalized=True, special=False),\n}", |
|
"model_config": "{'vocab_size': 32000, 'max_position_embeddings': 4096, 'hidden_size': 4096, 'intermediate_size': 11008, 'num_hidden_layers': 32, 'num_attention_heads': 32, 'num_key_value_heads': 32, 'hidden_act': 'silu', 'initializer_range': 0.02, 'rms_norm_eps': 1e-05, 'pretraining_tp': 1, 'use_cache': True, 'rope_theta': 10000.0, 'rope_scaling': None, 'attention_bias': False, 'attention_dropout': 0.0, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float16', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': False, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['LlamaForCausalLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 1, 'pad_token_id': 0, 'eos_token_id': 2, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'vj1148/llama-2-7b-legal', 'transformers_version': '4.37.1', 'model_type': 'llama'}", |
|
"model_name": "vj1148/llama-2-7b-legal", |
|
"pruning_ratio": 0.0, |
|
"output_deviation": 0.005, |
|
"max_batch_size": 1, |
|
"qtype_weight": "torch.qint8", |
|
"qtype_activation": "torch.quint8", |
|
"qobserver": "<class 'torch.ao.quantization.observer.MinMaxObserver'>", |
|
"qscheme": "torch.per_tensor_symmetric", |
|
"qconfig": "x86", |
|
"group_size": 128, |
|
"damp_percent": 0.1, |
|
"save_load_fn": "bitsandbytes" |
|
} |
|
} |