Souradeep Nanda committed on
Commit
31f401e
1 Parent(s): fd02f63

Initial commit

Browse files
Files changed (3) hide show
  1. README.md +38 -0
  2. config.json +35 -0
  3. pytorch_model.bin +3 -0
README.md CHANGED
@@ -1,3 +1,41 @@
1
  ---
2
  license: apache-2.0
3
  ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
  license: apache-2.0
3
  ---
4
+
5
+ # Beam Retrieval: General End-to-End Retrieval for Multi-Hop Question Answering (Zhang et al., 2023)
6
+
7
+ Unofficial mirror of [Beam Retriever](https://github.com/canghongjian/beam_retriever)
8
+
9
+ This is the fine-tuned **encoder only** from the Beam Retriever model, which can be used for maximum inner product search.
10
+
11
+ ## Usage
12
+
13
+ ```python
14
+ from transformers import DebertaV2Model
15
+
16
+ finetuned_encoder = DebertaV2Model.from_pretrained('scholarly-shadows-syndicate/beam_retriever_unofficial_encoder_only')
17
+ ```
18
+
19
+ ## Citations
20
+
21
+ ```bibtex
22
+ @article{Zhang2023BeamRG,
23
+ title={Beam Retrieval: General End-to-End Retrieval for Multi-Hop Question Answering},
24
+ author={Jiahao Zhang and H. Zhang and Dongmei Zhang and Yong Liu and Sheng Huang},
25
+ journal={ArXiv},
26
+ year={2023},
27
+ volume={abs/2308.08973},
28
+ url={https://api.semanticscholar.org/CorpusID:261030563}
29
+ }
30
+ ```
31
+
32
+ ```bibtex
33
+ @article{He2020DeBERTaDB,
34
+ title={DeBERTa: Decoding-enhanced BERT with Disentangled Attention},
35
+ author={Pengcheng He and Xiaodong Liu and Jianfeng Gao and Weizhu Chen},
36
+ journal={ArXiv},
37
+ year={2020},
38
+ volume={abs/2006.03654},
39
+ url={https://api.semanticscholar.org/CorpusID:219531210}
40
+ }
41
+ ```
config.json ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "microsoft/deberta-v3-large",
3
+ "architectures": [
4
+ "DebertaV2Model"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "hidden_act": "gelu",
8
+ "hidden_dropout_prob": 0.1,
9
+ "hidden_size": 1024,
10
+ "initializer_range": 0.02,
11
+ "intermediate_size": 4096,
12
+ "layer_norm_eps": 1e-07,
13
+ "max_position_embeddings": 512,
14
+ "max_relative_positions": -1,
15
+ "model_type": "deberta-v2",
16
+ "norm_rel_ebd": "layer_norm",
17
+ "num_attention_heads": 16,
18
+ "num_hidden_layers": 24,
19
+ "pad_token_id": 0,
20
+ "pooler_dropout": 0,
21
+ "pooler_hidden_act": "gelu",
22
+ "pooler_hidden_size": 1024,
23
+ "pos_att_type": [
24
+ "p2c",
25
+ "c2p"
26
+ ],
27
+ "position_biased_input": false,
28
+ "position_buckets": 256,
29
+ "relative_attention": true,
30
+ "share_att_key": true,
31
+ "torch_dtype": "float32",
32
+ "transformers_version": "4.29.2",
33
+ "type_vocab_size": 0,
34
+ "vocab_size": 128100
35
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c783a10446bb851a58769cb22681c707f0308f616b973cb8f5c384b3a6e5941
3
+ size 1736180262