--- tags: - mteb base_model: mixedbread-ai/mxbai-embed-mini-v1 library_name: sentence-transformers model-index: - name: mxbai-embed-xsmall-v1 results: - task: type: Retrieval dataset: type: arguana name: MTEB ArguAna config: default split: test revision: None metrics: - type: ndcg_at_1 value: 25.18 - type: ndcg_at_3 value: 39.22 - type: ndcg_at_5 value: 43.93 - type: ndcg_at_10 value: 49.58 - type: ndcg_at_30 value: 53.41 - type: ndcg_at_100 value: 54.11 - type: map_at_1 value: 25.18 - type: map_at_3 value: 35.66 - type: map_at_5 value: 38.25 - type: map_at_10 value: 40.58 - type: map_at_30 value: 41.6 - type: map_at_100 value: 41.69 - type: recall_at_1 value: 25.18 - type: recall_at_3 value: 49.57 - type: recall_at_5 value: 61.09 - type: recall_at_10 value: 78.59 - type: recall_at_30 value: 94.03 - type: recall_at_100 value: 97.94 - type: precision_at_1 value: 25.18 - type: precision_at_3 value: 16.52 - type: precision_at_5 value: 12.22 - type: precision_at_10 value: 7.86 - type: precision_at_30 value: 3.13 - type: precision_at_100 value: 0.98 - type: accuracy_at_3 value: 49.57 - type: accuracy_at_5 value: 61.09 - type: accuracy_at_10 value: 78.59 - task: type: Retrieval dataset: type: BeIR/cqadupstack name: MTEB CQADupstackAndroidRetrieval config: default split: test revision: None metrics: - type: ndcg_at_1 value: 44.35 - type: ndcg_at_3 value: 49.64 - type: ndcg_at_5 value: 51.73 - type: ndcg_at_10 value: 54.82 - type: ndcg_at_30 value: 57.64 - type: ndcg_at_100 value: 59.77 - type: map_at_1 value: 36.26 - type: map_at_3 value: 44.35 - type: map_at_5 value: 46.26 - type: map_at_10 value: 48.24 - type: map_at_30 value: 49.34 - type: map_at_100 value: 49.75 - type: recall_at_1 value: 36.26 - type: recall_at_3 value: 51.46 - type: recall_at_5 value: 57.78 - type: recall_at_10 value: 66.5 - type: recall_at_30 value: 77.19 - type: recall_at_100 value: 87.53 - type: precision_at_1 value: 44.35 - type: precision_at_3 value: 23.65 - type: precision_at_5 value: 16.88 - 
type: precision_at_10 value: 10.7 - type: precision_at_30 value: 4.53 - type: precision_at_100 value: 1.65 - type: accuracy_at_3 value: 60.51 - type: accuracy_at_5 value: 67.67 - type: accuracy_at_10 value: 74.68 - task: type: Retrieval dataset: type: BeIR/cqadupstack name: MTEB CQADupstackEnglishRetrieval config: default split: test revision: None metrics: - type: ndcg_at_1 value: 39.43 - type: ndcg_at_3 value: 44.13 - type: ndcg_at_5 value: 46.06 - type: ndcg_at_10 value: 48.31 - type: ndcg_at_30 value: 51.06 - type: ndcg_at_100 value: 53.07 - type: map_at_1 value: 31.27 - type: map_at_3 value: 39.07 - type: map_at_5 value: 40.83 - type: map_at_10 value: 42.23 - type: map_at_30 value: 43.27 - type: map_at_100 value: 43.66 - type: recall_at_1 value: 31.27 - type: recall_at_3 value: 45.89 - type: recall_at_5 value: 51.44 - type: recall_at_10 value: 58.65 - type: recall_at_30 value: 69.12 - type: recall_at_100 value: 78.72 - type: precision_at_1 value: 39.43 - type: precision_at_3 value: 21.61 - type: precision_at_5 value: 15.34 - type: precision_at_10 value: 9.27 - type: precision_at_30 value: 4.01 - type: precision_at_100 value: 1.52 - type: accuracy_at_3 value: 55.48 - type: accuracy_at_5 value: 60.76 - type: accuracy_at_10 value: 67.45 - task: type: Retrieval dataset: type: BeIR/cqadupstack name: MTEB CQADupstackGamingRetrieval config: default split: test revision: None metrics: - type: ndcg_at_1 value: 45.58 - type: ndcg_at_3 value: 52.68 - type: ndcg_at_5 value: 55.28 - type: ndcg_at_10 value: 57.88 - type: ndcg_at_30 value: 60.6 - type: ndcg_at_100 value: 62.03 - type: map_at_1 value: 39.97 - type: map_at_3 value: 49.06 - type: map_at_5 value: 50.87 - type: map_at_10 value: 52.2 - type: map_at_30 value: 53.06 - type: map_at_100 value: 53.28 - type: recall_at_1 value: 39.97 - type: recall_at_3 value: 57.4 - type: recall_at_5 value: 63.83 - type: recall_at_10 value: 71.33 - type: recall_at_30 value: 81.81 - type: recall_at_100 value: 89.0 - type: precision_at_1 
value: 45.58 - type: precision_at_3 value: 23.55 - type: precision_at_5 value: 16.01 - type: precision_at_10 value: 9.25 - type: precision_at_30 value: 3.67 - type: precision_at_100 value: 1.23 - type: accuracy_at_3 value: 62.76 - type: accuracy_at_5 value: 68.84 - type: accuracy_at_10 value: 75.8 - task: type: Retrieval dataset: type: BeIR/cqadupstack name: MTEB CQADupstackGisRetrieval config: default split: test revision: None metrics: - type: ndcg_at_1 value: 27.35 - type: ndcg_at_3 value: 34.23 - type: ndcg_at_5 value: 37.1 - type: ndcg_at_10 value: 40.26 - type: ndcg_at_30 value: 43.54 - type: ndcg_at_100 value: 45.9 - type: map_at_1 value: 25.28 - type: map_at_3 value: 31.68 - type: map_at_5 value: 33.38 - type: map_at_10 value: 34.79 - type: map_at_30 value: 35.67 - type: map_at_100 value: 35.96 - type: recall_at_1 value: 25.28 - type: recall_at_3 value: 38.95 - type: recall_at_5 value: 45.82 - type: recall_at_10 value: 55.11 - type: recall_at_30 value: 68.13 - type: recall_at_100 value: 80.88 - type: precision_at_1 value: 27.35 - type: precision_at_3 value: 14.65 - type: precision_at_5 value: 10.44 - type: precision_at_10 value: 6.37 - type: precision_at_30 value: 2.65 - type: precision_at_100 value: 0.97 - type: accuracy_at_3 value: 42.15 - type: accuracy_at_5 value: 49.15 - type: accuracy_at_10 value: 58.53 - task: type: Retrieval dataset: type: BeIR/cqadupstack name: MTEB CQADupstackMathematicaRetrieval config: default split: test revision: None metrics: - type: ndcg_at_1 value: 18.91 - type: ndcg_at_3 value: 24.37 - type: ndcg_at_5 value: 26.11 - type: ndcg_at_10 value: 29.37 - type: ndcg_at_30 value: 33.22 - type: ndcg_at_100 value: 35.73 - type: map_at_1 value: 15.23 - type: map_at_3 value: 21.25 - type: map_at_5 value: 22.38 - type: map_at_10 value: 23.86 - type: map_at_30 value: 24.91 - type: map_at_100 value: 25.24 - type: recall_at_1 value: 15.23 - type: recall_at_3 value: 28.28 - type: recall_at_5 value: 32.67 - type: recall_at_10 value: 42.23 - 
type: recall_at_30 value: 56.87 - type: recall_at_100 value: 69.44 - type: precision_at_1 value: 18.91 - type: precision_at_3 value: 11.9 - type: precision_at_5 value: 8.48 - type: precision_at_10 value: 5.63 - type: precision_at_30 value: 2.64 - type: precision_at_100 value: 1.02 - type: accuracy_at_3 value: 33.95 - type: accuracy_at_5 value: 38.81 - type: accuracy_at_10 value: 49.13 - task: type: Retrieval dataset: type: BeIR/cqadupstack name: MTEB CQADupstackPhysicsRetrieval config: default split: test revision: None metrics: - type: ndcg_at_1 value: 36.96 - type: ndcg_at_3 value: 42.48 - type: ndcg_at_5 value: 44.57 - type: ndcg_at_10 value: 47.13 - type: ndcg_at_30 value: 50.65 - type: ndcg_at_100 value: 53.14 - type: map_at_1 value: 30.1 - type: map_at_3 value: 37.97 - type: map_at_5 value: 39.62 - type: map_at_10 value: 41.06 - type: map_at_30 value: 42.13 - type: map_at_100 value: 42.53 - type: recall_at_1 value: 30.1 - type: recall_at_3 value: 45.98 - type: recall_at_5 value: 51.58 - type: recall_at_10 value: 59.24 - type: recall_at_30 value: 72.47 - type: recall_at_100 value: 84.53 - type: precision_at_1 value: 36.96 - type: precision_at_3 value: 20.5 - type: precision_at_5 value: 14.4 - type: precision_at_10 value: 8.62 - type: precision_at_30 value: 3.67 - type: precision_at_100 value: 1.38 - type: accuracy_at_3 value: 54.09 - type: accuracy_at_5 value: 60.25 - type: accuracy_at_10 value: 67.37 - task: type: Retrieval dataset: type: BeIR/cqadupstack name: MTEB CQADupstackProgrammersRetrieval config: default split: test revision: None metrics: - type: ndcg_at_1 value: 28.65 - type: ndcg_at_3 value: 34.3 - type: ndcg_at_5 value: 36.8 - type: ndcg_at_10 value: 39.92 - type: ndcg_at_30 value: 42.97 - type: ndcg_at_100 value: 45.45 - type: map_at_1 value: 23.35 - type: map_at_3 value: 30.36 - type: map_at_5 value: 32.15 - type: map_at_10 value: 33.74 - type: map_at_30 value: 34.69 - type: map_at_100 value: 35.02 - type: recall_at_1 value: 23.35 - type: 
recall_at_3 value: 37.71 - type: recall_at_5 value: 44.23 - type: recall_at_10 value: 53.6 - type: recall_at_30 value: 64.69 - type: recall_at_100 value: 77.41 - type: precision_at_1 value: 28.65 - type: precision_at_3 value: 16.74 - type: precision_at_5 value: 12.21 - type: precision_at_10 value: 7.61 - type: precision_at_30 value: 3.29 - type: precision_at_100 value: 1.22 - type: accuracy_at_3 value: 44.86 - type: accuracy_at_5 value: 52.4 - type: accuracy_at_10 value: 61.07 - task: type: Retrieval dataset: type: BeIR/cqadupstack name: MTEB CQADupstackStatsRetrieval config: default split: test revision: None metrics: - type: ndcg_at_1 value: 26.07 - type: ndcg_at_3 value: 31.62 - type: ndcg_at_5 value: 33.23 - type: ndcg_at_10 value: 35.62 - type: ndcg_at_30 value: 38.41 - type: ndcg_at_100 value: 40.81 - type: map_at_1 value: 22.96 - type: map_at_3 value: 28.85 - type: map_at_5 value: 29.97 - type: map_at_10 value: 31.11 - type: map_at_30 value: 31.86 - type: map_at_100 value: 32.15 - type: recall_at_1 value: 22.96 - type: recall_at_3 value: 35.14 - type: recall_at_5 value: 39.22 - type: recall_at_10 value: 46.52 - type: recall_at_30 value: 57.58 - type: recall_at_100 value: 70.57 - type: precision_at_1 value: 26.07 - type: precision_at_3 value: 14.11 - type: precision_at_5 value: 9.69 - type: precision_at_10 value: 5.81 - type: precision_at_30 value: 2.45 - type: precision_at_100 value: 0.92 - type: accuracy_at_3 value: 39.42 - type: accuracy_at_5 value: 43.41 - type: accuracy_at_10 value: 50.92 - task: type: Retrieval dataset: type: BeIR/cqadupstack name: MTEB CQADupstackTexRetrieval config: default split: test revision: None metrics: - type: ndcg_at_1 value: 21.78 - type: ndcg_at_3 value: 25.74 - type: ndcg_at_5 value: 27.86 - type: ndcg_at_10 value: 30.3 - type: ndcg_at_30 value: 33.51 - type: ndcg_at_100 value: 36.12 - type: map_at_1 value: 17.63 - type: map_at_3 value: 22.7 - type: map_at_5 value: 24.14 - type: map_at_10 value: 25.31 - type: map_at_30 
value: 26.22 - type: map_at_100 value: 26.56 - type: recall_at_1 value: 17.63 - type: recall_at_3 value: 28.37 - type: recall_at_5 value: 33.99 - type: recall_at_10 value: 41.23 - type: recall_at_30 value: 53.69 - type: recall_at_100 value: 67.27 - type: precision_at_1 value: 21.78 - type: precision_at_3 value: 12.41 - type: precision_at_5 value: 9.07 - type: precision_at_10 value: 5.69 - type: precision_at_30 value: 2.61 - type: precision_at_100 value: 1.03 - type: accuracy_at_3 value: 33.62 - type: accuracy_at_5 value: 39.81 - type: accuracy_at_10 value: 47.32 - task: type: Retrieval dataset: type: BeIR/cqadupstack name: MTEB CQADupstackUnixRetrieval config: default split: test revision: None metrics: - type: ndcg_at_1 value: 30.97 - type: ndcg_at_3 value: 36.13 - type: ndcg_at_5 value: 39.0 - type: ndcg_at_10 value: 41.78 - type: ndcg_at_30 value: 44.96 - type: ndcg_at_100 value: 47.52 - type: map_at_1 value: 26.05 - type: map_at_3 value: 32.77 - type: map_at_5 value: 34.6 - type: map_at_10 value: 35.93 - type: map_at_30 value: 36.88 - type: map_at_100 value: 37.22 - type: recall_at_1 value: 26.05 - type: recall_at_3 value: 40.0 - type: recall_at_5 value: 47.34 - type: recall_at_10 value: 55.34 - type: recall_at_30 value: 67.08 - type: recall_at_100 value: 80.2 - type: precision_at_1 value: 30.97 - type: precision_at_3 value: 16.6 - type: precision_at_5 value: 12.03 - type: precision_at_10 value: 7.3 - type: precision_at_30 value: 3.08 - type: precision_at_100 value: 1.15 - type: accuracy_at_3 value: 45.62 - type: accuracy_at_5 value: 53.64 - type: accuracy_at_10 value: 61.66 - task: type: Retrieval dataset: type: BeIR/cqadupstack name: MTEB CQADupstackWebmastersRetrieval config: default split: test revision: None metrics: - type: ndcg_at_1 value: 29.64 - type: ndcg_at_3 value: 35.49 - type: ndcg_at_5 value: 37.77 - type: ndcg_at_10 value: 40.78 - type: ndcg_at_30 value: 44.59 - type: ndcg_at_100 value: 46.97 - type: map_at_1 value: 24.77 - type: map_at_3 value: 
31.33 - type: map_at_5 value: 32.95 - type: map_at_10 value: 34.47 - type: map_at_30 value: 35.7 - type: map_at_100 value: 36.17 - type: recall_at_1 value: 24.77 - type: recall_at_3 value: 38.16 - type: recall_at_5 value: 44.1 - type: recall_at_10 value: 53.31 - type: recall_at_30 value: 68.43 - type: recall_at_100 value: 80.24 - type: precision_at_1 value: 29.64 - type: precision_at_3 value: 16.8 - type: precision_at_5 value: 12.21 - type: precision_at_10 value: 7.83 - type: precision_at_30 value: 3.89 - type: precision_at_100 value: 1.63 - type: accuracy_at_3 value: 45.45 - type: accuracy_at_5 value: 51.58 - type: accuracy_at_10 value: 61.07 - task: type: Retrieval dataset: type: BeIR/cqadupstack name: MTEB CQADupstackWordpressRetrieval config: default split: test revision: None metrics: - type: ndcg_at_1 value: 23.47 - type: ndcg_at_3 value: 27.98 - type: ndcg_at_5 value: 30.16 - type: ndcg_at_10 value: 32.97 - type: ndcg_at_30 value: 36.3 - type: ndcg_at_100 value: 38.47 - type: map_at_1 value: 21.63 - type: map_at_3 value: 26.02 - type: map_at_5 value: 27.32 - type: map_at_10 value: 28.51 - type: map_at_30 value: 29.39 - type: map_at_100 value: 29.66 - type: recall_at_1 value: 21.63 - type: recall_at_3 value: 31.47 - type: recall_at_5 value: 36.69 - type: recall_at_10 value: 44.95 - type: recall_at_30 value: 58.2 - type: recall_at_100 value: 69.83 - type: precision_at_1 value: 23.47 - type: precision_at_3 value: 11.71 - type: precision_at_5 value: 8.32 - type: precision_at_10 value: 5.23 - type: precision_at_30 value: 2.29 - type: precision_at_100 value: 0.86 - type: accuracy_at_3 value: 34.01 - type: accuracy_at_5 value: 39.37 - type: accuracy_at_10 value: 48.24 - task: type: Retrieval dataset: type: climate-fever name: MTEB ClimateFEVER config: default split: test revision: None metrics: - type: ndcg_at_1 value: 19.8 - type: ndcg_at_3 value: 17.93 - type: ndcg_at_5 value: 19.39 - type: ndcg_at_10 value: 22.42 - type: ndcg_at_30 value: 26.79 - type: 
ndcg_at_100 value: 29.84 - type: map_at_1 value: 9.09 - type: map_at_3 value: 12.91 - type: map_at_5 value: 14.12 - type: map_at_10 value: 15.45 - type: map_at_30 value: 16.73 - type: map_at_100 value: 17.21 - type: recall_at_1 value: 9.09 - type: recall_at_3 value: 16.81 - type: recall_at_5 value: 20.9 - type: recall_at_10 value: 27.65 - type: recall_at_30 value: 41.23 - type: recall_at_100 value: 53.57 - type: precision_at_1 value: 19.8 - type: precision_at_3 value: 13.36 - type: precision_at_5 value: 10.33 - type: precision_at_10 value: 7.15 - type: precision_at_30 value: 3.66 - type: precision_at_100 value: 1.49 - type: accuracy_at_3 value: 36.22 - type: accuracy_at_5 value: 44.1 - type: accuracy_at_10 value: 55.11 - task: type: Retrieval dataset: type: dbpedia-entity name: MTEB DBPedia config: default split: test revision: None metrics: - type: ndcg_at_1 value: 42.75 - type: ndcg_at_3 value: 35.67 - type: ndcg_at_5 value: 33.58 - type: ndcg_at_10 value: 32.19 - type: ndcg_at_30 value: 31.82 - type: ndcg_at_100 value: 35.87 - type: map_at_1 value: 7.05 - type: map_at_3 value: 10.5 - type: map_at_5 value: 12.06 - type: map_at_10 value: 14.29 - type: map_at_30 value: 17.38 - type: map_at_100 value: 19.58 - type: recall_at_1 value: 7.05 - type: recall_at_3 value: 11.89 - type: recall_at_5 value: 14.7 - type: recall_at_10 value: 19.78 - type: recall_at_30 value: 29.88 - type: recall_at_100 value: 42.4 - type: precision_at_1 value: 54.25 - type: precision_at_3 value: 39.42 - type: precision_at_5 value: 33.15 - type: precision_at_10 value: 25.95 - type: precision_at_30 value: 15.51 - type: precision_at_100 value: 7.9 - type: accuracy_at_3 value: 72.0 - type: accuracy_at_5 value: 77.75 - type: accuracy_at_10 value: 83.5 - task: type: Retrieval dataset: type: fever name: MTEB FEVER config: default split: test revision: None metrics: - type: ndcg_at_1 value: 40.19 - type: ndcg_at_3 value: 50.51 - type: ndcg_at_5 value: 53.51 - type: ndcg_at_10 value: 56.45 - type: 
ndcg_at_30 value: 58.74 - type: ndcg_at_100 value: 59.72 - type: map_at_1 value: 37.56 - type: map_at_3 value: 46.74 - type: map_at_5 value: 48.46 - type: map_at_10 value: 49.7 - type: map_at_30 value: 50.31 - type: map_at_100 value: 50.43 - type: recall_at_1 value: 37.56 - type: recall_at_3 value: 58.28 - type: recall_at_5 value: 65.45 - type: recall_at_10 value: 74.28 - type: recall_at_30 value: 83.42 - type: recall_at_100 value: 88.76 - type: precision_at_1 value: 40.19 - type: precision_at_3 value: 20.99 - type: precision_at_5 value: 14.24 - type: precision_at_10 value: 8.12 - type: precision_at_30 value: 3.06 - type: precision_at_100 value: 0.98 - type: accuracy_at_3 value: 62.3 - type: accuracy_at_5 value: 69.94 - type: accuracy_at_10 value: 79.13 - task: type: Retrieval dataset: type: fiqa name: MTEB FiQA2018 config: default split: test revision: None metrics: - type: ndcg_at_1 value: 34.41 - type: ndcg_at_3 value: 33.2 - type: ndcg_at_5 value: 34.71 - type: ndcg_at_10 value: 37.1 - type: ndcg_at_30 value: 40.88 - type: ndcg_at_100 value: 44.12 - type: map_at_1 value: 17.27 - type: map_at_3 value: 25.36 - type: map_at_5 value: 27.76 - type: map_at_10 value: 29.46 - type: map_at_30 value: 30.74 - type: map_at_100 value: 31.29 - type: recall_at_1 value: 17.27 - type: recall_at_3 value: 30.46 - type: recall_at_5 value: 36.91 - type: recall_at_10 value: 44.47 - type: recall_at_30 value: 56.71 - type: recall_at_100 value: 70.72 - type: precision_at_1 value: 34.41 - type: precision_at_3 value: 22.32 - type: precision_at_5 value: 16.91 - type: precision_at_10 value: 10.53 - type: precision_at_30 value: 4.62 - type: precision_at_100 value: 1.79 - type: accuracy_at_3 value: 50.77 - type: accuracy_at_5 value: 57.56 - type: accuracy_at_10 value: 65.12 - task: type: Retrieval dataset: type: hotpotqa name: MTEB HotpotQA config: default split: test revision: None metrics: - type: ndcg_at_1 value: 57.93 - type: ndcg_at_3 value: 44.21 - type: ndcg_at_5 value: 46.4 - type: 
ndcg_at_10 value: 48.37 - type: ndcg_at_30 value: 50.44 - type: ndcg_at_100 value: 51.86 - type: map_at_1 value: 28.97 - type: map_at_3 value: 36.79 - type: map_at_5 value: 38.31 - type: map_at_10 value: 39.32 - type: map_at_30 value: 39.99 - type: map_at_100 value: 40.2 - type: recall_at_1 value: 28.97 - type: recall_at_3 value: 41.01 - type: recall_at_5 value: 45.36 - type: recall_at_10 value: 50.32 - type: recall_at_30 value: 57.38 - type: recall_at_100 value: 64.06 - type: precision_at_1 value: 57.93 - type: precision_at_3 value: 27.34 - type: precision_at_5 value: 18.14 - type: precision_at_10 value: 10.06 - type: precision_at_30 value: 3.82 - type: precision_at_100 value: 1.28 - type: accuracy_at_3 value: 71.03 - type: accuracy_at_5 value: 75.14 - type: accuracy_at_10 value: 79.84 - task: type: Retrieval dataset: type: msmarco name: MTEB MSMARCO config: default split: dev revision: None metrics: - type: ndcg_at_1 value: 19.74 - type: ndcg_at_3 value: 29.47 - type: ndcg_at_5 value: 32.99 - type: ndcg_at_10 value: 36.76 - type: ndcg_at_30 value: 40.52 - type: ndcg_at_100 value: 42.78 - type: map_at_1 value: 19.2 - type: map_at_3 value: 26.81 - type: map_at_5 value: 28.78 - type: map_at_10 value: 30.35 - type: map_at_30 value: 31.3 - type: map_at_100 value: 31.57 - type: recall_at_1 value: 19.2 - type: recall_at_3 value: 36.59 - type: recall_at_5 value: 45.08 - type: recall_at_10 value: 56.54 - type: recall_at_30 value: 72.05 - type: recall_at_100 value: 84.73 - type: precision_at_1 value: 19.74 - type: precision_at_3 value: 12.61 - type: precision_at_5 value: 9.37 - type: precision_at_10 value: 5.89 - type: precision_at_30 value: 2.52 - type: precision_at_100 value: 0.89 - type: accuracy_at_3 value: 37.38 - type: accuracy_at_5 value: 46.06 - type: accuracy_at_10 value: 57.62 - task: type: Retrieval dataset: type: nq name: MTEB NQ config: default split: test revision: None metrics: - type: ndcg_at_1 value: 25.9 - type: ndcg_at_3 value: 35.97 - type: ndcg_at_5 
value: 40.27 - type: ndcg_at_10 value: 44.44 - type: ndcg_at_30 value: 48.31 - type: ndcg_at_100 value: 50.14 - type: map_at_1 value: 23.03 - type: map_at_3 value: 32.45 - type: map_at_5 value: 34.99 - type: map_at_10 value: 36.84 - type: map_at_30 value: 37.92 - type: map_at_100 value: 38.16 - type: recall_at_1 value: 23.03 - type: recall_at_3 value: 43.49 - type: recall_at_5 value: 53.41 - type: recall_at_10 value: 65.65 - type: recall_at_30 value: 80.79 - type: recall_at_100 value: 90.59 - type: precision_at_1 value: 25.9 - type: precision_at_3 value: 16.76 - type: precision_at_5 value: 12.54 - type: precision_at_10 value: 7.78 - type: precision_at_30 value: 3.23 - type: precision_at_100 value: 1.1 - type: accuracy_at_3 value: 47.31 - type: accuracy_at_5 value: 57.16 - type: accuracy_at_10 value: 69.09 - task: type: Retrieval dataset: type: nfcorpus name: MTEB NFCorpus config: default split: test revision: None metrics: - type: ndcg_at_1 value: 40.87 - type: ndcg_at_3 value: 36.79 - type: ndcg_at_5 value: 34.47 - type: ndcg_at_10 value: 32.05 - type: ndcg_at_30 value: 29.23 - type: ndcg_at_100 value: 29.84 - type: map_at_1 value: 5.05 - type: map_at_3 value: 8.5 - type: map_at_5 value: 9.87 - type: map_at_10 value: 11.71 - type: map_at_30 value: 13.48 - type: map_at_100 value: 14.86 - type: recall_at_1 value: 5.05 - type: recall_at_3 value: 9.55 - type: recall_at_5 value: 11.91 - type: recall_at_10 value: 16.07 - type: recall_at_30 value: 22.13 - type: recall_at_100 value: 30.7 - type: precision_at_1 value: 42.72 - type: precision_at_3 value: 34.78 - type: precision_at_5 value: 30.03 - type: precision_at_10 value: 23.93 - type: precision_at_30 value: 14.61 - type: precision_at_100 value: 7.85 - type: accuracy_at_3 value: 58.2 - type: accuracy_at_5 value: 64.09 - type: accuracy_at_10 value: 69.35 - task: type: Retrieval dataset: type: quora name: MTEB QuoraRetrieval config: default split: test revision: None metrics: - type: ndcg_at_1 value: 80.62 - type: 
ndcg_at_3 value: 84.62 - type: ndcg_at_5 value: 86.25 - type: ndcg_at_10 value: 87.7 - type: ndcg_at_30 value: 88.63 - type: ndcg_at_100 value: 88.95 - type: map_at_1 value: 69.91 - type: map_at_3 value: 80.7 - type: map_at_5 value: 82.57 - type: map_at_10 value: 83.78 - type: map_at_30 value: 84.33 - type: map_at_100 value: 84.44 - type: recall_at_1 value: 69.91 - type: recall_at_3 value: 86.36 - type: recall_at_5 value: 90.99 - type: recall_at_10 value: 95.19 - type: recall_at_30 value: 98.25 - type: recall_at_100 value: 99.47 - type: precision_at_1 value: 80.62 - type: precision_at_3 value: 37.03 - type: precision_at_5 value: 24.36 - type: precision_at_10 value: 13.4 - type: precision_at_30 value: 4.87 - type: precision_at_100 value: 1.53 - type: accuracy_at_3 value: 92.25 - type: accuracy_at_5 value: 95.29 - type: accuracy_at_10 value: 97.74 - task: type: Retrieval dataset: type: scidocs name: MTEB SCIDOCS config: default split: test revision: None metrics: - type: ndcg_at_1 value: 24.1 - type: ndcg_at_3 value: 20.18 - type: ndcg_at_5 value: 17.72 - type: ndcg_at_10 value: 21.5 - type: ndcg_at_30 value: 26.66 - type: ndcg_at_100 value: 30.95 - type: map_at_1 value: 4.88 - type: map_at_3 value: 9.09 - type: map_at_5 value: 10.99 - type: map_at_10 value: 12.93 - type: map_at_30 value: 14.71 - type: map_at_100 value: 15.49 - type: recall_at_1 value: 4.88 - type: recall_at_3 value: 11.55 - type: recall_at_5 value: 15.91 - type: recall_at_10 value: 22.82 - type: recall_at_30 value: 35.7 - type: recall_at_100 value: 50.41 - type: precision_at_1 value: 24.1 - type: precision_at_3 value: 19.0 - type: precision_at_5 value: 15.72 - type: precision_at_10 value: 11.27 - type: precision_at_30 value: 5.87 - type: precision_at_100 value: 2.49 - type: accuracy_at_3 value: 43.0 - type: accuracy_at_5 value: 51.6 - type: accuracy_at_10 value: 62.7 - task: type: Retrieval dataset: type: scifact name: MTEB SciFact config: default split: test revision: None metrics: - type: 
ndcg_at_1 value: 52.33 - type: ndcg_at_3 value: 61.47 - type: ndcg_at_5 value: 63.82 - type: ndcg_at_10 value: 65.81 - type: ndcg_at_30 value: 67.75 - type: ndcg_at_100 value: 68.96 - type: map_at_1 value: 50.46 - type: map_at_3 value: 58.51 - type: map_at_5 value: 60.12 - type: map_at_10 value: 61.07 - type: map_at_30 value: 61.64 - type: map_at_100 value: 61.8 - type: recall_at_1 value: 50.46 - type: recall_at_3 value: 67.81 - type: recall_at_5 value: 73.6 - type: recall_at_10 value: 79.31 - type: recall_at_30 value: 86.8 - type: recall_at_100 value: 93.5 - type: precision_at_1 value: 52.33 - type: precision_at_3 value: 24.56 - type: precision_at_5 value: 16.27 - type: precision_at_10 value: 8.9 - type: precision_at_30 value: 3.28 - type: precision_at_100 value: 1.06 - type: accuracy_at_3 value: 69.67 - type: accuracy_at_5 value: 75.0 - type: accuracy_at_10 value: 80.67 - task: type: Retrieval dataset: type: trec-covid name: MTEB TRECCOVID config: default split: test revision: None metrics: - type: ndcg_at_1 value: 57.0 - type: ndcg_at_3 value: 53.78 - type: ndcg_at_5 value: 52.62 - type: ndcg_at_10 value: 48.9 - type: ndcg_at_30 value: 44.2 - type: ndcg_at_100 value: 36.53 - type: map_at_1 value: 0.16 - type: map_at_3 value: 0.41 - type: map_at_5 value: 0.62 - type: map_at_10 value: 1.07 - type: map_at_30 value: 2.46 - type: map_at_100 value: 5.52 - type: recall_at_1 value: 0.16 - type: recall_at_3 value: 0.45 - type: recall_at_5 value: 0.72 - type: recall_at_10 value: 1.33 - type: recall_at_30 value: 3.46 - type: recall_at_100 value: 8.73 - type: precision_at_1 value: 62.0 - type: precision_at_3 value: 57.33 - type: precision_at_5 value: 56.0 - type: precision_at_10 value: 52.0 - type: precision_at_30 value: 46.2 - type: precision_at_100 value: 37.22 - type: accuracy_at_3 value: 82.0 - type: accuracy_at_5 value: 90.0 - type: accuracy_at_10 value: 92.0 - task: type: Retrieval dataset: type: webis-touche2020 name: MTEB Touche2020 config: default split: test 
revision: None metrics: - type: ndcg_at_1 value: 20.41 - type: ndcg_at_3 value: 17.62 - type: ndcg_at_5 value: 17.16 - type: ndcg_at_10 value: 17.09 - type: ndcg_at_30 value: 20.1 - type: ndcg_at_100 value: 26.33 - type: map_at_1 value: 2.15 - type: map_at_3 value: 3.59 - type: map_at_5 value: 5.07 - type: map_at_10 value: 6.95 - type: map_at_30 value: 9.01 - type: map_at_100 value: 10.54 - type: recall_at_1 value: 2.15 - type: recall_at_3 value: 4.5 - type: recall_at_5 value: 7.54 - type: recall_at_10 value: 12.46 - type: recall_at_30 value: 21.9 - type: recall_at_100 value: 36.58 - type: precision_at_1 value: 22.45 - type: precision_at_3 value: 19.05 - type: precision_at_5 value: 17.55 - type: precision_at_10 value: 15.51 - type: precision_at_30 value: 10.07 - type: precision_at_100 value: 5.57 - type: accuracy_at_3 value: 42.86 - type: accuracy_at_5 value: 53.06 - type: accuracy_at_10 value: 69.39 - task: type: Retrieval dataset: type: BeIR/cqadupstack name: MTEB CQADupstackRetrieval config: default split: test revision: None metrics: - type: ndcg_at_10 value: 41.59 license: apache-2.0 language: - en pipeline_tag: feature-extraction ---

The crispy sentence embedding family from Mixedbread.

# mixedbread-ai/mxbai-embed-xsmall-v1

This model is an open-source English embedding model developed by [Mixedbread](https://mixedbread.ai). It's built upon [sentence-transformers/all-MiniLM-L6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) and trained with the [AnglE loss](https://arxiv.org/abs/2309.12871) and [Espresso](https://arxiv.org/abs/2402.14776). Read more details in our [blog post](https://www.mixedbread.ai/blog/mxbai-embed-xsmall-v1).

**In a bread loaf**:

- State-of-the-art performance
- Supports both [binary quantization and Matryoshka Representation Learning (MRL)](#binary-quantization-and-matryoshka)
- Optimized for retrieval tasks
- 4096 context support

## Performance

## Binary Quantization and Matryoshka

Our model supports both [binary quantization](https://www.mixedbread.ai/blog/binary-quantization) and [Matryoshka Representation Learning (MRL)](https://www.mixedbread.ai/blog/mxbai-embed-2d-large-v1), allowing for significant efficiency gains:

- Binary quantization: Retains 93.9% of performance while increasing efficiency by a factor of 32
- MRL: A 33% reduction in vector size still leaves 96.2% of model performance

These optimizations can lead to substantial reductions in infrastructure costs for cloud computing and vector databases. Read more [here](https://www.mixedbread.ai/blog/binary-mrl).

## Quickstart

Here are several ways to produce English sentence embeddings using our model.
### angle-emb

```bash
pip install -U angle-emb
```

```python
from angle_emb import AnglE
from angle_emb.utils import cosine_similarity

# Embedding size requested from the encoder.
dimensions = 384

# Load the checkpoint with average pooling and move it to the GPU.
model = AnglE.from_pretrained(
    "mixedbread-ai/mxbai-embed-xsmall-v1",
    pooling_strategy='avg',
).cuda()

query = 'A man is eating a piece of bread'
candidates = [
    "A man is eating food.",
    "A man is eating pasta.",
    "The girl is carrying a baby.",
    "A man is riding a horse.",
]
# The query goes first so that its embedding ends up at index 0.
docs = [query, *candidates]

# Encode the query and all candidates in a single call.
embeddings = model.encode(docs, embedding_size=dimensions)
query_emb = embeddings[0]

# Print the similarity of every candidate to the query.
for doc, emb in zip(candidates, embeddings[1:]):
    print(f'{query} ||| {doc}', cosine_similarity(query_emb, emb))
```
### Sentence Transformers

```bash
python -m pip install -U sentence-transformers
```

```python
from sentence_transformers import SentenceTransformer
from sentence_transformers.util import cos_sim

# Embedding size to keep; handed to the model as truncate_dim.
dimensions = 384

model = SentenceTransformer(
    "mixedbread-ai/mxbai-embed-xsmall-v1",
    truncate_dim=dimensions,
)

query = 'A man is eating a piece of bread'
docs = [
    query,  # index 0 is the query itself
    "A man is eating food.",
    "A man is eating pasta.",
    "The girl is carrying a baby.",
    "A man is riding a horse.",
]

# Encode everything at once, then compare the query against the rest.
embeddings = model.encode(docs)
query_emb, doc_embs = embeddings[0], embeddings[1:]
similarities = cos_sim(query_emb, doc_embs)
print('similarities:', similarities)
```
### transformers

```bash
pip install -U transformers sentence-transformers
```

```python
from typing import Dict

import numpy as np
import torch
from transformers import AutoModel, AutoTokenizer
from sentence_transformers.util import cos_sim


def pooling(outputs: torch.Tensor, inputs: Dict) -> np.ndarray:
    """Mean-pool token embeddings over the unmasked positions of each row.

    Args:
        outputs: Token-level hidden states, as returned in
            ``last_hidden_state`` (indexed here as batch x tokens x hidden).
        inputs: Tokenizer output; only ``inputs["attention_mask"]`` is read.

    Returns:
        One pooled vector per input row, as a NumPy array on the CPU.
    """
    mask = inputs["attention_mask"][:, :, None]
    # Normalise each row by its OWN number of unmasked tokens. Summing the
    # mask over the whole batch would divide every row by the same batch-wide
    # total, which is not a mean and changes with the batch composition.
    token_counts = torch.sum(mask, dim=1)
    pooled = torch.sum(outputs * mask, dim=1) / token_counts
    return pooled.detach().cpu().numpy()


# 1. Load model
model_id = 'mixedbread-ai/mxbai-embed-xsmall-v1'
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModel.from_pretrained(model_id).cuda()

query = 'A man is eating a piece of bread'
docs = [
    query,
    "A man is eating food.",
    "A man is eating pasta.",
    "The girl is carrying a baby.",
    "A man is riding a horse.",
]

# 2. Encode
inputs = tokenizer(docs, padding=True, return_tensors='pt')
for k, v in inputs.items():
    inputs[k] = v.cuda()
outputs = model(**inputs).last_hidden_state
embeddings = pooling(outputs, inputs)

# 3. Compute similarity scores
similarities = cos_sim(embeddings[0], embeddings[1:])
print('similarities:', similarities)
```
### Batched API

```bash
python -m pip install batched fastapi uvicorn orjson sentence-transformers
```

```python
import uvicorn
import batched
from fastapi import FastAPI
from fastapi.responses import ORJSONResponse
from sentence_transformers import SentenceTransformer
from pydantic import BaseModel

app = FastAPI()

model = SentenceTransformer('mixedbread-ai/mxbai-embed-xsmall-v1')
# Wrap encode with batched's async dynamic batching; the wrapped call is
# awaited in the request handler below.
model.encode = batched.aio.dynamically(model.encode)


class EmbeddingsRequest(BaseModel):
    # A single text or a list of texts to embed.
    input: str | list[str]


@app.post("/embeddings")
async def embeddings(request: EmbeddingsRequest):
    """Embed the request's input and return it under the "embeddings" key."""
    return ORJSONResponse({"embeddings": await model.encode(request.input)})


if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=8000)
```
## Community

Join our [Discord community](https://www.mixedbread.ai/redirects/discord) to share your feedback and thoughts. We're here to help and always happy to discuss the exciting field of machine learning!

## License

Apache 2.0

## Citation

```bibtex
@online{xsmall2024mxbai,
  title={Every Byte Matters: Introducing mxbai-embed-xsmall-v1},
  author={Sean Lee and Julius Lipp and Rui Huang and Darius Koenig},
  year={2024},
  url={https://www.mixedbread.ai/blog/mxbai-embed-xsmall-v1},
}
```