File size: 3,315 Bytes
b8a3ef1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
node_lines:
- node_line_name: retrieve_node_line  # Arbitrary node line name
  nodes:
    - node_type: retrieval
      strategy:
        metrics: [ retrieval_f1, retrieval_recall, retrieval_precision,
                   retrieval_ndcg, retrieval_map, retrieval_mrr ]
        speed_threshold: 10
      top_k: 10
      modules:
        - module_type: bm25
          bm25_tokenizer: [ porter_stemmer, space, gpt2 ]
        - module_type: vectordb
          embedding_model: openai
          embedding_batch: 256
        - module_type: hybrid_rrf
          weight_range: (4,80)
        - module_type: hybrid_cc
          normalize_method: [ mm, tmm, z, dbsf ]
          weight_range: (0.0, 1.0)
          test_weight_size: 101
    - node_type: passage_augmenter
      strategy:
        metrics: [ retrieval_f1, retrieval_recall, retrieval_precision ]
        speed_threshold: 5
      top_k: 5
      embedding_model: openai
      modules:
        - module_type: pass_passage_augmenter
        - module_type: prev_next_augmenter
          mode: next
    - node_type: passage_filter
      strategy:
        metrics: [ retrieval_f1, retrieval_recall, retrieval_precision ]
        speed_threshold: 5
      modules:
        - module_type: pass_passage_filter
        - module_type: similarity_threshold_cutoff
          threshold: 0.85
        - module_type: similarity_percentile_cutoff
          percentile: 0.6
        - module_type: threshold_cutoff
          threshold: 0.85
        - module_type: percentile_cutoff
          percentile: 0.6
    - node_type: passage_compressor
      strategy:
        metrics: [retrieval_token_f1, retrieval_token_recall, retrieval_token_precision]
        speed_threshold: 10
      modules:
        - module_type: pass_compressor
        - module_type: tree_summarize
          llm: openai
          model: gpt-4o-mini
        - module_type: refine
          llm: openai
          model: gpt-4o-mini
        - module_type: longllmlingua
- node_line_name: post_retrieve_node_line  # Arbitrary node line name
  nodes:
    - node_type: prompt_maker
      strategy:
        metrics:
          - metric_name: bleu
          - metric_name: meteor
          - metric_name: rouge
          - metric_name: sem_score
            embedding_model: openai
        speed_threshold: 10
        generator_modules:
          - module_type: llama_index_llm
            llm: openai
            model: [gpt-4o-mini]
      modules:
        - module_type: fstring
          prompt: ["Tell me something about the question: {query} \n\n {retrieved_contents}",
                   "Question: {query} \n Something to read: {retrieved_contents} \n What's your answer?"]
        - module_type: long_context_reorder
          prompt: [ "Tell me something about the question: {query} \n\n {retrieved_contents}",
                    "Question: {query} \n Something to read: {retrieved_contents} \n What's your answer?" ]
    - node_type: generator
      strategy:
        metrics:
          - metric_name: bleu
          - metric_name: meteor
          - metric_name: rouge
          - metric_name: sem_score
            embedding_model: openai
        speed_threshold: 10
      modules:
        - module_type: llama_index_llm
          llm: [openai]
          model: [gpt-4o-mini]
          temperature: [0.5, 1.0]