,OpenAI,Base Arctic,Fine-tuned Arctic 0,"{'context_recall': 1.0, 'faithfulness': 0.0, 'factual_correctness': np.float64(0.67), 'answer_relevancy': np.float64(0.9798966352038251), 'context_entity_recall': 0.9999999966666667, 'noise_sensitivity_relevant': np.float64(0.5)}","{'context_recall': 0.0, 'faithfulness': 0.0, 'factual_correctness': np.float64(0.0), 'answer_relevancy': np.float64(0.0), 'context_entity_recall': 0.1999999996, 'noise_sensitivity_relevant': np.float64(0.0)}","{'context_recall': 0.0, 'faithfulness': 1.0, 'factual_correctness': np.float64(0.0), 'answer_relevancy': np.float64(0.812980950807575), 'context_entity_recall': 0.0, 'noise_sensitivity_relevant': np.float64(0.0)}" 1,"{'context_recall': 1.0, 'faithfulness': 0.8571428571428571, 'factual_correctness': np.float64(0.62), 'answer_relevancy': np.float64(0.9710186678865266), 'context_entity_recall': 0.8999999991, 'noise_sensitivity_relevant': np.float64(0.16666666666666666)}","{'context_recall': 0.0, 'faithfulness': 0.0, 'factual_correctness': np.float64(0.0), 'answer_relevancy': np.float64(0.0), 'context_entity_recall': 0.0, 'noise_sensitivity_relevant': np.float64(0.0)}","{'context_recall': 1.0, 'faithfulness': 1.0, 'factual_correctness': np.float64(0.0), 'answer_relevancy': np.float64(0.818210604470556), 'context_entity_recall': 0.8749999989062499, 'noise_sensitivity_relevant': np.float64(0.6)}" 2,"{'context_recall': 1.0, 'faithfulness': 1.0, 'factual_correctness': np.float64(0.32), 'answer_relevancy': np.float64(0.962952166791465), 'context_entity_recall': 0.999999995, 'noise_sensitivity_relevant': np.float64(0.07142857142857142)}","{'context_recall': 1.0, 'faithfulness': 1.0, 'factual_correctness': np.float64(1.0), 'answer_relevancy': np.float64(0.9245964216906574), 'context_entity_recall': 0.6666666644444444, 'noise_sensitivity_relevant': np.float64(0.0)}","{'context_recall': 1.0, 'faithfulness': 1.0, 'factual_correctness': np.float64(0.15), 'answer_relevancy': np.float64(0.8934791921420707), 'context_entity_recall': 0.999999995, 'noise_sensitivity_relevant': np.float64(0.8888888888888888)}" 3,"{'context_recall': 1.0, 'faithfulness': 1.0, 'factual_correctness': np.float64(0.0), 'answer_relevancy': np.float64(0.9999981405579151), 'context_entity_recall': 0.3333333322222222, 'noise_sensitivity_relevant': np.float64(1.0)}","{'context_recall': 0.5, 'faithfulness': 0.25, 'factual_correctness': np.float64(0.57), 'answer_relevancy': np.float64(0.955327383960776), 'context_entity_recall': 0.9999999966666667, 'noise_sensitivity_relevant': np.float64(0.0)}","{'context_recall': 1.0, 'faithfulness': 0.42857142857142855, 'factual_correctness': np.float64(0.22), 'answer_relevancy': np.float64(0.9456642966540557), 'context_entity_recall': 0.999999995, 'noise_sensitivity_relevant': np.float64(0.2857142857142857)}" 4,"{'context_recall': 1.0, 'faithfulness': 1.0, 'factual_correctness': np.float64(0.5), 'answer_relevancy': np.float64(0.9584384212778746), 'context_entity_recall': 0.9999999900000002, 'noise_sensitivity_relevant': np.float64(0.4)}","{'context_recall': 1.0, 'faithfulness': 1.0, 'factual_correctness': np.float64(0.67), 'answer_relevancy': np.float64(0.9788634419240716), 'context_entity_recall': 0.999999995, 'noise_sensitivity_relevant': np.float64(0.4)}","{'context_recall': 1.0, 'faithfulness': 1.0, 'factual_correctness': np.float64(0.75), 'answer_relevancy': np.float64(0.9999999999999996), 'context_entity_recall': 0.6666666644444444, 'noise_sensitivity_relevant': np.float64(0.4)}" 5,"{'context_recall': 1.0, 'faithfulness': 1.0, 'factual_correctness': np.float64(0.93), 'answer_relevancy': np.float64(0.9704647246092887), 'context_entity_recall': 0.571428570612245, 'noise_sensitivity_relevant': np.float64(0.0)}","{'context_recall': 0.0, 'faithfulness': 0.0, 'factual_correctness': np.float64(0.0), 'answer_relevancy': np.float64(0.0), 'context_entity_recall': 0.0, 'noise_sensitivity_relevant': np.float64(0.0)}","{'context_recall': 0.75, 'faithfulness': 0.9090909090909091, 'factual_correctness': np.float64(0.76), 'answer_relevancy': np.float64(0.964455051772553), 'context_entity_recall': 0.9999999983333333, 'noise_sensitivity_relevant': np.float64(0.18181818181818182)}" 6,"{'context_recall': 0.0, 'faithfulness': 0.6666666666666666, 'factual_correctness': np.float64(0.0), 'answer_relevancy': np.float64(0.0), 'context_entity_recall': 0.0, 'noise_sensitivity_relevant': np.float64(0.6666666666666666)}","{'context_recall': 0.0, 'faithfulness': 0.4444444444444444, 'factual_correctness': np.float64(0.22), 'answer_relevancy': np.float64(0.9582018957242698), 'context_entity_recall': 0.249999999375, 'noise_sensitivity_relevant': np.float64(0.3333333333333333)}","{'context_recall': 1.0, 'faithfulness': 0.5, 'factual_correctness': np.float64(0.57), 'answer_relevancy': np.float64(0.980401682770414), 'context_entity_recall': 0.1999999996, 'noise_sensitivity_relevant': np.float64(0.3333333333333333)}" 7,"{'context_recall': 1.0, 'faithfulness': 1.0, 'factual_correctness': np.float64(0.6), 'answer_relevancy': np.float64(0.9680198367369263), 'context_entity_recall': 0.4285714279591837, 'noise_sensitivity_relevant': np.float64(0.3333333333333333)}","{'context_recall': 0.3333333333333333, 'faithfulness': 1.0, 'factual_correctness': np.float64(0.2), 'answer_relevancy': np.float64(0.9257048870782064), 'context_entity_recall': 0.14285714265306124, 'noise_sensitivity_relevant': np.float64(0.5)}","{'context_recall': 0.6666666666666666, 'faithfulness': 0.625, 'factual_correctness': np.float64(0.56), 'answer_relevancy': np.float64(0.9496106445743336), 'context_entity_recall': 0.2857142853061225, 'noise_sensitivity_relevant': np.float64(0.25)}" 8,"{'context_recall': 1.0, 'faithfulness': 0.9166666666666666, 'factual_correctness': np.float64(0.71), 'answer_relevancy': np.float64(0.9999999999999997), 'context_entity_recall': 0.3636363633057851, 'noise_sensitivity_relevant': np.float64(0.25)}","{'context_recall': 0.0, 'faithfulness': 0.6666666666666666, 'factual_correctness': np.float64(0.0), 'answer_relevancy': np.float64(0.0), 'context_entity_recall': 0.12499999984374999, 'noise_sensitivity_relevant': np.float64(0.6666666666666666)}","{'context_recall': 1.0, 'faithfulness': 0.7142857142857143, 'factual_correctness': np.float64(0.82), 'answer_relevancy': np.float64(0.9648538873768647), 'context_entity_recall': 0.5999999988, 'noise_sensitivity_relevant': np.float64(0.07142857142857142)}" 9,"{'context_recall': 1.0, 'faithfulness': 0.75, 'factual_correctness': np.float64(0.0), 'answer_relevancy': np.float64(0.8340107209677066), 'context_entity_recall': 0.7499999981250001, 'noise_sensitivity_relevant': np.float64(0.25)}","{'context_recall': 0.0, 'faithfulness': 0.0, 'factual_correctness': np.float64(0.0), 'answer_relevancy': np.float64(0.0), 'context_entity_recall': 0.5999999988, 'noise_sensitivity_relevant': np.float64(0.0)}","{'context_recall': 0.5, 'faithfulness': 0.5, 'factual_correctness': np.float64(0.8), 'answer_relevancy': np.float64(0.9633619812256754), 'context_entity_recall': 0.7499999981250001, 'noise_sensitivity_relevant': np.float64(0.0)}"