{ "time": "2025-02-11 13:23:00", "results": { "IO": { "META": { "Algorithm": "IO", "LLM": "gpt-3.5-turbo", "Eval Date": "2025/1/7" }, "gsm8k": { "Score": 37.83, "Cost($)": 0.3328 }, "AQuA": { "Score": 38.98, "Cost($)": 0.038 }, "MATH-500": { "Score": 17.2, "Cost($)": 0.2436 } }, "ReAct-Pro*": { "META": { "Algorithm": "ReAct-Pro*", "LLM": "gpt-3.5-turbo", "Eval Date": "2025/1/7" }, "gsm8k": { "Score": 74.91, "Cost($)": 3.4633 }, "AQuA": { "Score": 64.57, "Cost($)": 0.4928 }, "MATH-500": { "Score": 23.8, "Cost($)": 2.0406 } }, "PoT": { "META": { "Algorithm": "PoT", "LLM": "gpt-3.5-turbo", "Eval Date": "2025/1/7" }, "gsm8k": { "Score": 76.88, "Cost($)": 0.6902 }, "AQuA": { "Score": 59.45, "Cost($)": 0.1748 }, "MATH-500": { "Score": 28.8, "Cost($)": 0.168 } }, "CoT": { "META": { "Algorithm": "CoT", "LLM": "gpt-3.5-turbo", "Eval Date": "2025/1/7" }, "gsm8k": { "Score": 78.7, "Cost($)": 0.6788 }, "AQuA": { "Score": 61.02, "Cost($)": 0.0957 }, "MATH-500": { "Score": 39.8, "Cost($)": 0.3189 } }, "SC-CoT": { "META": { "Algorithm": "SC-CoT", "LLM": "gpt-3.5-turbo", "Eval Date": "2025/1/7" }, "gsm8k": { "Score": 79.91, "Cost($)": 3.3938 }, "AQuA": { "Score": 66.14, "Cost($)": 0.7888 }, "MATH-500": { "Score": 28.8, "Cost($)": 1.9764 } }, "ToT": { "META": { "Algorithm": "ToT", "LLM": "gpt-3.5-turbo", "Eval Date": "2025/1/7" }, "gsm8k": { "Score": 67.93, "Cost($)": 9.1707 }, "AQuA": { "Score": 57.09, "Cost($)": 1.1513 }, "MATH-500": { "Score": 9.8, "Cost($)": 5.2914 } }, "IO-Doubao-lite-32k": { "META": { "Algorithm": "IO", "LLM": "Doubao-lite-32k", "Eval Date": "2025/1/7" }, "gsm8k": { "Score": 72.02, "Cost($)": 0.0354 }, "AQuA": { "Score": 79.13, "Cost($)": 0.0058 }, "MATH-500": { "Score": 37.4, "Cost($)": 0.0187 } }, "ReAct-Pro*-Doubao-lite-32k": { "META": { "Algorithm": "ReAct-Pro*", "LLM": "Doubao-lite-32k", "Eval Date": "2025/1/7" }, "gsm8k": { "Score": 85.6, "Cost($)": 0.2512 }, "AQuA": { "Score": 77.56, "Cost($)": 0.0445 }, "MATH-500": { "Score": 47.2, "Cost($)": 0.186 } 
}, "PoT-Doubao-lite-32k": { "META": { "Algorithm": "PoT", "LLM": "Doubao-lite-32k", "Eval Date": "2025/1/7" }, "gsm8k": { "Score": 79.61, "Cost($)": 0.0576 }, "AQuA": { "Score": 71.65, "Cost($)": 0.0147 }, "MATH-500": { "Score": 32.6, "Cost($)": 0.0144 } }, "CoT-Doubao-lite-32k": { "META": { "Algorithm": "CoT", "LLM": "Doubao-lite-32k", "Eval Date": "2025/1/7" }, "gsm8k": { "Score": 89.31, "Cost($)": 0.0558 }, "AQuA": { "Score": 82.68, "Cost($)": 0.0066 }, "MATH-500": { "Score": 59.0, "Cost($)": 0.0255 } }, "SC-CoT-Doubao-lite-32k": { "META": { "Algorithm": "SC-CoT", "LLM": "Doubao-lite-32k", "Eval Date": "2025/1/7" }, "gsm8k": { "Score": 87.26, "Cost($)": 0.2083 }, "AQuA": { "Score": 81.1, "Cost($)": 0.0519 }, "MATH-500": { "Score": 49.2, "Cost($)": 0.1406 } }, "ToT-Doubao-lite-32k": { "META": { "Algorithm": "ToT", "LLM": "Doubao-lite-32k", "Eval Date": "2025/1/7" }, "gsm8k": { "Score": 37.83, "Cost($)": 0.8739 }, "AQuA": { "Score": 45.28, "Cost($)": 0.0881 }, "MATH-500": { "Score": 1.2, "Cost($)": 0.2371 } }, "IO-gpt-4o": { "META": { "Algorithm": "IO", "LLM": "gpt-4o", "Eval Date": "2025/1/22" }, "gsm8k": { "Score": 88.4, "Cost($)": 3.3463 }, "AQuA": { "Score": 75.59, "Cost($)": 1.1453 }, "MATH-500": { "Score": 41.8, "Cost($)": 2.7907 } }, "ReAct-Pro*-gpt-4o": { "META": { "Algorithm": "ReAct-Pro*", "LLM": "gpt-4o", "Eval Date": "2025/1/22" }, "gsm8k": { "Score": 63.31, "Cost($)": 39.0751 }, "AQuA": { "Score": 57.48, "Cost($)": 2.304 }, "MATH-500": { "Score": 54.0, "Cost($)": 17.7735 } }, "PoT-gpt-4o": { "META": { "Algorithm": "PoT", "LLM": "gpt-4o", "Eval Date": "2025/1/22" }, "gsm8k": { "Score": 93.1, "Cost($)": 4.2166 }, "AQuA": { "Score": 75.2, "Cost($)": 1.6087 }, "MATH-500": { "Score": 46.2, "Cost($)": 1.5994 } }, "CoT-gpt-4o": { "META": { "Algorithm": "CoT", "LLM": "gpt-4o", "Eval Date": "2025/1/22" }, "gsm8k": { "Score": 94.09, "Cost($)": 4.5367 }, "AQuA": { "Score": 82.68, "Cost($)": 1.0417 }, "MATH-500": { "Score": 68.0, "Cost($)": 3.0569 } }, 
"SC-CoT-gpt-4o": { "META": { "Algorithm": "SC-CoT", "LLM": "gpt-4o", "Eval Date": "2025/1/22" }, "gsm8k": { "Score": 90.3, "Cost($)": 31.0542 }, "AQuA": { "Score": 86.61, "Cost($)": 8.1485 }, "MATH-500": { "Score": 34.4, "Cost($)": 19.6538 } }, "ToT-gpt-4o": { "META": { "Algorithm": "ToT", "LLM": "gpt-4o", "Eval Date": "2025/1/22" }, "gsm8k": { "Score": 91.13, "Cost($)": 86.8581 }, "AQuA": { "Score": 81.5, "Cost($)": 8.5295 }, "MATH-500": { "Score": 3.2, "Cost($)": 40.8094 } }, "IO-Qwen2.5-72B-Instruct": { "META": { "Algorithm": "IO", "LLM": "Qwen2.5-72B-Instruct", "Eval Date": "2025/1/22" }, "gsm8k": { "Score": 86.58, "Cost($)": 0.4899 }, "AQuA": { "Score": 84.25, "Cost($)": 0.0742 }, "MATH-500": { "Score": 70.2, "Cost($)": 0.2506 } }, "ReAct-Pro*-Qwen2.5-72B-Instruct": { "META": { "Algorithm": "ReAct-Pro*", "LLM": "Qwen2.5-72B-Instruct", "Eval Date": "2025/1/22" }, "gsm8k": { "Score": 87.26, "Cost($)": 10.5479 }, "AQuA": { "Score": 73.23, "Cost($)": 0.3177 }, "MATH-500": { "Score": 62.8, "Cost($)": 3.4541 } }, "PoT-Qwen2.5-72B-Instruct": { "META": { "Algorithm": "PoT", "LLM": "Qwen2.5-72B-Instruct", "Eval Date": "2025/1/22" }, "gsm8k": { "Score": 92.34, "Cost($)": 0.7054 }, "AQuA": { "Score": 75.2, "Cost($)": 0.1645 }, "MATH-500": { "Score": 47.2, "Cost($)": 0.233 } }, "CoT-Qwen2.5-72B-Instruct": { "META": { "Algorithm": "CoT", "LLM": "Qwen2.5-72B-Instruct", "Eval Date": "2025/1/22" }, "gsm8k": { "Score": 92.87, "Cost($)": 0.7195 }, "AQuA": { "Score": 86.22, "Cost($)": 0.0808 }, "MATH-500": { "Score": 80.2, "Cost($)": 0.349 } }, "SC-CoT-Qwen2.5-72B-Instruct": { "META": { "Algorithm": "SC-CoT", "LLM": "Qwen2.5-72B-Instruct", "Eval Date": "2025/1/22" }, "gsm8k": { "Score": 93.86, "Cost($)": 5.9858 }, "AQuA": { "Score": 85.04, "Cost($)": 1.0348 }, "MATH-500": { "Score": 74.0, "Cost($)": 3.1556 } }, "ToT-Qwen2.5-72B-Instruct": { "META": { "Algorithm": "ToT", "LLM": "Qwen2.5-72B-Instruct", "Eval Date": "2025/1/22" }, "gsm8k": { "Score": 88.88, "Cost($)": 23.5911 }, 
"AQuA": { "Score": 81.1, "Cost($)": 3.7389 }, "MATH-500": { "Score": 10.8, "Cost($)": 9.0421 } }, "IO-Llama-3.3-70B-Instruct": { "META": { "Algorithm": "IO", "LLM": "Llama-3.3-70B-Instruct", "Eval Date": "2025/1/22" }, "gsm8k": { "Score": 92.27, "Cost($)": 0.4709 }, "AQuA": { "Score": 82.68, "Cost($)": 0.0798 }, "MATH-500": { "Score": 69.4, "Cost($)": 0.2386 } }, "ReAct-Pro*-Llama-3.3-70B-Instruct": { "META": { "Algorithm": "ReAct-Pro*", "LLM": "Llama-3.3-70B-Instruct", "Eval Date": "2025/1/22" }, "gsm8k": { "Score": 87.64, "Cost($)": 10.1124 }, "AQuA": { "Score": 79.13, "Cost($)": 0.768 }, "MATH-500": { "Score": 64.6, "Cost($)": 3.1806 } }, "PoT-Llama-3.3-70B-Instruct": { "META": { "Algorithm": "PoT", "LLM": "Llama-3.3-70B-Instruct", "Eval Date": "2025/1/22" }, "gsm8k": { "Score": 73.09, "Cost($)": 0.9736 }, "AQuA": { "Score": 79.53, "Cost($)": 0.1746 }, "MATH-500": { "Score": 42.6, "Cost($)": 0.2839 } }, "CoT-Llama-3.3-70B-Instruct": { "META": { "Algorithm": "CoT", "LLM": "Llama-3.3-70B-Instruct", "Eval Date": "2025/1/22" }, "gsm8k": { "Score": 93.93, "Cost($)": 0.687 }, "AQuA": { "Score": 83.46, "Cost($)": 0.0927 }, "MATH-500": { "Score": 71.2, "Cost($)": 0.3463 } }, "SC-CoT-Llama-3.3-70B-Instruct": { "META": { "Algorithm": "SC-CoT", "LLM": "Llama-3.3-70B-Instruct", "Eval Date": "2025/1/22" }, "gsm8k": { "Score": 95.07, "Cost($)": 6.2005 }, "AQuA": { "Score": 82.28, "Cost($)": 1.0756 }, "MATH-500": { "Score": 74.2, "Cost($)": 3.2239 } }, "ToT-Llama-3.3-70B-Instruct": { "META": { "Algorithm": "ToT", "LLM": "Llama-3.3-70B-Instruct", "Eval Date": "2025/1/22" }, "gsm8k": { "Score": 91.89, "Cost($)": 20.8753 }, "AQuA": { "Score": 83.07, "Cost($)": 2.9404 }, "MATH-500": { "Score": 1.4, "Cost($)": 8.2699 } }, "IO-Qwen2.5-7B-Instruct": { "META": { "Algorithm": "IO", "LLM": "Qwen2.5-7B-Instruct", "Eval Date": "2025/1/22" }, "gsm8k": { "Score": 57.24, "Cost($)": 0.0 }, "AQuA": { "Score": 78.74, "Cost($)": 0.0 }, "MATH-500": { "Score": 59.4, "Cost($)": 0.0 } }, 
"ReAct-Pro*-Qwen2.5-7B-Instruct": { "META": { "Algorithm": "ReAct-Pro*", "LLM": "Qwen2.5-7B-Instruct", "Eval Date": "2025/1/22" }, "gsm8k": { "Score": 82.87, "Cost($)": 0.0 }, "AQuA": { "Score": 74.41, "Cost($)": 0.0 }, "MATH-500": { "Score": 48.8, "Cost($)": 0.0 } }, "PoT-Qwen2.5-7B-Instruct": { "META": { "Algorithm": "PoT", "LLM": "Qwen2.5-7B-Instruct", "Eval Date": "2025/1/22" }, "gsm8k": { "Score": 58.83, "Cost($)": 0.0 }, "AQuA": { "Score": 68.11, "Cost($)": 0.0 }, "MATH-500": { "Score": 39.6, "Cost($)": 0.0 } }, "CoT-Qwen2.5-7B-Instruct": { "META": { "Algorithm": "CoT", "LLM": "Qwen2.5-7B-Instruct", "Eval Date": "2025/1/22" }, "gsm8k": { "Score": 85.67, "Cost($)": 0.0 }, "AQuA": { "Score": 80.71, "Cost($)": 0.0 }, "MATH-500": { "Score": 69.8, "Cost($)": 0.0 } }, "SC-CoT-Qwen2.5-7B-Instruct": { "META": { "Algorithm": "SC-CoT", "LLM": "Qwen2.5-7B-Instruct", "Eval Date": "2025/1/22" }, "gsm8k": { "Score": 91.13, "Cost($)": 0.0 }, "AQuA": { "Score": 79.92, "Cost($)": 0.0 }, "MATH-500": { "Score": 67.0, "Cost($)": 0.0 } }, "ToT-Qwen2.5-7B-Instruct": { "META": { "Algorithm": "ToT", "LLM": "Qwen2.5-7B-Instruct", "Eval Date": "2025/1/22" }, "gsm8k": { "Score": 72.21, "Cost($)": 0.0 }, "AQuA": { "Score": 53.94, "Cost($)": 0.0 }, "MATH-500": { "Score": 1.4, "Cost($)": 0.0 } }, "IO-Llama-3.1-8B-Instruct": { "META": { "Algorithm": "IO", "LLM": "Llama-3.1-8B-Instruct", "Eval Date": "2025/1/22" }, "gsm8k": { "Score": 57.16, "Cost($)": 0.0 }, "AQuA": { "Score": 51.18, "Cost($)": 0.0 }, "MATH-500": { "Score": 38.6, "Cost($)": 0.0 } }, "ReAct-Pro*-Llama-3.1-8B-Instruct": { "META": { "Algorithm": "ReAct-Pro*", "LLM": "Llama-3.1-8B-Instruct", "Eval Date": "2025/1/22" }, "gsm8k": { "Score": 67.78, "Cost($)": 0.0 }, "AQuA": { "Score": 55.51, "Cost($)": 0.0 }, "MATH-500": { "Score": 28.8, "Cost($)": 0.0 } }, "PoT-Llama-3.1-8B-Instruct": { "META": { "Algorithm": "PoT", "LLM": "Llama-3.1-8B-Instruct", "Eval Date": "2025/1/22" }, "gsm8k": { "Score": 38.67, "Cost($)": 0.0 }, "AQuA": { 
"Score": 36.61, "Cost($)": 0.0 }, "MATH-500": { "Score": 25.4, "Cost($)": 0.0 } }, "CoT-Llama-3.1-8B-Instruct": { "META": { "Algorithm": "CoT", "LLM": "Llama-3.1-8B-Instruct", "Eval Date": "2025/1/22" }, "gsm8k": { "Score": 75.44, "Cost($)": 0.0 }, "AQuA": { "Score": 60.63, "Cost($)": 0.0 }, "MATH-500": { "Score": 25.8, "Cost($)": 0.0 } }, "SC-CoT-Llama-3.1-8B-Instruct": { "META": { "Algorithm": "SC-CoT", "LLM": "Llama-3.1-8B-Instruct", "Eval Date": "2025/1/22" }, "gsm8k": { "Score": 73.46, "Cost($)": 0.0 }, "AQuA": { "Score": 59.45, "Cost($)": 0.0 }, "MATH-500": { "Score": 30.2, "Cost($)": 0.0 } }, "ToT-Llama-3.1-8B-Instruct": { "META": { "Algorithm": "ToT", "LLM": "Llama-3.1-8B-Instruct", "Eval Date": "2025/1/22" }, "gsm8k": { "Score": 65.05, "Cost($)": 0.0 }, "AQuA": { "Score": 59.06, "Cost($)": 0.0 }, "MATH-500": { "Score": 1.8, "Cost($)": 0.0 } }, "IO-Internllm2_5-7B": { "META": { "Algorithm": "IO", "LLM": "Internllm2_5-7B", "Eval Date": "2025/1/22" }, "gsm8k": { "Score": 11.6, "Cost($)": 0.0 }, "AQuA": { "Score": 47.64, "Cost($)": 0.0 }, "MATH-500": { "Score": 22.8, "Cost($)": 0.0 } }, "ReAct-Pro*-Internllm2_5-7B": { "META": { "Algorithm": "ReAct-Pro*", "LLM": "Internllm2_5-7B", "Eval Date": "2025/1/22" }, "gsm8k": { "Score": 33.51, "Cost($)": 0.0 }, "AQuA": { "Score": 40.94, "Cost($)": 0.0 }, "MATH-500": { "Score": 14.8, "Cost($)": 0.0 } }, "PoT-Internllm2_5-7B": { "META": { "Algorithm": "PoT", "LLM": "Internllm2_5-7B", "Eval Date": "2025/1/22" }, "gsm8k": { "Score": 38.21, "Cost($)": 0.0 }, "AQuA": { "Score": 36.61, "Cost($)": 0.0 }, "MATH-500": { "Score": 15.0, "Cost($)": 0.0 } }, "CoT-Internllm2_5-7B": { "META": { "Algorithm": "CoT", "LLM": "Internllm2_5-7B", "Eval Date": "2025/1/22" }, "gsm8k": { "Score": 77.71, "Cost($)": 0.0 }, "AQuA": { "Score": 52.76, "Cost($)": 0.0 }, "MATH-500": { "Score": 46.6, "Cost($)": 0.0 } }, "SC-CoT-Internllm2_5-7B": { "META": { "Algorithm": "SC-CoT", "LLM": "Internllm2_5-7B", "Eval Date": "2025/1/22" }, "gsm8k": { "Score": 
48.22, "Cost($)": 0.0 }, "AQuA": { "Score": 39.37, "Cost($)": 0.0 }, "MATH-500": { "Score": 9.8, "Cost($)": 0.0 } }, "ToT-Internllm2_5-7B": { "META": { "Algorithm": "ToT", "LLM": "Internllm2_5-7B", "Eval Date": "2025/1/22" }, "gsm8k": { "Score": 20.85, "Cost($)": 0.0 }, "AQuA": { "Score": 35.83, "Cost($)": 0.0 }, "MATH-500": { "Score": 0.2, "Cost($)": 0.0 } }, "IO-Qwen2-1.5B-Instruct": { "META": { "Algorithm": "IO", "LLM": "Qwen2-1.5B-Instruct", "Eval Date": "2025/1/22" }, "gsm8k": { "Score": 16.68, "Cost($)": 0.0 }, "AQuA": { "Score": 29.13, "Cost($)": 0.0 }, "MATH-500": { "Score": 7.0, "Cost($)": 0.0 } }, "ReAct-Pro*-Qwen2-1.5B-Instruct": { "META": { "Algorithm": "ReAct-Pro*", "LLM": "Qwen2-1.5B-Instruct", "Eval Date": "2025/1/22" }, "gsm8k": { "Score": 24.87, "Cost($)": 0.0 }, "AQuA": { "Score": 25.59, "Cost($)": 0.0 }, "MATH-500": { "Score": 8.2, "Cost($)": 0.0 } }, "PoT-Qwen2-1.5B-Instruct": { "META": { "Algorithm": "PoT", "LLM": "Qwen2-1.5B-Instruct", "Eval Date": "2025/1/22" }, "gsm8k": { "Score": 18.5, "Cost($)": 0.0 }, "AQuA": { "Score": 30.71, "Cost($)": 0.0 }, "MATH-500": { "Score": 0.8, "Cost($)": 0.0 } }, "CoT-Qwen2-1.5B-Instruct": { "META": { "Algorithm": "CoT", "LLM": "Qwen2-1.5B-Instruct", "Eval Date": "2025/1/22" }, "gsm8k": { "Score": 55.5, "Cost($)": 0.0 }, "AQuA": { "Score": 40.55, "Cost($)": 0.0 }, "MATH-500": { "Score": 15.2, "Cost($)": 0.0 } }, "SC-CoT-Qwen2-1.5B-Instruct": { "META": { "Algorithm": "SC-CoT", "LLM": "Qwen2-1.5B-Instruct", "Eval Date": "2025/1/22" }, "gsm8k": { "Score": 11.75, "Cost($)": 0.0 }, "AQuA": { "Score": 23.62, "Cost($)": 0.0 }, "MATH-500": { "Score": 3.8, "Cost($)": 0.0 } }, "ToT-Qwen2-1.5B-Instruct": { "META": { "Algorithm": "ToT", "LLM": "Qwen2-1.5B-Instruct", "Eval Date": "2025/1/22" }, "gsm8k": { "Score": 19.64, "Cost($)": 0.0 }, "AQuA": { "Score": 31.5, "Cost($)": 0.0 }, "MATH-500": { "Score": 0.8, "Cost($)": 0.0 } }, "IO-Qwen2-0.5B-Instruct": { "META": { "Algorithm": "IO", "LLM": "Qwen2-0.5B-Instruct",
"Eval Date": "2025/1/22" }, "gsm8k": { "Score": 14.71, "Cost($)": 0.0 }, "AQuA": { "Score": 27.17, "Cost($)": 0.0 }, "MATH-500": { "Score": 2.6, "Cost($)": 0.0 } }, "ReAct-Pro*-Qwen2-0.5B-Instruct": { "META": { "Algorithm": "ReAct-Pro*", "LLM": "Qwen2-0.5B-Instruct", "Eval Date": "2025/1/22" }, "gsm8k": { "Score": 7.66, "Cost($)": 0.0 }, "AQuA": { "Score": 24.02, "Cost($)": 0.0 }, "MATH-500": { "Score": 0.6, "Cost($)": 0.0 } }, "PoT-Qwen2-0.5B-Instruct": { "META": { "Algorithm": "PoT", "LLM": "Qwen2-0.5B-Instruct", "Eval Date": "2025/1/22" }, "gsm8k": { "Score": 9.63, "Cost($)": 0.0 }, "AQuA": { "Score": 17.32, "Cost($)": 0.0 }, "MATH-500": { "Score": 0.0, "Cost($)": 0.0 } }, "CoT-Qwen2-0.5B-Instruct": { "META": { "Algorithm": "CoT", "LLM": "Qwen2-0.5B-Instruct", "Eval Date": "2025/1/22" }, "gsm8k": { "Score": 35.94, "Cost($)": 0.0 }, "AQuA": { "Score": 33.07, "Cost($)": 0.0 }, "MATH-500": { "Score": 6.2, "Cost($)": 0.0 } }, "SC-CoT-Qwen2-0.5B-Instruct": { "META": { "Algorithm": "SC-CoT", "LLM": "Qwen2-0.5B-Instruct", "Eval Date": "2025/1/22" }, "gsm8k": { "Score": 1.67, "Cost($)": 0.0 }, "AQuA": { "Score": 22.83, "Cost($)": 0.0 }, "MATH-500": { "Score": 0.8, "Cost($)": 0.0 } }, "ToT-Qwen2-0.5B-Instruct": { "META": { "Algorithm": "ToT", "LLM": "Qwen2-0.5B-Instruct", "Eval Date": "2025/1/22" }, "gsm8k": { "Score": 0.0, "Cost($)": 0.0 }, "AQuA": { "Score": 29.92, "Cost($)": 0.0 }, "MATH-500": { "Score": 0.0, "Cost($)": 0.0 } }, "IO-deepseek-r1:1.5b": { "META": { "Algorithm": "IO", "LLM": "deepseek-r1:1.5b", "Eval Date": "2025/1/22" }, "gsm8k": { "Score": 64.14, "Cost($)": 0.0 }, "AQuA": { "Score": 68.9, "Cost($)": 0.0 }, "MATH-500": { "Score": 43.8, "Cost($)": 0.0 } }, "ReAct-Pro*-deepseek-r1:1.5b": { "META": { "Algorithm": "ReAct-Pro*", "LLM": "deepseek-r1:1.5b", "Eval Date": "2025/2/10" }, "gsm8k": { "Score": 35.94, "Cost($)": 0.0 }, "AQuA": { "Score": 54.33, "Cost($)": 0.0 }, "MATH-500": { "Score": 24.4, "Cost($)": 0.0 } }, "PoT-deepseek-r1:1.5b": { "META": {
"Algorithm": "PoT", "LLM": "deepseek-r1:1.5b", "Eval Date": "2025/2/10" }, "gsm8k": { "Score": 11.9, "Cost($)": 0.0 }, "AQuA": { "Score": 54.72, "Cost($)": 0.0 }, "MATH-500": { "Score": 1.0, "Cost($)": 0.0 } }, "CoT-deepseek-r1:1.5b": { "META": { "Algorithm": "CoT", "LLM": "deepseek-r1:1.5b", "Eval Date": "2025/1/23" }, "gsm8k": { "Score": 70.66, "Cost($)": 0.0 }, "AQuA": { "Score": 71.65, "Cost($)": 0.0 }, "MATH-500": { "Score": 49.4, "Cost($)": 0.0 } }, "SC-CoT-deepseek-r1:1.5b": { "META": { "Algorithm": "SC-CoT", "LLM": "deepseek-r1:1.5b", "Eval Date": "2025/2/10" }, "gsm8k": { "Score": 55.34, "Cost($)": 0.0 }, "AQuA": { "Score": 59.06, "Cost($)": 0.0 }, "MATH-500": { "Score": 38.0, "Cost($)": 0.0 } }, "ToT-deepseek-r1:1.5b": { "META": { "Algorithm": "ToT", "LLM": "deepseek-r1:1.5b", "Eval Date": "2025/2/10" }, "gsm8k": { "Score": 23.12, "Cost($)": 0.0 }, "AQuA": { "Score": 24.8, "Cost($)": 0.0 }, "MATH-500": { "Score": 0.4, "Cost($)": 0.0 } } } }