Rank,Algorithm,Dataset,LLM,Eval Date,Score,Pass rate,X-shot,Cost($),Framework,Samples,All tokens,Total input tokens,Average input tokens,Total output tokens,Average output tokens 1,SC-CoT,AQuA,gpt-4o,2025/1/22,86.61,0.9882,0,8.1485,,254,1373206,744478,2931,628728,2475 2,CoT,AQuA,Qwen2.5-72B-Instruct,2025/1/22,86.22,0.9921,0,0.0808,,254,143289,25143,99,118146,465 3,SC-CoT,AQuA,Qwen2.5-72B-Instruct,2025/1/22,85.04,0.9921,0,1.0348,,254,1835669,1051218,4139,784451,3088 4,IO,AQuA,Qwen2.5-72B-Instruct,2025/1/22,84.25,0.9961,0,0.0742,,254,131604,25397,100,106207,418 5,CoT,AQuA,Llama-3.3-70B-Instruct,2025/1/22,83.46,0.9843,0,0.0927,,254,164389,32555,128,131834,519 6,ToT,AQuA,Llama-3.3-70B-Instruct,2025/1/22,83.07,1.0,0,2.9404,,254,5215848,4735188,18642,480660,1892 7,IO,AQuA,Llama-3.3-70B-Instruct,2025/1/22,82.68,0.9921,0,0.0798,,254,141567,32809,129,108758,428 8,CoT,AQuA,Doubao-lite-32k,2025/1/7,82.68,0.9724,0,0.0066,,254,94577,27978,110,66599,262 9,CoT,AQuA,gpt-4o,2025/1/22,82.68,0.9803,0,1.0417,,254,123017,25123,99,97894,385 10,SC-CoT,AQuA,Llama-3.3-70B-Instruct,2025/1/22,82.28,0.9921,0,1.0756,,254,1907924,1135251,4469,772673,3042 11,ToT,AQuA,gpt-4o,2025/1/22,81.5,0.9921,0,8.5295,,254,2613607,2347538,9242,266069,1048 12,SC-CoT,AQuA,Doubao-lite-32k,2025/1/7,81.1,0.9724,0,0.0519,,254,885986,503751,1983,382235,1505 13,ToT,AQuA,Qwen2.5-72B-Instruct,2025/1/22,81.1,0.9921,0,3.7389,,254,6632255,6371642,25085,260613,1026 14,CoT,AQuA,Qwen2.5-7B-Instruct,2025/1/22,80.71,0.9961,0,0.0,,254,149736,33017,130,116719,460 15,SC-CoT,AQuA,Qwen2.5-7B-Instruct,2025/1/22,79.92,1.0,0,0.0,,254,1845332,1098280,4324,747052,2941 16,PoT,AQuA,Llama-3.3-70B-Instruct,2025/1/22,79.53,0.9921,0,0.1746,,254,309799,240735,948,69064,272 17,IO,AQuA,Doubao-lite-32k,2025/1/7,79.13,1.0,0,0.0058,,254,87742,33058,130,54684,215 18,ReAct-Pro*,AQuA,Llama-3.3-70B-Instruct,2025/1/22,79.13,0.9961,0,0.768,,254,1362379,1119143,4406,243236,958 19,IO,AQuA,Qwen2.5-7B-Instruct,2025/1/22,78.74,0.9843,0,0.0,,254,137771,33271,131,104500,411 20,ReAct-Pro*,AQuA,Doubao-lite-32k,2025/1/7,77.56,0.9606,0,0.0445,,254,1032841,977890,3850,54951,216 21,IO,AQuA,gpt-4o,2025/1/22,75.59,0.9724,0,1.1453,,254,133752,25631,101,108121,426 22,PoT,AQuA,gpt-4o,2025/1/22,75.2,1.0,0,1.6087,,254,327908,222717,877,105191,414 23,PoT,AQuA,Qwen2.5-72B-Instruct,2025/1/22,75.2,1.0,0,0.1645,,254,291764,249215,981,42549,168 24,ReAct-Pro*,AQuA,Qwen2.5-7B-Instruct,2025/1/22,74.41,0.9921,0,0.0,,254,695844,564165,2221,131679,518 25,ReAct-Pro*,AQuA,Qwen2.5-72B-Instruct,2025/1/22,73.23,1.0,0,0.3177,,254,563603,441765,1739,121838,480 26,PoT,AQuA,Doubao-lite-32k,2025/1/7,71.65,0.9685,0,0.0147,,254,309436,259863,1023,49573,195 27,CoT,AQuA,deepseek-r1:1.5b,2025/1/23,71.65,0.9685,0,0.0,,254,333072,26413,104,306659,1207 28,IO,AQuA,deepseek-r1:1.5b,2025/1/22,68.9,0.9488,0,0.0,,254,351767,26667,105,325100,1280 29,PoT,AQuA,Qwen2.5-7B-Instruct,2025/1/22,68.11,1.0,0,0.0,,254,313728,264517,1041,49211,194 30,SC-CoT,AQuA,gpt-3.5-turbo,2025/1/7,66.14,0.9921,0,0.7888,,254,847335,482192,1898,365143,1438 31,ReAct-Pro*,AQuA,gpt-3.5-turbo,2025/1/7,64.57,0.9803,0,0.4928,,254,903587,862614,3396,40973,161 32,CoT,AQuA,gpt-3.5-turbo,2025/1/7,61.02,0.937,0,0.0957,,254,80793,25447,100,55346,218 33,CoT,AQuA,Llama-3.1-8B-Instruct,2025/1/22,60.63,1.0,0,0.0,,254,144435,32555,128,111880,440 34,PoT,AQuA,gpt-3.5-turbo,2025/1/7,59.45,1.0,0,0.1748,,254,266654,225162,886,41492,163 35,SC-CoT,AQuA,Llama-3.1-8B-Instruct,2025/1/22,59.45,0.9724,0,0.0,,254,1651333,971003,3823,680330,2678 36,SC-CoT,AQuA,deepseek-r1:1.5b,2025/2/10,59.06,0.9685,0,0.0,,254,5802711,2547772,10031,3254939,12815 37,ToT,AQuA,Llama-3.1-8B-Instruct,2025/1/22,59.06,1.0,0,0.0,,254,5739684,4896222,19276,843462,3321 38,ReAct-Pro*,AQuA,gpt-4o,2025/1/22,57.48,0.9724,0,2.304,,254,692096,615589,2424,76507,301 39,ToT,AQuA,gpt-3.5-turbo,2025/1/7,57.09,0.9961,0,1.1513,,254,2001396,1850767,7286,150629,593 40,ReAct-Pro*,AQuA,Llama-3.1-8B-Instruct,2025/1/22,55.51,0.9685,0,0.0,,254,4340821,3764723,14822,576098,2268 41,PoT,AQuA,deepseek-r1:1.5b,2025/2/10,54.72,0.9724,0,0.0,,254,1016647,250690,987,765957,3016 42,ReAct-Pro*,AQuA,deepseek-r1:1.5b,2025/2/10,54.33,0.9646,0,0.0,,254,14445041,10578715,41648,3866326,15222 43,ToT,AQuA,Qwen2.5-7B-Instruct,2025/1/22,53.94,1.0,0,0.0,,254,8602682,8224468,32380,378214,1489 44,CoT,AQuA,Internllm2_5-7B,2025/1/22,52.76,0.8937,0,0.0,,254,127520,26610,105,100910,397 45,IO,AQuA,Llama-3.1-8B-Instruct,2025/1/22,51.18,0.9882,0,0.0,,254,133106,26459,104,106647,420 46,IO,AQuA,Internllm2_5-7B,2025/1/22,47.64,0.9094,0,0.0,,254,185041,50232,198,134809,531 47,ToT,AQuA,Doubao-lite-32k,2025/1/7,45.28,0.7402,0,0.0881,,254,2000550,1850249,7284,150301,592 48,ReAct-Pro*,AQuA,Internllm2_5-7B,2025/1/22,40.94,0.9685,0,0.0,,254,4428801,3592039,14142,836762,3294 49,CoT,AQuA,Qwen2-1.5B-Instruct,2025/1/22,40.55,0.9882,0,0.0,,254,110040,30477,120,79563,313 50,SC-CoT,AQuA,Internllm2_5-7B,2025/1/22,39.37,0.9803,0,0.0,,254,2296222,1420494,5592,875728,3448 51,IO,AQuA,gpt-3.5-turbo,2025/1/7,38.98,1.0,0,0.038,,254,42471,25701,101,16770,66 52,PoT,AQuA,Llama-3.1-8B-Instruct,2025/1/22,36.61,0.9685,0,0.0,,254,290914,240613,947,50301,198 53,PoT,AQuA,Internllm2_5-7B,2025/1/22,36.61,0.9882,0,0.0,,254,301962,233505,919,68457,270 54,ToT,AQuA,Internllm2_5-7B,2025/1/22,35.83,0.9961,0,0.0,,254,4734560,4263136,16784,471424,1856 55,CoT,AQuA,Qwen2-0.5B-Instruct,2025/1/22,33.07,0.9882,0,0.0,,254,117339,30477,120,86862,342 56,ToT,AQuA,Qwen2-1.5B-Instruct,2025/1/22,31.5,0.9882,0,0.0,,254,6250702,6058022,23850,192680,759 57,PoT,AQuA,Qwen2-1.5B-Instruct,2025/1/22,30.71,0.9646,0,0.0,,254,298475,246560,971,51915,204 58,ToT,AQuA,Qwen2-0.5B-Instruct,2025/1/22,29.92,1.0,0,0.0,,254,8700281,8100085,31890,600196,2363 59,IO,AQuA,Qwen2-1.5B-Instruct,2025/1/22,29.13,0.9764,0,0.0,,254,71047,27937,110,43110,170 60,IO,AQuA,Qwen2-0.5B-Instruct,2025/1/22,27.17,0.9882,0,0.0,,254,110415,27937,110,82478,325 61,ReAct-Pro*,AQuA,Qwen2-1.5B-Instruct,2025/1/22,25.59,0.9606,0,0.0,,254,5072004,4555858,17936,516146,2032 62,ToT,AQuA,deepseek-r1:1.5b,2025/2/10,24.8,0.5551,0,0.0,,254,794512,605028,2382,189484,746 63,ReAct-Pro*,AQuA,Qwen2-0.5B-Instruct,2025/1/22,24.02,0.9685,0,0.0,,254,7170087,6344167,24977,825920,3252 64,SC-CoT,AQuA,Qwen2-1.5B-Instruct,2025/1/22,23.62,0.9646,0,0.0,,254,1775335,1034362,4072,740973,2917 65,SC-CoT,AQuA,Qwen2-0.5B-Instruct,2025/1/22,22.83,0.9724,0,0.0,,254,2215091,1246929,4909,968162,3812 66,PoT,AQuA,Qwen2-0.5B-Instruct,2025/1/22,17.32,0.9213,0,0.0,,254,322281,258867,1019,63414,250 1,CoT,MATH-500,Qwen2.5-72B-Instruct,2025/1/22,80.2,1.0,4,0.349,,500,619015,338549,677,280466,561 2,SC-CoT,MATH-500,Llama-3.3-70B-Instruct,2025/1/22,74.2,1.0,4,3.2239,,500,5718739,3959492,7919,1759247,3518 3,SC-CoT,MATH-500,Qwen2.5-72B-Instruct,2025/1/22,74.0,1.0,4,3.1556,,500,5597513,3823997,7648,1773516,3547 4,CoT,MATH-500,Llama-3.3-70B-Instruct,2025/1/22,71.2,1.0,4,0.3463,,500,614221,342879,686,271342,543 5,IO,MATH-500,Qwen2.5-72B-Instruct,2025/1/22,70.2,1.0,4,0.2506,,500,444591,169549,339,275042,550 6,CoT,MATH-500,Qwen2.5-7B-Instruct,2025/1/22,69.8,1.0,4,0.0,,500,617204,354049,708,263155,526 7,IO,MATH-500,Llama-3.3-70B-Instruct,2025/1/22,69.4,1.0,4,0.2386,,500,423216,155879,312,267337,535 8,CoT,MATH-500,gpt-4o,2025/1/22,68.0,1.0,4,3.0569,,500,552688,329332,659,223356,447 9,SC-CoT,MATH-500,Qwen2.5-7B-Instruct,2025/1/22,67.0,1.0,4,0.0,,500,5451484,3833751,7668,1617733,3235 10,ReAct-Pro*,MATH-500,Llama-3.3-70B-Instruct,2025/1/22,64.6,1.0,4,3.1806,,500,5641879,5223611,10447,418268,837 11,ReAct-Pro*,MATH-500,Qwen2.5-72B-Instruct,2025/1/22,62.8,1.0,4,3.4541,,500,6127117,5747268,11495,379849,760 12,IO,MATH-500,Qwen2.5-7B-Instruct,2025/1/22,59.4,1.0,4,0.0,,500,411362,169549,339,241813,484 13,CoT,MATH-500,Doubao-lite-32k,2025/1/7,59.0,1.0,4,0.0255,,500,479941,336370,673,143571,287 14,ReAct-Pro*,MATH-500,gpt-4o,2025/1/22,54.0,1.0,4,17.7735,,500,6153255,5834537,11669,318718,637 15,CoT,MATH-500,deepseek-r1:1.5b,2025/1/23,49.4,1.0,4,0.0,,500,1199129,341549,683,857580,1715 16,SC-CoT,MATH-500,Doubao-lite-32k,2025/1/7,49.2,1.0,4,0.1406,,500,2470810,1507651,3015,963159,1926 17,ReAct-Pro*,MATH-500,Qwen2.5-7B-Instruct,2025/1/22,48.8,1.0,4,0.0,,500,4990240,4646708,9293,343532,687 18,ReAct-Pro*,MATH-500,Doubao-lite-32k,2025/1/7,47.2,1.0,4,0.186,,500,4388666,4234620,8469,154046,308 19,PoT,MATH-500,Qwen2.5-72B-Instruct,2025/1/22,47.2,0.822,4,0.233,,500,413372,242549,485,170823,342 20,CoT,MATH-500,Internllm2_5-7B,2025/1/22,46.6,1.0,4,0.0,,500,546774,332883,666,213891,428 21,PoT,MATH-500,gpt-4o,2025/1/22,46.2,0.864,4,1.5994,,500,340960,241357,483,99603,199 22,IO,MATH-500,deepseek-r1:1.5b,2025/1/22,43.8,1.0,4,0.0,,500,1022548,157049,314,865499,1731 23,PoT,MATH-500,Llama-3.3-70B-Instruct,2025/1/22,42.6,0.802,4,0.2839,,500,503596,253879,508,249717,499 24,IO,MATH-500,gpt-4o,2025/1/22,41.8,1.0,4,2.7907,,500,394447,153832,308,240615,481 25,CoT,MATH-500,gpt-3.5-turbo,2025/1/7,39.8,1.0,4,0.3189,,500,432196,329381,659,102815,206 26,PoT,MATH-500,Qwen2.5-7B-Instruct,2025/1/22,39.6,0.744,4,0.0,,500,408812,258549,517,150263,301 27,IO,MATH-500,Llama-3.1-8B-Instruct,2025/1/22,38.6,1.0,4,0.0,,500,503934,155563,311,348371,697 28,SC-CoT,MATH-500,deepseek-r1:1.5b,2025/2/10,38.0,1.0,4,0.0,,500,14742109,7080559,14161,7661550,15323 29,IO,MATH-500,Doubao-lite-32k,2025/1/7,37.4,1.0,4,0.0187,,500,311730,166870,334,144860,290 30,SC-CoT,MATH-500,gpt-4o,2025/1/22,34.4,1.0,4,19.6538,,500,3455323,1986584,3973,1468739,2937 31,PoT,MATH-500,Doubao-lite-32k,2025/1/7,32.6,0.68,4,0.0144,,500,303148,254377,509,48771,98 32,SC-CoT,MATH-500,Llama-3.1-8B-Instruct,2025/1/22,30.2,1.0,4,0.0,,500,5034937,3546673,7093,1488264,2977 33,ReAct-Pro*,MATH-500,Llama-3.1-8B-Instruct,2025/1/22,28.8,1.0,4,0.0,,500,8763629,7486706,14973,1276923,2554 34,PoT,MATH-500,gpt-3.5-turbo,2025/1/7,28.8,0.838,4,0.168,,500,271916,239902,480,32014,64 35,SC-CoT,MATH-500,gpt-3.5-turbo,2025/1/7,28.8,1.0,4,1.9764,,500,2238812,1381818,2764,856994,1714 36,CoT,MATH-500,Llama-3.1-8B-Instruct,2025/1/22,25.8,1.0,4,0.0,,500,625568,342879,686,282689,565 37,PoT,MATH-500,Llama-3.1-8B-Instruct,2025/1/22,25.4,0.684,4,0.0,,500,462271,253879,508,208392,417 38,ReAct-Pro*,MATH-500,deepseek-r1:1.5b,2025/2/10,24.4,1.0,4,0.0,,500,30177348,20729970,41460,9447378,18895 39,ReAct-Pro*,MATH-500,gpt-3.5-turbo,2025/1/7,23.8,1.0,4,2.0406,,500,3832714,3708461,7417,124253,249 40,IO,MATH-500,Internllm2_5-7B,2025/1/22,22.8,1.0,4,0.0,,500,467888,201883,404,266005,532 41,IO,MATH-500,gpt-3.5-turbo,2025/1/7,17.2,1.0,4,0.2436,,500,265625,154881,310,110744,221 42,CoT,MATH-500,Qwen2-1.5B-Instruct,2025/1/22,15.2,1.0,4,0.0,,500,536377,349049,698,187328,375 43,PoT,MATH-500,Internllm2_5-7B,2025/1/22,15.0,0.324,4,0.0,,500,368709,247883,496,120826,242 44,ReAct-Pro*,MATH-500,Internllm2_5-7B,2025/1/22,14.8,1.0,4,0.0,,500,14186105,11831496,23663,2354609,4709 45,ToT,MATH-500,Qwen2.5-72B-Instruct,2025/1/22,10.8,1.0,4,9.0421,,500,16039361,15657730,31315,381631,763 46,SC-CoT,MATH-500,Internllm2_5-7B,2025/1/22,9.8,1.0,4,0.0,,500,5838466,4193296,8387,1645170,3290 47,ToT,MATH-500,gpt-3.5-turbo,2025/1/7,9.8,1.0,4,5.2914,,500,10001767,9711244,19422,290523,581 48,ReAct-Pro*,MATH-500,Qwen2-1.5B-Instruct,2025/1/22,8.2,1.0,4,0.0,,500,8987061,8430774,16862,556287,1113 49,IO,MATH-500,Qwen2-1.5B-Instruct,2025/1/22,7.0,1.0,4,0.0,,500,413878,158777,318,255101,510 50,CoT,MATH-500,Qwen2-0.5B-Instruct,2025/1/22,6.2,1.0,4,0.0,,500,549188,349049,698,200139,400 51,SC-CoT,MATH-500,Qwen2-1.5B-Instruct,2025/1/22,3.8,0.99,4,0.0,,500,5569442,3832429,7665,1737013,3474 52,ToT,MATH-500,gpt-4o,2025/1/22,3.2,1.0,4,40.8094,,500,15242432,14881985,29764,360447,721 53,IO,MATH-500,Qwen2-0.5B-Instruct,2025/1/22,2.6,1.0,4,0.0,,500,429330,159049,318,270281,541 54,ToT,MATH-500,Llama-3.1-8B-Instruct,2025/1/22,1.8,0.908,4,0.0,,500,9035000,7729000,15458,1306000,2612 55,ToT,MATH-500,Llama-3.3-70B-Instruct,2025/1/22,1.4,0.698,4,8.2699,,500,14669500,14099500,28199,570000,1140 56,ToT,MATH-500,Qwen2.5-7B-Instruct,2025/1/22,1.4,0.916,4,0.0,,500,10167500,9749000,19498,418500,837 57,ToT,MATH-500,Doubao-lite-32k,2025/1/7,1.2,0.942,4,0.2371,,500,5564500,5338500,10677,226000,452 58,PoT,MATH-500,deepseek-r1:1.5b,2025/2/10,1.0,0.016,4,0.0,,500,1031067,245549,491,785518,1571 59,PoT,MATH-500,Qwen2-1.5B-Instruct,2025/1/22,0.8,0.022,4,0.0,,500,786870,248509,497,538361,1077 60,SC-CoT,MATH-500,Qwen2-0.5B-Instruct,2025/1/22,0.8,1.0,4,0.0,,500,6862056,4448663,8897,2413393,4827 61,ToT,MATH-500,Qwen2-1.5B-Instruct,2025/1/22,0.8,0.972,4,0.0,,500,4535000,4408000,8816,127000,254 62,ReAct-Pro*,MATH-500,Qwen2-0.5B-Instruct,2025/1/22,0.6,1.0,4,0.0,,500,19442440,18137392,36275,1305048,2610 63,ToT,MATH-500,deepseek-r1:1.5b,2025/2/10,0.4,0.716,4,0.0,,500,1941500,1831000,3662,110500,221 64,ToT,MATH-500,Internllm2_5-7B,2025/1/22,0.2,0.99,4,0.0,,500,8350500,7515000,15030,835500,1671 65,PoT,MATH-500,Qwen2-0.5B-Instruct,2025/1/22,0.0,0.0,4,0.0,,500,437202,253549,507,183653,367 66,ToT,MATH-500,Qwen2-0.5B-Instruct,2025/1/22,0.0,0.962,4,0.0,,500,5996500,5590500,11181,406000,812 1,SC-CoT,gsm8k,Llama-3.3-70B-Instruct,2025/1/22,95.07,1.0,8,6.2005,,1319,10998794,8413717,6379,2585077,1960 2,CoT,gsm8k,gpt-4o,2025/1/22,94.09,1.0,8,4.5367,,1319,1165166,948668,719,216498,164 3,CoT,gsm8k,Llama-3.3-70B-Instruct,2025/1/22,93.93,1.0,8,0.687,,1319,1218665,990168,751,228497,173 4,SC-CoT,gsm8k,Qwen2.5-72B-Instruct,2025/1/22,93.86,1.0,8,5.9858,,1319,10618008,8136223,6168,2481785,1882 5,PoT,gsm8k,gpt-4o,2025/1/22,93.1,0.9977,8,4.2166,,1319,1247912,1101672,835,146240,111 6,CoT,gsm8k,Qwen2.5-72B-Instruct,2025/1/22,92.87,1.0,8,0.7195,,1319,1276252,1005119,762,271133,206 7,PoT,gsm8k,Qwen2.5-72B-Instruct,2025/1/22,92.34,0.9939,8,0.7054,,1319,1251210,1106682,839,144528,110 8,IO,gsm8k,Llama-3.3-70B-Instruct,2025/1/22,92.27,1.0,8,0.4709,,1319,835275,583916,443,251359,191 9,ToT,gsm8k,Llama-3.3-70B-Instruct,2025/1/22,91.89,1.0,8,20.8753,,1319,37029687,35096810,26609,1932877,1465 10,SC-CoT,gsm8k,Qwen2.5-7B-Instruct,2025/1/22,91.13,1.0,8,0.0,,1319,11140985,8586888,6510,2554097,1936 11,ToT,gsm8k,gpt-4o,2025/1/22,91.13,1.0,8,86.8581,,1319,30769735,29445237,22324,1324498,1004 12,SC-CoT,gsm8k,gpt-4o,2025/1/22,90.3,0.9992,8,31.0542,,1319,5798173,3590336,2722,2207837,1674 13,CoT,gsm8k,Doubao-lite-32k,2025/1/7,89.31,1.0,8,0.0558,,1319,1201820,1042095,790,159725,121 14,ToT,gsm8k,Qwen2.5-72B-Instruct,2025/1/22,88.88,1.0,8,23.5911,,1319,41847148,40435361,30656,1411787,1070 15,IO,gsm8k,gpt-4o,2025/1/22,88.4,1.0,8,3.3463,,1319,741446,542416,411,199030,151 16,ReAct-Pro*,gsm8k,Llama-3.3-70B-Instruct,2025/1/22,87.64,0.9992,8,10.1124,,1319,17937864,17038928,12918,898936,682 17,ReAct-Pro*,gsm8k,Qwen2.5-72B-Instruct,2025/1/22,87.26,1.0,8,10.5479,,1319,18710437,18160983,13769,549454,417 18,SC-CoT,gsm8k,Doubao-lite-32k,2025/1/7,87.26,0.9992,8,0.2083,,1319,3888813,2691714,2041,1197099,908 19,IO,gsm8k,Qwen2.5-72B-Instruct,2025/1/22,86.58,1.0,8,0.4899,,1319,869060,555340,421,313720,238 20,CoT,gsm8k,Qwen2.5-7B-Instruct,2025/1/22,85.67,1.0,8,0.0,,1319,1290805,1046008,793,244797,186 21,ReAct-Pro*,gsm8k,Doubao-lite-32k,2025/1/7,85.6,0.9962,8,0.2512,,1319,5998639,5862016,4444,136623,104 22,ReAct-Pro*,gsm8k,Qwen2.5-7B-Instruct,2025/1/22,82.87,1.0,8,0.0,,1319,14850914,14355752,10884,495162,375 23,SC-CoT,gsm8k,gpt-3.5-turbo,2025/1/7,79.91,0.9992,8,3.3938,,1319,4089612,2740652,2078,1348960,1023 24,PoT,gsm8k,Doubao-lite-32k,2025/1/7,79.61,0.9257,8,0.0576,,1319,1288055,1170038,887,118017,89 25,CoT,gsm8k,gpt-3.5-turbo,2025/1/7,78.7,1.0,8,0.6788,,1319,1088041,953242,723,134799,102 26,CoT,gsm8k,Internllm2_5-7B,2025/1/22,77.71,0.997,8,0.0,,1319,1202163,968163,734,234000,177 27,PoT,gsm8k,gpt-3.5-turbo,2025/1/7,76.88,0.9924,8,0.6902,,1319,1187080,1090418,827,96662,73 28,CoT,gsm8k,Llama-3.1-8B-Instruct,2025/1/22,75.44,0.9992,8,0.0,,1319,1248329,990168,751,258161,196 29,ReAct-Pro*,gsm8k,gpt-3.5-turbo,2025/1/7,74.91,0.9939,8,3.4633,,1319,6646286,6506164,4933,140122,106 30,SC-CoT,gsm8k,Llama-3.1-8B-Instruct,2025/1/22,73.46,0.9955,8,0.0,,1319,11778716,8630514,6543,3148202,2387 31,PoT,gsm8k,Llama-3.3-70B-Instruct,2025/1/22,73.09,0.7961,8,0.9736,,1319,1727044,1126025,854,601019,456 32,ToT,gsm8k,Qwen2.5-7B-Instruct,2025/1/22,72.21,0.9901,8,0.0,,1319,31657319,20196528,15312,11460791,8689 33,IO,gsm8k,Doubao-lite-32k,2025/1/7,72.02,0.9992,8,0.0354,,1319,740483,617377,468,123106,93 34,CoT,gsm8k,deepseek-r1:1.5b,2025/1/23,70.66,0.9977,8,0.0,,1319,2090625,1011714,767,1078911,818 35,ToT,gsm8k,gpt-3.5-turbo,2025/1/7,67.93,0.997,8,9.1707,,1319,16727175,15920037,12070,807138,612 36,ReAct-Pro*,gsm8k,Llama-3.1-8B-Instruct,2025/1/22,67.78,0.9856,8,0.0,,1319,22835767,21044978,15955,1790789,1358 37,ToT,gsm8k,Llama-3.1-8B-Instruct,2025/1/22,65.05,0.9196,8,0.0,,1319,16432102,15554967,11793,877135,665 38,IO,gsm8k,deepseek-r1:1.5b,2025/1/22,64.14,0.9962,8,0.0,,1319,1483051,561935,426,921116,698 39,ReAct-Pro*,gsm8k,gpt-4o,2025/1/22,63.31,0.9955,8,39.0751,,1319,14715887,14411173,10926,304714,231 40,PoT,gsm8k,Qwen2.5-7B-Instruct,2025/1/22,58.83,0.7051,8,0.0,,1319,1362822,1145390,868,217432,165 41,IO,gsm8k,Qwen2.5-7B-Instruct,2025/1/22,57.24,1.0,8,0.0,,1319,887913,596229,452,291684,221 42,IO,gsm8k,Llama-3.1-8B-Instruct,2025/1/22,57.16,0.9955,8,0.0,,1319,1745429,550941,418,1194488,906 43,CoT,gsm8k,Qwen2-1.5B-Instruct,2025/1/22,55.5,1.0,8,0.0,,1319,1218525,1032818,783,185707,141 44,SC-CoT,gsm8k,deepseek-r1:1.5b,2025/2/10,55.34,0.997,8,0.0,,1319,25785865,14540096,11024,11245769,8526 45,SC-CoT,gsm8k,Internllm2_5-7B,2025/1/22,48.22,0.9841,8,0.0,,1319,14526431,10678792,8096,3847639,2917 46,PoT,gsm8k,Llama-3.1-8B-Instruct,2025/1/22,38.67,0.5542,8,0.0,,1319,1391111,1147538,870,243573,185 47,PoT,gsm8k,Internllm2_5-7B,2025/1/22,38.21,0.489,8,0.0,,1319,1324949,1136843,862,188106,143 48,IO,gsm8k,gpt-3.5-turbo,2025/1/7,37.83,0.9992,8,0.3328,,1319,586553,546990,415,39563,30 49,ToT,gsm8k,Doubao-lite-32k,2025/1/7,37.83,0.8734,8,0.8739,,1319,20274349,19208597,14563,1065752,808 50,ReAct-Pro*,gsm8k,deepseek-r1:1.5b,2025/2/10,35.94,0.9962,8,0.0,,1319,24219077,19299381,14632,4919696,3730 51,CoT,gsm8k,Qwen2-0.5B-Instruct,2025/1/22,35.94,0.9992,8,0.0,,1319,1223459,1032818,783,190641,145 52,ReAct-Pro*,gsm8k,Internllm2_5-7B,2025/1/22,33.51,0.9795,8,0.0,,1319,35669989,30120070,22836,5549919,4208 53,ReAct-Pro*,gsm8k,Qwen2-1.5B-Instruct,2025/1/22,24.87,0.8021,8,0.0,,1319,9828001,9133603,6925,694398,526 54,ToT,gsm8k,deepseek-r1:1.5b,2025/2/10,23.12,0.7248,8,0.0,,1319,3421486,2738244,2076,683242,518 55,ToT,gsm8k,Internllm2_5-7B,2025/1/22,20.85,0.7013,8,0.0,,1319,13178129,11768118,8922,1410011,1069 56,ToT,gsm8k,Qwen2-1.5B-Instruct,2025/1/22,19.64,0.7726,8,0.0,,1319,12758687,12124248,9192,634439,481 57,PoT,gsm8k,Qwen2-1.5B-Instruct,2025/1/22,18.5,0.3101,8,0.0,,1319,1327522,1151528,873,175994,133 58,IO,gsm8k,Qwen2-1.5B-Instruct,2025/1/22,16.68,1.0,8,0.0,,1319,736996,568530,431,168466,128 59,IO,gsm8k,Qwen2-0.5B-Instruct,2025/1/22,14.71,1.0,8,0.0,,1319,834897,568116,431,266781,202 60,PoT,gsm8k,deepseek-r1:1.5b,2025/2/10,11.9,0.1744,8,0.0,,1319,1954509,1138872,863,815637,618 61,SC-CoT,gsm8k,Qwen2-1.5B-Instruct,2025/1/22,11.75,0.9189,8,0.0,,1319,12411942,9066115,6873,3345827,2537 62,IO,gsm8k,Internllm2_5-7B,2025/1/22,11.6,0.9795,8,0.0,,1319,1113728,679302,515,434426,329 63,PoT,gsm8k,Qwen2-0.5B-Instruct,2025/1/22,9.63,0.1691,8,0.0,,1319,1389135,1151528,873,237607,180 64,ReAct-Pro*,gsm8k,Qwen2-0.5B-Instruct,2025/1/22,7.66,0.9522,8,0.0,,1319,55392611,52431343,39751,2961268,2245 65,SC-CoT,gsm8k,Qwen2-0.5B-Instruct,2025/1/22,1.67,0.9469,8,0.0,,1319,16465720,11019864,8355,5445856,4129 66,ToT,gsm8k,Qwen2-0.5B-Instruct,2025/1/22,0.0,0.0,8,0.0,,1319,0,0,0,0,0