File size: 13,950 Bytes
b0d57dd |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 |
- 0 babi_nli/counting
- 1 babi_nli/indefinite-knowledge
- 2 babi_nli/simple-negation
- 3 babi_nli/three-arg-relations
- 4 babi_nli/basic-induction
- 5 babi_nli/time-reasoning
- 6 babi_nli/compound-coreference
- 7 babi_nli/path-finding
- 8 babi_nli/positional-reasoning
- 9 babi_nli/conjunction
- 10 babi_nli/size-reasoning
- 11 babi_nli/yes-no-questions
- 12 babi_nli/basic-coreference
- 13 babi_nli/two-supporting-facts
- 14 babi_nli/lists-sets
- 15 babi_nli/two-arg-relations
- 16 babi_nli/three-supporting-facts
- 17 babi_nli/basic-deduction
- 18 babi_nli/single-supporting-fact
- 19 anli/a1
- 20 anli/a2
- 21 anli/a3
- 22 sick/label
- 23 sick/relatedness
- 24 sick/entailment_AB
- 25 sick/entailment_BA
- 26 snli
- 27 scitail/snli_format
- 28 hans
- 29 WANLI
- 30 recast/recast_kg_relations
- 31 recast/recast_puns
- 32 recast/recast_factuality
- 33 recast/recast_megaveridicality
- 34 recast/recast_verbcorner
- 35 recast/recast_verbnet
- 36 recast/recast_ner
- 37 recast/recast_sentiment
- 38 probability_words_nli/usnli
- 39 probability_words_nli/reasoning_1hop
- 40 probability_words_nli/reasoning_2hop
- 41 nan-nli/joey234--nan-nli
- 42 nli_fever
- 43 breaking_nli
- 44 conj_nli
- 45 fracas
- 46 dialogue_nli
- 47 mpe
- 48 dnc
- 49 gpt3_nli
- 50 recast_white/fnplus
- 51 recast_white/sprl
- 52 recast_white/dpr
- 53 joci
- 54 contrast_nli
- 55 robust_nli/IS_CS
- 56 robust_nli/LI_LI
- 57 robust_nli/ST_WO
- 58 robust_nli/PI_SP
- 59 robust_nli/PI_CD
- 60 robust_nli/ST_SE
- 61 robust_nli/ST_NE
- 62 robust_nli/ST_LM
- 63 robust_nli_is_sd
- 64 robust_nli_li_ts
- 65 gen_debiased_nli/snli_seq_z
- 66 gen_debiased_nli/snli_z_aug
- 67 gen_debiased_nli/snli_par_z
- 68 gen_debiased_nli/mnli_par_z
- 69 gen_debiased_nli/mnli_z_aug
- 70 gen_debiased_nli/mnli_seq_z
- 71 add_one_rte
- 72 imppres/presupposition_cleft_uniqueness/presupposition
- 73 imppres/presupposition_possessed_definites_uniqueness/presupposition
- 74 imppres/presupposition_possessed_definites_existence/presupposition
- 75 imppres/presupposition_only_presupposition/presupposition
- 76 imppres/presupposition_all_n_presupposition/presupposition
- 77 imppres/presupposition_both_presupposition/presupposition
- 78 imppres/presupposition_change_of_state/presupposition
- 79 imppres/presupposition_cleft_existence/presupposition
- 80 imppres/presupposition_question_presupposition/presupposition
- 81 imppres/implicature_modals/prag
- 82 imppres/implicature_numerals_10_100/prag
- 83 imppres/implicature_numerals_2_3/prag
- 84 imppres/implicature_gradable_adjective/prag
- 85 imppres/implicature_quantifiers/prag
- 86 imppres/implicature_gradable_verb/prag
- 87 imppres/implicature_connectives/prag
- 88 imppres/implicature_gradable_adjective/log
- 89 imppres/implicature_gradable_verb/log
- 90 imppres/implicature_numerals_2_3/log
- 91 imppres/implicature_numerals_10_100/log
- 92 imppres/implicature_modals/log
- 93 imppres/implicature_quantifiers/log
- 94 imppres/implicature_connectives/log
- 95 glue_diagnostics/diagnostics
- 96 hlgd
- 97 paws/labeled_final
- 98 paws/labeled_swap
- 99 quora
- 100 medical_questions_pairs
- 101 conll2003/pos_tags
- 102 conll2003/chunk_tags
- 103 conll2003/ner_tags
- 104 hh-rlhf
- 105 model-written-evals
- 106 truthful_qa/multiple_choice
- 107 fig-qa
- 108 bigbench/fantasy_reasoning
- 109 bigbench/nonsense_words_grammar
- 110 bigbench/analytic_entailment
- 111 bigbench/logic_grid_puzzle
- 112 bigbench/geometric_shapes
- 113 bigbench/key_value_maps
- 114 bigbench/analogical_similarity
- 115 bigbench/metaphor_understanding
- 116 bigbench/metaphor_boolean
- 117 bigbench/ruin_names
- 118 bigbench/cs_algorithms
- 119 bigbench/physical_intuition
- 120 bigbench/mnist_ascii
- 121 bigbench/moral_permissibility
- 122 bigbench/emoji_movie
- 123 bigbench/snarks
- 124 bigbench/timedial
- 125 bigbench/dark_humor_detection
- 126 bigbench/gre_reading_comprehension
- 127 bigbench/empirical_judgments
- 128 bigbench/causal_judgment
- 129 bigbench/fact_checker
- 130 bigbench/logical_fallacy_detection
- 131 bigbench/identify_math_theorems
- 132 bigbench/dyck_languages
- 133 bigbench/winowhy
- 134 bigbench/logical_sequence
- 135 bigbench/strategyqa
- 136 bigbench/unit_interpretation
- 137 bigbench/authorship_verification
- 138 bigbench/undo_permutation
- 139 bigbench/epistemic_reasoning
- 140 bigbench/human_organs_senses
- 141 bigbench/misconceptions
- 142 bigbench/international_phonetic_alphabet_nli
- 143 bigbench/identify_odd_metaphor
- 144 bigbench/mathematical_induction
- 145 bigbench/odd_one_out
- 146 bigbench/reasoning_about_colored_objects
- 147 bigbench/strange_stories
- 148 bigbench/evaluating_information_essentiality
- 149 bigbench/figure_of_speech_detection
- 150 bigbench/english_proverbs
- 151 bigbench/general_knowledge
- 152 bigbench/tracking_shuffled_objects
- 153 bigbench/physics
- 154 bigbench/anachronisms
- 155 bigbench/simple_ethical_questions
- 156 bigbench/logical_args
- 157 bigbench/suicide_risk
- 158 bigbench/sentence_ambiguity
- 159 bigbench/temporal_sequences
- 160 bigbench/penguins_in_a_table
- 161 bigbench/sports_understanding
- 162 bigbench/hyperbaton
- 163 bigbench/code_line_description
- 164 bigbench/question_selection
- 165 bigbench/disambiguation_qa
- 166 bigbench/date_understanding
- 167 bigbench/play_dialog_same_or_different
- 168 bigbench/salient_translation_error_detection
- 169 bigbench/irony_identification
- 170 bigbench/emojis_emotion_prediction
- 171 bigbench/hindu_knowledge
- 172 bigbench/conceptual_combinations
- 173 bigbench/implicatures
- 174 bigbench/movie_dialog_same_or_different
- 175 bigbench/social_support
- 176 bigbench/presuppositions_as_nli
- 177 bigbench/vitaminc_fact_verification
- 178 bigbench/hhh_alignment
- 179 bigbench/implicit_relations
- 180 bigbench/bbq_lite_json
- 181 bigbench/phrase_relatedness
- 182 bigbench/logical_deduction
- 183 bigbench/discourse_marker_prediction
- 184 bigbench/movie_recommendation
- 185 bigbench/real_or_fake_text
- 186 bigbench/formal_fallacies_syllogisms_negation
- 187 bigbench/crass_ai
- 188 blimp/inchoative
- 189 blimp/principle_A_c_command
- 190 blimp/matrix_question_npi_licensor_present
- 191 blimp/wh_questions_subject_gap_long_distance
- 192 blimp/sentential_subject_island
- 193 blimp/existential_there_quantifiers_2
- 194 blimp/sentential_negation_npi_scope
- 195 blimp/complex_NP_island
- 196 blimp/principle_A_reconstruction
- 197 blimp/animate_subject_passive
- 198 blimp/tough_vs_raising_1
- 199 blimp/wh_vs_that_with_gap
- 200 blimp/principle_A_domain_2
- 201 blimp/npi_present_1
- 202 blimp/wh_vs_that_with_gap_long_distance
- 203 blimp/superlative_quantifiers_1
- 204 blimp/npi_present_2
- 205 blimp/wh_questions_object_gap
- 206 blimp/coordinate_structure_constraint_complex_left_branch
- 207 blimp/coordinate_structure_constraint_object_extraction
- 208 blimp/left_branch_island_echo_question
- 209 blimp/drop_argument
- 210 cos_e/v1.0
- 211 cosmos_qa
- 212 dream
- 213 openbookqa
- 214 qasc
- 215 quartz
- 216 quail
- 217 head_qa/en
- 218 sciq
- 219 social_i_qa
- 220 wiki_hop
- 221 wiqa
- 222 piqa
- 223 hellaswag
- 224 super_glue/copa
- 225 art
- 226 hendrycks_test/moral_disputes
- 227 hendrycks_test/moral_scenarios
- 228 hendrycks_test/nutrition
- 229 hendrycks_test/philosophy
- 230 hendrycks_test/prehistory
- 231 hendrycks_test/professional_accounting
- 232 hendrycks_test/professional_law
- 233 hendrycks_test/world_religions
- 234 hendrycks_test/professional_psychology
- 235 hendrycks_test/public_relations
- 236 hendrycks_test/security_studies
- 237 hendrycks_test/sociology
- 238 hendrycks_test/us_foreign_policy
- 239 hendrycks_test/virology
- 240 hendrycks_test/miscellaneous
- 241 hendrycks_test/professional_medicine
- 242 hendrycks_test/medical_genetics
- 243 hendrycks_test/college_mathematics
- 244 hendrycks_test/management
- 245 hendrycks_test/high_school_computer_science
- 246 hendrycks_test/astronomy
- 247 hendrycks_test/high_school_chemistry
- 248 hendrycks_test/high_school_biology
- 249 hendrycks_test/global_facts
- 250 hendrycks_test/formal_logic
- 251 hendrycks_test/elementary_mathematics
- 252 hendrycks_test/high_school_european_history
- 253 hendrycks_test/electrical_engineering
- 254 hendrycks_test/conceptual_physics
- 255 hendrycks_test/computer_security
- 256 hendrycks_test/college_physics
- 257 hendrycks_test/college_medicine
- 258 hendrycks_test/college_computer_science
- 259 hendrycks_test/college_chemistry
- 260 hendrycks_test/college_biology
- 261 hendrycks_test/econometrics
- 262 hendrycks_test/clinical_knowledge
- 263 hendrycks_test/anatomy
- 264 hendrycks_test/marketing
- 265 hendrycks_test/machine_learning
- 266 hendrycks_test/logical_fallacies
- 267 hendrycks_test/jurisprudence
- 268 hendrycks_test/international_law
- 269 hendrycks_test/human_sexuality
- 270 hendrycks_test/human_aging
- 271 hendrycks_test/high_school_world_history
- 272 hendrycks_test/abstract_algebra
- 273 hendrycks_test/high_school_us_history
- 274 hendrycks_test/high_school_psychology
- 275 hendrycks_test/high_school_physics
- 276 hendrycks_test/high_school_microeconomics
- 277 hendrycks_test/high_school_mathematics
- 278 hendrycks_test/high_school_macroeconomics
- 279 hendrycks_test/high_school_government_and_politics
- 280 hendrycks_test/high_school_geography
- 281 hendrycks_test/high_school_statistics
- 282 hendrycks_test/business_ethics
- 283 winogrande/winogrande_xl
- 284 codah/codah
- 285 ai2_arc/ARC-Challenge/challenge
- 286 ai2_arc/ARC-Easy/challenge
- 287 definite_pronoun_resolution
- 288 swag
- 289 math_qa
- 290 utilitarianism
- 291 TuringBench
- 292 trec
- 293 vitaminc/tals--vitaminc
- 294 hope_edi/english
- 295 rumoureval_2019/RumourEval2019
- 296 ethos/binary
- 297 ethos/multilabel
- 298 glue/cola
- 299 glue/sst2
- 300 glue/mrpc
- 301 glue/qqp
- 302 glue/stsb
- 303 glue/mnli
- 304 glue/qnli
- 305 glue/rte
- 306 glue/wnli
- 307 super_glue/boolq
- 308 super_glue/cb
- 309 super_glue/multirc
- 310 super_glue/wic
- 311 super_glue/axg
- 312 tweet_eval/stance_feminist
- 313 tweet_eval/stance_atheism
- 314 tweet_eval/stance_hillary
- 315 tweet_eval/stance_abortion
- 316 tweet_eval/sentiment
- 317 tweet_eval/offensive
- 318 tweet_eval/stance_climate
- 319 tweet_eval/irony
- 320 tweet_eval/emotion
- 321 tweet_eval/emoji
- 322 tweet_eval/hate
- 323 discovery/discovery
- 324 pragmeval/switchboard
- 325 pragmeval/squinky-informativeness
- 326 pragmeval/emobank-arousal
- 327 pragmeval/emobank-dominance
- 328 pragmeval/emobank-valence
- 329 pragmeval/mrda
- 330 pragmeval/verifiability
- 331 pragmeval/squinky-implicature
- 332 pragmeval/squinky-formality
- 333 pragmeval/gum
- 334 pragmeval/emergent
- 335 pragmeval/persuasiveness-premisetype
- 336 pragmeval/pdtb
- 337 pragmeval/persuasiveness-eloquence
- 338 pragmeval/persuasiveness-specificity
- 339 pragmeval/persuasiveness-strength
- 340 pragmeval/sarcasm
- 341 pragmeval/stac
- 342 pragmeval/persuasiveness-claimtype
- 343 pragmeval/persuasiveness-relevance
- 344 lex_glue/eurlex
- 345 lex_glue/scotus
- 346 lex_glue/ledgar
- 347 lex_glue/unfair_tos
- 348 lex_glue/case_hold
- 349 imdb
- 350 rotten_tomatoes
- 351 ag_news
- 352 yelp_review_full/yelp_review_full
- 353 financial_phrasebank/sentences_allagree
- 354 poem_sentiment
- 355 dbpedia_14/dbpedia_14
- 356 amazon_polarity/amazon_polarity
- 357 app_reviews
- 358 hate_speech18
- 359 sms_spam
- 360 humicroedit/subtask-1
- 361 humicroedit/subtask-2
- 362 snips_built_in_intents
- 363 banking77
- 364 hate_speech_offensive
- 365 hyperpartisan_news_detection/byarticle
- 366 hyperpartisan_news_detection/bypublisher
- 367 go_emotions/simplified
- 368 scicite
- 369 liar
- 370 lexical_relation_classification/ROOT09
- 371 lexical_relation_classification/EVALution
- 372 lexical_relation_classification/CogALexV
- 373 lexical_relation_classification/BLESS
- 374 lexical_relation_classification/K&H+N
- 375 linguisticprobing/coordination_inversion
- 376 linguisticprobing/odd_man_out
- 377 linguisticprobing/word_content
- 378 linguisticprobing/obj_number
- 379 linguisticprobing/past_present
- 380 linguisticprobing/tree_depth
- 381 linguisticprobing/sentence_length
- 382 linguisticprobing/top_constituents
- 383 linguisticprobing/bigram_shift
- 384 linguisticprobing/subj_number
- 385 crowdflower/sentiment_nuclear_power
- 386 crowdflower/tweet_global_warming
- 387 crowdflower/airline-sentiment
- 388 crowdflower/economic-news
- 389 crowdflower/political-media-audience
- 390 crowdflower/political-media-bias
- 391 crowdflower/political-media-message
- 392 crowdflower/text_emotion
- 393 crowdflower/corporate-messaging
- 394 ethics/commonsense
- 395 ethics/deontology
- 396 ethics/justice
- 397 ethics/virtue
- 398 emo/emo2019
- 399 google_wellformed_query
- 400 tweets_hate_speech_detection
- 401 adv_glue/adv_sst2
- 402 adv_glue/adv_qqp
- 403 adv_glue/adv_mnli
- 404 adv_glue/adv_mnli_mismatched
- 405 adv_glue/adv_qnli
- 406 adv_glue/adv_rte
- 407 has_part
- 408 wnut_17/wnut_17
- 409 ncbi_disease/ncbi_disease
- 410 acronym_identification
- 411 jnlpba/jnlpba
- 412 species_800/species_800
- 413 ontonotes_english/SpeedOfMagic--ontonotes_english
- 414 blog_authorship_corpus/gender
- 415 blog_authorship_corpus/age
- 416 blog_authorship_corpus/horoscope
- 417 blog_authorship_corpus/job
- 418 open_question_type
- 419 health_fact
- 420 commonsense_qa
- 421 mc_taco
- 422 ade_corpus_v2/Ade_corpus_v2_classification
- 423 discosense
- 424 circa
- 425 code_x_glue_cc_defect_detection
- 426 code_x_glue_cc_clone_detection_big_clone_bench
- 427 code_x_glue_cc_code_refinement/medium
- 428 EffectiveFeedbackStudentWriting
- 429 promptSentiment
- 430 promptNLI
- 431 promptSpoke
- 432 promptProficiency
- 433 promptGrammar
- 434 promptCoherence
- 435 phrase_similarity
- 436 scientific-exaggeration-detection
- 437 quarel
- 438 fever-evidence-related/mwong--fever-related
- 439 numer_sense
- 440 dynasent/dynabench.dynasent.r1.all/r1
- 441 dynasent/dynabench.dynasent.r2.all/r2
- 442 Sarcasm_News_Headline
- 443 sem_eval_2010_task_8 |