kaikaidai commited on
Commit
b77c18b
·
verified ·
1 Parent(s): 6a688c6

New models appear more often

Browse files
Files changed (1) hide show
  1. app.py +12 -11
app.py CHANGED
@@ -686,7 +686,7 @@ with gr.Blocks(theme="default", css=CSS_STYLES) as demo:
686
  score3_description,
687
  score4_description,
688
  score5_description,
689
- is_first_game, # Add state variable as input
690
  ):
691
  # Build prompt data dictionary
692
  prompt_data = {
@@ -705,21 +705,20 @@ with gr.Blocks(theme="default", css=CSS_STYLES) as demo:
705
  active_models = [name for name, info in model_data.items()
706
  if info.get("active", True)]
707
 
708
-
709
  # Define new models list
710
- new_models = ["Atla-8B-preview", "Flow-Judge-v0.1"]
711
 
712
  if is_first_game:
713
- # For the first game, ensure new model is one of the models to catch up on votes
714
- atla_model = "Atla-8B-preview"
715
- other_models = [m for m in active_models if m != atla_model]
716
  other_model = random.choice(other_models)
717
 
718
  # Randomly assign new model to either position A or B
719
  if random.random() < 0.5:
720
- model_a, model_b = atla_model, other_model
721
  else:
722
- model_a, model_b = other_model, atla_model
723
  else:
724
  # For subsequent games, new models appear 40% of the time
725
  if random.random() < 0.4:
@@ -758,12 +757,14 @@ with gr.Blocks(theme="default", css=CSS_STYLES) as demo:
758
  is_atla_a = (model_data.get(model_a)['organization'] == 'Atla')
759
  is_atla_b = (model_data.get(model_b)['organization'] == 'Atla')
760
  is_flow_judge_a = (model_data.get(model_a)['organization'] == 'Flow AI')
761
- is_flow_judge_b = (model_data.get(model_b)['organization'] == 'Flow AI')
 
 
762
 
763
  if is_prometheus_a:
764
  score_a_val, critique_a_val = prometheus_parse_model_response(response_a)
765
  score_a_val = f"{score_a_val} / 5"
766
- elif is_atla_a:
767
  score_a_val, critique_a_val = atla_parse_model_response(response_a)
768
  score_a_val = f"{score_a_val} / 5"
769
  elif is_flow_judge_a:
@@ -776,7 +777,7 @@ with gr.Blocks(theme="default", css=CSS_STYLES) as demo:
776
  if is_prometheus_b:
777
  score_b_val, critique_b_val = prometheus_parse_model_response(response_b)
778
  score_b_val = f"{score_b_val} / 5"
779
- elif is_atla_b:
780
  score_b_val, critique_b_val = atla_parse_model_response(response_b)
781
  score_b_val = f"{score_b_val} / 5"
782
  elif is_flow_judge_b:
 
686
  score3_description,
687
  score4_description,
688
  score5_description,
689
+ is_first_game,
690
  ):
691
  # Build prompt data dictionary
692
  prompt_data = {
 
705
  active_models = [name for name, info in model_data.items()
706
  if info.get("active", True)]
707
 
 
708
  # Define new models list
709
+ new_models = ["Atla-8B-preview", "Flow-Judge-0.1", "SFR-LLaMA-3.1-70B-Judge"] # add "Flow-Judge-1.0" once ready
710
 
711
  if is_first_game:
712
+ # For the first game, ensure Salesforce model is one of the models to catch up on votes
713
+ salesforce_model = "SFR-LLaMA-3.1-70B-Judge"
714
+ other_models = [m for m in active_models if m != salesforce_model]
715
  other_model = random.choice(other_models)
716
 
717
  # Randomly assign new model to either position A or B
718
  if random.random() < 0.5:
719
+ model_a, model_b = salesforce_model, other_model
720
  else:
721
+ model_a, model_b = other_model, salesforce_model
722
  else:
723
  # For subsequent games, new models appear 40% of the time
724
  if random.random() < 0.4:
 
757
  is_atla_a = (model_data.get(model_a)['organization'] == 'Atla')
758
  is_atla_b = (model_data.get(model_b)['organization'] == 'Atla')
759
  is_flow_judge_a = (model_data.get(model_a)['organization'] == 'Flow AI')
760
+ is_flow_judge_b = (model_data.get(model_b)['organization'] == 'Flow AI')
761
+ is_salesforce_a = (model_data.get(model_a)['organization'] == 'Salesforce')
762
+ is_salesforce_b = (model_data.get(model_b)['organization'] == 'Salesforce')
763
 
764
  if is_prometheus_a:
765
  score_a_val, critique_a_val = prometheus_parse_model_response(response_a)
766
  score_a_val = f"{score_a_val} / 5"
767
+ elif is_atla_a or is_salesforce_a: # Same parser for Atla and Salesforce
768
  score_a_val, critique_a_val = atla_parse_model_response(response_a)
769
  score_a_val = f"{score_a_val} / 5"
770
  elif is_flow_judge_a:
 
777
  if is_prometheus_b:
778
  score_b_val, critique_b_val = prometheus_parse_model_response(response_b)
779
  score_b_val = f"{score_b_val} / 5"
780
+ elif is_atla_b or is_salesforce_b: # Same parser for Atla and Salesforce
781
  score_b_val, critique_b_val = atla_parse_model_response(response_b)
782
  score_b_val = f"{score_b_val} / 5"
783
  elif is_flow_judge_b: