File size: 5,782 Bytes
beb2b32
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
import json
import logging
import pathlib
import pandas as pd
import gradio as gr
import schedule
import time
from datetime import datetime, timezone
from src.display.utils import EvalQueueColumn

from src.envs import API

# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

class VoteManager:
    """Record per-user votes for models in a local JSONL file and upload them.

    Every vote is appended to ``<votes_path>/votes_data.jsonl`` as one JSON
    object per line with the keys ``model``, ``revision``, ``username`` and
    ``timestamp``.  An in-memory set of ``(model, revision, username)`` tuples
    is kept alongside to reject duplicate votes.
    """

    # File name of the vote log inside ``votes_path``.
    _VOTES_FILENAME = "votes_data.jsonl"
    # Keys written for every vote; they give an empty dataset its columns so
    # column-based operations (groupby, merge) work before the first vote.
    _VOTE_COLUMNS = ("model", "revision", "username", "timestamp")

    def __init__(self, votes_path, eval_requests_path, repo_id):
        """Store the configuration and load the votes already on disk.

        Args:
            votes_path: Directory that holds ``votes_data.jsonl``.
            eval_requests_path: Directory whose sub-folders contain the
                ``*.json`` request files read by ``get_model_revision``.
            repo_id: Repository id passed to ``API.upload_file``.
        """
        self.votes_path = votes_path
        self.eval_requests_path = eval_requests_path
        self.repo_id = repo_id
        self.votes_to_upload = []
        self.init_vote_dataset()

    def _votes_file(self) -> pathlib.Path:
        """Return the path of the JSONL vote log."""
        return pathlib.Path(self.votes_path) / self._VOTES_FILENAME

    def init_vote_dataset(self):
        """(Re)load the votes from disk and rebuild the duplicate-check set."""
        self.vote_dataset = self.read_vote_dataset()
        self.vote_check_set = self.make_check_set(self.vote_dataset)

    def read_vote_dataset(self) -> pd.DataFrame:
        """Read the vote log into a DataFrame, one row per vote.

        Blank lines are skipped.  When the file is missing or holds no votes,
        an empty DataFrame that still has the vote columns is returned.

        Raises:
            json.JSONDecodeError: If a non-blank line is not valid JSON.
        """
        votes_file = self._votes_file()
        records = []
        if votes_file.exists():
            with open(votes_file, "r", encoding="utf-8") as f:
                for line in f:
                    line = line.strip()
                    if not line:
                        continue
                    records.append(json.loads(line))
        if not records:
            return pd.DataFrame(columns=list(self._VOTE_COLUMNS))
        return pd.DataFrame(records)

    def make_check_set(self, vote_dataset: pd.DataFrame) -> set:
        """Return the set of ``(model, revision, username)`` tuples in the dataset."""
        return {
            (row.model, row.revision, row.username)
            for row in vote_dataset.itertuples(index=False, name="vote")
        }

    def get_model_revision(self, selected_model: str) -> str:
        """Fetch the revision for the given model from the request files.

        Scans ``*.json`` files in each sub-folder of ``eval_requests_path`` and
        returns the ``revision`` of the first file whose ``model`` equals
        ``selected_model``.  Falls back to ``"main"`` when the file has no
        ``revision`` key, when no file matches, or when the requests directory
        does not exist.
        """
        requests_root = pathlib.Path(self.eval_requests_path)
        if not requests_root.is_dir():
            return "main"
        for user_folder in requests_root.iterdir():
            if not user_folder.is_dir():
                continue
            for file in user_folder.glob("*.json"):
                with open(file, "r", encoding="utf-8") as f:
                    data = json.load(f)
                if data.get("model") == selected_model:
                    return data.get("revision", "main")
        return "main"

    def create_request_vote_df(self, pending_models_df: pd.DataFrame):
        """Attach a ``vote_count`` column to the pending-models table.

        The vote log is re-read from disk, votes are counted per
        ``(model, revision)`` and left-joined onto ``pending_models_df`` using
        its ``model_name`` and ``revision`` columns.  Rows without votes get a
        count of 0.  The result is sorted by ``vote_count`` (descending), then
        ``model_name`` (ascending), and the ``model_name`` / ``model`` columns
        are dropped.

        Returns:
            The input unchanged when it is empty or has no ``model_name``
            column; otherwise the merged and sorted DataFrame.
        """
        if pending_models_df.empty or "model_name" not in pending_models_df.columns:
            return pending_models_df
        self.vote_dataset = self.read_vote_dataset()
        if self.vote_dataset.empty:
            # No votes yet: build the counts table explicitly so the merge
            # below still finds its key columns and a numeric vote_count.
            vote_counts = pd.DataFrame(
                {
                    "model": pd.Series(dtype="object"),
                    "revision": pd.Series(dtype="object"),
                    "vote_count": pd.Series(dtype="float64"),
                }
            )
        else:
            vote_counts = (
                self.vote_dataset.groupby(["model", "revision"])
                .size()
                .reset_index(name="vote_count")
            )

        pending_models_df_votes = pd.merge(
            pending_models_df,
            vote_counts,
            left_on=["model_name", "revision"],
            right_on=["model", "revision"],
            how="left",
        )
        # Models nobody voted for come out of the left join as NaN.
        pending_models_df_votes["vote_count"] = pending_models_df_votes["vote_count"].fillna(0)
        pending_models_df_votes = pending_models_df_votes.sort_values(
            by=["vote_count", "model_name"], ascending=[False, True]
        )
        # Both name columns are dropped from the returned table.
        pending_models_df_votes = pending_models_df_votes.drop(["model_name", "model"], axis=1)
        return pending_models_df_votes

    # Function to be called when a user votes for a model
    def add_vote(
            self,
            selected_model: str,
            pending_models_df: pd.DataFrame,
            profile: gr.OAuthProfile | None
        ):
        """Record one vote of the logged-in user for ``selected_model``.

        A warning is shown and ``None`` is returned when no model is selected,
        when ``profile`` is ``None``, when this user already voted for the same
        model and revision, or when the vote cannot be written to disk.

        Returns:
            The result of ``create_request_vote_df(pending_models_df)`` after
            a successful vote, otherwise ``None``.
        """
        # "str" is treated as "nothing selected", like the empty string.
        if not selected_model or selected_model == "str":
            gr.Warning("No model selected")
            return

        if profile is None:
            gr.Warning("Hub Login required")
            return

        vote_username = profile.username
        model_revision = self.get_model_revision(selected_model)

        # Hashable key used to check whether this user already voted for the model.
        check_tuple = (selected_model, model_revision, vote_username)
        if check_tuple in self.vote_check_set:
            gr.Warning("Already voted for this model")
            return

        current_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

        vote_obj = {
            "model": selected_model,
            "revision": model_revision,
            "username": vote_username,
            "timestamp": current_time
        }

        # Append the vote to the JSONL file
        try:
            with open(self._votes_file(), "a", encoding="utf-8") as f:
                f.write(json.dumps(vote_obj) + "\n")
            logger.info(f"Vote added locally: {vote_obj}")

            self.votes_to_upload.append(vote_obj)
        except Exception as e:
            # Any failure here is reported to the user as a warning rather
            # than propagated out of the handler.
            logger.error(f"Failed to write vote to file: {e}")
            gr.Warning("Failed to record vote. Please try again")
            return

        # Only mark the vote as cast once it is safely on disk.
        self.vote_check_set.add(check_tuple)
        gr.Info(f"Voted for {selected_model}")

        return self.create_request_vote_df(pending_models_df)

    def upload_votes(self):
        """Upload the whole vote log to the dataset repository if votes are pending.

        Does nothing when ``votes_to_upload`` is empty.  Upload failures are
        logged and the pending votes are kept so a later call can retry.
        """
        if not self.votes_to_upload:
            return
        # Remember how many votes were pending before the upload started so
        # that entries appended while it runs are not discarded afterwards.
        pending_count = len(self.votes_to_upload)
        try:
            with open(self._votes_file(), "rb") as f:
                API.upload_file(
                    path_or_fileobj=f,
                    path_in_repo="votes_data.jsonl",
                    repo_id=self.repo_id,
                    repo_type="dataset",
                    commit_message="Updating votes_data.jsonl with new votes",
                )
            logger.info("Votes uploaded to votes repository")
            del self.votes_to_upload[:pending_count]
        except Exception as e:
            logger.error(f"Failed to upload votes to repository: {e}")

def run_scheduler(vote_manager):
    """Run pending ``schedule`` jobs forever, polling once per second.

    The loop never returns, so it blocks the calling thread.  ``vote_manager``
    is accepted but not referenced by the loop itself.
    """
    poll_interval_seconds = 1
    while True:
        schedule.run_pending()
        time.sleep(poll_interval_seconds)