Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
File size: 3,490 Bytes
1ffc326 9b14fa5 18abd06 1ffc326 9b14fa5 8b88d2c 1ffc326 9b14fa5 8b88d2c 1ffc326 9b14fa5 1ffc326 9b14fa5 1ffc326 9b14fa5 1ffc326 8b88d2c 1ffc326 8b88d2c 1ffc326 08ae6c5 9b14fa5 6902167 9b14fa5 6902167 19999b4 9b14fa5 08ae6c5 1ffc326 9b14fa5 1ffc326 9b14fa5 1ffc326 9b14fa5 95c19d6 1ffc326 9b14fa5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 |
import logging
import pprint
from huggingface_hub import snapshot_download
from src.backend.manage_requests import (
FAILED_STATUS,
FINISHED_STATUS,
PENDING_STATUS,
RUNNING_STATUS,
check_completed_evals,
get_eval_requests,
set_eval_request,
)
from src.backend.run_eval_suite_lighteval import run_evaluation
from src.backend.sort_queue import sort_models_by_priority
from src.envs import (
ACCELERATOR,
API,
EVAL_REQUESTS_PATH_BACKEND,
EVAL_RESULTS_PATH_BACKEND,
LIMIT,
QUEUE_REPO,
REGION,
RESULTS_REPO,
TASKS_LIGHTEVAL,
TOKEN,
VENDOR,
)
from src.logging import setup_logger
# Pretty-printer used to dump an eval request into the log in a readable form.
pp = pprint.PrettyPrinter(width=80)

# Keep the "openai" logger quiet: only WARNING and above are emitted.
logging.getLogger("openai").setLevel(logging.WARNING)
# logging.basicConfig(level=logging.ERROR)
logger = setup_logger(__name__)

# Download the "main" revision of both Hub datasets at import time, results
# repo first and request queue second, each into its local backend directory.
for _repo_id, _local_dir in (
    (RESULTS_REPO, EVAL_RESULTS_PATH_BACKEND),
    (QUEUE_REPO, EVAL_REQUESTS_PATH_BACKEND),
):
    snapshot_download(
        repo_id=_repo_id,
        revision="main",
        local_dir=_local_dir,
        repo_type="dataset",
        max_workers=60,
        token=TOKEN,
    )
def run_auto_eval():
    """Process at most one pending evaluation request from the queue.

    The function:

    1. calls ``check_completed_evals`` on requests currently in
       ``RUNNING_STATUS`` (passing the finished/failed statuses it may assign);
    2. fetches the requests whose status is ``PENDING_STATUS`` and orders them
       with ``sort_models_by_priority``;
    3. returns immediately when nothing is pending;
    4. otherwise marks the first request as ``RUNNING_STATUS`` and hands it to
       ``run_evaluation`` with the configured tasks, hardware and limits.

    Returns:
        None.
    """
    # Statuses considered "ready to run"; extend this list to pick up others.
    current_pending_status = [PENDING_STATUS]

    # Every queue helper below targets the same Hub repo / local mirror pair.
    queue_location = {
        "hf_repo": QUEUE_REPO,
        "local_dir": EVAL_REQUESTS_PATH_BACKEND,
    }

    # Reconcile requests that were left RUNNING against the results dataset.
    check_completed_evals(
        api=API,
        checked_status=RUNNING_STATUS,
        completed_status=FINISHED_STATUS,
        failed_status=FAILED_STATUS,
        hf_repo_results=RESULTS_REPO,
        local_dir_results=EVAL_RESULTS_PATH_BACKEND,
        **queue_location,
    )

    # Collect the pending requests, then order them by priority
    # (per the original note: first submitted, first run).
    eval_requests = get_eval_requests(job_status=current_pending_status, **queue_location)
    eval_requests = sort_models_by_priority(api=API, models=eval_requests)

    num_found = len(eval_requests)
    logger.info(f"Found {num_found} {','.join(current_pending_status)} eval requests")

    # Nothing queued: leave without touching any request.
    if num_found == 0:
        return

    # Only the head of the queue is handled per invocation.
    eval_request = eval_requests[0]
    logger.info(pp.pformat(eval_request))

    # Flag the request as RUNNING before starting the evaluation.
    set_eval_request(
        api=API,
        eval_request=eval_request,
        set_to_status=RUNNING_STATUS,
        **queue_location,
    )

    # TODO: derive the instance from the model, e.g.
    #   instance_size, instance_type = get_instance_for_model(eval_request)
    # GPU example:
    #   instance_size, instance_type = "small", "g4dn.xlarge"
    # CPU (current choice); naming reference:
    #   https://huggingface.co/docs/inference-endpoints/pricing
    instance_size = "x4"
    instance_type = "intel-icl"

    logger.info(
        f"Starting Evaluation of {eval_request.json_filepath} on Inference endpoints: {instance_size} {instance_type}"
    )

    run_evaluation(
        eval_request=eval_request,
        task_names=TASKS_LIGHTEVAL,
        local_dir=EVAL_RESULTS_PATH_BACKEND,
        batch_size=1,
        accelerator=ACCELERATOR,
        region=REGION,
        vendor=VENDOR,
        instance_size=instance_size,
        instance_type=instance_type,
        limit=LIMIT,
    )

    logger.info(
        f"Completed Evaluation of {eval_request.json_filepath} on Inference endpoints: {instance_size} {instance_type}"
    )
if __name__ == "__main__":
    # Invoked as a script: perform a single evaluation pass over the queue.
    run_auto_eval()
|