jeduardogruiz
/

Mixtral_ether

Reinforcement Learning

Model card Files Files and versions Community

Mixtral_ether / benchmark.py

jeduardogruiz's picture

Upload 2 files

637a7d6 verified 6 months ago

history blame contribute delete

1.16 kB

	import base64
	import functools
	import gzip
	import json
	import os
	import random
	import time
	from typing import Any, cast

	import blobfile.boto3
	import numpy as np
	import tiktoken


	def benchmark_batch(documents: list[str]) -> None:
	    """Print batch-encoding throughput of tiktoken and Hugging Face GPT-2.

	    Both tokenizers encode the same ``documents`` and each result is
	    reported as UTF-8 input bytes per second, so the two printed lines are
	    directly comparable.

	    Args:
	        documents: Texts to encode as one batch.

	    Raises:
	        KeyError: If the ``RAYON_NUM_THREADS`` environment variable is unset.
	        ValueError: If ``RAYON_NUM_THREADS`` is not an integer string.
	    """
	    num_threads = int(os.environ["RAYON_NUM_THREADS"])
	    # Throughput is measured against the UTF-8 encoded size of the input.
	    num_bytes = sum(len(doc.encode()) for doc in documents)
	    print(f"num_threads: {num_threads}, num_bytes: {num_bytes}")

	    enc = tiktoken.get_encoding("gpt2")
	    # Warm-up call so one-time setup cost stays out of the timed region.
	    enc.encode("warmup")

	    start = time.perf_counter_ns()
	    enc.encode_ordinary_batch(documents, num_threads=num_threads)
	    end = time.perf_counter_ns()
	    print(f"tiktoken \t{num_bytes / (end - start) * 1e9} bytes / s")

	    # Imported here rather than at module level, so the dependency is only
	    # loaded once the benchmark reaches the Hugging Face half.
	    import transformers

	    hf_enc = cast(Any, transformers).GPT2TokenizerFast.from_pretrained("gpt2")
	    # NOTE(review): presumably raised to silence the over-length sequence
	    # warning for long documents -- confirm against the transformers docs.
	    hf_enc.model_max_length = 1e30
	    hf_enc.pad_token = hf_enc.eos_token
	    hf_enc.encode("warmup")

	    start = time.perf_counter_ns()
	    # Encode the same batch as above; timing a different input would make
	    # the comparison meaningless.
	    hf_enc(documents)
	    end = time.perf_counter_ns()
	    # Use the real input size, matching the tiktoken figure above.
	    print(f"huggingface \t{num_bytes / (end - start) * 1e9} bytes / s")