"""Gradio demo that scores a tweet with a BERTweet sequence classifier.

The classifier weights are loaded from ``CHECKPOINT`` on top of the
``MODEL_NAME`` architecture; the UI reports the score of ``POSITIVE_LABEL``
as the "chance to become viral".
"""
from functools import lru_cache

import gradio as gr
import torch
from transformers import AutoModelForSequenceClassification
from transformers import BertweetTokenizer
from transformers import pipeline

from classification.model_with_only_language_models.text_preprocessing import clean_tweet

# Example tweets offered as clickable samples in the Gradio interface.
texts = [
    'tl;dr\n\nHumans are just ChatGPT Wrappers in sunglasses\n \n& I couldn’t be more optimistic about the future as a result\n\nThank you \n@ekang426322\n for an exceptionally curated day at BUIDL Europe!\n 🫶',
    'USD0++ discovered a new source of yield — depeg. \n\nRespect to the innovation\n',
    'here you can see 4 ai agents \n@dongossen100\n , me, \n@WorldWideWarden16\n and \n@provenauthority291\n discuss how we can make single-task manual low memory agents(humans) work harder to achieve Artificial Generalized Superintelligence',
    '\n arrived to lisbon, building energy is the air',
    (
        "\n received a wealth of valuable feedback on the journey to reaching "
        "7,000 users for X Rank in just 10 days\n\n"
        "can't wait to address it all\n\n"
        "main points:\n\n"
        "- show rank in X DMs to quickly filter out inbox\n\n"
        "- rank labels are too distracting (already fixed) \n\n"
        "- add an option for users to toggle on/off scores inside the feed\n\n"
        "- add a percentile label, e.g. qw 801 (Top 0.1%)\n\n"
        "- enable others to add reviews to impact the rank \n\n"
        "- explain in detail how rankings are calculated \n\n"
        "- show breakdowns of people in DeFi, DePin, Memecoins etc.\n\n"
        "- make X Rank opensource \n\n"
        "- create a web version\n\n"
        "p.s. the current version is just a tiny step in our roadmap for the "
        "next two months. \n\n"
        "thank you for the feedback \n@socialfi_panda101\n "
        "\n@adamkillam100\n "
        "\n@FamKien106\n "
        "\n@antongotchi104\n "
        "\n@kliuless128\n "
        "\n@0xsudogm163\n "
        "\n@monosarin120\n "
        "\n@flb_xyz56\n 🫶\n "
    ),
    'ai agents are in the air\n\nand web3 is trained to sniff out alpha',
    'While Trump is going to do something great with crypto, Wallchain is going to do something great with incentives🚀',
]

# import pandas as pd
# pd.DataFrame({'texts': texts}).to_csv('examples.csv')

CHECKPOINT = "classification/model_with_only_language_models/models/trained_vinai_bertweet-base.pt"
MODEL_NAME = "vinai/bertweet-base"

# Default token budget applied to every tokenizer call (see Tokenizer.__call__).
MAX_LENGTH = 100
# Label whose score is reported to the user.
POSITIVE_LABEL = 'LABEL_1'


class Tokenizer(BertweetTokenizer):
    """BERTweet tokenizer that truncates to ``MAX_LENGTH`` tokens by default."""

    def __call__(self, *args, **kwargs):
        """Tokenize like the parent class, defaulting to truncation at ``MAX_LENGTH``.

        ``setdefault`` is used instead of passing ``max_length`` directly so a
        caller that supplies its own ``max_length`` no longer triggers a
        "multiple values for keyword argument" ``TypeError``.
        """
        kwargs.setdefault("max_length", MAX_LENGTH)
        # Ask for truncation explicitly; ``max_length`` alone relies on a
        # library fallback to take effect.
        kwargs.setdefault("truncation", True)
        return super().__call__(*args, **kwargs)


def get_model():
    """Load the tokenizer and the classifier with the trained weights on CPU.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    device = 'cpu'
    model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME, num_labels=2)
    # NOTE(review): torch.load unpickles the file; only point CHECKPOINT at a
    # trusted file (consider weights_only=True if the installed torch has it).
    state_dict = torch.load(CHECKPOINT, map_location=device)
    model.load_state_dict(state_dict)
    # NOTE(review): the limit applied at encode time is the one set in
    # Tokenizer.__call__; presumably the max_length=120 here has no effect
    # on encoding - confirm before relying on it.
    tokenizer = Tokenizer.from_pretrained(MODEL_NAME, truncation=True, max_length=120)
    return tokenizer, model


@lru_cache(maxsize=1)
def get_pipeline():
    """Build the text-classification pipeline once and reuse it afterwards."""
    tokenizer, model = get_model()
    return pipeline(
        'text-classification',
        model=model,
        tokenizer=tokenizer,
        device="cpu",
    )


def evaluate(text: str) -> float:
    """Return the classifier's score for ``POSITIVE_LABEL`` on ``text``.

    The text is passed through ``clean_tweet`` (emojis are not demojized)
    before classification.

    Raises:
        IndexError: if the pipeline output contains no ``POSITIVE_LABEL`` entry.
    """
    pipe = get_pipeline()
    cleaned = clean_tweet(text, demojize_emojis=False)
    # top_k=2 requests the scores of both labels of the binary classifier.
    predictions = pipe(cleaned, top_k=2)
    for prediction in predictions:
        if prediction['label'] == POSITIVE_LABEL:
            # print(f"{prediction['score']:7.2%}")
            return prediction['score']
    raise IndexError(f"{POSITIVE_LABEL} is missing from the pipeline output")


# def serve():
#     pipe()
#     for text in texts:
#         res = pipe(clean_tweet(text, demojize_emojis=False), top_k=2)
#         LABEL_1_result = [x['score'] for x in res if x['label'] == 'LABEL_1'][0]
#         print(f"{LABEL_1_result:7.2%}")


def greet(text: str) -> str:
    """Gradio callback: format the score of ``text`` as a percentage message."""
    chance: float = evaluate(text)
    return f"Chance to become viral: {chance:.2%}"


if __name__ == "__main__":
    # Each example must be a list with one value per input component.
    demo = gr.Interface(
        fn=greet,
        inputs=["text"],
        outputs=["text"],
        examples=[[t] for t in texts],
    )
    demo.launch()