Spaces:
Build error
Build error
File size: 3,305 Bytes
e487255 a833f5f e487255 a833f5f a07780a a833f5f a07780a a833f5f a07780a a833f5f e487255 a833f5f e487255 a833f5f a07780a e487255 a833f5f e487255 a833f5f e487255 a833f5f e487255 a07780a e487255 e3f4b64 a833f5f a07780a e487255 a833f5f e487255 a833f5f e487255 a07780a e487255 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 |
import sys
tabpfn_path = 'TabPFN'
sys.path.insert(0, tabpfn_path) # our submodule of the TabPFN repo (at 045c8400203ebd062346970b4f2c0ccda5a40618)
from TabPFN.scripts.transformer_prediction_interface import TabPFNClassifier
import numpy as np
import pandas as pd
import torch
import gradio as gr
import openml
from sklearn.model_selection import cross_val_score
def compute(file, y_attribute, cv_folds):
    """Cross-validate TabPFN on an uploaded ARFF file and report ROC AUC (OVO).

    Args:
        file: Uploaded file object whose ``name`` attribute is the path of
            the file on disk, or ``None`` when nothing has been uploaded.
        y_attribute: Name of the attribute to use as the prediction target.
        cv_folds: Number of cross-validation folds.

    Returns:
        A ``(message, y_attribute)`` pair. ``message`` is either a hint for
        the user (missing/invalid file, unknown target attribute) or the
        cross-validation result; ``y_attribute`` is passed through unchanged.
    """
    if file is None or not file.name.endswith('.arff'):
        return 'Please upload a .arff file', y_attribute

    dataset = openml.datasets.OpenMLDataset('t', 'test', data_file=file.name)

    # First pass without a target, only to learn which attribute names exist.
    _, _, _, attribute_names = dataset.get_data(dataset_format="array")
    if y_attribute not in attribute_names:
        return f"**Select attribute from {', '.join(attribute_names)}**", y_attribute

    # Second pass splits the requested target column off from the features.
    X, y, _, _ = dataset.get_data(dataset_format="array", target=y_attribute)

    # Shuffle rows with a fixed seed so repeated runs use the same order.
    order = np.arange(y.shape[0])
    np.random.seed(13)
    np.random.shuffle(order)
    X, y = torch.tensor(X[order]), torch.tensor(y[order])

    classifier = TabPFNClassifier(base_path=tabpfn_path, device='cpu')
    scores = cross_val_score(classifier, X, y, cv=cv_folds, scoring='roc_auc_ovo')
    print(scores)

    # Each split fits on every row except the held-out fold, so the training
    # size is n - n // k (not n // k, which is the size of the held-out fold).
    train_size = len(y) - len(y) // cv_folds
    if train_size > 1024:
        extrapolation_note = (
            "The PFN is only trained for datasets with up to 1024 training examples and it had to extrapolate to greater datasets for this evaluation."
        )
    else:
        extrapolation_note = ""

    message = (
        f"ROC AUC OVO Cross Val mean is {sum(scores) / len(scores)} from {scores}. "
        + extrapolation_note
    )
    return message, y_attribute
def upload_file(file):
    """Inspect an uploaded ARFF file and suggest a target attribute.

    Args:
        file: Uploaded file object whose ``name`` attribute is the path of
            the file on disk, or ``None`` when no file is present.

    Returns:
        A ``(message, y_attribute)`` pair on every path, so the result always
        matches the two output components this handler is wired to. For a
        ``.arff`` file the message lists the available attribute names and
        the suggested target is the last attribute; otherwise the message
        asks for a ``.arff`` file and the suggested target is ``None``.
    """
    # A missing file is handled like a wrong file type: returning a bare
    # ``None`` here would not provide one value per output component.
    if file is None or not file.name.endswith('.arff'):
        return 'Please upload a .arff file', None

    dataset = openml.datasets.OpenMLDataset('t', 'test', data_file=file.name)
    _, _, _, attribute_names = dataset.get_data(dataset_format="array")
    return f"Select attribute from {', '.join(attribute_names)}", attribute_names[-1]
with gr.Blocks() as demo:
    # Components are created in display order: intro text, file upload,
    # fold selector, result text, target attribute box, examples, button.
    gr.Markdown("""This demo allows you to play with the **TabPFN**.
Upload a .arff file, select an attribute to predict and the number of cross validation folds and get the ROC AUC OVO score for one seed.
""")
    inp_file = gr.File(
        label='Drop a .arff file.')
    cv_folds = gr.Dropdown([2, 3, 4, 5], value=2, label='Number of CV folds')
    out_text = gr.Markdown()
    y_attribute = gr.Textbox(label='y attribute')

    # Example file; its upload_file output is cached via cache_examples.
    examples = gr.Examples(examples=['balance-scale.arff'],
                           inputs=[inp_file],
                           outputs=[out_text, y_attribute],
                           fn=upload_file,
                           cache_examples=True)

    # NOTE(review): the label mentions predicting table cells, but the click
    # handler runs cross-validation and reports a score — confirm the wording.
    btn = gr.Button("Predict Empty Table Cells")

    # Changing the file refreshes the attribute hint and suggested target;
    # clicking the button runs the cross-validation.
    inp_file.change(fn=upload_file, inputs=inp_file, outputs=[out_text, y_attribute])
    btn.click(fn=compute, inputs=[inp_file, y_attribute, cv_folds], outputs=[out_text, y_attribute])

demo.launch()