Spaces:
Build error
Build error
Samuel Mueller
committed on
Commit
·
a833f5f
1
Parent(s):
8c2994a
eval demo w/ shuffling
Browse files
app.py
CHANGED
@@ -8,89 +8,76 @@ import pandas as pd
|
|
8 |
import torch
|
9 |
import gradio as gr
|
10 |
import openml
|
|
|
11 |
|
12 |
|
13 |
-
def compute(
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
x_eval = torch.tensor(np.delete(eval_table, y_column, axis=1).astype(np.float32))
|
32 |
-
|
33 |
-
y_train = train_table[:, y_column]
|
34 |
-
except ValueError:
|
35 |
-
return "**Please only add numbers (to the inputs) or leave fields empty.**", None
|
36 |
|
37 |
classifier = TabPFNClassifier(base_path=tabpfn_path, device='cpu')
|
38 |
-
|
39 |
-
|
|
|
|
|
|
|
40 |
|
41 |
# print(file, type(file))
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
|
46 |
|
47 |
def upload_file(file):
|
|
|
|
|
48 |
if file.name.endswith('.arff'):
|
49 |
dataset = openml.datasets.OpenMLDataset('t', 'test', data_file=file.name)
|
|
|
50 |
X_, _, categorical_indicator_, attribute_names_ = dataset.get_data(
|
51 |
-
dataset_format="array"
|
52 |
-
)
|
53 |
-
|
54 |
-
return
|
55 |
-
elif file.name.endswith('.csv') or file.name.endswith('.data'):
|
56 |
-
df = pd.read_csv(file.name, header=None)
|
57 |
-
df.columns = np.arange(len(df.columns))
|
58 |
-
print(df)
|
59 |
-
return df
|
60 |
|
61 |
|
62 |
-
example = \
|
63 |
-
[
|
64 |
-
[1, 2, 1],
|
65 |
-
[2, 1, 1],
|
66 |
-
[1, 1, 1],
|
67 |
-
[2, 2, 2],
|
68 |
-
[3, 4, 2],
|
69 |
-
[3, 2, 2],
|
70 |
-
[2, 3, '']
|
71 |
-
]
|
72 |
-
|
73 |
with gr.Blocks() as demo:
|
74 |
gr.Markdown("""This demo allows you to play with the **TabPFN**.
|
75 |
You can either change the table manually (we have filled it with a toy benchmark, sum up to 3 has label 1 and over that label 2).
|
76 |
The network predicts fields you leave empty. Only one column can have empty entries that are predicted.
|
77 |
Please, provide everything but the label column as numeric values. It is ok to encode classes as integers.
|
78 |
""")
|
79 |
-
inp_table = gr.DataFrame(type='numpy', value=example, headers=[''] * 3)
|
80 |
inp_file = gr.File(
|
81 |
label='Drop either a .csv (without header, only numeric values for all but the labels) or a .arff file.')
|
|
|
|
|
|
|
|
|
|
|
82 |
examples = gr.Examples(examples=['iris.csv', 'balance-scale.arff'],
|
83 |
inputs=[inp_file],
|
84 |
-
outputs=[
|
85 |
fn=upload_file,
|
86 |
cache_examples=True)
|
87 |
btn = gr.Button("Predict Empty Table Cells")
|
|
|
|
|
88 |
|
89 |
-
|
90 |
-
|
91 |
-
out_text = gr.Markdown()
|
92 |
-
out_table = gr.DataFrame()
|
93 |
-
|
94 |
-
btn.click(fn=compute, inputs=inp_table, outputs=[out_text, out_table])
|
95 |
|
96 |
demo.launch()
|
|
|
8 |
import torch
|
9 |
import gradio as gr
|
10 |
import openml
|
11 |
+
from sklearn.model_selection import cross_val_score
|
12 |
|
13 |
|
14 |
+
def compute(file, y_attribute, cv_folds):
    """Cross-validate the TabPFN classifier on an uploaded ``.arff`` dataset.

    Args:
        file: Uploaded file object whose ``name`` attribute is the path to the
            data file, or ``None`` when nothing has been uploaded.
        y_attribute: Name of the attribute to use as the prediction target.
        cv_folds: Number of cross-validation folds.

    Returns:
        A ``(message, y_attribute)`` pair. Every branch returns two values
        because the click handler is wired to two output components.
    """
    if file is None or not file.name.endswith('.arff'):
        return 'Please upload a .arff file', y_attribute

    dataset = openml.datasets.OpenMLDataset('t', 'test', data_file=file.name)
    # First pass without a target, only to learn which attribute names exist.
    _, _, _, attribute_names = dataset.get_data(dataset_format="array")
    if y_attribute not in attribute_names:
        # Return the attribute alongside the hint so both outputs get a value.
        return f"**Select attribute from {', '.join(attribute_names)}**", y_attribute
    X, y, _, _ = dataset.get_data(dataset_format="array", target=y_attribute)

    # Shuffle rows with a fixed seed. A local RandomState yields the same
    # permutation as seeding the global generator with 13, without altering
    # the process-wide random state.
    order = np.arange(y.shape[0])
    np.random.RandomState(13).shuffle(order)
    X, y = torch.tensor(X[order]), torch.tensor(y[order])

    cv_folds = int(cv_folds)
    classifier = TabPFNClassifier(base_path=tabpfn_path, device='cpu')
    scores = cross_val_score(classifier, X, y, cv=cv_folds, scoring='roc_auc_ovo')

    # Each split trains on everything except one held-out fold, so the
    # training size is the total minus one fold (not the fold size itself).
    train_size = len(y) - len(y) // cv_folds
    if train_size > 1024:
        note = ("The PFN is only trained for datasets with up to 1024 training examples "
                "and it had to extrapolate to greater datasets for this evaluation.")
    else:
        note = ""

    summary = f"ROC AUC OVO Cross Val mean is {sum(scores) / len(scores)} from {scores}. "
    return summary + note, y_attribute
|
44 |
|
45 |
|
46 |
def upload_file(file):
    """Inspect an uploaded file and suggest a target attribute.

    Args:
        file: Uploaded file object whose ``name`` attribute is the path to the
            data file, or ``None`` when the upload was cleared.

    Returns:
        A ``(message, default_attribute)`` pair. For a ``.arff`` file the
        message lists the available attribute names and the default is the
        last one; otherwise the message asks for a ``.arff`` file and the
        default is ``None``. Two values are always returned because the
        handler is wired to two output components.
    """
    if file is None or not file.name.endswith('.arff'):
        return 'Please upload a .arff file', None

    dataset = openml.datasets.OpenMLDataset('t', 'test', data_file=file.name)
    _, _, _, attribute_names = dataset.get_data(dataset_format="array")
    # Guard the [-1] lookup so a file without attributes does not raise.
    default_attribute = attribute_names[-1] if attribute_names else None
    return f"Select attribute from {', '.join(attribute_names)}", default_attribute
|
|
|
|
|
|
|
|
|
|
|
57 |
|
58 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
59 |
# Static UI copy, kept apart from the layout code below.
_INTRO_MARKDOWN = """This demo allows you to play with the **TabPFN**.
You can either change the table manually (we have filled it with a toy benchmark, sum up to 3 has label 1 and over that label 2).
The network predicts fields you leave empty. Only one column can have empty entries that are predicted.
Please, provide everything but the label column as numeric values. It is ok to encode classes as integers.
"""
_FILE_LABEL = 'Drop either a .csv (without header, only numeric values for all but the labels) or a .arff file.'

with gr.Blocks() as demo:
    # Components are created in display order: intro text, file upload,
    # fold selector, result text, target-attribute box, examples, button.
    gr.Markdown(_INTRO_MARKDOWN)
    inp_file = gr.File(label=_FILE_LABEL)
    cv_folds = gr.Dropdown([2, 3, 4, 5], value=2, label='Number of CV folds')
    out_text = gr.Markdown()
    y_attribute = gr.Textbox(label='y attribute')

    examples = gr.Examples(
        examples=['iris.csv', 'balance-scale.arff'],
        inputs=[inp_file],
        outputs=[out_text, y_attribute],
        fn=upload_file,
        cache_examples=True,
    )
    btn = gr.Button("Predict Empty Table Cells")

    # A new upload refreshes the hint text and the suggested target attribute.
    inp_file.change(fn=upload_file, inputs=inp_file, outputs=[out_text, y_attribute])
    # The button runs the cross-validation on the current file and settings.
    btn.click(
        fn=compute,
        inputs=[inp_file, y_attribute, cv_folds],
        outputs=[out_text, y_attribute],
    )

demo.launch()
|