import json

import datasets
import gradio as gr
import joblib
import numpy as np
import pandas as pd

# Load the fitted scikit-learn pipeline from disk.
pipe = joblib.load("./model.pkl")

title = "Premium Amount Prediction"
description = (
    "This model predicts the Premium Amount. Drag and drop any slice from the "
    "dataset or edit values as you wish in the dataframe component below."
)

# Load the training split and drop incomplete rows so the example slices
# shown in the UI are complete records.
df = datasets.load_dataset("silvaKenpachi/mental_health")["train"].to_pandas()
df.dropna(axis=0, inplace=True)

# The model config lists every column the pipeline was trained on.
with open("./config.json") as f:
    config_dict = json.load(f)
all_headers = config_dict["sklearn"]["columns"]

# Only expose columns that actually exist in the dataset; columns the model
# expects but the dataset lacks are filled with defaults at inference time.
headers = [col for col in all_headers if col in df.columns]

# Input component: an editable dataframe with the dataset's columns.
inputs = [gr.Dataframe(
    headers=headers,
    row_count=(10, "dynamic"),  # (min_rows, "dynamic"): grows with pasted slices
    col_count=(len(headers), "fixed"),
    label="Input Data",
    interactive=True,
)]

# Output component: one prediction row per input row, keyed by name.
outputs = [gr.Dataframe(
    row_count=(10, "dynamic"),
    col_count=(2, "fixed"),
    label="Predictions",
    headers=["Name", "Depression"],
)]
def infer(inputs):
    """Predict Depression for each row of the input dataframe.

    Parameters
    ----------
    inputs : 2-D table data supplied by the gr.Dataframe component, one
        column per entry in ``headers``.

    Returns
    -------
    pandas.DataFrame
        Two columns, ``Name`` and ``Depression``, one row per input row.
    """
    data = pd.DataFrame(inputs, columns=headers)

    # Work on a copy so the original 'Name' values survive the numeric
    # cleanup below.
    prediction_data = data.copy()

    # Empty cells arrive from the UI as ''; treat them as missing in every
    # column except the free-text 'Name'.
    numeric_columns = [col for col in all_headers if col != 'Name']
    prediction_data[numeric_columns] = (
        prediction_data[numeric_columns].replace('', np.nan)
    )

    # Columns the model was trained on but absent from the dataset get a
    # neutral default value.
    for col in all_headers:
        if col not in prediction_data.columns:
            prediction_data[col] = 0

    # Match the exact column order the pipeline saw during training.
    prediction_data = prediction_data[all_headers]

    # Default missing numeric values and coerce to float for the pipeline.
    prediction_data[numeric_columns] = prediction_data[numeric_columns].fillna(0)
    prediction_data[numeric_columns] = prediction_data[numeric_columns].astype(float)

    predictions = pipe.predict(prediction_data)

    # Pair each prediction with the unmodified name from the input.
    # Fix: 'headers' is filtered to columns present in the dataset, so
    # data['Name'] may not exist — fall back to the row index instead of
    # raising KeyError.
    names = data['Name'] if 'Name' in data.columns else data.index
    return pd.DataFrame({
        'Name': names,
        'Depression': predictions,
    })


gr.Interface(
    fn=infer,
    inputs=inputs,
    outputs=outputs,
    title=title,
    description=description,
    examples=[df[headers].head(3).values.tolist()],
    cache_examples=False,
).launch(debug=True)