# app.py
import os
import json
import streamlit as st
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
from datasets import Dataset
import torch
from huggingface_hub import HfFolder
import subprocess
from AppointmentScheduler import AppointmentScheduler

# Authenticate Hugging Face Hub
hf_token = st.secrets["HF_TOKEN"]
HfFolder.save_token(hf_token)

def set_git_config():
    try:
        subprocess.run(['git', 'config', '--global', 'user.email', 'nilesh.hanotia@outlook.com'], check=True)
        subprocess.run(['git', 'config', '--global', 'user.name', 'Nilesh'], check=True)
        st.success("Git configuration set successfully.")
    except subprocess.CalledProcessError as e:
        st.error(f"Git configuration error: {str(e)}")

set_git_config()

@st.cache_data
def load_data(file_paths):
    combined_data = []
    for file_path in file_paths:
        file_path = file_path.strip()
        if not os.path.exists(file_path):
            st.error(f"File not found: {file_path}")
            return None
        try:
            with open(file_path, 'r') as f:
                data = json.load(f)
            # Add a print to inspect the data structure
            print(f"Data loaded from {file_path}: {data}")
            
            # Assuming you're expecting 'intents' with 'examples'
            if 'intents' in data:
                for intent in data['intents']:
                    combined_data.extend(intent['examples'])
            else:
                st.error(f"Invalid format in file: {file_path}")
                return None
        except Exception as e:
            st.error(f"Error loading dataset from {file_path}: {str(e)}")
            return None
    print(f"Combined data: {combined_data}")  # Check the combined dataset
    return combined_data

@st.cache_resource
def initialize_model_and_tokenizer(model_name, num_labels):
    try:
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=num_labels)
        
        # Set the padding token to the EOS token
        tokenizer.pad_token = tokenizer.eos_token
        
        # Update the model config
        model.config.pad_token_id = tokenizer.pad_token_id
        
        # Resize the token embeddings as we added a new token
        model.resize_token_embeddings(len(tokenizer))
        
        return tokenizer, model
    except Exception as e:
        st.error(f"Error initializing model and tokenizer: {str(e)}")
        return None, None

def create_dataset(data, tokenizer, max_length, num_labels):
    texts = [item.get('prompt', '') for item in data if item.get('prompt')]
    labels = [item.get('label', 0) for item in data if item.get('prompt')]

    if not texts:
        raise ValueError("The input texts list is empty. Please check your data.")

    # Ensure all labels are within the valid range
    labels = [label if 0 <= label < num_labels else 0 for label in labels]

    # Tokenize the input texts with proper padding and truncation
    encodings = tokenizer(
        texts,
        truncation=True,
        padding='max_length',
        max_length=max_length,
        return_tensors='pt'
    )

    dataset = Dataset.from_dict({
        'input_ids': encodings['input_ids'],
        'attention_mask': encodings['attention_mask'],
        'labels': labels
    })
    return dataset
    
def main():
    st.title("Appointment Scheduling Platform")

    model_name = st.text_input("Enter model name", "distilgpt2")
    file_paths = st.text_area("Enter training data paths").split(',')
    max_length = st.number_input("Max token length", 128)
    num_epochs = st.number_input("Training epochs", 3)
    batch_size = st.number_input("Batch size", 8)
    learning_rate = st.number_input("Learning rate", 5e-5)
    num_labels = 3
    
    repo_id = st.text_input("Hugging Face Repo ID", "nileshhanotia/PeVe")
    tokenizer, model = initialize_model_and_tokenizer(model_name, num_labels)

    if tokenizer and model:
        data = load_data(file_paths)
        if data:
            print(f"Total data loaded: {len(data)}")
            print(f"Sample data item: {data[0] if data else 'No data'}")
            
            train_data, eval_data = data[:int(len(data)*0.8)], data[int(len(data)*0.8):]
            print(f"Train data size: {len(train_data)}, Eval data size: {len(eval_data)}")
            
            train_dataset = create_dataset(train_data, tokenizer, max_length, num_labels)
            eval_dataset = create_dataset(eval_data, tokenizer, max_length, num_labels)
            
            print(f"Train dataset size: {len(train_dataset)}, Eval dataset size: {len(eval_dataset)}")
            print(f"Sample train item: {train_dataset[0] if train_dataset else 'No data'}")
            
            training_args = TrainingArguments(
                output_dir='./results',
                evaluation_strategy='epoch',
                learning_rate=learning_rate,
                per_device_train_batch_size=batch_size,
                per_device_eval_batch_size=batch_size,
                num_train_epochs=num_epochs,
                logging_dir='./logs',
                push_to_hub=True,
                hub_model_id=repo_id,
            )
            trainer = Trainer(
                model=model,
                args=training_args,
                train_dataset=train_dataset,
                eval_dataset=eval_dataset,
            )
            if st.button('Start Training'):
                st.write("Training model...")
                trainer.train()
                trainer.push_to_hub()
                st.write(f"Model pushed to: {repo_id}")
                
    # Integrate AppointmentScheduler
    st.header("Appointment Scheduler")
    
    # Initialize session state for conversation history and scheduler
    if 'conversation_history' not in st.session_state:
        st.session_state.conversation_history = []
        st.session_state.scheduler = AppointmentScheduler()
        st.session_state.first_interaction = True

    user_input = st.text_input("Enter patient response")
    if user_input:
        # If it's the first interaction, start with the greeting
        if st.session_state.first_interaction:
            response = st.session_state.scheduler.handle_incoming_speech("hello")
            st.session_state.conversation_history.append(("Assistant", response))
            st.session_state.first_interaction = False

        # Use AppointmentScheduler to handle the response
        response = st.session_state.scheduler.handle_incoming_speech(user_input)
        st.session_state.conversation_history.append(("Patient", user_input))
        st.session_state.conversation_history.append(("Assistant", response))

    # Display conversation history
    for speaker, message in st.session_state.conversation_history:
        st.write(f"{speaker}: {message}")

if __name__ == "__main__":
    main()