import os        # to check if the model file already exists
import sys       # to flush stdout
import markdown  # to render the answer as HTML
import gradio as gr
#import transformers
#from transformers import pipeline
from llama_cpp import Llama
from huggingface_hub import hf_hub_download

model_repo = "TheBloke/Nous-Hermes-13B-GGML"
model_filename = "nous-hermes-13b.ggmlv3.q4_K_S.bin"
#model="TheBloke/Nous-Hermes-13B-GGML"
#model="https://huggingface.co/TheBloke/Nous-Hermes-13B-GGML/resolve/main/nous-hermes-13b.ggmlv3.q4_K_S.bin"


def download_model():
    # See https://github.com/OpenAccess-AI-Collective/ggml-webui/blob/main/tabbed.py
    # Path where hf_hub_download places this model snapshot in the Space's cache.
    file_path = "/home/user/.cache/huggingface/hub/models--TheBloke--Nous-Hermes-13B-GGML/snapshots/f1a48f90a07550e1ba30e347b2be69d4fa5e393b/nous-hermes-13b.ggmlv3.q4_K_S.bin"
    if os.path.exists(file_path):
        return file_path
    else:
        print("Downloading model...")
        sys.stdout.flush()
        file = hf_hub_download(repo_id=model_repo, filename=model_filename)
        print("Downloaded " + file)
        return file


def question_answer(context, question, max_tokens):
    mfile = download_model()

    # Structure the prompt to make it easier for the model to find the question.
    question1 = "\"\"\"\n" + question + "\n\"\"\"\n"
    text = context + "\n\nQuestion: " + question1 + "\nPlease use markdown formatting for answer. \nAnswer:\n"

    llm = Llama(model_path=mfile)
    output = llm(text, max_tokens=max_tokens, stop=["### Response"], echo=True)
    print(output)

    # With echo=True the completion starts with the prompt, so strip it off
    # and leave only the answer.
    answer = output['choices'][0]['text']
    answer = answer.replace(text, "", 1)

    # Render the markdown answer as HTML and return it along with the question.
    html_answer = markdown.markdown(answer)
    return question, html_answer

'''
Output is of the form:
{
  "id": "cmpl-xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx",
  "object": "text_completion",
  "created": 1679561337,
  "model": "./models/7B/ggml-model.bin",
  "choices": [
    {
      "text": "Q: Name the planets in the solar system? A: Mercury, Venus, Earth, Mars, Jupiter, Saturn, Uranus, Neptune and Pluto.",
      "index": 0,
      "logprobs": None,
      "finish_reason": "stop"
    }
  ],
  "usage": {
    "prompt_tokens": 14,
    "completion_tokens": 28,
    "total_tokens": 42
  }
}
'''

# old transformers code
#generator = pipeline(model=model, device_map="auto")
#return generator(text)

app = gr.Interface(fn=question_answer,
                   inputs=["text", "text", gr.Slider(33, 2333)],
                   outputs=["textbox", "html"])
app.launch()
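
# Possible refinement (a sketch, not part of the original app): question_answer()
# constructs a new Llama instance, and therefore re-reads the 13B weights from
# disk, on every Gradio request. Caching the instance at module level would make
# repeat queries much faster. The helper name `get_llm` and the `_llm` global are
# introduced here for illustration only; they reuse the Llama and download_model
# calls defined above.
#
#_llm = None
#
#def get_llm():
#    global _llm
#    if _llm is None:
#        _llm = Llama(model_path=download_model())
#    return _llm
#
# Inside question_answer(), `llm = Llama(model_path=mfile)` would then become:
#    llm = get_llm()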