diff --git a/.gitignore b/.gitignore index ddee9b537e47686f3d80562ad0addb0832ccffce..542424fdfcf87c78d0670bfbbfdd01fcce8d12e3 100644 --- a/.gitignore +++ b/.gitignore @@ -169,5 +169,16 @@ code/.chainlit/translations/ storage/logs/* vectorstores/* -*/.files/* +**/.files/* code/storage/models/ + +**/translations/en-US.json +**/translations/zh-CN.json + + +**/vectorstores/* + +**/private/students.json + +**/apps/*/storage/logs/* +**/apps/*/private/* \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index 2cd39e3489b4c2e12003dc59d16804cc1ef25128..1bbcbff852a3c30173c167f3199001c1ee4804f4 100644 --- a/Dockerfile +++ b/Dockerfile @@ -3,13 +3,18 @@ FROM python:3.11 WORKDIR /code COPY ./requirements.txt /code/requirements.txt +COPY ./setup.py /code/setup.py RUN pip install --upgrade pip RUN pip install --no-cache-dir -r /code/requirements.txt +RUN pip install -e . COPY . /code +# Copy .env file to the application directory +COPY .env /code/apps/ai_tutor/.env + # List the contents of the /code directory to verify files are copied correctly RUN ls -R /code @@ -17,12 +22,15 @@ RUN ls -R /code RUN chmod -R 777 /code # Create a logs directory and set permissions -RUN mkdir /code/logs && chmod 777 /code/logs +RUN mkdir /code/apps/ai_tutor/logs && chmod 777 /code/apps/ai_tutor/logs # Create a cache directory within the application's working directory RUN mkdir /.cache && chmod -R 777 /.cache -WORKDIR /code/code +WORKDIR /code/apps/ai_tutor + +# Expose the port the app runs on +EXPOSE 7860 RUN --mount=type=secret,id=HUGGINGFACEHUB_API_TOKEN,mode=0444,required=true RUN --mount=type=secret,id=OPENAI_API_KEY,mode=0444,required=true @@ -35,4 +43,4 @@ RUN --mount=type=secret,id=LITERAL_API_KEY_LOGGING,mode=0444,required=true RUN --mount=type=secret,id=CHAINLIT_AUTH_SECRET,mode=0444,required=true # Default command to run the application -CMD ["sh", "-c", "python -m modules.vectorstore.store_manager && uvicorn app:app --host 0.0.0.0 --port 7860"] +CMD python -m modules.vectorstore.store_manager --config_file config/config.yml --project_config_file config/project_config.yml && python -m uvicorn app:app --host 0.0.0.0 --port 7860 \ No newline at end of file diff --git a/Dockerfile.dev b/Dockerfile.dev index fe71dc7ef39e52d8433646a40075ef85d5ff4d07..c63abdafe2434209a44ac26ddef5794c456a207a 100644 --- a/Dockerfile.dev +++ b/Dockerfile.dev @@ -3,13 +3,18 @@ FROM python:3.11 WORKDIR /code COPY ./requirements.txt /code/requirements.txt +COPY ./setup.py /code/setup.py RUN pip install --upgrade pip RUN pip install --no-cache-dir -r /code/requirements.txt +RUN pip install -e . COPY . /code +# Copy .env file to the application directory +COPY .env /code/apps/ai_tutor/.env + # List the contents of the /code directory to verify files are copied correctly RUN ls -R /code @@ -17,15 +22,17 @@ RUN ls -R /code RUN chmod -R 777 /code # Create a logs directory and set permissions -RUN mkdir /code/logs && chmod 777 /code/logs +RUN mkdir /code/apps/ai_tutor/logs && chmod 777 /code/apps/ai_tutor/logs # Create a cache directory within the application's working directory RUN mkdir /.cache && chmod -R 777 /.cache -WORKDIR /code/code +WORKDIR /code/apps/ai_tutor + +RUN ls -R /code # Expose the port the app runs on -EXPOSE 8000 +EXPOSE 7860 # Default command to run the application -CMD ["sh", "-c", "python -m modules.vectorstore.store_manager && chainlit run main.py --host 0.0.0.0 --port 8000"] \ No newline at end of file +CMD python -m modules.vectorstore.store_manager --config_file config/config.yml --project_config_file config/project_config.yml && python -m uvicorn app:app --host 0.0.0.0 --port 7860 \ No newline at end of file diff --git a/README.md b/README.md index 1075eb5c1d0953ef17024e1e754e109e1eb1f977..13334f42e14510989b771a2f342ffbf749be08eb 100644 --- a/README.md +++ b/README.md @@ -9,6 +9,12 @@ app_port: 7860 --- # DL4DS Tutor 🏃 +![Build Status](https://github.com/DL4DS/dl4ds_tutor/actions/workflows/push_to_hf_space.yml/badge.svg) +![License](https://img.shields.io/github/license/DL4DS/dl4ds_tutor) +![GitHub stars](https://img.shields.io/github/stars/DL4DS/dl4ds_tutor) +![PRs Welcome](https://img.shields.io/badge/PRs-welcome-brightgreen.svg?style=flat-square) + + Check out the configuration reference at [Hugging Face Spaces Config Reference](https://huggingface.co/docs/hub/spaces-config-reference). You can find a "production" implementation of the Tutor running live at [DL4DS Tutor](https://dl4ds-dl4ds-tutor.hf.space/) from the @@ -30,26 +36,31 @@ Please visit [setup](https://dl4ds.github.io/dl4ds_tutor/guide/setup/) for more git clone https://github.com/DL4DS/dl4ds_tutor ``` -2. **Put your data under the `storage/data` directory** +2. Create your app in the apps folder. (An example is the `apps/ai_tutor` app) + ``` + cd apps + mkdir your_app + ``` + +2. **Put your data under the `apps/your_app/storage/data` directory** - Add URLs in the `urls.txt` file. - - Add other PDF files in the `storage/data` directory. + - Add other PDF files in the `apps/your_app/storage/data` directory. 3. **To test Data Loading (Optional)** ```bash - cd code - python -m modules.dataloader.data_loader --links "your_pdf_link" + cd apps/your_app + python -m modules.dataloader.data_loader --links "your_pdf_link" --config_file config/config.yml --project_config_file config/project_config.yml ``` 4. **Create the Vector Database** ```bash - cd code - python -m modules.vectorstore.store_manager + cd apps/your_app + python -m modules.vectorstore.store_manager --config_file config/config.yml --project_config_file config/project_config.yml ``` - - Note: You need to run the above command when you add new data to the `storage/data` directory, or if the `storage/data/urls.txt` file is updated. 6. **Run the FastAPI App** ```bash - cd code + cd apps/your_app uvicorn app:app --port 7860 ``` @@ -64,7 +75,7 @@ The HuggingFace Space is built using the `Dockerfile` in the repository. To run ```bash docker build --tag dev -f Dockerfile.dev . -docker run -it --rm -p 8000:8000 dev +docker run -it --rm -p 7860:7860 dev ``` ## Contributing diff --git a/code/.chainlit/config.toml b/apps/ai_tutor/.chainlit/config.toml similarity index 98% rename from code/.chainlit/config.toml rename to apps/ai_tutor/.chainlit/config.toml index a76128d17ea50c55e41689ebf23f36b400567d04..4ee8911d7816e23f173acaf01b98f158bbc62d1e 100644 --- a/code/.chainlit/config.toml +++ b/apps/ai_tutor/.chainlit/config.toml @@ -69,7 +69,7 @@ github = "https://github.com/DL4DS/dl4ds_tutor" # Specify a CSS file that can be used to customize the user interface. # The CSS file can be served from the public directory or via an external link. -custom_css = "/public/test.css" +custom_css = "/public/files/test.css" # Specify a Javascript file that can be used to customize the user interface. # The Javascript file can be served from the public directory. diff --git a/apps/ai_tutor/README.md b/apps/ai_tutor/README.md new file mode 100644 index 0000000000000000000000000000000000000000..ce60b629a88e9e59e51aec4e503994cd7bc9411f --- /dev/null +++ b/apps/ai_tutor/README.md @@ -0,0 +1,12 @@ +# WIP + + +## Run the encrypt_students script + +- If you don't want the emails to be public, run this script to encrypt the emails of the students. +- This will create a new file in the public/files/ directory. +- Place your file with the students' emails in the private/ directory (do not commit this file to the repository). + +```bash +python encrypt_students.py --students-file private/students.json --encrypted-students-file public/files/students_encrypted.json +``` diff --git a/code/app.py b/apps/ai_tutor/app.py similarity index 85% rename from code/app.py rename to apps/ai_tutor/app.py index 3b9393f4a3232a54c8f97fc834a3a85f743592f5..7b77ba2367bf33554488e2c389ac7313aa3d4693 100644 --- a/code/app.py +++ b/apps/ai_tutor/app.py @@ -8,24 +8,31 @@ from chainlit.utils import mount_chainlit import secrets import json import base64 -from modules.config.constants import ( +from config.constants import ( OAUTH_GOOGLE_CLIENT_ID, OAUTH_GOOGLE_CLIENT_SECRET, CHAINLIT_URL, - GITHUB_REPO, - DOCS_WEBSITE, - ALL_TIME_TOKENS_ALLOCATED, - TOKENS_LEFT, + EMAIL_ENCRYPTION_KEY, ) from fastapi.middleware.cors import CORSMiddleware from fastapi.staticfiles import StaticFiles -from modules.chat_processor.helpers import ( - get_user_details, +from helpers import ( get_time, reset_tokens_for_user, check_user_cooldown, - update_user_info, ) +from modules.chat_processor.helpers import get_user_details, update_user_info +from config.config_manager import config_manager +import hashlib + +# set config +config = config_manager.get_config().dict() + +# set constants +GITHUB_REPO = config["misc"]["github_repo"] +DOCS_WEBSITE = config["misc"]["docs_website"] +ALL_TIME_TOKENS_ALLOCATED = config["token_config"]["all_time_tokens_allocated"] +TOKENS_LEFT = config["token_config"]["tokens_left"] GOOGLE_CLIENT_ID = OAUTH_GOOGLE_CLIENT_ID GOOGLE_CLIENT_SECRET = OAUTH_GOOGLE_CLIENT_SECRET @@ -46,13 +53,8 @@ session_store = {} CHAINLIT_PATH = "/chainlit_tutor" # only admin is given any additional permissions for now -- no limits on tokens -USER_ROLES = { - "tgardos@bu.edu": ["instructor", "bu"], - "xthomas@bu.edu": ["admin", "instructor", "bu"], - "faridkar@bu.edu": ["instructor", "bu"], - "xavierohan1@gmail.com": ["guest"], - # Add more users and roles as needed -} +with open("public/files/students_encrypted.json", "r") as file: + USER_ROLES = json.load(file) # Create a Google OAuth flow flow = Flow.from_client_config( @@ -80,7 +82,20 @@ flow = Flow.from_client_config( def get_user_role(username: str): - return USER_ROLES.get(username, ["guest"]) # Default to "guest" role + + # Function to deterministically hash emails + def deterministic_hash(email, salt): + return hashlib.pbkdf2_hmac("sha256", email.encode(), salt, 100000).hex() + + # encrypt email (#FIXME: this is not the best way to do this, not really encryption, more like a hash) + encryption_salt = EMAIL_ENCRYPTION_KEY.encode() + encrypted_email = deterministic_hash(username, encryption_salt) + role = USER_ROLES.get(encrypted_email, ["guest"]) + + if "guest" in role: + return "unauthorized" + + return role async def get_user_info_from_cookie(request: Request): @@ -146,6 +161,11 @@ async def login_page(request: Request): # return response +@app.get("/unauthorized", response_class=HTMLResponse) +async def unauthorized(request: Request): + return templates.TemplateResponse("unauthorized.html", {"request": request}) + + @app.get("/login/google") async def login_google(request: Request): # Clear any existing session cookies to avoid conflicts with guest sessions @@ -176,6 +196,9 @@ async def auth_google(request: Request): profile_image = user_info.get("picture", "") role = get_user_role(email) + if role == "unauthorized": + return RedirectResponse("/unauthorized") + session_token = secrets.token_hex(16) session_store[session_token] = { "email": email, @@ -228,7 +251,11 @@ async def cooldown(request: Request): else: user_details.metadata["in_cooldown"] = False await update_user_info(user_details) - await reset_tokens_for_user(user_details) + await reset_tokens_for_user( + user_details, + config["token_config"]["tokens_left"], + config["token_config"]["regen_time"], + ) return RedirectResponse("/post-signin") @@ -262,7 +289,11 @@ async def post_signin(request: Request): return RedirectResponse("/cooldown") else: user_details.metadata["in_cooldown"] = False - await reset_tokens_for_user(user_details) + await reset_tokens_for_user( + user_details, + config["token_config"]["tokens_left"], + config["token_config"]["regen_time"], + ) if user_info: username = user_info["email"] @@ -335,7 +366,11 @@ async def get_tokens_left(request: Request): try: user_info = await get_user_info_from_cookie(request) user_details = await get_user_details(user_info["email"]) - await reset_tokens_for_user(user_details) + await reset_tokens_for_user( + user_details, + config["token_config"]["tokens_left"], + config["token_config"]["regen_time"], + ) tokens_left = user_details.metadata["tokens_left"] return {"tokens_left": tokens_left} except Exception as e: @@ -343,7 +378,7 @@ async def get_tokens_left(request: Request): return {"tokens_left": 0} -mount_chainlit(app=app, target="main.py", path=CHAINLIT_PATH) +mount_chainlit(app=app, target="chainlit_app.py", path=CHAINLIT_PATH) if __name__ == "__main__": import uvicorn diff --git a/code/main.py b/apps/ai_tutor/chainlit_app.py similarity index 95% rename from code/main.py rename to apps/ai_tutor/chainlit_app.py index e520efa9a0a2f6b85084978f4b05d1c336beefd4..914955a1d6af8903ce1059c5b13c5866190ac435 100644 --- a/code/main.py +++ b/apps/ai_tutor/chainlit_app.py @@ -1,12 +1,11 @@ import chainlit.data as cl_data import asyncio -from modules.config.constants import ( +from config.constants import ( LITERAL_API_KEY_LOGGING, LITERAL_API_URL, ) from modules.chat_processor.literal_ai import CustomLiteralDataLayer import json -import yaml from typing import Any, Dict, no_type_check import chainlit as cl from modules.chat.llm_tutor import LLMTutor @@ -18,11 +17,13 @@ from modules.chat.helpers import ( ) from modules.chat_processor.helpers import ( update_user_info, - get_time, + get_user_details, +) +from helpers import ( check_user_cooldown, reset_tokens_for_user, - get_user_details, ) +from helpers import get_time import copy from typing import Optional from chainlit.types import ThreadDict @@ -30,6 +31,7 @@ import time import base64 from langchain_community.callbacks import get_openai_callback from datetime import datetime, timezone +from config.config_manager import config_manager USER_TIMEOUT = 60_000 SYSTEM = "System" @@ -38,8 +40,8 @@ AGENT = "Agent" YOU = "User" ERROR = "Error" -with open("modules/config/config.yml", "r") as f: - config = yaml.safe_load(f) +# set config +config = config_manager.get_config().dict() async def setup_data_layer(): @@ -81,13 +83,6 @@ class Chatbot: """ self.config = config - async def _load_config(self): - """ - Load the configuration from a YAML file. - """ - with open("modules/config/config.yml", "r") as f: - return yaml.safe_load(f) - @no_type_check async def setup_llm(self): """ @@ -271,24 +266,24 @@ class Chatbot: print(e) return [ cl.Starter( - label="recording on CNNs?", + label="recording on Transformers?", message="Where can I find the recording for the lecture on Transformers?", - icon="/public/adv-screen-recorder-svgrepo-com.svg", + icon="/public/assets/images/starter_icons/adv-screen-recorder-svgrepo-com.svg", ), cl.Starter( - label="where's the slides?", + label="where's the schedule?", message="When are the lectures? I can't find the schedule.", - icon="/public/alarmy-svgrepo-com.svg", + icon="/public/assets/images/starter_icons/alarmy-svgrepo-com.svg", ), cl.Starter( label="Due Date?", message="When is the final project due?", - icon="/public/calendar-samsung-17-svgrepo-com.svg", + icon="/public/assets/images/starter_icons/calendar-samsung-17-svgrepo-com.svg", ), cl.Starter( label="Explain backprop.", message="I didn't understand the math behind backprop, could you explain it?", - icon="/public/acastusphoton-svgrepo-com.svg", + icon="/public/assets/images/starter_icons/acastusphoton-svgrepo-com.svg", ), ] @@ -305,7 +300,7 @@ class Chatbot: rename_dict = {"Chatbot": LLM} return rename_dict.get(orig_author, orig_author) - async def start(self, config=None): + async def start(self): """ Start the chatbot, initialize settings widgets, and display and load previous conversation if chat logging is enabled. @@ -313,10 +308,6 @@ class Chatbot: start_time = time.time() - self.config = ( - await self._load_config() if config is None else config - ) # Reload the configuration on chat resume - await self.make_llm_settings_widgets(self.config) # Reload the settings widgets user = cl.user_session.get("user") @@ -386,7 +377,11 @@ class Chatbot: # update user info with last message time user = cl.user_session.get("user") - await reset_tokens_for_user(user) + await reset_tokens_for_user( + user, + self.config["token_config"]["tokens_left"], + self.config["token_config"]["regen_time"], + ) updated_user = await get_user_details(user.identifier) user.metadata = updated_user.metadata cl.user_session.set("user", user) @@ -530,7 +525,6 @@ class Chatbot: elements=source_elements, author=LLM, actions=actions, - metadata=self.config, ).send() async def on_chat_resume(self, thread: ThreadDict): diff --git a/code/modules/config/config.yml b/apps/ai_tutor/config/config.yml similarity index 90% rename from code/modules/config/config.yml rename to apps/ai_tutor/config/config.yml index 3cdc2581f40daac5c156c5adcf5db213a4f51178..eed8ee7b9bf5e63c79f94af8116656ba48843325 100644 --- a/code/modules/config/config.yml +++ b/apps/ai_tutor/config/config.yml @@ -1,15 +1,15 @@ -log_dir: '../storage/logs' # str -log_chunk_dir: '../storage/logs/chunks' # str +log_dir: 'storage/logs' # str +log_chunk_dir: 'storage/logs/chunks' # str device: 'cpu' # str [cuda, cpu] vectorstore: load_from_HF: True # bool reparse_files: True # bool - data_path: '../storage/data' # str - url_file_path: '../storage/data/urls.txt' # str + data_path: 'storage/data' # str + url_file_path: 'storage/data/urls.txt' # str expand_urls: True # bool db_option : 'RAGatouille' # str [FAISS, Chroma, RAGatouille, RAPTOR] - db_path : '../vectorstores' # str + db_path : 'vectorstores' # str model : 'sentence-transformers/all-MiniLM-L6-v2' # str [sentence-transformers/all-MiniLM-L6-v2, text-embedding-ada-002'] search_top_k : 3 # int score_threshold : 0.2 # float diff --git a/apps/ai_tutor/config/config_manager.py b/apps/ai_tutor/config/config_manager.py new file mode 100644 index 0000000000000000000000000000000000000000..c480631191fc867fa4b966240dd4ea02da50498a --- /dev/null +++ b/apps/ai_tutor/config/config_manager.py @@ -0,0 +1,188 @@ +from pydantic import BaseModel, conint, confloat, HttpUrl +from typing import Optional, List +import yaml + + +class FaissParams(BaseModel): + index_path: str = "vectorstores/faiss.index" + index_type: str = "Flat" # Options: [Flat, HNSW, IVF] + index_dimension: conint(gt=0) = 384 + index_nlist: conint(gt=0) = 100 + index_nprobe: conint(gt=0) = 10 + + +class ColbertParams(BaseModel): + index_name: str = "new_idx" + + +class VectorStoreConfig(BaseModel): + load_from_HF: bool = True + reparse_files: bool = True + data_path: str = "storage/data" + url_file_path: str = "storage/data/urls.txt" + expand_urls: bool = True + db_option: str = "RAGatouille" # Options: [FAISS, Chroma, RAGatouille, RAPTOR] + db_path: str = "vectorstores" + model: str = ( + "sentence-transformers/all-MiniLM-L6-v2" # Options: [sentence-transformers/all-MiniLM-L6-v2, text-embedding-ada-002] + ) + search_top_k: conint(gt=0) = 3 + score_threshold: confloat(ge=0.0, le=1.0) = 0.2 + + faiss_params: Optional[FaissParams] = None + colbert_params: Optional[ColbertParams] = None + + +class OpenAIParams(BaseModel): + temperature: confloat(ge=0.0, le=1.0) = 0.7 + + +class LocalLLMParams(BaseModel): + temperature: confloat(ge=0.0, le=1.0) = 0.7 + repo_id: str = "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF" # HuggingFace repo id + filename: str = ( + "tinyllama-1.1b-chat-v1.0.Q5_0.gguf" # Specific name of gguf file in the repo + ) + model_path: str = ( + "storage/models/tinyllama-1.1b-chat-v1.0.Q5_0.gguf" # Path to the model file + ) + + +class LLMParams(BaseModel): + llm_arch: str = "langchain" # Options: [langchain] + use_history: bool = True + generate_follow_up: bool = False + memory_window: conint(ge=1) = 3 + llm_style: str = "Normal" # Options: [Normal, ELI5] + llm_loader: str = ( + "gpt-4o-mini" # Options: [local_llm, gpt-3.5-turbo-1106, gpt-4, gpt-4o-mini] + ) + openai_params: Optional[OpenAIParams] = None + local_llm_params: Optional[LocalLLMParams] = None + stream: bool = False + pdf_reader: str = "gpt" # Options: [llama, pymupdf, gpt] + + +class ChatLoggingConfig(BaseModel): + log_chat: bool = True + platform: str = "literalai" + callbacks: bool = True + + +class SplitterOptions(BaseModel): + use_splitter: bool = True + split_by_token: bool = True + remove_leftover_delimiters: bool = True + remove_chunks: bool = False + chunking_mode: str = "semantic" # Options: [fixed, semantic] + chunk_size: conint(gt=0) = 300 + chunk_overlap: conint(ge=0) = 30 + chunk_separators: List[str] = ["\n\n", "\n", " ", ""] + front_chunks_to_remove: Optional[conint(ge=0)] = None + last_chunks_to_remove: Optional[conint(ge=0)] = None + delimiters_to_remove: List[str] = ["\t", "\n", " ", " "] + + +class RetrieverConfig(BaseModel): + retriever_hf_paths: dict[str, str] = {"RAGatouille": "XThomasBU/Colbert_Index"} + + +class MetadataConfig(BaseModel): + metadata_links: List[HttpUrl] = [ + "https://dl4ds.github.io/sp2024/lectures/", + "https://dl4ds.github.io/sp2024/schedule/", + ] + slide_base_link: HttpUrl = "https://dl4ds.github.io" + + +class TokenConfig(BaseModel): + cooldown_time: conint(gt=0) = 60 + regen_time: conint(gt=0) = 180 + tokens_left: conint(gt=0) = 2000 + all_time_tokens_allocated: conint(gt=0) = 1000000 + + +class MiscConfig(BaseModel): + github_repo: HttpUrl = "https://github.com/DL4DS/dl4ds_tutor" + docs_website: HttpUrl = "https://dl4ds.github.io/dl4ds_tutor/" + + +class APIConfig(BaseModel): + timeout: conint(gt=0) = 60 + + +class Config(BaseModel): + log_dir: str = "storage/logs" + log_chunk_dir: str = "storage/logs/chunks" + device: str = "cpu" # Options: ['cuda', 'cpu'] + + vectorstore: VectorStoreConfig + llm_params: LLMParams + chat_logging: ChatLoggingConfig + splitter_options: SplitterOptions + retriever: RetrieverConfig + metadata: MetadataConfig + token_config: TokenConfig + misc: MiscConfig + api_config: APIConfig + + +class ConfigManager: + def __init__(self, config_path: str, project_config_path: str): + self.config_path = config_path + self.project_config_path = project_config_path + self.config = self.load_config() + self.validate_config() + + def load_config(self) -> Config: + with open(self.config_path, "r") as f: + config_data = yaml.safe_load(f) + + with open(self.project_config_path, "r") as f: + project_config_data = yaml.safe_load(f) + + # Merge the two configurations + merged_config = {**config_data, **project_config_data} + + return Config(**merged_config) + + def get_config(self) -> Config: + return ConfigWrapper(self.config) + + def validate_config(self): + # If any required fields are missing, raise an error + # required_fields = [ + # "vectorstore", "llm_params", "chat_logging", "splitter_options", + # "retriever", "metadata", "token_config", "misc", "api_config" + # ] + # for field in required_fields: + # if not hasattr(self.config, field): + # raise ValueError(f"Missing required configuration field: {field}") + + # # Validate types of specific fields + # if not isinstance(self.config.vectorstore, VectorStoreConfig): + # raise TypeError("vectorstore must be an instance of VectorStoreConfig") + # if not isinstance(self.config.llm_params, LLMParams): + # raise TypeError("llm_params must be an instance of LLMParams") + pass + + +class ConfigWrapper: + def __init__(self, config: Config): + self._config = config + + def __getitem__(self, key): + return getattr(self._config, key) + + def __getattr__(self, name): + return getattr(self._config, name) + + def dict(self): + return self._config.dict() + + +# Usage +config_manager = ConfigManager( + config_path="config/config.yml", project_config_path="config/project_config.yml" +) +# config = config_manager.get_config().dict() diff --git a/code/modules/config/constants.py b/apps/ai_tutor/config/constants.py similarity index 80% rename from code/modules/config/constants.py rename to apps/ai_tutor/config/constants.py index c22b905ba6f720ee0fa8ad5bb7eb68509068bfc3..506d0afc61c05719371979ba0fa60e491c72593f 100644 --- a/code/modules/config/constants.py +++ b/apps/ai_tutor/config/constants.py @@ -3,15 +3,6 @@ import os load_dotenv() -TIMEOUT = 60 -COOLDOWN_TIME = 60 -REGEN_TIME = 180 -TOKENS_LEFT = 2000 -ALL_TIME_TOKENS_ALLOCATED = 1000000 - -GITHUB_REPO = "https://github.com/DL4DS/dl4ds_tutor" -DOCS_WEBSITE = "https://dl4ds.github.io/dl4ds_tutor/" - # API Keys - Loaded from the .env file OPENAI_API_KEY = os.getenv("OPENAI_API_KEY") @@ -20,6 +11,7 @@ HUGGINGFACE_TOKEN = os.getenv("HUGGINGFACE_TOKEN") LITERAL_API_KEY_LOGGING = os.getenv("LITERAL_API_KEY_LOGGING") LITERAL_API_URL = os.getenv("LITERAL_API_URL") CHAINLIT_URL = os.getenv("CHAINLIT_URL") +EMAIL_ENCRYPTION_KEY = os.getenv("EMAIL_ENCRYPTION_KEY") OAUTH_GOOGLE_CLIENT_ID = os.getenv("OAUTH_GOOGLE_CLIENT_ID") OAUTH_GOOGLE_CLIENT_SECRET = os.getenv("OAUTH_GOOGLE_CLIENT_SECRET") diff --git a/apps/ai_tutor/config/project_config.yml b/apps/ai_tutor/config/project_config.yml new file mode 100644 index 0000000000000000000000000000000000000000..cc593aed17ac55608408302c18ed84129bc5efe3 --- /dev/null +++ b/apps/ai_tutor/config/project_config.yml @@ -0,0 +1,20 @@ +retriever: + retriever_hf_paths: + RAGatouille: "XThomasBU/Colbert_Index" + +metadata: + metadata_links: ["https://dl4ds.github.io/sp2024/lectures/", "https://dl4ds.github.io/sp2024/schedule/"] + slide_base_link: "https://dl4ds.github.io" + +token_config: + cooldown_time: 60 + regen_time: 180 + tokens_left: 2000 + all_time_tokens_allocated: 1000000 + +misc: + github_repo: "https://github.com/DL4DS/dl4ds_tutor" + docs_website: "https://dl4ds.github.io/dl4ds_tutor/" + +api_config: + timeout: 60 diff --git a/code/modules/config/prompts.py b/apps/ai_tutor/config/prompts.py similarity index 100% rename from code/modules/config/prompts.py rename to apps/ai_tutor/config/prompts.py diff --git a/apps/ai_tutor/encrypt_students.py b/apps/ai_tutor/encrypt_students.py new file mode 100644 index 0000000000000000000000000000000000000000..1eccf5c89a57497aaa6b22549eeef092cd0c9d80 --- /dev/null +++ b/apps/ai_tutor/encrypt_students.py @@ -0,0 +1,53 @@ +import os +import hashlib +import json +import argparse +from dotenv import load_dotenv + + +# Function to deterministically hash emails +def deterministic_hash(email, salt): + return hashlib.pbkdf2_hmac("sha256", email.encode(), salt, 100000).hex() + + +def main(args): + # Load the .env file + load_dotenv() + + # Get the encryption key (salt) + encryption_salt = os.getenv("EMAIL_ENCRYPTION_KEY").encode() + + # Load emails from the specified JSON file + with open(args.students_file, "r") as file: + emails = json.load(file) + + # Replace emails with deterministic hashed emails, {hashed_email: [roles]} + hashed_emails = { + deterministic_hash(email, encryption_salt): roles + for email, roles in emails.items() + } + + # Save hashed emails to the specified encrypted JSON file + with open(args.encrypted_students_file, "w") as file: + json.dump(hashed_emails, file) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="Encrypt student emails in a JSON file." + ) + parser.add_argument( + "--students-file", + type=str, + default="private/students.json", + help="Path to the students JSON file", + ) + parser.add_argument( + "--encrypted-students-file", + type=str, + default="public/files/students_encrypted.json", + help="Path to save the encrypted students JSON file", + ) + args = parser.parse_args() + + main(args) diff --git a/apps/ai_tutor/helpers.py b/apps/ai_tutor/helpers.py new file mode 100644 index 0000000000000000000000000000000000000000..3384c9ea1bb2308bcc86ad9e1ea04152f4d943da --- /dev/null +++ b/apps/ai_tutor/helpers.py @@ -0,0 +1,94 @@ +from datetime import datetime, timedelta, timezone +import tiktoken +from modules.chat_processor.helpers import update_user_info, convert_to_dict + + +def get_time(): + return datetime.now(timezone.utc).isoformat() + + +async def check_user_cooldown( + user_info, current_time, COOLDOWN_TIME, TOKENS_LEFT, REGEN_TIME +): + # # Check if no tokens left + tokens_left = user_info.metadata.get("tokens_left", 0) + if tokens_left > 0 and not user_info.metadata.get("in_cooldown", False): + return False, None + + user_info = convert_to_dict(user_info) + last_message_time_str = user_info["metadata"].get("last_message_time") + + # Convert from ISO format string to datetime object and ensure UTC timezone + last_message_time = datetime.fromisoformat(last_message_time_str).replace( + tzinfo=timezone.utc + ) + current_time = datetime.fromisoformat(current_time).replace(tzinfo=timezone.utc) + + # Calculate the elapsed time + elapsed_time = current_time - last_message_time + elapsed_time_in_seconds = elapsed_time.total_seconds() + + # Calculate when the cooldown period ends + cooldown_end_time = last_message_time + timedelta(seconds=COOLDOWN_TIME) + cooldown_end_time_iso = cooldown_end_time.isoformat() + + # Debug: Print the cooldown end time + print(f"Cooldown end time (ISO): {cooldown_end_time_iso}") + + # Check if the user is still in cooldown + if elapsed_time_in_seconds < COOLDOWN_TIME: + return True, cooldown_end_time_iso # Return in ISO 8601 format + + user_info["metadata"]["in_cooldown"] = False + # If not in cooldown, regenerate tokens + await reset_tokens_for_user(user_info, TOKENS_LEFT, REGEN_TIME) + + return False, None + + +async def reset_tokens_for_user(user_info, TOKENS_LEFT, REGEN_TIME): + user_info = convert_to_dict(user_info) + last_message_time_str = user_info["metadata"].get("last_message_time") + + last_message_time = datetime.fromisoformat(last_message_time_str).replace( + tzinfo=timezone.utc + ) + current_time = datetime.fromisoformat(get_time()).replace(tzinfo=timezone.utc) + + # Calculate the elapsed time since the last message + elapsed_time_in_seconds = (current_time - last_message_time).total_seconds() + + # Current token count (can be negative) + current_tokens = user_info["metadata"].get("tokens_left_at_last_message", 0) + current_tokens = min(current_tokens, TOKENS_LEFT) + + # Maximum tokens that can be regenerated + max_tokens = user_info["metadata"].get("max_tokens", TOKENS_LEFT) + + # Calculate how many tokens should have been regenerated proportionally + if current_tokens < max_tokens: + # Calculate the regeneration rate per second based on REGEN_TIME for full regeneration + regeneration_rate_per_second = max_tokens / REGEN_TIME + + # Calculate how many tokens should have been regenerated based on the elapsed time + tokens_to_regenerate = int( + elapsed_time_in_seconds * regeneration_rate_per_second + ) + + # Ensure the new token count does not exceed max_tokens + new_token_count = min(current_tokens + tokens_to_regenerate, max_tokens) + + print( + f"\n\n Adding {tokens_to_regenerate} tokens to the user, Time elapsed: {elapsed_time_in_seconds} seconds, Tokens after regeneration: {new_token_count}, Tokens before: {current_tokens} \n\n" + ) + + # Update the user's token count + user_info["metadata"]["tokens_left"] = new_token_count + + await update_user_info(user_info) + + +def get_num_tokens(text, model): + encoding = tiktoken.encoding_for_model(model) + tokens = encoding.encode(text) + return len(tokens) diff --git a/apps/ai_tutor/private/placeholder_students_file.json b/apps/ai_tutor/private/placeholder_students_file.json new file mode 100644 index 0000000000000000000000000000000000000000..61aeed079120abbb24c58aeafbfa317374fd7309 --- /dev/null +++ b/apps/ai_tutor/private/placeholder_students_file.json @@ -0,0 +1,5 @@ +{ + "abc@bu.edu": ["student", "bu"], + "xyz@bu.edu": ["student", "bu"], + "test@bu.edu": ["admin", "instructor", "bu"] +} \ No newline at end of file diff --git a/code/public/avatars/ai_tutor.png b/apps/ai_tutor/public/assets/images/avatars/ai-tutor.png similarity index 100% rename from code/public/avatars/ai_tutor.png rename to apps/ai_tutor/public/assets/images/avatars/ai-tutor.png diff --git a/code/public/logo_dark.png b/apps/ai_tutor/public/assets/images/avatars/ai_tutor.png similarity index 100% rename from code/public/logo_dark.png rename to apps/ai_tutor/public/assets/images/avatars/ai_tutor.png diff --git a/code/public/acastusphoton-svgrepo-com.svg b/apps/ai_tutor/public/assets/images/starter_icons/acastusphoton-svgrepo-com.svg similarity index 100% rename from code/public/acastusphoton-svgrepo-com.svg rename to apps/ai_tutor/public/assets/images/starter_icons/acastusphoton-svgrepo-com.svg diff --git a/code/public/adv-screen-recorder-svgrepo-com.svg b/apps/ai_tutor/public/assets/images/starter_icons/adv-screen-recorder-svgrepo-com.svg similarity index 100% rename from code/public/adv-screen-recorder-svgrepo-com.svg rename to apps/ai_tutor/public/assets/images/starter_icons/adv-screen-recorder-svgrepo-com.svg diff --git a/code/public/alarmy-svgrepo-com.svg b/apps/ai_tutor/public/assets/images/starter_icons/alarmy-svgrepo-com.svg similarity index 100% rename from code/public/alarmy-svgrepo-com.svg rename to apps/ai_tutor/public/assets/images/starter_icons/alarmy-svgrepo-com.svg diff --git a/code/public/calendar-samsung-17-svgrepo-com.svg b/apps/ai_tutor/public/assets/images/starter_icons/calendar-samsung-17-svgrepo-com.svg similarity index 100% rename from code/public/calendar-samsung-17-svgrepo-com.svg rename to apps/ai_tutor/public/assets/images/starter_icons/calendar-samsung-17-svgrepo-com.svg diff --git a/apps/ai_tutor/public/files/students_encrypted.json b/apps/ai_tutor/public/files/students_encrypted.json new file mode 100644 index 0000000000000000000000000000000000000000..826570552b1dcbfdf9cc1472a7bb5a3c68d9c2e5 --- /dev/null +++ b/apps/ai_tutor/public/files/students_encrypted.json @@ -0,0 +1 @@ +{"7f1cacca66ee914ddde2ee20e0f2c96651d60cd8aabd310ef25a9e6d88f42df0": ["instructor", "bu"], "f74d264b6b5b2b4c10ce69e4ec16e869e01cb5eb668ed846aa8f6dae5c96cda0": ["admin", "instructor", "bu"], "53401356a874b1539775c73a8564d5e5f4f840441630c9cf649e16d201454f20": ["instructor", "bu"]} \ No newline at end of file diff --git a/apps/ai_tutor/public/files/test.css b/apps/ai_tutor/public/files/test.css new file mode 100644 index 0000000000000000000000000000000000000000..dc4787b22a872e4050074bb2854632dd4b0b9e80 --- /dev/null +++ b/apps/ai_tutor/public/files/test.css @@ -0,0 +1,32 @@ +a[href*='https://github.com/Chainlit/chainlit'] { + visibility: hidden; +} + +/* Hide the default avatar image */ +.MuiAvatar-root img.MuiAvatar-img { + display: none; + } + +/* Target the container of the image and set a custom background image */ +.MuiAvatar-root.MuiAvatar-circular.css-m2icte { + background-image: url('/public/assets/images/avatars/ai-tutor.png'); /* Replace with your custom image URL */ + background-size: cover; /* Ensure the image covers the entire container */ + background-position: center; /* Center the image */ + width: 100px; /* Ensure the dimensions match the original */ + height: 100px; /* Ensure the dimensions match the original */ + border-radius: 50%; /* Maintain circular shape */ +} +.MuiAvatar-root.MuiAvatar-circular.css-v72an7 { + background-image: url('/public/assets/images/avatars/ai-tutor.png'); /* Replace with your custom image URL */ + background-size: cover; /* Ensure the image covers the entire container */ + background-position: center; /* Center the image */ + width: 40px; /* Ensure the dimensions match the original */ + height: 40px; /* Ensure the dimensions match the original */ + border-radius: 50%; /* Maintain circular shape */ +} + +.MuiStack-root.css-14k6mw7 img { + content: url('/public/assets/images/avatars/ai-tutor.png'); /* Replace with the path to your custom image */ + max-height: 45px; /* Ensure the height remains consistent */ + max-width: 45px; /* Ensure the width remains consistent */ +} \ No newline at end of file diff --git a/code/public/logo_light.png b/apps/ai_tutor/public/logo_dark.png similarity index 100% rename from code/public/logo_light.png rename to apps/ai_tutor/public/logo_dark.png diff --git a/apps/ai_tutor/public/logo_light.png b/apps/ai_tutor/public/logo_light.png new file mode 100644 index 0000000000000000000000000000000000000000..1b9e56d72a9738e8cd4838bfb7dce31fc92cd9ee Binary files /dev/null and b/apps/ai_tutor/public/logo_light.png differ diff --git a/storage/data/urls.txt b/apps/ai_tutor/storage/data/urls.txt similarity index 100% rename from storage/data/urls.txt rename to apps/ai_tutor/storage/data/urls.txt diff --git a/code/templates/cooldown.html b/apps/ai_tutor/templates/cooldown.html similarity index 98% rename from code/templates/cooldown.html rename to apps/ai_tutor/templates/cooldown.html index 099df21662c15b39d44cfa84f7dfd740f15d396c..a735a21a1b2e47d02b7cd0081102f29c588ad98c 100644 --- a/code/templates/cooldown.html +++ b/apps/ai_tutor/templates/cooldown.html @@ -121,7 +121,7 @@
It seems like you need to wait a bit before starting a new session.
diff --git a/code/templates/dashboard.html b/apps/ai_tutor/templates/dashboard.html similarity index 97% rename from code/templates/dashboard.html rename to apps/ai_tutor/templates/dashboard.html index a06c54fc633417abc528baf0991035941a1e0f2e..24d67ab67109213e9faee076ca9f0ed3523d0ec6 100644 --- a/code/templates/dashboard.html +++ b/apps/ai_tutor/templates/dashboard.html @@ -123,7 +123,7 @@Ready to start your AI tutoring session?
Tokens Left: {{ tokens_left }}
diff --git a/code/templates/error.html b/apps/ai_tutor/templates/error.html similarity index 100% rename from code/templates/error.html rename to apps/ai_tutor/templates/error.html diff --git a/code/templates/error_404.html b/apps/ai_tutor/templates/error_404.html similarity index 100% rename from code/templates/error_404.html rename to apps/ai_tutor/templates/error_404.html diff --git a/code/templates/login.html b/apps/ai_tutor/templates/login.html similarity index 98% rename from code/templates/login.html rename to apps/ai_tutor/templates/login.html index 934dd32ff0726307299951529c625f0c678d906e..d9551f546070161c5af23182a6bb525adcb3541d 100644 --- a/code/templates/login.html +++ b/apps/ai_tutor/templates/login.html @@ -107,7 +107,7 @@Welcome to the DS598 AI Tutor. Please sign in to continue.