vishalsh13 committed on
Commit
975d1b2
·
1 Parent(s): a018b00

Flask app commit

Browse files
Dockerfile CHANGED
@@ -1,26 +1,25 @@
1
- # Dockerfile
 
2
 
3
- FROM python:3.9-slim
 
4
 
5
- WORKDIR /app
 
 
 
6
 
7
- # Copy dependencies and install them
8
- COPY requirements.txt .
9
- RUN pip install --no-cache-dir -r requirements.txt
10
 
11
- # Copy application files
12
  COPY . .
13
 
14
- # Set environment variables for writable directories
15
- ENV HF_HOME=/tmp/.cache/huggingface
16
- # Cache directory for Hugging Face
17
- ENV TRANSFORMERS_CACHE=/tmp/.cache/transformers
18
- # Legacy cache for compatibility
19
- ENV BASE_PATH=/tmp/vector_db
20
- # Base path for vector database and metadata
21
 
22
- # Set environment variable for Hugging Face authentication token
23
- # The `hkey` secret from Hugging Face Spaces will automatically populate this
24
- ENV hkey=${hkey}
25
 
26
- CMD ["python", "app.py"]
 
 
1
# Base image with GPU support for Hugging Face Spaces
FROM nvidia/cuda:11.8.0-base-ubuntu20.04

# Keep apt from prompting for input during the image build
ENV DEBIAN_FRONTEND=noninteractive

# Install Python and system tools.
# `zip` is installed alongside `unzip` so archives can be both created and
# extracted from the command line inside the container.
RUN apt-get update && apt-get install -y \
        python3 python3-pip git wget zip unzip && \
    rm -rf /var/lib/apt/lists/*

# Set the working directory
WORKDIR /app

# Install Python dependencies before copying the sources so this layer is
# reused from the build cache whenever only application code changes.
COPY requirements.txt .
RUN python3 -m pip install --no-cache-dir -r requirements.txt

# Copy the application files
COPY . .

# Expose the default port used by Spaces
EXPOSE 7860

# Start the Flask app
CMD ["python3", "run.py"]
app/__init__.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from flask import Flask
2
+
3
def create_app():
    """Build and configure the Flask application.

    Sets the upload configuration, makes sure the upload folder exists, and
    registers the routes blueprint.

    Returns:
        The configured ``Flask`` application instance.
    """
    # Local import: this module's import block only brings in Flask.
    import os

    v_app = Flask(__name__)
    v_app.config['UPLOAD_FOLDER'] = 'app/uploads/'
    v_app.config['ALLOWED_EXTENSIONS'] = {'zip'}

    # Create the upload folder up front; nothing else in the visible code
    # creates it, and saving an upload into a missing directory fails.
    # The path is relative, so it resolves against the working directory.
    os.makedirs(v_app.config['UPLOAD_FOLDER'], exist_ok=True)

    # The blueprint is imported inside the factory, within an app context,
    # mirroring the original registration order.
    with v_app.app_context():
        from .routes import v_bp
        v_app.register_blueprint(v_bp)

    return v_app
app/routes.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from flask import Blueprint, render_template, request, send_file, jsonify
3
+ from .utils.zip_handler import handle_zip_upload
4
+ from .utils.vector_db import process_files_to_vectors
5
+
6
+ v_bp = Blueprint('routes', __name__)
7
+
8
def _safe_zip_name(v_raw_name):
    """Return the bare file name of an upload, or None if it is not a .zip name."""
    if not v_raw_name:
        return None
    # Clients may send a full or crafted path; keep only the last component
    # so the upload can never be written outside the upload folder.
    v_name = os.path.basename(v_raw_name.replace('\\', '/'))
    _, v_ext = os.path.splitext(v_name)
    if v_ext.lower() != '.zip':
        return None
    return v_name


def _write_dir_archive(v_src_dir, v_dest_zip):
    """Write a fresh zip of v_src_dir (directories and files) to v_dest_zip."""
    import zipfile

    v_dest_abs = os.path.abspath(v_dest_zip)
    # Mode 'w' replaces any previous archive, so results from earlier
    # requests are never carried over into this download.
    with zipfile.ZipFile(v_dest_zip, 'w', zipfile.ZIP_DEFLATED) as obj_zip:
        for v_root, _, v_files in os.walk(v_src_dir):
            # Record the directory itself so empty folders are preserved.
            obj_zip.write(v_root)
            for v_file in v_files:
                v_path = os.path.join(v_root, v_file)
                # Never add the archive to itself if it lives in the tree.
                if os.path.abspath(v_path) == v_dest_abs:
                    continue
                obj_zip.write(v_path)


@v_bp.route('/', methods=['GET', 'POST'])
def home():
    """Serve the upload form (GET) or turn an uploaded zip into a vector DB (POST).

    On POST the uploaded ``.zip`` is saved under ``app/uploads``, extracted
    with ``handle_zip_upload``, processed with ``process_files_to_vectors``,
    and the resulting folder is zipped and returned as an attachment.

    Returns:
        The rendered ``index.html`` page for GET; the result archive for a
        valid POST; a JSON error with HTTP 400 when the upload is missing,
        is not a ``.zip`` file, or is not a readable zip archive.
    """
    import zipfile

    if request.method != 'POST':
        return render_template('index.html')

    v_uploaded_file = request.files.get('file')
    v_file_name = _safe_zip_name(v_uploaded_file.filename) if v_uploaded_file else None
    if v_file_name is None:
        return jsonify({'error': 'Please upload a valid zip file.'}), 400

    v_upload_dir = 'app/uploads'
    os.makedirs(v_upload_dir, exist_ok=True)
    v_upload_path = os.path.join(v_upload_dir, v_file_name)
    v_uploaded_file.save(v_upload_path)

    # Process the zip file; a file that merely carries a .zip name but is
    # not a real archive is a client error, not a server failure.
    try:
        v_output_path = handle_zip_upload(v_upload_path)
    except zipfile.BadZipFile:
        return jsonify({'error': 'Please upload a valid zip file.'}), 400

    # Create or update vector database
    v_result_path = process_files_to_vectors(v_output_path)

    # Compress the result in-process instead of shelling out to `zip` with
    # an interpolated command line built from a user-influenced path.
    result_zip = os.path.join('app/uploads/vectors', 'vector_db.zip')
    os.makedirs(os.path.dirname(result_zip), exist_ok=True)
    _write_dir_archive(v_result_path, result_zip)

    # Send an absolute path: the archive was written relative to the working
    # directory, whereas Flask resolves relative send_file paths against the
    # application root.
    return send_file(os.path.abspath(result_zip), as_attachment=True)
app/static/logo.png ADDED
app/templates/index.html ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Vector DB Creator</title>
</head>
<body>
    <header>
        <img src="{{ url_for('static', filename='logo.png') }}" alt="Logo" style="float:left; width:50px;">
        <h1>Change your PDF, PPT, and CSV data to Vector DB</h1>
    </header>
    <main>
        <p>Upload your data files or an existing vector database to create or update a vector DB.</p>
        {# Build the target with url_for so the form keeps working if the blueprint is mounted under a URL prefix. #}
        <form action="{{ url_for('routes.home') }}" method="POST" enctype="multipart/form-data">
            <label for="file">Upload ZIP File:</label>
            <input type="file" name="file" id="file" accept=".zip" required>
            <button type="submit">Upload</button>
        </form>
        <p>Your vector DB will be available for download after processing.</p>
    </main>
</body>
</html>
app/utils/file_handler.py ADDED
File without changes
app/utils/vector_db.py ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+
3
def process_files_to_vectors(v_folder_path):
    """Create placeholder vector files for every supported document in a folder.

    Walks ``v_folder_path`` recursively and, for each ``.pdf``, ``.ppt``,
    ``.pptx`` or ``.csv`` file (matched case-insensitively), writes a
    ``.vec`` file into ``<v_folder_path>/vectors``. The vector content is
    still a placeholder string, not a real embedding.

    Args:
        v_folder_path: Directory containing the extracted input files.

    Returns:
        Path of the ``vectors`` output folder inside ``v_folder_path``.
    """
    v_vector_folder = os.path.join(v_folder_path, 'vectors')
    os.makedirs(v_vector_folder, exist_ok=True)

    v_supported = ('.pdf', '.ppt', '.pptx', '.csv')
    v_output_norm = os.path.normpath(v_vector_folder)

    for v_root, v_dirs, v_files in os.walk(v_folder_path):
        # Prune the output folder in place so results are never re-read as
        # input on this or a later run.
        v_dirs[:] = [
            v_dir for v_dir in v_dirs
            if os.path.normpath(os.path.join(v_root, v_dir)) != v_output_norm
        ]
        for v_file in v_files:
            if not v_file.lower().endswith(v_supported):
                continue
            # Name the output after the path relative to the input folder so
            # same-named files in different subfolders do not overwrite each
            # other; top-level files keep the plain "<name>.vec" form.
            v_rel_path = os.path.relpath(os.path.join(v_root, v_file), v_folder_path)
            v_vector_name = v_rel_path.replace(os.sep, '__') + '.vec'
            v_vector_file = os.path.join(v_vector_folder, v_vector_name)
            # Placeholder: real vectorisation of the file goes here.
            with open(v_vector_file, 'w', encoding='utf-8') as obj_out:
                obj_out.write(f'Vector representation of {v_file}')

    return v_vector_folder
app/utils/zip_handler.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import zipfile
3
+
4
def handle_zip_upload(v_zip_path):
    """Extract a zip archive into a sibling folder named after the archive.

    The destination is ``v_zip_path`` with its file extension removed.

    Args:
        v_zip_path: Path of the zip file to extract.

    Returns:
        Path of the directory the archive was extracted into.
    """
    # Drop the extension to derive the destination directory.
    v_target_dir, _v_extension = os.path.splitext(v_zip_path)

    with zipfile.ZipFile(v_zip_path) as obj_archive:
        obj_archive.extractall(path=v_target_dir)

    return v_target_dir
requirements.txt CHANGED
@@ -1,6 +1,9 @@
1
- faiss-cpu
2
- huggingface-hub
3
- sentence-transformers
4
- numpy
5
- pandas
6
- flask
 
 
 
 
1
+ Flask==2.3.2
2
+ transformers==4.27.4
3
+ sentence-transformers==2.2.2
4
+ torch==2.0.1
5
+ torchvision==0.15.2
6
+ numpy==1.24.2
7
+ PyMuPDF==1.22.5
8
+ python-pptx==0.6.21
9
+ pandas==1.5.3
run.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ from app import create_app
2
+
3
if __name__ == "__main__":
    # Script entry point: build the app and serve it on all interfaces on
    # port 7860.
    import os

    obj_app = create_app()

    # Debug mode enables Werkzeug's interactive debugger, which allows
    # arbitrary code execution and must not be reachable on a publicly bound
    # host. Keep it off unless explicitly requested via FLASK_DEBUG.
    v_debug = os.environ.get('FLASK_DEBUG', '').strip().lower() in ('1', 'true', 'yes', 'on')
    obj_app.run(host='0.0.0.0', port=7860, debug=v_debug)