Spaces:
Runtime error
Runtime error
Commit
·
975d1b2
1
Parent(s):
a018b00
Flask app commit
Browse files- Dockerfile +17 -18
- app/__init__.py +12 -0
- app/routes.py +29 -0
- app/static/logo.png +0 -0
- app/templates/index.html +23 -0
- app/utils/file_handler.py +0 -0
- app/utils/vector_db.py +18 -0
- app/utils/zip_handler.py +8 -0
- requirements.txt +9 -6
- run.py +5 -0
Dockerfile
CHANGED
@@ -1,26 +1,25 @@
|
|
1 |
-
#
|
|
|
2 |
|
3 |
-
|
|
|
4 |
|
5 |
-
|
|
|
|
|
|
|
6 |
|
7 |
-
#
|
8 |
-
|
9 |
-
RUN pip install --no-cache-dir -r requirements.txt
|
10 |
|
11 |
-
# Copy application files
|
12 |
COPY . .
|
13 |
|
14 |
-
#
|
15 |
-
|
16 |
-
# Cache directory for Hugging Face
|
17 |
-
ENV TRANSFORMERS_CACHE=/tmp/.cache/transformers
|
18 |
-
# Legacy cache for compatibility
|
19 |
-
ENV BASE_PATH=/tmp/vector_db
|
20 |
-
# Base path for vector database and metadata
|
21 |
|
22 |
-
#
|
23 |
-
|
24 |
-
ENV hkey=${hkey}
|
25 |
|
26 |
-
|
|
|
|
1 |
+
# Base image with GPU support for Hugging Face Spaces
FROM nvidia/cuda:11.8.0-base-ubuntu20.04

# Keep apt from prompting for input during the image build
ENV DEBIAN_FRONTEND=noninteractive

# Install Python and system tools. `zip` is installed alongside `unzip` so
# that code invoking the `zip` command at runtime finds the binary.
RUN apt-get update && apt-get install -y \
    python3 python3-pip git wget unzip zip && \
    rm -rf /var/lib/apt/lists/*

# Set the working directory
WORKDIR /app

# Install Python dependencies before copying the application so this layer
# stays cached until requirements.txt itself changes.
COPY requirements.txt .
RUN python3 -m pip install --no-cache-dir -r requirements.txt

# Copy the application files
COPY . .

# Expose the default port used by Spaces
EXPOSE 7860

# Start the Flask app
CMD ["python3", "run.py"]
|
app/__init__.py
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from flask import Flask
|
2 |
+
|
3 |
+
def create_app():
    """Build the Flask application and attach the routes blueprint.

    Sets the ``UPLOAD_FOLDER`` and ``ALLOWED_EXTENSIONS`` config entries and
    registers ``v_bp`` from the sibling ``routes`` module.

    Returns:
        The configured ``Flask`` instance.
    """
    flask_app = Flask(__name__)
    flask_app.config.update(
        UPLOAD_FOLDER='app/uploads/',
        ALLOWED_EXTENSIONS={'zip'},
    )

    # The blueprint is imported lazily, inside an application context,
    # rather than at module import time.
    with flask_app.app_context():
        from .routes import v_bp

        flask_app.register_blueprint(v_bp)

    return flask_app
|
app/routes.py
ADDED
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
from flask import Blueprint, render_template, request, send_file, jsonify
|
3 |
+
from .utils.zip_handler import handle_zip_upload
|
4 |
+
from .utils.vector_db import process_files_to_vectors
|
5 |
+
|
6 |
+
v_bp = Blueprint('routes', __name__)
|
7 |
+
|
8 |
+
@v_bp.route('/', methods=['GET', 'POST'])
def home():
    """Serve the upload form and turn an uploaded ZIP into a vector-DB archive.

    GET renders ``index.html``. POST expects a multipart field named ``file``
    holding a ``.zip`` archive. The archive is saved under ``app/uploads``,
    extracted with ``handle_zip_upload``, converted with
    ``process_files_to_vectors``, and the resulting folder is zipped and sent
    back as an attachment.

    Returns:
        The rendered form (GET), the result archive as a download (valid
        POST), or a JSON error with HTTP 400 (missing, misnamed, or corrupt
        upload).
    """
    if request.method != 'POST':
        return render_template('index.html')

    # Local import: the archive is built in-process instead of shelling out,
    # and the module's top-level imports are left untouched.
    import zipfile

    v_uploaded_file = request.files.get('file')
    v_raw_name = (v_uploaded_file.filename or '') if v_uploaded_file else ''

    # The filename is client-controlled. Keep only its last path component
    # (treating backslashes as separators too) so it cannot escape the
    # upload directory.
    v_safe_name = os.path.basename(v_raw_name.replace('\\', '/'))
    v_stem, v_ext = os.path.splitext(v_safe_name)
    if not v_stem or v_ext.lower() != '.zip':
        return jsonify({'error': 'Please upload a valid zip file.'}), 400

    v_upload_dir = 'app/uploads'
    os.makedirs(v_upload_dir, exist_ok=True)
    v_upload_path = os.path.join(v_upload_dir, v_safe_name)
    v_uploaded_file.save(v_upload_path)

    # Process the zip file; a correctly named but corrupt archive is a
    # client error, not a server crash.
    try:
        v_output_path = handle_zip_upload(v_upload_path)
    except zipfile.BadZipFile:
        return jsonify({'error': 'Please upload a valid zip file.'}), 400

    # Create or update vector database
    v_result_path = process_files_to_vectors(v_output_path)

    # Compress and send the result. Mode 'w' rebuilds the archive from
    # scratch so files from earlier uploads never leak into this download,
    # and no shell command is ever built from user-derived paths.
    # NOTE(review): the output path is shared by all requests; concurrent
    # uploads would overwrite each other's archive.
    result_zip = os.path.join('app/uploads/vectors', 'vector_db.zip')
    os.makedirs(os.path.dirname(result_zip), exist_ok=True)
    with zipfile.ZipFile(result_zip, 'w', zipfile.ZIP_DEFLATED) as obj_zip:
        for v_root, _, v_files in os.walk(v_result_path):
            for v_file in v_files:
                obj_zip.write(os.path.join(v_root, v_file))

    # An absolute path removes any ambiguity about which directory a
    # relative path would be resolved against.
    return send_file(os.path.abspath(result_zip), as_attachment=True)
|
app/static/logo.png
ADDED
![]() |
app/templates/index.html
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<!DOCTYPE html>
|
2 |
+
<html lang="en">
|
3 |
+
<head>
|
4 |
+
<meta charset="UTF-8">
|
5 |
+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
6 |
+
<title>Vector DB Creator</title>
|
7 |
+
</head>
|
8 |
+
<body>
|
9 |
+
<header>
|
10 |
+
<img src="{{ url_for('static', filename='logo.png') }}" alt="Logo" style="float:left; width:50px;">
|
11 |
+
<h1>Change your PDF, PPT, and CSV data to Vector DB</h1>
|
12 |
+
</header>
|
13 |
+
<main>
|
14 |
+
<p>Upload your data files or an existing vector database to create or update a vector DB.</p>
|
15 |
+
<form action="/" method="POST" enctype="multipart/form-data">
|
16 |
+
<label for="file">Upload ZIP File:</label>
|
17 |
+
<input type="file" name="file" id="file" accept=".zip" required>
|
18 |
+
<button type="submit">Upload</button>
|
19 |
+
</form>
|
20 |
+
<p>Your vector DB will be available for download after processing.</p>
|
21 |
+
</main>
|
22 |
+
</body>
|
23 |
+
</html>
|
app/utils/file_handler.py
ADDED
File without changes
|
app/utils/vector_db.py
ADDED
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
|
3 |
+
def process_files_to_vectors(v_folder_path):
    """Write a placeholder ``.vec`` file for each supported document found.

    Walks ``v_folder_path`` recursively and, for every file whose name ends
    in ``.pdf``, ``.ppt`` or ``.csv`` (matched case-insensitively), writes
    ``<original name>.vec`` into a ``vectors`` sub-folder. The content is a
    placeholder string, not a real embedding.

    NOTE: output names are flat, so two inputs with the same file name in
    different sub-folders produce a single ``.vec`` file.

    Args:
        v_folder_path: Directory containing the extracted upload.

    Returns:
        Path of the ``vectors`` folder inside ``v_folder_path``.
    """
    v_vector_folder = os.path.join(v_folder_path, 'vectors')
    os.makedirs(v_vector_folder, exist_ok=True)

    v_supported = ('.pdf', '.ppt', '.csv')

    # Placeholder: iterate over files and create vector representations
    for _, _, v_files in os.walk(v_folder_path):
        for v_file in v_files:
            # Compare in lower case so "REPORT.PDF" is not silently skipped.
            if not v_file.lower().endswith(v_supported):
                continue
            v_vector_file = os.path.join(v_vector_folder, v_file + '.vec')
            # Explicit encoding: file names may contain non-ASCII characters
            # and the platform default encoding is not guaranteed to be UTF-8.
            with open(v_vector_file, 'w', encoding='utf-8') as obj_out:
                obj_out.write(f'Vector representation of {v_file}')

    return v_vector_folder
|
app/utils/zip_handler.py
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import zipfile
|
3 |
+
|
4 |
+
def handle_zip_upload(v_zip_path):
    """Extract a ZIP archive into a folder named after the archive.

    Args:
        v_zip_path: Path to the ZIP file on disk.

    Returns:
        The extraction directory, i.e. ``v_zip_path`` with its final
        extension removed.
    """
    # "uploads/data.zip" -> "uploads/data"
    target_dir, _suffix = os.path.splitext(v_zip_path)

    with zipfile.ZipFile(v_zip_path) as archive:
        archive.extractall(path=target_dir)

    return target_dir
|
requirements.txt
CHANGED
@@ -1,6 +1,9 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
sentence-transformers
|
4 |
-
|
5 |
-
|
6 |
-
|
|
|
|
|
|
|
|
1 |
+
Flask==2.3.2
|
2 |
+
transformers==4.27.4
|
3 |
+
sentence-transformers==2.2.2
|
4 |
+
torch==2.0.1
|
5 |
+
torchvision==0.15.2
|
6 |
+
numpy==1.24.2
|
7 |
+
PyMuPDF==1.22.5
|
8 |
+
python-pptx==0.6.21
|
9 |
+
pandas==1.5.3
|
run.py
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from app import create_app
|
2 |
+
|
3 |
+
if __name__ == "__main__":
    # Local import: only this entry-point branch needs the environment.
    import os

    obj_app = create_app()

    # The server binds to all interfaces, so the interactive debugger must
    # not be on unconditionally: it lets anyone who reaches the port execute
    # code. Debug mode is opt-in via FLASK_DEBUG and off by default.
    v_debug = os.environ.get('FLASK_DEBUG', '').strip().lower() in {
        '1', 'true', 'yes', 'on',
    }
    obj_app.run(host='0.0.0.0', port=7860, debug=v_debug)
|