Spaces:
Sleeping
Sleeping
Gabriel Okiri
committed on
Commit
·
248c31b
1
Parent(s):
f88f6e3
..
Browse files- Dockerfile +13 -1
- app.py +51 -0
Dockerfile
CHANGED
@@ -2,9 +2,21 @@ FROM python:3.9-slim
|
|
2 |
|
3 |
WORKDIR /code
|
4 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
COPY requirements.txt .
|
6 |
RUN pip install --no-cache-dir -r requirements.txt
|
7 |
|
|
|
8 |
COPY . .
|
9 |
|
10 |
-
|
|
|
|
|
|
|
|
|
|
2 |
|
3 |
WORKDIR /code
|
4 |
|
5 |
+
# Install system dependencies
|
6 |
+
# --no-install-recommends keeps optional packages out of the slim image;
# the apt lists are removed in the same layer so they never reach the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    python3-dev \
    && rm -rf /var/lib/apt/lists/*
|
10 |
+
|
11 |
+
# Copy requirements first to leverage Docker cache
|
12 |
COPY requirements.txt .
|
13 |
RUN pip install --no-cache-dir -r requirements.txt
|
14 |
|
15 |
+
# Copy the rest of the application
|
16 |
COPY . .
|
17 |
|
18 |
+
# Expose port for Gradio
|
19 |
+
EXPOSE 7860
|
20 |
+
|
21 |
+
# Command to run the application
|
22 |
+
CMD ["python", "app.py"]
|
app.py
ADDED
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
from transformers import AutoTokenizer, AutoModelForCausalLM
|
3 |
+
import torch
|
4 |
+
|
5 |
+
# Model initialization
|
6 |
+
# Resolve the compute device first: CUDA when torch reports one, otherwise CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

model_name = "gpt2"  # or your specific model

# Load the tokenizer and register one tag token per supported language.
tokenizer = AutoTokenizer.from_pretrained(model_name)
special_tokens = {"additional_special_tokens": ["[YORUBA]", "[IGBO]", "[HAUSA]"]}
tokenizer.add_special_tokens(special_tokens)

# Load the causal LM, resize its embedding table to the enlarged tokenizer
# vocabulary, then place the model on the selected device.
model = AutoModelForCausalLM.from_pretrained(model_name)
model.resize_token_embeddings(len(tokenizer))
model.to(device)
|
18 |
+
|
19 |
+
def generate_text(prompt, language):
    """Generate a continuation of ``prompt`` prefixed with a language tag.

    The prompt is prefixed with ``[LANGUAGE]`` (the upper-cased ``language``),
    tokenized, passed to ``model.generate``, and the first returned sequence
    is decoded with special tokens stripped.

    Args:
        prompt: Text to continue.
        language: Language name used to build the tag, e.g. ``"YORUBA"``.

    Returns:
        The decoded text of the first generated sequence.

    Raises:
        gr.Error: If ``language`` is empty or ``None``.
    """
    # An unselected dropdown delivers None; surface a readable message in the
    # UI instead of failing on None.upper().
    if not language:
        raise gr.Error("Please select a language.")

    # Add language tag to prompt
    tagged_prompt = f"[{language.upper()}] {prompt}"

    # Tokenize and move the tensors to the same device as the model
    inputs = tokenizer(tagged_prompt, return_tensors="pt").to(device)

    # Generate
    outputs = model.generate(
        **inputs,
        # Count only newly generated tokens so a long prompt cannot consume
        # (or exceed) the whole length budget.
        max_new_tokens=100,
        num_return_sequences=1,
        # temperature only affects the output when sampling is enabled.
        do_sample=True,
        temperature=0.7,
        pad_token_id=tokenizer.eos_token_id,
    )

    # Decode and return
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
|
37 |
+
|
38 |
+
# Create Gradio interface
|
39 |
+
# Two inputs (free-text prompt, language selector) feed generate_text; its
# return value is shown in a single output textbox.
iface = gr.Interface(
    title="Nigerian Language Generator",
    description="Generate text in Yoruba, Igbo, or Hausa using a fine-tuned GPT model.",
    fn=generate_text,
    inputs=[
        gr.Textbox(label="Enter your prompt"),
        gr.Dropdown(label="Select Language", choices=["YORUBA", "IGBO", "HAUSA"]),
    ],
    outputs=gr.Textbox(label="Generated Text"),
)
|
49 |
+
|
50 |
+
if __name__ == "__main__":
    # The container runs this script directly and exposes port 7860, so bind
    # to all interfaces on that port; the default loopback address is not
    # reachable from outside the container.
    iface.launch(server_name="0.0.0.0", server_port=7860)
|