Gabriel Okiri committed
Commit 248c31b
1 Parent(s): f88f6e3
Files changed (2)
  1. Dockerfile +13 -1
  2. app.py +51 -0
Dockerfile CHANGED
@@ -2,9 +2,21 @@ FROM python:3.9-slim
 
 WORKDIR /code
 
+# Install system dependencies
+RUN apt-get update && apt-get install -y \
+    build-essential \
+    python3-dev \
+    && rm -rf /var/lib/apt/lists/*
+
+# Copy requirements first to leverage Docker cache
 COPY requirements.txt .
 RUN pip install --no-cache-dir -r requirements.txt
 
+# Copy the rest of the application
 COPY . .
 
-CMD ["python", "app.py"]
+# Expose port for Gradio
+EXPOSE 7860
+
+# Command to run the application
+CMD ["python", "app.py"]
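
Note that the build copies and installs a requirements.txt from the repository root, but that file is not part of this commit. Judging from the imports in app.py, a minimal version would presumably look like the following (the package list is an inference from the code, not taken from the source repo):

gradio
transformers
torch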
app.py ADDED
@@ -0,0 +1,51 @@
+import gradio as gr
+from transformers import AutoTokenizer, AutoModelForCausalLM
+import torch
+
+# Model initialization
+model_name = "gpt2"  # or your specific model
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+model = AutoModelForCausalLM.from_pretrained(model_name)
+
+# Add special tokens for languages
+special_tokens = {"additional_special_tokens": ["[YORUBA]", "[IGBO]", "[HAUSA]"]}
+tokenizer.add_special_tokens(special_tokens)
+model.resize_token_embeddings(len(tokenizer))
+
+# Move model to GPU if available
+device = "cuda" if torch.cuda.is_available() else "cpu"
+model.to(device)
+
+def generate_text(prompt, language):
+    # Add language tag to prompt
+    tagged_prompt = f"[{language.upper()}] {prompt}"
+
+    # Tokenize
+    inputs = tokenizer(tagged_prompt, return_tensors="pt").to(device)
+
+    # Generate (do_sample=True so temperature actually takes effect)
+    outputs = model.generate(
+        **inputs,
+        max_length=100,
+        num_return_sequences=1,
+        do_sample=True, temperature=0.7,
+        pad_token_id=tokenizer.eos_token_id
+    )
+
+    # Decode and return
+    return tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+# Create Gradio interface
+iface = gr.Interface(
+    fn=generate_text,
+    inputs=[
+        gr.Textbox(label="Enter your prompt"),
+        gr.Dropdown(choices=["YORUBA", "IGBO", "HAUSA"], label="Select Language")
+    ],
+    outputs=gr.Textbox(label="Generated Text"),
+    title="Nigerian Language Generator",
+    description="Generate text in Yoruba, Igbo, or Hausa using a fine-tuned GPT model."
+)
+
+if __name__ == "__main__":
+    iface.launch(server_name="0.0.0.0", server_port=7860)  # bind 0.0.0.0 so the app is reachable outside the container
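
Once the image is built and the container is running with the port published (e.g. docker run -p 7860:7860), the endpoint can be smoke-tested programmatically. Below is a minimal sketch using gradio_client, assuming a recent Gradio version where a single gr.Interface is exposed under the default /predict endpoint; the prompt string is a made-up example:

from gradio_client import Client

# Connect to the locally published container port
client = Client("http://localhost:7860")

# Positional args mirror the Interface inputs: prompt textbox, then language dropdown
result = client.predict(
    "Bawo ni",     # hypothetical Yoruba prompt
    "YORUBA",      # one of the dropdown choices
    api_name="/predict",
)
print(result)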