Bijoy09 committed
Commit
0d35220
1 Parent(s): eb64215

Update app.py

Files changed (1)
  1. app.py +46 -86
app.py CHANGED
@@ -3,17 +3,18 @@ from pydantic import BaseModel
 import torch
 from transformers import AutoModelForSequenceClassification, AutoTokenizer
 import os
+import re
 import logging
-
+
 app = FastAPI()
-
+
 # Configure logging
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
-
+
 # Set the cache directory for Hugging Face
 os.environ['TRANSFORMERS_CACHE'] = os.getenv('TRANSFORMERS_CACHE', '/app/cache')
-
+
 # Load model and tokenizer
 model_name = "Bijoy09/MObilebert"
 try:
@@ -23,99 +24,58 @@ try:
 except Exception as e:
     logger.error(f"Failed to load model or tokenizer: {e}")
     raise RuntimeError(f"Failed to load model or tokenizer: {e}")
-
+
 class TextRequest(BaseModel):
     text: str
-
+
 class BatchTextRequest(BaseModel):
     texts: list[str]
-
-
-@app.post("/predict/")
-async def predict(request: TextRequest):
-    try:
-        model.eval()
-        inputs = tokenizer.encode_plus(
-            request.text,
-            add_special_tokens=True,
-            max_length=64,
-            truncation=True,
-            padding='max_length',
-            return_attention_mask=True,
-            return_tensors='pt'
-        )
-        with torch.no_grad():
-            logits = model(inputs['input_ids'], attention_mask=inputs['attention_mask']).logits
-        prediction = torch.argmax(logits, dim=1).item()
-        return {"prediction": "Spam" if prediction == 1 else "Ham"}
-    except Exception as e:
-        logger.error(f"Prediction failed: {e}")
-        raise HTTPException(status_code=500, detail=f"Prediction failed: {e}")
-
-
-# @app.post("/batch_predict/")
-# async def batch_predict(request: BatchTextRequest):
-#     try:
-#         model.eval()
-#         logger.info(f"Received batch prediction request for {len(request.texts)} texts")
-#         inputs = tokenizer(
-#             request.texts,
-#             add_special_tokens=True,
-#             max_length=64,
-#             truncation=True,
-#             padding='max_length',
-#             return_attention_mask=True,
-#             return_tensors='pt'
-#         )
-
-#         with torch.no_grad():
-#             logits = model(inputs['input_ids'], attention_mask=inputs['attention_mask']).logits
-#         predictions = torch.argmax(logits, dim=1).tolist()
-
-#         results = [
-#             {"id": idx + 1, "text": text, "prediction": "Spam" if pred == 1 else "Ham"}
-#             for idx, (text, pred) in enumerate(zip(request.texts, predictions))
-#         ]
-#         logger.info(f"Batch prediction results: {results}")
-#         return {"results": results}
-#     except Exception as e:
-#         logger.error(f"Batch prediction failed: {e}")
-#         raise HTTPException(status_code=500, detail=f"Batch prediction failed: {e}")
-
-
+
+# Regular expression to detect Bangla characters
+bangla_regex = re.compile('[\u0980-\u09FF]')
+
+def contains_bangla(text):
+    return bool(bangla_regex.search(text))
+
 @app.post("/batch_predict/")
 async def batch_predict(request: BatchTextRequest):
     try:
         model.eval()
-
-        # Batch encode all texts in the request at once
-        inputs = tokenizer(
-            request.texts,
-            add_special_tokens=True,
-            max_length=64,
-            truncation=True,
-            padding='max_length',
-            return_attention_mask=True,
-            return_tensors='pt'
-        )
-
-        # Run batch inference
-        with torch.no_grad():
-            logits = model(inputs['input_ids'], attention_mask=inputs['attention_mask']).logits
-        predictions = torch.argmax(logits, dim=1).tolist()
-
-        # Format results
-        results = [
-            {"id": idx + 1, "text": text, "prediction": "Spam" if pred == 1 else "Ham"}
-            for idx, (text, pred) in enumerate(zip(request.texts, predictions))
-        ]
+
+        # Prepare the batch results
+        results = []
+
+        for idx, text in enumerate(request.texts):
+            logger.info(f" texts: {text}")
+            # Check if text contains Bangla characters
+            if not contains_bangla(text):
+                results.append({"id": idx + 1, "text": text, "prediction": "other"})
+                continue
+
+            # Encode and predict for texts containing Bangla characters
+            inputs = tokenizer.encode_plus(
+                text,
+                add_special_tokens=True,
+                max_length=64,
+                truncation=True,
+                padding='max_length',
+                return_attention_mask=True,
+                return_tensors='pt'
+            )
+
+            with torch.no_grad():
+                logits = model(inputs['input_ids'], attention_mask=inputs['attention_mask']).logits
+            prediction = torch.argmax(logits, dim=1).item()
+            label = "Spam" if prediction == 1 else "Ham"
+            results.append({"id": idx + 1, "text": text, "prediction": label})
+
         logger.info(f"Batch prediction results: {results}")
         return {"results": results}
-
+
     except Exception as e:
-        logging.error(f"Batch prediction failed: {e}")
+        logger.error(f"Batch prediction failed: {e}")
         raise HTTPException(status_code=500, detail="Batch prediction failed. Please try again.")
-
+
 @app.get("/")
 async def root():
-    return {"message": "Welcome to the MobileBERT API"}
+    return {"message": "Welcome to the MobileBERT API"}
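For context on the routing step this commit introduces: contains_bangla matches any character in the Unicode Bengali block (U+0980 to U+09FF), so a request text with no Bangla characters never reaches the model and is labeled "other", while mixed-script text still gets classified. Below is a minimal standalone sketch of that behavior; the sample strings are illustrative, not taken from the commit.

import re

# Same pattern as the committed helper: any character in the Bengali block
bangla_regex = re.compile('[\u0980-\u09FF]')

def contains_bangla(text):
    return bool(bangla_regex.search(text))

print(contains_bangla("আপনি একটি পুরস্কার জিতেছেন!"))  # True: pure Bangla, goes to the model
print(contains_bangla("Win a free prize now"))  # False: endpoint would return "other"
print(contains_bangla("Call করুন 16263"))  # True: mixed script still matches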
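And a hedged sketch of calling the updated endpoint from a client. The base URL is a placeholder (substitute wherever this Space is served), and the response shape follows the results list built in the handler.

import requests

API_URL = "http://localhost:8000"  # placeholder base URL, not from the commit

payload = {"texts": ["আপনি ১০,০০০ টাকা জিতেছেন!", "hello world"]}
response = requests.post(f"{API_URL}/batch_predict/", json=payload)
response.raise_for_status()

# Per the handler: non-Bangla text comes back as "other",
# Bangla text as "Spam" or "Ham"
for item in response.json()["results"]:
    print(item["id"], item["prediction"], item["text"])

One design note: the rewritten handler runs a separate forward pass for each Bangla text, where the removed version tokenized and classified the whole batch in a single call; for large requests, that batched approach could be reapplied to just the Bangla subset.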