Update app.py
Browse files
app.py
CHANGED
@@ -6,16 +6,16 @@ from transformers import AutoModelForSequenceClassification, AutoTokenizer
|
|
6 |
import os
|
7 |
import re
|
8 |
import logging
|
9 |
-
|
10 |
app = FastAPI()

# Configure logging once at import time so the module-level logger below
# (and any library loggers) emit INFO-level records.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Set the cache directory for Hugging Face.
# setdefault keeps any value already provided by the environment and only
# falls back to /app/cache when the variable is unset — same end state as
# the original `os.environ[k] = os.getenv(k, default)` round-trip, without
# re-assigning a value that is already there.
os.environ.setdefault('TRANSFORMERS_CACHE', '/app/cache')
|
18 |
-
|
19 |
# Load model and tokenizer
|
20 |
model_name = "Bijoy09/MObilebert"
|
21 |
try:
|
@@ -25,45 +25,37 @@ try:
|
|
25 |
except Exception as e:
|
26 |
logger.error(f"Failed to load model or tokenizer: {e}")
|
27 |
raise RuntimeError(f"Failed to load model or tokenizer: {e}")
|
28 |
-
|
29 |
class TextRequest(BaseModel):
    """Request payload for a single-text prediction."""

    text: str


class BatchTextRequest(BaseModel):
    """Request payload for batch prediction over several texts."""

    texts: list[str]
|
34 |
-
|
35 |
# Compiled once at import: matches any single character inside the
# Bangla (Bengali) Unicode block.
bangla_regex = re.compile('[\u0980-\u09FF]')


def contains_bangla(text):
    """Return True when *text* contains at least one Bangla character."""
    return bangla_regex.search(text) is not None


def remove_non_bangla(text):
    """Return *text* reduced to only its Bangla characters, order preserved."""
    # Character-range filter is equivalent to joining bangla_regex.findall(text):
    # the pattern is a single inclusive range U+0980..U+09FF.
    return ''.join(ch for ch in text if '\u0980' <= ch <= '\u09FF')
|
43 |
-
|
44 |
@app.post("/batch_predict/")
|
45 |
async def batch_predict(request: BatchTextRequest):
|
46 |
try:
|
47 |
model.eval()
|
48 |
-
|
49 |
# Prepare the batch results
|
50 |
results = []
|
51 |
-
|
52 |
for idx, text in enumerate(request.texts):
|
53 |
-
|
54 |
-
|
55 |
# Check if text contains Bangla characters
|
56 |
if not contains_bangla(text):
|
57 |
results.append({"id": idx + 1, "text": text, "prediction": "other"})
|
58 |
continue
|
59 |
-
|
60 |
-
# Remove non-Bangla characters
|
61 |
-
modified_text = remove_non_bangla(text)
|
62 |
-
logger.info(f"modified text: {modified_text}")
|
63 |
-
|
64 |
# Encode and predict for texts containing Bangla characters
|
65 |
inputs = tokenizer.encode_plus(
|
66 |
-
|
67 |
add_special_tokens=True,
|
68 |
max_length=64,
|
69 |
truncation=True,
|
@@ -71,20 +63,20 @@ async def batch_predict(request: BatchTextRequest):
|
|
71 |
return_attention_mask=True,
|
72 |
return_tensors='pt'
|
73 |
)
|
74 |
-
|
75 |
with torch.no_grad():
|
76 |
logits = model(inputs['input_ids'], attention_mask=inputs['attention_mask']).logits
|
77 |
prediction = torch.argmax(logits, dim=1).item()
|
78 |
label = "Spam" if prediction == 1 else "Ham"
|
79 |
results.append({"id": idx + 1, "text": text, "prediction": label})
|
80 |
-
|
81 |
logger.info(f"Batch prediction results: {results}")
|
82 |
return JSONResponse(content={"results": results}, media_type="application/json; charset=utf-8")
|
83 |
-
|
84 |
except Exception as e:
|
85 |
logger.error(f"Batch prediction failed: {e}")
|
86 |
raise HTTPException(status_code=500, detail="Batch prediction failed. Please try again.")
|
87 |
-
|
88 |
@app.get("/")
|
89 |
async def root():
|
90 |
return {"message": "Welcome to the MobileBERT API"}
|
|
|
6 |
import os
|
7 |
import re
|
8 |
import logging
|
9 |
+
|
10 |
app = FastAPI()

# Configure logging once at import time so the module-level logger below
# (and any library loggers) emit INFO-level records.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Set the cache directory for Hugging Face.
# setdefault keeps any value already provided by the environment and only
# falls back to /app/cache when the variable is unset — same end state as
# the original `os.environ[k] = os.getenv(k, default)` round-trip, without
# re-assigning a value that is already there.
os.environ.setdefault('TRANSFORMERS_CACHE', '/app/cache')
|
18 |
+
|
19 |
# Load model and tokenizer
|
20 |
model_name = "Bijoy09/MObilebert"
|
21 |
try:
|
|
|
25 |
except Exception as e:
|
26 |
logger.error(f"Failed to load model or tokenizer: {e}")
|
27 |
raise RuntimeError(f"Failed to load model or tokenizer: {e}")
|
28 |
+
|
29 |
class TextRequest(BaseModel):
|
30 |
text: str
|
31 |
+
|
32 |
class BatchTextRequest(BaseModel):
|
33 |
texts: list[str]
|
34 |
+
|
35 |
# Regular expression to detect Bangla characters: matches any single
# code point in the Bangla (Bengali) Unicode block.
bangla_regex = re.compile('[\u0980-\u09FF]')


def contains_bangla(text):
    """Return True when *text* holds at least one Bangla character."""
    return bangla_regex.search(text) is not None
|
40 |
+
|
|
|
|
|
|
|
41 |
@app.post("/batch_predict/")
|
42 |
async def batch_predict(request: BatchTextRequest):
|
43 |
try:
|
44 |
model.eval()
|
45 |
+
|
46 |
# Prepare the batch results
|
47 |
results = []
|
48 |
+
|
49 |
for idx, text in enumerate(request.texts):
|
50 |
+
|
|
|
51 |
# Check if text contains Bangla characters
|
52 |
if not contains_bangla(text):
|
53 |
results.append({"id": idx + 1, "text": text, "prediction": "other"})
|
54 |
continue
|
55 |
+
|
|
|
|
|
|
|
|
|
56 |
# Encode and predict for texts containing Bangla characters
|
57 |
inputs = tokenizer.encode_plus(
|
58 |
+
text,
|
59 |
add_special_tokens=True,
|
60 |
max_length=64,
|
61 |
truncation=True,
|
|
|
63 |
return_attention_mask=True,
|
64 |
return_tensors='pt'
|
65 |
)
|
66 |
+
|
67 |
with torch.no_grad():
|
68 |
logits = model(inputs['input_ids'], attention_mask=inputs['attention_mask']).logits
|
69 |
prediction = torch.argmax(logits, dim=1).item()
|
70 |
label = "Spam" if prediction == 1 else "Ham"
|
71 |
results.append({"id": idx + 1, "text": text, "prediction": label})
|
72 |
+
|
73 |
logger.info(f"Batch prediction results: {results}")
|
74 |
return JSONResponse(content={"results": results}, media_type="application/json; charset=utf-8")
|
75 |
+
|
76 |
except Exception as e:
|
77 |
logger.error(f"Batch prediction failed: {e}")
|
78 |
raise HTTPException(status_code=500, detail="Batch prediction failed. Please try again.")
|
79 |
+
|
80 |
@app.get("/")
|
81 |
async def root():
|
82 |
return {"message": "Welcome to the MobileBERT API"}
|