Empereur-Pirate committed · verified
Commit 44a140e · 1 Parent(s): 0551907

Update main.py

Files changed (1)
  1. main.py +14 -14
main.py CHANGED
@@ -25,7 +25,7 @@ def t5(input: str) -> dict[str, str]:
 class ParseRaw(BaseModel):
     raw: bytes
 
-@app.post("/infer_t5")
+@app .post("/infer_t5")
 async def infer_endpoint(data: ParseRaw = Depends(parse_raw)):
     """Receive input and generate text."""
     try:
@@ -43,28 +43,28 @@ async def infer_endpoint(data: ParseRaw = Depends(parse_raw)):
     except AssertionError as e:
         return JSONResponse({"error": f"Invalid Input Format: {e}"}, status_code=400)
 
-@app.get("/infer_t5")
+@app .get("/infer_t5")
 def get_default_inference_endpoint():
     return {"message": "Use POST method to submit input data"}
 
-# Load the MIKU model and tokenizer
+# Initialize device
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
 try:
-    # Attempt to load the model and tokenizer regularly
-    model_config = AutoConfig.from_pretrained("miqudev/miqu-1-70b")
-    model = AutoModelForCausalLM.from_pretrained("miqudev/miqu-1-70b", config=model_config).to(device)
-    tokenizer = AutoTokenizer.from_pretrained("miqudev/miqu-1-70b")
+    # Initiate the model and tokenizer with the corrected pre-trained weights
+    model_config = AutoConfig.from_pretrained("152334H/miqu-1-70b-sf", trust_remote_code=True)
+    model = AutoModelForCausalLM.from_pretrained("152334H/miqu-1-70b-sf", config=model_config).to(device)
+    tokenizer = AutoTokenizer.from_pretrained("152334H/miqu-1-70b-sf")
 except Exception as e:
     print("[WARNING]: Failed to load model and tokenizer conventionally.")
     print(f"Exception: {e}")
 
-    # Construct a dummy configuration object
-    model_config = AutoConfig.from_pretrained("miqudev/miqu-1-70b", trust_remote_code=True)
+    # Configure a fallback mechanism similar to the original implementation
+    model_config = AutoConfig.from_pretrained("152334H/miqu-1-70b-sf", trust_remote_code=True)
 
-    # Load the model using the constructed configuration
-    model = AutoModelForCausalLM.from_pretrained("miqudev/miqu-1-70b", config=model_config).to(device)
-    tokenizer = AutoTokenizer.from_pretrained("miqudev/miqu-1-70b")
+    # Load the model using the fallback configuration
+    model = AutoModelForCausalLM.from_pretrained("152334H/miqu-1-70b-sf", config=model_config).to(device)
+    tokenizer = AutoTokenizer.from_pretrained("152334H/miqu-1-70b-sf")
 
 def miuk_answer(query: str) -> str:
     query_tokens = tokenizer.encode(query, return_tensors="pt")
@@ -72,7 +72,7 @@ def miuk_answer(query: str) -> str:
     answer = model.generate(query_tokens, max_length=128, temperature=1, pad_token_id=tokenizer.pad_token_id)
     return tokenizer.decode(answer[:, 0]).replace(" ", "")
 
-@app.post("/infer_miku")
+@app .post("/infer_miku")
 async def infer_endpoint(data: ParseRaw = Depends(parse_raw)):
     """Receive input and generate text."""
     try:
@@ -90,6 +90,6 @@ async def infer_endpoint(data: ParseRaw = Depends(parse_raw)):
     except AssertionError as e:
         return JSONResponse({"error": f"Invalid Input Format: {e}"}, status_code=400)
 
-@app.get("/infer_miku")
+@app .get("/infer_miku")
 def get_default_inference_endpoint():
     return {"message": "Use POST method to submit input data"}
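Note: the committed decorator lines read @app .post(...) and @app .get(...) with a stray space before the dot. Python's grammar tolerates whitespace around attribute access, so the routes still register, but the space looks accidental.

Both POST endpoints receive their input through Depends(parse_raw), and parse_raw itself is outside this diff. A minimal sketch of what such a dependency could look like, assuming it simply wraps the raw request body in the ParseRaw model (the Request-based implementation below is an assumption, not code from this commit):

from fastapi import Depends, FastAPI, Request
from pydantic import BaseModel

app = FastAPI()

class ParseRaw(BaseModel):
    raw: bytes

# Assumed implementation: the real parse_raw in main.py is not shown in this diff.
async def parse_raw(request: Request) -> ParseRaw:
    # Read the raw request body and wrap it in the ParseRaw model.
    body = await request.body()
    return ParseRaw(raw=body)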
 
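A side note on miuk_answer as committed: query_tokens is never moved to the model's device, so generation will fail when the model sits on CUDA, and tokenizer.decode(answer[:, 0]) decodes the first token position of each sequence rather than the first generated sequence. A corrected sketch, not part of this commit:

def miuk_answer(query: str) -> str:
    # Encode the prompt and move it to the same device as the model.
    query_tokens = tokenizer.encode(query, return_tensors="pt").to(device)
    answer = model.generate(
        query_tokens,
        max_length=128,
        temperature=1,
        pad_token_id=tokenizer.pad_token_id,
    )
    # answer has shape (batch, seq_len); decode the first full sequence.
    return tokenizer.decode(answer[0], skip_special_tokens=True)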
 
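Once the app is served (for example with uvicorn main:app), the POST routes can be exercised from Python. A sketch, assuming a local server on port 8000 and that the endpoint consumes the raw request body:

import requests

# Hypothetical host and port; adjust to the actual deployment.
resp = requests.post("http://localhost:8000/infer_miku", data=b"Hello, who are you?")
print(resp.status_code, resp.json())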