Spaces:

ml6team
/

secret-agent-guardrail-challenge

Running

App Files Files Community

BertChristiaens commited on Jul 15

Commit

eac13d7

•

1 Parent(s): b735ab5

push demo

Browse files

Files changed (3) hide show

__init__.py +0 -0
config.py +1 -1
llm.py +15 -13

__init__.py ADDED Viewed

File without changes

config.py CHANGED Viewed

@@ -89,7 +89,7 @@ LEVEL_DESCRIPTIONS = {
         """,
     },
     6: {
-         "info": """
         - Guardrails to prevent missuse and the reveal of the secret.
         - Special characters around the user input.
         - LLM output is checked for the secret by another LLM judge.

         """,
     },
     6: {
+        "info": """
         - Guardrails to prevent missuse and the reveal of the secret.
         - Special characters around the user input.
         - LLM output is checked for the secret by another LLM judge.

llm.py CHANGED Viewed

@@ -15,7 +15,7 @@ special_checks = {
 def stream_request(variant: str, secret: str, user_input: str):
     """Stream the response from the model."""
     stream = client.deployments.invoke_with_stream(
-        key=config.ORQ_DEPLOYMENT_NAME,
         context={"step": variant},  # , "environments": []},
         inputs={"secret": secret, "user_input": user_input},
     )
@@ -28,23 +28,25 @@ def stream_request(variant: str, secret: str, user_input: str):
 def get_full_prompt(variant: str, secret: str = None, user_input: str = None):
     """Get the full prompt from a specific deployment."""
     deployment_config = client.deployments.get_config(
-        key=config.ORQ_DEPLOYMENT_NAME,
         context={"step": variant},  # , "environments": []},
     ).to_dict()
     prompts = {
-        p["role"]+"_prompt": p["content"] for p in deployment_config["messages"]
     }
     if secret:
         prompts["user_prompt"] = prompts["user_prompt"].replace("{{secret}}", secret)
     if user_input:
-        prompts["user_prompt"] = prompts["user_prompt"].replace("{{user_input}}", user_input)
     return prompts
 def run_judge(level: int, inputs: dict):
     generation = client.deployments.invoke(
-        key=config.ORQ_DEPLOYMENT_NAME,
         context={"step": special_checks[level]},
         inputs=inputs,
     )
@@ -57,31 +59,31 @@ def is_subsequence(main_string, sub_string):
     """
     Checks if sub_string is a subsequence of main_string.
     A subsequence allows arbitrary characters in between the characters of sub_string in main_string.
     Parameters:
     main_string (str): The string in which to search.
     sub_string (str): The string to search for.
     Returns:
     bool: True if sub_string is a subsequence of main_string, False otherwise.
     """
     main_string = main_string.lower()
     sub_string = sub_string.lower()
     main_len = len(main_string)
     sub_len = len(sub_string)
     if sub_len == 0:
         return True
     if main_len == 0:
         return False
     main_index = 0
     sub_index = 0
     while main_index < main_len and sub_index < sub_len:
         if main_string[main_index] == sub_string[sub_index]:
             sub_index += 1
         main_index += 1
-    return sub_index == sub_len

 def stream_request(variant: str, secret: str, user_input: str):
     """Stream the response from the model."""
     stream = client.deployments.invoke_with_stream(
+        key="llm-security-challenge-demo",
         context={"step": variant},  # , "environments": []},
         inputs={"secret": secret, "user_input": user_input},
     )
 def get_full_prompt(variant: str, secret: str = None, user_input: str = None):
     """Get the full prompt from a specific deployment."""
     deployment_config = client.deployments.get_config(
+        key="llm-security-challenge-demo",
         context={"step": variant},  # , "environments": []},
     ).to_dict()
     prompts = {
+        p["role"] + "_prompt": p["content"] for p in deployment_config["messages"]
     }
     if secret:
         prompts["user_prompt"] = prompts["user_prompt"].replace("{{secret}}", secret)
     if user_input:
+        prompts["user_prompt"] = prompts["user_prompt"].replace(
+            "{{user_input}}", user_input
+        )
     return prompts
 def run_judge(level: int, inputs: dict):
     generation = client.deployments.invoke(
+        key="llm-security-challenge-demo",
         context={"step": special_checks[level]},
         inputs=inputs,
     )
     """
     Checks if sub_string is a subsequence of main_string.
     A subsequence allows arbitrary characters in between the characters of sub_string in main_string.
     Parameters:
     main_string (str): The string in which to search.
     sub_string (str): The string to search for.
     Returns:
     bool: True if sub_string is a subsequence of main_string, False otherwise.
     """
     main_string = main_string.lower()
     sub_string = sub_string.lower()
     main_len = len(main_string)
     sub_len = len(sub_string)
     if sub_len == 0:
         return True
     if main_len == 0:
         return False
     main_index = 0
     sub_index = 0
     while main_index < main_len and sub_index < sub_len:
         if main_string[main_index] == sub_string[sub_index]:
             sub_index += 1
         main_index += 1
+    return sub_index == sub_len