talexm committed on
Commit
9a25cef
·
1 Parent(s): f7e7778

adding example for query protection search

Browse files
rag_sec/README.md ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ # RAG-Chagu Test Suite
3
+
4
+ ## Overview
5
+ This project demonstrates a RAG system enhanced with Chagu features for:
6
+ - Data Poisoning Detection
7
+ - Model Drift Handling
8
+ - Query Injection Attack Prevention
9
+ - Adversarial Embedding Detection
10
+
11
+ ## Setup
12
+
13
+ ### Install Dependencies
14
+ ```bash
15
+ pip install -r requirements.txt
16
+ ```
17
+
18
+ ### Run the Test Suite
19
+ ```bash
20
+ python rag_chagu_demo.py
21
+ ```
22
+
23
+ ## Requirements
24
+ - Python 3.8 or higher
rag_sec/__init__.py ADDED
File without changes
rag_sec/__pycache__/__init__.cpython-38.pyc ADDED
Binary file (150 Bytes). View file
 
rag_sec/__pycache__/rag_chagu_demo.cpython-38-pytest-8.3.2.pyc ADDED
Binary file (3.5 kB). View file
 
rag_sec/rag_chagu_demo.py ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from pathlib import Path
3
+ from difflib import get_close_matches
4
+
5
class DocumentSearcher:
    """In-memory document store with a naive injection filter and fuzzy search.

    Documents are plain strings collected in ``self.documents``. Queries are
    first screened against ``self.malicious_patterns`` (case-insensitive
    substring match) and, if clean, matched against the documents with
    ``difflib.get_close_matches``.
    """

    def __init__(self):
        # Raw text of every loaded document, in load order.
        self.documents = []
        # Substrings that cause a query to be rejected outright.
        self.malicious_patterns = ["DROP TABLE", "SELECT *", "INSERT INTO", "DELETE FROM", "--", ";"]

    def _load_directory(self, directory, pattern="*"):
        """Append the text of every regular file in *directory* matching *pattern*.

        A missing directory is treated as empty, and non-file entries
        (e.g. subdirectories) are skipped instead of crashing ``open``.
        Files are read in sorted order so the corpus is reproducible.

        Returns:
            The number of files that were loaded.
        """
        if not directory.is_dir():
            return 0

        loaded = 0
        for path in sorted(directory.glob(pattern)):
            if not path.is_file():
                continue
            with open(path, "r", encoding="utf-8") as file:
                self.documents.append(file.read())
            loaded += 1
        return loaded

    def load_imdb_data(self):
        """Load the IMDB training reviews from ``$HOME/data-sets/aclImdb/train``.

        Reads every file under the ``pos`` and ``neg`` sub-directories.
        A missing or empty directory is reported on stdout and skipped
        rather than raising ``FileNotFoundError``.
        """
        # Define the dataset path using the HOME environment variable
        home_dir = Path(os.getenv("HOME", "/"))  # Fallback to root if HOME is not set
        data_dir = home_dir / "data-sets/aclImdb/train"

        pos_dir = data_dir / "pos"
        neg_dir = data_dir / "neg"

        print(f"Looking for positive reviews in: {pos_dir}")
        print(f"Looking for negative reviews in: {neg_dir}")

        positive_count = self._load_directory(pos_dir)
        if positive_count == 0:
            print("No positive reviews found.")

        negative_count = self._load_directory(neg_dir)
        if negative_count == 0:
            print("No negative reviews found.")

        print(f"Loaded {positive_count + negative_count} movie reviews from IMDB dataset.")

    def load_txt_files(self, txt_dir=None):
        """Load every ``*.txt`` file found directly inside *txt_dir*.

        Args:
            txt_dir: Directory to scan, as a ``str`` or ``Path``. Defaults to
                ``$HOME/data-sets/txt-files/`` when ``None``.
        """
        if txt_dir is None:
            home_dir = Path(os.getenv("HOME", "/"))
            txt_dir = home_dir / "data-sets/txt-files/"
        else:
            # Accept plain strings as well as Path objects.
            txt_dir = Path(txt_dir)

        if not txt_dir.is_dir():
            print("No .txt files directory found.")
            return

        added = self._load_directory(txt_dir, "*.txt")

        # Report only what this call added, not the size of the whole corpus.
        print(f"Loaded additional {added} documents from .txt files.")

    def is_query_malicious(self, query):
        """Return True if *query* contains any blocked pattern (case-insensitive)."""
        lowered_query = query.lower()  # hoisted: invariant across patterns
        for pattern in self.malicious_patterns:
            if pattern.lower() in lowered_query:
                print(f"Warning: Malicious query detected - {pattern}")
                return True
        return False

    def search_documents(self, query):
        """Return up to five documents that fuzzily match *query*.

        Returns:
            A list of dicts, each with a ``"document"`` key (the first 100
            characters followed by ``"..."``, or a status message) and a
            ``"similarity"`` key (the ``SequenceMatcher`` ratio in [0, 1];
            ``0.0`` for blocked queries and for the no-match placeholder).
        """
        # Local import keeps the module's top-level imports untouched.
        from difflib import SequenceMatcher

        if self.is_query_malicious(query):
            return [{"document": "ANOMALY: Query blocked due to detected malicious content.", "similarity": 0.0}]

        # Use fuzzy matching for normal queries
        matches = get_close_matches(query, self.documents, n=5, cutoff=0.3)

        if not matches:
            return [{"document": "No matching documents found.", "similarity": 0.0}]

        # Same argument order get_close_matches uses internally (seq1=document,
        # seq2=query), so the reported score is the one that ranked the match.
        return [
            {
                "document": match[:100] + "...",
                "similarity": SequenceMatcher(None, match, query).ratio(),
            }
            for match in matches
        ]
72
+
73
+ # Test the system with normal and malicious queries
74
def test_document_search():
    """Run one benign and one injection-style query through a loaded searcher.

    Loads the IMDB reviews and any extra .txt files, then prints the
    results of each query under its own heading.
    """
    engine = DocumentSearcher()

    # Populate the corpus: IMDB reviews first, then the extra .txt files.
    engine.load_imdb_data()
    engine.load_txt_files()

    # (heading, query) pairs, executed in order: normal query, then the attack.
    scenarios = (
        ("Normal Query Results:", "This movie had great acting and a compelling storyline."),
        ("\nMalicious Query Results:", "DROP TABLE reviews; SELECT * FROM confidential_data;"),
    )

    for heading, query in scenarios:
        # Search before printing the heading so any warning emitted by the
        # searcher appears ahead of the heading, as in a straight-line run.
        hits = engine.search_documents(query)
        print(heading)
        for hit in hits:
            print(f"Document: {hit['document']}")
96
+
97
+ if __name__ == "__main__":
98
+ test_document_search()
rag_sec/requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+
2
+ sentence-transformers
3
+ numpy
4
+ scikit-learn
5
+ faiss-cpu
6
+ pandas
sec-rag-model/__init__.py ADDED
File without changes