File size: 4,356 Bytes
252fde6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
# import logging
# from typing import List, Dict

# import requests
# from bs4 import BeautifulSoup
# from urllib3.exceptions import InsecureRequestWarning

# # Disable SSL warnings for requests
# requests.packages.urllib3.disable_warnings(InsecureRequestWarning)

# logger = logging.getLogger(__name__)

# class WebSearcher:
#     def __init__(self):
#         self.headers = {
#             "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/111.0"
#         }
        
#     def extract_text(self, html_content: str) -> str:
#         soup = BeautifulSoup(html_content, 'html.parser')
#         # Remove unwanted elements
#         for element in soup(['script', 'style', 'nav', 'header', 'footer', 'iframe']):
#             element.decompose()
#         text = ' '.join(soup.stripped_strings)
#         return text[:8000]  # Limit text length

#     def search(self, query: str, max_results: int = 3) -> List[Dict]:
#         results = []
#         try:
#             with requests.Session() as session:
#                 # Google search parameters
#                 search_url = "https://www.google.com/search"
#                 params = {
#                     "q": query,
#                     "num": max_results,
#                     "hl": "en"
#                 }
                
#                 response = session.get(
#                     search_url,
#                     headers=self.headers,
#                     params=params,
#                     timeout=10,
#                     verify=False
#                 )
#                 response.raise_for_status()
                
#                 # Parse search results
#                 soup = BeautifulSoup(response.text, 'html.parser')
#                 search_results = soup.select('div.g')
                
#                 for result in search_results[:max_results]:
#                     link = result.find('a')
#                     if not link:
#                         continue
                        
#                     url = link.get('href', '')
#                     if not url.startswith('http'):
#                         continue
                        
#                     try:
#                         # Fetch webpage content
#                         page_response = session.get(
#                             url,
#                             headers=self.headers,
#                             timeout=5,
#                             verify=False
#                         )
#                         page_response.raise_for_status()
                        
#                         content = self.extract_text(page_response.text)
#                         results.append({
#                             "url": url,
#                             "content": content
#                         })
#                         logger.info(f"Successfully fetched content from {url}")
                        
#                     except Exception as e:
#                         logger.warning(f"Failed to fetch {url}: {str(e)}")
#                         continue
                        
#         except Exception as e:
#             logger.error(f"Search failed: {str(e)}")
            
#         return results[:max_results]




import logging
from typing import List, Dict
from transformers.agents import DuckDuckGoSearchTool

logger = logging.getLogger(__name__)

class WebSearcher:
    """Thin wrapper around ``DuckDuckGoSearchTool`` that normalizes its
    output into the list-of-dicts shape the rest of this module expects."""

    def __init__(self):
        # The tool is directly callable: search_tool(query) -> str | list.
        self.search_tool = DuckDuckGoSearchTool()

    def search(self, query: str) -> List[Dict]:
        """Run a DuckDuckGo search for *query*.

        Returns a single-element list ``[{"url": ..., "content": ...}]``
        holding the tool's combined output (the tool does not expose
        per-result URLs, so a fixed placeholder is used), or an empty
        list if the search raises.
        """
        try:
            raw = self.search_tool(query)

            # The tool may return a list of result snippets; flatten
            # them into one space-separated string.
            if isinstance(raw, list):
                raw = ' '.join(str(item) for item in raw)

            return [{
                "url": "duckduckgo_search",  # placeholder: no per-result URL available
                "content": str(raw),
            }]

        except Exception:
            # Boundary handler: degrade to "no results" instead of
            # crashing the caller; logger.exception keeps the traceback.
            logger.exception("Search failed for query %r", query)
            return []

# Initialize searcher
# Module-level singleton used by importers of this module.
# NOTE(review): instantiating here constructs DuckDuckGoSearchTool() at
# import time (a side effect); importers relying on `searcher` existing
# means this cannot be moved without updating call sites — confirm.
searcher = WebSearcher()