Spaces:
Sleeping
Sleeping
Update market_analysis_crew.py
Browse files- market_analysis_crew.py +73 -114
market_analysis_crew.py
CHANGED
@@ -2,49 +2,26 @@ from crewai import Agent, Task, Crew, Process
|
|
2 |
from langchain_openai import ChatOpenAI
|
3 |
from langchain.tools import Tool
|
4 |
from langchain_community.tools import WriteFileTool
|
5 |
-
from crewai_tools import SerperDevTool,
|
6 |
import os
|
7 |
import time
|
8 |
from pathlib import Path
|
9 |
import json
|
10 |
-
from dotenv import load_dotenv
|
11 |
-
|
12 |
-
# Load environment variables from .env
|
13 |
-
load_dotenv()
|
14 |
-
|
15 |
-
# API credentials are read from the environment only (populated by the
# load_dotenv() call above or by the host's secret store). They must never be
# assigned from string literals in source: the keys that used to be written
# here were published with this file and have to be revoked and re-issued.
REQUIRED_API_KEYS = ("OPENAI_API_KEY", "FIRECRAWL_API_KEY")


def missing_api_keys(env=None):
    """Return the names in REQUIRED_API_KEYS that are unset or empty in *env*.

    Args:
        env: Mapping to inspect; defaults to ``os.environ``.

    Returns:
        A list of the missing key names, in REQUIRED_API_KEYS order.
    """
    env = os.environ if env is None else env
    return [name for name in REQUIRED_API_KEYS if not env.get(name)]


for _missing_key in missing_api_keys():
    # Warn rather than raise so that importing the module keeps working even
    # when a credential has not been configured yet.
    print(f"Warning: {_missing_key} is not set; add it to .env or the environment")
|
18 |
|
19 |
# Initialize tools and models
|
20 |
openai_model = ChatOpenAI(
|
21 |
model_name="gpt-4o-mini",
|
22 |
-
temperature=0.7
|
23 |
-
api_key=os.getenv("OPENAI_API_KEY")
|
24 |
)
|
25 |
|
26 |
class ReportGenerator:
|
27 |
def __init__(self):
|
28 |
-
# Initialize SerperDev with API key from .env
|
29 |
self.search_tool = SerperDevTool(
|
30 |
-
serper_api_key=os.getenv("SERPER_API_KEY"),
|
31 |
search_url="https://google.serper.dev/search",
|
32 |
n_results=10,
|
33 |
)
|
34 |
|
35 |
-
self.scrape_tool =
|
36 |
-
api_key=os.environ["FIRECRAWL_API_KEY"],
|
37 |
-
crawler_options={
|
38 |
-
"maxDepth": 2,
|
39 |
-
"mode": "fast",
|
40 |
-
"generateImgAltText": True,
|
41 |
-
"limit": 10
|
42 |
-
},
|
43 |
-
page_options={
|
44 |
-
"onlyMainContent": True,
|
45 |
-
"includeHtml": False
|
46 |
-
}
|
47 |
-
)
|
48 |
|
49 |
self.write_file_tool = Tool(
|
50 |
name="Write File",
|
@@ -52,11 +29,10 @@ class ReportGenerator:
|
|
52 |
func=self.write_file_tool_wrapper
|
53 |
)
|
54 |
|
55 |
-
# Initialize ChatOpenAI with
|
56 |
self.question_generator = ChatOpenAI(
|
57 |
-
model_name="gpt-
|
58 |
-
temperature=0.7
|
59 |
-
api_key=os.environ["OPENAI_API_KEY"] # Explicitly pass API key
|
60 |
)
|
61 |
|
62 |
# Add default values for common fields
|
@@ -1162,25 +1138,11 @@ Format the report in clear, professional markdown with appropriate headers, bull
|
|
1162 |
return default_questions
|
1163 |
|
1164 |
def scrape_company_website(self, url):
|
1165 |
-
"""Scrape company website using
|
1166 |
try:
|
1167 |
-
|
1168 |
-
|
1169 |
-
# Initialize crawler with proper configuration
|
1170 |
-
crawler = FirecrawlCrawlWebsiteTool(
|
1171 |
-
api_key=os.environ["FIRECRAWL_API_KEY"]
|
1172 |
-
)
|
1173 |
-
|
1174 |
-
# Run the crawler with URL parameter
|
1175 |
-
content = crawler.run(url)
|
1176 |
-
|
1177 |
-
# Convert content to string if needed
|
1178 |
-
if content and not isinstance(content, str):
|
1179 |
-
content = str(content)
|
1180 |
-
|
1181 |
-
print("Website scraping completed successfully")
|
1182 |
return content
|
1183 |
-
|
1184 |
except Exception as e:
|
1185 |
print(f"Error scraping website: {e}")
|
1186 |
return None
|
@@ -1189,18 +1151,20 @@ Format the report in clear, professional markdown with appropriate headers, bull
|
|
1189 |
"""Analyze website content using GPT to detect industry and other details"""
|
1190 |
try:
|
1191 |
if not website_data:
|
1192 |
-
return
|
|
|
|
|
|
|
|
|
|
|
|
|
1193 |
|
1194 |
print("Analyzing website content with AI...")
|
1195 |
|
1196 |
-
# Convert website_data to string and safely truncate
|
1197 |
-
website_content = str(website_data) if website_data else ''
|
1198 |
-
truncated_content = website_content[:2000] if len(website_content) > 2000 else website_content
|
1199 |
-
|
1200 |
prompt = f"""Analyze this website content for {company_name} and provide key business information.
|
1201 |
|
1202 |
Website Content:
|
1203 |
-
{
|
1204 |
|
1205 |
Return ONLY a JSON object with this exact format:
|
1206 |
{{
|
@@ -1212,30 +1176,32 @@ Format the report in clear, professional markdown with appropriate headers, bull
|
|
1212 |
}}
|
1213 |
"""
|
1214 |
|
|
|
1215 |
response = self.question_generator.invoke(prompt).content
|
|
|
|
|
1216 |
response = response.strip()
|
1217 |
if response.startswith('```json'):
|
1218 |
response = response[7:]
|
1219 |
if response.endswith('```'):
|
1220 |
response = response[:-3]
|
1221 |
|
|
|
1222 |
analysis = json.loads(response.strip())
|
1223 |
print("Website analysis completed successfully")
|
|
|
1224 |
return analysis
|
1225 |
|
1226 |
except Exception as e:
|
1227 |
print(f"Error analyzing website: {str(e)}")
|
1228 |
-
|
1229 |
-
|
1230 |
-
|
1231 |
-
|
1232 |
-
|
1233 |
-
|
1234 |
-
|
1235 |
-
|
1236 |
-
"products": ["Unknown"],
|
1237 |
-
"market_focus": "Global"
|
1238 |
-
}
|
1239 |
|
1240 |
def generate_questions(self, context):
|
1241 |
"""Generate questions based on company context and detail level"""
|
@@ -1248,23 +1214,19 @@ Format the report in clear, professional markdown with appropriate headers, bull
|
|
1248 |
|
1249 |
print(f"Generating {detail_level} questions for {report_type}...")
|
1250 |
|
1251 |
-
# Convert website_data to string and safely truncate
|
1252 |
-
website_content = str(website_data) if website_data else ''
|
1253 |
-
truncated_content = website_content[:1000] if len(website_content) > 1000 else website_content
|
1254 |
-
|
1255 |
if detail_level == 'quick':
|
1256 |
prompt = f"""Generate 2-3 brief but specific questions about {company_name} in the {industry} industry.
|
1257 |
Focus on core business metrics and market position.
|
1258 |
|
1259 |
Context:
|
1260 |
-
{
|
1261 |
|
1262 |
Return ONLY a JSON object with this exact format:
|
1263 |
{{
|
1264 |
"questions": [
|
1265 |
{{"id": 1, "question": "Brief, specific question about core metrics?"}},
|
1266 |
{{"id": 2, "question": "Brief question about market position?"}},
|
1267 |
-
{{"id": 3, "question": "Brief question about growth/strategy?"}}
|
1268 |
]
|
1269 |
}}
|
1270 |
"""
|
@@ -1278,7 +1240,7 @@ Format the report in clear, professional markdown with appropriate headers, bull
|
|
1278 |
- Business model
|
1279 |
|
1280 |
Context:
|
1281 |
-
{
|
1282 |
|
1283 |
Return ONLY a JSON object with this exact format:
|
1284 |
{{
|
@@ -1316,48 +1278,45 @@ Format the report in clear, professional markdown with appropriate headers, bull
|
|
1316 |
|
1317 |
except Exception as e:
|
1318 |
print(f"Error generating questions: {str(e)}")
|
1319 |
-
|
1320 |
-
|
1321 |
-
|
1322 |
-
|
1323 |
-
|
1324 |
-
|
1325 |
-
|
1326 |
-
|
1327 |
-
|
1328 |
-
|
1329 |
-
|
1330 |
-
|
1331 |
-
|
1332 |
-
|
1333 |
-
|
1334 |
-
|
1335 |
-
|
1336 |
-
|
1337 |
-
|
1338 |
-
|
1339 |
-
|
1340 |
-
|
1341 |
-
|
1342 |
-
|
1343 |
-
|
1344 |
-
|
1345 |
-
|
1346 |
-
|
1347 |
-
|
1348 |
-
|
1349 |
-
|
1350 |
-
|
1351 |
-
|
1352 |
-
|
1353 |
-
|
1354 |
-
|
1355 |
-
|
1356 |
-
|
1357 |
-
|
1358 |
-
"question": "What are your main customer acquisition channels?"
|
1359 |
-
}
|
1360 |
-
]
|
1361 |
|
1362 |
def create_reports(result, context, report_type):
|
1363 |
"""Create validation and report files"""
|
@@ -1414,4 +1373,4 @@ def get_report_generator():
|
|
1414 |
def get_market_analysis_crew(user_inputs):
|
1415 |
"""Backward compatibility function for existing code"""
|
1416 |
generator = ReportGenerator()
|
1417 |
-
return generator.create_market_analysis_crew(user_inputs)
|
|
|
2 |
from langchain_openai import ChatOpenAI
|
3 |
from langchain.tools import Tool
|
4 |
from langchain_community.tools import WriteFileTool
|
5 |
+
from crewai_tools import SerperDevTool, ScrapeWebsiteTool
|
6 |
import os
|
7 |
import time
|
8 |
from pathlib import Path
|
9 |
import json
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
10 |
|
11 |
# Initialize tools and models
|
12 |
openai_model = ChatOpenAI(
|
13 |
model_name="gpt-4o-mini",
|
14 |
+
temperature=0.7
|
|
|
15 |
)
|
16 |
|
17 |
class ReportGenerator:
|
18 |
def __init__(self):
|
|
|
19 |
self.search_tool = SerperDevTool(
|
|
|
20 |
search_url="https://google.serper.dev/search",
|
21 |
n_results=10,
|
22 |
)
|
23 |
|
24 |
+
self.scrape_tool = ScrapeWebsiteTool()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
25 |
|
26 |
self.write_file_tool = Tool(
|
27 |
name="Write File",
|
|
|
29 |
func=self.write_file_tool_wrapper
|
30 |
)
|
31 |
|
32 |
+
# Initialize ChatOpenAI with higher temperature for more creative inference
|
33 |
self.question_generator = ChatOpenAI(
|
34 |
+
model_name="gpt-4-turbo-preview",
|
35 |
+
temperature=0.7
|
|
|
36 |
)
|
37 |
|
38 |
# Add default values for common fields
|
|
|
1138 |
return default_questions
|
1139 |
|
1140 |
def scrape_company_website(self, url):
    """Fetch the content of a company website as text.

    Args:
        url: Address of the page to scrape.

    Returns:
        The scraper's output (converted to ``str`` when it is some other
        type), or None when no URL is given or scraping raises.
    """
    if not url:
        # Nothing to fetch; skip building the tool for a blank address.
        print("Error scraping website: no URL provided")
        return None

    try:
        scraper = ScrapeWebsiteTool(website_url=url)
        content = scraper.run()
    except Exception as e:
        # Best-effort: report the failure and signal "no data" with None.
        print(f"Error scraping website: {e}")
        return None

    # Prompt-building code in this file slices the result
    # (website_data[:2000]), so hand back text rather than an arbitrary object.
    if content is None or isinstance(content, str):
        return content
    return str(content)
|
|
|
1151 |
"""Analyze website content using GPT to detect industry and other details"""
|
1152 |
try:
|
1153 |
if not website_data:
|
1154 |
+
return {
|
1155 |
+
"industry": "Technology",
|
1156 |
+
"business_model": "B2B",
|
1157 |
+
"target_market": "General",
|
1158 |
+
"products": ["Unknown"],
|
1159 |
+
"market_focus": "Global"
|
1160 |
+
}
|
1161 |
|
1162 |
print("Analyzing website content with AI...")
|
1163 |
|
|
|
|
|
|
|
|
|
1164 |
prompt = f"""Analyze this website content for {company_name} and provide key business information.
|
1165 |
|
1166 |
Website Content:
|
1167 |
+
{website_data[:2000]}
|
1168 |
|
1169 |
Return ONLY a JSON object with this exact format:
|
1170 |
{{
|
|
|
1176 |
}}
|
1177 |
"""
|
1178 |
|
1179 |
+
# Use the question generator (ChatGPT) to analyze
|
1180 |
response = self.question_generator.invoke(prompt).content
|
1181 |
+
|
1182 |
+
# Clean the response
|
1183 |
response = response.strip()
|
1184 |
if response.startswith('```json'):
|
1185 |
response = response[7:]
|
1186 |
if response.endswith('```'):
|
1187 |
response = response[:-3]
|
1188 |
|
1189 |
+
# Parse JSON response
|
1190 |
analysis = json.loads(response.strip())
|
1191 |
print("Website analysis completed successfully")
|
1192 |
+
|
1193 |
return analysis
|
1194 |
|
1195 |
except Exception as e:
|
1196 |
print(f"Error analyzing website: {str(e)}")
|
1197 |
+
# Return default values if analysis fails
|
1198 |
+
return {
|
1199 |
+
"industry": "Technology",
|
1200 |
+
"business_model": "B2B",
|
1201 |
+
"target_market": "General",
|
1202 |
+
"products": ["Unknown"],
|
1203 |
+
"market_focus": "Global"
|
1204 |
+
}
|
|
|
|
|
|
|
1205 |
|
1206 |
def generate_questions(self, context):
|
1207 |
"""Generate questions based on company context and detail level"""
|
|
|
1214 |
|
1215 |
print(f"Generating {detail_level} questions for {report_type}...")
|
1216 |
|
|
|
|
|
|
|
|
|
1217 |
if detail_level == 'quick':
|
1218 |
prompt = f"""Generate 2-3 brief but specific questions about {company_name} in the {industry} industry.
|
1219 |
Focus on core business metrics and market position.
|
1220 |
|
1221 |
Context:
|
1222 |
+
{website_data[:1000]}
|
1223 |
|
1224 |
Return ONLY a JSON object with this exact format:
|
1225 |
{{
|
1226 |
"questions": [
|
1227 |
{{"id": 1, "question": "Brief, specific question about core metrics?"}},
|
1228 |
{{"id": 2, "question": "Brief question about market position?"}},
|
1229 |
+
{{"id": 3, "question": "Brief question about growth/strategy?"}},
|
1230 |
]
|
1231 |
}}
|
1232 |
"""
|
|
|
1240 |
- Business model
|
1241 |
|
1242 |
Context:
|
1243 |
+
{website_data[:1500]}
|
1244 |
|
1245 |
Return ONLY a JSON object with this exact format:
|
1246 |
{{
|
|
|
1278 |
|
1279 |
except Exception as e:
|
1280 |
print(f"Error generating questions: {str(e)}")
|
1281 |
+
# Return default questions based on detail level
|
1282 |
+
if detail_level == 'quick':
|
1283 |
+
return [
|
1284 |
+
{
|
1285 |
+
"id": 1,
|
1286 |
+
"question": f"What is {company_name}'s main competitive advantage in the {industry} market?"
|
1287 |
+
},
|
1288 |
+
{
|
1289 |
+
"id": 2,
|
1290 |
+
"question": "Who are your top 2-3 direct competitors?"
|
1291 |
+
},
|
1292 |
+
{
|
1293 |
+
"id": 3,
|
1294 |
+
"question": "What is your primary revenue stream?"
|
1295 |
+
}
|
1296 |
+
]
|
1297 |
+
else:
|
1298 |
+
return [
|
1299 |
+
{
|
1300 |
+
"id": 1,
|
1301 |
+
"question": f"What unique value proposition does {company_name} offer in the {industry} space?"
|
1302 |
+
},
|
1303 |
+
{
|
1304 |
+
"id": 2,
|
1305 |
+
"question": "Who are your main competitors and how do you differentiate?"
|
1306 |
+
},
|
1307 |
+
{
|
1308 |
+
"id": 3,
|
1309 |
+
"question": "What are your key growth metrics and targets?"
|
1310 |
+
},
|
1311 |
+
{
|
1312 |
+
"id": 4,
|
1313 |
+
"question": "What market opportunities are you targeting?"
|
1314 |
+
},
|
1315 |
+
{
|
1316 |
+
"id": 5,
|
1317 |
+
"question": "What are your main customer acquisition channels?"
|
1318 |
+
}
|
1319 |
+
]
|
|
|
|
|
|
|
1320 |
|
1321 |
def create_reports(result, context, report_type):
|
1322 |
"""Create validation and report files"""
|
|
|
1373 |
def get_market_analysis_crew(user_inputs):
    """Backward-compatible entry point for existing callers.

    Builds a new ReportGenerator on each call and returns whatever its
    create_market_analysis_crew(user_inputs) returns.
    """
    return ReportGenerator().create_market_analysis_crew(user_inputs)
|