varun324242 committed on
Commit
6f26806
·
verified ·
1 Parent(s): 3e7b1b4

Update market_analysis_crew.py

Browse files
Files changed (1) hide show
  1. market_analysis_crew.py +73 -114
market_analysis_crew.py CHANGED
@@ -2,49 +2,26 @@ from crewai import Agent, Task, Crew, Process
2
  from langchain_openai import ChatOpenAI
3
  from langchain.tools import Tool
4
  from langchain_community.tools import WriteFileTool
5
- from crewai_tools import SerperDevTool, FirecrawlCrawlWebsiteTool
6
  import os
7
  import time
8
  from pathlib import Path
9
  import json
10
- from dotenv import load_dotenv
11
-
12
- # Load environment variables from .env
13
- load_dotenv()
14
-
15
- # Set OpenAI API key
16
- os.environ["OPENAI_API_KEY"] = "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"
17
- os.environ["FIRECRAWL_API_KEY"] = "fc-042d8dd61b82422888b7c23b7bc9e369" # Added Firecrawl API key
18
 
19
  # Initialize tools and models
20
  openai_model = ChatOpenAI(
21
  model_name="gpt-4o-mini",
22
- temperature=0.7,
23
- api_key=os.getenv("OPENAI_API_KEY")
24
  )
25
 
26
  class ReportGenerator:
27
  def __init__(self):
28
- # Initialize SerperDev with API key from .env
29
  self.search_tool = SerperDevTool(
30
- serper_api_key=os.getenv("SERPER_API_KEY"),
31
  search_url="https://google.serper.dev/search",
32
  n_results=10,
33
  )
34
 
35
- self.scrape_tool = FirecrawlCrawlWebsiteTool(
36
- api_key=os.environ["FIRECRAWL_API_KEY"],
37
- crawler_options={
38
- "maxDepth": 2,
39
- "mode": "fast",
40
- "generateImgAltText": True,
41
- "limit": 10
42
- },
43
- page_options={
44
- "onlyMainContent": True,
45
- "includeHtml": False
46
- }
47
- )
48
 
49
  self.write_file_tool = Tool(
50
  name="Write File",
@@ -52,11 +29,10 @@ class ReportGenerator:
52
  func=self.write_file_tool_wrapper
53
  )
54
 
55
- # Initialize ChatOpenAI with API key
56
  self.question_generator = ChatOpenAI(
57
- model_name="gpt-4o-mini",
58
- temperature=0.7,
59
- api_key=os.environ["OPENAI_API_KEY"] # Explicitly pass API key
60
  )
61
 
62
  # Add default values for common fields
@@ -1162,25 +1138,11 @@ Format the report in clear, professional markdown with appropriate headers, bull
1162
  return default_questions
1163
 
1164
  def scrape_company_website(self, url):
1165
- """Scrape company website using Firecrawl"""
1166
  try:
1167
- print(f"Scraping website: {url}")
1168
-
1169
- # Initialize crawler with proper configuration
1170
- crawler = FirecrawlCrawlWebsiteTool(
1171
- api_key=os.environ["FIRECRAWL_API_KEY"]
1172
- )
1173
-
1174
- # Run the crawler with URL parameter
1175
- content = crawler.run(url)
1176
-
1177
- # Convert content to string if needed
1178
- if content and not isinstance(content, str):
1179
- content = str(content)
1180
-
1181
- print("Website scraping completed successfully")
1182
  return content
1183
-
1184
  except Exception as e:
1185
  print(f"Error scraping website: {e}")
1186
  return None
@@ -1189,18 +1151,20 @@ Format the report in clear, professional markdown with appropriate headers, bull
1189
  """Analyze website content using GPT to detect industry and other details"""
1190
  try:
1191
  if not website_data:
1192
- return self.get_default_analysis()
 
 
 
 
 
 
1193
 
1194
  print("Analyzing website content with AI...")
1195
 
1196
- # Convert website_data to string and safely truncate
1197
- website_content = str(website_data) if website_data else ''
1198
- truncated_content = website_content[:2000] if len(website_content) > 2000 else website_content
1199
-
1200
  prompt = f"""Analyze this website content for {company_name} and provide key business information.
1201
 
1202
  Website Content:
1203
- {truncated_content}
1204
 
1205
  Return ONLY a JSON object with this exact format:
1206
  {{
@@ -1212,30 +1176,32 @@ Format the report in clear, professional markdown with appropriate headers, bull
1212
  }}
1213
  """
1214
 
 
1215
  response = self.question_generator.invoke(prompt).content
 
 
1216
  response = response.strip()
1217
  if response.startswith('```json'):
1218
  response = response[7:]
1219
  if response.endswith('```'):
1220
  response = response[:-3]
1221
 
 
1222
  analysis = json.loads(response.strip())
1223
  print("Website analysis completed successfully")
 
1224
  return analysis
1225
 
1226
  except Exception as e:
1227
  print(f"Error analyzing website: {str(e)}")
1228
- return self.get_default_analysis()
1229
-
1230
- def get_default_analysis(self):
1231
- """Return default analysis values"""
1232
- return {
1233
- "industry": "Technology",
1234
- "business_model": "B2B",
1235
- "target_market": "General",
1236
- "products": ["Unknown"],
1237
- "market_focus": "Global"
1238
- }
1239
 
1240
  def generate_questions(self, context):
1241
  """Generate questions based on company context and detail level"""
@@ -1248,23 +1214,19 @@ Format the report in clear, professional markdown with appropriate headers, bull
1248
 
1249
  print(f"Generating {detail_level} questions for {report_type}...")
1250
 
1251
- # Convert website_data to string and safely truncate
1252
- website_content = str(website_data) if website_data else ''
1253
- truncated_content = website_content[:1000] if len(website_content) > 1000 else website_content
1254
-
1255
  if detail_level == 'quick':
1256
  prompt = f"""Generate 2-3 brief but specific questions about {company_name} in the {industry} industry.
1257
  Focus on core business metrics and market position.
1258
 
1259
  Context:
1260
- {truncated_content}
1261
 
1262
  Return ONLY a JSON object with this exact format:
1263
  {{
1264
  "questions": [
1265
  {{"id": 1, "question": "Brief, specific question about core metrics?"}},
1266
  {{"id": 2, "question": "Brief question about market position?"}},
1267
- {{"id": 3, "question": "Brief question about growth/strategy?"}}
1268
  ]
1269
  }}
1270
  """
@@ -1278,7 +1240,7 @@ Format the report in clear, professional markdown with appropriate headers, bull
1278
  - Business model
1279
 
1280
  Context:
1281
- {truncated_content}
1282
 
1283
  Return ONLY a JSON object with this exact format:
1284
  {{
@@ -1316,48 +1278,45 @@ Format the report in clear, professional markdown with appropriate headers, bull
1316
 
1317
  except Exception as e:
1318
  print(f"Error generating questions: {str(e)}")
1319
- return self.get_default_questions(company_name, industry, detail_level)
1320
-
1321
- def get_default_questions(self, company_name, industry, detail_level):
1322
- """Return default questions based on company name, industry, and detail level"""
1323
- if detail_level == 'quick':
1324
- return [
1325
- {
1326
- "id": 1,
1327
- "question": f"What is {company_name}'s main competitive advantage in the {industry} market?"
1328
- },
1329
- {
1330
- "id": 2,
1331
- "question": "Who are your top 2-3 direct competitors?"
1332
- },
1333
- {
1334
- "id": 3,
1335
- "question": "What is your primary revenue stream?"
1336
- }
1337
- ]
1338
- else:
1339
- return [
1340
- {
1341
- "id": 1,
1342
- "question": f"What unique value proposition does {company_name} offer in the {industry} space?"
1343
- },
1344
- {
1345
- "id": 2,
1346
- "question": "Who are your main competitors and how do you differentiate?"
1347
- },
1348
- {
1349
- "id": 3,
1350
- "question": "What are your key growth metrics and targets?"
1351
- },
1352
- {
1353
- "id": 4,
1354
- "question": "What market opportunities are you targeting?"
1355
- },
1356
- {
1357
- "id": 5,
1358
- "question": "What are your main customer acquisition channels?"
1359
- }
1360
- ]
1361
 
1362
  def create_reports(result, context, report_type):
1363
  """Create validation and report files"""
@@ -1414,4 +1373,4 @@ def get_report_generator():
1414
  def get_market_analysis_crew(user_inputs):
1415
  """Backward compatibility function for existing code"""
1416
  generator = ReportGenerator()
1417
- return generator.create_market_analysis_crew(user_inputs)
 
2
  from langchain_openai import ChatOpenAI
3
  from langchain.tools import Tool
4
  from langchain_community.tools import WriteFileTool
5
+ from crewai_tools import SerperDevTool, ScrapeWebsiteTool
6
  import os
7
  import time
8
  from pathlib import Path
9
  import json
 
 
 
 
 
 
 
 
10
 
11
  # Initialize tools and models
12
  openai_model = ChatOpenAI(
13
  model_name="gpt-4o-mini",
14
+ temperature=0.7
 
15
  )
16
 
17
  class ReportGenerator:
18
  def __init__(self):
 
19
  self.search_tool = SerperDevTool(
 
20
  search_url="https://google.serper.dev/search",
21
  n_results=10,
22
  )
23
 
24
+ self.scrape_tool = ScrapeWebsiteTool()
 
 
 
 
 
 
 
 
 
 
 
 
25
 
26
  self.write_file_tool = Tool(
27
  name="Write File",
 
29
  func=self.write_file_tool_wrapper
30
  )
31
 
32
+ # Initialize ChatOpenAI with higher temperature for more creative inference
33
  self.question_generator = ChatOpenAI(
34
+ model_name="gpt-4-turbo-preview",
35
+ temperature=0.7
 
36
  )
37
 
38
  # Add default values for common fields
 
1138
  return default_questions
1139
 
1140
  def scrape_company_website(self, url):
1141
+ """Scrape company website using existing tools"""
1142
  try:
1143
+ scraper = ScrapeWebsiteTool(website_url=url)
1144
+ content = scraper.run()
 
 
 
 
 
 
 
 
 
 
 
 
 
1145
  return content
 
1146
  except Exception as e:
1147
  print(f"Error scraping website: {e}")
1148
  return None
 
1151
  """Analyze website content using GPT to detect industry and other details"""
1152
  try:
1153
  if not website_data:
1154
+ return {
1155
+ "industry": "Technology",
1156
+ "business_model": "B2B",
1157
+ "target_market": "General",
1158
+ "products": ["Unknown"],
1159
+ "market_focus": "Global"
1160
+ }
1161
 
1162
  print("Analyzing website content with AI...")
1163
 
 
 
 
 
1164
  prompt = f"""Analyze this website content for {company_name} and provide key business information.
1165
 
1166
  Website Content:
1167
+ {website_data[:2000]}
1168
 
1169
  Return ONLY a JSON object with this exact format:
1170
  {{
 
1176
  }}
1177
  """
1178
 
1179
+ # Use the question generator (ChatGPT) to analyze
1180
  response = self.question_generator.invoke(prompt).content
1181
+
1182
+ # Clean the response
1183
  response = response.strip()
1184
  if response.startswith('```json'):
1185
  response = response[7:]
1186
  if response.endswith('```'):
1187
  response = response[:-3]
1188
 
1189
+ # Parse JSON response
1190
  analysis = json.loads(response.strip())
1191
  print("Website analysis completed successfully")
1192
+
1193
  return analysis
1194
 
1195
  except Exception as e:
1196
  print(f"Error analyzing website: {str(e)}")
1197
+ # Return default values if analysis fails
1198
+ return {
1199
+ "industry": "Technology",
1200
+ "business_model": "B2B",
1201
+ "target_market": "General",
1202
+ "products": ["Unknown"],
1203
+ "market_focus": "Global"
1204
+ }
 
 
 
1205
 
1206
  def generate_questions(self, context):
1207
  """Generate questions based on company context and detail level"""
 
1214
 
1215
  print(f"Generating {detail_level} questions for {report_type}...")
1216
 
 
 
 
 
1217
  if detail_level == 'quick':
1218
  prompt = f"""Generate 2-3 brief but specific questions about {company_name} in the {industry} industry.
1219
  Focus on core business metrics and market position.
1220
 
1221
  Context:
1222
+ {website_data[:1000]}
1223
 
1224
  Return ONLY a JSON object with this exact format:
1225
  {{
1226
  "questions": [
1227
  {{"id": 1, "question": "Brief, specific question about core metrics?"}},
1228
  {{"id": 2, "question": "Brief question about market position?"}},
1229
+ {{"id": 3, "question": "Brief question about growth/strategy?"}},
1230
  ]
1231
  }}
1232
  """
 
1240
  - Business model
1241
 
1242
  Context:
1243
+ {website_data[:1500]}
1244
 
1245
  Return ONLY a JSON object with this exact format:
1246
  {{
 
1278
 
1279
  except Exception as e:
1280
  print(f"Error generating questions: {str(e)}")
1281
+ # Return default questions based on detail level
1282
+ if detail_level == 'quick':
1283
+ return [
1284
+ {
1285
+ "id": 1,
1286
+ "question": f"What is {company_name}'s main competitive advantage in the {industry} market?"
1287
+ },
1288
+ {
1289
+ "id": 2,
1290
+ "question": "Who are your top 2-3 direct competitors?"
1291
+ },
1292
+ {
1293
+ "id": 3,
1294
+ "question": "What is your primary revenue stream?"
1295
+ }
1296
+ ]
1297
+ else:
1298
+ return [
1299
+ {
1300
+ "id": 1,
1301
+ "question": f"What unique value proposition does {company_name} offer in the {industry} space?"
1302
+ },
1303
+ {
1304
+ "id": 2,
1305
+ "question": "Who are your main competitors and how do you differentiate?"
1306
+ },
1307
+ {
1308
+ "id": 3,
1309
+ "question": "What are your key growth metrics and targets?"
1310
+ },
1311
+ {
1312
+ "id": 4,
1313
+ "question": "What market opportunities are you targeting?"
1314
+ },
1315
+ {
1316
+ "id": 5,
1317
+ "question": "What are your main customer acquisition channels?"
1318
+ }
1319
+ ]
 
 
 
1320
 
1321
  def create_reports(result, context, report_type):
1322
  """Create validation and report files"""
 
1373
  def get_market_analysis_crew(user_inputs):
1374
  """Backward compatibility function for existing code"""
1375
  generator = ReportGenerator()
1376
+ return generator.create_market_analysis_crew(user_inputs)