arssite commited on
Commit
a599a69
·
verified ·
1 Parent(s): c1f211e

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +126 -0
app.py ADDED
@@ -0,0 +1,126 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from langchain_community.document_loaders import WebBaseLoader
3
+ #from chains import Chain
4
+ #from portfolio import Portfolio
5
+ #from utils import clean_text
6
+ import re
7
+ import pandas as pd
8
+ import chromadb
9
+ import uuid
10
+ import os
11
+ from langchain_groq import ChatGroq
12
+ from langchain_core.prompts import PromptTemplate
13
+ from langchain_core.output_parsers import JsonOutputParser
14
+ from langchain_core.exceptions import OutputParserException
15
+ #from dotenv import load_dotenv
16
+ from google.colab import userdata
17
+
18
+
19
+ #load_dotenv()
20
+
21
class Chain:
    """LLM-backed chain: extracts job postings from scraped career pages and
    drafts cold emails referencing portfolio links."""

    def __init__(self):
        # SECURITY FIX: the Groq API key was hard-coded in source (a leaked
        # key must be revoked). Read it from the environment instead —
        # set GROQ_API_KEY before running the app.
        self.llm = ChatGroq(
            temperature=0,
            groq_api_key=os.environ.get("GROQ_API_KEY"),
            model_name="llama-3.1-70b-versatile",
        )

    def extract_jobs(self, cleaned_text):
        """Extract job postings from cleaned page text.

        Returns a list of dicts, each expected to carry the keys
        `role`, `experience`, `skills` and `description`.
        Raises OutputParserException if the LLM response cannot be
        parsed as JSON.
        """
        prompt_extract = PromptTemplate.from_template(
            """
            ### SCRAPED TEXT FROM WEBSITE:
            {page_data}
            ### INSTRUCTION:
            The scraped text is from the career's page of a website.
            Your job is to extract the job postings and return them in JSON format containing the following keys: `role`, `experience`, `skills` and `description`.
            Only return the valid JSON.
            ### VALID JSON (NO PREAMBLE):
            """
        )
        chain_extract = prompt_extract | self.llm
        res = chain_extract.invoke(input={"page_data": cleaned_text})
        try:
            json_parser = JsonOutputParser()
            res = json_parser.parse(res.content)
        except OutputParserException:
            raise OutputParserException("Context too big. Unable to parse jobs.")
        # Normalize: a single posting parses to a dict, several to a list.
        return res if isinstance(res, list) else [res]

    def write_mail(self, job, links):
        """Draft a cold email for `job`, citing the portfolio `links`.

        Returns the email body as a string (no preamble).
        """
        prompt_email = PromptTemplate.from_template(
            """
            ### JOB DESCRIPTION:
            {job_description}

            ### INSTRUCTION:
            You are Anmol R Srivastava, a student pursuing a bachelor's degree in Computer Science Engineering with a specialization in Artificial Intelligence and Machine Learning, graduating in 2025. You have experience in cloud computing, AI, and software development, focusing on building AI-driven systems for various applications. Your task is to write a cold email to a potential client regarding a project that involves creating a predictive analytics tool for supply chain management. Highlight your expertise in AI and machine learning, particularly in predictive models and scalable solutions. Mention your ability to deliver customized and efficient systems tailored to client needs. Also, include your portfolio links to showcase your work:

            GitHub: https://github.com/arssite
            LinkedIn: https://www.linkedin.com/in/anmol-r-srivastava/
            Hugging Face: https://huggingface.co/arssite
            Contact email: [email protected]
            Also add the most relevant ones from the following links to showcase My Resume: {link_list}
            .
            Do not provide a preamble.
            ### EMAIL (NO PREAMBLE):

            """
        )
        chain_email = prompt_email | self.llm
        res = chain_email.invoke({"job_description": str(job), "link_list": links})
        return res.content
69
+
70
+
71
+
72
class Portfolio:
    """Loads Techstack -> Links rows from a CSV and indexes them in a
    persistent Chroma vector store for skill-based lookup."""

    def __init__(self, file_path="links.csv"):
        self.file_path = file_path
        self.data = pd.read_csv(file_path)
        self.chroma_client = chromadb.PersistentClient('vectorstore')
        self.collection = self.chroma_client.get_or_create_collection(name="portfolio")

    def load_portfolio(self):
        """Index every CSV row into the collection, once (no-op if populated)."""
        if self.collection.count():
            return
        for _, record in self.data.iterrows():
            self.collection.add(
                documents=record["Techstack"],
                metadatas={"links": record["Links"]},
                ids=[str(uuid.uuid4())],
            )

    def query_links(self, skills):
        """Return metadata (portfolio links) for the 2 entries best matching `skills`."""
        results = self.collection.query(query_texts=skills, n_results=2)
        return results.get('metadatas', [])
88
def clean_text(text):
    """Strip HTML tags, URLs, non-alphanumeric characters and redundant
    whitespace from `text`, returning a single-spaced, trimmed string."""
    # Drop HTML tags, then URLs, then everything but letters/digits/spaces.
    without_html = re.sub(r'<[^>]*?>', '', text)
    without_urls = re.sub(
        r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+',
        '',
        without_html,
    )
    alnum_only = re.sub(r'[^a-zA-Z0-9 ]', '', without_urls)
    # Collapse runs of whitespace and trim the ends.
    collapsed = re.sub(r'\s{2,}', ' ', alnum_only)
    return ' '.join(collapsed.strip().split())
102
def create_streamlit_app(llm, portfolio, clean_text):
    """Render the Streamlit UI: accept a URL, scrape and clean the page,
    extract job postings, and emit one cold email per job."""
    st.title("📧 Cold eMail Generator")
    url_input = st.text_input("Enter a URL:", value="write Website or JD url")
    submit_button = st.button("Submit")

    # Nothing to do until the user submits a URL.
    if not submit_button:
        return
    try:
        page = WebBaseLoader([url_input]).load().pop()
        data = clean_text(page.page_content)
        portfolio.load_portfolio()
        for job in llm.extract_jobs(data):
            links = portfolio.query_links(job.get('skills', []))
            email = llm.write_mail(job, links)
            st.code(email, language='markdown')
    except Exception as e:
        st.error(f"An Error Occurred: {e}")
120
+
121
+
122
if __name__ == "__main__":
    # Entry point: build the LLM chain and the portfolio store, configure
    # the page, then launch the Streamlit UI.
    email_chain = Chain()
    portfolio_store = Portfolio()
    st.set_page_config(layout="wide", page_title="Cold Email Generator by ARS", page_icon="📧")
    create_streamlit_app(email_chain, portfolio_store, clean_text)