pwilczewski committed on
Commit
172e154
·
1 Parent(s): e35535e

full analysis

Browse files
Files changed (1) hide show
  1. app.py +58 -20
app.py CHANGED
@@ -9,23 +9,8 @@ import os
9
 
10
  # df = pd.read_csv("HOUST.csv")
11
  df = pd.read_csv("USSTHPI.csv")
12
- df.to_csv("testing.csv")
13
- df2 = pd.read_csv("testing.csv")
14
  python_repl_tool = PythonAstREPLTool(locals={"df": df})
15
 
16
- import matplotlib.pyplot as nplt
17
- def gen_plot(name):
18
- nplt.figure(figsize=(10, 5))
19
- nplt.plot(df['DATE'], df['USSTHPI'], label='USSTHPI')
20
- nplt.title('Time Series of USSTHPI')
21
- nplt.xlabel('Date')
22
- nplt.ylabel('USSTHPI')
23
- nplt.legend()
24
- nplt.xticks(rotation=45)
25
- nplt.tight_layout()
26
- nplt.savefig('plots/plot.png')
27
- return "plots/plot.png"
28
-
29
  # cell 2
30
  from langchain.agents import AgentExecutor, create_openai_tools_agent
31
  from langchain_core.messages import BaseMessage, HumanMessage, SystemMessage
@@ -88,21 +73,74 @@ llm = ChatOpenAI(model="gpt-4o-mini-2024-07-18", temperature=0, api_key=OPENAI_A
88
  llm_big = ChatOpenAI(model="gpt-4o", temperature=0, api_key=OPENAI_API_KEY)
89
 
90
  eda_task = """Using the data in the dataframe `df` and the package statsmodels, first run an augmented dickey fuller test on the data.
91
- Using matplotlib plot the time series, display it and save it to 'plots/plot.png'.
92
- Next use the statsmodel package to generate an ACF plot with zero flag set to False, display it and save it to 'plots/acf.png'.
93
- Then use the statsmodel package to generate a PACF plot with zero flag set to False, display it and save it to 'plots/pacf.png'"""
94
  eda_agent = create_agent(llm, [python_repl_tool], task=eda_task,)
95
  eda_node = functools.partial(agent_node, agent=eda_agent, name="EDA")
96
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
97
  from langgraph.graph import END, StateGraph, START
98
 
99
  # add a chain to the node to analyze the ACF plot?
100
  workflow = StateGraph(AgentState)
101
  workflow.add_node("EDA", eda_node)
 
 
 
102
 
103
  # conditional_edge to refit and the loop refit with resid?
104
  workflow.add_edge(START, "EDA")
105
- workflow.add_edge("EDA", END)
 
 
 
106
 
107
  graph = workflow.compile()
108
 
@@ -110,5 +148,5 @@ def greet(name):
110
  resp = graph.invoke({"messages": [HumanMessage(content="Run the analysis")]}, debug=True)
111
  return resp
112
 
113
- demo = gr.Interface(fn=gen_plot, inputs="text", outputs="text")
114
  demo.launch(share=True)
 
9
 
10
  # df = pd.read_csv("HOUST.csv")
11
  df = pd.read_csv("USSTHPI.csv")
 
 
12
  python_repl_tool = PythonAstREPLTool(locals={"df": df})
13
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  # cell 2
15
  from langchain.agents import AgentExecutor, create_openai_tools_agent
16
  from langchain_core.messages import BaseMessage, HumanMessage, SystemMessage
 
73
  llm_big = ChatOpenAI(model="gpt-4o", temperature=0, api_key=OPENAI_API_KEY)
74
 
75
  eda_task = """Using the data in the dataframe `df` and the package statsmodels, first run an augmented dickey fuller test on the data.
76
+ Using matplotlib plot the time series, display it and save it to 'plot.png'.
77
+ Next use the statsmodel package to generate an ACF plot with zero flag set to False, display it and save it to 'acf.png'.
78
+ Then use the statsmodel package to generate a PACF plot with zero flag set to False, display it and save it to 'pacf.png'"""
79
  eda_agent = create_agent(llm, [python_repl_tool], task=eda_task,)
80
  eda_node = functools.partial(agent_node, agent=eda_agent, name="EDA")
81
 
82
+ difference_task = """Using the data in the dataframe `df` determine whether a log transformation is appropriate.
83
+ If a log transformation is appropriate generate a new column for the log of the series and use this data for analysis.
84
+ Then determine whether a linear difference is needed and if needed generate a new column for the differenced data.
85
+ If the data was differenced use the differenced data for analysis."""
86
+ diff_agent = create_agent(llm, [python_repl_tool], task=difference_task, )
87
+ diff_node = functools.partial(agent_node, agent=diff_agent, name="difference")
88
+
89
+ plot_template = ChatPromptTemplate.from_messages(
90
+ messages=[
91
+ SystemMessage(content="""Determine whether this time series is stationary or needs to be differenced?
92
+ Consider the results of the ADF test along with the plot of the time series, the ACF plot and the PACF plot."""),
93
+ MessagesPlaceholder(variable_name="messages"),
94
+ HumanMessagePromptTemplate.from_template(
95
+ template=[{"type": "image_url", "image_url": {"path": "plot.png"}},
96
+ {"type": "image_url", "image_url": {"path": "acf.png"}},
97
+ {"type": "image_url", "image_url": {"path": "pacf.png"}}]),
98
+ ]
99
+ )
100
+
101
+ plot_chain = plot_template | llm_big
102
+ plot_node = functools.partial(chain_node, chain=plot_chain, name="PlotAnalysis")
103
+
104
+ def router(state):
105
+ router_template = ChatPromptTemplate.from_messages(
106
+ messages=[
107
+ MessagesPlaceholder(variable_name="messages"),
108
+ HumanMessage("""If the time series is stationary, return true if it is not stationary return false.
109
+ Just return true or false, nothing else.""")
110
+ ]
111
+ )
112
+
113
+ router_chain = router_template | llm
114
+ response = router_chain.invoke({"messages": state["messages"]})
115
+
116
+ if response.content=="true":
117
+ return "ARIMA"
118
+ else:
119
+ return "Difference"
120
+
121
+ arima_task = """Using the data in the dataframe `df` and the package statsmodels.
122
+ Estimate an ARIMA model with the appropriate AR and MA terms.
123
+ Then display the model results.
124
+ Finally generate an autocorrelation and partial autocorrelation plot of the model residuals with zero flag set to False, display it and save it as 'resid_acf.png'"""
125
+
126
+ arima_agent = create_agent(llm, [python_repl_tool], task=arima_task,)
127
+ arima_node = functools.partial(agent_node, agent=arima_agent, name="ARIMA")
128
+
129
  from langgraph.graph import END, StateGraph, START
130
 
131
  # add a chain to the node to analyze the ACF plot?
132
  workflow = StateGraph(AgentState)
133
  workflow.add_node("EDA", eda_node)
134
+ workflow.add_node("PlotAnalysis", plot_node)
135
+ workflow.add_node("Difference", diff_node)
136
+ workflow.add_node("ARIMA", arima_node)
137
 
138
  # conditional_edge to refit and the loop refit with resid?
139
  workflow.add_edge(START, "EDA")
140
+ workflow.add_edge("EDA", "PlotAnalysis")
141
+ workflow.add_conditional_edges("PlotAnalysis", router)
142
+ workflow.add_edge("Difference", "EDA")
143
+ workflow.add_edge("ARIMA", END)
144
 
145
  graph = workflow.compile()
146
 
 
148
  resp = graph.invoke({"messages": [HumanMessage(content="Run the analysis")]}, debug=True)
149
  return resp
150
 
151
+ demo = gr.Interface(fn=greet, inputs="text", outputs="text")
152
  demo.launch(share=True)