Spaces:

pwilczewski
/

gradiobox

Sleeping

App Files Files Community

pwilczewski committed on Sep 24, 2024

Commit

172e154

1 Parent(s): e35535e

full analysis

Browse files

Files changed (1) hide show

app.py +58 -20

app.py CHANGED Viewed

@@ -9,23 +9,8 @@ import os
 # df = pd.read_csv("HOUST.csv")
 df = pd.read_csv("USSTHPI.csv")
-df.to_csv("testing.csv")
-df2 = pd.read_csv("testing.csv")
 python_repl_tool = PythonAstREPLTool(locals={"df": df})
-import matplotlib.pyplot as nplt
-def gen_plot(name):
-    nplt.figure(figsize=(10, 5))
-    nplt.plot(df['DATE'], df['USSTHPI'], label='USSTHPI')
-    nplt.title('Time Series of USSTHPI')
-    nplt.xlabel('Date')
-    nplt.ylabel('USSTHPI')
-    nplt.legend()
-    nplt.xticks(rotation=45)
-    nplt.tight_layout()
-    nplt.savefig('plots/plot.png')
-    return "plots/plot.png"
 # cell 2
 from langchain.agents import AgentExecutor, create_openai_tools_agent
 from langchain_core.messages import BaseMessage, HumanMessage, SystemMessage
@@ -88,21 +73,74 @@ llm = ChatOpenAI(model="gpt-4o-mini-2024-07-18", temperature=0, api_key=OPENAI_A
 llm_big = ChatOpenAI(model="gpt-4o", temperature=0, api_key=OPENAI_API_KEY)
 eda_task = """Using the data in the dataframe `df` and the package statsmodels, first run an augmented dickey fuller test on the data.
-            Using matplotlib plot the time series, display it and save it to 'plots/plot.png'.
-            Next use the statsmodel package to generate an ACF plot with zero flag set to False, display it and save it to 'plots/acf.png'.
-            Then use the statsmodel package to generate a PACF plot with zero flag set to False, display it and save it to 'plots/pacf.png'"""
 eda_agent = create_agent(llm, [python_repl_tool], task=eda_task,)
 eda_node = functools.partial(agent_node, agent=eda_agent, name="EDA")
 from langgraph.graph import END, StateGraph, START
 # add a chain to the node to analyze the ACF plot?
 workflow = StateGraph(AgentState)
 workflow.add_node("EDA", eda_node)
 # conditional_edge to refit and the loop refit with resid?
 workflow.add_edge(START, "EDA")
-workflow.add_edge("EDA", END)
 graph = workflow.compile()
@@ -110,5 +148,5 @@ def greet(name):
     resp = graph.invoke({"messages": [HumanMessage(content="Run the analysis")]}, debug=True)
     return resp
-demo = gr.Interface(fn=gen_plot, inputs="text", outputs="text")
 demo.launch(share=True)

 # df = pd.read_csv("HOUST.csv")
 df = pd.read_csv("USSTHPI.csv")
 python_repl_tool = PythonAstREPLTool(locals={"df": df})
 # cell 2
 from langchain.agents import AgentExecutor, create_openai_tools_agent
 from langchain_core.messages import BaseMessage, HumanMessage, SystemMessage
 llm_big = ChatOpenAI(model="gpt-4o", temperature=0, api_key=OPENAI_API_KEY)
 eda_task = """Using the data in the dataframe `df` and the package statsmodels, first run an augmented dickey fuller test on the data.
+            Using matplotlib plot the time series, display it and save it to 'plot.png'.
+            Next use the statsmodel package to generate an ACF plot with zero flag set to False, display it and save it to 'acf.png'.
+            Then use the statsmodel package to generate a PACF plot with zero flag set to False, display it and save it to 'pacf.png'"""
 eda_agent = create_agent(llm, [python_repl_tool], task=eda_task,)
 eda_node = functools.partial(agent_node, agent=eda_agent, name="EDA")
+difference_task = """Using the data in the dataframe `df` determine whether a log transformation is appropriate.
+                    If a log transformation is appropriate generate a new column for the log of the series and use this data for analysis.
+                    Then determine whether a linear difference is needed and if needed generate a new column for the differenced data.
+                    If the data was differenced use the differenced data for analysis."""
+diff_agent = create_agent(llm, [python_repl_tool], task=difference_task, )
+diff_node = functools.partial(agent_node, agent=diff_agent, name="difference")
+plot_template = ChatPromptTemplate.from_messages(
+    messages=[
+        SystemMessage(content="""Determine whether this time series is stationary or needs to be differenced?
+                      Consider the results of the ADF test along with the plot of the time series, the ACF plot and the PACF plot."""),
+        MessagesPlaceholder(variable_name="messages"),
+        HumanMessagePromptTemplate.from_template(
+            template=[{"type": "image_url", "image_url": {"path": "plot.png"}},
+                        {"type": "image_url", "image_url": {"path": "acf.png"}},
+                        {"type": "image_url", "image_url": {"path": "pacf.png"}}]),
+    ]
+)
+plot_chain = plot_template | llm_big
+plot_node = functools.partial(chain_node, chain=plot_chain, name="PlotAnalysis")
+def router(state):
+    """Choose the node that runs after PlotAnalysis.
+
+    Sends the accumulated messages to `llm` with a request for a bare
+    true/false verdict on stationarity.
+
+    Args:
+        state: graph state; only `state["messages"]` is read.
+
+    Returns:
+        "ARIMA" when the model answers true, otherwise "Difference".
+    """
+    router_template = ChatPromptTemplate.from_messages(
+        messages=[
+            MessagesPlaceholder(variable_name="messages"),
+            HumanMessage("""If the time series is stationary, return true if it is not stationary return false.
+                         Just return true or false, nothing else.""")
+        ]
+    )
+    router_chain = router_template | llm
+    response = router_chain.invoke({"messages": state["messages"]})
+    # Normalise before comparing: chat models frequently reply "True",
+    # "true." or pad the answer with whitespace/quotes. An exact match on
+    # "true" would route a stationary series to "Difference", and since
+    # Difference feeds back into EDA the graph could keep looping.
+    verdict = str(response.content).strip().strip("\"'`").lower()
+    if verdict.startswith("true"):
+        return "ARIMA"
+    return "Difference"
+arima_task = """Using the data in the dataframe `df` and the package statsmodels.
+    Estimate an ARIMA model with the appropriate AR and MA terms.
+    Then display the model results.
+    Finally generate an autocorrelation and partial autocorrelation plot of the model residuals with zero flag set to False, display it and save it as 'resid_acf.png'"""
+arima_agent = create_agent(llm, [python_repl_tool], task=arima_task,)
+arima_node = functools.partial(agent_node, agent=arima_agent, name="ARIMA")
 from langgraph.graph import END, StateGraph, START
 # add a chain to the node to analyze the ACF plot?
 workflow = StateGraph(AgentState)
 workflow.add_node("EDA", eda_node)
+workflow.add_node("PlotAnalysis", plot_node)
+workflow.add_node("Difference", diff_node)
+workflow.add_node("ARIMA", arima_node)
 # conditional_edge to refit and the loop refit with resid?
 workflow.add_edge(START, "EDA")
+workflow.add_edge("EDA", "PlotAnalysis")
+workflow.add_conditional_edges("PlotAnalysis", router)
+workflow.add_edge("Difference", "EDA")
+workflow.add_edge("ARIMA", END)
 graph = workflow.compile()
     resp = graph.invoke({"messages": [HumanMessage(content="Run the analysis")]}, debug=True)
     return resp
+demo = gr.Interface(fn=greet, inputs="text", outputs="text")
 demo.launch(share=True)