243 lines
6.1 KiB
Plaintext
243 lines
6.1 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 1,
|
|
"id": "218af1ec",
|
|
"metadata": {
|
|
"pycharm": {
|
|
"name": "#%%\n"
|
|
}
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"import warnings\n",
|
|
"warnings.filterwarnings(\"ignore\")\n",
|
|
"import pandas as pd\n",
|
|
"import matplotlib.pyplot as plt\n",
|
|
"import json"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 2,
|
|
"id": "33e62663",
|
|
"metadata": {
|
|
"pycharm": {
|
|
"name": "#%%\n"
|
|
}
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
"# Use the SimHei font so that Chinese labels render correctly, and turn off\n",
"# the Unicode minus glyph so that the minus sign displays correctly.\n",
"plt.rcParams.update({\n",
"    'font.sans-serif': ['SimHei'],\n",
"    'axes.unicode_minus': False,\n",
"})"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"outputs": [],
|
|
"source": [
|
"df = pd.read_excel('./input/gdphb.xlsx')\n",
"display(df)\n",
"\n",
"\n",
"def minmax_norm(df_input):\n",
"    \"\"\"Linearly rescale every column of ``df_input`` onto the range [60, 100].\n",
"\n",
"    The column minimum maps to 60 and the column maximum to 100. A constant\n",
"    column has max == min, so it comes out as NaN (0 / 0).\n",
"    \"\"\"\n",
"    span = df_input.max() - df_input.min()\n",
"    return (df_input - df_input.min()) / span * 40 + 60\n",
"\n",
"\n",
"global_df_arr = []    # one frame per (region, indicator) that should be forecast\n",
"global_corr_obj = {}  # region -> indicator correlation matrix as a nested dict\n",
"\n",
"# Strip the region labels once and use the same stripped values both for the\n",
"# list of regions and for the row filter. Taking unique() on the raw column\n",
"# while filtering on stripped values makes a label that carries stray\n",
"# whitespace select no rows at all.\n",
"region_col = df['地区'].str.strip()\n",
"\n",
"for s in region_col.dropna().unique():\n",
"    new_df = df[region_col == s]\n",
"\n",
"    # Correlation between the min-max scaled indicator columns of this region.\n",
"    norm_df = minmax_norm(new_df.drop(columns=['地区', 'ds']))\n",
"    corr_json = norm_df.corr().to_json(orient='index')\n",
"    # to_json writes NaN as null; store the string NaN in its place.\n",
"    global_corr_obj[s] = json.loads(corr_json.replace(':null', ':\"NaN\"'))\n",
"\n",
"    for s_index in new_df.columns:\n",
"        if s_index in ('地区', 'ds', 'y'):\n",
"            continue\n",
"        # Build the ds / y frame for one indicator of this region. Copy the\n",
"        # slice so the new columns are not written into a view of df, and read\n",
"        # the values from new_df rather than from the unfiltered frame.\n",
"        sdf = new_df.loc[:, ['地区', 'ds']].copy()\n",
"        sdf['y'] = new_df[s_index]\n",
"        sdf['title-text'] = s_index\n",
"        # Drop incomplete rows first and renumber afterwards, so the index is\n",
"        # a gap-free 0..n-1 range.\n",
"        sdf = sdf.dropna().reset_index(drop=True)\n",
"        if len(sdf) < 2:\n",
"            # Prophet refuses to fit a frame with fewer than two rows.\n",
"            print(f'skip {s} / {s_index}: fewer than 2 complete rows')\n",
"            continue\n",
"        global_df_arr.append(sdf)"
|
|
],
|
|
"metadata": {
|
|
"collapsed": false,
|
|
"pycharm": {
|
|
"name": "#%%\n"
|
|
}
|
|
}
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"outputs": [],
|
|
"source": [
|
|
"# global_df_arr"
|
|
],
|
|
"metadata": {
|
|
"collapsed": false,
|
|
"pycharm": {
|
|
"name": "#%%\n"
|
|
}
|
|
}
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"outputs": [],
|
|
"source": [
|
|
"# print(\"各指标的相关性\")\n",
|
|
"# df.corr()"
|
|
],
|
|
"metadata": {
|
|
"collapsed": false,
|
|
"pycharm": {
|
|
"name": "#%%\n"
|
|
}
|
|
}
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"outputs": [],
|
|
"source": [
|
"from prophet import Prophet\n",
"from prophet.plot import plot_plotly, plot_components_plotly\n",
"\n",
"\n",
"def show_prophet(dfs, changepoints=('2020-12-01',), periods=2, freq='Y'):\n",
"    \"\"\"Fit one Prophet model per frame, plot it and collect the forecasts.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    dfs : iterable of DataFrame\n",
"        Each frame must provide the columns ds, y, 地区 and title-text. The\n",
"        last two are read from the first row and used as labels.\n",
"    changepoints : sequence of date strings\n",
"        Forwarded to Prophet(changepoints=...). The default keeps the single\n",
"        2020-12-01 changepoint, because 2020 is treated as an exception.\n",
"    periods, freq\n",
"        Forwarded to make_future_dataframe; by default the forecast is\n",
"        extended by two steps at frequency 'Y'.\n",
"\n",
"    Returns\n",
"    -------\n",
"    list of dict\n",
"        One dict per input frame with the keys axis (region), info\n",
"        (indicator title) and data (the forecast rows parsed from\n",
"        to_json with orient index, nulls replaced by the string NaN).\n",
"    \"\"\"\n",
"    json_arr = []\n",
"    for mdf in dfs:\n",
"        m = Prophet(changepoints=list(changepoints))\n",
"        m.fit(mdf)\n",
"        future = m.make_future_dataframe(periods=periods, freq=freq)\n",
"        forecast = m.predict(future)\n",
"\n",
"        # Attach the observed values by date instead of by row position, so\n",
"        # the result does not depend on the index or row order of mdf.\n",
"        observed = pd.DataFrame({'ds': pd.to_datetime(mdf['ds']), 'y': mdf['y']})\n",
"        forecast = forecast.merge(observed, on='ds', how='left')\n",
"\n",
"        title = mdf['title-text'].iloc[0]\n",
"        region = mdf['地区'].iloc[0]\n",
"        print(region + title + '回归分析')\n",
"        fig = plot_plotly(m, forecast, xlabel='时间', ylabel=title)\n",
"        fig.show()\n",
"\n",
"        # to_json writes NaN as null; store the string NaN in its place.\n",
"        forecast_json = forecast.to_json(orient='index')\n",
"        parsed = json.loads(forecast_json.replace(':null', ':\"NaN\"'))\n",
"        json_arr.append({'axis': region, 'info': title, 'data': parsed})\n",
"    return json_arr\n",
"\n",
"\n",
"res = show_prophet(global_df_arr[:])"
|
|
],
|
|
"metadata": {
|
|
"collapsed": false,
|
|
"pycharm": {
|
|
"name": "#%%\n"
|
|
}
|
|
}
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"outputs": [],
|
|
"source": [
|
"# Plain-text dump of the list that show_prophet returned.\n",
"print(res)"
|
|
],
|
|
"metadata": {
|
|
"collapsed": false,
|
|
"pycharm": {
|
|
"name": "#%%\n"
|
|
}
|
|
}
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"outputs": [],
|
|
"source": [],
|
|
"metadata": {
|
|
"collapsed": false,
|
|
"pycharm": {
|
|
"name": "#%%\n"
|
|
}
|
|
}
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"outputs": [],
|
|
"source": [],
|
|
"metadata": {
|
|
"collapsed": false,
|
|
"pycharm": {
|
|
"name": "#%%\n"
|
|
}
|
|
}
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "ca17354e",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "fdf21f14-a2e5-4ee6-94fd-10dda9900f42",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
}
|
|
],
|
|
"metadata": {
|
|
"interpreter": {
|
|
"hash": "c09ee517baf2ef8cdccf4daa50b9eecbe95ad60e1eec620e80956ae2b91f8870"
|
|
},
|
|
"kernelspec": {
|
|
"display_name": "Python 3 (ipykernel)",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.9.12"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 5
|
|
} |