import pandas as pd
import statsmodels.formula.api as smf
import statsmodels.api as sm
import matplotlib.pyplot as plt
import scipy.stats as stats
from statsmodels.nonparametric.smoothers_lowess import lowess
# 3. Model validation (original heading: "3 Validation du modèle")
# Load the ozone data set (semicolon-separated, header on the first row).
ozone = pd.read_csv("../donnees/ozone_long.txt", header=0, sep=";")

# Fit the six-regressor linear model by ordinary least squares.
mod_lin6v = smf.ols("O3 ~ T6+T12+Ne12+Ne15+Vx+O3v", data=ozone).fit()

# Influence object: gives the studentized residuals, Cook's distances and
# hat values used by the diagnostics in this script.
infl = mod_lin6v.get_influence()

# 1-based observation number, used as the x-axis of the index plots.
index = pd.Series(range(1, ozone.shape[0] + 1))

# Externally studentized residuals against observation number, with a
# lowess smooth (red) and horizontal reference lines at -2 and +2.
resloess = sm.nonparametric.lowess(infl.resid_studentized_external, index)
plt.plot(index, infl.resid_studentized_external, "+k")
plt.plot(resloess[:, 0], resloess[:, 1], "r")
ax = plt.gca()
ax.axhline(y=-2)
ax.axhline(y=2)

# Q-Q plot of the studentized residuals against a Student t distribution
# with (number of rows - infl.k_vars - 1) degrees of freedom.
sm.qqplot(
    infl.resid_studentized_external,
    stats.t,
    distargs=(ozone.shape[0] - infl.k_vars - 1,),
    line='s',
)
# Cook's distance for every observation; the second element returned by
# `cooks_distance` (p-values) is unpacked but not used below.
cook, pval = infl.cooks_distance

# n = number of rows, p = number of columns of the `ozone` data frame.
n, p = ozone.shape

# Bar plot of Cook's distance against observation number, on a new figure.
fig = plt.figure()
plt.bar(index, cook, lw=2, color='k')
fig.tight_layout()
# Leverage of each observation (diagonal of the hat matrix).
hii = infl.hat_matrix_diag

# Reference thresholds 3p/n and 2p/n.
# NOTE(review): `n` and `p` are taken from `ozone.shape` earlier in the
# script, so `p` is the number of data-frame columns, not the number of
# fitted coefficients -- confirm this is the intended `p`.
seuil1 = 3 * p / n
seuil2 = 2 * p / n

# Bar plot of the leverages with both thresholds drawn in red.
# NOTE(review): no new figure is created here, so in a plain script these
# bars are drawn on whatever figure is current -- confirm whether a
# `plt.figure()` call is wanted before this plot.
plt.bar(index, hii, lw=2, color='k')
ax = plt.gca()
ax.axhline(y=seuil1, color='r', ls=':')
ax.axhline(y=seuil2, color='r', ls='--')
fig.tight_layout()
# Component-plus-residual (CCPR) plots for the fitted model, one panel per
# regressor, restyled: no super-title, no y-labels, small markers.
fig = sm.graphics.plot_ccpr_grid(mod_lin6v)
fig.suptitle("", fontsize=16)
marker_size = 2
for ax in fig.axes:
    ax.set_ylabel('')
    # Recolour the first line artist of the panel in black.
    ax.lines[0].set_color('black')
    for line in ax.get_lines():
        line.set_markersize(marker_size)
plt.subplots_adjust(top=0.99, bottom=-0.5, left=0.01, right=0.99, hspace=0.1, wspace=0.4)
fig.tight_layout()
def plot_ccpr_grid_with_loess(mod, exog_idx=None, grid=None, fig=None):
    """Draw a CCPR grid for *mod* and overlay a lowess smooth on each panel.

    The grid is built by ``sm.graphics.plot_ccpr_grid``; every axes of the
    resulting figure is then restyled (empty y-label, first line in black,
    marker size 2) and a red lowess curve (``frac=2/3``) is fitted to the
    data of the panel's first line artist and added on top.

    Parameters
    ----------
    mod
        Fitted model results, forwarded to ``plot_ccpr_grid``.
    exog_idx, grid, fig
        Forwarded unchanged to ``plot_ccpr_grid``.

    Returns
    -------
    The figure returned by ``plot_ccpr_grid``, after decoration.
    """
    fig = sm.graphics.plot_ccpr_grid(mod, exog_idx, grid, fig)
    fig.suptitle("", fontsize=16)
    marker_size = 2
    for ax in fig.axes:
        # A caller-supplied figure may already hold axes with no line
        # artists; there is nothing to restyle or smooth on those.
        if not ax.lines:
            continue
        # Read the data of the first line artist before restyling it.
        # Presumably these are the component-plus-residual points -- TODO
        # confirm against plot_ccpr_grid.
        x = ax.lines[0].get_xdata()
        y = ax.lines[0].get_ydata()
        ax.set_ylabel('')
        ax.lines[0].set_color('black')
        for line in ax.get_lines():
            line.set_markersize(marker_size)
        # Overlay the loess smooth in red.
        smooth = lowess(y, x, frac=2/3)
        ax.plot(smooth[:, 0], smooth[:, 1], color='red', lw=2)
    return fig
# CCPR grid with lowess overlays for the six-regressor model.
fig = plot_ccpr_grid_with_loess(mod_lin6v)
fig.tight_layout()