import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.interpolate import make_smoothing_spline
import statsmodels.formula.api as smf
import statsmodels.api as sm
from patsy import dmatrix
5 Régression polynomiale et régression spline
Régression polynomiale
= pd.read_csv("../donnees/ozone_simple.txt", header=0, sep=";") ozone
= plt.figure()
fig '.k')
plt.plot(ozone.T12, ozone.O3, 'O3')
plt.ylabel('T12')
plt.xlabel( fig.tight_layout()
def polyreg(donnee, d=3):
= donnee.T12.std()
sigmax = donnee.T12.min() -sigmax
mini = donnee.T12.max()+sigmax
maxi = np.linspace(mini, maxi, 100)
grillex = pd.DataFrame({"T12": grillex})
aprevoir = "O3~1"
formula for deg in range(d):
if deg>0:
= formula + "+ np.power(T12," + str(deg+1) + ")"
formula else:
= formula + "+ T12"
formula = smf.ols(formula, donnee).fit()
repol = repol.predict(aprevoir)
prev return {"grillex": grillex, "grilley": prev}
= ['-', '--', ':', '-.']
ligne = plt.subplots(1, 1)
fig, ax for iter, ii in enumerate([1,2,3,9]):
= polyreg(ozone, d=ii)
tmp "grillex"], tmp["grilley"], ls=ligne[iter])
ax.plot(tmp[
"T12")
ax.set_xlabel("O3")
ax.set_ylabel(0, 45)
ax.set_xlim(0, 150)
ax.set_ylim('d=1', 'd=2', 'd=3', 'd=9'])
ax.legend(["k+")
ax.plot(ozone.T12, ozone.O3, fig.tight_layout()
= ozone.copy()
oz ==7.9,"T12"] = 13
oz.loc[oz.T12= ['-', '--', ':', '-.']
ligne = plt.subplots(2, 1)
fig, (ax1, ax2) for iter, ii in enumerate([1,2,3,9]):
= polyreg(ozone, d=ii)
tmp "grillex"], tmp["grilley"], ls=ligne[iter])
ax1.plot(tmp[
"O3")
ax1.set_ylabel(5,35)
ax1.set_xlim(5, 150)
ax1.set_ylim("k+")
ax1.plot(ozone.T12, ozone.O3, for iter, ii in enumerate([1,2,3,9]):
= polyreg(oz, d=ii)
tmp "grillex"], tmp["grilley"], ls=ligne[iter])
ax2.plot(tmp[
"T12")
ax2.set_xlabel("O3")
ax2.set_ylabel(5,35)
ax2.set_xlim(5, 150)
ax2.set_ylim("k+")
ax2.plot(oz.T12, oz.O3, fig.tight_layout()
= ozone.copy()
oz = oz.loc[oz.T12!=oz.T12.min(),:]
oz = ['-', '--', ':', '-.']
ligne = plt.subplots(2, 1)
fig, (ax1, ax2) for iter, ii in enumerate([1,2,3,9]):
= polyreg(ozone, d=ii)
tmp "grillex"], tmp["grilley"], ls=ligne[iter])
ax1.plot(tmp[
"O3")
ax1.set_ylabel(5,35)
ax1.set_xlim(5, 150)
ax1.set_ylim("k+")
ax1.plot(ozone.T12, ozone.O3, for iter, ii in enumerate([1,2,3,9]):
= polyreg(oz, d=ii)
tmp "grillex"], tmp["grilley"], ls=ligne[iter])
ax2.plot(tmp[
"T12")
ax2.set_xlabel("O3")
ax2.set_ylabel(5,35)
ax2.set_xlim(5, 150)
ax2.set_ylim("k+")
ax2.plot(oz.T12, oz.O3, fig.tight_layout()
Régression spline
= ozone.T12 < 23
ind = smf.ols("O3~T12", data=ozone.loc[ind,:]).fit()
regd = smf.ols("O3~T12", data=ozone.loc[~ind,:]).fit()
regf = np.linspace(3,23,21)
gxd = np.linspace(23,30,8)
gxf = regd.params.iloc[0] +regd.params.iloc[1]*gxd
prd = regf.params.iloc[0] +regf.params.iloc[1]*gxf
prf = plt.subplots(1, 1)
fig, ax "k+", gxd, prd, "k-", gxf, prf, "k-")
ax.plot(ozone.T12, ozone.O3, "T12")
ax.set_xlabel("O3")
ax.set_ylabel(=23, color="k")
ax.axvline(x fig.tight_layout()
= [15, 23]
xi = dmatrix("~ 1 + bs(T12, degree=2, knots=xi, include_intercept=False, lower_bound=5, upper_bound=32)", ozone, return_type="dataframe")
BS = ["bs" + str(i+1) for i in range(5)]
BS.columns = pd.concat([ozone.O3, BS.iloc[:,1:]], axis=1)
df
= smf.ols("O3~1+bs2+bs3+bs4+bs5 ", df).fit()
regs print(regs.params.round(3))
Intercept 51.102
bs2 61.544
bs3 5.562
bs4 70.459
bs5 106.712
dtype: float64
= smf.ols("O3~ 1 + bs(T12, degree=2, knots=xi, include_intercept=False, lower_bound=5, upper_bound=32)", ozone).fit()
regs print(list(regs.params.round(3)))
[51.102, 61.544, 5.562, 70.459, 106.712]
= pd.DataFrame({"T12": np.linspace(5,32,100)})
df = regs.predict(df)
prev = plt.subplots(1, 1)
fig, ax "ko" , df.T12, prev, "-")
ax.plot(ozone.T12, ozone.O3, "T12")
ax.set_xlabel("O3")
ax.set_ylabel(=15, color="k", ls="-")
ax.axvline(x=xi[1], color="k", ls="-")
ax.axvline(x fig.tight_layout()
= ozone.groupby("T12").mean()
ozdedup "w"] = ozone.groupby("T12").count()
ozdedup[= make_smoothing_spline(ozdedup.index, ozdedup.O3, ozdedup.w, lam=10000)
spl = np.linspace(ozdedup.index[0], ozdedup.index[-1], 150)
xi = spl(xi)
yi = plt.subplots(1, 1)
fig, ax "ko" , xi, yi, "-")
ax.plot(ozone.T12, ozone.O3, "T12")
ax.set_xlabel("O3")
ax.set_ylabel( fig.tight_layout()
= make_smoothing_spline(ozdedup.index, ozdedup.O3, ozdedup.w, lam=1e-2)
spl = np.linspace(ozdedup.index[0], ozdedup.index[-1], 150)
xi = spl(xi)
yi = plt.subplots(1, 1)
fig, ax "ko" , xi, yi, "-")
ax.plot(ozone.T12, ozone.O3, "T12")
ax.set_xlabel("O3")
ax.set_ylabel( fig.tight_layout()
= ozone.groupby("T12").mean()
ozdedup "w"] = ozone.groupby("T12").count()
ozdedup[= make_smoothing_spline(ozdedup.index, ozdedup.O3, ozdedup.w)
spl = np.linspace(ozdedup.index[0], ozdedup.index[-1], 150)
xi = spl(xi)
yi = plt.subplots(1, 1)
fig, ax "ko" , xi, yi, "-")
ax.plot(ozone.T12, ozone.O3, "T12")
ax.set_xlabel("O3")
ax.set_ylabel( fig.tight_layout()