5 Régression polynomiale et régression spline

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.interpolate import make_smoothing_spline
import statsmodels.formula.api as smf
import statsmodels.api as sm
from patsy import dmatrix 

Régression polynomiale

ozone = pd.read_csv("../donnees/ozone_simple.txt", header=0, sep=";")
fig = plt.figure()
plt.plot(ozone.T12, ozone.O3, '.k')
plt.ylabel('O3')
plt.xlabel('T12')
fig.tight_layout()

def polyreg(donnee, d=3):
    sigmax = donnee.T12.std()
    mini = donnee.T12.min() -sigmax
    maxi = donnee.T12.max()+sigmax
    grillex = np.linspace(mini, maxi, 100)
    aprevoir = pd.DataFrame({"T12": grillex})
    formula = "O3~1"
    for deg in range(d):
        if deg>0:
            formula = formula + "+ np.power(T12," + str(deg+1) + ")"
        else:
            formula = formula + "+ T12"
    repol = smf.ols(formula, donnee).fit()
    prev = repol.predict(aprevoir)
    return {"grillex": grillex, "grilley": prev}
ligne = ['-', '--', ':', '-.']
fig, ax = plt.subplots(1, 1)
for iter, ii in enumerate([1,2,3,9]):
    tmp = polyreg(ozone, d=ii)
    ax.plot(tmp["grillex"], tmp["grilley"], ls=ligne[iter])

ax.set_xlabel("T12")
ax.set_ylabel("O3")
ax.set_xlim(0, 45)
ax.set_ylim(0, 150)
ax.legend(['d=1', 'd=2', 'd=3', 'd=9'])
ax.plot(ozone.T12, ozone.O3, "k+")
fig.tight_layout()

oz = ozone.copy()
oz.loc[oz.T12==7.9,"T12"] = 13
ligne = ['-', '--', ':', '-.']
fig, (ax1, ax2) = plt.subplots(2, 1)
for iter, ii in enumerate([1,2,3,9]):
    tmp = polyreg(ozone, d=ii)
    ax1.plot(tmp["grillex"], tmp["grilley"], ls=ligne[iter])

ax1.set_ylabel("O3")
ax1.set_xlim(5,35)
ax1.set_ylim(5, 150)
ax1.plot(ozone.T12, ozone.O3, "k+")
for iter, ii in enumerate([1,2,3,9]):
    tmp = polyreg(oz, d=ii)
    ax2.plot(tmp["grillex"], tmp["grilley"], ls=ligne[iter])

ax2.set_xlabel("T12")
ax2.set_ylabel("O3")
ax2.set_xlim(5,35)
ax2.set_ylim(5, 150)
ax2.plot(oz.T12, oz.O3, "k+")
fig.tight_layout()

oz = ozone.copy()
oz = oz.loc[oz.T12!=oz.T12.min(),:]
ligne = ['-', '--', ':', '-.']
fig, (ax1, ax2) = plt.subplots(2, 1)
for iter, ii in enumerate([1,2,3,9]):
    tmp = polyreg(ozone, d=ii)
    ax1.plot(tmp["grillex"], tmp["grilley"], ls=ligne[iter])

ax1.set_ylabel("O3")
ax1.set_xlim(5,35)
ax1.set_ylim(5, 150)
ax1.plot(ozone.T12, ozone.O3, "k+")
for iter, ii in enumerate([1,2,3,9]):
    tmp = polyreg(oz, d=ii)
    ax2.plot(tmp["grillex"], tmp["grilley"], ls=ligne[iter])

ax2.set_xlabel("T12")
ax2.set_ylabel("O3")
ax2.set_xlim(5,35)
ax2.set_ylim(5, 150)
ax2.plot(oz.T12, oz.O3, "k+")
fig.tight_layout()

Régression spline

ind = ozone.T12 < 23
regd = smf.ols("O3~T12", data=ozone.loc[ind,:]).fit()
regf = smf.ols("O3~T12", data=ozone.loc[~ind,:]).fit()
gxd = np.linspace(3,23,21)
gxf = np.linspace(23,30,8)
prd = regd.params.iloc[0] +regd.params.iloc[1]*gxd
prf = regf.params.iloc[0] +regf.params.iloc[1]*gxf
fig, ax = plt.subplots(1, 1)
ax.plot(ozone.T12, ozone.O3, "k+", gxd, prd, "k-", gxf, prf, "k-")
ax.set_xlabel("T12")
ax.set_ylabel("O3")
ax.axvline(x=23, color="k")
fig.tight_layout()

xi = [15, 23]
BS = dmatrix("~ 1 + bs(T12, degree=2, knots=xi, include_intercept=False, lower_bound=5, upper_bound=32)", ozone, return_type="dataframe")
BS.columns = ["bs" + str(i+1) for i in range(5)]
df = pd.concat([ozone.O3, BS.iloc[:,1:]], axis=1)

regs = smf.ols("O3~1+bs2+bs3+bs4+bs5 ", df).fit()
print(regs.params.round(3))
Intercept     51.102
bs2           61.544
bs3            5.562
bs4           70.459
bs5          106.712
dtype: float64
regs = smf.ols("O3~ 1 + bs(T12, degree=2, knots=xi, include_intercept=False, lower_bound=5, upper_bound=32)", ozone).fit()
print(list(regs.params.round(3)))
[51.102, 61.544, 5.562, 70.459, 106.712]
df = pd.DataFrame({"T12": np.linspace(5,32,100)})
prev = regs.predict(df)
fig, ax = plt.subplots(1, 1)
ax.plot(ozone.T12, ozone.O3, "ko" , df.T12, prev, "-")
ax.set_xlabel("T12")
ax.set_ylabel("O3")
ax.axvline(x=15, color="k", ls="-")
ax.axvline(x=xi[1], color="k", ls="-")
fig.tight_layout()

ozdedup = ozone.groupby("T12").mean()
ozdedup["w"] = ozone.groupby("T12").count()
spl = make_smoothing_spline(ozdedup.index, ozdedup.O3, ozdedup.w, lam=10000)
xi = np.linspace(ozdedup.index[0], ozdedup.index[-1], 150)
yi = spl(xi)
fig, ax = plt.subplots(1, 1)
ax.plot(ozone.T12, ozone.O3, "ko" , xi, yi, "-")
ax.set_xlabel("T12")
ax.set_ylabel("O3")
fig.tight_layout()

spl = make_smoothing_spline(ozdedup.index, ozdedup.O3, ozdedup.w, lam=1e-2)
xi = np.linspace(ozdedup.index[0], ozdedup.index[-1], 150)
yi = spl(xi)
fig, ax = plt.subplots(1, 1)
ax.plot(ozone.T12, ozone.O3, "ko" , xi, yi, "-")
ax.set_xlabel("T12")
ax.set_ylabel("O3")
fig.tight_layout()

ozdedup = ozone.groupby("T12").mean()
ozdedup["w"] = ozone.groupby("T12").count()
spl = make_smoothing_spline(ozdedup.index, ozdedup.O3, ozdedup.w)
xi = np.linspace(ozdedup.index[0], ozdedup.index[-1], 150)
yi = spl(xi)
fig, ax = plt.subplots(1, 1)
ax.plot(ozone.T12, ozone.O3, "ko" , xi, yi, "-")
ax.set_xlabel("T12")
ax.set_ylabel("O3")
fig.tight_layout()

Retour au sommet