import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import statsmodels.api as sm
import statsmodels.formula.api as smf
2 La régression linéaire multiple
La concentration en ozone
= pd.read_csv("../donnees/ozone.txt", header=0, sep=";") ozone
= plt.figure()
fig = fig.add_subplot(111, projection='3d')
ax
ax.scatter(ozone.T12, ozone.Vx,ozone.O3)'T12') ; ax.set_ylabel('Vx') ; ax.set_zlabel('O3')
ax.set_xlabel( fig.tight_layout()
= smf.ols('O3 ~ T12+Vx', data=ozone).fit()
reg reg.summary()
Dep. Variable: | O3 | R-squared: | 0.525 |
Model: | OLS | Adj. R-squared: | 0.505 |
Method: | Least Squares | F-statistic: | 25.96 |
Date: | Fri, 31 Jan 2025 | Prob (F-statistic): | 2.54e-08 |
Time: | 17:30:04 | Log-Likelihood: | -210.53 |
No. Observations: | 50 | AIC: | 427.1 |
Df Residuals: | 47 | BIC: | 432.8 |
Df Model: | 2 | ||
Covariance Type: | nonrobust |
coef | std err | t | P>|t| | [0.025 | 0.975] | |
Intercept | 35.4530 | 10.745 | 3.300 | 0.002 | 13.838 | 57.068 |
T12 | 2.5380 | 0.515 | 4.927 | 0.000 | 1.502 | 3.574 |
Vx | 0.8736 | 0.177 | 4.931 | 0.000 | 0.517 | 1.230 |
Omnibus: | 0.280 | Durbin-Watson: | 1.678 |
Prob(Omnibus): | 0.869 | Jarque-Bera (JB): | 0.331 |
Skew: | 0.165 | Prob(JB): | 0.848 |
Kurtosis: | 2.777 | Cond. No. | 94.4 |
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
La hauteur des eucalyptus
= pd.read_csv("../donnees/eucalyptus.txt",header=0,sep=";")
eucalyptus = plt.figure()
fig '+k')
plt.plot(eucalyptus.circ, eucalyptus.ht, 'ht') ; plt.xlabel('circ')
plt.ylabel( fig.tight_layout()
= smf.ols('ht ~ circ+np.sqrt(circ)', data=eucalyptus).fit()
reg reg.summary()
Dep. Variable: | ht | R-squared: | 0.792 |
Model: | OLS | Adj. R-squared: | 0.792 |
Method: | Least Squares | F-statistic: | 2718. |
Date: | Fri, 31 Jan 2025 | Prob (F-statistic): | 0.00 |
Time: | 17:30:04 | Log-Likelihood: | -2208.5 |
No. Observations: | 1429 | AIC: | 4423. |
Df Residuals: | 1426 | BIC: | 4439. |
Df Model: | 2 | ||
Covariance Type: | nonrobust |
coef | std err | t | P>|t| | [0.025 | 0.975] | |
Intercept | -24.3520 | 2.614 | -9.314 | 0.000 | -29.481 | -19.223 |
circ | -0.4829 | 0.058 | -8.336 | 0.000 | -0.597 | -0.369 |
np.sqrt(circ) | 9.9869 | 0.780 | 12.798 | 0.000 | 8.456 | 11.518 |
Omnibus: | 3.015 | Durbin-Watson: | 0.947 |
Prob(Omnibus): | 0.221 | Jarque-Bera (JB): | 2.897 |
Skew: | -0.097 | Prob(JB): | 0.235 |
Kurtosis: | 3.103 | Cond. No. | 4.41e+03 |
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 4.41e+03. This might indicate that there are
strong multicollinearity or other numerical problems.
= pd.DataFrame({'circ' : np.linspace(eucalyptus.circ.min(), \
grille max(),100)})
eucalyptus.circ.= reg.get_prediction(grille)
calculprev = calculprev.predicted_mean
prev
= plt.figure()
fig '+k')
plt.plot(eucalyptus.circ, eucalyptus.ht, 'ht') ; plt.xlabel('circ')
plt.ylabel('-', lw=1)
plt.plot(grille.circ, prev, fig.tight_layout()