In [1]:
from sklearn.linear_model import LinearRegression
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
In [3]:
# Synthetic data set: a cubic trend in x plus Gaussian noise with standard deviation 2.
# Seed the legacy global RNG so that Restart & Run All reproduces the same sample
# (and therefore the same fitted coefficients further down).
np.random.seed(42)

x = np.linspace(-5, 5, num=100)[:, None]  # column vector, one feature per row
# Draw the noise with x's own shape so the sample count is defined in one place.
y = -0.5 + 2.2*x + 0.3*x**3 + 2*np.random.randn(*x.shape)

plt.plot(x, y)
plt.xlabel('x')
plt.ylabel('y')
plt.show()
In [4]:
# Hand-built polynomial design matrix: one column per power of x, from 1 up to 4.
x_new = np.concatenate([x**power for power in (1, 2, 3, 4)], axis=1)
x_new.shape
Out[4]:
(100, 4)
In [5]:
# Shape of the original single-column input, for comparison with x_new.shape.
x.shape
Out[5]:
(100, 1)
In [6]:
# Ordinary least squares on the polynomial features. fit() returns the estimator
# itself, so ending the cell with `model` displays the same fitted object.
model = LinearRegression().fit(x_new, y)
model
Out[6]:
LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)
In [7]:
# Fitted coefficients, in the column order of x_new: x, x**2, x**3, x**4.
model.coef_
Out[7]:
array([[ 2.03769502,  0.15537811,  0.30590637, -0.00703118]])
In [8]:
# Constant term learned by the fit.
model.intercept_
Out[8]:
array([-1.23554406])
In [9]:
# In-sample predictions of the fitted polynomial model.
y_pred = model.predict(x_new)

# Attach the label to each artist instead of passing a positional list to
# plt.legend(): with a bare list the pairing of labels and artists is implicit
# by order, which makes it easy to caption the scatter as the line and vice versa.
plt.scatter(x, y, label='Observed data')
plt.plot(x_new[:, 0], y_pred, 'r', label='Predicted line')  # column 0 of x_new is x itself
plt.legend()
plt.show()
In [11]:
from sklearn.preprocessing import PolynomialFeatures

# Let scikit-learn generate the powers of x up to degree 4; include_bias=False
# leaves out the constant column, so the result has four columns like x_new.
poly = PolynomialFeatures(degree=4, include_bias=False)
poly.fit(x)
x_new2 = poly.transform(x)
x_new2[0:2]  # peek at the first two rows
Out[11]:
array([[  -5.        ,   25.        , -125.        ,  625.        ],
       [  -4.8989899 ,   24.00010203, -117.57625742,  576.00489747]])
In [12]:
# Shape of the PolynomialFeatures output, to compare against x_new.shape.
x_new2.shape
Out[12]:
(100, 4)