""" TABLE 3.2. Linear model fit to the prostate cancer data. The Z score is the
coefficient divided by its standard error (3.12). Roughly a Z score larger than two
in absolute value is significantly nonzero at the p = 0.05 level.
"""
class LinearRegression:
    """Least-squares linear model with an intercept, plus coefficient diagnostics.

    After ``fit`` the instance carries three arrays, each with the intercept
    entry first followed by one entry per column of ``X``:

    * ``beta``    -- fitted coefficients,
    * ``stderr``  -- standard error of each coefficient,
    * ``z_score`` -- coefficient divided by its standard error.
    """

    @staticmethod
    def _with_intercept(X):
        """Return ``X`` with a leading column of ones (the intercept term)."""
        return np.c_[np.ones((X.shape[0], 1)), X]

    def fit(self, X, y):
        """Fit the model by solving the normal equations.

        Parameters
        ----------
        X : array of predictors, one row per observation (no intercept column).
        y : array of responses, one entry per observation.

        Returns
        -------
        The fitted instance itself, so calls can be chained.
        """
        design = self._with_intercept(X)
        n_obs, n_params = design.shape

        # (X^T X)^{-1} is kept because it is needed for both the
        # coefficients and their covariance.
        gram_inv = np.linalg.inv(design.T @ design)
        self.beta = gram_inv @ design.T @ y

        # Residual variance estimate: residual sum of squares divided by
        # (observations - fitted parameters, intercept included).
        residuals = design @ self.beta - y
        sigma2 = np.sum(residuals ** 2) / (n_obs - n_params)

        # Coefficient variances are the diagonal of (X^T X)^{-1} * sigma^2.
        self.stderr = np.sqrt(np.diag(gram_inv) * sigma2)
        self.z_score = self.beta / self.stderr
        return self

    def predict(self, X):
        """Return fitted values for ``X`` using the coefficients from ``fit``."""
        return self._with_intercept(X) @ self.beta
# Apply stats.zscore to each predictor column (stats is presumably
# scipy.stats -- confirm against the file's imports).
df = df.apply(stats.zscore)

# Rows flagged 'T' in mask_train make up the training subset.
is_train = mask_train == 'T'
train_x = df[is_train]
train_y = df_y[is_train]

# fit() returns the instance, so constructing and fitting separately
# leaves `model` bound to the same fitted object.
model = LinearRegression()
model.fit(train_x.values, train_y.values)

# Summary table: one row per coefficient, intercept first.
# Left as a bare expression -- presumably displayed as notebook cell output.
pd.DataFrame(
    data={
        'Coefficient': model.beta,
        'Std. Error': model.stderr,
        'Z Score': model.z_score,
    },
    index=["Intercept"] + df.columns.tolist(),
)