-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathblending.py
More file actions
105 lines (86 loc) · 4.7 KB
/
blending.py
File metadata and controls
105 lines (86 loc) · 4.7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.neural_network import MLPRegressor
from sklearn.externals import joblib
from helpers import calculate_rmse_score
def lin_reg_blending(all_preds_train_trains, y_train_trains, all_preds_train_tests, y_train_tests, all_preds_tests, verbose=True):
"""
Takes predictions on train, test and dataset_testing as 2d numpy array as well as scores on train and test set.
Trains linear regression using predictions on train set, testing on test set.
returns scikit learn model and predictions on dataset_testing
"""
# initialize linear regressor
lin_regr = LinearRegression()
# fit using predictions train dataset
lin_regr.fit(all_preds_train_trains, y_train_trains)
# predict on train, test and dataset_testing datasets
lin_regr_y_preds_trains_trains = lin_regr.predict(all_preds_train_trains)
lin_regr_y_preds_trains_tests = lin_regr.predict(all_preds_train_tests)
lin_regr_y_preds_tests = lin_regr.predict(all_preds_tests)
# print if given True
if verbose:
# calculate tmse on train and test set
rmse_train_train = calculate_rmse_score(lin_regr_y_preds_trains_trains, y_train_trains)
rmse_train_test= calculate_rmse_score(lin_regr_y_preds_trains_tests, y_train_tests)
print(rmse_train_train)
print(rmse_train_test)
# return model and predictions on dataset_testing
return lin_regr, lin_regr_y_preds_tests
def ridge_reg_blending(all_preds_train_trains, y_train_trains, all_preds_train_tests, y_train_tests, all_preds_tests, alpha=2700000, verbose=True):
"""
Takes predictions on train, test and dataset_testing as 2d numpy array as well as scores on train and test set.
Trains ridge regression using predictions on train set, testing on test set.
Alpha controls regularization, check scikit learn docs for more detail
returns scikit learn model and predictions on dataset_testing
"""
# initialize ridge regressor with given regularization
ridge_regr = Ridge(alpha)
# fit using predictions train dataset
ridge_regr.fit(all_preds_train_trains, y_train_trains)
# predict on train, test and dataset_testing datasets
ridge_regr_y_preds_trains_trains = ridge_regr.predict(all_preds_train_trains)
ridge_regr_y_preds_trains_tests = ridge_regr.predict(all_preds_train_tests)
ridge_regr_y_preds_tests = ridge_regr.predict(all_preds_tests)
# print if given True
if verbose:
# calculate tmse on train and test set
rmse_train_train = calculate_rmse_score(ridge_regr_y_preds_trains_trains, y_train_trains)
rmse_train_test = calculate_rmse_score(ridge_regr_y_preds_trains_tests, y_train_tests)
print(rmse_train_train)
print(rmse_train_test)
# return model and predictions on dataset_testing
return ridge_regr, ridge_regr_y_preds_tests
def nn_blending(all_preds_train_trains, y_train_trains, all_preds_train_tests, y_train_tests, all_preds_tests, max_iter=3, alpha=100, verbose=True):
"""
Takes predictions on train, test and dataset_testing as 2d numpy array as well as scores on train and test set.
Trains neural net regressor using predictions on train set, testing on test set.
max_iter controls number of iterations, Alpha controls regularization, check scikit learn docs for more detail
returns scikit learn model and predictions on dataset_testing
"""
# initialize neural net regressor with given max_iter and regularization
mlp = MLPRegressor(max_iter=max_iter, alpha=alpha, verbose=verbose)
# fit using predictions train dataset
mlp.fit(all_preds_train_trains, y_train_trains)
# predict on train, test and dataset_testing datasets
mlp_y_preds_trains_trains = mlp.predict(all_preds_train_trains)
mlp_y_preds_trains_tests = mlp.predict(all_preds_train_tests)
mlp_y_preds_tests = mlp.predict(all_preds_tests)
# print if given True
if verbose:
# calculate tmse on train and test set
rmse_train_train = calculate_rmse_score(mlp_y_preds_trains_trains, y_train_trains)
rmse_train_test = calculate_rmse_score(mlp_y_preds_trains_tests, y_train_tests)
print(rmse_train_train)
print(rmse_train_test)
# return model and predictions on dataset_testing
return mlp, mlp_y_preds_tests
def save_model(clf, filename="ridge_regr"):
"""
Takes sklearn model, saves it to disk
"""
joblib.dump(clf, 'sklearn_models/{}.pkl'.format(filename))
def load_model(filename="ridge_regr"):
"""
Loads sklearn model from disk
"""
clf = joblib.load('sklearn_models/{}.pkl'.format(filename))
return clf