algorithm - classification - exploration¶
Explores: - Logistic Regression - Random Forest - XGBoost
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
sns.set_style('darkgrid')
1. Data¶
Simulated conditional dataset: if x > 0.5, y = 1, else y = 0
# Noise-free version of the rule: 101 evenly spaced inputs on [0, 1] and a
# boolean target that flips to True once x exceeds 0.5.
x = np.linspace(0.0, 1.0, num=101)
y = x > 0.5

# Draw the ideal step as a black line.
plt.plot(x, y, color='k')
plt.show()
# Seed NumPy's global generator so the jitter below is the same on every run.
np.random.seed(555)

# One uniform draw per sample; it is added to x *before* thresholding, so
# labels close to the 0.5 boundary end up mixed while the ends stay clean.
noise = np.random.uniform(low=-0.2, high=0.2, size=101)
y = (x + noise > 0.5).astype(int)

plt.scatter(x, y)
plt.show()
2. Logistic Regression¶
from sklearn.linear_model import LogisticRegression, LinearRegression
# The estimators below are fitted on a 2-D feature matrix, so x becomes a
# single column.
X = x.reshape(-1, 1)

# Straight-line least-squares fit on the 0/1 labels, plotted over the data.
model = LinearRegression().fit(X, y)

plt.scatter(X, y, c='k')
plt.plot(X, model.predict(X), 'r--')
plt.show()
def fit_and_plot_classification(model, C):
    """Fit ``model`` on the notebook's data and plot its class-1 probability.

    The training data are the module-level names ``X`` and ``y``; they are
    read as globals rather than passed in.

    Args:
        model: Object providing ``fit(X, y)`` and ``predict_proba(X)``. Its
            ``fit`` method is called here.
        C: Value shown in the plot title as ``'C = <C>'``. It is used only
            for labelling and is not applied to ``model`` by this function.

    Returns:
        The tuple ``(model, pred)``, where ``pred`` is column 1 of
        ``model.predict_proba(X)``.
    """
    model.fit(X, y)

    # Column 1 of predict_proba is taken as the probability of the label 1.
    class_probabilities = model.predict_proba(X)
    pred = class_probabilities[:, 1]

    # Observations as black dots, predicted probability as a dashed red line.
    plt.scatter(X, y, c='k')
    plt.plot(X, pred, 'r--')
    plt.title('C = {}'.format(C))
    plt.show()

    return model, pred
# Sweep LogisticRegression's C (inverse regularisation strength) from 0.01 to
# 1000 and draw one probability curve per value.
for C in (0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30, 100, 300, 1000):
    # The returned model and predictions are not needed; only the plot is.
    _, _ = fit_and_plot_classification(LogisticRegression(C=C), C)
3. Random Forest¶
from sklearn.ensemble import RandomForestClassifier
# Bare constructor call; presumably kept so the notebook echoes the
# estimator's default settings.
RandomForestClassifier()

# Forest with default hyper-parameters and a fixed seed.
model = RandomForestClassifier(random_state=123).fit(X, y)

# Observations as black dots, predicted P(y = 1) as a dashed red line.
plt.scatter(X, y, c='k')
plt.plot(X, model.predict_proba(X)[:, 1], 'r--')
plt.show()
# Changing up n_estimators: refit the forest with more and more trees and
# plot the predicted P(y = 1) for each setting.
# The seed is pinned to the same value as the other random-forest cells so
# that differences between plots come from the number of trees, not from a
# different random draw on each run.
for n in [50, 100, 200, 400, 800]:
    print('no. of trees:', n)
    model = RandomForestClassifier(n_estimators=n, random_state=123)
    model.fit(X, y)
    plt.scatter(X, y, c='k')
    plt.plot(X, model.predict_proba(X)[:, 1], 'r--')
    plt.show()
# Raise the minimum number of samples per leaf, with the seed held fixed, and
# plot the predicted P(y = 1) for each setting.
for n in (2, 5, 15, 20):
    print('minimum leaf size:', n)
    model = RandomForestClassifier(min_samples_leaf=n, random_state=123).fit(X, y)

    plt.scatter(X, y, c='k')
    plt.plot(X, model.predict_proba(X)[:, 1], 'r--')
    plt.show()
4. XGBoost¶
from xgboost import XGBClassifier
# Bare constructor call; presumably kept so the notebook echoes the
# estimator's default settings.
XGBClassifier()

# Gradient-boosted classifier with every hyper-parameter left at its default.
model = XGBClassifier()
model.fit(X, y)

# Observations as black dots, predicted P(y = 1) as a dashed red line.
plt.scatter(X, y, c='k')
plt.plot(X, model.predict_proba(X)[:, 1], 'r--')
plt.show()
# Stand-alone fit with n_estimators=10; the loop below starts by refitting
# this same setting.
model = XGBClassifier(n_estimators=10)
model.fit(X, y)

# Increase n_estimators and plot the predicted P(y = 1) for each setting.
for n in (10, 20, 50, 200):
    print('n_estimators:', n)

    model = XGBClassifier(n_estimators=n)
    model.fit(X, y)
    pred = model.predict_proba(X)[:, 1]

    plt.scatter(X, y, c='k')
    plt.plot(X, pred, 'r--')
    plt.show()
# Vary max_depth (all other settings at their defaults) and plot the
# predicted P(y = 1) for each value.
for n in (1, 3, 5, 7, 9):
    print('max_depth:', n)

    model = XGBClassifier(max_depth=n)
    model.fit(X, y)
    pred = model.predict_proba(X)[:, 1]

    plt.scatter(X, y, c='k')
    plt.plot(X, pred, 'r--')
    plt.show()