import optuna
import xgboost as xgb
import numpy as np
from sklearn.metrics import roc_auc_score, average_precision_score, accuracy_score
from sklearn.model_selection import StratifiedKFold, cross_val_score, train_test_split
from rfl_loss import RFLBinary, XGBRFLMulti
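# Note on the interface: XGBClassifier accepts a callable objective with the
# sklearn-API signature (y_true, y_pred) -> (grad, hess), where y_pred holds
# raw margin scores. RFLBinary/XGBRFLMulti are assumed to implement this
# protocol; as a minimal illustration of the shape of such a callable (plain
# logistic loss, NOT the robust focal loss itself):
def _logistic_obj_sketch(y_true, y_pred):
    p = 1.0 / (1.0 + np.exp(-y_pred))  # margin -> probability
    grad = p - y_true                  # dL/dmargin for log loss
    hess = p * (1.0 - p)               # d2L/dmargin2
    return grad, hess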
# define model
def robustxgb_binary(X_train, y_train, X_test, y_test, n_trials=10):
    sampler = optuna.samplers.TPESampler(seed=42)
    study = optuna.create_study(direction="maximize",
                                sampler=sampler,
                                study_name='xgb_eval')
    study.optimize(RobustXGBBinary(X_train, y_train), n_trials=n_trials)
    print("Best parameters:", study.best_trial.params)
    # Train and evaluate the model with the best hyperparameters
    best_params = study.best_trial.params
    model = xgb.XGBClassifier(max_depth=best_params['max_depth'],
                              reg_alpha=best_params['reg_alpha'],
                              reg_lambda=best_params['reg_lambda'],
                              learning_rate=best_params['learning_rate'],
                              n_estimators=best_params['n_estimators'],
                              objective=RFLBinary(r=best_params['r'], q=best_params['q']),
                              # device="cuda",      # uncomment for GPU training
                              # tree_method="hist",
                              )
    model.fit(X_train, y_train)
    y_pred_proba = model.predict_proba(X_test)[:, 1]
    auc = roc_auc_score(y_test, y_pred_proba)
    aucpr = average_precision_score(y_test, y_pred_proba)
    print(f'Test AUC: {auc:.4f}')
    print(f'Test AUCPR: {aucpr:.4f}')
    return auc, aucpr
class RobustXGBBinary(object):
    # Optuna objective: maximize mean 5-fold CV AUC of XGBoost with the RFL binary loss
    def __init__(self, X, y):
        self.X = X
        self.y = y

    def __call__(self, trial):
        params = {
            'max_depth': trial.suggest_int('max_depth', 2, 10),
            'reg_alpha': trial.suggest_float('reg_alpha', 1e-4, 1.0),
            'reg_lambda': trial.suggest_float('reg_lambda', 1e-4, 5.0),
            'learning_rate': trial.suggest_float('learning_rate', 1e-3, 1.0),
            'n_estimators': trial.suggest_int('n_estimators', 10, 200, step=10),
            "r": trial.suggest_categorical("r", [0.0, 0.5, 1.0]),
            "q": trial.suggest_categorical("q", [0.0, 0.1, 0.3, 0.5]),
        }
        clf = xgb.XGBClassifier(max_depth=params['max_depth'],
                                reg_alpha=params['reg_alpha'],
                                reg_lambda=params['reg_lambda'],
                                learning_rate=params['learning_rate'],
                                n_estimators=params['n_estimators'],
                                objective=RFLBinary(r=params['r'], q=params['q']),
                                # device="cuda",      # uncomment for GPU training
                                # tree_method="hist",
                                )
        cv = StratifiedKFold(n_splits=5, random_state=42, shuffle=True)
        auc_scores = cross_val_score(clf, self.X, self.y, cv=cv, scoring='roc_auc')
        return auc_scores.mean()
# load data
from sklearn.datasets import load_breast_cancer
data = load_breast_cancer()
X = data.data.astype(np.float32)
y = data.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
# train model
robustxgb_binary(X_train, y_train, X_test, y_test)
import optuna
from sklearn.metrics import roc_auc_score, average_precision_score
from sklearn.model_selection import StratifiedKFold, train_test_split
import numpy as np
from rfl_loss import RFLBinary
import lightgbm as lgb
# define some functions
def sigmoid(x):
    kEps = 1e-16                 # avoid division by zero
    z = np.minimum(-x, 88.7)     # clamp to avoid exp overflow
    return 1 / (1 + np.exp(z) + kEps)
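# Quick sanity check (illustrative, not part of the pipeline): the clamp keeps
# extreme margins from overflowing np.exp while leaving ordinary values intact.
assert np.isclose(sigmoid(np.array([0.0]))[0], 0.5)
assert np.isfinite(sigmoid(np.array([-1000.0, 1000.0]))).all()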
def predict_proba(model, X):
    # LightGBM cannot compute class probabilities when a custom objective is
    # used, so apply the sigmoid to the raw scores manually.
    prediction = model.predict(X)
    prediction_probabilities = sigmoid(prediction).reshape(-1, 1)
    prediction_probabilities = np.concatenate((1 - prediction_probabilities,
                                               prediction_probabilities), 1)
    return prediction_probabilities

def eval_auc(labels, preds):
    # custom eval metric: returns (name, value, is_higher_better)
    p = sigmoid(preds)
    return 'auc', roc_auc_score(labels, p), True
# define model
def robustlgb_binary(X_train, y_train, X_test, y_test, n_trials=10):
    optuna.logging.set_verbosity(optuna.logging.WARNING)
    sampler = optuna.samplers.TPESampler(seed=42)
    study = optuna.create_study(direction="maximize",
                                sampler=sampler,
                                study_name='lgb_eval')
    study.optimize(RobustLGBBinary(X_train, y_train), n_trials=n_trials)
    print("Best parameters:", study.best_trial.params)
    # Train and evaluate the model with the best hyperparameters
    best_params = study.best_trial.params
    model = lgb.LGBMClassifier(num_leaves=best_params['num_leaves'],
                               reg_alpha=best_params['reg_alpha'],
                               reg_lambda=best_params['reg_lambda'],
                               learning_rate=best_params['learning_rate'],
                               n_estimators=best_params['n_estimators'],
                               objective=RFLBinary(r=best_params['r'], q=best_params['q']),
                               verbose=-1)
    model.fit(X_train, y_train)
    # model.predict_proba is unavailable with a custom objective, so use the
    # helper defined above
    y_pred_proba = predict_proba(model, X_test)[:, 1]
    auc = roc_auc_score(y_test, y_pred_proba)
    aucpr = average_precision_score(y_test, y_pred_proba)
    print(f'Test AUC: {auc:.4f}')
    print(f'Test AUCPR: {aucpr:.4f}')
    return auc, aucpr
class RobustLGBBinary(object):
    # Optuna objective: maximize mean 5-fold CV AUC of LightGBM with the RFL binary loss
    def __init__(self, X, y):
        self.X = X
        self.y = y

    def __call__(self, trial):
        params = {
            'num_leaves': trial.suggest_int('num_leaves', 2, 10),
            'reg_alpha': trial.suggest_float('reg_alpha', 1e-4, 1.0),
            'reg_lambda': trial.suggest_float('reg_lambda', 1e-4, 5.0),
            'learning_rate': trial.suggest_float('learning_rate', 1e-3, 1.0),
            'n_estimators': trial.suggest_int('n_estimators', 10, 200),
            "r": trial.suggest_categorical("r", [0.0, 0.5, 1.0]),
            "q": trial.suggest_categorical("q", [0.0, 0.1, 0.3, 0.5]),
        }
        cv = StratifiedKFold(n_splits=5, random_state=42, shuffle=True)
        auc_scores = []
        for train_index, val_index in cv.split(self.X, self.y):
            X_train, y_train = self.X[train_index], self.y[train_index]
            X_val, y_val = self.X[val_index], self.y[val_index]
            model = lgb.LGBMClassifier(num_leaves=params['num_leaves'],
                                       reg_alpha=params['reg_alpha'],
                                       reg_lambda=params['reg_lambda'],
                                       learning_rate=params['learning_rate'],
                                       n_estimators=params['n_estimators'],
                                       objective=RFLBinary(r=params['r'], q=params['q']),
                                       verbose=-1)
            model.fit(X_train, y_train)
            # optionally: fit(..., eval_set=[(X_val, y_val)], eval_metric=eval_auc)
            y_val_pred_prob = predict_proba(model, X_val)[:, 1]
            auc = roc_auc_score(y_val, y_val_pred_prob)
            auc_scores.append(auc)
        return np.mean(auc_scores)
# load data
from sklearn.datasets import load_breast_cancer
data = load_breast_cancer()
X = data.data.astype(np.float32)
y = data.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
# train model
robustlgb_binary(X_train, y_train, X_test, y_test)
import optuna
import xgboost as xgb
from sklearn.metrics import accuracy_score
from sklearn.model_selection import StratifiedKFold, cross_val_score, train_test_split
import numpy as np
from rfl_loss import XGBRFLMulti
# define model
def robustxgb_multi(X_train, y_train, X_test, y_test, n_trials=10):
    sampler = optuna.samplers.TPESampler(seed=42)
    study = optuna.create_study(direction="maximize",
                                sampler=sampler,
                                study_name='xgb_eval')
    study.optimize(RobustXGBMulti(X_train, y_train), n_trials=n_trials)
    print("Best parameters:", study.best_trial.params)
    # Train and evaluate the model with the best hyperparameters
    best_params = study.best_trial.params
    model = xgb.XGBClassifier(max_depth=best_params['max_depth'],
                              reg_alpha=best_params['reg_alpha'],
                              reg_lambda=best_params['reg_lambda'],
                              learning_rate=best_params['learning_rate'],
                              n_estimators=best_params['n_estimators'],
                              objective=XGBRFLMulti(r=best_params['r'], q=best_params['q']),
                              # device="cuda",      # uncomment for GPU training
                              # tree_method="hist",
                              )
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    print(f'Test ACC: {acc:.4f}')
    return acc
class RobustXGBMulti(object):
    # Optuna objective: maximize mean 5-fold CV accuracy of XGBoost with the RFL multi-class loss
    def __init__(self, X, y):
        self.X = X
        self.y = y

    def __call__(self, trial):
        params = {
            'max_depth': trial.suggest_int('max_depth', 2, 10),
            'reg_alpha': trial.suggest_float('reg_alpha', 1e-4, 1.0),
            'reg_lambda': trial.suggest_float('reg_lambda', 1e-4, 5.0),
            'learning_rate': trial.suggest_float('learning_rate', 1e-3, 1.0),
            'n_estimators': trial.suggest_int('n_estimators', 10, 200, step=10),
            "r": trial.suggest_categorical("r", [0.0, 0.5, 1.0]),
            "q": trial.suggest_categorical("q", [0.0, 0.1, 0.3, 0.5]),
        }
        clf = xgb.XGBClassifier(max_depth=params['max_depth'],
                                reg_alpha=params['reg_alpha'],
                                reg_lambda=params['reg_lambda'],
                                learning_rate=params['learning_rate'],
                                n_estimators=params['n_estimators'],
                                objective=XGBRFLMulti(r=params['r'], q=params['q']),
                                # device="cuda",      # uncomment for GPU training
                                # tree_method="hist",
                                )
        cv = StratifiedKFold(n_splits=5, random_state=42, shuffle=True)
        acc_scores = cross_val_score(clf, self.X, self.y, cv=cv, scoring='accuracy')
        return acc_scores.mean()
# load data
from sklearn.datasets import load_iris
data = load_iris()
X = data.data.astype(np.float32)
y = data.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
# train model
robustxgb_multi(X_train, y_train, X_test, y_test)
import optuna
import lightgbm as lgb
from sklearn.metrics import accuracy_score
from sklearn.model_selection import StratifiedKFold, train_test_split
import numpy as np
from rfl_loss import LGBRFLMulti
# define some functions
def softmax(x):
    kEps = 1e-16               # avoid division by zero
    x = np.minimum(x, 88.7)    # clamp to avoid exp overflow
    e = np.exp(x)
    return e / np.expand_dims(np.sum(e, axis=1) + kEps, axis=1)

def predict_proba(model, X):
    # LightGBM cannot compute class probabilities when a custom objective is
    # used, so apply the softmax to the raw scores manually.
    prediction = model.predict(X)
    prediction_probabilities = softmax(prediction)
    return prediction_probabilities

def predict(model, X):
    prediction_probabilities = predict_proba(model, X)
    predictions = np.argmax(prediction_probabilities, axis=1)
    return predictions

def eval_acc(labels, preds):
    # custom eval metric: returns (name, value, is_higher_better)
    preds = preds.reshape((labels.shape[0], -1), order='F')
    p = softmax(preds)
    return 'acc', accuracy_score(labels, np.argmax(p, axis=1)), True
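# Quick sanity check (illustrative, not part of the pipeline): each row of the
# softmax output should be a probability distribution, i.e. sum to ~1.
assert np.allclose(softmax(np.array([[0.0, 1.0, 2.0]])).sum(axis=1), 1.0)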
class RobustLGBMulti(object):
    # Optuna objective: maximize mean 5-fold CV accuracy of LightGBM with the RFL multi-class loss
    def __init__(self, X, y):
        self.X = X
        self.y = y

    def __call__(self, trial):
        params = {
            'num_leaves': trial.suggest_int('num_leaves', 2, 10),
            'reg_alpha': trial.suggest_float('reg_alpha', 1e-4, 1.0),
            'reg_lambda': trial.suggest_float('reg_lambda', 1e-4, 5.0),
            'learning_rate': trial.suggest_float('learning_rate', 1e-3, 1.0),
            'n_estimators': trial.suggest_int('n_estimators', 10, 200),
            "r": trial.suggest_categorical("r", [0.0, 0.5, 1.0]),
            "q": trial.suggest_categorical("q", [0.0, 0.1, 0.3, 0.5]),
        }
        cv = StratifiedKFold(n_splits=5, random_state=42, shuffle=True)
        acc_scores = []
        for train_index, val_index in cv.split(self.X, self.y):
            X_train, y_train = self.X[train_index], self.y[train_index]
            X_val, y_val = self.X[val_index], self.y[val_index]
            model = lgb.LGBMClassifier(num_leaves=params['num_leaves'],
                                       reg_alpha=params['reg_alpha'],
                                       reg_lambda=params['reg_lambda'],
                                       learning_rate=params['learning_rate'],
                                       n_estimators=params['n_estimators'],
                                       # device="gpu",  # uncomment for GPU training
                                       objective=LGBRFLMulti(r=params['r'], q=params['q']),
                                       verbose=-1)
            model.fit(X_train, y_train)
            # optionally: fit(..., eval_set=[(X_val, y_val)], eval_metric=eval_acc)
            y_val_pred = predict(model, X_val)
            acc = accuracy_score(y_val, y_val_pred)
            acc_scores.append(acc)
        return np.mean(acc_scores)
# define model
def robustlgb_multi(X_train, y_train, X_test, y_test, n_trials=10):
    sampler = optuna.samplers.TPESampler(seed=42)
    study = optuna.create_study(direction="maximize",
                                sampler=sampler,
                                study_name='lgb_eval')
    study.optimize(RobustLGBMulti(X_train, y_train), n_trials=n_trials)
    print("Best parameters:", study.best_trial.params)
    # Train and evaluate the model with the best hyperparameters
    best_params = study.best_trial.params
    model = lgb.LGBMClassifier(num_leaves=best_params['num_leaves'],
                               reg_alpha=best_params['reg_alpha'],
                               reg_lambda=best_params['reg_lambda'],
                               learning_rate=best_params['learning_rate'],
                               n_estimators=best_params['n_estimators'],
                               # device="gpu",  # uncomment for GPU training
                               objective=LGBRFLMulti(r=best_params['r'], q=best_params['q']),
                               verbose=-1)
    model.fit(X_train, y_train)
    y_pred = predict(model, X_test)
    acc = accuracy_score(y_test, y_pred)
    print(f'Test ACC: {acc:.4f}')
    return acc
# load data
from sklearn.datasets import load_iris
data = load_iris()
X = data.data.astype(np.float32)
y = data.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
# train model
robustlgb_multi(X_train, y_train, X_test, y_test)
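# Optional usage sketch: to exercise the robustness of the RFL losses, inject
# symmetric label noise into the training split and rerun one of the pipelines
# above. flip_labels is a hypothetical helper, not part of rfl_loss.
def flip_labels(y, noise_rate=0.2, n_classes=3, seed=42):
    rng = np.random.default_rng(seed)
    y_noisy = y.copy()
    idx = rng.choice(len(y), size=int(noise_rate * len(y)), replace=False)
    # shift each selected label by a nonzero offset so it lands on a wrong class
    y_noisy[idx] = (y_noisy[idx] + rng.integers(1, n_classes, size=len(idx))) % n_classes
    return y_noisy

# robustlgb_multi(X_train, flip_labels(y_train), X_test, y_test)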