Luojiaqimath/Robust-GBDT
Demo for binary classification using XGBoost

import optuna
import xgboost as xgb
import numpy as np
from sklearn.metrics import roc_auc_score, average_precision_score
from sklearn.model_selection import StratifiedKFold, train_test_split, cross_val_score
from rfl_loss import RFLBinary


# define model
def robustxgb_binary(X_train, y_train, X_test, y_test, n_trials=10):
    sampler = optuna.samplers.TPESampler(seed=42)
    study = optuna.create_study(direction="maximize",
                                sampler=sampler,
                                study_name='xgb_eval')
    study.optimize(RobustXGBBinary(X_train, y_train), n_trials=n_trials)

    print("Best parameters:", study.best_trial.params)

    # Train and evaluate the model with the best hyperparameters
    best_params = study.best_trial.params
    model = xgb.XGBClassifier(max_depth=best_params['max_depth'],
                              reg_alpha=best_params['reg_alpha'],
                              reg_lambda=best_params['reg_lambda'],
                              learning_rate=best_params['learning_rate'],
                              n_estimators=best_params['n_estimators'],
                              objective=RFLBinary(r=best_params['r'], q=best_params['q']),
                              # device="cuda",  # for GPU training
                              # tree_method="hist",
                              )
    model.fit(X_train, y_train)
    
    y_pred_proba = model.predict_proba(X_test)[:, 1]  
    auc = roc_auc_score(y_test, y_pred_proba)
    aucpr = average_precision_score(y_test, y_pred_proba)
    print(f'Test AUC: {auc:.4f}')
    print(f'Test AUCPR: {aucpr:.4f}')
    return auc, aucpr


class RobustXGBBinary(object):
    def __init__(self, X, y):
        self.X = X
        self.y = y

    def __call__(self, trial):
        params = {
            'max_depth': trial.suggest_int('max_depth', 2, 10),
            'reg_alpha': trial.suggest_float('reg_alpha', 1e-4, 1.0),
            'reg_lambda': trial.suggest_float('reg_lambda', 1e-4, 5.0),
            'learning_rate': trial.suggest_float('learning_rate', 1e-3, 1.0),
            'n_estimators': trial.suggest_int('n_estimators', 10, 200, step=10),
            'r': trial.suggest_categorical('r', [0.0, 0.5, 1.0]),
            'q': trial.suggest_categorical('q', [0.0, 0.1, 0.3, 0.5]),
        }
    
        clf = xgb.XGBClassifier(max_depth=params['max_depth'],
                                reg_alpha=params['reg_alpha'],
                                reg_lambda=params['reg_lambda'],
                                learning_rate=params['learning_rate'],
                                n_estimators=params['n_estimators'],
                                objective=RFLBinary(r=params['r'], q=params['q']),
                                # device="cuda",  # for GPU training
                                # tree_method="hist",
                                )
        cv = StratifiedKFold(n_splits=5, random_state=42, shuffle=True)
        auc_scores = cross_val_score(clf, self.X, self.y, cv=cv, scoring='roc_auc')
        return auc_scores.mean()
      

# load data
from sklearn.datasets import load_breast_cancer
data = load_breast_cancer()  
X = data.data.astype(np.float32)
y = data.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)


# train model
robustxgb_binary(X_train, y_train, X_test, y_test)
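RFLBinary is the robust focal loss shipped in this repo's rfl_loss module; its internals are not reproduced here. For orientation, XGBoost's scikit-learn API accepts any callable objective that maps (y_true, y_pred) to per-sample gradients and Hessians, which is how RFLBinary plugs in above. A minimal sketch of that interface, using plain logistic loss as a hypothetical stand-in (logistic_objective is illustrative, not part of the repo):

import numpy as np

def logistic_objective(y_true, y_pred):
    # y_pred holds raw margin scores; return per-sample gradient and hessian
    p = 1.0 / (1.0 + np.exp(-y_pred))
    grad = p - y_true
    hess = p * (1.0 - p)
    return grad, hess

# usage: xgb.XGBClassifier(objective=logistic_objective, ...)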

Demo for binary classification using LightGBM

import optuna
from sklearn.metrics import roc_auc_score, average_precision_score
from sklearn.model_selection import StratifiedKFold, train_test_split
import numpy as np
from rfl_loss import RFLBinary
import lightgbm as lgb


# define some functions
def sigmoid(x):
    kEps = 1e-16  # guard against division by zero
    x = np.minimum(-x, 88.7)  # clamp to avoid overflow in exp
    return 1 / (1 + np.exp(x) + kEps)


def predict_proba(model, X):
    # With a custom objective, LightGBM's predict returns raw scores rather
    # than probabilities, so the sigmoid is applied manually here.
    prediction = model.predict(X)

    prediction_probabilities = sigmoid(prediction).reshape(-1, 1)
    prediction_probabilities = np.concatenate((1 - prediction_probabilities,
                                               prediction_probabilities), 1)
    return prediction_probabilities

def eval_auc(labels, preds):  # auc
    p = sigmoid(preds)
    return 'auc', roc_auc_score(labels, p), True
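A quick sanity check of the sigmoid helper on illustrative raw margins (values are hypothetical, not repo output):

scores = np.array([-2.0, 0.0, 2.0])  # raw margins as returned by model.predict
print(sigmoid(scores))               # ~[0.119, 0.5, 0.881]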


# define model
def robustlgb_binary(X_train, y_train, X_test, y_test, n_trials=10):
    optuna.logging.set_verbosity(optuna.logging.WARNING)  
    sampler = optuna.samplers.TPESampler(seed=42)
    study = optuna.create_study(direction="maximize",
                                sampler=sampler,
                                study_name='lgb_eval')
    study.optimize(RobustLGBBinary(X_train, y_train), n_trials=n_trials)

    print("Best parameters:", study.best_trial.params)

    # Train and evaluate the model with the best hyperparameters
    best_params = study.best_trial.params
    model = lgb.LGBMClassifier(num_leaves=best_params['num_leaves'],
                               reg_alpha=best_params['reg_alpha'],
                               reg_lambda=best_params['reg_lambda'],
                               learning_rate=best_params['learning_rate'],
                               n_estimators=best_params['n_estimators'],
                               objective=RFLBinary(r=best_params['r'], q=best_params['q']),
                               verbose=-1
                               )
    model.fit(X_train, y_train)
    
    # model.predict_proba is unavailable with a custom objective; use the helper
    y_pred_proba = predict_proba(model, X_test)[:, 1]
    auc = roc_auc_score(y_test, y_pred_proba)
    aucpr = average_precision_score(y_test, y_pred_proba)
    print(f'Test AUC: {auc:.4f}')
    print(f'Test AUCPR: {aucpr:.4f}')
    return auc, aucpr


class RobustLGBBinary(object):
    def __init__(self, X, y):
        self.X = X
        self.y = y

    def __call__(self, trial):
        params = {
            'num_leaves': trial.suggest_int('num_leaves', 2, 10),
            'reg_alpha': trial.suggest_float('reg_alpha', 1e-4, 1.0),
            'reg_lambda': trial.suggest_float('reg_lambda', 1e-4, 5.0),
            'learning_rate': trial.suggest_float('learning_rate', 1e-3, 1.0),
            'n_estimators': trial.suggest_int('n_estimators', 10, 200),
            'r': trial.suggest_categorical('r', [0.0, 0.5, 1.0]),
            'q': trial.suggest_categorical('q', [0.0, 0.1, 0.3, 0.5]),
        }
    

        cv = StratifiedKFold(n_splits=5, random_state=42, shuffle=True)
        auc_scores = []
        
        for train_index, val_index in cv.split(self.X, self.y):
            X_train, y_train = self.X[train_index], self.y[train_index]
            X_val, y_val = self.X[val_index], self.y[val_index]
            
            model = lgb.LGBMClassifier(num_leaves=params['num_leaves'],
                                       reg_alpha=params['reg_alpha'],
                                       reg_lambda=params['reg_lambda'],
                                       learning_rate=params['learning_rate'],
                                       n_estimators=params['n_estimators'],
                                       objective=RFLBinary(r=params['r'], q=params['q']),
                                       verbose=-1
                                       )
            
            model.fit(X_train, y_train)
            # optional: add eval_set=[(X_val, y_val)], eval_metric=eval_auc to monitor training
            
            y_val_pred_prob = predict_proba(model, X_val)[:, 1]
            auc = roc_auc_score(y_val, y_val_pred_prob)
            auc_scores.append(auc)
        return np.mean(auc_scores)
      

# load data
from sklearn.datasets import load_breast_cancer
data = load_breast_cancer()  
X = data.data.astype(np.float32)
y = data.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)


# train model
robustlgb_binary(X_train, y_train, X_test, y_test)

Demo for multi-class classification using XGBoost

import optuna
import numpy as np
import xgboost as xgb
from sklearn.metrics import accuracy_score
from sklearn.model_selection import StratifiedKFold, train_test_split, cross_val_score
from rfl_loss import XGBRFLMulti


# define model
def robustxgb_multi(X_train, y_train, X_test, y_test, n_trials=10):
    sampler = optuna.samplers.TPESampler(seed=42)
    study = optuna.create_study(direction="maximize",
                                sampler=sampler,
                                study_name='xgb_eval')
    study.optimize(RobustXGBMulti(X_train, y_train), n_trials=n_trials)

    print("Best parameters:", study.best_trial.params)

    # Train and evaluate the model with the best hyperparameters
    best_params = study.best_trial.params
    model = xgb.XGBClassifier(max_depth=best_params['max_depth'],
                              reg_alpha=best_params['reg_alpha'],
                              reg_lambda=best_params['reg_lambda'],
                              learning_rate=best_params['learning_rate'],
                              n_estimators=best_params['n_estimators'],
                              objective=XGBRFLMulti(r=best_params['r'], q=best_params['q']),
                              # device="cuda",  # for GPU training
                              # tree_method="hist",
                              )
    model.fit(X_train, y_train)
    
    y_pred = model.predict(X_test)  
    acc = accuracy_score(y_test, y_pred)
    print(f'Test ACC: {acc:.4f}')
    return acc


class RobustXGBMulti(object):
    def __init__(self, X, y):
        self.X = X
        self.y = y

    def __call__(self, trial):
        params = {
            'max_depth': trial.suggest_int('max_depth', 2, 10),
            'reg_alpha': trial.suggest_float('reg_alpha', 1e-4, 1.0),
            'reg_lambda': trial.suggest_float('reg_lambda', 1e-4, 5.0),
            'learning_rate': trial.suggest_float('learning_rate', 1e-3, 1.0),
            'n_estimators': trial.suggest_int('n_estimators', 10, 200, step=10),
            'r': trial.suggest_categorical('r', [0.0, 0.5, 1.0]),
            'q': trial.suggest_categorical('q', [0.0, 0.1, 0.3, 0.5]),
        }
    
        clf = xgb.XGBClassifier(max_depth=params['max_depth'],
                                reg_alpha=params['reg_alpha'],
                                reg_lambda=params['reg_lambda'],
                                learning_rate=params['learning_rate'],
                                n_estimators=params['n_estimators'],
                                objective=XGBRFLMulti(r=params['r'], q=params['q']),
                                # device="cuda",  # for GPU training
                                # tree_method="hist",
                                )
        cv = StratifiedKFold(n_splits=5, random_state=42, shuffle=True)
        acc_scores = cross_val_score(clf, self.X, self.y, cv=cv, scoring='accuracy')
        return acc_scores.mean()


# load data
from sklearn.datasets import load_iris
data = load_iris()  
X = data.data.astype(np.float32)
y = data.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)


# train model
robustxgb_multi(X_train, y_train, X_test, y_test)

Demo for multi-class classification using LightGBM

import optuna
import lightgbm as lgb
from sklearn.metrics import accuracy_score
from sklearn.model_selection import StratifiedKFold, train_test_split
import numpy as np
from rfl_loss import LGBRFLMulti


# define some functions
def predict(model, X):
    prediction_probabilities = predict_proba(model, X)
    predictions = np.argmax(prediction_probabilities, axis=1)
    return predictions

def predict_proba(model, X):
    # With a custom objective, LightGBM's predict returns raw scores rather
    # than probabilities, so the softmax is applied manually here.
    prediction = model.predict(X)
    prediction_probabilities = softmax(prediction)
    return prediction_probabilities

def eval_acc(labels, preds):
    preds = preds.reshape((labels.shape[0], -1), order='F')
    p = softmax(preds)
    return 'eacc', accuracy_score(labels, np.argmax(p, axis=1)), True

def softmax(x):
    kEps = 1e-16  # guard against division by zero
    x = np.minimum(x, 88.7)  # clamp to avoid overflow in exp
    e = np.exp(x)
    return e / np.expand_dims(np.sum(e, axis=1) + kEps, axis=1)
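A quick check that softmax rows form valid probability vectors (values are illustrative, not repo output):

scores = np.array([[2.0, 1.0, 0.1]])  # raw per-class margins for one sample
print(softmax(scores))                # ~[[0.659, 0.242, 0.099]]
print(softmax(scores).sum(axis=1))    # ~[1.0]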


class RobustLGBMulti(object):
    def __init__(self, X, y):
        self.X = X
        self.y = y

    def __call__(self, trial):
        params = {
            'num_leaves': trial.suggest_int('num_leaves', 2, 10),
            'reg_alpha': trial.suggest_float('reg_alpha', 1e-4, 1.0),
            'reg_lambda': trial.suggest_float('reg_lambda', 1e-4, 5.0),
            'learning_rate': trial.suggest_float('learning_rate', 1e-3, 1.0),
            'n_estimators': trial.suggest_int('n_estimators', 10, 200),
            'r': trial.suggest_categorical('r', [0.0, 0.5, 1.0]),
            'q': trial.suggest_categorical('q', [0.0, 0.1, 0.3, 0.5]),
        }
    
        cv = StratifiedKFold(n_splits=5, random_state=42, shuffle=True)
        acc_scores = []
        
        for train_index, val_index in cv.split(self.X, self.y):
            X_train, y_train = self.X[train_index], self.y[train_index]
            X_val, y_val = self.X[val_index], self.y[val_index]
            
            model = lgb.LGBMClassifier(num_leaves=params['num_leaves'],
                                       reg_alpha=params['reg_alpha'],
                                       reg_lambda=params['reg_lambda'],
                                       learning_rate=params['learning_rate'],
                                       n_estimators=params['n_estimators'],
                                       # device="gpu",  # for GPU training
                                       objective=LGBRFLMulti(r=params['r'], q=params['q']),
                                       )
            
            model.fit(X_train, y_train)
            # optional: add eval_set=[(X_val, y_val)], eval_metric=eval_acc to monitor training
            
            y_val_pred = predict(model, X_val)
            acc = accuracy_score(y_val, y_val_pred)
            acc_scores.append(acc)
        return np.mean(acc_scores)


# define model
def robustlgb_multi(X_train, y_train, X_test, y_test, n_trials=10):
    sampler = optuna.samplers.TPESampler(seed=42)
    study = optuna.create_study(direction="maximize",
                                sampler=sampler,
                                study_name='lgb_eval')
    study.optimize(RobustLGBMulti(X_train, y_train), n_trials=n_trials)

    print("Best parameters:", study.best_trial.params)

    # Train and evaluate the model with the best hyperparameters
    best_params = study.best_trial.params
    model = lgb.LGBMClassifier(num_leaves=best_params['num_leaves'],
                               reg_alpha=best_params['reg_alpha'],
                               reg_lambda=best_params['reg_lambda'],
                               learning_rate=best_params['learning_rate'],
                               n_estimators=best_params['n_estimators'],
                               # device="gpu",  # for GPU training
                               objective=LGBRFLMulti(r=best_params['r'], q=best_params['q']),
                               )
    model.fit(X_train, y_train)
    
    y_pred = predict(model, X_test)
    acc = accuracy_score(y_test, y_pred)
    print(f'Test ACC: {acc:.4f}')
    return acc


# load data
from sklearn.datasets import load_iris
data = load_iris()  
X = data.data.astype(np.float32)
y = data.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)


# train model
robustlgb_multi(X_train, y_train, X_test, y_test)
