From 97ae7aace46fc77ffe687092dba8487856258e57 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=81=93=E8=BE=95?=
Date: Mon, 8 Aug 2022 16:41:18 +0800
Subject: [PATCH] update cfg docs w.r.t. evaluation and fl algos.

---
 federatedscope/core/configs/README.md         | 121 ++++++++++++++++++
 federatedscope/core/configs/cfg_data.py       |   1 +
 federatedscope/core/configs/cfg_evaluation.py |   1 -
 federatedscope/core/configs/cfg_fl_algo.py    |  12 ++
 4 files changed, 134 insertions(+), 1 deletion(-)

diff --git a/federatedscope/core/configs/README.md b/federatedscope/core/configs/README.md
index 4266d6eb5..35b7ef03e 100644
--- a/federatedscope/core/configs/README.md
+++ b/federatedscope/core/configs/README.md
@@ -9,6 +9,7 @@ The configurations related to the data/dataset are defined in `cfg_data.py`.
 | `data.root` | (string) 'data' | The folder where the data file is located. `data.root` is used together with `data.type` to load the dataset. | - |
 | `data.type` | (string) 'toy' | Dataset name | CV: 'femnist', 'celeba'; NLP: 'shakespeare', 'subreddit', 'twitter'; Graph: 'cora', 'citeseer', 'pubmed', 'dblp_conf', 'dblp_org', 'csbm', 'epinions', 'ciao', 'fb15k-237', 'wn18', 'fb15k', 'MUTAG', 'BZR', 'COX2', 'DHFR', 'PTC_MR', 'AIDS', 'NCI1', 'ENZYMES', 'DD', 'PROTEINS', 'COLLAB', 'IMDB-BINARY', 'IMDB-MULTI', 'REDDIT-BINARY', 'HIV', 'ESOL', 'FREESOLV', 'LIPO', 'PCBA', 'MUV', 'BACE', 'BBBP', 'TOX21', 'TOXCAST', 'SIDER', 'CLINTOX', 'graph_multi_domain_mol', 'graph_multi_domain_small', 'graph_multi_domain_mix', 'graph_multi_domain_biochem'; MF: 'vflmovielens1m', 'vflmovielens10m', 'hflmovielens1m', 'hflmovielens10m', 'vflnetflix', 'hflnetflix'; Tabular: 'toy', 'synthetic'; External dataset: 'DNAME@torchvision', 'DNAME@torchtext', 'DNAME@huggingface_datasets', 'DNAME@openml'. |
 | `data.args` | (list) [] | Args for the external dataset | Used for external datasets, e.g., `[{'download': False}]` |
+| `data.save_data` | (bool) False | Whether to save the generated toy data | - |
 | `data.splitter` | (string) '' | Splitter name for the standalone dataset | Generic splitter: 'lda'; Graph splitters: 'louvain', 'random', 'rel_type', 'graph_type', 'scaffold', 'scaffold_lda', 'rand_chunk' |
 | `data.splitter_args` | (list) [] | Args for the splitter. | Used for the splitter, e.g., `[{'alpha': 0.5}]` |
 | `data.transform` | (list) [] | Transform for the x of data | Used in `get_item` of the torch dataset, e.g., `[['ToTensor'], ['Normalize', {'mean': [0.1307], 'std': [0.3081]}]]` |
@@ -31,6 +32,61 @@ The configurations related to the data/dataset are defined in `cfg_data.py`.
 | `cfg.data.quadratic.min_curv` | (float) 0.02 | Minimum curvature of the synthetic quadratic dataset | - |
 | `cfg.data.quadratic.max_curv` | (float) 12.5 | Maximum curvature of the synthetic quadratic dataset | - |
+
+
+### Federated Algorithms
+The configurations related to specific federated algorithms are defined in `cfg_fl_algo.py`.
+
+#### `fedopt`: for the FedOpt algorithm
+| Name | (Type) Default Value | Description | Note |
+|:----:|:-----:|:---------- |:---- |
+| `fedopt.use` | (bool) False | Whether to run the FL course with the FedOpt algorithm. | If False, all the related configurations (`cfg.fedopt.xxx`) would not take effect. |
+| `fedopt.optimizer.type` | (string) 'SGD' | The type of optimizer used by the FedOpt algorithm. | Currently we support all optimizers built into PyTorch (the modules under `torch.optim`). |
+| `fedopt.optimizer.lr` | (float) 0.1 | The learning rate used by the FedOpt optimizer. | - |
+#### `fedprox`: for the FedProx algorithm
+| Name | (Type) Default Value | Description | Note |
+|:----:|:-----:|:---------- |:---- |
+| `fedprox.use` | (bool) False | Whether to run the FL course with the FedProx algorithm. | If False, all the related configurations (`cfg.fedprox.xxx`) would not take effect. |
+| `fedprox.mu` | (float) 0.0 | The hyper-parameter $\mu$ used in the FedProx algorithm. | - |
+#### `personalization`: for personalization algorithms
+| Name | (Type) Default Value | Description | Note |
+|:----:|:-----:|:---------- |:---- |
+| `personalization.local_param` | (list of str) [] | The names of the client-distinct local parameters, e.g., ['pre', 'bn'] | - |
+| `personalization.share_non_trainable_para` | (bool) False | Whether to transmit non-trainable parameters between FL participants | - |
+| `personalization.local_update_steps` | (int) -1 | The number of local training steps for personalized models | By default, -1 indicates that the local update steps will be set to the same value as the valid `train.local_update_steps` |
+| `personalization.regular_weight` | (float) 0.1 | The regularization factor used by model-parameter regularization methods such as Ditto and pFedMe. | The smaller the regular_weight is, the stronger the emphasis on the personalized model. |
+| `personalization.lr` | (float) 0.0 | The personalized learning rate used in personalized FL algorithms. | The default value 0.0 indicates that it will be set to the same value as `train.optimizer.lr` when users have not specified a valid `personalization.lr` |
+| `personalization.K` | (int) 5 | The number of local approximation steps for pFedMe. | - |
+| `personalization.beta` | (float) 5 | The moving average parameter for pFedMe. | - |
+#### `fedsageplus`: for the FedSage+ algorithm
+| Name | (Type) Default Value | Description | Note |
+|:----:|:-----:|:---------- |:---- |
+| `fedsageplus.num_pred` | (int) 5 | Number of nodes generated by the generator | - |
+| `fedsageplus.gen_hidden` | (int) 128 | Hidden layer dimension of the generator | - |
+| `fedsageplus.hide_portion` | (float) 0.5 | Portion of the graph to hide | - |
+| `fedsageplus.fedgen_epoch` | (int) 200 | Number of federated training rounds for the generator | - |
+| `fedsageplus.loc_epoch` | (int) 1 | Number of local pre-training rounds for the generator | - |
+| `fedsageplus.a` | (float) 1.0 | Coefficient for the criterion on the number of missing nodes | - |
+| `fedsageplus.b` | (float) 1.0 | Coefficient for the feature criterion | - |
+| `fedsageplus.c` | (float) 1.0 | Coefficient for the classification criterion | - |
+#### `gcflplus`: for the GCFL+ algorithm
+| Name | (Type) Default Value | Description | Note |
+|:----:|:-----:|:---------- |:---- |
+| `gcflplus.EPS_1` | (float) 0.05 | Bound for mean_norm | - |
+| `gcflplus.EPS_2` | (float) 0.1 | Bound for max_norm | - |
+| `gcflplus.seq_length` | (int) 5 | Length of the gradient sequence | - |
+| `gcflplus.standardize` | (bool) False | Whether to standardize dtw_distances | - |
+#### `flitplus`: for the FLIT+ algorithm
+| Name | (Type) Default Value | Description | Note |
+|:----:|:-----:|:---------- |:---- |
+| `flitplus.tmpFed` | (float) 0.5 | gamma in the focal loss (Eq. 4) | - |
+| `flitplus.lambdavat` | (float) 0.5 | lambda in phi (Eq. 10) | - |
+| `flitplus.factor_ema` | (float) 0.8 | beta in omega (Eq. 12) | - |
+| `flitplus.weightReg` | (float) 1.0 | Weight balancing lossLocalLabel and lossLocalVAT | - |
+
+
 ### Federated training
 The configurations related to federated training are defined in `cfg_training.py`.
 Considering it's infeasible to list all the potential arguments for optimizers and schedulers, we allow users to add new parameters directly under the corresponding namespace.
@@ -50,6 +106,9 @@ The following configurations are related to the local training.
 | `train.scheduler` | - | - | Similar to `train.optimizer`, you can add new parameters as you need, e.g., `train.scheduler.step_size=10`. All new parameters will be used as arguments for the chosen scheduler. |
 | `train.scheduler.type` | (string) '' | The type of the scheduler used in local training | Currently we support all schedulers built into PyTorch (the modules under `torch.optim.lr_scheduler`). |
+
+
+
 #### Fine tuning
 The following configurations are related to fine-tuning.
@@ -126,6 +185,68 @@ The configurations related to FL settings are defined in `cfg_fl_setting.py`.
 | `vertical.dims` | (list of int) [5,10] | The dimensions of the input features for participants. | - |
 | `vertical.key_size` | (int) 3072 | The length (bit) of the public keys. | - |
+
+
+
+
+
+### Evaluation
+The configurations related to monitoring and evaluation are defined in `cfg_evaluation.py`.
+
+| Name | (Type) Default Value | Description | Note |
+|:----:|:-----:|:---------- |:---- |
+| `eval.freq` | (int) 1 | The frequency at which we conduct evaluation. | - |
+| `eval.metrics` | (list of str) [] | The names of the adopted evaluation metrics. | By default, we calculate ['loss', 'avg_loss', 'total']; all the supported metrics can be found in `core/monitors/metric_calculator.py` |
+| `eval.split` | (list of str) ['test', 'val'] | The names of the data splits on which we conduct evaluation. | - |
+| `eval.report` | (list of str) ['weighted_avg', 'avg', 'fairness', 'raw'] | The forms in which results are reported to loggers | By default, we report comprehensive results: `weighted_avg` and `avg` indicate the weighted average and uniform average over all evaluated clients; `fairness` indicates reporting fairness-related results such as individual performance and std across all evaluated clients; `raw` indicates that we save and compress all clients' individual results without summarization, so that users can flexibly post-process the saved results further. |
+| `eval.best_res_update_round_wise_key` | (str) 'val_loss' | The metric name we use as the primary key to check for performance improvement at each evaluation round. | - |
+| `eval.monitoring` | (list of str) [] | Extended monitoring methods or metrics, e.g., 'dissim' for B-local dissimilarity | - |
+| `eval.count_flops` | (bool) True | Whether to count the FLOPs during the FL course. | - |
+#### `wandb`: for wandb tracking and visualization
+| Name | (Type) Default Value | Description | Note |
+|:----:|:-----:|:---------- |:---- |
+| `wandb.use` | (bool) False | Whether to use wandb to track and visualize the FL dynamics and results. | If `False`, all the related configurations (`wandb.xxx`) would not take effect. |
+| `wandb.name_user` | (str) '' | The user name used for wandb management | - |
+| `wandb.name_project` | (str) '' | The project name used for wandb management | - |
+| `wandb.online_track` | (bool) True | Whether to track the results in an online manner, i.e., log results at every evaluation round | - |
+| `wandb.client_train_info` | (bool) True | Whether to track the training information of clients | - |
+
+
+#### Fine tuning
+The following configurations are related to fine-tuning.
+
+| Name | (Type) Default Value | Description | Note |
+|:--------------------------:|:--------------------:|:------------------------------------------------:|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------:|
+| `finetune.before_eval` | (bool) False | Indicator of fine-tuning before evaluation | If `True`, each client will fine-tune its model before each evaluation. Note that the fine-tuning is only conducted before evaluation and won't influence the uploaded weights in each round. |
+| `finetune.local_update_steps` | (int) 1 | The number of local fine-tuning steps | - |
+| `finetune.batch_or_epoch` | (string) `batch` | The type of local fine-tuning. | Similar to `train.batch_or_epoch`, `finetune.batch_or_epoch` specifies the unit of `finetune.local_update_steps` |
+| `finetune.optimizer` | - | - | You can add new parameters under `finetune.optimizer` according to the type of optimizer. All new parameters will be used as arguments for the chosen optimizer. |
+| `finetune.optimizer.type` | (string) 'SGD' | The type of the optimizer used in fine-tuning. | Currently we support all optimizers built into PyTorch (the modules under `torch.optim`). |
+| `finetune.optimizer.lr` | (float) 0.1 | The learning rate used in local fine-tuning | - |
+| `finetune.scheduler` | - | - | Similar to `train.scheduler`, you can add new parameters as you need, and all new parameters will be used as arguments for the chosen scheduler. |
+
+#### Grad Clipping
+The following configurations are related to gradient clipping.
+
+| Name | (Type) Default Value | Description | Note |
+|:--------------------------:|:--------------------:|:------------------------------------------------:|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------:|
+| `grad.grad_clip` | (float) -1.0 | The threshold used in gradient clipping. | `grad.grad_clip < 0` means we don't clip the gradient. |
+
+#### Early Stop
+
+| Name | (Type) Default Value | Description | Note |
+|:----------------------------------------:|:--------------------:|:------------------------------------------------:|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------:|
+| `early_stop.patience` | (int) 5 | How many rounds to wait after the last time the monitored metric improved. | Note that the actual_checking_round = `early_stop.patience` * `eval.freq`. To disable early stopping, set `early_stop.patience` <= 0 |
+| `early_stop.delta` | (float) 0. | Minimum change in the monitored metric to indicate an improvement. | - |
+| `early_stop.improve_indicator_mode` | (string) 'best' | Early stop when there is no improvement within the last `early_stop.patience` rounds. | Chosen from 'mean' or 'best' |
+| `early_stop.the_smaller_the_better` | (bool) True | The optimization direction of the chosen metric | - |
+
+
+
 ### Asynchronous Training Strategies
 The configurations related to applying asynchronous training strategies in FL are defined in `cfg_asyn.py`.
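To make the new tables easier to act on, here is a minimal sketch of how some of the documented `fedprox`, `eval`, `early_stop`, and `wandb` options might be set on the yacs-style config object; the `global_cfg` import path and all values are assumptions for illustration, not part of the patched files.

```python
# Minimal sketch (illustrative only): setting options documented in the tables
# above on FederatedScope's yacs-style config. The `global_cfg` import path is
# an assumption; adjust it to the installed version.
from federatedscope.core.configs.config import global_cfg

cfg = global_cfg.clone()

# Run the FL course with FedProx (see the `fedprox` table)
cfg.fedprox.use = True
cfg.fedprox.mu = 0.1

# Evaluate every 5 rounds and early-stop on validation loss
cfg.eval.freq = 5
cfg.eval.metrics = ['acc']
cfg.eval.best_res_update_round_wise_key = 'val_loss'
cfg.early_stop.patience = 10

# Optional wandb tracking (names are placeholders)
cfg.wandb.use = True
cfg.wandb.name_user = 'alice'
cfg.wandb.name_project = 'fs-demo'
```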
diff --git a/federatedscope/core/configs/cfg_data.py b/federatedscope/core/configs/cfg_data.py
index 65c7361b5..dea7c2091 100644
--- a/federatedscope/core/configs/cfg_data.py
+++ b/federatedscope/core/configs/cfg_data.py
@@ -10,6 +10,7 @@ def extend_data_cfg(cfg):
 
     cfg.data.root = 'data'
     cfg.data.type = 'toy'
+    cfg.data.save_data = False  # whether to save the generated toy data
     cfg.data.args = []  # args for external dataset, eg. [{'download': True}]
     cfg.data.splitter = ''
     cfg.data.splitter_args = []  # args for splitter, eg. [{'alpha': 0.5}]
diff --git a/federatedscope/core/configs/cfg_evaluation.py b/federatedscope/core/configs/cfg_evaluation.py
index 065b991c5..09b9cdd48 100644
--- a/federatedscope/core/configs/cfg_evaluation.py
+++ b/federatedscope/core/configs/cfg_evaluation.py
@@ -10,7 +10,6 @@ def extend_evaluation_cfg(cfg):
 
     cfg.eval = CN(
         new_allowed=True)  # allow user to add their settings under `cfg.eval`
-    cfg.eval.save_data = False
     cfg.eval.freq = 1
     cfg.eval.metrics = []
     cfg.eval.split = ['test', 'val']
diff --git a/federatedscope/core/configs/cfg_fl_algo.py b/federatedscope/core/configs/cfg_fl_algo.py
index 6443cb83d..e1f242bf5 100644
--- a/federatedscope/core/configs/cfg_fl_algo.py
+++ b/federatedscope/core/configs/cfg_fl_algo.py
@@ -54,13 +54,21 @@ def extend_fl_algo_cfg(cfg):
     # ---------------------------------------------------------------------- #
     cfg.fedsageplus = CN()
 
+    # Number of nodes generated by the generator
     cfg.fedsageplus.num_pred = 5
+    # Hidden layer dimension of generator
     cfg.fedsageplus.gen_hidden = 128
+    # Hide graph portion
     cfg.fedsageplus.hide_portion = 0.5
+    # Federated training round for generator
     cfg.fedsageplus.fedgen_epoch = 200
+    # Local pre-train round for generator
     cfg.fedsageplus.loc_epoch = 1
+    # Coefficient for criterion number of missing node
     cfg.fedsageplus.a = 1.0
+    # Coefficient for criterion feature
     cfg.fedsageplus.b = 1.0
+    # Coefficient for criterion classification
     cfg.fedsageplus.c = 1.0
 
     # ---------------------------------------------------------------------- #
@@ -68,9 +76,13 @@ def extend_fl_algo_cfg(cfg):
     cfg.gcflplus = CN()
 
+    # Bound for mean_norm
     cfg.gcflplus.EPS_1 = 0.05
+    # Bound for max_norm
     cfg.gcflplus.EPS_2 = 0.1
+    # Length of the gradient sequence
     cfg.gcflplus.seq_length = 5
+    # Whether standardized dtw_distances
     cfg.gcflplus.standardize = False
 
     # ---------------------------------------------------------------------- #
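As a usage note for the `cfg_data.py` and `cfg_fl_algo.py` changes above, the following sketch shows one way the new and newly commented defaults could be overridden through the standard yacs `merge_from_list` interface; the `global_cfg` import path and the override values are assumptions for illustration.

```python
# Sketch (illustrative only): overriding defaults touched by this patch via the
# standard yacs-style merge_from_list interface. The `global_cfg` import path
# is an assumption; values are placeholders.
from federatedscope.core.configs.config import global_cfg

cfg = global_cfg.clone()
cfg.merge_from_list([
    'data.save_data', True,         # new flag added in cfg_data.py
    'fedsageplus.num_pred', 10,     # nodes generated by the generator
    'fedsageplus.gen_hidden', 256,  # generator hidden dimension
    'gcflplus.standardize', True,   # standardize dtw_distances
])
assert cfg.data.save_data and cfg.gcflplus.standardize
```

The same overrides could equivalently be written in a YAML config file and merged with the yacs `merge_from_file` method.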