From 8d1753e10f5739241d759d8a4ddf5cc03ebf2785 Mon Sep 17 00:00:00 2001 From: YQ Tsui Date: Sun, 11 Dec 2022 14:29:16 +0800 Subject: [PATCH] fix some typo in doc/comments (#1389) * fix typo in docstrings * fix typo * fix typo * fix black lint * fix black lint --- qlib/contrib/model/pytorch_adarnn.py | 22 ++++++++++++---------- qlib/contrib/strategy/signal_strategy.py | 2 +- qlib/model/riskmodel/structured.py | 2 +- qlib/workflow/__init__.py | 2 +- qlib/workflow/record_temp.py | 16 +++++++++------- scripts/data_collector/pit/README.md | 2 +- 6 files changed, 25 insertions(+), 21 deletions(-) diff --git a/qlib/contrib/model/pytorch_adarnn.py b/qlib/contrib/model/pytorch_adarnn.py index 7570d74e0b..4b0db7f4b7 100644 --- a/qlib/contrib/model/pytorch_adarnn.py +++ b/qlib/contrib/model/pytorch_adarnn.py @@ -56,7 +56,7 @@ def __init__( n_splits=2, GPU=0, seed=None, - **kwargs + **_ ): # Set logger. self.logger = get_module_logger("ADARNN") @@ -81,7 +81,7 @@ def __init__( self.optimizer = optimizer.lower() self.loss = loss self.n_splits = n_splits - self.device = torch.device("cuda:%d" % (GPU) if torch.cuda.is_available() and GPU >= 0 else "cpu") + self.device = torch.device("cuda:%d" % GPU if torch.cuda.is_available() and GPU >= 0 else "cpu") self.seed = seed self.logger.info( @@ -213,7 +213,8 @@ def train_AdaRNN(self, train_loader_list, epoch, dist_old=None, weight_mat=None) weight_mat = self.transform_type(out_weight_list) return weight_mat, None - def calc_all_metrics(self, pred): + @staticmethod + def calc_all_metrics(pred): """pred is a pandas dataframe that has two attributes: score (pred) and label (real)""" res = {} ic = pred.groupby(level="datetime").apply(lambda x: x.label.corr(x.score)) @@ -259,8 +260,6 @@ def fit( save_path = get_or_create_path(save_path) stop_steps = 0 - best_score = -np.inf - best_epoch = 0 evals_result["train"] = [] evals_result["valid"] = [] @@ -400,7 +399,7 @@ def __init__( self.model_type = model_type self.trans_loss = trans_loss self.len_seq = len_seq - self.device = torch.device("cuda:%d" % (GPU) if torch.cuda.is_available() and GPU >= 0 else "cpu") + self.device = torch.device("cuda:%d" % GPU if torch.cuda.is_available() and GPU >= 0 else "cpu") in_size = self.n_input features = nn.ModuleList() @@ -499,7 +498,8 @@ def process_gate_weight(self, out, index): res = self.softmax(weight).squeeze() return res - def get_features(self, output_list): + @staticmethod + def get_features(output_list): fea_list_src, fea_list_tar = [], [] for fea in output_list: fea_list_src.append(fea[0 : fea.size(0) // 2]) @@ -561,7 +561,7 @@ def __init__(self, loss_type="cosine", input_dim=512, GPU=0): """ self.loss_type = loss_type self.input_dim = input_dim - self.device = torch.device("cuda:%d" % (GPU) if torch.cuda.is_available() and GPU >= 0 else "cpu") + self.device = torch.device("cuda:%d" % GPU if torch.cuda.is_available() and GPU >= 0 else "cpu") def compute(self, X, Y): """Compute adaptation loss @@ -676,7 +676,8 @@ def __init__(self, kernel_type="linear", kernel_mul=2.0, kernel_num=5): self.fix_sigma = None self.kernel_type = kernel_type - def guassian_kernel(self, source, target, kernel_mul=2.0, kernel_num=5, fix_sigma=None): + @staticmethod + def guassian_kernel(source, target, kernel_mul=2.0, kernel_num=5, fix_sigma=None): n_samples = int(source.size()[0]) + int(target.size()[0]) total = torch.cat([source, target], dim=0) total0 = total.unsqueeze(0).expand(int(total.size(0)), int(total.size(0)), int(total.size(1))) @@ -691,7 +692,8 @@ def guassian_kernel(self, source, target, kernel_mul=2.0, kernel_num=5, fix_sigm kernel_val = [torch.exp(-L2_distance / bandwidth_temp) for bandwidth_temp in bandwidth_list] return sum(kernel_val) - def linear_mmd(self, X, Y): + @staticmethod + def linear_mmd(X, Y): delta = X.mean(axis=0) - Y.mean(axis=0) loss = delta.dot(delta.T) return loss diff --git a/qlib/contrib/strategy/signal_strategy.py b/qlib/contrib/strategy/signal_strategy.py index b026bf7a97..9399e53a9e 100644 --- a/qlib/contrib/strategy/signal_strategy.py +++ b/qlib/contrib/strategy/signal_strategy.py @@ -428,7 +428,7 @@ def get_risk_data(self, date): specific_risk = load_dataset(root + "/" + self.specific_risk_path, index_col=[0]) if not factor_exp.index.equals(specific_risk.index): - # NOTE: for stocks missing specific_risk, we always assume it have the highest volatility + # NOTE: for stocks missing specific_risk, we always assume it has the highest volatility specific_risk = specific_risk.reindex(factor_exp.index, fill_value=specific_risk.max()) universe = factor_exp.index.tolist() diff --git a/qlib/model/riskmodel/structured.py b/qlib/model/riskmodel/structured.py index eb0fec58f0..71e442536d 100644 --- a/qlib/model/riskmodel/structured.py +++ b/qlib/model/riskmodel/structured.py @@ -18,7 +18,7 @@ class StructuredCovEstimator(RiskModel): `B` is the regression coefficients matrix for all observations (row) on all factors (columns), and `U` is the residual matrix with shape like `X`. - Therefore the structured covariance can be estimated by + Therefore, the structured covariance can be estimated by cov(X.T) = F @ cov(B.T) @ F.T + diag(var(U)) In finance domain, there are mainly three methods to design `F` [1][2]: diff --git a/qlib/workflow/__init__.py b/qlib/workflow/__init__.py index 815d3e1240..9947c98055 100644 --- a/qlib/workflow/__init__.py +++ b/qlib/workflow/__init__.py @@ -155,7 +155,7 @@ def search_records(self, experiment_ids, **kwargs): The arguments of this function are not set to be rigid, and they will be different with different implementation of ``ExpManager`` in ``Qlib``. ``Qlib`` now provides an implementation of ``ExpManager`` with mlflow, and here is the - example code of the this method with the ``MLflowExpManager``: + example code of the method with the ``MLflowExpManager``: .. code-block:: Python diff --git a/qlib/workflow/record_temp.py b/qlib/workflow/record_temp.py index ab2a4a2ffd..fdb3f6c92a 100644 --- a/qlib/workflow/record_temp.py +++ b/qlib/workflow/record_temp.py @@ -30,7 +30,8 @@ class RecordTemp: """ artifact_path = None - depend_cls = None # the depend class of the record; the record will depend on the results generated by `depend_cls` + depend_cls = None # the dependant class of the record; the record will depend on the results generated by + # `depend_cls` @classmethod def get_path(cls, path=None): @@ -119,7 +120,7 @@ def check(self, include_self: bool = False, parents: bool = True): Check if the records is properly generated and saved. It is useful in following examples - - checking if the depended files complete before generating new things. + - checking if the dependant files complete before generating new things. - checking if the final files is completed Parameters @@ -186,7 +187,7 @@ def generate_label(dataset): return raw_label def generate(self, **kwargs): - # generate prediciton + # generate prediction pred = self.model.predict(self.dataset) if isinstance(pred, pd.Series): pred = pred.to_frame("score") @@ -285,7 +286,8 @@ def list(self): class SigAnaRecord(ACRecordTemp): """ - This is the Signal Analysis Record class that generates the analysis results such as IC and IR. This class inherits the ``RecordTemp`` class. + This is the Signal Analysis Record class that generates the analysis results such as IC and IR. + This class inherits the ``RecordTemp`` class. """ artifact_path = "sig_analysis" @@ -382,7 +384,7 @@ def __init__( indicator_analysis_freq : str|List[str] indicator analysis freq of report indicator_analysis_method : str, optional, default by None - the candidated values include 'mean', 'amount_weighted', 'value_weighted' + the candidate values include 'mean', 'amount_weighted', 'value_weighted' """ super().__init__(recorder=recorder, skip_existing=skip_existing, **kwargs) @@ -456,9 +458,9 @@ def _generate(self, **kwargs): pred = self.load("pred.pkl") # replace the "" with prediction saved before - placehorder_value = {"": pred} + placeholder_value = {"": pred} for k in "executor_config", "strategy_config": - setattr(self, k, fill_placeholder(getattr(self, k), placehorder_value)) + setattr(self, k, fill_placeholder(getattr(self, k), placeholder_value)) # if the backtesting time range is not set, it will automatically extract time range from the prediction file dt_values = pred.index.get_level_values("datetime") diff --git a/scripts/data_collector/pit/README.md b/scripts/data_collector/pit/README.md index f7b4f9fbe6..a7afe44acd 100644 --- a/scripts/data_collector/pit/README.md +++ b/scripts/data_collector/pit/README.md @@ -19,7 +19,7 @@ cd qlib/scripts/data_collector/pit/ python collector.py download_data --source_dir ~/.qlib/stock_data/source/pit --start 2000-01-01 --end 2020-01-01 --interval quarterly ``` -Downloading all data from the stock is very time consuming. If you just want run a quick test on a few stocks, you can run the command below +Downloading all data from the stock is very time-consuming. If you just want to run a quick test on a few stocks, you can run the command below ```bash python collector.py download_data --source_dir ~/.qlib/stock_data/source/pit --start 2000-01-01 --end 2020-01-01 --interval quarterly --symbol_regex "^(600519|000725).*" ```