From dfc0ed3c01ba08fc6a3c0d88b3649e40bd08f73f Mon Sep 17 00:00:00 2001
From: SunsetWolf <30293408+SunsetWolf@users.noreply.github.com>
Date: Fri, 31 Dec 2021 22:14:47 +0800
Subject: [PATCH] fix_typo (#790)

Signed-off-by: unknown
---
 CHANGES.rst | 8 ++++----
 docs/component/highfreq.rst | 2 +-
 docs/component/recorder.rst | 2 +-
 docs/hidden/tuner.rst | 10 +++++-----
 docs/introduction/quick.rst | 4 ++--
 examples/benchmarks/TFT/data_formatters/base.py | 2 +-
 examples/benchmarks/TFT/libs/hyperparam_opt.py | 6 +++---
 examples/benchmarks/TFT/libs/tft_model.py | 6 +++---
 examples/benchmarks/TFT/tft.py | 2 +-
 examples/benchmarks/TRA/README.md | 2 +-
 examples/benchmarks/TRA/src/model.py | 6 +++---
 examples/highfreq/highfreq_ops.py | 2 +-
 examples/nested_decision_execution/workflow.py | 2 +-
 qlib/__init__.py | 2 +-
 qlib/backtest/account.py | 6 +++---
 qlib/backtest/backtest.py | 2 +-
 qlib/backtest/decision.py | 4 ++--
 qlib/backtest/exchange.py | 4 ++--
 qlib/backtest/executor.py | 6 +++---
 qlib/backtest/high_performance_ds.py | 2 +-
 qlib/backtest/position.py | 2 +-
 qlib/backtest/profit_attribution.py | 2 +-
 qlib/backtest/report.py | 4 ++--
 qlib/backtest/utils.py | 2 +-
 qlib/contrib/data/utils/sepdf.py | 4 ++--
 qlib/contrib/evaluate_portfolio.py | 4 ++--
 qlib/contrib/model/highfreq_gdbt_model.py | 2 +-
 qlib/contrib/model/pytorch_tabnet.py | 4 ++--
 qlib/contrib/model/pytorch_tra.py | 4 ++--
 qlib/contrib/model/pytorch_utils.py | 2 +-
 qlib/contrib/online/utils.py | 2 +-
 qlib/contrib/strategy/cost_control.py | 2 +-
 qlib/contrib/strategy/rule_strategy.py | 6 +++---
 qlib/contrib/strategy/signal_strategy.py | 2 +-
 qlib/contrib/tuner/tuner.py | 2 +-
 qlib/data/cache.py | 10 +++++-----
 qlib/data/client.py | 2 +-
 qlib/data/data.py | 2 +-
 qlib/data/dataset/__init__.py | 2 +-
 qlib/data/dataset/handler.py | 2 +-
 qlib/data/dataset/processor.py | 2 +-
 qlib/data/dataset/storage.py | 2 +-
 qlib/rl/env.py | 4 ++--
 qlib/strategy/base.py | 6 +++---
 qlib/utils/__init__.py | 2 +-
 qlib/utils/index_data.py | 2 +-
 qlib/utils/resam.py | 4 ++--
 qlib/workflow/exp.py | 2 +-
 qlib/workflow/expm.py | 2 +-
 qlib/workflow/online/update.py | 2 +-
 qlib/workflow/record_temp.py | 2 +-
 qlib/workflow/task/__init__.py | 2 +-
 qlib/workflow/task/gen.py | 2 +-
 qlib/workflow/task/manage.py | 4 ++--
 scripts/data_collector/fund/collector.py | 2 +-
 scripts/data_collector/utils.py | 2 +-
 56 files changed, 92 insertions(+), 92 deletions(-)

diff --git a/CHANGES.rst b/CHANGES.rst
index 3daa1e8e67..e34d2a8f2b 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -30,7 +30,7 @@ Version 0.2.1
 --------------------
 - Support registering user-defined ``Provider``.
 - Support use operators in string format, e.g. ``['Ref($close, 1)']`` is valid field format.
-- Support dynamic fields in ``$some_field`` format. And exising fields like ``Close()`` may be deprecated in the future.
+- Support dynamic fields in ``$some_field`` format. And existing fields like ``Close()`` may be deprecated in the future.

 Version 0.2.2
 --------------------
@@ -78,7 +78,7 @@ Version 0.3.5
 - Support multi-label training, you can provide multiple label in ``handler``. (But LightGBM doesn't support due to the algorithm itself)
 - Refactor ``handler`` code, dataset.py is no longer used, and you can deploy your own labels and features in ``feature_label_config``
 - Handler only offer DataFrame. Also, ``trainer`` and model.py only receive DataFrame
-- Change ``split_rolling_data``, we roll the data on market calender now, not on normal date
+- Change ``split_rolling_data``, we roll the data on market calendar now, not on normal date
 - Move some date config from ``handler`` to ``trainer``

 Version 0.4.0
@@ -167,11 +167,11 @@ Version 0.8.0
 - There are lots of changes for daily trading, it is hard to list all of them. But a few important changes could be noticed
   - The trading limitation is more accurate;
     - In `previous version `_, longing and shorting actions share the same action.
-    - In `current verison `_, the trading limitation is different between loging and shorting action.
+    - In `current version `_, the trading limitation is different between longing and shorting actions.
   - The constant is different when calculating annualized metrics.
     - `Current version `_ uses more accurate constant than `previous version `_
   - `A new version `_ of data is released. Due to the unstability of Yahoo data source, the data may be different after downloading data again.
-  - Users could chec kout the backtesting results between `Current version `_ and `previous version `_
+  - Users could check out the backtesting results between `Current version `_ and `previous version `_


 Other Versions
diff --git a/docs/component/highfreq.rst b/docs/component/highfreq.rst
index 8b1b425879..19184a10ad 100644
--- a/docs/component/highfreq.rst
+++ b/docs/component/highfreq.rst
@@ -14,7 +14,7 @@ To get the join trading performance of daily and intraday trading, they must int
 In order to support the joint backtest strategies in multiple levels, a corresponding framework is required. None of the publicly available high-frequency trading frameworks considers multi-level joint trading, which make the backtesting aforementioned inaccurate.

 Besides backtesting, the optimization of strategies from different levels is not standalone and can be affected by each other.
-For example, the best portfolio management strategy may change with the performance of order executions(e.g. a portfolio with higher turnover may becomes a better choice when we imporve the order execution strategies).
+For example, the best portfolio management strategy may change with the performance of order executions(e.g. a portfolio with higher turnover may become a better choice when we improve the order execution strategies).
 To achieve the overall good performance , it is necessary to consider the interaction of strategies in different level.
 Therefore, building a new framework for trading in multiple levels becomes necessary to solve the various problems mentioned above, for which we designed a nested decision execution framework that consider the interaction of strategies.
diff --git a/docs/component/recorder.rst b/docs/component/recorder.rst
index 5a7d195d64..072d2f2788 100644
--- a/docs/component/recorder.rst
+++ b/docs/component/recorder.rst
@@ -37,7 +37,7 @@ Here is a general view of the structure of the system:

 This experiment management system defines a set of interface and provided a concrete implementation ``MLflowExpManager``, which is based on the machine learning platform: ``MLFlow`` (`link `_).
-If users set the implementation of ``ExpManager`` to be ``MLflowExpManager``, they can use the command `mlflow ui` to visualize and check the experiment results. For more information, pleaes refer to the related documents `here `_.
+If users set the implementation of ``ExpManager`` to be ``MLflowExpManager``, they can use the command `mlflow ui` to visualize and check the experiment results. For more information, please refer to the related documents `here `_.

 Qlib Recorder
 ===================
diff --git a/docs/hidden/tuner.rst b/docs/hidden/tuner.rst
index 8abf2ec7c0..eedbba03bd 100644
--- a/docs/hidden/tuner.rst
+++ b/docs/hidden/tuner.rst
@@ -31,7 +31,7 @@ Let's see an example,

 First make sure you have the latest version of `qlib` installed.

-Then, you need to privide a configuration to setup the experiment.
+Then, you need to provide a configuration to setup the experiment.
 We write a simple configuration example as following,

 .. code-block:: YAML
@@ -217,13 +217,13 @@ The tuner pipeline contains different tuners, and the `tuner` program will proce
 Each part represents a tuner, and its modules which are to be tuned. Space in each part is the hyper-parameters' space of a certain module, you need to create your searching space and modify it in `/qlib/contrib/tuner/space.py`. We use `hyperopt` package to help us to construct the space, you can see the detail of how to use it in https://github.com/hyperopt/hyperopt/wiki/FMin .

 - model
-    You need to provide the `class` and the `space` of the model. If the model is user's own implementation, you need to privide the `module_path`.
+    You need to provide the `class` and the `space` of the model. If the model is user's own implementation, you need to provide the `module_path`.

 - trainer
-    You need to proveide the `class` of the trainer. If the trainer is user's own implementation, you need to privide the `module_path`.
+    You need to provide the `class` of the trainer. If the trainer is user's own implementation, you need to provide the `module_path`.

 - strategy
-    You need to provide the `class` and the `space` of the strategy. If the strategy is user's own implementation, you need to privide the `module_path`.
+    You need to provide the `class` and the `space` of the strategy. If the strategy is user's own implementation, you need to provide the `module_path`.

 - data_label
     The label of the data, you can search which kinds of labels will lead to a better result. This part is optional, and you only need to provide `space`.
@@ -273,7 +273,7 @@ You need to use the same dataset to evaluate your different `estimator` experime
 About the data and backtest
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~

-`data` and `backtest` are all same in the whole `tuner` experiment. Different `estimator` experiments must use the same data and backtest method. So, these two parts of config are same with that in `estimator` configuration. You can see the precise defination of these parts in `estimator` introduction. We only provide an example here.
+`data` and `backtest` are all same in the whole `tuner` experiment. Different `estimator` experiments must use the same data and backtest method. So, these two parts of config are same with that in `estimator` configuration. You can see the precise definition of these parts in `estimator` introduction. We only provide an example here.

 .. code-block:: YAML
diff --git a/docs/introduction/quick.rst b/docs/introduction/quick.rst
index 1abe3fe762..151519680c 100644
--- a/docs/introduction/quick.rst
+++ b/docs/introduction/quick.rst
@@ -31,7 +31,7 @@ Users can easily intsall ``Qlib`` according to the following steps:

     git clone https://github.com/microsoft/qlib.git && cd qlib
     python setup.py install

-To kown more about `installation`, please refer to `Qlib Installation <../start/installation.html>`_.
+To know more about `installation`, please refer to `Qlib Installation <../start/installation.html>`_.

 Prepare Data
 ==============
@@ -44,7 +44,7 @@ Load and prepare data by running the following code:

 This dataset is created by public data collected by crawler scripts in ``scripts/data_collector/``, which have been released in the same repository. Users could create the same dataset with it.

-To kown more about `prepare data`, please refer to `Data Preparation <../component/data.html#data-preparation>`_.
+To know more about `prepare data`, please refer to `Data Preparation <../component/data.html#data-preparation>`_.

 Auto Quant Research Workflow
 ====================================
diff --git a/examples/benchmarks/TFT/data_formatters/base.py b/examples/benchmarks/TFT/data_formatters/base.py
index aa1c0dc82b..9df0448bab 100644
--- a/examples/benchmarks/TFT/data_formatters/base.py
+++ b/examples/benchmarks/TFT/data_formatters/base.py
@@ -32,7 +32,7 @@
 import enum


-# Type defintions
+# Type definitions
 class DataTypes(enum.IntEnum):
     """Defines numerical types of each column."""
diff --git a/examples/benchmarks/TFT/libs/hyperparam_opt.py b/examples/benchmarks/TFT/libs/hyperparam_opt.py
index 750fdf2c17..e18f5b7163 100644
--- a/examples/benchmarks/TFT/libs/hyperparam_opt.py
+++ b/examples/benchmarks/TFT/libs/hyperparam_opt.py
@@ -254,9 +254,9 @@ def __init__(
           param_ranges: Discrete hyperparameter range for random search.
           fixed_params: Fixed model parameters per experiment.
           root_model_folder: Folder to store optimisation artifacts.
-          worker_number: Worker index definining which set of hyperparameters to
+          worker_number: Worker index defining which set of hyperparameters to
             test.
-          search_iterations: Maximum numer of random search iterations.
+          search_iterations: Maximum number of random search iterations.
           num_iterations_per_worker: How many iterations are handled per worker.
           clear_serialised_params: Whether to regenerate hyperparameter combinations.
@@ -330,7 +330,7 @@ def load_serialised_hyperparam_df(self):
         if os.path.exists(self.serialised_ranges_folder):
             df = pd.read_csv(self.serialised_ranges_path, index_col=0)
         else:
-            print("Unable to load - regenerating serach ranges instead")
+            print("Unable to load - regenerating search ranges instead")
             df = self.update_serialised_hyperparam_df()

         return df
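The `worker_number` / `num_iterations_per_worker` pairing in the first hunk above implies a simple static partition of the random-search iterations across workers. A minimal sketch of that partitioning idea (a hypothetical helper for illustration, not TFT's actual implementation):

.. code-block:: python

    def iterations_for_worker(worker_number, num_iterations_per_worker, search_iterations):
        # workers are assumed to be 1-indexed; each owns a contiguous slice of iterations
        start = (worker_number - 1) * num_iterations_per_worker
        end = min(start + num_iterations_per_worker, search_iterations)
        return range(start, end)

    # e.g. worker 3 with 20 iterations per worker in a 60-iteration search owns indices 40..59
    assert list(iterations_for_worker(3, 20, 60)) == list(range(40, 60))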
diff --git a/examples/benchmarks/TFT/libs/tft_model.py b/examples/benchmarks/TFT/libs/tft_model.py
index b39f178255..aa055e2947 100644
--- a/examples/benchmarks/TFT/libs/tft_model.py
+++ b/examples/benchmarks/TFT/libs/tft_model.py
@@ -342,7 +342,7 @@ def get(cls, key):

     @classmethod
     def contains(cls, key):
-        """Retuns boolean indicating whether key is present in cache."""
+        """Returns boolean indicating whether key is present in cache."""
         return key in cls._data_cache

@@ -1120,10 +1120,10 @@ def predict(self, df, return_targets=False):
         Args:
           df: Input dataframe
           return_targets: Whether to also return outputs aligned with predictions to
-            faciliate evaluation
+            facilitate evaluation

         Returns:
-          Input dataframe or tuple of (input dataframe, algined output dataframe).
+          Input dataframe or tuple of (input dataframe, aligned output dataframe).
         """

         data = self._batch_data(df)
diff --git a/examples/benchmarks/TFT/tft.py b/examples/benchmarks/TFT/tft.py
index cdc7f17e90..8b5c70190a 100644
--- a/examples/benchmarks/TFT/tft.py
+++ b/examples/benchmarks/TFT/tft.py
@@ -295,7 +295,7 @@ def finetune(self, dataset: DatasetH):
     def to_pickle(self, path: Union[Path, str]):
         """
         Tensorflow model can't be dumped directly.
-        So the data should be save seperatedly
+        So the data should be saved separately

         **TODO**: Please implement the function to load the files
diff --git a/examples/benchmarks/TRA/README.md b/examples/benchmarks/TRA/README.md
index 5ff5b480e3..ea1d5aace5 100644
--- a/examples/benchmarks/TRA/README.md
+++ b/examples/benchmarks/TRA/README.md
@@ -57,7 +57,7 @@ And here are two ways to run the model:
   python example.py --config_file configs/config_alstm.yaml
   ```

-Here we trained TRA on a pretrained backbone model. Therefore we run `*_init.yaml` before TRA's scipts.
+Here we trained TRA on a pretrained backbone model. Therefore we run `*_init.yaml` before TRA's scripts.

 ### Results
diff --git a/examples/benchmarks/TRA/src/model.py b/examples/benchmarks/TRA/src/model.py
index a0e6350e4c..f6ee69af3f 100644
--- a/examples/benchmarks/TRA/src/model.py
+++ b/examples/benchmarks/TRA/src/model.py
@@ -124,7 +124,7 @@ def train_epoch(self, data_set):
             loss = (pred - label).pow(2).mean()

             L = (all_preds.detach() - label[:, None]).pow(2)
-            L -= L.min(dim=-1, keepdim=True).values  # normalize & ensure postive input
+            L -= L.min(dim=-1, keepdim=True).values  # normalize & ensure positive input

             data_set.assign_data(index, L)  # save loss to memory
@@ -165,7 +165,7 @@ def test_epoch(self, data_set, return_pred=False):

             L = (all_preds - label[:, None]).pow(2)

-            L -= L.min(dim=-1, keepdim=True).values  # normalize & ensure postive input
+            L -= L.min(dim=-1, keepdim=True).values  # normalize & ensure positive input

             data_set.assign_data(index, L)  # save loss to memory
@@ -484,7 +484,7 @@ class TRA(nn.Module):

    """Temporal Routing Adaptor (TRA)

-    TRA takes historical prediction erros & latent representation as inputs,
+    TRA takes historical prediction errors & latent representation as inputs,
     then routes the input sample to a specific predictor for training & inference.

     Args:
diff --git a/examples/highfreq/highfreq_ops.py b/examples/highfreq/highfreq_ops.py
index 175f4f66be..a9947f91d8 100644
--- a/examples/highfreq/highfreq_ops.py
+++ b/examples/highfreq/highfreq_ops.py
@@ -150,7 +150,7 @@ def __init__(self, feature, l=None, r=None):
         self.l = l
         self.r = r
         if (self.l is not None and self.l <= 0) or (self.r is not None and self.r >= 0):
-            raise ValueError("Cut operator l shoud > 0 and r should < 0")
+            raise ValueError("Cut operator l should > 0 and r should < 0")
         super(Cut, self).__init__(feature)
diff --git a/examples/nested_decision_execution/workflow.py b/examples/nested_decision_execution/workflow.py
index 991deaa9dd..18d33a32c0 100644
--- a/examples/nested_decision_execution/workflow.py
+++ b/examples/nested_decision_execution/workflow.py
@@ -298,7 +298,7 @@ def collect_data(self):
     #   - Aligning the profit calculation between multiple levels and single levels.
     # 2) comparing different backtest
     #   - Basic test idea:
-    #     - the daily backtest will be similar as multi-level(the data quality makes this gap samller)
+    #     - the daily backtest will be similar to multi-level(the data quality makes this gap smaller)

     def check_diff_freq(self):
         self._init_qlib()
diff --git a/qlib/__init__.py b/qlib/__init__.py
index 1e79561754..4d827b25b7 100644
--- a/qlib/__init__.py
+++ b/qlib/__init__.py
@@ -241,7 +241,7 @@ def auto_init(**kwargs):
                 default_exp_name: "Experiment"

     Example 2)
-    If you wan to create simple a stand alone config, you can use following config(a.k.a `conf_type: origin`)
+    If you want to create a simple stand alone config, you can use the following config(a.k.a `conf_type: origin`)

     .. code-block:: python
diff --git a/qlib/backtest/account.py b/qlib/backtest/account.py
index 41fd43cb8f..f2e32c602c 100644
--- a/qlib/backtest/account.py
+++ b/qlib/backtest/account.py
@@ -31,7 +31,7 @@ class AccumulatedInfo:
     """
     accumulated trading info, including accumulated return/cost/turnover
-    AccumulatedInfo should be shared accross different levels
+    AccumulatedInfo should be shared across different levels
     """

     def __init__(self):
@@ -199,7 +199,7 @@ def update_order(self, order, trade_val, cost, trade_price):
         # if stock is sold out, no stock price information in Position, then we should update account first, then update current position
         # if stock is bought, there is no stock in current position, update current, then update account
-        # The cost will be substracted from the cash at last. So the trading logic can ignore the cost calculation
+        # The cost will be subtracted from the cash at last. So the trading logic can ignore the cost calculation
         if order.direction == Order.SELL:
             # sell stock
             self._update_state_from_order(order, trade_val, cost, trade_price)
@@ -378,7 +378,7 @@ def update_bar_end(
         )

     def get_portfolio_metrics(self):
-        """get the history portfolio_metrics and postions instance"""
+        """get the history portfolio_metrics and positions instance"""
         if self.is_port_metr_enabled():
             _portfolio_metrics = self.portfolio_metrics.generate_portfolio_metrics_dataframe()
             _positions = self.get_hist_positions()
diff --git a/qlib/backtest/backtest.py b/qlib/backtest/backtest.py
index fa4063bc92..dff15a7c28 100644
--- a/qlib/backtest/backtest.py
+++ b/qlib/backtest/backtest.py
@@ -13,7 +13,7 @@

 def backtest_loop(start_time, end_time, trade_strategy: BaseStrategy, trade_executor: BaseExecutor):
-    """backtest funciton for the interaction of the outermost strategy and executor in the nested decision execution
+    """backtest function for the interaction of the outermost strategy and executor in the nested decision execution

     please refer to the docs of `collect_data_loop`
diff --git a/qlib/backtest/decision.py b/qlib/backtest/decision.py
index 049e56c005..3b15b06a46 100644
--- a/qlib/backtest/decision.py
+++ b/qlib/backtest/decision.py
@@ -505,8 +505,8 @@ def mod_inner_decision(self, inner_trade_decision: BaseTradeDecision):
         `inner_trade_decision` will be changed **inplaced**.

         Motivation of the `mod_inner_decision`
-        - Leave a hook for outer decision to affact the decision generated by the inner strategy
-        - e.g. the outmost strategy generate a time range for trading. But the upper layer can only affact the
+        - Leave a hook for outer decision to affect the decision generated by the inner strategy
+        - e.g. the outmost strategy generates a time range for trading. But the upper layer can only affect the
           nearest layer in the original design. With `mod_inner_decision`, the decision can passed through multiple layers
diff --git a/qlib/backtest/exchange.py b/qlib/backtest/exchange.py
index 764e468d4d..f09cce2c12 100644
--- a/qlib/backtest/exchange.py
+++ b/qlib/backtest/exchange.py
@@ -103,7 +103,7 @@ def __init__(
             Necessary fields:
                 $close is for calculating the total value at end of each day.
             Optional fields:
-                $volume is only necessary when we limit the trade amount or caculate PA(vwap) indicator
+                $volume is only necessary when we limit the trade amount or calculate PA(vwap) indicator
                 $vwap is only necessary when we use the $vwap price as the deal price
                 $factor is for rounding to the trading unit
                 limit_sell will be set to False by default(False indicates we can sell this
@@ -505,7 +505,7 @@ def generate_order_for_target_amount_position(self, target_position, current_pos
         Note: some future information is used in this function
         Parameter:
         target_position : dict { stock_id : amount }
-        current_postion : dict { stock_id : amount}
+        current_position : dict { stock_id : amount}
         trade_unit : trade_unit
         down sample : for amount 321 and trade_unit 100, deal_amount is 300
         deal order on trade_date
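The "down sample" note in the last hunk above (amount 321 with trade_unit 100 giving deal_amount 300) is plain floor rounding to a whole number of trade units. A minimal sketch of just that rounding step (illustrative only; per the docstring earlier in this file, the real exchange also uses `$factor` when rounding to the trading unit):

.. code-block:: python

    def downsample_amount(amount: float, trade_unit: float) -> float:
        # floor the target amount to a whole multiple of the trade unit
        return (amount // trade_unit) * trade_unit

    assert downsample_amount(321, 100) == 300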
diff --git a/qlib/backtest/executor.py b/qlib/backtest/executor.py
index fa9c39b8f3..94aa84d6dc 100644
--- a/qlib/backtest/executor.py
+++ b/qlib/backtest/executor.py
@@ -41,7 +41,7 @@ def __init__(
         Parameters
         ----------
         time_per_step : str
-            trade time per trading step, used for genreate the trade calendar
+            trade time per trading step, used for generating the trade calendar
         show_indicator: bool, optional
             whether to show indicators, :
             - 'pa', the price advantage
@@ -369,12 +369,12 @@ def _init_sub_trading(self, trade_decision):
         self.inner_strategy.reset(level_infra=sub_level_infra, outer_trade_decision=trade_decision)

     def _update_trade_decision(self, trade_decision: BaseTradeDecision) -> BaseTradeDecision:
-        # outter strategy have chance to update decision each iterator
+        # outer strategy has a chance to update the decision in each iteration
         updated_trade_decision = trade_decision.update(self.inner_executor.trade_calendar)
         if updated_trade_decision is not None:
             trade_decision = updated_trade_decision
             # NEW UPDATE
-            # create a hook for inner strategy to update outter decision
+            # create a hook for inner strategy to update outer decision
             self.inner_strategy.alter_outer_trade_decision(trade_decision)
         return trade_decision
diff --git a/qlib/backtest/high_performance_ds.py b/qlib/backtest/high_performance_ds.py
index 51847cac35..95a32022e1 100644
--- a/qlib/backtest/high_performance_ds.py
+++ b/qlib/backtest/high_performance_ds.py
@@ -400,7 +400,7 @@ def sum_all_indicators(order_indicator, indicators: list, metrics: Union[str, Li
         indicators : List[BaseOrderIndicator]
             the list of all inner indicators.
         metrics : Union[str, List[str]]
-            all metrics needs ot be sumed.
+            all metrics need to be summed.
         fill_value : float, optional
             fill np.NaN with value. By default None.
         """
diff --git a/qlib/backtest/position.py b/qlib/backtest/position.py
index 8ad2e957f1..907da9975c 100644
--- a/qlib/backtest/position.py
+++ b/qlib/backtest/position.py
@@ -152,7 +152,7 @@ def get_stock_weight_dict(self, only_stock: bool = False) -> Dict:
         """
         generate stock weight dict {stock_id : value weight of stock in the position}
         it is meaningful in the beginning or the end of each trade step
-        - During execution of each trading step, the weight may be not consistant with the portfolio value
+        - During execution of each trading step, the weight may not be consistent with the portfolio value

         Parameters
         ----------
diff --git a/qlib/backtest/profit_attribution.py b/qlib/backtest/profit_attribution.py
index 895f5c78bb..e5b61f8d69 100644
--- a/qlib/backtest/profit_attribution.py
+++ b/qlib/backtest/profit_attribution.py
@@ -39,7 +39,7 @@ def get_benchmark_weight(
     if not path:
         path = Path(C.dpm.get_data_uri(freq)).expanduser() / "raw" / "AIndexMembers" / "weights.csv"
     # TODO: the storage of weights should be implemented in a more elegent way
-    # TODO: The benchmark is not consistant with the filename in instruments.
+    # TODO: The benchmark is not consistent with the filename in instruments.
     bench_weight_df = pd.read_csv(path, usecols=["code", "date", "index", "weight"])
     bench_weight_df = bench_weight_df[bench_weight_df["index"] == bench]
     bench_weight_df["date"] = pd.to_datetime(bench_weight_df["date"])
diff --git a/qlib/backtest/report.py b/qlib/backtest/report.py
index 03fb85344c..69ae720a2d 100644
--- a/qlib/backtest/report.py
+++ b/qlib/backtest/report.py
@@ -73,7 +73,7 @@ def __init__(self, freq: str = "day", benchmark_config: dict = {}):
         self.init_bench(freq=freq, benchmark_config=benchmark_config)

     def init_vars(self):
-        self.accounts = OrderedDict()  # account postion value for each trade time
+        self.accounts = OrderedDict()  # account position value for each trade time
         self.returns = OrderedDict()  # daily return rate for each trade time
         self.total_turnovers = OrderedDict()  # total turnover for each trade time
         self.turnovers = OrderedDict()  # turnover for each trade time
@@ -236,7 +236,7 @@ class Indicator:
     """
     `Indicator` is implemented in a aggregate way.
     All the metrics are calculated aggregately.
-    All the metrics are calculated for a seperated stock and in a specific step on a specific level.
+    All the metrics are calculated for a separate stock and in a specific step on a specific level.

     | indicator | desc. |
     |--------------+--------------------------------------------------------------|
diff --git a/qlib/backtest/utils.py b/qlib/backtest/utils.py
index 89ac893193..25a0d9965d 100644
--- a/qlib/backtest/utils.py
+++ b/qlib/backtest/utils.py
@@ -93,7 +93,7 @@ def get_step_time(self, trade_step=None, shift=0):
         About the endpoints:
         - Qlib uses the closed interval in time-series data selection, which has the same performance as pandas.Series.loc
-        # - The returned right endpoints should minus 1 seconds becasue of the closed interval representation in Qlib.
+        # - The returned right endpoints should have 1 second subtracted because of the closed interval representation in Qlib.
         # Note: Qlib supports up to minutely decision execution, so 1 seconds is less than any trading time interval.

         Parameters
diff --git a/qlib/contrib/data/utils/sepdf.py b/qlib/contrib/data/utils/sepdf.py
index 58664c46c8..7c3c8665b6 100644
--- a/qlib/contrib/data/utils/sepdf.py
+++ b/qlib/contrib/data/utils/sepdf.py
@@ -18,8 +18,8 @@ class SepDataFrame:
     """
     (Sep)erate DataFrame
     We usually concat multiple dataframe to be processed together(Such as feature, label, weight, filter).
-    However, they are usally be used seperately at last.
-    This will result in extra cost for concating and spliting data(reshaping and copying data in the memory is very expensive)
+    However, they are usually used separately at last.
+    This will result in extra cost for concatenating and splitting data(reshaping and copying data in the memory is very expensive)

     SepDataFrame tries to act like a DataFrame whose column with multiindex
     """
diff --git a/qlib/contrib/evaluate_portfolio.py b/qlib/contrib/evaluate_portfolio.py
index b3a770e7c6..920d2182c4 100644
--- a/qlib/contrib/evaluate_portfolio.py
+++ b/qlib/contrib/evaluate_portfolio.py
@@ -38,11 +38,11 @@ def _get_position_value_from_df(evaluate_date, position, close_data_df):

 def get_position_value(evaluate_date, position):
     """sum of close*amount

-    get value of postion
+    get value of position

     use close price

-    postions:
+    positions:
     {
         Timestamp('2016-01-05 00:00:00'):
         {
diff --git a/qlib/contrib/model/highfreq_gdbt_model.py b/qlib/contrib/model/highfreq_gdbt_model.py
index c8a108cab2..ec444f6710 100644
--- a/qlib/contrib/model/highfreq_gdbt_model.py
+++ b/qlib/contrib/model/highfreq_gdbt_model.py
@@ -56,7 +56,7 @@ def _cal_signal_metrics(self, y_test, l_cut, r_cut):

     def hf_signal_test(self, dataset: DatasetH, threhold=0.2):
         """
-        Test the sigal in high frequency test set
+        Test the signal in high frequency test set
         """
         if self.model == None:
             raise ValueError("Model hasn't been trained yet")
diff --git a/qlib/contrib/model/pytorch_tabnet.py b/qlib/contrib/model/pytorch_tabnet.py
index e0e2093e8f..d9290977b6 100644
--- a/qlib/contrib/model/pytorch_tabnet.py
+++ b/qlib/contrib/model/pytorch_tabnet.py
@@ -446,7 +446,7 @@ def __init__(self, inp_dim=6, out_dim=6, n_d=64, n_a=64, n_shared=2, n_ind=2, n_
         Args:
             n_d: dimension of the features used to calculate the final results
             n_a: dimension of the features input to the attention transformer of the next step
-            n_shared: numbr of shared steps in feature transfomer(optional)
+            n_shared: number of shared steps in feature transformer(optional)
             n_ind: number of independent steps in feature transformer
             n_steps: number of steps of pass through tabbet
             relax coefficient:
@@ -479,7 +479,7 @@ def forward(self, x, priors):
         out = torch.zeros(x.size(0), self.n_d).to(x.device)
         for step in self.steps:
             x_te, l = step(x, x_a, priors)
-            out += F.relu(x_te[:, : self.n_d])  # split the feautre from feat_transformer
+            out += F.relu(x_te[:, : self.n_d])  # split the feature from feat_transformer
             x_a = x_te[:, self.n_d :]
             sparse_loss.append(l)
         return self.fc(out), sum(sparse_loss)
diff --git a/qlib/contrib/model/pytorch_tra.py b/qlib/contrib/model/pytorch_tra.py
index 8d1e284105..1318c19c1f 100644
--- a/qlib/contrib/model/pytorch_tra.py
+++ b/qlib/contrib/model/pytorch_tra.py
@@ -232,7 +232,7 @@ def train_epoch(self, epoch, data_set, is_pretrain=False):
                 choice_all.append(pd.DataFrame(choice.detach().cpu().numpy(), index=index))

             decay = self.rho ** (self.global_step // 100)  # decay every 100 steps
             lamb = 0 if is_pretrain else self.lamb * decay
-            reg = prob.log().mul(P).sum(dim=1).mean()  # train router to predict OT assignment
+            reg = prob.log().mul(P).sum(dim=1).mean()  # train router to predict the optimal transport (OT) assignment
             if self._writer is not None and not is_pretrain:
                 self._writer.add_scalar("training/router_loss", -reg.item(), self.global_step)
                 self._writer.add_scalar("training/reg_loss", loss.item(), self.global_step)
@@ -663,7 +663,7 @@ class TRA(nn.Module):

     """Temporal Routing Adaptor (TRA)

-    TRA takes historical prediction erros & latent representation as inputs,
+    TRA takes historical prediction errors & latent representation as inputs,
     then routes the input sample to a specific predictor for training & inference.

     Args:
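For context on the regularizer touched in the first pytorch_tra.py hunk: `P` appears to be the target assignment over the predictors and `prob` the router's output distribution, so `prob.log().mul(P).sum(dim=1).mean()` is the mean log-likelihood of that assignment under the router (the logged `router_loss` is its negation, `-reg`). A shape-level sketch with made-up tensors, not TRA's actual training loop:

.. code-block:: python

    import torch

    batch, num_predictors = 4, 3
    P = torch.softmax(torch.randn(batch, num_predictors), dim=1)     # stand-in target assignment
    prob = torch.softmax(torch.randn(batch, num_predictors), dim=1)  # stand-in router output

    reg = prob.log().mul(P).sum(dim=1).mean()  # mean log-likelihood, to be maximized
    router_loss = -reg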
diff --git a/qlib/contrib/model/pytorch_utils.py b/qlib/contrib/model/pytorch_utils.py
index 1148a596af..e22f8f7544 100644
--- a/qlib/contrib/model/pytorch_utils.py
+++ b/qlib/contrib/model/pytorch_utils.py
@@ -33,5 +33,5 @@ def count_parameters(models_or_parameters, unit="m"):
     elif unit == "gb" or unit == "g":
         counts /= 2 ** 30
     elif unit is not None:
-        raise ValueError("Unknow unit: {:}".format(unit))
+        raise ValueError("Unknown unit: {:}".format(unit))
     return counts
diff --git a/qlib/contrib/online/utils.py b/qlib/contrib/online/utils.py
index 2a775ba626..52dcd819e0 100644
--- a/qlib/contrib/online/utils.py
+++ b/qlib/contrib/online/utils.py
@@ -36,7 +36,7 @@ def save_instance(instance, file_path):
     save(dump) an instance to a pickle file
     Parameter
         instance :
-            data to te dumped
+            data to be dumped
         file_path : string / pathlib.Path()
             path of file to be dumped
     """
diff --git a/qlib/contrib/strategy/cost_control.py b/qlib/contrib/strategy/cost_control.py
index aaebe35439..2209375e56 100644
--- a/qlib/contrib/strategy/cost_control.py
+++ b/qlib/contrib/strategy/cost_control.py
@@ -47,7 +47,7 @@ def get_risk_degree(self, trade_step=None):
         Return the proportion of your total value you will used in investment.
         Dynamically risk_degree will result in Market timing
         """
-        # It will use 95% amoutn of your total value by default
+        # It will use 95% of your total value by default
         return self.risk_degree

     def generate_target_weight_position(self, score, current, trade_start_time, trade_end_time):
diff --git a/qlib/contrib/strategy/rule_strategy.py b/qlib/contrib/strategy/rule_strategy.py
index dcf4667ff3..657c62def3 100644
--- a/qlib/contrib/strategy/rule_strategy.py
+++ b/qlib/contrib/strategy/rule_strategy.py
@@ -24,7 +24,7 @@ class TWAPStrategy(BaseStrategy):

     NOTE:
         - This TWAP strategy will celling round when trading. This will make the TWAP trading strategy produce the order
-          ealier when the total trade unit of amount is less than the trading step
+          earlier when the total trade unit of amount is less than the trading step
     """

     def reset(self, outer_trade_decision: BaseTradeDecision = None, **kwargs):
@@ -43,8 +43,8 @@ def generate_trade_decision(self, execute_result=None):
         # NOTE: corner cases!!!
         # - If using upperbound round, please don't sell the amount which should in next step
-        # - the coordinate of the amount between steps is hard to be dealed between steps in the same level. It
-        #   is easier to be dealed in upper steps
+        # - the coordinate of the amount between steps is hard to be dealt with between steps in the same level. It
+        #   is easier to be dealt with in upper steps

         # strategy is not available. Give an empty decision
         if len(self.outer_trade_decision.get_decision()) == 0:
diff --git a/qlib/contrib/strategy/signal_strategy.py b/qlib/contrib/strategy/signal_strategy.py
index c3afe61182..c1dd87cdda 100644
--- a/qlib/contrib/strategy/signal_strategy.py
+++ b/qlib/contrib/strategy/signal_strategy.py
@@ -69,7 +69,7 @@ def get_risk_degree(self, trade_step=None):
         Return the proportion of your total value you will used in investment.
         Dynamically risk_degree will result in Market timing.
         """
-        # It will use 95% amoutn of your total value by default
+        # It will use 95% of your total value by default
         return self.risk_degree
diff --git a/qlib/contrib/tuner/tuner.py b/qlib/contrib/tuner/tuner.py
index 7fe1cec07d..114ee0a74d 100644
--- a/qlib/contrib/tuner/tuner.py
+++ b/qlib/contrib/tuner/tuner.py
@@ -90,7 +90,7 @@ class QLibTuner(Tuner):

     def objective(self, params):

-        # 1. Setup an config for a spcific estimator process
+        # 1. Setup a config for a specific estimator process
         estimator_path = self.setup_estimator_config(params)
         self.logger.info("Searching params: {} ".format(params))
diff --git a/qlib/data/cache.py b/qlib/data/cache.py
index 6193dcf92b..c33fa655b5 100644
--- a/qlib/data/cache.py
+++ b/qlib/data/cache.py
@@ -359,7 +359,7 @@ def _expression(self, instrument, field, start_time, end_time, freq):
     def update(self, cache_uri: Union[str, Path], freq: str = "day"):
         """Update expression cache to latest calendar.

-        Overide this method to define how to update expression cache corresponding to users' own cache mechanism.
+        Override this method to define how to update expression cache corresponding to users' own cache mechanism.

         Parameters
         ----------
@@ -445,7 +445,7 @@ def _dataset_uri(
     def update(self, cache_uri: Union[str, Path], freq: str = "day"):
         """Update dataset cache to latest calendar.

-        Overide this method to define how to update dataset cache corresponding to users' own cache mechanism.
+        Override this method to define how to update dataset cache corresponding to users' own cache mechanism.

         Parameters
         ----------
@@ -543,7 +543,7 @@ def _expression(self, instrument, field, start_time=None, end_time=None, freq="d
         # instance
         series = self.provider.expression(instrument, field, _calendar[0], _calendar[-1], freq)
         if not series.empty:
-            # This expresion is empty, we don't generate any cache for it.
+            # This expression is empty, we don't generate any cache for it.
             with CacheUtils.writer_lock(self.r, f"{str(C.dpm.get_data_uri(freq))}:expression-{_cache_uri}"):
                 self.gen_expression_cache(
                     expression_data=series,
@@ -858,7 +858,7 @@ def gen_dataset_cache(self, cache_path: Union[str, Path], instruments, fields, f
         """gen_dataset_cache

         .. note:: This function does not consider the cache read write lock. Please
-        Aquire the lock outside this function
+        Acquire the lock outside this function

         The format the cache contains 3 parts(followed by typical filename).
@@ -1035,7 +1035,7 @@ def update(self, cache_uri, freq: str = "day"):
                 # FIXME:
                 # Because the feature cache are stored as .bin file.
                 # So the series read from features are all float32.
-                # However, the first dataset cache is calulated based on the
+                # However, the first dataset cache is calculated based on the
                 # raw data. So the data type may be float64.
                 # Different data type will result in failure of appending data
                 if "/{}".format(DatasetCache.HDF_KEY) in store.keys():
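The FIXME in the final cache.py hunk refers to a concrete pandas/HDF5 behavior: appending a float64 frame to a table first written as float32 raises an error, so the dtypes must be aligned before appending. A minimal reproduction (illustrative only, not Qlib's cache code):

.. code-block:: python

    import numpy as np
    import pandas as pd

    df32 = pd.DataFrame({"feature": np.arange(3, dtype=np.float32)})
    df64 = pd.DataFrame({"feature": np.arange(3, dtype=np.float64)})

    with pd.HDFStore("cache.h5") as store:
        store.append("dataset", df32)
        # store.append("dataset", df64)  # raises ValueError: incompatible dtypes
        store.append("dataset", df64.astype(np.float32))  # cast first, then append works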
diff --git a/qlib/data/client.py b/qlib/data/client.py
index fc96161e85..b6733fd3ad 100644
--- a/qlib/data/client.py
+++ b/qlib/data/client.py
@@ -58,7 +58,7 @@ def send_request(self, request_type, request_content, msg_queue, msg_proc_func=N
         msg_proc_func : func
             the function to process the message when receiving response, should have arg `*args`.
         msg_queue: Queue
-            The queue to pass the messsage after callback.
+            The queue to pass the message after callback.
         """
         head_info = {"version": qlib.__version__}
diff --git a/qlib/data/data.py b/qlib/data/data.py
index 1188173f97..cdc5d80765 100644
--- a/qlib/data/data.py
+++ b/qlib/data/data.py
@@ -16,7 +16,7 @@
 from typing import Iterable, Union
 from typing import List, Union

-# For supporting multiprocessing in outter code, joblib is used
+# For supporting multiprocessing in outer code, joblib is used
 from joblib import delayed

 from .cache import H
diff --git a/qlib/data/dataset/__init__.py b/qlib/data/dataset/__init__.py
index 46b90402d2..05df602a7b 100644
--- a/qlib/data/dataset/__init__.py
+++ b/qlib/data/dataset/__init__.py
@@ -392,7 +392,7 @@ def build_index(data: pd.DataFrame) -> Tuple[pd.DataFrame, dict]:
                 2021-01-14 12441 12442 12443 12444 12445 12446 ...
         2) the second element: {: }
         """
-        # object incase of pandas converting int to flaot
+        # object in case of pandas converting int to float
         idx_df = pd.Series(range(data.shape[0]), index=data.index, dtype=object)
         idx_df = lazy_sort_index(idx_df.unstack())  # NOTE: the correctness of `__getitem__` depends on columns sorted here
diff --git a/qlib/data/dataset/handler.py b/qlib/data/dataset/handler.py
index 134091c225..f2be3a3c60 100644
--- a/qlib/data/dataset/handler.py
+++ b/qlib/data/dataset/handler.py
@@ -70,7 +70,7 @@ def __init__(
         Parameters
         ----------
         instruments :
-            The stock list to retrive.
+            The stock list to retrieve.
         start_time :
             start_time of the original data.
         end_time :
diff --git a/qlib/data/dataset/processor.py b/qlib/data/dataset/processor.py
index e56d71683d..7294a9c4db 100644
--- a/qlib/data/dataset/processor.py
+++ b/qlib/data/dataset/processor.py
@@ -75,7 +75,7 @@ def is_for_infer(self) -> bool:

     def readonly(self) -> bool:
         """
-        Does the processor treat the input data readonly (i.e. does not write the input data) when processsing
+        Does the processor treat the input data readonly (i.e. does not write the input data) when processing

         Knowning the readonly information is helpful to the Handler to avoid uncessary copy
         """
diff --git a/qlib/data/dataset/storage.py b/qlib/data/dataset/storage.py
index 7f556f497d..1af78e92a0 100644
--- a/qlib/data/dataset/storage.py
+++ b/qlib/data/dataset/storage.py
@@ -63,7 +63,7 @@ class HasingStockStorage(BaseHandlerStorage):
     """Hasing data storage for datahanlder
     - The default data storage pandas.DataFrame is too slow when randomly accessing one stock's data
     - HasingStockStorage hashes the multiple stocks' data(pandas.DataFrame) by the key `stock_id`.
-    - HasingStockStorage hases the pandas.DataFrame into a dict, whose key is the stock_id(str) and value this stock data(panda.DataFrame), it has the following format:
+    - HasingStockStorage hashes the pandas.DataFrame into a dict, whose key is the stock_id(str) and whose value is this stock's data(pandas.DataFrame); it has the following format:
     {
         stock1_id: stock1_data,
         stock2_id: stock2_data,
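The dict layout in that storage.py docstring amounts to grouping the big DataFrame by its instrument index level once, so per-stock access becomes a dict lookup instead of a slow MultiIndex `.loc`. A minimal sketch with hypothetical data (not the storage class itself):

.. code-block:: python

    import pandas as pd

    df = pd.DataFrame(
        {"feature": [1.0, 2.0, 3.0, 4.0]},
        index=pd.MultiIndex.from_product(
            [["stock1", "stock2"], pd.to_datetime(["2021-01-04", "2021-01-05"])],
            names=["instrument", "datetime"],
        ),
    )

    # {stock_id: per-stock DataFrame}, built once
    hashed = {code: sub for code, sub in df.groupby(level="instrument")}
    hashed["stock1"]  # cheap dict lookup afterwards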
diff --git a/qlib/rl/env.py b/qlib/rl/env.py
index 3a77d22954..77da907180 100644
--- a/qlib/rl/env.py
+++ b/qlib/rl/env.py
@@ -64,10 +64,10 @@ def __init__(
         Parameters
         ----------
         state_interpreter : Union[dict, StateInterpreter]
-            interpretor that interprets the qlib execute result into rl env state.
+            interpreter that interprets the qlib execute result into rl env state.

         action_interpreter : Union[dict, ActionInterpreter]
-            interpretor that interprets the rl agent action into qlib order list
+            interpreter that interprets the rl agent action into qlib order list
         """
         super(QlibIntRLEnv, self).__init__(executor=executor)
         self.state_interpreter = init_instance_by_config(state_interpreter, accept_types=StateInterpreter)
diff --git a/qlib/strategy/base.py b/qlib/strategy/base.py
index 8604775449..bf641343b7 100644
--- a/qlib/strategy/base.py
+++ b/qlib/strategy/base.py
@@ -34,7 +34,7 @@ def __init__(
         Parameters
         ----------
         outer_trade_decision : BaseTradeDecision, optional
-            the trade decision of outer strategy which this startegy relies, and it will be traded in [start_time, end_time], by default None
+            the trade decision of outer strategy which this strategy relies, and it will be traded in [start_time, end_time], by default None
             - If the strategy is used to split trade decision, it will be used
             - If the strategy is used for portfolio management, it can be ignored
         level_infra : LevelInfrastructure, optional
@@ -232,9 +232,9 @@ def __init__(
         Parameters
         ----------
         state_interpreter : Union[dict, StateInterpreter]
-            interpretor that interprets the qlib execute result into rl env state
+            interpreter that interprets the qlib execute result into rl env state
         action_interpreter : Union[dict, ActionInterpreter]
-            interpretor that interprets the rl agent action into qlib order list
+            interpreter that interprets the rl agent action into qlib order list
         start_time : Union[str, pd.Timestamp], optional
             start time of trading, by default None
         end_time : Union[str, pd.Timestamp], optional
diff --git a/qlib/utils/__init__.py b/qlib/utils/__init__.py
index 1bb4d60b03..3c503fa409 100644
--- a/qlib/utils/__init__.py
+++ b/qlib/utils/__init__.py
@@ -579,7 +579,7 @@ def get_date_range(trading_date, left_shift=0, right_shift=0, future=False):

 def get_date_by_shift(trading_date, shift, future=False, clip_shift=True, freq="day", align: Optional[str] = None):
-    """get trading date with shift bias wil cur_date
+    """get trading date with shift bias from cur_date
     e.g. : shift == 1, return next trading date
            shift == -1, return previous trading date
     ----------
diff --git a/qlib/utils/index_data.py b/qlib/utils/index_data.py
index 06fb42a5ee..4b5bb8456d 100644
--- a/qlib/utils/index_data.py
+++ b/qlib/utils/index_data.py
@@ -6,7 +6,7 @@
 Some users just want a simple numpy dataframe with indices and don't want such a complicated tools.
 Such users are the target of `index_data`

-`index_data` try to behave like pandas (some API will be different because we try to be simpler and more intuitive) but don't compromize the performance. It provides the basic numpy data and simple indexing feature. If users call APIs which may compromize the performance, index_data will raise Errors.
+`index_data` tries to behave like pandas (some API will be different because we try to be simpler and more intuitive) but doesn't compromise the performance. It provides the basic numpy data and simple indexing feature. If users call APIs which may compromise the performance, index_data will raise Errors.
 """

 from typing import Dict, Tuple, Union, Callable, List
diff --git a/qlib/utils/resam.py b/qlib/utils/resam.py
index 35d9ebe7cf..d4a19b655d 100644
--- a/qlib/utils/resam.py
+++ b/qlib/utils/resam.py
@@ -203,10 +203,10 @@ def get_valid_value(series, last=True):
     """get the first/last not nan value of pd.Series with single level index
     Parameters
     ----------
-    series : pd.Seires
+    series : pd.Series
         series should not be empty
     last : bool, optional
-        wether to get the last valid value, by default True
+        whether to get the last valid value, by default True
         - if last is True, get the last valid value
         - else, get the first valid value
diff --git a/qlib/workflow/exp.py b/qlib/workflow/exp.py
index fcf6cd8d16..2136ece8d3 100644
--- a/qlib/workflow/exp.py
+++ b/qlib/workflow/exp.py
@@ -88,7 +88,7 @@ def create_recorder(self, recorder_name=None):
     def search_records(self, **kwargs):
         """
         Get a pandas DataFrame of records that fit the search criteria of the experiment.
-        Inputs are the search critera user want to apply.
+        Inputs are the search criteria the user wants to apply.

         Returns
         -------
diff --git a/qlib/workflow/expm.py b/qlib/workflow/expm.py
index 9efbc5b065..f13c884dfb 100644
--- a/qlib/workflow/expm.py
+++ b/qlib/workflow/expm.py
@@ -105,7 +105,7 @@ def create_exp(self, experiment_name: Optional[Text] = None):
     def search_records(self, experiment_ids=None, **kwargs):
         """
         Get a pandas DataFrame of records that fit the search criteria of the experiment.
-        Inputs are the search critera user want to apply.
+        Inputs are the search criteria the user wants to apply.

         Returns
         -------
diff --git a/qlib/workflow/online/update.py b/qlib/workflow/online/update.py
index 9e72d49703..ae6a214276 100644
--- a/qlib/workflow/online/update.py
+++ b/qlib/workflow/online/update.py
@@ -75,7 +75,7 @@ def update(self, *args, **kwargs):
 class DSBasedUpdater(RecordUpdater, metaclass=ABCMeta):
     """
     Dataset-Based Updater
-    - Provding updating feature for Updating data based on Qlib Dataset
+    - Providing updating feature for updating data based on Qlib Dataset

     Assumption
     - Based on Qlib dataset
diff --git a/qlib/workflow/record_temp.py b/qlib/workflow/record_temp.py
index ca6a9d6957..7eb01ca832 100644
--- a/qlib/workflow/record_temp.py
+++ b/qlib/workflow/record_temp.py
@@ -116,7 +116,7 @@ def check(self, include_self: bool = False, parents: bool = True):
         """
         Check if the records is properly generated and saved.
         It is useful in following examples
-        - checking if the depended files complete before genrating new things.
+        - checking if the depended files are complete before generating new things.
         - checking if the final files is completed

         Parameters
diff --git a/qlib/workflow/task/__init__.py b/qlib/workflow/task/__init__.py
index cc338cca4d..a7ea80d9b9 100644
--- a/qlib/workflow/task/__init__.py
+++ b/qlib/workflow/task/__init__.py
@@ -9,5 +9,5 @@
 |-----------------------+------------------------------------------------|
 | TaskGen               | Generating tasks.                              |
 | TaskManager(optional) | Manage generated tasks                         |
-| run task              | retrive tasks from TaskManager and run tasks.  |
+| run task              | retrieve tasks from TaskManager and run tasks. |
 """
diff --git a/qlib/workflow/task/gen.py b/qlib/workflow/task/gen.py
index 45fba12dad..0645d99519 100644
--- a/qlib/workflow/task/gen.py
+++ b/qlib/workflow/task/gen.py
@@ -272,7 +272,7 @@ def generate(self, task: dict) -> List[dict]:
 class MultiHorizonGenBase(TaskGen):
     def __init__(self, horizon: List[int] = [5], label_leak_n=2):
         """
-        This task generator tries to genrate tasks for different horizons based on an existing task
+        This task generator tries to generate tasks for different horizons based on an existing task

         Parameters
         ----------
diff --git a/qlib/workflow/task/manage.py b/qlib/workflow/task/manage.py
index a1d42ec8f8..9ac9a7a630 100644
--- a/qlib/workflow/task/manage.py
+++ b/qlib/workflow/task/manage.py
@@ -48,7 +48,7 @@ class TaskManager:
     The tasks manager assumes that you will only update the tasks you fetched.
     The mongo fetch one and update will make it date updating secure.

-    This class can be used as a tool from commandline. Here are serveral examples.
+    This class can be used as a tool from commandline. Here are several examples.
     You can view the help of manage module with the following commands:
     python -m qlib.workflow.task.manage -h # show manual of manage module CLI
     python -m qlib.workflow.task.manage wait -h # show manual of the wait command of manage
@@ -368,7 +368,7 @@ def commit_task_res(self, task, res, status=STATUS_DONE):

     def return_task(self, task, status=STATUS_WAITING):
         """
-        Return a task to status. Alway using in error handling.
+        Return a task to status. Always used in error handling.

         Args:
             task ([type]): [description]
diff --git a/scripts/data_collector/fund/collector.py b/scripts/data_collector/fund/collector.py
index f9d788fd31..ad504b4e09 100644
--- a/scripts/data_collector/fund/collector.py
+++ b/scripts/data_collector/fund/collector.py
@@ -103,7 +103,7 @@ def get_data_from_remote(symbol, interval, start, end):
         error_msg = f"{symbol}-{interval}-{start}-{end}"
         try:
-            # TODO: numberOfHistoricalDaysToCrawl should be bigger enouhg
+            # TODO: numberOfHistoricalDaysToCrawl should be big enough
             url = INDEX_BENCH_URL.format(
                 index_code=symbol, numberOfHistoricalDaysToCrawl=10000, startDate=start, endDate=end
             )
diff --git a/scripts/data_collector/utils.py b/scripts/data_collector/utils.py
index 883a1c5511..19131ec29f 100644
--- a/scripts/data_collector/utils.py
+++ b/scripts/data_collector/utils.py
@@ -360,7 +360,7 @@ def _get_eastmoney():
         _symbols = []
         for sub_data in re.findall(r"[\[](.*?)[\]]", resp.content.decode().split("= [")[-1].replace("];", "")):
             data = sub_data.replace('"', "").replace("'", "")
-            # TODO: do we need other informations, like fund_name from ['000001', 'HXCZHH', '华夏成长混合', '混合型', 'HUAXIACHENGZHANGHUNHE']
+            # TODO: do we need other information, like fund_name from ['000001', 'HXCZHH', '华夏成长混合', '混合型', 'HUAXIACHENGZHANGHUNHE']
             _symbols.append(data.split(",")[0])
     except Exception as e:
         logger.warning(f"request error: {e}")