From 6e3ffa3724cdb2a3c79aab947004a42fadf3656d Mon Sep 17 00:00:00 2001
From: Wendi Li <wendili.academic@qq.com>
Date: Mon, 2 Jan 2023 10:15:50 -0600
Subject: [PATCH] [DDG-DA] Update crowd-sourced data results (#1405)

* [DDG-DA] Update crowd-sourced data experiments

* Remove internal data version

* Modify README
---
 examples/benchmarks_dynamic/DDG-DA/workflow.py |  2 +-
 examples/benchmarks_dynamic/README.md          | 16 +++++++++++-----
 2 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/examples/benchmarks_dynamic/DDG-DA/workflow.py b/examples/benchmarks_dynamic/DDG-DA/workflow.py
index 2d7427cfdb..48ea9bdb3e 100644
--- a/examples/benchmarks_dynamic/DDG-DA/workflow.py
+++ b/examples/benchmarks_dynamic/DDG-DA/workflow.py
@@ -170,7 +170,7 @@ def train_meta_model(self):
         # 3) train and logging meta model
         with R.start(experiment_name=self.meta_exp_name):
             R.log_params(**kwargs)
-            mm = MetaModelDS(step=self.step, hist_step_n=kwargs["hist_step_n"], lr=0.001, max_epoch=200, seed=43)
+            mm = MetaModelDS(step=self.step, hist_step_n=kwargs["hist_step_n"], lr=0.001, max_epoch=100, seed=43)
             mm.fit(md)
             R.save_objects(model=mm)
 
diff --git a/examples/benchmarks_dynamic/README.md b/examples/benchmarks_dynamic/README.md
index e6d09902a4..261fcc0356 100644
--- a/examples/benchmarks_dynamic/README.md
+++ b/examples/benchmarks_dynamic/README.md
@@ -4,15 +4,21 @@ So adapting the forecasting models/strategies to market dynamics is very importa
 
 The table below shows the performances of different solutions on different forecasting models.
 
-## Alpha158 dataset
+## Alpha158 Dataset
+Here is the [crowd-sourced version of qlib data](data_collector/crowd_source/README.md): https://github.com/chenditc/investment_data/releases
+```bash
+wget https://github.com/chenditc/investment_data/releases/download/20220720/qlib_bin.tar.gz
+tar -zxvf qlib_bin.tar.gz -C ~/.qlib/qlib_data/cn_data --strip-components=2
+```
 
 | Model Name       | Dataset | IC | ICIR | Rank IC | Rank ICIR | Annualized Return | Information Ratio | Max Drawdown |
 |------------------|---------|----|------|---------|-----------|-------------------|-------------------|--------------|
-| RR[Linear]       |Alpha158 |0.088|0.570|0.102    |0.622      |0.077              |1.175              |-0.086        |
-| DDG-DA[Linear]   |Alpha158 |0.093|0.622|0.106    |0.670      |0.085              |1.213              |-0.093        |
-| RR[LightGBM]     |Alpha158 |0.079|0.566|0.088    |0.592      |0.075              |1.226              |-0.096        |
-| DDG-DA[LightGBM] |Alpha158 |0.084|0.639|0.093    |0.664      |0.099              |1.442              |-0.071        |
+| RR[Linear]       |Alpha158 |0.089|0.577|0.102    |0.627      |0.093              |1.458              |-0.073        |
+| DDG-DA[Linear]   |Alpha158 |0.096|0.636|0.107    |0.677      |0.067              |0.996              |-0.091        |
+| RR[LightGBM]     |Alpha158 |0.082|0.589|0.091    |0.626      |0.077              |1.320              |-0.091        |
+| DDG-DA[LightGBM] |Alpha158 |0.085|0.658|0.094    |0.686      |0.115              |1.792              |-0.068        |
 
 - The label horizon of the `Alpha158` dataset is set to 20.
 - The rolling time intervals are set to 20 trading days.
 - The test rolling periods are from January 2017 to August 2020.
+- The results are based on the crowd-sourced version. The Yahoo version of qlib data does not contain `VWAP`, so all related factors are missing and filled with 0, which leads to a rank-deficient matrix (a matrix that does not have full rank) and makes the lower-level optimization of DDG-DA unsolvable.