Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add_test_pit #1089

Merged
merged 4 commits into from
May 6, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,7 @@ jobs:

- name: Unit tests with Pytest
run: |
pip install -r scripts/data_collector/pit/requirements.txt
cd tests
python -m pytest . --durations=10

Expand Down
1 change: 1 addition & 0 deletions .github/workflows/test_macos.yml
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ jobs:
python -m pip install black pytest
- name: Unit tests with Pytest
run: |
pip install -r scripts/data_collector/pit/requirements.txt
cd tests
python -m pytest . --durations=0
- name: Test workflow by config (install from source)
Expand Down
70 changes: 53 additions & 17 deletions scripts/data_collector/pit/test_pit.py → tests/test_pit.py
Original file line number Diff line number Diff line change
@@ -1,28 +1,64 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
import pandas as pd


import sys
import qlib
from qlib.data import D
import shutil
import unittest
import pandas as pd
import baostock as bs
from pathlib import Path

from qlib.data import D
from scripts.get_data import GetData
from scripts.dump_pit import DumpPitData

sys.path.append(str(Path(__file__).resolve().parent.parent.joinpath("scripts/data_collector/pit")))
from collector import Run


# Widen pandas' printed output; presumably so that the frames rendered for the
# string comparisons in the tests are neither wrapped nor column-truncated.
pd.set_option("display.max_columns", None)
pd.set_option("display.width", 1000)

# All data used by this module lives under a directory next to this file.
DATA_DIR = Path(__file__).parent / "test_pit_data"
SOURCE_DIR = DATA_DIR / "stock_data/source"
QLIB_DIR = DATA_DIR / "qlib_data"

# Create both working directories up front (a no-op when they already exist).
SOURCE_DIR.mkdir(parents=True, exist_ok=True)
QLIB_DIR.mkdir(parents=True, exist_ok=True)


class TestPIT(unittest.TestCase):
"""
NOTE!!!!!!
The assertions in this test assume that the user follows the commands below and downloads only 2 stocks.
1. `python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/cn_data --region cn`
2. `python scripts/data_collector/pit/collector.py download_data --source_dir ~/.qlib/stock_data/source/pit --start 2000-01-01 --end 2020-01-01 --interval quarterly --symbol_regex "^(600519|000725).*"`
3. `python scripts/data_collector/pit/collector.py normalize_data --interval quarterly --source_dir ~/.qlib/stock_data/source/pit --normalize_dir ~/.qlib/stock_data/source/pit_normalized`
4. `python scripts/dump_pit.py dump --csv_path ~/.qlib/stock_data/source/pit_normalized --qlib_dir ~/.qlib/qlib_data/cn_data --interval quarterly`
"""
@classmethod
def tearDownClass(cls) -> None:
    """Delete the whole ``DATA_DIR`` tree once the tests of this class are done."""
    data_root = DATA_DIR.resolve()
    shutil.rmtree(str(data_root))

@classmethod
def setUpClass(cls) -> None:
    """Prepare the data directories that the tests of this class read from.

    Steps, in order:

    1. Fetch the ``qlib_data_simple`` dataset (region ``cn``) into
       ``QLIB_DIR/cn_data`` through ``GetData``.
    2. Inside a baostock session, run the PIT collector to download the
       quarterly data for the symbols matching ``^(600519|000725).*``
       between 2000-01-01 and 2020-01-01 into ``SOURCE_DIR/pit``, then
       normalise it into ``SOURCE_DIR/pit_normalized``.
    3. Dump the normalised CSVs into the qlib data directory with
       ``DumpPitData``.
    """
    cn_data_dir = str(QLIB_DIR.joinpath("cn_data").resolve())
    pit_dir = str(SOURCE_DIR.joinpath("pit").resolve())
    pit_normalized_dir = str(SOURCE_DIR.joinpath("pit_normalized").resolve())

    GetData().qlib_data(name="qlib_data_simple", target_dir=cn_data_dir, region="cn")

    bs.login()
    try:
        Run(
            source_dir=pit_dir,
            interval="quarterly",
        ).download_data(start="2000-01-01", end="2020-01-01", symbol_regex="^(600519|000725).*")
        Run(
            source_dir=pit_dir,
            normalize_dir=pit_normalized_dir,
            interval="quarterly",
        ).normalize_data()
    finally:
        # Always close the baostock session, even when the download or the
        # normalisation raises; otherwise the login would be left dangling.
        bs.logout()

    DumpPitData(
        csv_path=pit_normalized_dir,
        qlib_dir=cn_data_dir,
    ).dump(interval="quarterly")

def setUp(self):
    """Initialise qlib against the data directory under ``QLIB_DIR/cn_data``.

    qlib is initialised exactly once per test, with an explicit
    ``provider_uri``. The former extra argument-less ``qlib.init()`` call was
    redundant: it was immediately superseded by the call below.
    """
    # qlib.init(kernels=1)  # NOTE: set kernel to 1 to make it debug easier
    provider_uri = str(QLIB_DIR.joinpath("cn_data").resolve())
    qlib.init(provider_uri=provider_uri)

def to_str(self, obj):
    """Return ``str(obj)`` with every run of whitespace removed."""
    # Splitting on whitespace drops spaces, tabs and newlines alike; joining
    # the remaining pieces back together yields a whitespace-free string.
    pieces = str(obj).split()
    return "".join(pieces)
Expand Down Expand Up @@ -66,7 +102,7 @@ def test_no_exist_data(self):
data["$close"] = 1 # in case of different dataset gives different values
expect = """
P($$roewa_q) P($$yoyni_q) $close
instrument datetime
instrument datetime
sh600519 2019-01-02 0.25522 0.243892 1
2019-01-03 0.25522 0.243892 1
2019-01-04 0.25522 0.243892 1
Expand All @@ -78,7 +114,7 @@ def test_no_exist_data(self):
2019-07-17 NaN NaN 1
2019-07-18 NaN NaN 1
2019-07-19 NaN NaN 1

[266 rows x 3 columns]
"""
self.check_same(data, expect)
Expand Down Expand Up @@ -191,7 +227,7 @@ def test_expr2(self):
data = D.features(instruments, fields, start_time="2019-01-01", end_time="2020-01-01", freq="day")
except_data = """
P($$roewa_q) P($$yoyni_q) P(($$roewa_q / $$yoyni_q) / Ref($$roewa_q / $$yoyni_q, 1) - 1) P(Sum($$yoyni_q, 4)) $close P($$roewa_q) * $close
instrument datetime
instrument datetime
sh600519 2019-01-02 0.255220 0.243892 1.484224 1.661578 63.595333 16.230801
2019-01-03 0.255220 0.243892 1.484224 1.661578 62.641907 15.987467
2019-01-04 0.255220 0.243892 1.484224 1.661578 63.915985 16.312637
Expand All @@ -203,7 +239,7 @@ def test_expr2(self):
2019-12-27 0.255819 0.219821 0.677052 1.081693 125.307404 32.056015
2019-12-30 0.255819 0.219821 0.677052 1.081693 127.763992 32.684456
2019-12-31 0.255819 0.219821 0.677052 1.081693 127.462303 32.607277

[244 rows x 6 columns]
"""
self.check_same(data, except_data)
Expand All @@ -219,7 +255,7 @@ def test_pref_operator(self):
data = D.features(instruments, fields, start_time="2018-04-28", end_time="2019-07-19", freq="day")
except_data = """
PRef($$roewa_q, 201902) PRef($$yoyni_q, 201801) P($$roewa_q) P($$roewa_q) / PRef($$roewa_q, 201801)
instrument datetime
instrument datetime
sh600519 2018-05-02 NaN 0.395075 0.088887 1.000000
2018-05-03 NaN 0.395075 0.088887 1.000000
2018-05-04 NaN 0.395075 0.088887 1.000000
Expand All @@ -231,7 +267,7 @@ def test_pref_operator(self):
2019-07-17 0.000000 0.395075 0.000000 0.000000
2019-07-18 0.175322 0.395075 0.175322 1.972414
2019-07-19 0.175322 0.395075 0.175322 1.972414

[299 rows x 4 columns]
"""
self.check_same(data, except_data)
Expand Down