Skip to content

Commit

Permalink
Merge the PRef operator into the P operator
Browse files Browse the repository at this point in the history
  • Loading branch information
Chaoyingz committed Mar 23, 2022
1 parent 51aa496 commit fabdebd
Show file tree
Hide file tree
Showing 4 changed files with 32 additions and 36 deletions.
7 changes: 6 additions & 1 deletion qlib/data/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -358,6 +358,11 @@ def period_feature(
For example, start_index == -3 end_index == 0 and current period index is cur_idx,
then the data between [start_index + cur_idx, end_index + cur_idx] will be retrieved.
period: int
This is used to query a specific period.
The period is represented as an int in Qlib (e.g. 202001 may represent the first quarter of 2020).
NOTE: `period` will override `start_index` and `end_index`
Returns
-------
pd.Series
Expand Down Expand Up @@ -745,7 +750,6 @@ def period_feature(self, instrument, field, start_index, end_index, cur_time, pe
raise ValueError(
f"Expected pd.Timestamp for `cur_time`, got '{cur_time}'. Advices: you can't query PIT data directly(e.g. '$$roewa_q'), you must use `P` operator to convert data to each day (e.g. 'P($$roewa_q)')"
)

assert end_index <= 0 # PIT doesn't support querying future data

DATA_RECORDS = [
Expand Down Expand Up @@ -796,6 +800,7 @@ def period_feature(self, instrument, field, start_index, end_index, cur_time, pe
first_period = data["period"][:loc].min()
period_list = get_period_list(first_period, last_period, quarterly)
if period is not None:
# NOTE: `period` has higher priority than `start_index` & `end_index`
if period not in period_list:
return pd.Series()
else:
Expand Down
4 changes: 2 additions & 2 deletions qlib/data/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -1643,10 +1643,10 @@ def register_all_ops(C):
"""register all operator"""
logger = get_module_logger("ops")

from qlib.data.pit import P, PRef # pylint: disable=C0415
from qlib.data.pit import P # pylint: disable=C0415

Operators.reset()
Operators.register(OpsList + [P, PRef])
Operators.register(OpsList + [P])

if getattr(C, "custom_ops", None) is not None:
Operators.register(C.custom_ops)
Expand Down
25 changes: 8 additions & 17 deletions qlib/data/pit.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,17 @@


class P(ElemOperator):
def __init__(self, feature, period=None):
super().__init__(feature)
self._period = period

def __str__(self):
return f"{super().__str__()}[{self._period}]" # avoid cache key conflicts

def _load_internal(self, instrument, start_index, end_index, freq):

_calendar = Cal.calendar(freq=freq)
resample_data = np.empty(end_index - start_index + 1, dtype="float32")

for cur_index in range(start_index, end_index + 1):
cur_time = _calendar[cur_index]
# To load expression accurately, more historical data are required
Expand All @@ -37,7 +43,7 @@ def _load_internal(self, instrument, start_index, end_index, freq):

# The calculated value will always be the last element, so the end_offset is zero.
try:
s = self._load_feature(instrument, -start_ws, 0, cur_time)
s = self.feature.load(instrument, -start_ws, 0, cur_time, self._period)
resample_data[cur_index - start_index] = s.iloc[-1] if len(s) > 0 else np.nan
except FileNotFoundError:
get_module_logger("base").warning(f"WARN: period data not found for {str(self)}")
Expand All @@ -48,25 +54,10 @@ def _load_internal(self, instrument, start_index, end_index, freq):
)
return resample_series

def _load_feature(self, instrument, start_index, end_index, cur_time):
return self.feature.load(instrument, start_index, end_index, cur_time)

def get_longest_back_rolling(self):
# The period data will collapse as a normal feature. So no extending and looking back
return 0

def get_extended_window_size(self):
# The period data will collapse as a normal feature. So no extending and looking back
return 0, 0


class PRef(P):
def __init__(self, feature, period):
super().__init__(feature)
self.period = period

def __str__(self):
return f"{super().__str__()}[{self.period}]"

def _load_feature(self, instrument, start_index, end_index, cur_time):
return self.feature.load(instrument, start_index, end_index, cur_time, self.period)
32 changes: 16 additions & 16 deletions scripts/data_collector/pit/test_pit.py
Original file line number Diff line number Diff line change
Expand Up @@ -208,26 +208,26 @@ def test_expr2(self):
"""
self.check_same(data, except_data)

def test_pf_operator(self):
def test_p_period_operator(self):
instruments = ["sh600519"]
fields = ["PRef($$roewa_q, 201902)", "PRef($$yoyni_q, 201801)", "P($$roewa_q)"]
fields = ["P($$roewa_q, 201902)", "P($$yoyni_q, 201801)", "P($$roewa_q)", "P($$roewa_q) / P($$roewa_q, 201801)"]
data = D.features(instruments, fields, start_time="2018-04-28", end_time="2019-07-19", freq="day")
except_data = """
PRef($$roewa_q, 201902) PRef($$yoyni_q, 201801) P($$roewa_q)
instrument datetime
sh600519 2018-05-02 NaN 0.395075 0.088887
2018-05-03 NaN 0.395075 0.088887
2018-05-04 NaN 0.395075 0.088887
2018-05-07 NaN 0.395075 0.088887
2018-05-08 NaN 0.395075 0.088887
... ... ... ...
2019-07-15 0.000000 0.395075 0.000000
2019-07-16 0.000000 0.395075 0.000000
2019-07-17 0.000000 0.395075 0.000000
2019-07-18 0.175322 0.395075 0.175322
2019-07-19 0.175322 0.395075 0.175322
P($$roewa_q, 201902) P($$yoyni_q, 201801) P($$roewa_q) P($$roewa_q) / P($$roewa_q, 201801)
instrument datetime
sh600519 2018-05-02 NaN 0.395075 0.088887 1.000000
2018-05-03 NaN 0.395075 0.088887 1.000000
2018-05-04 NaN 0.395075 0.088887 1.000000
2018-05-07 NaN 0.395075 0.088887 1.000000
2018-05-08 NaN 0.395075 0.088887 1.000000
... ... ... ... ...
2019-07-15 0.000000 0.395075 0.000000 0.000000
2019-07-16 0.000000 0.395075 0.000000 0.000000
2019-07-17 0.000000 0.395075 0.000000 0.000000
2019-07-18 0.175322 0.395075 0.175322 1.972414
2019-07-19 0.175322 0.395075 0.175322 1.972414
[299 rows x 3 columns]
[299 rows x 4 columns]
"""
self.check_same(data, except_data)

Expand Down

0 comments on commit fabdebd

Please sign in to comment.