diff --git a/pyaf/TS/Keras_Models.py b/pyaf/TS/Keras_Models.py
index 14b110616..f29b86948 100644
--- a/pyaf/TS/Keras_Models.py
+++ b/pyaf/TS/Keras_Models.py
@@ -30,15 +30,6 @@ def dumpCoefficients(self, iMax=10):
     def build_RNN_Architecture(self, iARInputs, iARTarget):
         assert(0);
 
-    def fit_inputs_and_targets_scalers(self, iARInputs, iARTarget):
-        from sklearn.preprocessing import StandardScaler
-
-        self.mStandardScaler_Input = StandardScaler()
-        self.mStandardScaler_Target = StandardScaler()
-        lARInputs = self.mStandardScaler_Input.fit_transform(iARInputs)
-        lARTarget = self.mStandardScaler_Target.fit_transform(iARTarget.reshape(-1, 1))
-        return (lARInputs, lARTarget)
-
     def get_default_keras_options(self):
         lDict = {}
         return lDict
@@ -61,11 +52,9 @@ def fit_keras_model(self, iARInputs, iARTarget):
 
     def predict_keras_model(self, iARInputs):
         lTimer = tsutil.cTimer(("PREDICTING_KERAS_MODEL", self.mOutName))
-        lARInputs = self.mStandardScaler_Input.transform(iARInputs)
-        lARInputs = self.reshape_inputs(lARInputs)
+        lARInputs = iARInputs
         lPredicted = self.mModel.predict(lARInputs);
         lPredicted = np.reshape(lPredicted, (-1, 1))
-        lPredicted = self.mStandardScaler_Target.inverse_transform(lPredicted)
         return lPredicted
 
     def fit(self):
@@ -78,8 +67,8 @@ def fit(self):
 
         lARInputs = lAREstimFrame[self.mInputNames].values
         lARTarget = lAREstimFrame[series].values
-
-        (lARInputs, lARTarget) = self.fit_inputs_and_targets_scalers(lARInputs, lARTarget)
+        if(self.mLagEncoder is not None):
+            lARTarget = self.mLagEncoder.transform(lARTarget.reshape(-1, 1)).flatten()
 
         self.build_RNN_Architecture(lARInputs, lARTarget);
 
@@ -95,6 +84,9 @@ def fit(self):
 
         lPredicted = self.predict_keras_model(lFullARInputs);
         self.mARFrame[self.mOutName] = lPredicted
+        if(self.mLagEncoder is not None):
+            self.mARFrame[self.mOutName] = self.mLagEncoder.inverse_transform(self.mARFrame[self.mOutName].values.reshape(-1, 1)).flatten()
+
         self.compute_ar_residue(self.mARFrame)
 
     def transformDataset(self, df, horizon_index = 1):
@@ -107,6 +99,8 @@ def transformDataset(self, df, horizon_index = 1):
 
         lPredicted = self.predict_keras_model(inputs)
         df[self.mOutName] = lPredicted;
+        if(self.mLagEncoder is not None):
+            df[self.mOutName] = self.mLagEncoder.inverse_transform(df[self.mOutName].values.reshape(-1, 1)).flatten()
         self.compute_ar_residue(df)
 
         return df;
diff --git a/pyaf/TS/Pytorch_Models.py b/pyaf/TS/Pytorch_Models.py
index 366b4cdfa..0267d0d28 100644
--- a/pyaf/TS/Pytorch_Models.py
+++ b/pyaf/TS/Pytorch_Models.py
@@ -38,16 +38,6 @@ def dumpCoefficients(self, iMax=10):
     def build_RNN_Architecture(self, iARInputs, iARTarget):
         assert(0);
 
-    def fit_inputs_and_targets_scalers(self, iARInputs, iARTarget):
-        from sklearn.preprocessing import StandardScaler
-
-        self.mStandardScaler_Input = StandardScaler()
-        self.mStandardScaler_Target = StandardScaler()
-        lARInputs = self.mStandardScaler_Input.fit_transform(iARInputs)
-        lARTarget = self.mStandardScaler_Target.fit_transform(iARTarget.reshape(iARTarget.shape[0], 1))
-        lARTarget = lARTarget.reshape((lARTarget.shape[0], 1))
-        return (lARInputs, lARTarget)
-
     def get_default_pytorch_options(self):
         lDict = {}
         return lDict
@@ -70,12 +60,10 @@ def predict_pytorch_model(self, iARInputs):
         lTimer = None
         if(self.mOptions.mDebug):
             lTimer = tsutil.cTimer(("PREDICTING_PYTORCH_MODEL", self.mOutName))
-        lARInputs = self.mStandardScaler_Input.transform(iARInputs)
-        lARInputs = self.reshape_inputs(lARInputs)
+        lARInputs = iARInputs
         lARInputs = lARInputs.astype(np.float32)
         lPredicted = self.mModel.predict(lARInputs);
         lPredicted = np.reshape(lPredicted, (-1, 1))
-        lPredicted = self.mStandardScaler_Target.inverse_transform(lPredicted)
         return lPredicted
 
 
@@ -89,8 +77,8 @@ def fit(self):
 
         lARInputs = lAREstimFrame[self.mInputNames].values
         lARTarget = lAREstimFrame[series].values
-
-        (lARInputs, lARTarget) = self.fit_inputs_and_targets_scalers(lARInputs, lARTarget)
+        if(self.mLagEncoder is not None):
+            lARTarget = self.mLagEncoder.transform(lARTarget.reshape(-1, 1))
 
         self.build_RNN_Architecture(lARInputs, lARTarget);
 
@@ -105,6 +93,8 @@ def fit(self):
 
         lPredicted = self.predict_pytorch_model(lFullARInputs)
         self.mARFrame[self.mOutName] = lPredicted
+        if(self.mLagEncoder is not None):
+            self.mARFrame[self.mOutName] = self.mLagEncoder.inverse_transform(self.mARFrame[self.mOutName].values.reshape(-1, 1)).flatten()
         self.compute_ar_residue(self.mARFrame)
 
     def transformDataset(self, df, horizon_index = 1):
@@ -117,6 +107,8 @@ def transformDataset(self, df, horizon_index = 1):
 
         lPredicted = self.predict_pytorch_model(inputs)
         df[self.mOutName] = lPredicted;
+        if(self.mLagEncoder is not None):
+            df[self.mOutName] = self.mLagEncoder.inverse_transform(df[self.mOutName].values.reshape(-1, 1)).flatten()
         self.compute_ar_residue(df)
 
         return df;
diff --git a/pyaf/TS/Scikit_Models.py b/pyaf/TS/Scikit_Models.py
index d990250b4..867efd05c 100644
--- a/pyaf/TS/Scikit_Models.py
+++ b/pyaf/TS/Scikit_Models.py
@@ -45,6 +45,8 @@ def fit(self):
 
         lARInputs = lAREstimFrame[self.mInputNames].values
         lARTarget = lAREstimFrame[series].values
+        if(self.mLagEncoder is not None):
+            lARTarget = self.mLagEncoder.transform(lARTarget.reshape(-1, 1)).flatten()
         # tsutil.print_pyaf_detailed_info(len(self.mInputNames), lARInputs.shape , lARTarget.shape)
         assert(lARInputs.shape[1] > 0);
         assert(lARTarget.shape[0] > 0);
@@ -110,6 +112,8 @@ def fit(self):
 
         if(self.mDecompositionType in ['TSR']):
             self.mARFrame[self.mOutName] = 1.0
+        if(self.mLagEncoder is not None):
+            self.mARFrame[self.mOutName] = self.mLagEncoder.inverse_transform(self.mARFrame[self.mOutName].values.reshape(-1, 1)).flatten()
         self.compute_ar_residue(self.mARFrame)
 
 
@@ -131,6 +135,8 @@ def transformDataset(self, df, horizon_index = 1):
 
         if(self.mDecompositionType in ['TSR']):
             df[self.mOutName] = 1.0
+        if(self.mLagEncoder is not None):
+            df[self.mOutName] = self.mLagEncoder.inverse_transform(df[self.mOutName].values.reshape(-1, 1)).flatten()
         self.compute_ar_residue(df)
 
         return df;
@@ -144,6 +150,9 @@ def __init__(self , cycle_residue_name, P , iExogenousInfo = None):
 
     def dumpCoefficients(self, iMax=10):
         logger = tsutil.get_pyaf_logger();
+        if(self.mLagEncoder is not None):
+            logger.info("AR_MODEL_LAG_ENCODNG_QUANTILES " + str(self.mLagEncoder.quantiles_.flatten().tolist()));
+
         lDict = dict(zip(self.mInputNamesAfterSelection , self.mScikitModel.coef_.round(6)));
         lDict1 = dict(zip(self.mInputNamesAfterSelection , abs(self.mScikitModel.coef_.round(6))));
         i = 1;
diff --git a/pyaf/TS/SignalDecomposition_AR.py b/pyaf/TS/SignalDecomposition_AR.py
index aa6cca74f..69d6c35a8 100644
--- a/pyaf/TS/SignalDecomposition_AR.py
+++ b/pyaf/TS/SignalDecomposition_AR.py
@@ -28,6 +28,7 @@ def __init__(self , cycle_residue_name, iExogenousInfo = None):
         self.mInputNames = [];
         self.mExogenousInfo = iExogenousInfo;
         self.mLagsForSeries = {cycle_residue_name : []}
+        self.mLagEncoder = None
 
     def compute_ar_residue(self, df):
         target = df[self.mCycleResidueName].values
@@ -86,9 +87,11 @@ def generateLagsForForecast(self, df, selection = None):
         lDict = {}
         # lDict[self.mCycleResidueName] = df[self.mCycleResidueName]
         series = self.mCycleResidueName
-        lSeries = df[self.mCycleResidueName]
+        lSeries = df[self.mCycleResidueName].values
+        if(self.mLagEncoder is not None):
+            lSeries = self.mLagEncoder.transform(lSeries.reshape(-1, 1)).flatten()
         # Investigate Large Horizon Models #213 : The model can produce overflows in its inputs when iterated.
-        lSeries = lSeries.values.clip(-1e+10, +1e10)
+        lSeries = lSeries.clip(-1e+10, +1e10)
         for p in self.mLagsForSeries[self.mCycleResidueName]:
             name = series +'_Lag' + str(p);
             if(selection is None or name in selection):
@@ -149,6 +152,7 @@ def __init__(self):
         self.mARFrame = None
         self.mARList = {}
         self.mExogenousInfo = None;
+        self.mLagEncoders = {}
 
     def plotAR(self):
         for trend in self.mTrendList:
@@ -169,15 +173,24 @@ def shift_series(self, series, p):
         new_values = np.append([ series[0] ]*p, series[0:N-p])
         return new_values
 
-    def generateLagsForTraining(self, df, series, pMinMax):
+    def generateLagsForTraining(self, df, series, pMinMax, iEncode = False):
+        lSeries = df[series].values;
+        lCanApplyQuantileTransform = iEncode and (self.mOptions.mLagEncoding is not None)
+        if(lCanApplyQuantileTransform):
+            from sklearn.preprocessing import QuantileTransformer
+            df_Estim = self.mSplit.getEstimPart(df)
+            NQ = int(min(20, np.sqrt(df_Estim.shape[0]))) # optimal quantiles number heuristics : sqrt(N)
+            qt = QuantileTransformer(n_quantiles=NQ, random_state=self.mOptions.mSeed)
+            qt.fit(df_Estim[series].values.reshape(-1, 1))
+            self.mLagEncoders[series] = qt
+            lSeries = qt.transform(lSeries.reshape(-1, 1))
         (pmin, pmax) = pMinMax
-        lSeries = df[series];
-        self.mDefaultValues[series] = lSeries.values[0];
+        self.mDefaultValues[series] = lSeries[0];
         lDict = {}
         lags = []
         for p in range(pmin, pmax+1):
             name = series+'_Lag' + str(p)
-            lShiftedSeries = self.shift_series(lSeries.values, p)
+            lShiftedSeries = self.shift_series(lSeries, p)
             lShiftedEstim = self.mSplit.getEstimPart(lShiftedSeries);
             lAcceptable = self.is_not_constant(lShiftedEstim);
             if(lAcceptable):
@@ -206,12 +219,13 @@ def preselect_exog_vars(self, df, cycle_residue):
 
     def addLagsForTraining(self, df, cycle_residue):
         P = self.get_nb_lags();
-        lag_df, lags = self.generateLagsForTraining(df, cycle_residue, (1, P));
+        lag_df, lags = self.generateLagsForTraining(df, cycle_residue, (1, P), iEncode = True);
         lag_dfs = [lag_df]
         for autoreg in self.mARList[cycle_residue]:
             for lag in lags:
                 (name , p) = lag
                 autoreg.register_lag(name, p)
+            autoreg.mLagEncoder = self.mLagEncoders.get(cycle_residue)
         # Exogenous variables lags
         lUseExog = False
         # Exog variables can be configured but not used ("AR" activated and "ARX" disabled).
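
Illustrative sketch, not part of the patch above: the new mLagEncoder is a scikit-learn QuantileTransformer fitted on the estimation part of the target, so the AR models are trained on quantile-encoded lags and their predictions are mapped back with inverse_transform. The snippet below reproduces that round trip on synthetic data; the signal, split size and seed are assumptions, and only the sqrt(N) quantile-count heuristic and the transform/inverse_transform calls mirror the diff.

import numpy as np
from sklearn.preprocessing import QuantileTransformer

signal = np.random.default_rng(0).exponential(size=200)      # skewed series standing in for a cycle residue (assumed data)
estim = signal[:160]                                          # analogous to mSplit.getEstimPart(df) (assumed split)
NQ = int(min(20, np.sqrt(estim.shape[0])))                    # same sqrt(N) quantile-count heuristic as generateLagsForTraining
qt = QuantileTransformer(n_quantiles=NQ, random_state=1960)   # 1960 stands in for mOptions.mSeed (assumed value)
qt.fit(estim.reshape(-1, 1))

encoded = qt.transform(signal.reshape(-1, 1)).flatten()            # what the AR model sees as its target and lags
decoded = qt.inverse_transform(encoded.reshape(-1, 1)).flatten()   # what fit()/transformDataset() write back into the frame
print(np.abs(signal - decoded).max())   # small inside the fitted range; values outside it are clipped to that range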