You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
When using the fit() function on a full model (with mixed descriptors), there seems to be an issue with the naive 3-step and 3-step BCH approaches. With the 1-step, 2-step and 3-step ML approaches, there is no such issue.
However, using the bootstrap() function, there are no error messages with any approach.
Here is an example with the Iris dataset:
#packages
import pandas as pd
import numpy as np
from sklearn.datasets import load_iris
from sklearn.metrics import rand_score
from stepmix.stepmix import StepMix
from stepmix.utils import get_mixed_descriptor
from stepmix.bootstrap import bootstrap
# IRIS in a dataframe
data, target = load_iris(return_X_y=True, as_frame=True)

# Create categorical and binary data based on the Iris data quantiles.
# Snapshot the column names first: the loop body adds new columns to `data`,
# and only the columns present before the loop should be discretised.
for c in list(data.columns):
    c_categorical = c.replace("cm", "cat")
    data[c_categorical] = pd.qcut(data[c], q=3).cat.codes
    c_binary = c.replace("cm", "binary")
    data[c_binary] = pd.qcut(data[c], q=2).cat.codes

# Add missing values in all variables, except the covariate.
# Replace 50% of values with missing values.
# NOTE(review): 'Total length (cm)' is never created in this truncated
# excerpt, so the guard below has no effect here; the complete listing
# further down creates that column before this loop.
for i, c in enumerate(data.columns):
    if c != 'Total length (cm)':
        data[c] = data[c].sample(frac=.5, random_state=42*i)

# Models to be fit.
# NOTE(review): mm_descriptor and sm_descriptor are not defined in this
# truncated excerpt; the complete listing further down builds them with
# get_mixed_descriptor() before constructing the models.
model1 = StepMix(
    n_components=3,
    measurement=mm_descriptor,
    structural=sm_descriptor,
    verbose=1,
    random_state=123,
    n_steps=3,
)  # naive using bootstrap
When using the fit() function on a full model (with mixed descriptors), there seems to be an issue with the naive 3-step and 3-step BCH approaches. With the 1-step, 2-step and 3-step ML approaches, there is no such issue.
However, using the bootstrap() function, there are no error messages with any approach.
Here is an example with the Iris dataset:
#packages
import pandas as pd
import numpy as np
from sklearn.datasets import load_iris
from sklearn.metrics import rand_score
from stepmix.stepmix import StepMix
from stepmix.utils import get_mixed_descriptor
from stepmix.bootstrap import bootstrap
# IRIS in a dataframe
data, target = load_iris(return_X_y=True, as_frame=True)

# Create categorical and binary data based on the Iris data quantiles.
# Snapshot the column names first: the loop body adds new columns to `data`,
# and only the columns present before the loop should be discretised.
for c in list(data.columns):
    c_categorical = c.replace("cm", "cat")
    data[c_categorical] = pd.qcut(data[c], q=3).cat.codes
    c_binary = c.replace("cm", "binary")
    data[c_binary] = pd.qcut(data[c], q=2).cat.codes

# Create a fake covariate
data['Total length (cm)'] = data["sepal length (cm)"] + data["petal length (cm)"]

# Add missing values in all variables, except the covariate.
# Replace 50% of values with missing values: only a 50% sample of each column
# is assigned back, using a different seed per column (42 * column position).
for i, c in enumerate(data.columns):
    if c != 'Total length (cm)':
        data[c] = data[c].sample(frac=.5, random_state=42*i)

# Measurement model definition (sepal-based variables)
mm_data, mm_descriptor = get_mixed_descriptor(
    dataframe=data,
    continuous_nan=['sepal length (cm)', 'sepal width (cm)'],
    binary_nan=['sepal length (binary)', 'sepal width (binary)'],
    categorical_nan=['sepal length (cat)', 'sepal width (cat)'],
)

# Structural model definition (covariate plus petal-based outcomes)
sm_data, sm_descriptor = get_mixed_descriptor(
    dataframe=data,
    # Covariate
    covariate=['Total length (cm)'],
    # Outcomes
    continuous_nan=['petal length (cm)', 'petal width (cm)'],
    binary_nan=['petal length (binary)', 'petal width (binary)'],
    categorical_nan=['petal length (cat)', 'petal width (cat)'],
)

# Models to be fit: all four share the same descriptors and seed and differ
# only in the 3-step correction and in how they are estimated below.
model1 = StepMix(
    n_components=3,
    measurement=mm_descriptor,
    structural=sm_descriptor,
    verbose=1,
    random_state=123,
    n_steps=3,
)  # naive using bootstrap
model2 = StepMix(
    n_components=3,
    measurement=mm_descriptor,
    structural=sm_descriptor,
    verbose=1,
    random_state=123,
    n_steps=3,
)  # naive using fit()
model3 = StepMix(
    n_components=3,
    measurement=mm_descriptor,
    structural=sm_descriptor,
    verbose=1,
    random_state=123,
    n_steps=3,
    correction='BCH',
)  # BCH using fit()
model4 = StepMix(
    n_components=3,
    measurement=mm_descriptor,
    structural=sm_descriptor,
    verbose=1,
    random_state=123,
    n_steps=3,
    correction='ML',
)  # ML using fit

# Results
# NOTE(review): the trailing comments record the outcome reported for each
# call. Presumably the calls are meant to be tried one at a time; run as a
# single script, execution would stop at the first call that raises.
model1, bootstrapped_params = bootstrap(model1, mm_data, sm_data, n_repetitions=10)  # OK
model2.fit(mm_data, sm_data)  # error
model3.fit(mm_data, sm_data)  # error
model4.fit(mm_data, sm_data)  # OK
The text was updated successfully, but these errors were encountered: