Skip to content

Commit

Permalink
Ruff
Browse files Browse the repository at this point in the history
  • Loading branch information
robinholzi committed Sep 10, 2024
1 parent 62c0aa2 commit 5cb048d
Show file tree
Hide file tree
Showing 6 changed files with 72 additions and 67 deletions.
14 changes: 9 additions & 5 deletions experiments/arxiv/compare_trigger_policies/pipeline_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,19 +18,19 @@
)
from modyn.config.schema.pipeline.evaluation.metrics import AccuracyMetricConfig, F1ScoreMetricConfig


arxiv_bytes_parser_function = (
"import torch\n"
"import numpy as np\n"
"def bytes_parser_function(data: bytes) -> str:\n"
" return str(data, 'utf8')"
"def bytes_parser_function(data: bytes) -> str:\n"
" return str(data, 'utf8')"
)
# Source text of the evaluation transformer, kept as a string rather than a callable.
# The embedded function reduces the model output to the arg-max index along its last dimension.
# It is supplied as `evaluation_transformer_function` to the top-1 AccuracyMetricConfig below.
arxiv_evaluation_transformer_function = (
"import torch\n"
"def evaluation_transformer_function(model_output: torch.Tensor) -> torch.Tensor:\n"
" return torch.argmax(model_output, dim=-1)\n"
)


def gen_pipeline_config(
config_ref: str,
trigger_config: TriggerConfig,
Expand All @@ -40,7 +40,9 @@ def gen_pipeline_config(
) -> ModynPipelineConfig:
num_classes = 172
return ModynPipelineConfig(
pipeline=Pipeline(name=config_ref, description="Arxiv pipeline for comparing trigger policies", version="0.0.1"),
pipeline=Pipeline(
name=config_ref, description="Arxiv pipeline for comparing trigger policies", version="0.0.1"
),
model=ModelConfig(id="ArticleNet", config={"num_classes": num_classes}),
model_storage=PipelineModelStorageConfig(full_model_strategy=FullModelStrategy(name="PyTorchFullModel")),
training=TrainingConfig(
Expand Down Expand Up @@ -87,7 +89,9 @@ def gen_pipeline_config(
dataloader_workers=1,
tokenizer="DistilBertTokenizerTransform",
metrics=[
AccuracyMetricConfig(evaluation_transformer_function=arxiv_evaluation_transformer_function, topn=1),
AccuracyMetricConfig(
evaluation_transformer_function=arxiv_evaluation_transformer_function, topn=1
),
AccuracyMetricConfig(evaluation_transformer_function="", topn=2),
AccuracyMetricConfig(evaluation_transformer_function="", topn=5),
AccuracyMetricConfig(evaluation_transformer_function="", topn=10),
Expand Down
18 changes: 9 additions & 9 deletions experiments/arxiv/compare_trigger_policies/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
import pandas as pd

from experiments.arxiv.compare_trigger_policies.pipeline_config import gen_pipeline_config
from experiments.utils.models import Experiment
from experiments.utils.experiment_runner import run_multiple_pipelines
from experiments.utils.models import Experiment
from modyn.config.schema.pipeline import (
EvalHandlerConfig,
ModynPipelineConfig,
Expand All @@ -27,9 +27,8 @@
# Whole-second POSIX timestamps for the two calendar dates below
# (`Timestamp.timestamp()` yields a float; `int()` drops the fractional part).
# NOTE(review): the date strings are timezone-naive; pandas presumably treats them as UTC — confirm.
_FIRST_TIMESTAMP = int(pd.to_datetime("1995-01-01").timestamp())
_LAST_TIMESTAMP = int(pd.to_datetime("2024-07-01").timestamp())

def construct_slicing_eval_handler(
execution_time: EvalHandlerExecutionTime = "manual"
) -> EvalHandlerConfig:

def construct_slicing_eval_handler(execution_time: EvalHandlerExecutionTime = "manual") -> EvalHandlerConfig:
return EvalHandlerConfig(
name="slidingmatrix",
execution_time=execution_time,
Expand Down Expand Up @@ -95,9 +94,10 @@ def construct_pipelines(experiment: Experiment) -> list[ModynPipelineConfig]:
)
]


PERIODIC_EVAL_INTERVAL = [("current", "13w")] # total: 1/2y

# pretrain/cold start can be chosen post fuction by just dropping evaluation info before a certain date
# pretrain/cold start can be chosen post function by just dropping evaluation info before a certain date
_EXPERIMENT_REFS: dict[int, Experiment] = {
# -------------------------------------------------------------------------------- #
# 1X: Baselines with PERIODIC_EVAL_INTERVAL, executed with cautious #
Expand All @@ -109,8 +109,8 @@ def construct_pipelines(experiment: Experiment) -> list[ModynPipelineConfig]:
10: Experiment(
name="arxiv-baseline-time",
eval_handlers=(
construct_periodic_eval_handlers(intervals=PERIODIC_EVAL_INTERVAL, execution_time="manual") +
construct_between_trigger_eval_handler("manual")
construct_periodic_eval_handlers(intervals=PERIODIC_EVAL_INTERVAL, execution_time="manual")
+ construct_between_trigger_eval_handler("manual")
),
time_triggers={
schedule: TimeTriggerConfig(every=schedule, start_timestamp=_FIRST_TIMESTAMP)
Expand All @@ -122,8 +122,8 @@ def construct_pipelines(experiment: Experiment) -> list[ModynPipelineConfig]:
11: Experiment(
name="arxiv-baseline-dataamount",
eval_handlers=(
construct_periodic_eval_handlers(intervals=PERIODIC_EVAL_INTERVAL, execution_time="manual") +
construct_between_trigger_eval_handler("manual")
construct_periodic_eval_handlers(intervals=PERIODIC_EVAL_INTERVAL, execution_time="manual")
+ construct_between_trigger_eval_handler("manual")
),
data_amount_triggers={
f"{num_samples}": DataAmountTriggerConfig(num_samples=num_samples)
Expand Down
16 changes: 11 additions & 5 deletions experiments/huffpost/compare_trigger_policies/pipeline_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@
hp_bytes_parser_function = (
"import torch\n"
"import numpy as np\n"
"def bytes_parser_function(data: bytes) -> str:\n"
" return str(data, 'utf8')"
"def bytes_parser_function(data: bytes) -> str:\n"
" return str(data, 'utf8')"
)
hp_evaluation_transformer_function = (
"import torch\n"
Expand All @@ -40,7 +40,9 @@ def gen_pipeline_config(
) -> ModynPipelineConfig:
num_classes = 42
return ModynPipelineConfig(
pipeline=Pipeline(name=config_ref, description="Huffpost pipeline for comparing trigger policies", version="0.0.1"),
pipeline=Pipeline(
name=config_ref, description="Huffpost pipeline for comparing trigger policies", version="0.0.1"
),
model=ModelConfig(id="ArticleNet", config={"num_classes": num_classes}),
model_storage=PipelineModelStorageConfig(full_model_strategy=FullModelStrategy(name="PyTorchFullModel")),
training=TrainingConfig(
Expand All @@ -61,7 +63,9 @@ def gen_pipeline_config(
name="default",
algorithm="AdamW",
source="PyTorch",
param_groups=[OptimizerParamGroup(module="model", config={"lr": 0.00002, "weight_decay": 0.01})],
param_groups=[
OptimizerParamGroup(module="model", config={"lr": 0.00002, "weight_decay": 0.01})
],
)
],
)
Expand Down Expand Up @@ -94,7 +98,9 @@ def gen_pipeline_config(
dataloader_workers=1,
tokenizer="DistilBertTokenizerTransform",
metrics=[
AccuracyMetricConfig(evaluation_transformer_function=hp_evaluation_transformer_function, topn=1),
AccuracyMetricConfig(
evaluation_transformer_function=hp_evaluation_transformer_function, topn=1
),
AccuracyMetricConfig(evaluation_transformer_function="", topn=2),
AccuracyMetricConfig(evaluation_transformer_function="", topn=5),
AccuracyMetricConfig(evaluation_transformer_function="", topn=10),
Expand Down
18 changes: 10 additions & 8 deletions experiments/huffpost/compare_trigger_policies/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@

import pandas as pd

from experiments.utils.models import Experiment
from experiments.utils.experiment_runner import run_multiple_pipelines
from experiments.utils.models import Experiment
from modyn.config.schema.pipeline import (
EvalHandlerConfig,
ModynPipelineConfig,
Expand All @@ -21,15 +21,14 @@
from modyn.config.schema.pipeline.trigger.simple.data_amount import DataAmountTriggerConfig
from modyn.config.schema.pipeline.trigger.simple.time import TimeTriggerConfig
from modynclient.config.schema.client_config import ModynClientConfig, Supervisor

from .pipeline_config import gen_pipeline_config

# Whole-second POSIX timestamps for the two calendar dates below
# (`Timestamp.timestamp()` yields a float; `int()` drops the fractional part).
# NOTE(review): the date strings are timezone-naive; pandas presumably treats them as UTC — confirm.
_FIRST_TIMESTAMP = int(pd.to_datetime("2012-01-28").timestamp())
_LAST_TIMESTAMP = int(pd.to_datetime("2022-09-24").timestamp()) # last: dummy


def construct_slicing_eval_handler(
execution_time: EvalHandlerExecutionTime = "manual"
) -> EvalHandlerConfig:
def construct_slicing_eval_handler(execution_time: EvalHandlerExecutionTime = "manual") -> EvalHandlerConfig:
return EvalHandlerConfig(
name="slidingmatrix",
execution_time=execution_time,
Expand Down Expand Up @@ -66,6 +65,7 @@ def construct_periodic_eval_handlers(
for (interval, fake_interval) in intervals
]


def construct_between_trigger_eval_handler(execution_time: EvalHandlerExecutionTime = "manual") -> EvalHandlerConfig:
return EvalHandlerConfig(
name="full",
Expand All @@ -75,6 +75,7 @@ def construct_between_trigger_eval_handler(execution_time: EvalHandlerExecutionT
datasets=["huffpost_kaggle_all"], # train and test
)


def construct_pipelines(experiment: Experiment) -> list[ModynPipelineConfig]:
return [
gen_pipeline_config(
Expand All @@ -93,6 +94,7 @@ def construct_pipelines(experiment: Experiment) -> list[ModynPipelineConfig]:
)
]


# total: 14weeks -> ~4mths (with quarterly evaluations the intervals slightly overlap by 1 month)
PERIODIC_EVAL_INTERVAL = [("current", "7w")]

Expand All @@ -107,8 +109,8 @@ def construct_pipelines(experiment: Experiment) -> list[ModynPipelineConfig]:
10: Experiment(
name="hp-baseline-time",
eval_handlers=(
construct_periodic_eval_handlers(intervals=PERIODIC_EVAL_INTERVAL, execution_time="manual") +
construct_between_trigger_eval_handler("manual")
construct_periodic_eval_handlers(intervals=PERIODIC_EVAL_INTERVAL, execution_time="manual")
+ construct_between_trigger_eval_handler("manual")
),
time_triggers={
schedule: TimeTriggerConfig(every=schedule, start_timestamp=_FIRST_TIMESTAMP)
Expand All @@ -120,8 +122,8 @@ def construct_pipelines(experiment: Experiment) -> list[ModynPipelineConfig]:
11: Experiment(
name="hp-baseline-dataamount",
eval_handlers=(
construct_periodic_eval_handlers(intervals=PERIODIC_EVAL_INTERVAL, execution_time="manual") +
construct_between_trigger_eval_handler("manual")
construct_periodic_eval_handlers(intervals=PERIODIC_EVAL_INTERVAL, execution_time="manual")
+ construct_between_trigger_eval_handler("manual")
),
data_amount_triggers={
f"{num_samples}": DataAmountTriggerConfig(num_samples=num_samples)
Expand Down
10 changes: 6 additions & 4 deletions experiments/yearbook/compare_trigger_policies/pipeline_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,11 @@
)
from modyn.config.schema.pipeline.evaluation.config import EvalDataConfig
from modyn.config.schema.pipeline.evaluation.handler import EvalHandlerConfig
from modyn.config.schema.pipeline.evaluation.metrics import AccuracyMetricConfig, F1ScoreMetricConfig, RocAucMetricConfig
from modyn.config.schema.pipeline.evaluation.metrics import (
AccuracyMetricConfig,
F1ScoreMetricConfig,
RocAucMetricConfig,
)
from modyn.config.schema.pipeline.model_storage import FullModelStrategy
from modyn.config.schema.pipeline.sampling.config import NewDataStrategyConfig

Expand Down Expand Up @@ -113,9 +117,7 @@ def gen_pipeline_config(
num_classes=2,
average="micro",
),
RocAucMetricConfig(
evaluation_transformer_function=yb_evaluation_transformer_function_rocauc
)
RocAucMetricConfig(evaluation_transformer_function=yb_evaluation_transformer_function_rocauc),
],
)
for yb_dataset_name in ["yearbook_all", "yearbook_train", "yearbook_test"]
Expand Down
Loading

0 comments on commit 5cb048d

Please sign in to comment.