Skip to content

Commit

Permalink
DISCO-3183 Consolidate manifest configs (#772)
Browse files Browse the repository at this point in the history
  • Loading branch information
gruberb authored Jan 29, 2025
1 parent b7484fb commit 1972155
Show file tree
Hide file tree
Showing 8 changed files with 22 additions and 45 deletions.
6 changes: 2 additions & 4 deletions merino/configs/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
Validator("metrics.dev_logger", is_type_of=bool),
Validator("metrics.host", is_type_of=str),
Validator("metrics.port", gte=0, is_type_of=int),
Validator("image_manifest.gcs_project", is_type_of=str),
Validator("image_manifest.gcs_bucket", is_type_of=str),
Validator("accuweather.url_location_key_placeholder", is_type_of=str, must_exist=True),
Validator(
"accuweather.url_param_partner_code",
Expand Down Expand Up @@ -98,8 +100,6 @@
),
Validator("providers.top_picks.resync_interval_sec", gt=0),
Validator("providers.top_picks.cron_interval_sec", gt=0),
Validator("providers.top_picks.gcs_project", is_type_of=str),
Validator("providers.top_picks.gcs_bucket", is_type_of=str),
Validator(
"providers.top_picks.domain_data_source",
is_type_of=str,
Expand Down Expand Up @@ -143,8 +143,6 @@
Validator("sentry.traces_sample_rate", gte=0, lte=1),
Validator("manifest.resync_interval_sec", gt=0),
Validator("manifest.cron_interval_sec", gt=0),
Validator("manifest.gcs_project", is_type_of=str),
Validator("manifest.gcs_bucket", is_type_of=str),
]

# `root_path` = The root path for Dynaconf, DO NOT CHANGE.
Expand Down
24 changes: 8 additions & 16 deletions merino/configs/default.toml
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,14 @@ env = "dev"
# If set to `debug`, the DSN will be set to a testing value recommended by Sentry,
# and extra output will be included in the logs.

[default.image_manifest]
# MERINO_IMAGE_MANIFEST__GCS_PROJECT
# GCS project name that contains domain data
gcs_project = ""

# MERINO_IMAGE_MANIFEST__GCS_BUCKET
# GCS bucket that contains domain data files
gcs_bucket = ""

[default.providers.accuweather]
# MERINO_PROVIDERS__ACCUWEATHER__TYPE
Expand Down Expand Up @@ -249,14 +257,6 @@ cron_interval_sec = 60
# Time between re-syncs of the manifest file, in seconds. Defaults to 24 hours.
resync_interval_sec = 86400

# MERINO_PROVIDERS__MANIFEST__GCS_PROJECT
# GCS project name that contains domain data
gcs_project = ""

# MERINO_PROVIDERS__MANIFEST__GCS_BUCKET
# GCS bucket that contains domain data files
gcs_bucket = ""

[default.accuweather]
# MERINO_ACCUWEATHER__API_KEY
# The API key to AccuWeather's API endpoint.
Expand Down Expand Up @@ -458,14 +458,6 @@ resync_interval_sec = 43200
# is remotely or locally acquired.
domain_data_source = "local"

# MERINO_PROVIDERS__TOP_PICKS__GCS_PROJECT
# GCS project name that contains domain data
gcs_project = ""

# MERINO_PROVIDERS__TOP_PICKS__GCS_BUCKET
# GCS bucket that contains domain data files
gcs_bucket = ""

[default.providers.wikipedia]
# MERINO_PROVIDERS__WIKIPEDIA__TYPE
# The type of this provider, should be `wikipedia`.
Expand Down
12 changes: 2 additions & 10 deletions merino/configs/production.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ dynaconf_merge = true
[production.accuweather]
url_base = "https://api.accuweather.com"

[production.providers.top_picks]
[production.image_manifest]
# MERINO_IMAGE_MANIFEST__GCS_PROJECT
# GCS project name that contains domain data
gcs_project = "moz-fx-merino-prod-1c2f"
Expand All @@ -22,20 +22,12 @@ gcs_project = "moz-fx-merino-prod-1c2f"
# GCS bucket that contains domain data files
gcs_bucket = "merino-images-prodpy"

[production.providers.top_picks]
# MERINO_PROVIDERS__TOP_PICKS__DOMAIN_DATA_SOURCE
# Enum of either `remote` or `local` that defines whether domain data
# is remotely or locally acquired.
domain_data_source = "remote"

[production.manifest]
# MERINO_PROVIDERS__MANIFEST__GCS_PROJECT
# GCS project name that contains domain data
gcs_project = "moz-fx-merino-prod-1c2f"

# MERINO_PROVIDERS__MANIFEST__GCS_BUCKET
# GCS bucket that contains domain data files
gcs_bucket = "merino-images-prodpy"

[production.curated_recommendations.gcs]
# MERINO__CURATED_RECOMMENDATIONS__GCS__BUCKET_NAME
# GCS bucket that contains aggregate engagement and prior data
Expand Down
11 changes: 3 additions & 8 deletions merino/configs/stage.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,23 +13,18 @@ dynaconf_merge = true
[stage.accuweather]
url_base = "https://api.accuweather.com"

[stage.providers.top_picks]
[stage.image_manifest]
# MERINO_IMAGE_MANIFEST__GCS_PROJECT
gcs_project = "moz-fx-merino-nonprod-ee93"

# MERINO_IMAGE_MANIFEST__GCS_BUCKET
gcs_bucket = "merino-images-stagepy"


[stage.providers.top_picks]
# MERINO_PROVIDERS__TOP_PICKS__DOMAIN_DATA_SOURCE
domain_data_source = "remote"

[stage.manifest]
# MERINO_PROVIDERS__MANIFEST__GCS_PROJECT
gcs_project = "moz-fx-merino-nonprod-ee93"

# MERINO_PROVIDERS__MANIFEST__GCS_BUCKET
gcs_bucket = "merino-images-stagepy"

[stage.curated_recommendations.gcs]
# MERINO__CURATED_RECOMMENDATIONS__GCS__BUCKET_NAME
# GCS bucket that contains aggregate engagement and prior data
Expand Down
2 changes: 1 addition & 1 deletion merino/configs/testing.toml
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ domain_data_source = "local"
# The backend of the provider. Either "elasticsearch" or "test".
backend = "test"

[testing.manifest]
[testing.image_manifest]
gcs_project = "test_gcp_uploader_project"
gcs_bucket = "test_gcp_uploader_bucket"

Expand Down
4 changes: 2 additions & 2 deletions merino/providers/manifest/backends/manifest.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,8 @@ async def fetch(self) -> tuple[GetManifestResultCode, ManifestData | None]:
def fetch_manifest_data(self) -> tuple[GetManifestResultCode, ManifestData | None]:
"""Fetch manifest data from GCS through the remote filemanager."""
remote_filemanager = ManifestRemoteFilemanager(
gcs_project_path=settings.manifest.gcs_project,
gcs_bucket_path=settings.manifest.gcs_bucket,
gcs_project_path=settings.image_manifest.gcs_project,
gcs_bucket_path=settings.image_manifest.gcs_bucket,
blob_name=GCS_BLOB_NAME,
)

Expand Down
4 changes: 2 additions & 2 deletions merino/providers/suggest/top_picks/backends/top_picks.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,8 +164,8 @@ def maybe_build_indices(self) -> tuple[Enum, TopPicksData | None]:
match DomainDataSource(domain_data_source):
case DomainDataSource.REMOTE:
remote_filemanager = TopPicksRemoteFilemanager(
gcs_project_path=settings.providers.top_picks.gcs_project,
gcs_bucket_path=settings.providers.top_picks.gcs_bucket,
gcs_project_path=settings.image_manifest.gcs_project,
gcs_bucket_path=settings.image_manifest.gcs_bucket,
)
client = remote_filemanager.create_gcs_client()
get_file_result_code, remote_domains = remote_filemanager.get_file(client)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,8 @@ def fixture_top_picks_remote_filemanager_parameters() -> dict[str, Any]:
"""Define TopPicksRemoteFilemanager parameters for test."""
# These settings read from testing.toml, not default.toml.
return {
"gcs_project_path": settings.providers.top_picks.gcs_project,
"gcs_bucket_path": settings.providers.top_picks.gcs_bucket,
"gcs_project_path": settings.image_manifest.gcs_project,
"gcs_bucket_path": settings.image_manifest.gcs_bucket,
}


Expand Down

0 comments on commit 1972155

Please sign in to comment.