diff --git a/metadata-ingestion/src/datahub/ingestion/source/nifi.py b/metadata-ingestion/src/datahub/ingestion/source/nifi.py index ab418b1705956..d7b9c6854270e 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/nifi.py +++ b/metadata-ingestion/src/datahub/ingestion/source/nifi.py @@ -247,9 +247,10 @@ def process_s3_provenance_event(self, event): s3_url = f"s3://{s3_bucket}/{s3_key}" s3_url = s3_url[: s3_url.rindex("/")] - dataset_name = s3_url.replace("s3://", "").replace("/", ".") + s3_path = s3_url[len("s3://") :] + dataset_name = s3_path.replace("/", ".") platform = "s3" - dataset_urn = builder.make_dataset_urn(platform, dataset_name, self.env) + dataset_urn = builder.make_dataset_urn(platform, s3_path, self.env) return ExternalDataset( platform, dataset_name, diff --git a/metadata-ingestion/tests/unit/test_nifi_source.py b/metadata-ingestion/tests/unit/test_nifi_source.py index d9e2b6e35e157..5d7949c04b69f 100644 --- a/metadata-ingestion/tests/unit/test_nifi_source.py +++ b/metadata-ingestion/tests/unit/test_nifi_source.py @@ -89,7 +89,7 @@ def test_nifi_s3_provenance_event(): ioAspect = workunits[4].metadata.aspect assert ioAspect.outputDatasets == [ - "urn:li:dataset:(urn:li:dataPlatform:s3,foo-nifi.tropical_data,PROD)" + "urn:li:dataset:(urn:li:dataPlatform:s3,foo-nifi/tropical_data,PROD)" ] assert ioAspect.inputDatasets == []