From 44a91c911e82c37cc24277e52d1adb29422938fd Mon Sep 17 00:00:00 2001 From: Joe Reuter Date: Tue, 25 Jul 2023 11:36:04 +0200 Subject: [PATCH 01/16] fix tests --- .../connector_builder_handler.py | 26 +- .../sources/declarative/declarative_stream.py | 29 +- .../parsers/model_to_component_factory.py | 3 +- .../declarative/requesters/http_requester.py | 42 +-- .../declarative/requesters/requester.py | 15 +- .../declarative/retrievers/retriever.py | 7 +- .../retrievers/simple_retriever.py | 356 +++++++----------- .../test_connector_builder_handler.py | 22 +- .../test_per_partition_cursor_integration.py | 4 +- .../test_model_to_component_factory.py | 21 +- .../requesters/test_http_requester.py | 5 +- .../retrievers/test_simple_retriever.py | 259 +++---------- .../test_manifest_declarative_source.py | 14 +- 13 files changed, 277 insertions(+), 526 deletions(-) diff --git a/airbyte-cdk/python/airbyte_cdk/connector_builder/connector_builder_handler.py b/airbyte-cdk/python/airbyte_cdk/connector_builder/connector_builder_handler.py index 964995fd3eea3..10e45859f81bb 100644 --- a/airbyte-cdk/python/airbyte_cdk/connector_builder/connector_builder_handler.py +++ b/airbyte-cdk/python/airbyte_cdk/connector_builder/connector_builder_handler.py @@ -15,7 +15,7 @@ from airbyte_cdk.sources.declarative.declarative_stream import DeclarativeStream from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource from airbyte_cdk.sources.declarative.parsers.model_to_component_factory import ModelToComponentFactory -from airbyte_cdk.sources.streams.http import HttpStream +from airbyte_cdk.sources.declarative.retrievers.simple_retriever import SimpleRetriever from airbyte_cdk.utils.traced_exception import AirbyteTracedException DEFAULT_MAXIMUM_NUMBER_OF_PAGES_PER_SLICE = 5 @@ -51,12 +51,14 @@ def create_source(config: Mapping[str, Any], limits: TestReadLimits) -> Manifest emit_connector_builder_messages=True, 
limit_pages_fetched_per_slice=limits.max_pages_per_slice, limit_slices_fetched=limits.max_slices, - disable_retries=True - ) + disable_retries=True, + ), ) -def read_stream(source: DeclarativeSource, config: Mapping[str, Any], configured_catalog: ConfiguredAirbyteCatalog, limits: TestReadLimits) -> AirbyteMessage: +def read_stream( + source: DeclarativeSource, config: Mapping[str, Any], configured_catalog: ConfiguredAirbyteCatalog, limits: TestReadLimits +) -> AirbyteMessage: try: handler = MessageGrouper(limits.max_pages_per_slice, limits.max_slices) stream_name = configured_catalog.streams[0].stream.name # The connector builder only supports a single stream @@ -90,7 +92,13 @@ def resolve_manifest(source: ManifestDeclarativeSource) -> AirbyteMessage: def list_streams(source: ManifestDeclarativeSource, config: Dict[str, Any]) -> AirbyteMessage: try: streams = [ - {"name": http_stream.name, "url": urljoin(http_stream.url_base, http_stream.path())} + { + "name": http_stream.name, + "url": urljoin( + http_stream.requester.get_url_base(), + http_stream.requester.get_path(stream_state=None, stream_slice=None, next_page_token=None), + ), + } for http_stream in _get_http_streams(source, config) ] return AirbyteMessage( @@ -105,20 +113,20 @@ def list_streams(source: ManifestDeclarativeSource, config: Dict[str, Any]) -> A return AirbyteTracedException.from_exception(exc, message=f"Error listing streams: {str(exc)}").as_airbyte_message() -def _get_http_streams(source: ManifestDeclarativeSource, config: Dict[str, Any]) -> List[HttpStream]: +def _get_http_streams(source: ManifestDeclarativeSource, config: Dict[str, Any]) -> List[SimpleRetriever]: http_streams = [] for stream in source.streams(config=config): if isinstance(stream, DeclarativeStream): - if isinstance(stream.retriever, HttpStream): + if isinstance(stream.retriever, SimpleRetriever): http_streams.append(stream.retriever) else: raise TypeError( - f"A declarative stream should only have a retriever of type 
HttpStream, but received: {stream.retriever.__class__}" + f"A declarative stream should only have a retriever of type SimpleRetriever, but received: {stream.retriever.__class__}" ) else: raise TypeError(f"A declarative source should only contain streams of type DeclarativeStream, but received: {stream.__class__}") return http_streams -def _emitted_at(): +def _emitted_at() -> int: return int(datetime.now().timestamp()) * 1000 diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/declarative_stream.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/declarative_stream.py index 1730e8a330263..05defacbc3a85 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/declarative_stream.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/declarative_stream.py @@ -37,14 +37,13 @@ class DeclarativeStream(Stream): schema_loader: Optional[SchemaLoader] = None _name: str = field(init=False, repr=False, default="") _primary_key: str = field(init=False, repr=False, default="") - _schema_loader: SchemaLoader = field(init=False, repr=False, default=None) stream_cursor_field: Optional[Union[InterpolatedString, str]] = None - def __post_init__(self, parameters: Mapping[str, Any]): - self.stream_cursor_field = InterpolatedString.create(self.stream_cursor_field, parameters=parameters) + def __post_init__(self, parameters: Mapping[str, Any]) -> None: + self._stream_cursor_field = InterpolatedString.create(self.stream_cursor_field, parameters=parameters) if isinstance(self.stream_cursor_field, str) else self.stream_cursor_field self._schema_loader = self.schema_loader if self.schema_loader else DefaultSchemaLoader(config=self.config, parameters=parameters) - @property + @property # type: ignore def primary_key(self) -> Optional[Union[str, List[str], List[List[str]]]]: return self._primary_key @@ -53,7 +52,7 @@ def primary_key(self, value: str) -> None: if not isinstance(value, property): self._primary_key = value - @property + @property # type: ignore def 
name(self) -> str: """ :return: Stream name. By default this is the implementing class name, but it can be overridden as needed. @@ -67,14 +66,14 @@ def name(self, value: str) -> None: @property def state(self) -> MutableMapping[str, Any]: - return self.retriever.state + return self.retriever.state # type: ignore @state.setter - def state(self, value: MutableMapping[str, Any]): + def state(self, value: MutableMapping[str, Any]) -> None: """State setter, accept state serialized by state getter.""" self.retriever.state = value - def get_updated_state(self, current_stream_state: MutableMapping[str, Any], latest_record: Mapping[str, Any]): + def get_updated_state(self, current_stream_state: MutableMapping[str, Any], latest_record: Mapping[str, Any]) -> MutableMapping[str, Any]: return self.state @property @@ -83,22 +82,22 @@ def cursor_field(self) -> Union[str, List[str]]: Override to return the default cursor field used by this stream e.g: an API entity might always use created_at as the cursor field. :return: The name of the field used as a cursor. If the cursor is nested, return an array consisting of the path to the cursor. """ - cursor = self.stream_cursor_field.eval(self.config) + cursor = self._stream_cursor_field.eval(self.config) return cursor if cursor else [] def read_records( self, sync_mode: SyncMode, - cursor_field: List[str] = None, - stream_slice: Mapping[str, Any] = None, - stream_state: Mapping[str, Any] = None, + cursor_field: List[str] = None, # type: ignore + stream_slice: Mapping[str, Any] = None, # type: ignore + stream_state: Mapping[str, Any] = None, # type: ignore ) -> Iterable[Mapping[str, Any]]: """ :param: stream_state We knowingly avoid using stream_state as we want cursors to manage their own state. 
""" - yield from self.retriever.read_records(sync_mode, cursor_field, stream_slice) + yield from self.retriever.read_records(stream_slice) - def get_json_schema(self) -> Mapping[str, Any]: + def get_json_schema(self) -> Mapping[str, Any]: # type: ignore """ :return: A dict of the JSON schema representing this stream. @@ -108,7 +107,7 @@ def get_json_schema(self) -> Mapping[str, Any]: return self._schema_loader.get_json_schema() def stream_slices( - self, *, sync_mode: SyncMode, cursor_field: List[str] = None, stream_state: Mapping[str, Any] = None + self, *, sync_mode: SyncMode, cursor_field: List[str] = None, stream_state: Mapping[str, Any] = None # type: ignore ) -> Iterable[Optional[Mapping[str, Any]]]: """ Override to define the slices for this stream. See the stream slicing section of the docs for more information. diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py index 2aec0e32d5541..4bd95501a38a8 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py @@ -695,6 +695,7 @@ def create_http_requester(self, model: HttpRequesterModel, config: Config, *, na http_method=model_http_method, request_options_provider=request_options_provider, config=config, + disable_retries=self._disable_retries, parameters=model.parameters or {}, ) @@ -912,7 +913,6 @@ def create_simple_retriever( config=config, maximum_number_of_slices=self._limit_slices_fetched or 5, parameters=model.parameters or {}, - disable_retries=self._disable_retries, message_repository=self._message_repository, ) return SimpleRetriever( @@ -925,7 +925,6 @@ def create_simple_retriever( cursor=cursor, config=config, parameters=model.parameters or {}, - disable_retries=self._disable_retries, 
message_repository=self._message_repository, ) diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/http_requester.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/http_requester.py index faeeadb52628a..2e5ac6cb6a9d7 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/http_requester.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/http_requester.py @@ -153,17 +153,6 @@ def get_request_body_json( # type: ignore stream_state=stream_state, stream_slice=stream_slice, next_page_token=next_page_token ) - def request_kwargs( - self, - *, - stream_state: Optional[StreamState] = None, - stream_slice: Optional[StreamSlice] = None, - next_page_token: Optional[Mapping[str, Any]] = None, - ) -> Mapping[str, Any]: - # todo: there are a few integrations that override the request_kwargs() method, but the use case for why kwargs over existing - # constructs is a little unclear. We may revisit this, but for now lets leave it out of the DSL - return {} - disable_retries: bool = False _DEFAULT_MAX_RETRY = 5 _DEFAULT_RETRY_FACTOR = 5 @@ -235,6 +224,7 @@ def _get_mapping( def _get_request_options( self, + stream_state: Optional[StreamState], stream_slice: Optional[StreamSlice], next_page_token: Optional[Mapping[str, Any]], requester_method: Callable[..., Optional[Union[Mapping[str, Any], str]]], @@ -246,7 +236,7 @@ def _get_request_options( Raise a ValueError if there's a key collision Returned merged mapping otherwise """ - requester_mapping, requester_keys = self._get_mapping(requester_method, stream_slice=stream_slice, next_page_token=next_page_token) + requester_mapping, requester_keys = self._get_mapping(requester_method, stream_state=stream_state, stream_slice=stream_slice, next_page_token=next_page_token) auth_options_mapping, auth_options_keys = self._get_mapping(auth_options_method) extra_options = extra_options or {} extra_mapping, extra_keys = self._get_mapping(lambda: extra_options) @@ 
-254,8 +244,12 @@ def _get_request_options( all_mappings = [requester_mapping, auth_options_mapping, extra_mapping] all_keys = [requester_keys, auth_options_keys, extra_keys] + string_options = sum(isinstance(mapping, str) for mapping in all_mappings) # If more than one mapping is a string, raise a ValueError - if sum(isinstance(mapping, str) for mapping in all_mappings) > 1: + if string_options > 1: + raise ValueError("Cannot combine multiple options if one is a string") + + if string_options == 1 and sum(len(keys) for keys in all_keys) > 0: raise ValueError("Cannot combine multiple options if one is a string") # If any mapping is a string, return it @@ -274,6 +268,7 @@ def _get_request_options( def _request_headers( self, + stream_state: Optional[StreamState] = None, stream_slice: Optional[StreamSlice] = None, next_page_token: Optional[Mapping[str, Any]] = None, extra_headers: Optional[Mapping[str, Any]] = None, @@ -283,6 +278,7 @@ def _request_headers( Authentication headers will overwrite any overlapping headers returned from this method. """ headers = self._get_request_options( + stream_state, stream_slice, next_page_token, self.get_request_headers, @@ -295,6 +291,7 @@ def _request_headers( def _request_params( self, + stream_state: Optional[StreamState], stream_slice: Optional[StreamSlice], next_page_token: Optional[Mapping[str, Any]], extra_params: Optional[Mapping[str, Any]] = None, @@ -305,7 +302,7 @@ def _request_params( E.g: you might want to define query parameters for paging if next_page_token is not None. 
""" options = self._get_request_options( - stream_slice, next_page_token, self.get_request_params, self.get_authenticator().get_request_params, extra_params + stream_state, stream_slice, next_page_token, self.get_request_params, self.get_authenticator().get_request_params, extra_params ) if isinstance(options, str): raise ValueError("Request params cannot be a string") @@ -313,6 +310,7 @@ def _request_params( def _request_body_data( self, + stream_state: Optional[StreamState], stream_slice: Optional[StreamSlice], next_page_token: Optional[Mapping[str, Any]], extra_body_data: Optional[Union[Mapping[str, Any], str]] = None, @@ -328,11 +326,12 @@ def _request_body_data( """ # Warning: use self.state instead of the stream_state passed as argument! return self._get_request_options( - stream_slice, next_page_token, self.get_request_body_data, self.get_authenticator().get_request_body_data, extra_body_data + stream_state, stream_slice, next_page_token, self.get_request_body_data, self.get_authenticator().get_request_body_data, extra_body_data ) def _request_body_json( self, + stream_state: Optional[StreamState], stream_slice: Optional[StreamSlice], next_page_token: Optional[Mapping[str, Any]], extra_body_json: Optional[Mapping[str, Any]] = None, @@ -344,7 +343,7 @@ def _request_body_json( """ # Warning: use self.state instead of the stream_state passed as argument! 
options = self._get_request_options( - stream_slice, next_page_token, self.get_request_body_json, self.get_authenticator().get_request_body_json, extra_body_json + stream_state, stream_slice, next_page_token, self.get_request_body_json, self.get_authenticator().get_request_body_json, extra_body_json ) if isinstance(options, str): raise ValueError("Request body json cannot be a string") @@ -374,6 +373,7 @@ def _create_prepared_request( def send_request( self, + stream_state: Optional[StreamState] = None, stream_slice: Optional[StreamSlice] = None, next_page_token: Optional[Mapping[str, Any]] = None, path: Optional[str] = None, @@ -383,11 +383,11 @@ def send_request( request_body_json: Optional[Mapping[str, Any]] = None, ) -> Optional[requests.Response]: request = self._create_prepared_request( - path=path if path is not None else self.get_path(stream_state=None, stream_slice=stream_slice, next_page_token=next_page_token), - headers=self._request_headers(stream_slice, next_page_token, request_headers), - params=self._request_params(stream_slice, next_page_token, request_params), - json=self._request_body_json(stream_slice, next_page_token, request_body_json), - data=self._request_body_data(stream_slice, next_page_token, request_body_data), + path=path if path is not None else self.get_path(stream_state=stream_state, stream_slice=stream_slice, next_page_token=next_page_token), + headers=self._request_headers(stream_state, stream_slice, next_page_token, request_headers), + params=self._request_params(stream_state, stream_slice, next_page_token, request_params), + json=self._request_body_json(stream_state, stream_slice, next_page_token, request_body_json), + data=self._request_body_data(stream_state, stream_slice, next_page_token, request_body_data), ) response = self._send_with_retry(request) diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/requester.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/requester.py index 
2280b3c1e3493..45240c114f4e8 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/requester.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/requester.py @@ -124,23 +124,10 @@ def get_request_body_json( At the same time only one of the 'request_body_data' and 'request_body_json' functions can be overridden. """ - @abstractmethod - def request_kwargs( - self, - *, - stream_state: Optional[StreamState] = None, - stream_slice: Optional[StreamSlice] = None, - next_page_token: Optional[Mapping[str, Any]] = None, - ) -> Mapping[str, Any]: - """ - Returns a mapping of keyword arguments to be used when creating the HTTP request. - Any option listed in https://docs.python-requests.org/en/latest/api/#requests.adapters.BaseAdapter.send for can be returned from - this method. Note that these options do not conflict with request-level options such as headers, request params, etc.. - """ - @abstractmethod def send_request( self, + stream_state: Optional[StreamState] = None, stream_slice: Optional[StreamSlice] = None, next_page_token: Optional[Mapping[str, Any]] = None, path: Optional[str] = None, diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/retrievers/retriever.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/retrievers/retriever.py index 45f9cce1940b8..843e1d42409c5 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/retrievers/retriever.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/retrievers/retriever.py @@ -20,10 +20,7 @@ class Retriever: @abstractmethod def read_records( self, - sync_mode: SyncMode, - cursor_field: Optional[List[str]] = None, stream_slice: Optional[StreamSlice] = None, - stream_state: Optional[StreamState] = None, ) -> Iterable[StreamData]: """ Fetch a stream's records from an HTTP API source @@ -36,7 +33,7 @@ def read_records( """ @abstractmethod - def stream_slices(self, *, sync_mode: SyncMode, stream_state: Optional[StreamState] = None) -> 
Iterable[Optional[StreamSlice]]: + def stream_slices(self) -> Iterable[Optional[StreamSlice]]: """Returns the stream slices""" @property @@ -56,5 +53,5 @@ def state(self) -> StreamState: @state.setter @abstractmethod - def state(self, value: StreamState): + def state(self, value: StreamState) -> None: """State setter, accept state serialized by state getter.""" diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/retrievers/simple_retriever.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/retrievers/simple_retriever.py index 820314ddc48f1..4599e5735ba21 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/retrievers/simple_retriever.py @@ -4,7 +4,7 @@ from dataclasses import InitVar, dataclass, field from itertools import islice -from typing import Any, Callable, Iterable, List, Mapping, MutableMapping, Optional, Union +from typing import Any, Callable, Iterable, List, Mapping, MutableMapping, Optional, Set, Tuple, Union import requests from airbyte_cdk.models import AirbyteMessage, Level, SyncMode @@ -27,7 +27,7 @@ @dataclass -class SimpleRetriever(Retriever, HttpStream): +class SimpleRetriever(Retriever): """ Retrieves records by synchronously sending requests to fetch records. 
@@ -50,8 +50,6 @@ class SimpleRetriever(Retriever, HttpStream): parameters (Mapping[str, Any]): Additional runtime parameters to be used for string interpolation """ - _DEFAULT_MAX_RETRY = 5 - requester: Requester record_selector: HttpSelector config: Config @@ -63,14 +61,12 @@ class SimpleRetriever(Retriever, HttpStream): paginator: Optional[Paginator] = None stream_slicer: StreamSlicer = SinglePartitionRouter(parameters={}) cursor: Optional[Cursor] = None - disable_retries: bool = False message_repository: MessageRepository = NoopMessageRepository() def __post_init__(self, parameters: Mapping[str, Any]) -> None: self._paginator = self.paginator or NoPagination(parameters=parameters) self._last_response: Optional[requests.Response] = None self._records_from_last_response: List[Record] = [] - HttpStream.__init__(self, self.requester.get_authenticator()) self._parameters = parameters self._name = InterpolatedString(self._name, parameters=parameters) if isinstance(self._name, str) else self._name @@ -86,151 +82,105 @@ def name(self, value: str) -> None: if not isinstance(value, property): self._name = value - @property - def url_base(self) -> str: - return self.requester.get_url_base() - - @property - def http_method(self) -> str: - return str(self.requester.get_method().value) - - @property - def raise_on_http_errors(self) -> bool: - # never raise on http_errors because this overrides the error handler logic... - return False - - @property - def max_retries(self) -> Union[int, None]: - if self.disable_retries: - return 0 - # this will be removed once simple_retriever is decoupled from http_stream - if hasattr(self.requester.error_handler, "max_retries"): # type: ignore - return self.requester.error_handler.max_retries # type: ignore - return self._DEFAULT_MAX_RETRY - - def should_retry(self, response: requests.Response) -> bool: - """ - Specifies conditions for backoff based on the response from the server. 
- - By default, back off on the following HTTP response statuses: - - 429 (Too Many Requests) indicating rate limiting - - 500s to handle transient server errors - - Unexpected but transient exceptions (connection timeout, DNS resolution failed, etc..) are retried by default. - """ - return bool(self.requester.interpret_response_status(response).action == ResponseAction.RETRY) - - def backoff_time(self, response: requests.Response) -> Optional[float]: - """ - Specifies backoff time. - - This method is called only if should_backoff() returns True for the input request. - - :param response: - :return how long to backoff in seconds. The return value may be a floating point number for subsecond precision. Returning None defers backoff - to the default backoff behavior (e.g using an exponential algorithm). - """ - should_retry = self.requester.interpret_response_status(response) - if should_retry.action != ResponseAction.RETRY: - raise ValueError(f"backoff_time can only be applied on retriable response action. Got {should_retry.action}") - assert should_retry.action == ResponseAction.RETRY - return should_retry.retry_in - - def error_message(self, response: requests.Response) -> str: + def _get_mapping( + self, method: Callable[..., Optional[Union[Mapping[str, Any], str]]], **kwargs: Any + ) -> Tuple[Union[Mapping[str, Any], str], Set[str]]: """ - Constructs an error message which can incorporate the HTTP response received from the partner API. - - :param response: The incoming HTTP response from the partner API - :return The error message string to be emitted + Get mapping from the provided method, and get the keys of the mapping. + If the method returns a string, it will return the string and an empty set. + If the method returns a dict, it will return the dict and its keys. 
""" - return self.requester.interpret_response_status(response).error_message + mapping = method(**kwargs) or {} + keys = set(mapping.keys()) if not isinstance(mapping, str) else set() + return mapping, keys def _get_request_options( self, + stream_state: Optional[StreamData], stream_slice: Optional[StreamSlice], next_page_token: Optional[Mapping[str, Any]], - requester_method: Callable[..., Mapping[str, Any]], - paginator_method: Callable[..., Mapping[str, Any]], - stream_slicer_method: Callable[..., Mapping[str, Any]], - auth_options_method: Callable[..., Mapping[str, Any]], - ) -> MutableMapping[str, Any]: + paginator_method: Callable[..., Optional[Union[Mapping[str, Any], str]]], + stream_slicer_method: Callable[..., Optional[Union[Mapping[str, Any], str]]], + ) -> Union[Mapping[str, Any], str]: """ - Get the request_option from the requester and from the paginator + Get the request_option from the requester, the authenticator and extra_options passed in. Raise a ValueError if there's a key collision Returned merged mapping otherwise - :param stream_slice: - :param next_page_token: - :param requester_method: - :param paginator_method: - :return: """ + paginator_mapping, paginator_keys = self._get_mapping(paginator_method, stream_state=stream_state, stream_slice=stream_slice, next_page_token=next_page_token) + stream_slicer_mapping, stream_slicer_keys = self._get_mapping(stream_slicer_method, stream_state=stream_state, stream_slice=stream_slice, next_page_token=next_page_token) - # FIXME we should eventually remove the usage of stream_state as part of the interpolation - requester_mapping = requester_method(stream_state=self.state, stream_slice=stream_slice, next_page_token=next_page_token) - requester_mapping_keys = set(requester_mapping.keys()) - paginator_mapping = paginator_method(stream_state=self.state, stream_slice=stream_slice, next_page_token=next_page_token) - paginator_mapping_keys = set(paginator_mapping.keys()) - stream_slicer_mapping = 
stream_slicer_method(stream_slice=stream_slice) - stream_slicer_mapping_keys = set(stream_slicer_mapping.keys()) - auth_options_mapping = auth_options_method() - auth_options_mapping_keys = set(auth_options_mapping.keys()) - - intersection = ( - (requester_mapping_keys & paginator_mapping_keys) - | (requester_mapping_keys & stream_slicer_mapping_keys) - | (paginator_mapping_keys & stream_slicer_mapping_keys) - | (requester_mapping_keys & auth_options_mapping_keys) - | (paginator_mapping_keys & auth_options_mapping_keys) - | (stream_slicer_mapping_keys & auth_options_mapping_keys) - ) - if intersection: + all_mappings = [paginator_mapping, stream_slicer_mapping] + all_keys = [paginator_keys, stream_slicer_keys] + + string_options = sum(isinstance(mapping, str) for mapping in all_mappings) + # If more than one mapping is a string, raise a ValueError + if string_options > 1: + raise ValueError("Cannot combine multiple options if one is a string") + + if string_options == 1 and sum(len(keys) for keys in all_keys) > 0: + raise ValueError("Cannot combine multiple options if one is a string") + + # If any mapping is a string, return it + for mapping in all_mappings: + if isinstance(mapping, str): + return mapping + + # If there are duplicate keys across mappings, raise a ValueError + intersection = set().union(*all_keys) + if len(intersection) < sum(len(keys) for keys in all_keys): raise ValueError(f"Duplicate keys found: {intersection}") - return {**requester_mapping, **paginator_mapping, **stream_slicer_mapping, **auth_options_mapping} - def request_headers( - self, stream_state: StreamState, stream_slice: Optional[StreamSlice] = None, next_page_token: Optional[Mapping[str, Any]] = None + # Return the combined mappings + # ignore type because mypy doesn't follow all mappings being dicts + return {**paginator_mapping, **stream_slicer_mapping} # type: ignore + + def _request_headers( + self, stream_state: Optional[StreamData] = None, stream_slice: Optional[StreamSlice] = 
None, next_page_token: Optional[Mapping[str, Any]] = None ) -> Mapping[str, Any]: """ Specifies request headers. Authentication headers will overwrite any overlapping headers returned from this method. """ headers = self._get_request_options( + stream_state, stream_slice, next_page_token, - self.requester.get_request_headers, self._paginator.get_request_headers, self.stream_slicer.get_request_headers, - # auth headers are handled separately by passing the authenticator to the HttpStream constructor - lambda: {}, ) + if isinstance(headers, str): + raise ValueError("Request headers cannot be a string") return {str(k): str(v) for k, v in headers.items()} - def request_params( + def _request_params( self, - stream_state: StreamSlice, + stream_state: Optional[StreamData] = None, stream_slice: Optional[StreamSlice] = None, next_page_token: Optional[Mapping[str, Any]] = None, - ) -> MutableMapping[str, Any]: + ) -> Mapping[str, Any]: """ Specifies the query parameters that should be set on an outgoing HTTP request given the inputs. E.g: you might want to define query parameters for paging if next_page_token is not None. """ - return self._get_request_options( + params = self._get_request_options( + stream_state, stream_slice, next_page_token, - self.requester.get_request_params, self._paginator.get_request_params, self.stream_slicer.get_request_params, - self.requester.get_authenticator().get_request_params, ) + if isinstance(params, str): + raise ValueError("Request params cannot be a string") + return params - def request_body_data( + def _request_body_data( self, - stream_state: StreamState, + stream_state: Optional[StreamData] = None, stream_slice: Optional[StreamSlice] = None, next_page_token: Optional[Mapping[str, Any]] = None, - ) -> Optional[Union[Mapping[str, Any], str]]: + ) -> Union[Mapping[str, Any], str]: """ Specifies how to populate the body of the request with a non-JSON payload. 
@@ -240,31 +190,17 @@ def request_body_data( At the same time only one of the 'request_body_data' and 'request_body_json' functions can be overridden. """ - # Warning: use self.state instead of the stream_state passed as argument! - base_body_data = self.requester.get_request_body_data( - stream_state=self.state, stream_slice=stream_slice, next_page_token=next_page_token - ) - if isinstance(base_body_data, str): - paginator_body_data = self._paginator.get_request_body_data() - if paginator_body_data: - raise ValueError( - f"Cannot combine requester's body data= {base_body_data} with paginator's body_data: {paginator_body_data}" - ) - else: - return base_body_data return self._get_request_options( + stream_state, stream_slice, next_page_token, - # body data can be a string as well, this will be fixed in the rewrite using http requester instead of http stream - self.requester.get_request_body_data, # type: ignore - self._paginator.get_request_body_data, # type: ignore - self.stream_slicer.get_request_body_data, # type: ignore - self.requester.get_authenticator().get_request_body_data, # type: ignore + self._paginator.get_request_body_data, + self.stream_slicer.get_request_body_data, ) - def request_body_json( + def _request_body_json( self, - stream_state: StreamState, + stream_state: Optional[StreamData] = None, stream_slice: Optional[StreamSlice] = None, next_page_token: Optional[Mapping[str, Any]] = None, ) -> Optional[Mapping[str, Any]]: @@ -273,93 +209,44 @@ def request_body_json( At the same time only one of the 'request_body_data' and 'request_body_json' functions can be overridden. """ - # Warning: use self.state instead of the stream_state passed as argument! 
- return self._get_request_options( + body_json = self._get_request_options( + stream_state, stream_slice, next_page_token, - # body json can be None as well, this will be fixed in the rewrite using http requester instead of http stream - self.requester.get_request_body_json, # type: ignore - self._paginator.get_request_body_json, # type: ignore - self.stream_slicer.get_request_body_json, # type: ignore - self.requester.get_authenticator().get_request_body_json, # type: ignore + self._paginator.get_request_body_json, + self.stream_slicer.get_request_body_json, ) + if isinstance(body_json, str): + raise ValueError("Request body json cannot be a string") + return body_json - def request_kwargs( + def _path( self, - stream_state: StreamState, - stream_slice: Optional[StreamSlice] = None, - next_page_token: Optional[Mapping[str, Any]] = None, - ) -> Mapping[str, Any]: - """ - Specifies how to configure a mapping of keyword arguments to be used when creating the HTTP request. - Any option listed in https://docs.python-requests.org/en/latest/api/#requests.adapters.BaseAdapter.send for can be returned from - this method. Note that these options do not conflict with request-level options such as headers, request params, etc.. + ) -> Optional[str]: """ - # Warning: use self.state instead of the stream_state passed as argument! - return self.requester.request_kwargs(stream_state=self.state, stream_slice=stream_slice, next_page_token=next_page_token) - - def path( - self, - *, - stream_state: Optional[StreamState] = None, - stream_slice: Optional[StreamSlice] = None, - next_page_token: Optional[Mapping[str, Any]] = None, - ) -> str: - """ - Return the path the submit the next request to. - If the paginator points to a path, follow it, else return the requester's path + If the paginator points to a path, follow it, else return nothing so the requester is used. 
:param stream_state: :param stream_slice: :param next_page_token: :return: """ - # Warning: use self.state instead of the stream_state passed as argument! - paginator_path = self._paginator.path() - if paginator_path: - return paginator_path - else: - return self.requester.get_path(stream_state=self.state, stream_slice=stream_slice, next_page_token=next_page_token) - - @property - def cache_filename(self) -> str: - """ - TODO remove once simple retriever doesn't rely on HttpStream - """ - return f"{self.name}.yml" + return self._paginator.path() - @property - def use_cache(self) -> bool: - """ - TODO remove once simple retriever doesn't rely on HttpStream - """ - return False - - def parse_response( + def _parse_response( self, - response: requests.Response, - *, + response: Optional[requests.Response], stream_state: StreamState, stream_slice: Optional[StreamSlice] = None, next_page_token: Optional[Mapping[str, Any]] = None, ) -> Iterable[Record]: - # if fail -> raise exception - # if ignore -> ignore response and return no records - # else -> delegate to record selector - response_status = self.requester.interpret_response_status(response) - if response_status.action == ResponseAction.FAIL: - error_message = ( - response_status.error_message - or f"Request to {response.request.url} failed with status code {response.status_code} and error message {HttpStream.parse_response_error_message(response)}" - ) - raise ReadException(error_message) - elif response_status.action == ResponseAction.IGNORE: - self.logger.info(f"Ignoring response for failed request with error message {HttpStream.parse_response_error_message(response)}") + if not response: + self._last_response = None + self._records_from_last_response = [] return [] - # Warning: use self.state instead of the stream_state passed as argument! 
self._last_response = response records = self.record_selector.select_records( - response=response, stream_state=self.state, stream_slice=stream_slice, next_page_token=next_page_token + response=response, stream_state=stream_state, stream_slice=stream_slice, next_page_token=next_page_token ) self._records_from_last_response = records return records @@ -374,7 +261,7 @@ def primary_key(self, value: str) -> None: if not isinstance(value, property): self._primary_key = value - def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]: + def _next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]: """ Specifies a pagination strategy. @@ -384,14 +271,52 @@ def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, """ return self._paginator.next_page_token(response, self._records_from_last_response) + def _fetch_next_page( + self, stream_state: Mapping[str, Any], stream_slice: Mapping[str, Any], next_page_token: Optional[Mapping[str, Any]] = None + ) -> Optional[requests.Response]: + response = self.requester.send_request( + path=self._path(), + stream_state=stream_state, + stream_slice=stream_slice, + next_page_token=next_page_token, + request_headers=self._request_headers(stream_state=stream_state, stream_slice=stream_slice, next_page_token=next_page_token), + request_params=self._request_params(stream_state=stream_state, stream_slice=stream_slice, next_page_token=next_page_token), + request_body_data=self._request_body_data(stream_state=stream_state, stream_slice=stream_slice, next_page_token=next_page_token), + request_body_json=self._request_body_json(stream_state=stream_state, stream_slice=stream_slice, next_page_token=next_page_token), + ) + + return response + + def _read_pages( + self, + records_generator_fn: Callable[ + [Optional[requests.Response], Mapping[str, Any], Mapping[str, Any]], Iterable[StreamData] + ], + stream_state: Mapping[str, Any], + stream_slice: Mapping[str, Any], + ) 
-> Iterable[StreamData]: + stream_state = stream_state or {} + pagination_complete = False + next_page_token = None + while not pagination_complete: + response = self._fetch_next_page(stream_state, stream_slice, next_page_token) + yield from records_generator_fn(response, stream_state, stream_slice) + + if not response: + pagination_complete = True + else: + next_page_token = self._next_page_token(response) + if not next_page_token: + pagination_complete = True + + # Always return an empty generator just in case no records were ever yielded + yield from [] + + def read_records( self, - sync_mode: SyncMode, - cursor_field: Optional[List[str]] = None, stream_slice: Optional[StreamSlice] = None, - stream_state: Optional[StreamState] = None, ) -> Iterable[StreamData]: - # Warning: use self.state instead of the stream_state passed as argument! stream_slice = stream_slice or {} # None-check # Fixing paginator types has a long tail of dependencies self._paginator.reset() # type: ignore @@ -414,7 +339,7 @@ def read_records( slice_state = {} most_recent_record_from_slice = None - for stream_data in self._read_pages(self.parse_records, stream_slice, slice_state): + for stream_data in self._read_pages(self._parse_records, slice_state, stream_slice): most_recent_record_from_slice = self._get_most_recent_record(most_recent_record_from_slice, stream_data, stream_slice) yield stream_data @@ -458,7 +383,6 @@ def stream_slices(self) -> Iterable[Optional[Mapping[str, Any]]]: # type: ignor :param stream_state: :return: """ - # Warning: use self.state instead of the stream_state passed as argument! 
return self.stream_slicer.stream_slices() @property @@ -471,14 +395,13 @@ def state(self, value: StreamState) -> None: if self.cursor: self.cursor.set_initial_state(value) - def parse_records( + def _parse_records( self, - request: requests.PreparedRequest, - response: requests.Response, + response: Optional[requests.Response], stream_state: Mapping[str, Any], stream_slice: Mapping[str, Any], ) -> Iterable[StreamData]: - yield from self.parse_response(response, stream_slice=stream_slice, stream_state=stream_state) + yield from self._parse_response(response, stream_slice=stream_slice, stream_state=stream_state) @dataclass @@ -501,20 +424,21 @@ def __post_init__(self, options: Mapping[str, Any]) -> None: def stream_slices(self) -> Iterable[Optional[Mapping[str, Any]]]: # type: ignore return islice(super().stream_slices(), self.maximum_number_of_slices) - def parse_records( + def _parse_records( self, - request: requests.PreparedRequest, - response: requests.Response, + response: Optional[requests.Response], stream_state: Mapping[str, Any], stream_slice: Mapping[str, Any], ) -> Iterable[StreamData]: - self.message_repository.log_message( - Level.DEBUG, - lambda: format_http_message( - response, - f"Stream '{self.name}' request", - f"Request performed in order to extract records for stream '{self.name}'", - self.name, - ), - ) - yield from self.parse_response(response, stream_slice=stream_slice, stream_state=stream_state) + if response is not None: + current_response = response + self.message_repository.log_message( + Level.DEBUG, + lambda: format_http_message( + current_response, + f"Stream '{self.name}' request", + f"Request performed in order to extract records for stream '{self.name}'", + self.name, + ), + ) + yield from self._parse_response(response, stream_slice=stream_slice, stream_state=stream_state) diff --git a/airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_handler.py 
b/airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_handler.py index 4245ccc9d1298..1b428eadd7f47 100644 --- a/airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_handler.py +++ b/airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_handler.py @@ -8,7 +8,7 @@ import logging import os from unittest import mock -from unittest.mock import patch +from unittest.mock import MagicMock, patch import pytest import requests @@ -43,7 +43,7 @@ from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource from airbyte_cdk.sources.declarative.retrievers import SimpleRetrieverTestReadDecorator from airbyte_cdk.sources.streams.core import Stream -from airbyte_cdk.sources.streams.http import HttpStream +from airbyte_cdk.sources.declarative.retrievers.simple_retriever import SimpleRetriever from unit_tests.connector_builder.utils import create_configured_catalog _stream_name = "stream_with_custom_requester" @@ -607,7 +607,7 @@ def test_given_declarative_stream_retriever_is_not_http_when_list_streams_then_r assert error_message.type == MessageType.TRACE assert error_message.trace.error.message.startswith("Error listing streams") - assert "A declarative stream should only have a retriever of type HttpStream" in error_message.trace.error.internal_message + assert "A declarative stream should only have a retriever of type SimpleRetriever" in error_message.trace.error.internal_message def test_given_unexpected_error_when_list_streams_then_return_exception_message(manifest_declarative_source): @@ -635,10 +635,12 @@ def test_list_streams_integration_test(): def create_mock_http_stream(name, url_base, path): - http_stream = mock.Mock(spec=HttpStream, autospec=True) + http_stream = mock.Mock(spec=SimpleRetriever, autospec=True) http_stream.name = name - http_stream.url_base = url_base - http_stream.path.return_value = path + http_stream.requester = MagicMock() + 
http_stream.requester.get_url_base.return_value = url_base + http_stream.requester.get_path.return_value = path + http_stream._path.return_value = None return http_stream @@ -676,7 +678,7 @@ def test_create_source(): assert isinstance(source, ManifestDeclarativeSource) assert source._constructor._limit_pages_fetched_per_slice == limits.max_pages_per_slice assert source._constructor._limit_slices_fetched == limits.max_slices - assert source.streams(config={})[0].retriever.max_retries == 0 + assert source.streams(config={})[0].retriever.requester.max_retries == 0 def request_log_message(request: dict) -> AirbyteMessage: @@ -704,10 +706,10 @@ def _create_response(body, request): def _create_page(response_body): request = _create_request() - return request, _create_response(response_body, request) + return _create_response(response_body, request) -@patch.object(HttpStream, "_fetch_next_page", side_effect=(_create_page({"result": [{"id": 0}, {"id": 1}],"_metadata": {"next": "next"}}), _create_page({"result": [{"id": 2}],"_metadata": {"next": "next"}})) * 10) +@patch.object(SimpleRetriever, "_fetch_next_page", side_effect=(_create_page({"result": [{"id": 0}, {"id": 1}],"_metadata": {"next": "next"}}), _create_page({"result": [{"id": 2}],"_metadata": {"next": "next"}})) * 10) def test_read_source(mock_http_stream): """ This test sort of acts as an integration test for the connector builder. 
@@ -748,7 +750,7 @@ def test_read_source(mock_http_stream): assert isinstance(s.retriever, SimpleRetrieverTestReadDecorator) -@patch.object(HttpStream, "_fetch_next_page", side_effect=(_create_page({"result": [{"id": 0}, {"id": 1}],"_metadata": {"next": "next"}}), _create_page({"result": [{"id": 2}],"_metadata": {"next": "next"}}))) +@patch.object(SimpleRetriever, "_fetch_next_page", side_effect=(_create_page({"result": [{"id": 0}, {"id": 1}],"_metadata": {"next": "next"}}), _create_page({"result": [{"id": 2}],"_metadata": {"next": "next"}}))) def test_read_source_single_page_single_slice(mock_http_stream): max_records = 100 max_pages_per_slice = 1 diff --git a/airbyte-cdk/python/unit_tests/sources/declarative/incremental/test_per_partition_cursor_integration.py b/airbyte-cdk/python/unit_tests/sources/declarative/incremental/test_per_partition_cursor_integration.py index d2b3503c777e3..eac490bfb89ac 100644 --- a/airbyte-cdk/python/unit_tests/sources/declarative/incremental/test_per_partition_cursor_integration.py +++ b/airbyte-cdk/python/unit_tests/sources/declarative/incremental/test_per_partition_cursor_integration.py @@ -8,7 +8,7 @@ from airbyte_cdk.sources.declarative.incremental.per_partition_cursor import PerPartitionStreamSlice from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource from airbyte_cdk.sources.declarative.types import Record -from airbyte_cdk.sources.streams.http import HttpStream +from airbyte_cdk.sources.declarative.retrievers.simple_retriever import SimpleRetriever CURSOR_FIELD = "cursor_field" SYNC_MODE = SyncMode.incremental @@ -147,7 +147,7 @@ def test_given_record_for_partition_when_read_then_update_state(): list(stream_instance.stream_slices(sync_mode=SYNC_MODE)) stream_slice = PerPartitionStreamSlice({"partition_field": "1"}, {"start_time": "2022-01-01", "end_time": "2022-01-31"}) - with patch.object(HttpStream, "_read_pages", side_effect=[[Record({"a record key": "a record value", 
CURSOR_FIELD: "2022-01-15"}, stream_slice)]]): + with patch.object(SimpleRetriever, "_read_pages", side_effect=[[Record({"a record key": "a record value", CURSOR_FIELD: "2022-01-15"}, stream_slice)]]): list( stream_instance.read_records( sync_mode=SYNC_MODE, diff --git a/airbyte-cdk/python/unit_tests/sources/declarative/parsers/test_model_to_component_factory.py b/airbyte-cdk/python/unit_tests/sources/declarative/parsers/test_model_to_component_factory.py index 17dccc4bac498..7d9fb6bbd0ac8 100644 --- a/airbyte-cdk/python/unit_tests/sources/declarative/parsers/test_model_to_component_factory.py +++ b/airbyte-cdk/python/unit_tests/sources/declarative/parsers/test_model_to_component_factory.py @@ -203,7 +203,7 @@ def test_full_config_stream(): assert isinstance(stream, DeclarativeStream) assert stream.primary_key == "id" assert stream.name == "lists" - assert stream.stream_cursor_field.string == "created" + assert stream._stream_cursor_field.string == "created" assert isinstance(stream.schema_loader, JsonFileSchemaLoader) assert stream.schema_loader._get_json_filepath() == "./source_sendgrid/schemas/lists.json" @@ -1542,26 +1542,15 @@ def test_simple_retriever_emit_log_messages(): def test_ignore_retry(): requester_model = { - "type": "SimpleRetriever", - "record_selector": { - "type": "RecordSelector", - "extractor": { - "type": "DpathExtractor", - "field_path": [], - }, - }, - "requester": {"type": "HttpRequester", "name": "list", "url_base": "orange.com", "path": "/v1/api"}, + "type": "HttpRequester", "name": "list", "url_base": "orange.com", "path": "/v1/api", } connector_builder_factory = ModelToComponentFactory(disable_retries=True) - retriever = connector_builder_factory.create_component( - model_type=SimpleRetrieverModel, + requester = connector_builder_factory.create_component( + model_type=HttpRequesterModel, component_definition=requester_model, config={}, name="Test", - primary_key="id", - stream_slicer=None, - transformations=[] ) - assert 
retriever.max_retries == 0 + assert requester.max_retries == 0 diff --git a/airbyte-cdk/python/unit_tests/sources/declarative/requesters/test_http_requester.py b/airbyte-cdk/python/unit_tests/sources/declarative/requesters/test_http_requester.py index d699513b4ef8a..a29330285d239 100644 --- a/airbyte-cdk/python/unit_tests/sources/declarative/requesters/test_http_requester.py +++ b/airbyte-cdk/python/unit_tests/sources/declarative/requesters/test_http_requester.py @@ -73,7 +73,6 @@ def test_http_requester(): assert requester.get_request_body_data(stream_state={}, stream_slice=None, next_page_token=None) == request_body_data assert requester.get_request_body_json(stream_state={}, stream_slice=None, next_page_token=None) == request_body_json assert requester.interpret_response_status(requests.Response()) == response_status - assert {} == requester.request_kwargs(stream_state={}, stream_slice=None, next_page_token=None) @pytest.mark.parametrize( @@ -212,6 +211,10 @@ def test_send_request_data_json(provider_data, provider_json, param_data, param_ ("field=value", None, "field=value", ValueError, None), (None, "field=value", "field=value", ValueError, None), ("field=value", "field=value", "field=value", ValueError, None), + # assert body string and mapping from different source fails + ("field=value", {"abc": "def"}, None, ValueError, None), + ({"abc": "def"}, "field=value", None, ValueError, None), + ("field=value", None, {"abc": "def"}, ValueError, None), ] ) def test_send_request_string_data(provider_data, param_data, authenticator_data, expected_exception, expected_body): diff --git a/airbyte-cdk/python/unit_tests/sources/declarative/retrievers/test_simple_retriever.py b/airbyte-cdk/python/unit_tests/sources/declarative/retrievers/test_simple_retriever.py index d8030cd5a6dfa..b59a9a021ef85 100644 --- a/airbyte-cdk/python/unit_tests/sources/declarative/retrievers/test_simple_retriever.py +++ 
b/airbyte-cdk/python/unit_tests/sources/declarative/retrievers/test_simple_retriever.py
@@ -33,7 +33,7 @@
 config = {}
 
 
-@patch.object(HttpStream, "_read_pages", return_value=iter([]))
+@patch.object(SimpleRetriever, "_read_pages", return_value=iter([]))
 def test_simple_retriever_full(mock_http_stream):
     requester = MagicMock()
     request_params = {"param": "value"}
@@ -43,6 +43,7 @@ def test_simple_retriever_full(mock_http_stream):
     next_page_token = {"cursor": "cursor_value"}
     paginator.path.return_value = None
     paginator.next_page_token.return_value = next_page_token
+    paginator.get_request_headers.return_value = {}
     record_selector = MagicMock()
     record_selector.select_records.return_value = records
@@ -52,6 +53,7 @@
     cursor.stream_slices.return_value = stream_slices
 
     response = requests.Response()
+    response.status_code = 200
 
     underlying_state = {"date": "2021-01-01"}
     cursor.get_stream_state.return_value = underlying_state
@@ -89,33 +91,22 @@
     )
 
     assert retriever.primary_key == primary_key
-    assert retriever.url_base == url_base
-    assert retriever.path() == path
     assert retriever.state == underlying_state
-    assert retriever.next_page_token(response) == next_page_token
-    assert retriever.request_params(None, None, None) == request_params
+    assert retriever._next_page_token(response) == next_page_token
+    assert retriever._request_params(None, None, None) == {}
     assert retriever.stream_slices() == stream_slices
     assert retriever._last_response is None
     assert retriever._records_from_last_response == []
-    assert retriever.parse_response(response, stream_state={}) == records
+    assert retriever._parse_response(response, stream_state={}) == records
     assert retriever._last_response == response
     assert retriever._records_from_last_response == records
-    assert retriever.http_method == "GET"
-    assert not retriever.raise_on_http_errors
-    assert retriever.should_retry(requests.Response())
-
assert retriever.backoff_time(requests.Response()) == backoff_time - assert retriever.request_body_json(None, None, None) == request_body_json - assert retriever.request_kwargs(None, None, None) == request_kwargs - assert retriever.cache_filename == "stream_name.yml" - assert not retriever.use_cache - [r for r in retriever.read_records(SyncMode.full_refresh)] paginator.reset.assert_called() -@patch.object(HttpStream, "_read_pages", return_value=iter([*request_response_logs, *records])) +@patch.object(SimpleRetriever, "_read_pages", return_value=iter([*request_response_logs, *records])) def test_simple_retriever_with_request_response_logs(mock_http_stream): requester = MagicMock() paginator = MagicMock() @@ -151,13 +142,14 @@ def test_simple_retriever_with_request_response_logs(mock_http_stream): assert actual_messages[3] == records[1] -@patch.object(HttpStream, "_read_pages", return_value=iter([])) +@patch.object(SimpleRetriever, "_read_pages", return_value=iter([])) def test_simple_retriever_with_request_response_log_last_records(mock_http_stream): requester = MagicMock() paginator = MagicMock() record_selector = MagicMock() record_selector.select_records.return_value = request_response_logs response = requests.Response() + response.status_code = 200 stream_slicer = DatetimeBasedCursor( start_datetime="", end_datetime="", @@ -182,7 +174,7 @@ def test_simple_retriever_with_request_response_log_last_records(mock_http_strea assert retriever._last_response is None assert retriever._records_from_last_response == [] - assert retriever.parse_response(response, stream_state={}) == request_response_logs + assert retriever._parse_response(response, stream_state={}) == request_response_logs assert retriever._last_response == response assert retriever._records_from_last_response == request_response_logs @@ -191,153 +183,15 @@ def test_simple_retriever_with_request_response_log_last_records(mock_http_strea @pytest.mark.parametrize( - "test_name, requester_response, 
expected_should_retry, expected_backoff_time", - [ - ("test_should_retry_fail", response_status.FAIL, False, None), - ("test_should_retry_none_backoff", ResponseStatus.retry(None), True, None), - ("test_should_retry_custom_backoff", ResponseStatus.retry(60), True, 60), - ], -) -def test_should_retry(test_name, requester_response, expected_should_retry, expected_backoff_time): - requester = MagicMock(use_cache=False) - retriever = SimpleRetriever( - name="stream_name", primary_key=primary_key, requester=requester, record_selector=MagicMock(), parameters={}, config={} - ) - requester.interpret_response_status.return_value = requester_response - assert retriever.should_retry(requests.Response()) == expected_should_retry - if requester_response.action == ResponseAction.RETRY: - assert retriever.backoff_time(requests.Response()) == expected_backoff_time - - -@pytest.mark.parametrize( - "test_name, status_code, response_status, len_expected_records, expected_error", - [ - ( - "test_parse_response_fails_if_should_retry_is_fail", - 404, - response_status.FAIL, - None, - ReadException("Request None failed with response "), - ), - ("test_parse_response_succeeds_if_should_retry_is_ok", 200, response_status.SUCCESS, 1, None), - ("test_parse_response_succeeds_if_should_retry_is_ignore", 404, response_status.IGNORE, 0, None), - ( - "test_parse_response_fails_with_custom_error_message", - 404, - ResponseStatus(response_action=ResponseAction.FAIL, error_message="Custom error message override"), - None, - ReadException("Custom error message override"), - ), - ], -) -def test_parse_response(test_name, status_code, response_status, len_expected_records, expected_error): - requester = MagicMock(use_cache=False) - record_selector = MagicMock() - record_selector.select_records.return_value = [{"id": 100}] - retriever = SimpleRetriever( - name="stream_name", primary_key=primary_key, requester=requester, record_selector=record_selector, parameters={}, config={} - ) - response = 
requests.Response() - response.request = requests.Request() - response.status_code = status_code - requester.interpret_response_status.return_value = response_status - if len_expected_records is None: - try: - retriever.parse_response(response, stream_state={}) - assert False - except ReadException as actual_exception: - assert type(expected_error) is type(actual_exception) - else: - records = retriever.parse_response(response, stream_state={}) - assert len(records) == len_expected_records - - -def test_max_retries_given_error_handler_has_max_retries(): - requester = MagicMock() - requester.error_handler = MagicMock() - requester.error_handler.max_retries = 10 - retriever = SimpleRetriever( - name="stream_name", - primary_key=primary_key, - requester=requester, - record_selector=MagicMock(), - parameters={}, - config={} - ) - assert retriever.max_retries == 10 - - -def test_max_retries_given_error_handler_without_max_retries(): - requester = MagicMock() - requester.error_handler = MagicMock(spec=[u'without_max_retries_attribute']) - retriever = SimpleRetriever( - name="stream_name", - primary_key=primary_key, - requester=requester, - record_selector=MagicMock(), - parameters={}, - config={} - ) - assert retriever.max_retries == 5 - - -def test_max_retries_given_disable_retries(): - retriever = SimpleRetriever( - name="stream_name", - primary_key=primary_key, - requester=MagicMock(), - record_selector=MagicMock(), - disable_retries=True, - parameters={}, - config={} - ) - assert retriever.max_retries == 0 - - -@pytest.mark.parametrize( - "test_name, response_action, retry_in, expected_backoff_time", + "test_name, paginator_mapping, stream_slicer_mapping, expected_mapping", [ - ("test_backoff_retriable_request", ResponseAction.RETRY, 10, 10), - ("test_backoff_fail_request", ResponseAction.FAIL, 10, None), - ("test_backoff_ignore_request", ResponseAction.IGNORE, 10, None), - ("test_backoff_success_request", ResponseAction.IGNORE, 10, None), + ("test_empty_headers", 
{}, {}, {}), + ("test_header_from_pagination_and_slicer", {"offset": 1000}, {"key": "value"}, {"key": "value", "offset": 1000}), + ("test_header_from_stream_slicer", {}, {"slice": "slice_value"}, {"slice": "slice_value"}), + ("test_duplicate_header_slicer_paginator", {"k": "v"}, {"k": "slice_value"}, None), ], ) -def test_backoff_time(test_name, response_action, retry_in, expected_backoff_time): - requester = MagicMock(use_cache=False) - record_selector = MagicMock() - record_selector.select_records.return_value = [{"id": 100}] - response = requests.Response() - retriever = SimpleRetriever( - name="stream_name", primary_key=primary_key, requester=requester, record_selector=record_selector, parameters={}, config={} - ) - if expected_backoff_time: - requester.interpret_response_status.return_value = ResponseStatus(response_action, retry_in) - actual_backoff_time = retriever.backoff_time(response) - assert expected_backoff_time == actual_backoff_time - else: - try: - retriever.backoff_time(response) - assert False - except ValueError: - pass - - -@pytest.mark.parametrize( - "test_name, paginator_mapping, stream_slicer_mapping, auth_mapping, expected_mapping", - [ - ("test_only_base_headers", {}, {}, {}, {"key": "value"}), - ("test_header_from_pagination", {"offset": 1000}, {}, {}, {"key": "value", "offset": 1000}), - ("test_header_from_stream_slicer", {}, {"slice": "slice_value"}, {}, {"key": "value", "slice": "slice_value"}), - ("test_duplicate_header_slicer", {}, {"key": "slice_value"}, {}, None), - ("test_duplicate_header_slicer_paginator", {"k": "v"}, {"k": "slice_value"}, {}, None), - ("test_duplicate_header_paginator", {"key": 1000}, {}, {}, None), - ("test_only_base_and_auth_headers", {}, {}, {"AuthKey": "secretkey"}, {"key": "value", "AuthKey": "secretkey"}), - ("test_header_from_pagination_and_auth", {"offset": 1000}, {}, {"AuthKey": "secretkey"}, {"key": "value", "offset": 1000, "AuthKey": "secretkey"}), - ("test_duplicate_auth", {}, {"AuthKey": 
"secretkey"}, {"AuthKey": "secretkey"}, None), - ], -) -def test_get_request_options_from_pagination(test_name, paginator_mapping, stream_slicer_mapping, auth_mapping, expected_mapping): +def test_get_request_options_from_pagination(test_name, paginator_mapping, stream_slicer_mapping, expected_mapping): # This test does not test request headers because they must be strings paginator = MagicMock() paginator.get_request_params.return_value = paginator_mapping @@ -349,23 +203,11 @@ def test_get_request_options_from_pagination(test_name, paginator_mapping, strea stream_slicer.get_request_body_data.return_value = stream_slicer_mapping stream_slicer.get_request_body_json.return_value = stream_slicer_mapping - authenticator = MagicMock() - authenticator.get_request_params.return_value = auth_mapping - authenticator.get_request_body_data.return_value = auth_mapping - authenticator.get_request_body_json.return_value = auth_mapping - - base_mapping = {"key": "value"} - requester = MagicMock(use_cache=False) - requester.get_request_params.return_value = base_mapping - requester.get_request_body_data.return_value = base_mapping - requester.get_request_body_json.return_value = base_mapping - requester.get_authenticator.return_value = authenticator - record_selector = MagicMock() retriever = SimpleRetriever( name="stream_name", primary_key=primary_key, - requester=requester, + requester=MagicMock(), record_selector=record_selector, paginator=paginator, stream_slicer=stream_slicer, @@ -374,13 +216,13 @@ def test_get_request_options_from_pagination(test_name, paginator_mapping, strea ) request_option_type_to_method = { - RequestOptionType.request_parameter: retriever.request_params, - RequestOptionType.body_data: retriever.request_body_data, - RequestOptionType.body_json: retriever.request_body_json, + RequestOptionType.request_parameter: retriever._request_params, + RequestOptionType.body_data: retriever._request_body_data, + RequestOptionType.body_json: 
retriever._request_body_json, } for _, method in request_option_type_to_method.items(): - if expected_mapping: + if expected_mapping is not None: actual_mapping = method(None, None, None) assert expected_mapping == actual_mapping else: @@ -405,8 +247,8 @@ def test_get_request_headers(test_name, paginator_mapping, expected_mapping): paginator.get_request_headers.return_value = paginator_mapping requester = MagicMock(use_cache=False) - base_mapping = {"key": "value"} - requester.get_request_headers.return_value = base_mapping + stream_slicer = MagicMock() + stream_slicer.get_request_headers.return_value = {"key": "value"} record_selector = MagicMock() retriever = SimpleRetriever( @@ -414,13 +256,14 @@ def test_get_request_headers(test_name, paginator_mapping, expected_mapping): primary_key=primary_key, requester=requester, record_selector=record_selector, + stream_slicer=stream_slicer, paginator=paginator, parameters={}, config={}, ) request_option_type_to_method = { - RequestOptionType.header: retriever.request_headers, + RequestOptionType.header: retriever._request_headers, } for _, method in request_option_type_to_method.items(): @@ -436,21 +279,22 @@ def test_get_request_headers(test_name, paginator_mapping, expected_mapping): @pytest.mark.parametrize( - "test_name, requester_body_data, paginator_body_data, expected_body_data", + "test_name, slicer_body_data, paginator_body_data, expected_body_data", [ - ("test_only_requester_mapping", {"key": "value"}, {}, {"key": "value"}), - ("test_only_requester_string", "key=value", {}, "key=value"), - ("test_requester_mapping_and_paginator_no_duplicate", {"key": "value"}, {"offset": 1000}, {"key": "value", "offset": 1000}), - ("test_requester_mapping_and_paginator_with_duplicate", {"key": "value"}, {"key": 1000}, None), - ("test_requester_string_and_paginator", "key=value", {"offset": 1000}, None), + ("test_only_slicer_mapping", {"key": "value"}, {}, {"key": "value"}), + ("test_only_slicer_string", "key=value", {}, 
"key=value"), + ("test_slicer_mapping_and_paginator_no_duplicate", {"key": "value"}, {"offset": 1000}, {"key": "value", "offset": 1000}), + ("test_slicer_mapping_and_paginator_with_duplicate", {"key": "value"}, {"key": 1000}, None), + ("test_slicer_string_and_paginator", "key=value", {"offset": 1000}, None), ], ) -def test_request_body_data(test_name, requester_body_data, paginator_body_data, expected_body_data): +def test_request_body_data(test_name, slicer_body_data, paginator_body_data, expected_body_data): paginator = MagicMock() paginator.get_request_body_data.return_value = paginator_body_data requester = MagicMock(use_cache=False) - requester.get_request_body_data.return_value = requester_body_data + stream_slicer = MagicMock() + stream_slicer.get_request_body_data.return_value = slicer_body_data record_selector = MagicMock() retriever = SimpleRetriever( @@ -459,16 +303,17 @@ def test_request_body_data(test_name, requester_body_data, paginator_body_data, requester=requester, record_selector=record_selector, paginator=paginator, + stream_slicer=stream_slicer, parameters={}, config={}, ) if expected_body_data: - actual_body_data = retriever.request_body_data(None, None, None) + actual_body_data = retriever._request_body_data(None, None, None) assert expected_body_data == actual_body_data else: try: - retriever.request_body_data(None, None, None) + assert False == retriever._request_body_data(None, None, None) assert False except ValueError: pass @@ -477,7 +322,7 @@ def test_request_body_data(test_name, requester_body_data, paginator_body_data, @pytest.mark.parametrize( "test_name, requester_path, paginator_path, expected_path", [ - ("test_path_from_requester", "/v1/path", None, "/v1/path"), + ("test_path_from_requester", "/v1/path", None, None), ("test_path_from_paginator", "/v1/path/", "/v2/paginator", "/v2/paginator"), ], ) @@ -499,7 +344,7 @@ def test_path(test_name, requester_path, paginator_path, expected_path): config={}, ) - actual_path = 
retriever.path(stream_state=None, stream_slice=None, next_page_token=None) + actual_path = retriever._path() assert expected_path == actual_path @@ -539,12 +384,14 @@ def test_when_read_records_then_cursor_close_slice_with_greater_record(test_name record_selector.select_records.return_value = records cursor = MagicMock(spec=Cursor) cursor.is_greater_than_or_equal.return_value = first_greater_than_second + paginator = MagicMock() + paginator.get_request_headers.return_value = {} retriever = SimpleRetriever( name="stream_name", primary_key=primary_key, requester=MagicMock(), - paginator=Mock(), + paginator=paginator, record_selector=record_selector, stream_slicer=cursor, cursor=cursor, @@ -553,8 +400,8 @@ def test_when_read_records_then_cursor_close_slice_with_greater_record(test_name ) stream_slice = {"repository": "airbyte"} - with patch.object(HttpStream, "_read_pages", return_value=iter([first_record, second_record]), side_effect=lambda _, __, ___: retriever.parse_records(request=MagicMock(), response=MagicMock(), stream_state=None, stream_slice=stream_slice)): - list(retriever.read_records(sync_mode=SyncMode.incremental, stream_slice=stream_slice)) + with patch.object(SimpleRetriever, "_read_pages", return_value=iter([first_record, second_record]), side_effect=lambda _, __, ___: retriever._parse_records(response=MagicMock(), stream_state=None, stream_slice=stream_slice)): + list(retriever.read_records(stream_slice=stream_slice)) cursor.close_slice.assert_called_once_with(stream_slice, first_record if first_greater_than_second else second_record) @@ -577,22 +424,16 @@ def test_given_stream_data_is_not_record_when_read_records_then_update_slice_wit ) stream_slice = {"repository": "airbyte"} - with patch.object(HttpStream, "_read_pages", return_value=iter(stream_data), side_effect=lambda _, __, ___: retriever.parse_records(request=MagicMock(), response=MagicMock(), stream_state=None, stream_slice=stream_slice)): - 
list(retriever.read_records(sync_mode=SyncMode.incremental, stream_slice=stream_slice)) + with patch.object(SimpleRetriever, "_read_pages", return_value=iter(stream_data), side_effect=lambda _, __, ___: retriever._parse_records(response=MagicMock(), stream_state=None, stream_slice=stream_slice)): + list(retriever.read_records(stream_slice=stream_slice)) cursor.close_slice.assert_called_once_with(stream_slice, None) -def parse_two_pages_and_return_records(retriever, stream_slice, records): - list(retriever.parse_records(request=MagicMock(), response=MagicMock(), stream_state=None, stream_slice=stream_slice)) - list(retriever.parse_records(request=MagicMock(), response=MagicMock(), stream_state=None, stream_slice=stream_slice)) - return records - - def _generate_slices(number_of_slices): return [{"date": f"2022-01-0{day + 1}"} for day in range(number_of_slices)] -@patch.object(HttpStream, "_read_pages", return_value=iter([])) +@patch.object(SimpleRetriever, "_read_pages", return_value=iter([])) def test_given_state_selector_when_read_records_use_slice_state(http_stream_read_pages): requester = MagicMock() paginator = MagicMock() @@ -611,9 +452,9 @@ def test_given_state_selector_when_read_records_use_slice_state(http_stream_read parameters={}, config={}, ) - list(retriever.read_records(SyncMode.incremental, stream_slice=A_STREAM_SLICE)) + list(retriever.read_records(stream_slice=A_STREAM_SLICE)) - http_stream_read_pages.assert_called_once_with(retriever.parse_records, A_STREAM_SLICE, A_SLICE_STATE) + http_stream_read_pages.assert_called_once_with(retriever._parse_records, A_SLICE_STATE, A_STREAM_SLICE) def test_emit_log_request_response_messages(mocker): @@ -642,7 +483,7 @@ def test_emit_log_request_response_messages(mocker): message_repository=message_repository, ) - list(retriever.parse_records(request=request, response=response, stream_slice={}, stream_state={})) + list(retriever._parse_records(response=response, stream_slice={}, stream_state={})) assert 
len(message_repository.log_message.call_args_list) == 1 assert message_repository.log_message.call_args_list[0].args[0] == Level.DEBUG diff --git a/airbyte-cdk/python/unit_tests/sources/declarative/test_manifest_declarative_source.py b/airbyte-cdk/python/unit_tests/sources/declarative/test_manifest_declarative_source.py index 9c28997e917e4..46bac8829745f 100644 --- a/airbyte-cdk/python/unit_tests/sources/declarative/test_manifest_declarative_source.py +++ b/airbyte-cdk/python/unit_tests/sources/declarative/test_manifest_declarative_source.py @@ -25,7 +25,7 @@ ) from airbyte_cdk.sources.declarative.declarative_stream import DeclarativeStream from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource -from airbyte_cdk.sources.streams.http import HttpStream +from airbyte_cdk.sources.declarative.retrievers.simple_retriever import SimpleRetriever from jsonschema.exceptions import ValidationError logger = logging.getLogger("airbyte") @@ -767,7 +767,9 @@ def _create_response(body): def _create_page(response_body): - return _create_request(), _create_response(response_body) + response = _create_response(response_body) + response.request = _create_request() + return response @pytest.mark.parametrize("test_name, manifest, pages, expected_records, expected_calls",[ @@ -1135,7 +1137,7 @@ def _create_page(response_body): (_create_page({"rates": [{"ABC": 0, "partition": 0}, {"AED": 1, "partition": 0}], "_metadata": {"next": "next"}}), _create_page({"rates": [{"ABC": 2, "partition": 1}], "_metadata": {"next": "next"}})), [{"ABC": 0, "partition": 0}, {"AED": 1, "partition": 0}, {"ABC": 2, "partition": 1}], - [call({"partition": "0"}, {}, None), call({"partition": "1"}, {}, None)] + [call({}, {"partition": "0"}, None), call({}, {"partition": "1"}, None)] ), ("test_with_pagination_and_partition_router", { @@ -1236,15 +1238,15 @@ def _create_page(response_body): _create_page({"rates": [{"ABC": 2, "partition": 1}], "_metadata": {}}), ), [{"ABC": 
0, "partition": 0}, {"AED": 1, "partition": 0}, {"USD": 3, "partition": 0}, {"ABC": 2, "partition": 1}], - [call({"partition": "0"}, {}, None), call({"partition": "0"}, {}, {"next_page_token": "next"}), call({"partition": "1"}, {}, None),] + [call({}, {"partition": "0"}, None), call( {}, {"partition": "0"},{"next_page_token": "next"}), call( {}, {"partition": "1"},None),] ) ]) def test_read_manifest_declarative_source(test_name, manifest, pages, expected_records, expected_calls): _stream_name = "Rates" - with patch.object(HttpStream, "_fetch_next_page", side_effect=pages) as mock_http_stream: + with patch.object(SimpleRetriever, "_fetch_next_page", side_effect=pages) as mock_retriever: output_data = [message.record.data for message in _run_read(manifest, _stream_name) if message.record] assert expected_records == output_data - mock_http_stream.assert_has_calls(expected_calls) + mock_retriever.assert_has_calls(expected_calls) def _run_read(manifest: Mapping[str, Any], stream_name: str) -> List[AirbyteMessage]: From ca7dd9c8e99a00ef00fc5219d4a7225e91cbd3bb Mon Sep 17 00:00:00 2001 From: Joe Reuter Date: Tue, 25 Jul 2023 12:04:08 +0200 Subject: [PATCH 02/16] format --- .../sources/declarative/declarative_stream.py | 26 +++++++----- .../declarative/requesters/http_requester.py | 24 ++++++++--- .../declarative/retrievers/retriever.py | 3 +- .../retrievers/simple_retriever.py | 41 +++++++++++-------- .../test_connector_builder_handler.py | 2 +- .../test_per_partition_cursor_integration.py | 2 +- .../retrievers/test_simple_retriever.py | 6 +-- .../test_manifest_declarative_source.py | 2 +- 8 files changed, 63 insertions(+), 43 deletions(-) diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/declarative_stream.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/declarative_stream.py index 05defacbc3a85..328fa654b19ca 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/declarative_stream.py +++ 
b/airbyte-cdk/python/airbyte_cdk/sources/declarative/declarative_stream.py @@ -40,10 +40,14 @@ class DeclarativeStream(Stream): stream_cursor_field: Optional[Union[InterpolatedString, str]] = None def __post_init__(self, parameters: Mapping[str, Any]) -> None: - self._stream_cursor_field = InterpolatedString.create(self.stream_cursor_field, parameters=parameters) if isinstance(self.stream_cursor_field, str) else self.stream_cursor_field + self._stream_cursor_field = ( + InterpolatedString.create(self.stream_cursor_field, parameters=parameters) + if isinstance(self.stream_cursor_field, str) + else self.stream_cursor_field + ) self._schema_loader = self.schema_loader if self.schema_loader else DefaultSchemaLoader(config=self.config, parameters=parameters) - @property # type: ignore + @property # type: ignore def primary_key(self) -> Optional[Union[str, List[str], List[List[str]]]]: return self._primary_key @@ -52,7 +56,7 @@ def primary_key(self, value: str) -> None: if not isinstance(value, property): self._primary_key = value - @property # type: ignore + @property # type: ignore def name(self) -> str: """ :return: Stream name. By default this is the implementing class name, but it can be overridden as needed. 
@@ -66,14 +70,16 @@ def name(self, value: str) -> None: @property def state(self) -> MutableMapping[str, Any]: - return self.retriever.state # type: ignore + return self.retriever.state # type: ignore @state.setter def state(self, value: MutableMapping[str, Any]) -> None: """State setter, accept state serialized by state getter.""" self.retriever.state = value - def get_updated_state(self, current_stream_state: MutableMapping[str, Any], latest_record: Mapping[str, Any]) -> MutableMapping[str, Any]: + def get_updated_state( + self, current_stream_state: MutableMapping[str, Any], latest_record: Mapping[str, Any] + ) -> MutableMapping[str, Any]: return self.state @property @@ -88,16 +94,16 @@ def cursor_field(self) -> Union[str, List[str]]: def read_records( self, sync_mode: SyncMode, - cursor_field: List[str] = None, # type: ignore - stream_slice: Mapping[str, Any] = None, # type: ignore - stream_state: Mapping[str, Any] = None, # type: ignore + cursor_field: List[str] = None, # type: ignore + stream_slice: Mapping[str, Any] = None, # type: ignore + stream_state: Mapping[str, Any] = None, # type: ignore ) -> Iterable[Mapping[str, Any]]: """ :param: stream_state We knowingly avoid using stream_state as we want cursors to manage their own state. """ yield from self.retriever.read_records(stream_slice) - def get_json_schema(self) -> Mapping[str, Any]: # type: ignore + def get_json_schema(self) -> Mapping[str, Any]: # type: ignore """ :return: A dict of the JSON schema representing this stream. @@ -107,7 +113,7 @@ def get_json_schema(self) -> Mapping[str, Any]: # type: ignore return self._schema_loader.get_json_schema() def stream_slices( - self, *, sync_mode: SyncMode, cursor_field: List[str] = None, stream_state: Mapping[str, Any] = None # type: ignore + self, *, sync_mode: SyncMode, cursor_field: List[str] = None, stream_state: Mapping[str, Any] = None # type: ignore ) -> Iterable[Optional[Mapping[str, Any]]]: """ Override to define the slices for this stream. 
See the stream slicing section of the docs for more information. diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/http_requester.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/http_requester.py index 2e5ac6cb6a9d7..d221d20ff76fc 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/http_requester.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/http_requester.py @@ -236,7 +236,9 @@ def _get_request_options( Raise a ValueError if there's a key collision Returned merged mapping otherwise """ - requester_mapping, requester_keys = self._get_mapping(requester_method, stream_state=stream_state, stream_slice=stream_slice, next_page_token=next_page_token) + requester_mapping, requester_keys = self._get_mapping( + requester_method, stream_state=stream_state, stream_slice=stream_slice, next_page_token=next_page_token + ) auth_options_mapping, auth_options_keys = self._get_mapping(auth_options_method) extra_options = extra_options or {} extra_mapping, extra_keys = self._get_mapping(lambda: extra_options) @@ -248,7 +250,7 @@ def _get_request_options( # If more than one mapping is a string, raise a ValueError if string_options > 1: raise ValueError("Cannot combine multiple options if one is a string") - + if string_options == 1 and sum(len(keys) for keys in all_keys) > 0: raise ValueError("Cannot combine multiple options if one is a string") @@ -326,7 +328,12 @@ def _request_body_data( """ # Warning: use self.state instead of the stream_state passed as argument! 
return self._get_request_options( - stream_state, stream_slice, next_page_token, self.get_request_body_data, self.get_authenticator().get_request_body_data, extra_body_data + stream_state, + stream_slice, + next_page_token, + self.get_request_body_data, + self.get_authenticator().get_request_body_data, + extra_body_data, ) def _request_body_json( @@ -343,7 +350,12 @@ def _request_body_json( """ # Warning: use self.state instead of the stream_state passed as argument! options = self._get_request_options( - stream_state, stream_slice, next_page_token, self.get_request_body_json, self.get_authenticator().get_request_body_json, extra_body_json + stream_state, + stream_slice, + next_page_token, + self.get_request_body_json, + self.get_authenticator().get_request_body_json, + extra_body_json, ) if isinstance(options, str): raise ValueError("Request body json cannot be a string") @@ -383,7 +395,9 @@ def send_request( request_body_json: Optional[Mapping[str, Any]] = None, ) -> Optional[requests.Response]: request = self._create_prepared_request( - path=path if path is not None else self.get_path(stream_state=stream_state, stream_slice=stream_slice, next_page_token=next_page_token), + path=path + if path is not None + else self.get_path(stream_state=stream_state, stream_slice=stream_slice, next_page_token=next_page_token), headers=self._request_headers(stream_state, stream_slice, next_page_token, request_headers), params=self._request_params(stream_state, stream_slice, next_page_token, request_params), json=self._request_body_json(stream_state, stream_slice, next_page_token, request_body_json), diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/retrievers/retriever.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/retrievers/retriever.py index 843e1d42409c5..d46dc9463487a 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/retrievers/retriever.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/retrievers/retriever.py @@ -4,9 +4,8 @@ 
from abc import abstractmethod from dataclasses import dataclass -from typing import Iterable, List, Optional +from typing import Iterable, Optional -from airbyte_cdk.models import SyncMode from airbyte_cdk.sources.declarative.types import StreamSlice, StreamState from airbyte_cdk.sources.streams.core import StreamData diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/retrievers/simple_retriever.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/retrievers/simple_retriever.py index 4599e5735ba21..399a6d3cc873a 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/retrievers/simple_retriever.py @@ -4,16 +4,14 @@ from dataclasses import InitVar, dataclass, field from itertools import islice -from typing import Any, Callable, Iterable, List, Mapping, MutableMapping, Optional, Set, Tuple, Union +from typing import Any, Callable, Iterable, List, Mapping, Optional, Set, Tuple, Union import requests -from airbyte_cdk.models import AirbyteMessage, Level, SyncMode -from airbyte_cdk.sources.declarative.exceptions import ReadException +from airbyte_cdk.models import AirbyteMessage, Level from airbyte_cdk.sources.declarative.extractors.http_selector import HttpSelector from airbyte_cdk.sources.declarative.incremental.cursor import Cursor from airbyte_cdk.sources.declarative.interpolation import InterpolatedString from airbyte_cdk.sources.declarative.partition_routers.single_partition_router import SinglePartitionRouter -from airbyte_cdk.sources.declarative.requesters.error_handlers.response_action import ResponseAction from airbyte_cdk.sources.declarative.requesters.paginators.no_pagination import NoPagination from airbyte_cdk.sources.declarative.requesters.paginators.paginator import Paginator from airbyte_cdk.sources.declarative.requesters.requester import Requester @@ -23,7 +21,6 @@ from airbyte_cdk.sources.http_logger import format_http_message from 
airbyte_cdk.sources.message import MessageRepository, NoopMessageRepository from airbyte_cdk.sources.streams.core import StreamData -from airbyte_cdk.sources.streams.http import HttpStream @dataclass @@ -107,8 +104,12 @@ def _get_request_options( Raise a ValueError if there's a key collision Returned merged mapping otherwise """ - paginator_mapping, paginator_keys = self._get_mapping(paginator_method, stream_state=stream_state, stream_slice=stream_slice, next_page_token=next_page_token) - stream_slicer_mapping, stream_slicer_keys = self._get_mapping(stream_slicer_method, stream_state=stream_state, stream_slice=stream_slice, next_page_token=next_page_token) + paginator_mapping, paginator_keys = self._get_mapping( + paginator_method, stream_state=stream_state, stream_slice=stream_slice, next_page_token=next_page_token + ) + stream_slicer_mapping, stream_slicer_keys = self._get_mapping( + stream_slicer_method, stream_state=stream_state, stream_slice=stream_slice, next_page_token=next_page_token + ) all_mappings = [paginator_mapping, stream_slicer_mapping] all_keys = [paginator_keys, stream_slicer_keys] @@ -117,7 +118,7 @@ def _get_request_options( # If more than one mapping is a string, raise a ValueError if string_options > 1: raise ValueError("Cannot combine multiple options if one is a string") - + if string_options == 1 and sum(len(keys) for keys in all_keys) > 0: raise ValueError("Cannot combine multiple options if one is a string") @@ -136,7 +137,10 @@ def _get_request_options( return {**paginator_mapping, **stream_slicer_mapping} # type: ignore def _request_headers( - self, stream_state: Optional[StreamData] = None, stream_slice: Optional[StreamSlice] = None, next_page_token: Optional[Mapping[str, Any]] = None + self, + stream_state: Optional[StreamData] = None, + stream_slice: Optional[StreamSlice] = None, + next_page_token: Optional[Mapping[str, Any]] = None, ) -> Mapping[str, Any]: """ Specifies request headers. 
@@ -155,7 +159,7 @@ def _request_headers( def _request_params( self, - stream_state: Optional[StreamData] = None, + stream_state: Optional[StreamData] = None, stream_slice: Optional[StreamSlice] = None, next_page_token: Optional[Mapping[str, Any]] = None, ) -> Mapping[str, Any]: @@ -177,7 +181,7 @@ def _request_params( def _request_body_data( self, - stream_state: Optional[StreamData] = None, + stream_state: Optional[StreamData] = None, stream_slice: Optional[StreamSlice] = None, next_page_token: Optional[Mapping[str, Any]] = None, ) -> Union[Mapping[str, Any], str]: @@ -200,7 +204,7 @@ def _request_body_data( def _request_body_json( self, - stream_state: Optional[StreamData] = None, + stream_state: Optional[StreamData] = None, stream_slice: Optional[StreamSlice] = None, next_page_token: Optional[Mapping[str, Any]] = None, ) -> Optional[Mapping[str, Any]]: @@ -281,17 +285,19 @@ def _fetch_next_page( next_page_token=next_page_token, request_headers=self._request_headers(stream_state=stream_state, stream_slice=stream_slice, next_page_token=next_page_token), request_params=self._request_params(stream_state=stream_state, stream_slice=stream_slice, next_page_token=next_page_token), - request_body_data=self._request_body_data(stream_state=stream_state, stream_slice=stream_slice, next_page_token=next_page_token), - request_body_json=self._request_body_json(stream_state=stream_state, stream_slice=stream_slice, next_page_token=next_page_token), + request_body_data=self._request_body_data( + stream_state=stream_state, stream_slice=stream_slice, next_page_token=next_page_token + ), + request_body_json=self._request_body_json( + stream_state=stream_state, stream_slice=stream_slice, next_page_token=next_page_token + ), ) return response def _read_pages( self, - records_generator_fn: Callable[ - [Optional[requests.Response], Mapping[str, Any], Mapping[str, Any]], Iterable[StreamData] - ], + records_generator_fn: Callable[[Optional[requests.Response], Mapping[str, Any], 
Mapping[str, Any]], Iterable[StreamData]], stream_state: Mapping[str, Any], stream_slice: Mapping[str, Any], ) -> Iterable[StreamData]: @@ -312,7 +318,6 @@ def _read_pages( # Always return an empty generator just in case no records were ever yielded yield from [] - def read_records( self, stream_slice: Optional[StreamSlice] = None, diff --git a/airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_handler.py b/airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_handler.py index 1b428eadd7f47..904adbbb44976 100644 --- a/airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_handler.py +++ b/airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_handler.py @@ -42,8 +42,8 @@ from airbyte_cdk.sources.declarative.declarative_stream import DeclarativeStream from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource from airbyte_cdk.sources.declarative.retrievers import SimpleRetrieverTestReadDecorator -from airbyte_cdk.sources.streams.core import Stream from airbyte_cdk.sources.declarative.retrievers.simple_retriever import SimpleRetriever +from airbyte_cdk.sources.streams.core import Stream from unit_tests.connector_builder.utils import create_configured_catalog _stream_name = "stream_with_custom_requester" diff --git a/airbyte-cdk/python/unit_tests/sources/declarative/incremental/test_per_partition_cursor_integration.py b/airbyte-cdk/python/unit_tests/sources/declarative/incremental/test_per_partition_cursor_integration.py index eac490bfb89ac..0dd19c66fc3c9 100644 --- a/airbyte-cdk/python/unit_tests/sources/declarative/incremental/test_per_partition_cursor_integration.py +++ b/airbyte-cdk/python/unit_tests/sources/declarative/incremental/test_per_partition_cursor_integration.py @@ -7,8 +7,8 @@ from airbyte_cdk.models import SyncMode from airbyte_cdk.sources.declarative.incremental.per_partition_cursor import PerPartitionStreamSlice from 
airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource -from airbyte_cdk.sources.declarative.types import Record from airbyte_cdk.sources.declarative.retrievers.simple_retriever import SimpleRetriever +from airbyte_cdk.sources.declarative.types import Record CURSOR_FIELD = "cursor_field" SYNC_MODE = SyncMode.incremental diff --git a/airbyte-cdk/python/unit_tests/sources/declarative/retrievers/test_simple_retriever.py b/airbyte-cdk/python/unit_tests/sources/declarative/retrievers/test_simple_retriever.py index b59a9a021ef85..f25b951ea303b 100644 --- a/airbyte-cdk/python/unit_tests/sources/declarative/retrievers/test_simple_retriever.py +++ b/airbyte-cdk/python/unit_tests/sources/declarative/retrievers/test_simple_retriever.py @@ -4,21 +4,17 @@ from unittest.mock import MagicMock, Mock, patch -import airbyte_cdk.sources.declarative.requesters.error_handlers.response_status as response_status import pytest import requests from airbyte_cdk.models import AirbyteLogMessage, AirbyteMessage, Level, SyncMode, Type from airbyte_cdk.sources.declarative.auth.declarative_authenticator import NoAuth -from airbyte_cdk.sources.declarative.exceptions import ReadException from airbyte_cdk.sources.declarative.incremental import Cursor, DatetimeBasedCursor from airbyte_cdk.sources.declarative.partition_routers import SinglePartitionRouter -from airbyte_cdk.sources.declarative.requesters.error_handlers.response_action import ResponseAction from airbyte_cdk.sources.declarative.requesters.error_handlers.response_status import ResponseStatus from airbyte_cdk.sources.declarative.requesters.request_option import RequestOptionType from airbyte_cdk.sources.declarative.requesters.requester import HttpMethod from airbyte_cdk.sources.declarative.retrievers.simple_retriever import SimpleRetriever, SimpleRetrieverTestReadDecorator from airbyte_cdk.sources.declarative.types import Record -from airbyte_cdk.sources.streams.http.http import HttpStream 
A_SLICE_STATE = {"slice_state": "slice state value"} A_STREAM_SLICE = {"stream slice": "slice value"} @@ -313,7 +309,7 @@ def test_request_body_data(test_name, slicer_body_data, paginator_body_data, exp assert expected_body_data == actual_body_data else: try: - assert False == retriever._request_body_data(None, None, None) + retriever._request_body_data(None, None, None) assert False except ValueError: pass diff --git a/airbyte-cdk/python/unit_tests/sources/declarative/test_manifest_declarative_source.py b/airbyte-cdk/python/unit_tests/sources/declarative/test_manifest_declarative_source.py index 46bac8829745f..e139e4ac2062e 100644 --- a/airbyte-cdk/python/unit_tests/sources/declarative/test_manifest_declarative_source.py +++ b/airbyte-cdk/python/unit_tests/sources/declarative/test_manifest_declarative_source.py @@ -1238,7 +1238,7 @@ def _create_page(response_body): _create_page({"rates": [{"ABC": 2, "partition": 1}], "_metadata": {}}), ), [{"ABC": 0, "partition": 0}, {"AED": 1, "partition": 0}, {"USD": 3, "partition": 0}, {"ABC": 2, "partition": 1}], - [call({}, {"partition": "0"}, None), call( {}, {"partition": "0"},{"next_page_token": "next"}), call( {}, {"partition": "1"},None),] + [call({}, {"partition": "0"}, None), call({}, {"partition": "0"},{"next_page_token": "next"}), call({}, {"partition": "1"},None),] ) ]) def test_read_manifest_declarative_source(test_name, manifest, pages, expected_records, expected_calls): From 6ca6d21c0b4f1c69cdde8cad4449ff3b112b4c22 Mon Sep 17 00:00:00 2001 From: Joe Reuter Date: Wed, 26 Jul 2023 14:38:42 +0200 Subject: [PATCH 03/16] review comments --- .../airbyte_cdk/sources/declarative/declarative_stream.py | 8 ++++---- .../sources/declarative/requesters/http_requester.py | 8 ++++---- .../sources/declarative/retrievers/simple_retriever.py | 7 ++++--- .../connector_builder/test_connector_builder_handler.py | 2 +- .../declarative/retrievers/test_simple_retriever.py | 2 +- 5 files changed, 14 insertions(+), 13 deletions(-) diff 
--git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/declarative_stream.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/declarative_stream.py index 328fa654b19ca..56d92dfc5639d 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/declarative_stream.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/declarative_stream.py @@ -94,9 +94,9 @@ def cursor_field(self) -> Union[str, List[str]]: def read_records( self, sync_mode: SyncMode, - cursor_field: List[str] = None, # type: ignore - stream_slice: Mapping[str, Any] = None, # type: ignore - stream_state: Mapping[str, Any] = None, # type: ignore + cursor_field: Optional[List[str]] = None, + stream_slice: Optional[Mapping[str, Any]] = None, + stream_state: Optional[Mapping[str, Any]] = None, ) -> Iterable[Mapping[str, Any]]: """ :param: stream_state We knowingly avoid using stream_state as we want cursors to manage their own state. @@ -113,7 +113,7 @@ def get_json_schema(self) -> Mapping[str, Any]: # type: ignore return self._schema_loader.get_json_schema() def stream_slices( - self, *, sync_mode: SyncMode, cursor_field: List[str] = None, stream_state: Mapping[str, Any] = None # type: ignore + self, *, sync_mode: SyncMode, cursor_field: Optional[List[str]] = None, stream_state: Optional[Mapping[str, Any]] = None ) -> Iterable[Optional[Mapping[str, Any]]]: """ Override to define the slices for this stream. See the stream slicing section of the docs for more information. 
diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/http_requester.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/http_requester.py index dc01bfc164e33..30d29d84f5a6f 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/http_requester.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/http_requester.py @@ -55,6 +55,10 @@ class HttpRequester(Requester): request_options_provider: Optional[InterpolatedRequestOptionsProvider] = None error_handler: Optional[ErrorHandler] = None + disable_retries: bool = False + _DEFAULT_MAX_RETRY = 5 + _DEFAULT_RETRY_FACTOR = 5 + def __post_init__(self, parameters: Mapping[str, Any]) -> None: self._url_base = InterpolatedString.create(self.url_base, parameters=parameters) self._path = InterpolatedString.create(self.path, parameters=parameters) @@ -154,10 +158,6 @@ def get_request_body_json( # type: ignore stream_state=stream_state, stream_slice=stream_slice, next_page_token=next_page_token ) - disable_retries: bool = False - _DEFAULT_MAX_RETRY = 5 - _DEFAULT_RETRY_FACTOR = 5 - @property def max_retries(self) -> Union[int, None]: if self.disable_retries: diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/retrievers/simple_retriever.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/retrievers/simple_retriever.py index 3e3591a7c8969..5ffcb5915143f 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/retrievers/simple_retriever.py @@ -100,10 +100,11 @@ def _get_request_options( stream_slicer_method: Callable[..., Optional[Union[Mapping[str, Any], str]]], ) -> Union[Mapping[str, Any], str]: """ - Get the request_option from the requester, the authenticator and extra_options passed in. + Get the request_option from the paginator and the stream slicer. 
Raise a ValueError if there's a key collision Returned merged mapping otherwise """ + # FIXME we should eventually remove the usage of stream_state as part of the interpolation paginator_mapping, paginator_keys = self._get_mapping( paginator_method, stream_state=stream_state, stream_slice=stream_slice, next_page_token=next_page_token ) @@ -224,7 +225,7 @@ def _request_body_json( raise ValueError("Request body json cannot be a string") return body_json - def _path( + def _paginator_path( self, ) -> Optional[str]: """ @@ -279,7 +280,7 @@ def _fetch_next_page( self, stream_state: Mapping[str, Any], stream_slice: Mapping[str, Any], next_page_token: Optional[Mapping[str, Any]] = None ) -> Optional[requests.Response]: response = self.requester.send_request( - path=self._path(), + path=self._paginator_path(), stream_state=stream_state, stream_slice=stream_slice, next_page_token=next_page_token, diff --git a/airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_handler.py b/airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_handler.py index 904adbbb44976..ddcdfc756a58b 100644 --- a/airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_handler.py +++ b/airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_handler.py @@ -640,7 +640,7 @@ def create_mock_http_stream(name, url_base, path): http_stream.requester = MagicMock() http_stream.requester.get_url_base.return_value = url_base http_stream.requester.get_path.return_value = path - http_stream._path.return_value = None + http_stream._paginator_path.return_value = None return http_stream diff --git a/airbyte-cdk/python/unit_tests/sources/declarative/retrievers/test_simple_retriever.py b/airbyte-cdk/python/unit_tests/sources/declarative/retrievers/test_simple_retriever.py index f25b951ea303b..6f8a97853c229 100644 --- a/airbyte-cdk/python/unit_tests/sources/declarative/retrievers/test_simple_retriever.py +++ 
b/airbyte-cdk/python/unit_tests/sources/declarative/retrievers/test_simple_retriever.py @@ -340,7 +340,7 @@ def test_path(test_name, requester_path, paginator_path, expected_path): config={}, ) - actual_path = retriever._path() + actual_path = retriever._paginator_path() assert expected_path == actual_path From 9bdfa53ddac644401661b30c0aa99f23d1455da2 Mon Sep 17 00:00:00 2001 From: flash1293 Date: Wed, 26 Jul 2023 12:56:46 +0000 Subject: [PATCH 04/16] Automated Commit - Formatting Changes --- .../debezium/internals/mysql/MySqlDebeziumStateUtil.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/airbyte-integrations/bases/debezium/src/main/java/io/airbyte/integrations/debezium/internals/mysql/MySqlDebeziumStateUtil.java b/airbyte-integrations/bases/debezium/src/main/java/io/airbyte/integrations/debezium/internals/mysql/MySqlDebeziumStateUtil.java index af85ae51159a4..4e285a9d19e7c 100644 --- a/airbyte-integrations/bases/debezium/src/main/java/io/airbyte/integrations/debezium/internals/mysql/MySqlDebeziumStateUtil.java +++ b/airbyte-integrations/bases/debezium/src/main/java/io/airbyte/integrations/debezium/internals/mysql/MySqlDebeziumStateUtil.java @@ -38,7 +38,8 @@ public JsonNode constructInitialDebeziumState(final Properties properties, final ConfiguredAirbyteCatalog catalog, final JdbcDatabase database) { // https://debezium.io/documentation/reference/2.2/connectors/mysql.html#mysql-property-snapshot-mode - // We use the schema_only_recovery property cause using this mode will instruct Debezium to construct the db schema history. + // We use the schema_only_recovery property cause using this mode will instruct Debezium to + // construct the db schema history. 
properties.setProperty("snapshot.mode", "schema_only_recovery"); final AirbyteFileOffsetBackingStore offsetManager = AirbyteFileOffsetBackingStore.initializeState( constructBinlogOffset(database, database.getSourceConfig().get(JdbcUtils.DATABASE_KEY).asText()), From 1771157de4abe320ccb8e03f7c6534717cbc7f8b Mon Sep 17 00:00:00 2001 From: Joe Reuter Date: Wed, 26 Jul 2023 15:08:06 +0200 Subject: [PATCH 05/16] review comments --- .../requesters/paginators/no_pagination.py | 6 +++--- .../requesters/paginators/paginator.py | 2 +- .../retrievers/simple_retriever.py | 21 ++----------------- .../retrievers/test_simple_retriever.py | 5 +++-- .../mysql/MySqlDebeziumStateUtil.java | 3 ++- 5 files changed, 11 insertions(+), 26 deletions(-) diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py index 10a2a354d5ef1..683508c761aa5 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py @@ -3,7 +3,7 @@ # from dataclasses import InitVar, dataclass -from typing import Any, List, Mapping, Optional, Union +from typing import Any, List, Mapping, MutableMapping, Optional, Union import requests from airbyte_cdk.sources.declarative.requesters.paginators.paginator import Paginator @@ -27,7 +27,7 @@ def get_request_params( stream_state: Optional[StreamState] = None, stream_slice: Optional[StreamSlice] = None, next_page_token: Optional[Mapping[str, Any]] = None, - ) -> Mapping[str, Any]: + ) -> MutableMapping[str, Any]: return {} def get_request_headers( @@ -60,6 +60,6 @@ def get_request_body_json( def next_page_token(self, response: requests.Response, last_records: List[Record]) -> Mapping[str, Any]: return {} - def reset(self): + def reset(self) -> None: # No state to reset pass diff --git 
a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/paginators/paginator.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/paginators/paginator.py index 97fab6e4b6dd1..2138712875dc1 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/paginators/paginator.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/paginators/paginator.py @@ -21,7 +21,7 @@ class Paginator(ABC, RequestOptionsProvider): """ @abstractmethod - def reset(self): + def reset(self) -> None: """ Reset the pagination's inner state """ diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/retrievers/simple_retriever.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/retrievers/simple_retriever.py index 5ffcb5915143f..8609b1ee807e0 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/retrievers/simple_retriever.py @@ -325,27 +325,10 @@ def read_records( ) -> Iterable[StreamData]: stream_slice = stream_slice or {} # None-check # Fixing paginator types has a long tail of dependencies - self._paginator.reset() # type: ignore - # Note: Adding the state per partition led to a difficult situation where the state for a partition is not the same as the - # stream_state. This means that if any class downstream wants to access the state, it would need to perform some kind of selection - # based on the partition. To short circuit this, we do the selection here which avoid downstream classes to know about it the - # partition. We have generified the problem to the stream slice instead of the partition because it is the level of abstraction - # streams know (they don't know about partitions). However, we're still unsure as how it will evolve since we can't see any other - # cursor doing selection per slice. We don't want to pollute the interface. Therefore, we will keep the `hasattr` hack for now. 
- # * What is the information we need to clean the hasattr? Once we will have another case where we need to select a state, we will - # know if the abstraction using `stream_slice` so select to state is the right one and validate if the interface makes sense. - # * Why is this abstraction not on the DeclarativeStream level? DeclarativeStream does not have a notion of stream slicers and we - # would like to avoid exposing the stream state outside of the cursor. This case is needed as of 2023-06-14 because of - # interpolation. - if self.cursor and hasattr(self.cursor, "select_state"): # type: ignore - slice_state = self.cursor.select_state(stream_slice) # type: ignore - elif self.cursor: - slice_state = self.cursor.get_stream_state() - else: - slice_state = {} + self._paginator.reset() most_recent_record_from_slice = None - for stream_data in self._read_pages(self._parse_records, slice_state, stream_slice): + for stream_data in self._read_pages(self._parse_records, self.state, stream_slice): most_recent_record_from_slice = self._get_most_recent_record(most_recent_record_from_slice, stream_data, stream_slice) yield stream_data diff --git a/airbyte-cdk/python/unit_tests/sources/declarative/retrievers/test_simple_retriever.py b/airbyte-cdk/python/unit_tests/sources/declarative/retrievers/test_simple_retriever.py index 6f8a97853c229..eccf9271d127c 100644 --- a/airbyte-cdk/python/unit_tests/sources/declarative/retrievers/test_simple_retriever.py +++ b/airbyte-cdk/python/unit_tests/sources/declarative/retrievers/test_simple_retriever.py @@ -430,12 +430,13 @@ def _generate_slices(number_of_slices): @patch.object(SimpleRetriever, "_read_pages", return_value=iter([])) -def test_given_state_selector_when_read_records_use_slice_state(http_stream_read_pages): +def test_given_state_selector_when_read_records_use_stream_state(http_stream_read_pages): requester = MagicMock() paginator = MagicMock() record_selector = MagicMock() cursor = MagicMock(spec=Cursor) cursor.select_state = 
MagicMock(return_value=A_SLICE_STATE) + cursor.get_stream_state = MagicMock(return_value=A_STREAM_STATE) retriever = SimpleRetriever( name="stream_name", @@ -450,7 +451,7 @@ def test_given_state_selector_when_read_records_use_slice_state(http_stream_read ) list(retriever.read_records(stream_slice=A_STREAM_SLICE)) - http_stream_read_pages.assert_called_once_with(retriever._parse_records, A_SLICE_STATE, A_STREAM_SLICE) + http_stream_read_pages.assert_called_once_with(retriever._parse_records, A_STREAM_STATE, A_STREAM_SLICE) def test_emit_log_request_response_messages(mocker): diff --git a/airbyte-integrations/bases/debezium/src/main/java/io/airbyte/integrations/debezium/internals/mysql/MySqlDebeziumStateUtil.java b/airbyte-integrations/bases/debezium/src/main/java/io/airbyte/integrations/debezium/internals/mysql/MySqlDebeziumStateUtil.java index af85ae51159a4..4e285a9d19e7c 100644 --- a/airbyte-integrations/bases/debezium/src/main/java/io/airbyte/integrations/debezium/internals/mysql/MySqlDebeziumStateUtil.java +++ b/airbyte-integrations/bases/debezium/src/main/java/io/airbyte/integrations/debezium/internals/mysql/MySqlDebeziumStateUtil.java @@ -38,7 +38,8 @@ public JsonNode constructInitialDebeziumState(final Properties properties, final ConfiguredAirbyteCatalog catalog, final JdbcDatabase database) { // https://debezium.io/documentation/reference/2.2/connectors/mysql.html#mysql-property-snapshot-mode - // We use the schema_only_recovery property cause using this mode will instruct Debezium to construct the db schema history. + // We use the schema_only_recovery property cause using this mode will instruct Debezium to + // construct the db schema history. 
properties.setProperty("snapshot.mode", "schema_only_recovery"); final AirbyteFileOffsetBackingStore offsetManager = AirbyteFileOffsetBackingStore.initializeState( constructBinlogOffset(database, database.getSourceConfig().get(JdbcUtils.DATABASE_KEY).asText()), From fb2d3f3a12aa91e30b83c6ffe5fce75db7cad258 Mon Sep 17 00:00:00 2001 From: Joe Reuter Date: Thu, 27 Jul 2023 12:26:46 +0200 Subject: [PATCH 06/16] review comments --- .../declarative/requesters/http_requester.py | 49 +++---------------- .../retrievers/simple_retriever.py | 36 ++------------ .../airbyte_cdk/utils/mapping_helpers.py | 41 ++++++++++++++++ .../unit_tests/utils/test_mapping_helpers.py | 42 ++++++++++++++++ 4 files changed, 94 insertions(+), 74 deletions(-) create mode 100644 airbyte-cdk/python/airbyte_cdk/utils/mapping_helpers.py create mode 100644 airbyte-cdk/python/unit_tests/utils/test_mapping_helpers.py diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/http_requester.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/http_requester.py index 30d29d84f5a6f..9a1ca12cca8bd 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/http_requester.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/http_requester.py @@ -26,6 +26,7 @@ from airbyte_cdk.sources.streams.http.exceptions import DefaultBackoffException, RequestBodyException, UserDefinedBackoffException from airbyte_cdk.sources.streams.http.http import BODY_REQUEST_METHODS from airbyte_cdk.sources.streams.http.rate_limiting import default_backoff_handler, user_defined_backoff_handler +from airbyte_cdk.utils.mapping_helpers import combine_mappings from requests.auth import AuthBase @@ -211,18 +212,6 @@ def _error_message(self, response: requests.Response) -> str: """ return self.interpret_response_status(response).error_message - def _get_mapping( - self, method: Callable[..., Optional[Union[Mapping[str, Any], str]]], **kwargs: Any - ) -> Tuple[Union[Mapping[str, 
Any], str], Set[str]]: - """ - Get mapping from the provided method, and get the keys of the mapping. - If the method returns a string, it will return the string and an empty set. - If the method returns a dict, it will return the dict and its keys. - """ - mapping = method(**kwargs) or {} - keys = set(mapping.keys()) if not isinstance(mapping, str) else set() - return mapping, keys - def _get_request_options( self, stream_state: Optional[StreamState], @@ -237,37 +226,11 @@ def _get_request_options( Raise a ValueError if there's a key collision Returned merged mapping otherwise """ - requester_mapping, requester_keys = self._get_mapping( - requester_method, stream_state=stream_state, stream_slice=stream_slice, next_page_token=next_page_token - ) - auth_options_mapping, auth_options_keys = self._get_mapping(auth_options_method) - extra_options = extra_options or {} - extra_mapping, extra_keys = self._get_mapping(lambda: extra_options) - - all_mappings = [requester_mapping, auth_options_mapping, extra_mapping] - all_keys = [requester_keys, auth_options_keys, extra_keys] - - string_options = sum(isinstance(mapping, str) for mapping in all_mappings) - # If more than one mapping is a string, raise a ValueError - if string_options > 1: - raise ValueError("Cannot combine multiple options if one is a string") - - if string_options == 1 and sum(len(keys) for keys in all_keys) > 0: - raise ValueError("Cannot combine multiple options if one is a string") - - # If any mapping is a string, return it - for mapping in all_mappings: - if isinstance(mapping, str): - return mapping - - # If there are duplicate keys across mappings, raise a ValueError - intersection = set().union(*all_keys) - if len(intersection) < sum(len(keys) for keys in all_keys): - raise ValueError(f"Duplicate keys found: {intersection}") - - # Return the combined mappings - # ignore type because mypy doesn't follow all mappings being dicts - return {**requester_mapping, **auth_options_mapping, **extra_mapping} 
# type: ignore + return combine_mappings([ + requester_method(stream_state=stream_state, stream_slice=stream_slice, next_page_token=next_page_token), + auth_options_method(), + extra_options + ]) def _request_headers( self, diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/retrievers/simple_retriever.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/retrievers/simple_retriever.py index 8609b1ee807e0..d92faf3e10ca6 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/retrievers/simple_retriever.py @@ -21,6 +21,7 @@ from airbyte_cdk.sources.http_logger import format_http_message from airbyte_cdk.sources.message import MessageRepository, NoopMessageRepository from airbyte_cdk.sources.streams.core import StreamData +from airbyte_cdk.utils.mapping_helpers import combine_mappings @dataclass @@ -105,37 +106,10 @@ def _get_request_options( Returned merged mapping otherwise """ # FIXME we should eventually remove the usage of stream_state as part of the interpolation - paginator_mapping, paginator_keys = self._get_mapping( - paginator_method, stream_state=stream_state, stream_slice=stream_slice, next_page_token=next_page_token - ) - stream_slicer_mapping, stream_slicer_keys = self._get_mapping( - stream_slicer_method, stream_state=stream_state, stream_slice=stream_slice, next_page_token=next_page_token - ) - - all_mappings = [paginator_mapping, stream_slicer_mapping] - all_keys = [paginator_keys, stream_slicer_keys] - - string_options = sum(isinstance(mapping, str) for mapping in all_mappings) - # If more than one mapping is a string, raise a ValueError - if string_options > 1: - raise ValueError("Cannot combine multiple options if one is a string") - - if string_options == 1 and sum(len(keys) for keys in all_keys) > 0: - raise ValueError("Cannot combine multiple options if one is a string") - - # If any mapping is a string, return it - for mapping in 
all_mappings: - if isinstance(mapping, str): - return mapping - - # If there are duplicate keys across mappings, raise a ValueError - intersection = set().union(*all_keys) - if len(intersection) < sum(len(keys) for keys in all_keys): - raise ValueError(f"Duplicate keys found: {intersection}") - - # Return the combined mappings - # ignore type because mypy doesn't follow all mappings being dicts - return {**paginator_mapping, **stream_slicer_mapping} # type: ignore + return combine_mappings([ + paginator_method(stream_state=stream_state, stream_slice=stream_slice, next_page_token=next_page_token), + stream_slicer_method(stream_state=stream_state, stream_slice=stream_slice, next_page_token=next_page_token), + ]) def _request_headers( self, diff --git a/airbyte-cdk/python/airbyte_cdk/utils/mapping_helpers.py b/airbyte-cdk/python/airbyte_cdk/utils/mapping_helpers.py new file mode 100644 index 0000000000000..2d0f5d8df9ff5 --- /dev/null +++ b/airbyte-cdk/python/airbyte_cdk/utils/mapping_helpers.py @@ -0,0 +1,41 @@ + +from typing import Any, List, Mapping, Optional, Set, Union + + +def combine_mappings( + mappings: List[Optional[Union[Mapping[str, Any], str]]] +) -> Union[Mapping[str, Any], str]: + """ + Combine multiple mappings into a single mapping. If any of the mappings are a string, return + that string. 
Raise errors in the following cases: + * If there are duplicate keys across mappings + * If there are multiple string mappings + * If there are multiple mappings containing keys and one of them is a string + """ + all_keys: List[Set[str]] = [] + for part in mappings: + if part is None: + continue + keys = set(part.keys()) if not isinstance(part, str) else set() + all_keys.append(keys) + + string_options = sum(isinstance(mapping, str) for mapping in mappings) + # If more than one mapping is a string, raise a ValueError + if string_options > 1: + raise ValueError("Cannot combine multiple string options") + + if string_options == 1 and sum(len(keys) for keys in all_keys) > 0: + raise ValueError("Cannot combine multiple options if one is a string") + + # If any mapping is a string, return it + for mapping in mappings: + if isinstance(mapping, str): + return mapping + + # If there are duplicate keys across mappings, raise a ValueError + intersection = set().union(*all_keys) + if len(intersection) < sum(len(keys) for keys in all_keys): + raise ValueError(f"Duplicate keys found: {intersection}") + + # Return the combined mappings + return {key: value for mapping in mappings if mapping for key, value in mapping.items()} \ No newline at end of file diff --git a/airbyte-cdk/python/unit_tests/utils/test_mapping_helpers.py b/airbyte-cdk/python/unit_tests/utils/test_mapping_helpers.py new file mode 100644 index 0000000000000..c4715386d9ee8 --- /dev/null +++ b/airbyte-cdk/python/unit_tests/utils/test_mapping_helpers.py @@ -0,0 +1,42 @@ +import pytest +from airbyte_cdk.utils.mapping_helpers import combine_mappings + +def test_basic_merge(): + mappings = [{"a": 1}, {"b": 2}, {"c": 3}, {}] + result = combine_mappings(mappings) + assert result == {"a": 1, "b": 2, "c": 3} + +def test_combine_with_string(): + mappings = [{"a": 1}, "option"] + with pytest.raises(ValueError, match="Cannot combine multiple options if one is a string"): + combine_mappings(mappings) + +def 
test_overlapping_keys(): + mappings = [{"a": 1, "b": 2}, {"b": 3}] + with pytest.raises(ValueError, match="Duplicate keys found"): + combine_mappings(mappings) + +def test_multiple_strings(): + mappings = ["option1", "option2"] + with pytest.raises(ValueError, match="Cannot combine multiple string options"): + combine_mappings(mappings) + +def test_handle_none_values(): + mappings = [{"a": 1}, None, {"b": 2}] + result = combine_mappings(mappings) + assert result == {"a": 1, "b": 2} + +def test_empty_mappings(): + mappings = [] + result = combine_mappings(mappings) + assert result == {} + +def test_single_mapping(): + mappings = [{"a": 1}] + result = combine_mappings(mappings) + assert result == {"a": 1} + +def test_combine_with_string_and_empty_mappings(): + mappings = ["option", {}] + result = combine_mappings(mappings) + assert result == "option" From ba13004a882ba9ae89906e0cf005a71fe7c0622b Mon Sep 17 00:00:00 2001 From: Joe Reuter Date: Thu, 27 Jul 2023 12:28:57 +0200 Subject: [PATCH 07/16] review comments --- .../declarative/requesters/http_requester.py | 14 ++++++++------ .../declarative/retrievers/simple_retriever.py | 10 ++++++---- .../python/airbyte_cdk/utils/mapping_helpers.py | 10 ++++++---- .../unit_tests/utils/test_mapping_helpers.py | 12 ++++++++++++ 4 files changed, 32 insertions(+), 14 deletions(-) diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/http_requester.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/http_requester.py index 9a1ca12cca8bd..ac5a33041fda0 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/http_requester.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/http_requester.py @@ -7,7 +7,7 @@ import urllib from dataclasses import InitVar, dataclass from functools import lru_cache -from typing import Any, Callable, Mapping, MutableMapping, Optional, Set, Tuple, Union +from typing import Any, Callable, Mapping, MutableMapping, Optional, Union from 
urllib.parse import urljoin import requests @@ -226,11 +226,13 @@ def _get_request_options( Raise a ValueError if there's a key collision Returned merged mapping otherwise """ - return combine_mappings([ - requester_method(stream_state=stream_state, stream_slice=stream_slice, next_page_token=next_page_token), - auth_options_method(), - extra_options - ]) + return combine_mappings( + [ + requester_method(stream_state=stream_state, stream_slice=stream_slice, next_page_token=next_page_token), + auth_options_method(), + extra_options, + ] + ) def _request_headers( self, diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/retrievers/simple_retriever.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/retrievers/simple_retriever.py index d92faf3e10ca6..cd74e6b02583c 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/retrievers/simple_retriever.py @@ -106,10 +106,12 @@ def _get_request_options( Returned merged mapping otherwise """ # FIXME we should eventually remove the usage of stream_state as part of the interpolation - return combine_mappings([ - paginator_method(stream_state=stream_state, stream_slice=stream_slice, next_page_token=next_page_token), - stream_slicer_method(stream_state=stream_state, stream_slice=stream_slice, next_page_token=next_page_token), - ]) + return combine_mappings( + [ + paginator_method(stream_state=stream_state, stream_slice=stream_slice, next_page_token=next_page_token), + stream_slicer_method(stream_state=stream_state, stream_slice=stream_slice, next_page_token=next_page_token), + ] + ) def _request_headers( self, diff --git a/airbyte-cdk/python/airbyte_cdk/utils/mapping_helpers.py b/airbyte-cdk/python/airbyte_cdk/utils/mapping_helpers.py index 2d0f5d8df9ff5..ae5e898f667de 100644 --- a/airbyte-cdk/python/airbyte_cdk/utils/mapping_helpers.py +++ b/airbyte-cdk/python/airbyte_cdk/utils/mapping_helpers.py @@ -1,10 +1,12 @@ +# +# 
Copyright (c) 2023 Airbyte, Inc., all rights reserved. +# + from typing import Any, List, Mapping, Optional, Set, Union -def combine_mappings( - mappings: List[Optional[Union[Mapping[str, Any], str]]] -) -> Union[Mapping[str, Any], str]: +def combine_mappings(mappings: List[Optional[Union[Mapping[str, Any], str]]]) -> Union[Mapping[str, Any], str]: """ Combine multiple mappings into a single mapping. If any of the mappings are a string, return that string. Raise errors in the following cases: @@ -38,4 +40,4 @@ def combine_mappings( raise ValueError(f"Duplicate keys found: {intersection}") # Return the combined mappings - return {key: value for mapping in mappings if mapping for key, value in mapping.items()} \ No newline at end of file + return {key: value for mapping in mappings if mapping for key, value in mapping.items()} # type: ignore # mapping can't be string here diff --git a/airbyte-cdk/python/unit_tests/utils/test_mapping_helpers.py b/airbyte-cdk/python/unit_tests/utils/test_mapping_helpers.py index c4715386d9ee8..f5dc979e34776 100644 --- a/airbyte-cdk/python/unit_tests/utils/test_mapping_helpers.py +++ b/airbyte-cdk/python/unit_tests/utils/test_mapping_helpers.py @@ -1,41 +1,53 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+# + import pytest from airbyte_cdk.utils.mapping_helpers import combine_mappings + def test_basic_merge(): mappings = [{"a": 1}, {"b": 2}, {"c": 3}, {}] result = combine_mappings(mappings) assert result == {"a": 1, "b": 2, "c": 3} + def test_combine_with_string(): mappings = [{"a": 1}, "option"] with pytest.raises(ValueError, match="Cannot combine multiple options if one is a string"): combine_mappings(mappings) + def test_overlapping_keys(): mappings = [{"a": 1, "b": 2}, {"b": 3}] with pytest.raises(ValueError, match="Duplicate keys found"): combine_mappings(mappings) + def test_multiple_strings(): mappings = ["option1", "option2"] with pytest.raises(ValueError, match="Cannot combine multiple string options"): combine_mappings(mappings) + def test_handle_none_values(): mappings = [{"a": 1}, None, {"b": 2}] result = combine_mappings(mappings) assert result == {"a": 1, "b": 2} + def test_empty_mappings(): mappings = [] result = combine_mappings(mappings) assert result == {} + def test_single_mapping(): mappings = [{"a": 1}] result = combine_mappings(mappings) assert result == {"a": 1} + def test_combine_with_string_and_empty_mappings(): mappings = ["option", {}] result = combine_mappings(mappings) From 87069bcfdbddc1223be571d4ed380f59eeec0317 Mon Sep 17 00:00:00 2001 From: Joe Reuter Date: Mon, 31 Jul 2023 11:55:57 +0200 Subject: [PATCH 08/16] log all messages --- .../connector_builder_handler.py | 1 + .../declarative/auth/token_provider.py | 24 ++++----- .../parsers/model_to_component_factory.py | 3 +- .../declarative/requesters/http_requester.py | 23 +++++++-- .../declarative/requesters/requester.py | 4 +- .../retrievers/simple_retriever.py | 50 ++++++++++--------- .../test_connector_builder_handler.py | 4 +- .../requesters/test_http_requester.py | 39 +++++++++++++++ .../retrievers/test_simple_retriever.py | 12 ++--- 9 files changed, 104 insertions(+), 56 deletions(-) diff --git a/airbyte-cdk/python/airbyte_cdk/connector_builder/connector_builder_handler.py 
b/airbyte-cdk/python/airbyte_cdk/connector_builder/connector_builder_handler.py index 10e45859f81bb..0356c6d351b96 100644 --- a/airbyte-cdk/python/airbyte_cdk/connector_builder/connector_builder_handler.py +++ b/airbyte-cdk/python/airbyte_cdk/connector_builder/connector_builder_handler.py @@ -71,6 +71,7 @@ def read_stream( error = AirbyteTracedException.from_exception( exc, message=f"Error reading stream with config={config} and catalog={configured_catalog}: {str(exc)}" ) + raise exc return error.as_airbyte_message() diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/auth/token_provider.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/auth/token_provider.py index b37d823d107b4..2335e16ce95dc 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/auth/token_provider.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/auth/token_provider.py @@ -10,8 +10,6 @@ import dpath.util import pendulum -import requests -from airbyte_cdk.models import Level from airbyte_cdk.sources.declarative.decoders.decoder import Decoder from airbyte_cdk.sources.declarative.decoders.json_decoder import JsonDecoder from airbyte_cdk.sources.declarative.exceptions import ReadException @@ -53,19 +51,9 @@ def _refresh_if_necessary(self) -> None: self._refresh() def _refresh(self) -> None: - response = self.login_requester.send_request() - if response is None: - raise ReadException("Failed to get session token, response got ignored by requester") - self._log_response(response) - session_token = dpath.util.get(self._decoder.decode(response), self.session_token_path) - if self.expiration_duration is not None: - self._next_expiration_time = pendulum.now() + self.expiration_duration - self._token = session_token - - def _log_response(self, response: requests.Response) -> None: - self.message_repository.log_message( - Level.DEBUG, - lambda: format_http_message( + response = self.login_requester.send_request( + log_request=True, + log_formatter=lambda response: 
format_http_message( response, "Login request", "Obtains session token", @@ -73,6 +61,12 @@ def _log_response(self, response: requests.Response) -> None: is_auxiliary=True, ), ) + if response is None: + raise ReadException("Failed to get session token, response got ignored by requester") + session_token = dpath.util.get(self._decoder.decode(response), self.session_token_path) + if self.expiration_duration is not None: + self._next_expiration_time = pendulum.now() + self.expiration_duration + self._token = session_token @dataclass diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py index 4bd95501a38a8..a5e7d8b78ec72 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py @@ -697,6 +697,7 @@ def create_http_requester(self, model: HttpRequesterModel, config: Config, *, na config=config, disable_retries=self._disable_retries, parameters=model.parameters or {}, + message_repository=self._message_repository, ) @staticmethod @@ -913,7 +914,6 @@ def create_simple_retriever( config=config, maximum_number_of_slices=self._limit_slices_fetched or 5, parameters=model.parameters or {}, - message_repository=self._message_repository, ) return SimpleRetriever( name=name, @@ -925,7 +925,6 @@ def create_simple_retriever( cursor=cursor, config=config, parameters=model.parameters or {}, - message_repository=self._message_repository, ) @staticmethod diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/http_requester.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/http_requester.py index ac5a33041fda0..59ba04d6362e4 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/http_requester.py +++ 
b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/http_requester.py @@ -11,6 +11,7 @@ from urllib.parse import urljoin import requests +from airbyte_cdk.models import Level from airbyte_cdk.sources.declarative.auth.declarative_authenticator import DeclarativeAuthenticator, NoAuth from airbyte_cdk.sources.declarative.decoders.json_decoder import JsonDecoder from airbyte_cdk.sources.declarative.exceptions import ReadException @@ -23,6 +24,7 @@ ) from airbyte_cdk.sources.declarative.requesters.requester import HttpMethod, Requester from airbyte_cdk.sources.declarative.types import Config, StreamSlice, StreamState +from airbyte_cdk.sources.message import MessageRepository, NoopMessageRepository from airbyte_cdk.sources.streams.http.exceptions import DefaultBackoffException, RequestBodyException, UserDefinedBackoffException from airbyte_cdk.sources.streams.http.http import BODY_REQUEST_METHODS from airbyte_cdk.sources.streams.http.rate_limiting import default_backoff_handler, user_defined_backoff_handler @@ -55,8 +57,9 @@ class HttpRequester(Requester): http_method: Union[str, HttpMethod] = HttpMethod.GET request_options_provider: Optional[InterpolatedRequestOptionsProvider] = None error_handler: Optional[ErrorHandler] = None - disable_retries: bool = False + message_repository: MessageRepository = NoopMessageRepository() + _DEFAULT_MAX_RETRY = 5 _DEFAULT_RETRY_FACTOR = 5 @@ -380,6 +383,8 @@ def send_request( request_params: Optional[Mapping[str, Any]] = None, request_body_data: Optional[Union[Mapping[str, Any], str]] = None, request_body_json: Optional[Mapping[str, Any]] = None, + log_request: bool = False, + log_formatter: Optional[Callable[[requests.Response], Any]] = None, ) -> Optional[requests.Response]: request = self._create_prepared_request( path=path @@ -391,10 +396,10 @@ def send_request( data=self._request_body_data(stream_state, stream_slice, next_page_token, request_body_data), ) - response = self._send_with_retry(request) + response = 
self._send_with_retry(request, log_request=log_request, log_formatter=log_formatter) return self._validate_response(response) - def _send_with_retry(self, request: requests.PreparedRequest) -> requests.Response: + def _send_with_retry(self, request: requests.PreparedRequest, log_request: bool = False, log_formatter: Optional[Callable[[requests.Response], Any]] = None) -> requests.Response: """ Creates backoff wrappers which are responsible for retry logic """ @@ -425,9 +430,9 @@ def _send_with_retry(self, request: requests.PreparedRequest) -> requests.Respon user_backoff_handler = user_defined_backoff_handler(max_tries=max_tries)(self._send) # type: ignore # we don't pass in kwargs to the backoff handler backoff_handler = default_backoff_handler(max_tries=max_tries, factor=self._DEFAULT_RETRY_FACTOR) # backoff handlers wrap _send, so it will always return a response - return backoff_handler(user_backoff_handler)(request) # type: ignore + return backoff_handler(user_backoff_handler)(request, log_request=log_request, log_formatter=log_formatter) # type: ignore - def _send(self, request: requests.PreparedRequest) -> requests.Response: + def _send(self, request: requests.PreparedRequest, log_request: bool = False, log_formatter: Optional[Callable[[requests.Response], Any]] = None) -> requests.Response: """ Wraps sending the request in rate limit and error handlers. 
Please note that error handling for HTTP status codes will be ignored if raise_on_http_errors is set to False @@ -451,6 +456,14 @@ def _send(self, request: requests.PreparedRequest) -> requests.Response: ) response: requests.Response = self._session.send(request) self.logger.debug("Receiving response", extra={"headers": response.headers, "status": response.status_code, "body": response.text}) + if log_request: + if log_formatter is None: + raise ValueError("response_formatter must be provided if log_request is True") + formatter = log_formatter + self.message_repository.log_message( + Level.DEBUG, + lambda: formatter(response), + ) if self._should_retry(response): custom_backoff_time = self._backoff_time(response) if custom_backoff_time: diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/requester.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/requester.py index 45240c114f4e8..91d20d54a2e73 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/requester.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/requester.py @@ -4,7 +4,7 @@ from abc import abstractmethod from enum import Enum -from typing import Any, Mapping, MutableMapping, Optional, Union +from typing import Any, Callable, Mapping, MutableMapping, Optional, Union import requests from airbyte_cdk.sources.declarative.auth.declarative_authenticator import DeclarativeAuthenticator @@ -135,6 +135,8 @@ def send_request( request_params: Optional[Mapping[str, Any]] = None, request_body_data: Optional[Union[Mapping[str, Any], str]] = None, request_body_json: Optional[Mapping[str, Any]] = None, + log_request: bool = False, + log_formatter: Optional[Callable[[requests.Response], Any]] = None, ) -> Optional[requests.Response]: """ Sends a request and returns the response. Might return no response if the error handler chooses to ignore the response or throw an exception in case of an error. 
diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/retrievers/simple_retriever.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/retrievers/simple_retriever.py index cd74e6b02583c..614d161817cc6 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/retrievers/simple_retriever.py @@ -7,7 +7,7 @@ from typing import Any, Callable, Iterable, List, Mapping, Optional, Set, Tuple, Union import requests -from airbyte_cdk.models import AirbyteMessage, Level +from airbyte_cdk.models import AirbyteMessage from airbyte_cdk.sources.declarative.extractors.http_selector import HttpSelector from airbyte_cdk.sources.declarative.incremental.cursor import Cursor from airbyte_cdk.sources.declarative.interpolation import InterpolatedString @@ -19,7 +19,6 @@ from airbyte_cdk.sources.declarative.stream_slicers.stream_slicer import StreamSlicer from airbyte_cdk.sources.declarative.types import Config, Record, StreamSlice, StreamState from airbyte_cdk.sources.http_logger import format_http_message -from airbyte_cdk.sources.message import MessageRepository, NoopMessageRepository from airbyte_cdk.sources.streams.core import StreamData from airbyte_cdk.utils.mapping_helpers import combine_mappings @@ -59,7 +58,6 @@ class SimpleRetriever(Retriever): paginator: Optional[Paginator] = None stream_slicer: StreamSlicer = SinglePartitionRouter(parameters={}) cursor: Optional[Cursor] = None - message_repository: MessageRepository = NoopMessageRepository() def __post_init__(self, parameters: Mapping[str, Any]) -> None: self._paginator = self.paginator or NoPagination(parameters=parameters) @@ -255,7 +253,7 @@ def _next_page_token(self, response: requests.Response) -> Optional[Mapping[str, def _fetch_next_page( self, stream_state: Mapping[str, Any], stream_slice: Mapping[str, Any], next_page_token: Optional[Mapping[str, Any]] = None ) -> Optional[requests.Response]: - response = 
self.requester.send_request( + return self.requester.send_request( path=self._paginator_path(), stream_state=stream_state, stream_slice=stream_slice, @@ -270,8 +268,6 @@ def _fetch_next_page( ), ) - return response - def _read_pages( self, records_generator_fn: Callable[[Optional[requests.Response], Mapping[str, Any], Mapping[str, Any]], Iterable[StreamData]], @@ -392,21 +388,27 @@ def __post_init__(self, options: Mapping[str, Any]) -> None: def stream_slices(self) -> Iterable[Optional[Mapping[str, Any]]]: # type: ignore return islice(super().stream_slices(), self.maximum_number_of_slices) - def _parse_records( - self, - response: Optional[requests.Response], - stream_state: Mapping[str, Any], - stream_slice: Optional[Mapping[str, Any]], - ) -> Iterable[StreamData]: - if response is not None: - current_response = response - self.message_repository.log_message( - Level.DEBUG, - lambda: format_http_message( - current_response, - f"Stream '{self.name}' request", - f"Request performed in order to extract records for stream '{self.name}'", - self.name, - ), - ) - yield from self._parse_response(response, stream_slice=stream_slice, stream_state=stream_state) + def _fetch_next_page( + self, stream_state: Mapping[str, Any], stream_slice: Mapping[str, Any], next_page_token: Optional[Mapping[str, Any]] = None + ) -> Optional[requests.Response]: + return self.requester.send_request( + path=self._paginator_path(), + stream_state=stream_state, + stream_slice=stream_slice, + next_page_token=next_page_token, + request_headers=self._request_headers(stream_state=stream_state, stream_slice=stream_slice, next_page_token=next_page_token), + request_params=self._request_params(stream_state=stream_state, stream_slice=stream_slice, next_page_token=next_page_token), + request_body_data=self._request_body_data( + stream_state=stream_state, stream_slice=stream_slice, next_page_token=next_page_token + ), + request_body_json=self._request_body_json( + stream_state=stream_state, 
stream_slice=stream_slice, next_page_token=next_page_token + ), + log_request=True, + log_formatter=lambda response: format_http_message( + response, + f"Stream '{self.name}' request", + f"Request performed in order to extract records for stream '{self.name}'", + self.name, + ), + ) diff --git a/airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_handler.py b/airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_handler.py index ddcdfc756a58b..bdcd5e6a8d6e8 100644 --- a/airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_handler.py +++ b/airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_handler.py @@ -709,7 +709,7 @@ def _create_page(response_body): return _create_response(response_body, request) -@patch.object(SimpleRetriever, "_fetch_next_page", side_effect=(_create_page({"result": [{"id": 0}, {"id": 1}],"_metadata": {"next": "next"}}), _create_page({"result": [{"id": 2}],"_metadata": {"next": "next"}})) * 10) +@patch.object(SimpleRetrieverTestReadDecorator, "_fetch_next_page", side_effect=(_create_page({"result": [{"id": 0}, {"id": 1}],"_metadata": {"next": "next"}}), _create_page({"result": [{"id": 2}],"_metadata": {"next": "next"}})) * 10) def test_read_source(mock_http_stream): """ This test sort of acts as an integration test for the connector builder. 
@@ -750,7 +750,7 @@ def test_read_source(mock_http_stream): assert isinstance(s.retriever, SimpleRetrieverTestReadDecorator) -@patch.object(SimpleRetriever, "_fetch_next_page", side_effect=(_create_page({"result": [{"id": 0}, {"id": 1}],"_metadata": {"next": "next"}}), _create_page({"result": [{"id": 2}],"_metadata": {"next": "next"}}))) +@patch.object(SimpleRetrieverTestReadDecorator, "_fetch_next_page", side_effect=(_create_page({"result": [{"id": 0}, {"id": 1}],"_metadata": {"next": "next"}}), _create_page({"result": [{"id": 2}],"_metadata": {"next": "next"}}))) def test_read_source_single_page_single_slice(mock_http_stream): max_records = 100 max_pages_per_slice = 1 diff --git a/airbyte-cdk/python/unit_tests/sources/declarative/requesters/test_http_requester.py b/airbyte-cdk/python/unit_tests/sources/declarative/requesters/test_http_requester.py index 9960fd6165dd0..21ab5234d9ec8 100644 --- a/airbyte-cdk/python/unit_tests/sources/declarative/requesters/test_http_requester.py +++ b/airbyte-cdk/python/unit_tests/sources/declarative/requesters/test_http_requester.py @@ -561,3 +561,42 @@ def test_duplicate_request_params_are_deduped(path, params, expected_url): else: prepared_request = requester._create_prepared_request(path=path, params=params) assert prepared_request.url == expected_url + + +@pytest.mark.parametrize( + "should_log, status_code, should_throw", [ + (True, 200, False), + (True, 400, False), + (True, 500, True), + (False, 200, False), + (False, 400, False), + (False, 500, True), + ] +) +def test_log_requests(should_log, status_code, should_throw): + repository = MagicMock() + requester = HttpRequester( + name="name", + url_base="https://test_base_url.com", + path="/", + http_method=HttpMethod.GET, + request_options_provider=None, + config={}, + parameters={}, + message_repository=repository, + disable_retries=True + ) + requester._session.send = MagicMock() + response = requests.Response() + response.status_code = status_code + 
requester._session.send.return_value = response + formatter = MagicMock() + formatter.return_value = "formatted_response" + if should_throw: + with pytest.raises(DefaultBackoffException): + requester.send_request(log_request=should_log, log_formatter=formatter) + else: + requester.send_request(log_request=should_log, log_formatter=formatter) + if should_log: + assert repository.log_message.call_args_list[0].args[1]() == "formatted_response" + formatter.assert_called_once_with(response) diff --git a/airbyte-cdk/python/unit_tests/sources/declarative/retrievers/test_simple_retriever.py b/airbyte-cdk/python/unit_tests/sources/declarative/retrievers/test_simple_retriever.py index eccf9271d127c..3582cd6597767 100644 --- a/airbyte-cdk/python/unit_tests/sources/declarative/retrievers/test_simple_retriever.py +++ b/airbyte-cdk/python/unit_tests/sources/declarative/retrievers/test_simple_retriever.py @@ -467,21 +467,19 @@ def test_emit_log_request_response_messages(mocker): response.status_code = 200 format_http_message_mock = mocker.patch("airbyte_cdk.sources.declarative.retrievers.simple_retriever.format_http_message") - message_repository = Mock() + requester = MagicMock() retriever = SimpleRetrieverTestReadDecorator( name="stream_name", primary_key=primary_key, - requester=MagicMock(), + requester=requester, paginator=MagicMock(), record_selector=record_selector, stream_slicer=SinglePartitionRouter(parameters={}), parameters={}, config={}, - message_repository=message_repository, ) - list(retriever._parse_records(response=response, stream_slice={}, stream_state={})) + retriever._fetch_next_page(stream_state={}, stream_slice={}) - assert len(message_repository.log_message.call_args_list) == 1 - assert message_repository.log_message.call_args_list[0].args[0] == Level.DEBUG - assert message_repository.log_message.call_args_list[0].args[1]() == format_http_message_mock.return_value + assert requester.send_request.call_args_list[0][1]["log_request"] == True + assert 
requester.send_request.call_args_list[0][1]["log_formatter"](response) == format_http_message_mock.return_value From 2755d0e62f2298d3c01618c44b77a0e827f7a7ec Mon Sep 17 00:00:00 2001 From: Joe Reuter Date: Mon, 31 Jul 2023 12:11:03 +0200 Subject: [PATCH 09/16] log all message --- .../declarative/requesters/http_requester.py | 14 ++++++++++++-- .../test_connector_builder_handler.py | 4 ++-- .../retrievers/test_simple_retriever.py | 2 +- 3 files changed, 15 insertions(+), 5 deletions(-) diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/http_requester.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/http_requester.py index 59ba04d6362e4..d230bd04730a2 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/http_requester.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/http_requester.py @@ -399,7 +399,12 @@ def send_request( response = self._send_with_retry(request, log_request=log_request, log_formatter=log_formatter) return self._validate_response(response) - def _send_with_retry(self, request: requests.PreparedRequest, log_request: bool = False, log_formatter: Optional[Callable[[requests.Response], Any]] = None) -> requests.Response: + def _send_with_retry( + self, + request: requests.PreparedRequest, + log_request: bool = False, + log_formatter: Optional[Callable[[requests.Response], Any]] = None, + ) -> requests.Response: """ Creates backoff wrappers which are responsible for retry logic """ @@ -432,7 +437,12 @@ def _send_with_retry(self, request: requests.PreparedRequest, log_request: bool # backoff handlers wrap _send, so it will always return a response return backoff_handler(user_backoff_handler)(request, log_request=log_request, log_formatter=log_formatter) # type: ignore - def _send(self, request: requests.PreparedRequest, log_request: bool = False, log_formatter: Optional[Callable[[requests.Response], Any]] = None) -> requests.Response: + def _send( + self, + request: 
requests.PreparedRequest, + log_request: bool = False, + log_formatter: Optional[Callable[[requests.Response], Any]] = None, + ) -> requests.Response: """ Wraps sending the request in rate limit and error handlers. Please note that error handling for HTTP status codes will be ignored if raise_on_http_errors is set to False diff --git a/airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_handler.py b/airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_handler.py index bdcd5e6a8d6e8..ae5ede18b0a99 100644 --- a/airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_handler.py +++ b/airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_handler.py @@ -709,7 +709,7 @@ def _create_page(response_body): return _create_response(response_body, request) -@patch.object(SimpleRetrieverTestReadDecorator, "_fetch_next_page", side_effect=(_create_page({"result": [{"id": 0}, {"id": 1}],"_metadata": {"next": "next"}}), _create_page({"result": [{"id": 2}],"_metadata": {"next": "next"}})) * 10) +@patch.object(requests.Session, "send", side_effect=(_create_page({"result": [{"id": 0}, {"id": 1}],"_metadata": {"next": "next"}}), _create_page({"result": [{"id": 2}],"_metadata": {"next": "next"}})) * 10) def test_read_source(mock_http_stream): """ This test sort of acts as an integration test for the connector builder. 
@@ -750,7 +750,7 @@ def test_read_source(mock_http_stream): assert isinstance(s.retriever, SimpleRetrieverTestReadDecorator) -@patch.object(SimpleRetrieverTestReadDecorator, "_fetch_next_page", side_effect=(_create_page({"result": [{"id": 0}, {"id": 1}],"_metadata": {"next": "next"}}), _create_page({"result": [{"id": 2}],"_metadata": {"next": "next"}}))) +@patch.object(requests.Session, "send", side_effect=(_create_page({"result": [{"id": 0}, {"id": 1}],"_metadata": {"next": "next"}}), _create_page({"result": [{"id": 2}],"_metadata": {"next": "next"}}))) def test_read_source_single_page_single_slice(mock_http_stream): max_records = 100 max_pages_per_slice = 1 diff --git a/airbyte-cdk/python/unit_tests/sources/declarative/retrievers/test_simple_retriever.py b/airbyte-cdk/python/unit_tests/sources/declarative/retrievers/test_simple_retriever.py index 3582cd6597767..adfe89437b7b9 100644 --- a/airbyte-cdk/python/unit_tests/sources/declarative/retrievers/test_simple_retriever.py +++ b/airbyte-cdk/python/unit_tests/sources/declarative/retrievers/test_simple_retriever.py @@ -481,5 +481,5 @@ def test_emit_log_request_response_messages(mocker): retriever._fetch_next_page(stream_state={}, stream_slice={}) - assert requester.send_request.call_args_list[0][1]["log_request"] == True + assert requester.send_request.call_args_list[0][1]["log_request"] is True assert requester.send_request.call_args_list[0][1]["log_formatter"](response) == format_http_message_mock.return_value From ffce52e3b57614d64790e1ce8fbeedfe8bb6f962 Mon Sep 17 00:00:00 2001 From: Joe Reuter Date: Wed, 2 Aug 2023 11:31:28 +0200 Subject: [PATCH 10/16] review comments --- .../connector_builder_handler.py | 1 - .../declarative/auth/token_provider.py | 1 - .../models/declarative_component_schema.py | 1012 +++++++++-------- .../declarative/requesters/http_requester.py | 11 +- .../declarative/requesters/requester.py | 3 +- .../retrievers/simple_retriever.py | 1 - .../requesters/test_http_requester.py | 4 +- 
.../retrievers/test_simple_retriever.py | 2 +- 8 files changed, 528 insertions(+), 507 deletions(-) diff --git a/airbyte-cdk/python/airbyte_cdk/connector_builder/connector_builder_handler.py b/airbyte-cdk/python/airbyte_cdk/connector_builder/connector_builder_handler.py index 0356c6d351b96..10e45859f81bb 100644 --- a/airbyte-cdk/python/airbyte_cdk/connector_builder/connector_builder_handler.py +++ b/airbyte-cdk/python/airbyte_cdk/connector_builder/connector_builder_handler.py @@ -71,7 +71,6 @@ def read_stream( error = AirbyteTracedException.from_exception( exc, message=f"Error reading stream with config={config} and catalog={configured_catalog}: {str(exc)}" ) - raise exc return error.as_airbyte_message() diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/auth/token_provider.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/auth/token_provider.py index 2335e16ce95dc..52383d2d1b59c 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/auth/token_provider.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/auth/token_provider.py @@ -52,7 +52,6 @@ def _refresh_if_necessary(self) -> None: def _refresh(self) -> None: response = self.login_requester.send_request( - log_request=True, log_formatter=lambda response: format_http_message( response, "Login request", diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/models/declarative_component_schema.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/models/declarative_component_schema.py index 64c0164ac0385..e3ee3d5ceef68 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/models/declarative_component_schema.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/models/declarative_component_schema.py @@ -15,12 +15,12 @@ class AddedFieldDefinition(BaseModel): - type: Literal["AddedFieldDefinition"] + type: Literal['AddedFieldDefinition'] path: List[str] = Field( ..., - description="List of strings defining the path where to add the value on the record.", - 
examples=[["segment_id"], ["metadata", "segment_id"]], - title="Path", + description='List of strings defining the path where to add the value on the record.', + examples=[['segment_id'], ['metadata', 'segment_id']], + title='Path', ) value: str = Field( ..., @@ -30,601 +30,607 @@ class AddedFieldDefinition(BaseModel): "{{ record['MetaData']['LastUpdatedTime'] }}", "{{ stream_partition['segment_id'] }}", ], - title="Value", + title='Value', ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') class AddFields(BaseModel): - type: Literal["AddFields"] + type: Literal['AddFields'] fields: List[AddedFieldDefinition] = Field( ..., - description="List of transformations (path and corresponding value) that will be added to the record.", - title="Fields", + description='List of transformations (path and corresponding value) that will be added to the record.', + title='Fields', ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') class AuthFlowType(Enum): - oauth2_0 = "oauth2.0" - oauth1_0 = "oauth1.0" + oauth2_0 = 'oauth2.0' + oauth1_0 = 'oauth1.0' class BasicHttpAuthenticator(BaseModel): - type: Literal["BasicHttpAuthenticator"] + type: Literal['BasicHttpAuthenticator'] username: str = Field( ..., - description="The username that will be combined with the password, base64 encoded and used to make requests. Fill it in the user inputs.", + description='The username that will be combined with the password, base64 encoded and used to make requests. Fill it in the user inputs.', examples=["{{ config['username'] }}", "{{ config['api_key'] }}"], - title="Username", + title='Username', ) password: Optional[str] = Field( - "", - description="The password that will be combined with the username, base64 encoded and used to make requests. 
Fill it in the user inputs.", - examples=["{{ config['password'] }}", ""], - title="Password", + '', + description='The password that will be combined with the username, base64 encoded and used to make requests. Fill it in the user inputs.', + examples=["{{ config['password'] }}", ''], + title='Password', ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') class BearerAuthenticator(BaseModel): - type: Literal["BearerAuthenticator"] + type: Literal['BearerAuthenticator'] api_token: str = Field( ..., - description="Token to inject as request header for authenticating with the API.", + description='Token to inject as request header for authenticating with the API.', examples=["{{ config['api_key'] }}", "{{ config['token'] }}"], - title="Bearer Token", + title='Bearer Token', ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') class CheckStream(BaseModel): - type: Literal["CheckStream"] + type: Literal['CheckStream'] stream_names: List[str] = Field( ..., - description="Names of the streams to try reading from when running a check operation.", - examples=[["users"], ["users", "contacts"]], - title="Stream Names", + description='Names of the streams to try reading from when running a check operation.', + examples=[['users'], ['users', 'contacts']], + title='Stream Names', ) class ConstantBackoffStrategy(BaseModel): - type: Literal["ConstantBackoffStrategy"] + type: Literal['ConstantBackoffStrategy'] backoff_time_in_seconds: Union[float, str] = Field( ..., - description="Backoff time in seconds.", + description='Backoff time in seconds.', examples=[30, 30.5, "{{ config['backoff_time'] }}"], - title="Backoff Time", + title='Backoff Time', ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: Optional[Dict[str, Any]] = Field(None, 
alias='$parameters') class CustomAuthenticator(BaseModel): class Config: extra = Extra.allow - type: Literal["CustomAuthenticator"] + type: Literal['CustomAuthenticator'] class_name: str = Field( ..., - description="Fully-qualified name of the class that will be implementing the custom authentication strategy. Has to be a sub class of DeclarativeAuthenticator. The format is `source_..`.", - examples=["source_railz.components.ShortLivedTokenAuthenticator"], - title="Class Name", + description='Fully-qualified name of the class that will be implementing the custom authentication strategy. Has to be a sub class of DeclarativeAuthenticator. The format is `source_..`.', + examples=['source_railz.components.ShortLivedTokenAuthenticator'], + title='Class Name', ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') class CustomBackoffStrategy(BaseModel): class Config: extra = Extra.allow - type: Literal["CustomBackoffStrategy"] + type: Literal['CustomBackoffStrategy'] class_name: str = Field( ..., - description="Fully-qualified name of the class that will be implementing the custom backoff strategy. The format is `source_..`.", - examples=["source_railz.components.MyCustomBackoffStrategy"], - title="Class Name", + description='Fully-qualified name of the class that will be implementing the custom backoff strategy. The format is `source_..`.', + examples=['source_railz.components.MyCustomBackoffStrategy'], + title='Class Name', ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') class CustomErrorHandler(BaseModel): class Config: extra = Extra.allow - type: Literal["CustomErrorHandler"] + type: Literal['CustomErrorHandler'] class_name: str = Field( ..., - description="Fully-qualified name of the class that will be implementing the custom error handler. 
The format is `source_..`.", - examples=["source_railz.components.MyCustomErrorHandler"], - title="Class Name", + description='Fully-qualified name of the class that will be implementing the custom error handler. The format is `source_..`.', + examples=['source_railz.components.MyCustomErrorHandler'], + title='Class Name', ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') class CustomIncrementalSync(BaseModel): class Config: extra = Extra.allow - type: Literal["CustomIncrementalSync"] + type: Literal['CustomIncrementalSync'] class_name: str = Field( ..., - description="Fully-qualified name of the class that will be implementing the custom incremental sync. The format is `source_..`.", - examples=["source_railz.components.MyCustomIncrementalSync"], - title="Class Name", + description='Fully-qualified name of the class that will be implementing the custom incremental sync. The format is `source_..`.', + examples=['source_railz.components.MyCustomIncrementalSync'], + title='Class Name', ) cursor_field: str = Field( ..., - description="The location of the value on a record that will be used as a bookmark during sync.", + description='The location of the value on a record that will be used as a bookmark during sync.', ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') class CustomPaginationStrategy(BaseModel): class Config: extra = Extra.allow - type: Literal["CustomPaginationStrategy"] + type: Literal['CustomPaginationStrategy'] class_name: str = Field( ..., - description="Fully-qualified name of the class that will be implementing the custom pagination strategy. 
The format is `source_..`.", - examples=["source_railz.components.MyCustomPaginationStrategy"], - title="Class Name", + description='Fully-qualified name of the class that will be implementing the custom pagination strategy. The format is `source_..`.', + examples=['source_railz.components.MyCustomPaginationStrategy'], + title='Class Name', ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') class CustomRecordExtractor(BaseModel): class Config: extra = Extra.allow - type: Literal["CustomRecordExtractor"] + type: Literal['CustomRecordExtractor'] class_name: str = Field( ..., - description="Fully-qualified name of the class that will be implementing the custom record extraction strategy. The format is `source_..`.", - examples=["source_railz.components.MyCustomRecordExtractor"], - title="Class Name", + description='Fully-qualified name of the class that will be implementing the custom record extraction strategy. The format is `source_..`.', + examples=['source_railz.components.MyCustomRecordExtractor'], + title='Class Name', ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') class CustomRequester(BaseModel): class Config: extra = Extra.allow - type: Literal["CustomRequester"] + type: Literal['CustomRequester'] class_name: str = Field( ..., - description="Fully-qualified name of the class that will be implementing the custom requester strategy. The format is `source_..`.", - examples=["source_railz.components.MyCustomRecordExtractor"], - title="Class Name", + description='Fully-qualified name of the class that will be implementing the custom requester strategy. 
The format is `source_..`.', + examples=['source_railz.components.MyCustomRecordExtractor'], + title='Class Name', ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') class CustomRetriever(BaseModel): class Config: extra = Extra.allow - type: Literal["CustomRetriever"] + type: Literal['CustomRetriever'] class_name: str = Field( ..., - description="Fully-qualified name of the class that will be implementing the custom retriever strategy. The format is `source_..`.", - examples=["source_railz.components.MyCustomRetriever"], - title="Class Name", + description='Fully-qualified name of the class that will be implementing the custom retriever strategy. The format is `source_..`.', + examples=['source_railz.components.MyCustomRetriever'], + title='Class Name', ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') class CustomPartitionRouter(BaseModel): class Config: extra = Extra.allow - type: Literal["CustomPartitionRouter"] + type: Literal['CustomPartitionRouter'] class_name: str = Field( ..., - description="Fully-qualified name of the class that will be implementing the custom partition router. The format is `source_..`.", - examples=["source_railz.components.MyCustomPartitionRouter"], - title="Class Name", + description='Fully-qualified name of the class that will be implementing the custom partition router. 
The format is `source_..`.', + examples=['source_railz.components.MyCustomPartitionRouter'], + title='Class Name', ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') class CustomTransformation(BaseModel): class Config: extra = Extra.allow - type: Literal["CustomTransformation"] + type: Literal['CustomTransformation'] class_name: str = Field( ..., - description="Fully-qualified name of the class that will be implementing the custom transformation. The format is `source_..`.", - examples=["source_railz.components.MyCustomTransformation"], - title="Class Name", + description='Fully-qualified name of the class that will be implementing the custom transformation. The format is `source_..`.', + examples=['source_railz.components.MyCustomTransformation'], + title='Class Name', ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') class RefreshTokenUpdater(BaseModel): refresh_token_name: Optional[str] = Field( - "refresh_token", - description="The name of the property which contains the updated refresh token in the response from the token refresh endpoint.", - examples=["refresh_token"], - title="Refresh Token Property Name", + 'refresh_token', + description='The name of the property which contains the updated refresh token in the response from the token refresh endpoint.', + examples=['refresh_token'], + title='Refresh Token Property Name', ) access_token_config_path: Optional[List[str]] = Field( - ["credentials", "access_token"], - description="Config path to the access token. Make sure the field actually exists in the config.", - examples=[["credentials", "access_token"], ["access_token"]], - title="Config Path To Access Token", + ['credentials', 'access_token'], + description='Config path to the access token. 
Make sure the field actually exists in the config.', + examples=[['credentials', 'access_token'], ['access_token']], + title='Config Path To Access Token', ) refresh_token_config_path: Optional[List[str]] = Field( - ["credentials", "refresh_token"], - description="Config path to the access token. Make sure the field actually exists in the config.", - examples=[["credentials", "refresh_token"], ["refresh_token"]], - title="Config Path To Refresh Token", + ['credentials', 'refresh_token'], + description='Config path to the access token. Make sure the field actually exists in the config.', + examples=[['credentials', 'refresh_token'], ['refresh_token']], + title='Config Path To Refresh Token', ) token_expiry_date_config_path: Optional[List[str]] = Field( - ["credentials", "token_expiry_date"], - description="Config path to the expiry date. Make sure actually exists in the config.", - examples=[["credentials", "token_expiry_date"]], - title="Config Path To Expiry Date", + ['credentials', 'token_expiry_date'], + description='Config path to the expiry date. Make sure actually exists in the config.', + examples=[['credentials', 'token_expiry_date']], + title='Config Path To Expiry Date', ) class OAuthAuthenticator(BaseModel): - type: Literal["OAuthAuthenticator"] + type: Literal['OAuthAuthenticator'] client_id: str = Field( ..., - description="The OAuth client ID. Fill it in the user inputs.", + description='The OAuth client ID. Fill it in the user inputs.', examples=["{{ config['client_id }}", "{{ config['credentials']['client_id }}"], - title="Client ID", + title='Client ID', ) client_secret: str = Field( ..., - description="The OAuth client secret. Fill it in the user inputs.", + description='The OAuth client secret. 
Fill it in the user inputs.', examples=[ "{{ config['client_secret }}", "{{ config['credentials']['client_secret }}", ], - title="Client Secret", + title='Client Secret', ) refresh_token: Optional[str] = Field( None, - description="Credential artifact used to get a new access token.", + description='Credential artifact used to get a new access token.', examples=[ "{{ config['refresh_token'] }}", "{{ config['credentials]['refresh_token'] }}", ], - title="Refresh Token", + title='Refresh Token', ) token_refresh_endpoint: str = Field( ..., - description="The full URL to call to obtain a new access token.", - examples=["https://connect.squareup.com/oauth2/token"], - title="Token Refresh Endpoint", + description='The full URL to call to obtain a new access token.', + examples=['https://connect.squareup.com/oauth2/token'], + title='Token Refresh Endpoint', ) access_token_name: Optional[str] = Field( - "access_token", - description="The name of the property which contains the access token in the response from the token refresh endpoint.", - examples=["access_token"], - title="Access Token Property Name", + 'access_token', + description='The name of the property which contains the access token in the response from the token refresh endpoint.', + examples=['access_token'], + title='Access Token Property Name', ) expires_in_name: Optional[str] = Field( - "expires_in", - description="The name of the property which contains the expiry date in the response from the token refresh endpoint.", - examples=["expires_in"], - title="Token Expiry Property Name", + 'expires_in', + description='The name of the property which contains the expiry date in the response from the token refresh endpoint.', + examples=['expires_in'], + title='Token Expiry Property Name', ) grant_type: Optional[str] = Field( - "refresh_token", - description="Specifies the OAuth2 grant type. If set to refresh_token, the refresh_token needs to be provided as well. 
For client_credentials, only client id and secret are required. Other grant types are not officially supported.", - examples=["refresh_token", "client_credentials"], - title="Grant Type", + 'refresh_token', + description='Specifies the OAuth2 grant type. If set to refresh_token, the refresh_token needs to be provided as well. For client_credentials, only client id and secret are required. Other grant types are not officially supported.', + examples=['refresh_token', 'client_credentials'], + title='Grant Type', ) refresh_request_body: Optional[Dict[str, Any]] = Field( None, - description="Body of the request sent to get a new access token.", + description='Body of the request sent to get a new access token.', examples=[ { - "applicationId": "{{ config['application_id'] }}", - "applicationSecret": "{{ config['application_secret'] }}", - "token": "{{ config['token'] }}", + 'applicationId': "{{ config['application_id'] }}", + 'applicationSecret': "{{ config['application_secret'] }}", + 'token': "{{ config['token'] }}", } ], - title="Refresh Request Body", + title='Refresh Request Body', ) scopes: Optional[List[str]] = Field( None, - description="List of scopes that should be granted to the access token.", - examples=[["crm.list.read", "crm.objects.contacts.read", "crm.schema.contacts.read"]], - title="Scopes", + description='List of scopes that should be granted to the access token.', + examples=[ + ['crm.list.read', 'crm.objects.contacts.read', 'crm.schema.contacts.read'] + ], + title='Scopes', ) token_expiry_date: Optional[str] = Field( None, - description="The access token expiry date.", - examples=["2023-04-06T07:12:10.421833+00:00", 1680842386], - title="Token Expiry Date", + description='The access token expiry date.', + examples=['2023-04-06T07:12:10.421833+00:00', 1680842386], + title='Token Expiry Date', ) token_expiry_date_format: Optional[str] = Field( None, - description="The format of the time to expiration datetime. 
Provide it if the time is returned as a date-time string instead of seconds.", - examples=["%Y-%m-%d %H:%M:%S.%f+00:00"], - title="Token Expiry Date Format", + description='The format of the time to expiration datetime. Provide it if the time is returned as a date-time string instead of seconds.', + examples=['%Y-%m-%d %H:%M:%S.%f+00:00'], + title='Token Expiry Date Format', ) refresh_token_updater: Optional[RefreshTokenUpdater] = Field( None, - description="When the token updater is defined, new refresh tokens, access tokens and the access token expiry date are written back from the authentication response to the config object. This is important if the refresh token can only used once.", - title="Token Updater", + description='When the token updater is defined, new refresh tokens, access tokens and the access token expiry date are written back from the authentication response to the config object. This is important if the refresh token can only used once.', + title='Token Updater', ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') class ExponentialBackoffStrategy(BaseModel): - type: Literal["ExponentialBackoffStrategy"] + type: Literal['ExponentialBackoffStrategy'] factor: Optional[Union[float, str]] = Field( 5, - description="Multiplicative constant applied on each retry.", - examples=[5, 5.5, "10"], - title="Factor", + description='Multiplicative constant applied on each retry.', + examples=[5, 5.5, '10'], + title='Factor', ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') class SessionTokenRequestBearerAuthenticator(BaseModel): - type: Literal["Bearer"] + type: Literal['Bearer'] class HttpMethodEnum(Enum): - GET = "GET" - POST = "POST" + GET = 'GET' + POST = 'POST' class Action(Enum): - SUCCESS = "SUCCESS" - FAIL = "FAIL" - RETRY = "RETRY" - IGNORE = "IGNORE" + 
SUCCESS = 'SUCCESS' + FAIL = 'FAIL' + RETRY = 'RETRY' + IGNORE = 'IGNORE' class HttpResponseFilter(BaseModel): - type: Literal["HttpResponseFilter"] + type: Literal['HttpResponseFilter'] action: Action = Field( ..., - description="Action to execute if a response matches the filter.", - examples=["SUCCESS", "FAIL", "RETRY", "IGNORE"], - title="Action", + description='Action to execute if a response matches the filter.', + examples=['SUCCESS', 'FAIL', 'RETRY', 'IGNORE'], + title='Action', ) error_message: Optional[str] = Field( None, - description="Error Message to display if the response matches the filter.", - title="Error Message", + description='Error Message to display if the response matches the filter.', + title='Error Message', ) error_message_contains: Optional[str] = Field( None, - description="Match the response if its error message contains the substring.", - example=["This API operation is not enabled for this site"], - title="Error Message Substring", + description='Match the response if its error message contains the substring.', + example=['This API operation is not enabled for this site'], + title='Error Message Substring', ) http_codes: Optional[List[int]] = Field( None, - description="Match the response if its HTTP code is included in this list.", + description='Match the response if its HTTP code is included in this list.', examples=[[420, 429], [500]], - title="HTTP Codes", + title='HTTP Codes', ) predicate: Optional[str] = Field( None, - description="Match the response if the predicate evaluates to true.", + description='Match the response if the predicate evaluates to true.', examples=[ "{{ 'Too much requests' in response }}", "{{ 'error_code' in response and response['error_code'] == 'ComplexityException' }}", ], - title="Predicate", + title='Predicate', ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') class InlineSchemaLoader(BaseModel): - 
type: Literal["InlineSchemaLoader"] + type: Literal['InlineSchemaLoader'] schema_: Optional[Dict[str, Any]] = Field( None, - alias="schema", + alias='schema', description='Describes a streams\' schema. Refer to the Data Types documentation for more details on which types are valid.', - title="Schema", + title='Schema', ) class JsonFileSchemaLoader(BaseModel): - type: Literal["JsonFileSchemaLoader"] + type: Literal['JsonFileSchemaLoader'] file_path: Optional[str] = Field( None, description="Path to the JSON file defining the schema. The path is relative to the connector module's root.", - example=["./schemas/users.json"], - title="File Path", + example=['./schemas/users.json'], + title='File Path', ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') class JsonDecoder(BaseModel): - type: Literal["JsonDecoder"] + type: Literal['JsonDecoder'] class MinMaxDatetime(BaseModel): - type: Literal["MinMaxDatetime"] + type: Literal['MinMaxDatetime'] datetime: str = Field( ..., - description="Datetime value.", - examples=["2021-01-01", "2021-01-01T00:00:00Z", "{{ config['start_time'] }}"], - title="Datetime", + description='Datetime value.', + examples=['2021-01-01', '2021-01-01T00:00:00Z', "{{ config['start_time'] }}"], + title='Datetime', ) datetime_format: Optional[str] = Field( - "", + '', description='Format of the datetime value. Defaults to "%Y-%m-%dT%H:%M:%S.%f%z" if left empty. Use placeholders starting with "%" to describe the format the API is using. 
The following placeholders are available:\n * **%s**: Epoch unix timestamp - `1686218963`\n * **%a**: Weekday (abbreviated) - `Sun`\n * **%A**: Weekday (full) - `Sunday`\n * **%w**: Weekday (decimal) - `0` (Sunday), `6` (Saturday)\n * **%d**: Day of the month (zero-padded) - `01`, `02`, ..., `31`\n * **%b**: Month (abbreviated) - `Jan`\n * **%B**: Month (full) - `January`\n * **%m**: Month (zero-padded) - `01`, `02`, ..., `12`\n * **%y**: Year (without century, zero-padded) - `00`, `01`, ..., `99`\n * **%Y**: Year (with century) - `0001`, `0002`, ..., `9999`\n * **%H**: Hour (24-hour, zero-padded) - `00`, `01`, ..., `23`\n * **%I**: Hour (12-hour, zero-padded) - `01`, `02`, ..., `12`\n * **%p**: AM/PM indicator\n * **%M**: Minute (zero-padded) - `00`, `01`, ..., `59`\n * **%S**: Second (zero-padded) - `00`, `01`, ..., `59`\n * **%f**: Microsecond (zero-padded to 6 digits) - `000000`, `000001`, ..., `999999`\n * **%z**: UTC offset - `(empty)`, `+0000`, `-04:00`\n * **%Z**: Time zone name - `(empty)`, `UTC`, `GMT`\n * **%j**: Day of the year (zero-padded) - `001`, `002`, ..., `366`\n * **%U**: Week number of the year (Sunday as first day) - `00`, `01`, ..., `53`\n * **%W**: Week number of the year (Monday as first day) - `00`, `01`, ..., `53`\n * **%c**: Date and time representation - `Tue Aug 16 21:30:00 1988`\n * **%x**: Date representation - `08/16/1988`\n * **%X**: Time representation - `21:30:00`\n * **%%**: Literal \'%\' character\n\n Some placeholders depend on the locale of the underlying system - in most cases this locale is configured as en/US. For more information see the [Python documentation](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes).\n', - examples=["%Y-%m-%dT%H:%M:%S.%f%z", "%Y-%m-%d", "%s"], - title="Datetime Format", + examples=['%Y-%m-%dT%H:%M:%S.%f%z', '%Y-%m-%d', '%s'], + title='Datetime Format', ) max_datetime: Optional[str] = Field( None, - description="Ceiling applied on the datetime value. 
Must be formatted with the datetime_format field.", - examples=["2021-01-01T00:00:00Z", "2021-01-01"], - title="Max Datetime", + description='Ceiling applied on the datetime value. Must be formatted with the datetime_format field.', + examples=['2021-01-01T00:00:00Z', '2021-01-01'], + title='Max Datetime', ) min_datetime: Optional[str] = Field( None, - description="Floor applied on the datetime value. Must be formatted with the datetime_format field.", - examples=["2010-01-01T00:00:00Z", "2010-01-01"], - title="Min Datetime", + description='Floor applied on the datetime value. Must be formatted with the datetime_format field.', + examples=['2010-01-01T00:00:00Z', '2010-01-01'], + title='Min Datetime', ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') class NoAuth(BaseModel): - type: Literal["NoAuth"] - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + type: Literal['NoAuth'] + parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') class NoPagination(BaseModel): - type: Literal["NoPagination"] + type: Literal['NoPagination'] class OAuthConfigSpecification(BaseModel): class Config: extra = Extra.allow - oauth_user_input_from_connector_config_specification: Optional[Dict[str, Any]] = Field( + oauth_user_input_from_connector_config_specification: Optional[ + Dict[str, Any] + ] = Field( None, description="OAuth specific blob. 
This is a Json Schema used to validate Json configurations used as input to OAuth.\nMust be a valid non-nested JSON that refers to properties from ConnectorSpecification.connectionSpecification\nusing special annotation 'path_in_connector_config'.\nThese are input values the user is entering through the UI to authenticate to the connector, that might also shared\nas inputs for syncing data via the connector.\nExamples:\nif no connector values is shared during oauth flow, oauth_user_input_from_connector_config_specification=[]\nif connector values such as 'app_id' inside the top level are used to generate the API url for the oauth flow,\n oauth_user_input_from_connector_config_specification={\n app_id: {\n type: string\n path_in_connector_config: ['app_id']\n }\n }\nif connector values such as 'info.app_id' nested inside another object are used to generate the API url for the oauth flow,\n oauth_user_input_from_connector_config_specification={\n app_id: {\n type: string\n path_in_connector_config: ['info', 'app_id']\n }\n }", examples=[ - {"app_id": {"type": "string", "path_in_connector_config": ["app_id"]}}, + {'app_id': {'type': 'string', 'path_in_connector_config': ['app_id']}}, { - "app_id": { - "type": "string", - "path_in_connector_config": ["info", "app_id"], + 'app_id': { + 'type': 'string', + 'path_in_connector_config': ['info', 'app_id'], } }, ], - title="OAuth user input", + title='OAuth user input', ) complete_oauth_output_specification: Optional[Dict[str, Any]] = Field( None, description="OAuth specific blob. 
This is a Json Schema used to validate Json configurations produced by the OAuth flows as they are\nreturned by the distant OAuth APIs.\nMust be a valid JSON describing the fields to merge back to `ConnectorSpecification.connectionSpecification`.\nFor each field, a special annotation `path_in_connector_config` can be specified to determine where to merge it,\nExamples:\n complete_oauth_output_specification={\n refresh_token: {\n type: string,\n path_in_connector_config: ['credentials', 'refresh_token']\n }\n }", examples=[ { - "refresh_token": { - "type": "string,", - "path_in_connector_config": ["credentials", "refresh_token"], + 'refresh_token': { + 'type': 'string,', + 'path_in_connector_config': ['credentials', 'refresh_token'], } } ], - title="OAuth output specification", + title='OAuth output specification', ) complete_oauth_server_input_specification: Optional[Dict[str, Any]] = Field( None, - description="OAuth specific blob. This is a Json Schema used to validate Json configurations persisted as Airbyte Server configurations.\nMust be a valid non-nested JSON describing additional fields configured by the Airbyte Instance or Workspace Admins to be used by the\nserver when completing an OAuth flow (typically exchanging an auth code for refresh token).\nExamples:\n complete_oauth_server_input_specification={\n client_id: {\n type: string\n },\n client_secret: {\n type: string\n }\n }", - examples=[{"client_id": {"type": "string"}, "client_secret": {"type": "string"}}], - title="OAuth input specification", + description='OAuth specific blob. 
This is a Json Schema used to validate Json configurations persisted as Airbyte Server configurations.\nMust be a valid non-nested JSON describing additional fields configured by the Airbyte Instance or Workspace Admins to be used by the\nserver when completing an OAuth flow (typically exchanging an auth code for refresh token).\nExamples:\n complete_oauth_server_input_specification={\n client_id: {\n type: string\n },\n client_secret: {\n type: string\n }\n }', + examples=[ + {'client_id': {'type': 'string'}, 'client_secret': {'type': 'string'}} + ], + title='OAuth input specification', ) complete_oauth_server_output_specification: Optional[Dict[str, Any]] = Field( None, description="OAuth specific blob. This is a Json Schema used to validate Json configurations persisted as Airbyte Server configurations that\nalso need to be merged back into the connector configuration at runtime.\nThis is a subset configuration of `complete_oauth_server_input_specification` that filters fields out to retain only the ones that\nare necessary for the connector to function with OAuth. 
(some fields could be used during oauth flows but not needed afterwards, therefore\nthey would be listed in the `complete_oauth_server_input_specification` but not `complete_oauth_server_output_specification`)\nMust be a valid non-nested JSON describing additional fields configured by the Airbyte Instance or Workspace Admins to be used by the\nconnector when using OAuth flow APIs.\nThese fields are to be merged back to `ConnectorSpecification.connectionSpecification`.\nFor each field, a special annotation `path_in_connector_config` can be specified to determine where to merge it,\nExamples:\n complete_oauth_server_output_specification={\n client_id: {\n type: string,\n path_in_connector_config: ['credentials', 'client_id']\n },\n client_secret: {\n type: string,\n path_in_connector_config: ['credentials', 'client_secret']\n }\n }", examples=[ { - "client_id": { - "type": "string,", - "path_in_connector_config": ["credentials", "client_id"], + 'client_id': { + 'type': 'string,', + 'path_in_connector_config': ['credentials', 'client_id'], }, - "client_secret": { - "type": "string,", - "path_in_connector_config": ["credentials", "client_secret"], + 'client_secret': { + 'type': 'string,', + 'path_in_connector_config': ['credentials', 'client_secret'], }, } ], - title="OAuth server output specification", + title='OAuth server output specification', ) class OffsetIncrement(BaseModel): - type: Literal["OffsetIncrement"] + type: Literal['OffsetIncrement'] page_size: Optional[Union[int, str]] = Field( None, - description="The number of records to include in each pages.", + description='The number of records to include in each pages.', examples=[100, "{{ config['page_size'] }}"], - title="Limit", + title='Limit', ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') class PageIncrement(BaseModel): - type: Literal["PageIncrement"] + type: Literal['PageIncrement'] page_size: 
Optional[int] = Field( None, - description="The number of records to include in each pages.", - examples=[100, "100"], - title="Page Size", + description='The number of records to include in each pages.', + examples=[100, '100'], + title='Page Size', ) start_from_page: Optional[int] = Field( 0, - description="Index of the first page to request.", + description='Index of the first page to request.', examples=[0, 1], - title="Start From Page", + title='Start From Page', ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') class PrimaryKey(BaseModel): __root__: Union[str, List[str], List[List[str]]] = Field( ..., - description="The stream field to be used to distinguish unique records. Can either be a single field, an array of fields representing a composite key, or an array of arrays representing a composite key where the fields are nested fields.", - examples=["id", ["code", "type"]], - title="Primary Key", + description='The stream field to be used to distinguish unique records. Can either be a single field, an array of fields representing a composite key, or an array of arrays representing a composite key where the fields are nested fields.', + examples=['id', ['code', 'type']], + title='Primary Key', ) class RecordFilter(BaseModel): - type: Literal["RecordFilter"] + type: Literal['RecordFilter'] condition: Optional[str] = Field( - "", - description="The predicate to filter a record. Records will be removed if evaluated to False.", + '', + description='The predicate to filter a record. 
Records will be removed if evaluated to False.', examples=[ "{{ record['created_at'] >= stream_interval['start_time'] }}", "{{ record.status in ['active', 'expired'] }}", ], ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') class RemoveFields(BaseModel): - type: Literal["RemoveFields"] + type: Literal['RemoveFields'] field_pointers: List[List[str]] = Field( ..., - description="Array of paths defining the field to remove. Each item is an array whose field describe the path of a field to remove.", - examples=[["tags"], [["content", "html"], ["content", "plain_text"]]], - title="Field Paths", + description='Array of paths defining the field to remove. Each item is an array whose field describe the path of a field to remove.', + examples=[['tags'], [['content', 'html'], ['content', 'plain_text']]], + title='Field Paths', ) class RequestPath(BaseModel): - type: Literal["RequestPath"] + type: Literal['RequestPath'] class InjectInto(Enum): - request_parameter = "request_parameter" - header = "header" - body_data = "body_data" - body_json = "body_json" + request_parameter = 'request_parameter' + header = 'header' + body_data = 'body_data' + body_json = 'body_json' class RequestOption(BaseModel): - type: Literal["RequestOption"] + type: Literal['RequestOption'] field_name: str = Field( ..., - description="Configures which key should be used in the location that the descriptor is being injected into", - examples=["segment_id"], - title="Request Option", + description='Configures which key should be used in the location that the descriptor is being injected into', + examples=['segment_id'], + title='Request Option', ) inject_into: InjectInto = Field( ..., - description="Configures where the descriptor should be set on the HTTP requests. 
Note that request parameters that are already encoded in the URL path will not be duplicated.", - examples=["request_parameter", "header", "body_data", "body_json"], - title="Inject Into", + description='Configures where the descriptor should be set on the HTTP requests. Note that request parameters that are already encoded in the URL path will not be duplicated.', + examples=['request_parameter', 'header', 'body_data', 'body_json'], + title='Inject Into', ) @@ -636,246 +642,248 @@ class Config: class LegacySessionTokenAuthenticator(BaseModel): - type: Literal["LegacySessionTokenAuthenticator"] + type: Literal['LegacySessionTokenAuthenticator'] header: str = Field( ..., - description="The name of the session token header that will be injected in the request", - examples=["X-Session"], - title="Session Request Header", + description='The name of the session token header that will be injected in the request', + examples=['X-Session'], + title='Session Request Header', ) login_url: str = Field( ..., - description="Path of the login URL (do not include the base URL)", - examples=["session"], - title="Login Path", + description='Path of the login URL (do not include the base URL)', + examples=['session'], + title='Login Path', ) session_token: Optional[str] = Field( None, - description="Session token to use if using a pre-defined token. Not needed if authenticating with username + password pair", + description='Session token to use if using a pre-defined token. 
Not needed if authenticating with username + password pair', example=["{{ config['session_token'] }}"], - title="Session Token", + title='Session Token', ) session_token_response_key: str = Field( ..., - description="Name of the key of the session token to be extracted from the response", - examples=["id"], - title="Response Token Response Key", + description='Name of the key of the session token to be extracted from the response', + examples=['id'], + title='Response Token Response Key', ) username: Optional[str] = Field( None, - description="Username used to authenticate and obtain a session token", + description='Username used to authenticate and obtain a session token', examples=[" {{ config['username'] }}"], - title="Username", + title='Username', ) password: Optional[str] = Field( - "", - description="Password used to authenticate and obtain a session token", - examples=["{{ config['password'] }}", ""], - title="Password", + '', + description='Password used to authenticate and obtain a session token', + examples=["{{ config['password'] }}", ''], + title='Password', ) validate_session_url: str = Field( ..., - description="Path of the URL to use to validate that the session token is valid (do not include the base URL)", - examples=["user/current"], - title="Validate Session Path", + description='Path of the URL to use to validate that the session token is valid (do not include the base URL)', + examples=['user/current'], + title='Validate Session Path', ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') class WaitTimeFromHeader(BaseModel): - type: Literal["WaitTimeFromHeader"] + type: Literal['WaitTimeFromHeader'] header: str = Field( ..., - description="The name of the response header defining how long to wait before retrying.", - examples=["Retry-After"], - title="Response Header Name", + description='The name of the response header defining how long to wait 
before retrying.', + examples=['Retry-After'], + title='Response Header Name', ) regex: Optional[str] = Field( None, - description="Optional regex to apply on the header to extract its value. The regex should define a capture group defining the wait time.", - examples=["([-+]?\\d+)"], - title="Extraction Regex", + description='Optional regex to apply on the header to extract its value. The regex should define a capture group defining the wait time.', + examples=['([-+]?\\d+)'], + title='Extraction Regex', ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') class WaitUntilTimeFromHeader(BaseModel): - type: Literal["WaitUntilTimeFromHeader"] + type: Literal['WaitUntilTimeFromHeader'] header: str = Field( ..., - description="The name of the response header defining how long to wait before retrying.", - examples=["wait_time"], - title="Response Header", + description='The name of the response header defining how long to wait before retrying.', + examples=['wait_time'], + title='Response Header', ) min_wait: Optional[Union[float, str]] = Field( None, - description="Minimum time to wait before retrying.", - examples=[10, "60"], - title="Minimum Wait Time", + description='Minimum time to wait before retrying.', + examples=[10, '60'], + title='Minimum Wait Time', ) regex: Optional[str] = Field( None, - description="Optional regex to apply on the header to extract its value. The regex should define a capture group defining the wait time.", - examples=["([-+]?\\d+)"], - title="Extraction Regex", + description='Optional regex to apply on the header to extract its value. 
The regex should define a capture group defining the wait time.', + examples=['([-+]?\\d+)'], + title='Extraction Regex', ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') class ApiKeyAuthenticator(BaseModel): - type: Literal["ApiKeyAuthenticator"] + type: Literal['ApiKeyAuthenticator'] api_token: Optional[str] = Field( None, - description="The API key to inject in the request. Fill it in the user inputs.", + description='The API key to inject in the request. Fill it in the user inputs.', examples=["{{ config['api_key'] }}", "Token token={{ config['api_key'] }}"], - title="API Key", + title='API Key', ) header: Optional[str] = Field( None, - description="The name of the HTTP header that will be set to the API key. This setting is deprecated, use inject_into instead. Header and inject_into can not be defined at the same time.", - examples=["Authorization", "Api-Token", "X-Auth-Token"], - title="Header Name", + description='The name of the HTTP header that will be set to the API key. This setting is deprecated, use inject_into instead. Header and inject_into can not be defined at the same time.', + examples=['Authorization', 'Api-Token', 'X-Auth-Token'], + title='Header Name', ) inject_into: Optional[RequestOption] = Field( None, - description="Configure how the API Key will be sent in requests to the source API. Either inject_into or header has to be defined.", + description='Configure how the API Key will be sent in requests to the source API. 
Either inject_into or header has to be defined.', examples=[ - {"inject_into": "header", "field_name": "Authorization"}, - {"inject_into": "request_parameter", "field_name": "authKey"}, + {'inject_into': 'header', 'field_name': 'Authorization'}, + {'inject_into': 'request_parameter', 'field_name': 'authKey'}, ], - title="Inject API Key Into Outgoing HTTP Request", + title='Inject API Key Into Outgoing HTTP Request', ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') class AuthFlow(BaseModel): - auth_flow_type: Optional[AuthFlowType] = Field(None, description="The type of auth to use", title="Auth flow type") + auth_flow_type: Optional[AuthFlowType] = Field( + None, description='The type of auth to use', title='Auth flow type' + ) predicate_key: Optional[List[str]] = Field( None, - description="JSON path to a field in the connectorSpecification that should exist for the advanced auth to be applicable.", - examples=[["credentials", "auth_type"]], - title="Predicate key", + description='JSON path to a field in the connectorSpecification that should exist for the advanced auth to be applicable.', + examples=[['credentials', 'auth_type']], + title='Predicate key', ) predicate_value: Optional[str] = Field( None, - description="Value of the predicate_key fields for the advanced auth to be applicable.", - examples=["Oauth"], - title="Predicate value", + description='Value of the predicate_key fields for the advanced auth to be applicable.', + examples=['Oauth'], + title='Predicate value', ) oauth_config_specification: Optional[OAuthConfigSpecification] = None class CursorPagination(BaseModel): - type: Literal["CursorPagination"] + type: Literal['CursorPagination'] cursor_value: str = Field( ..., - description="Value of the cursor defining the next page to fetch.", + description='Value of the cursor defining the next page to fetch.', examples=[ - "{{ headers.link.next.cursor 
}}", + '{{ headers.link.next.cursor }}', "{{ last_records[-1]['key'] }}", "{{ response['nextPage'] }}", ], - title="Cursor Value", + title='Cursor Value', ) page_size: Optional[int] = Field( None, - description="The number of records to include in each pages.", + description='The number of records to include in each pages.', examples=[100], - title="Page Size", + title='Page Size', ) stop_condition: Optional[str] = Field( None, - description="Template string evaluating when to stop paginating.", + description='Template string evaluating when to stop paginating.', examples=[ - "{{ response.data.has_more is false }}", + '{{ response.data.has_more is false }}', "{{ 'next' not in headers['link'] }}", ], - title="Stop Condition", + title='Stop Condition', ) decoder: Optional[JsonDecoder] = Field( None, - description="Component decoding the response so records can be extracted.", - title="Decoder", + description='Component decoding the response so records can be extracted.', + title='Decoder', ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') class DatetimeBasedCursor(BaseModel): - type: Literal["DatetimeBasedCursor"] + type: Literal['DatetimeBasedCursor'] cursor_field: str = Field( ..., - description="The location of the value on a record that will be used as a bookmark during sync. To ensure no data loss, the API must return records in ascending order based on the cursor field. Nested fields are not supported, so the field must be at the top level of the record. You can use a combination of Add Field and Remove Field transformations to move the nested field to the top.", - examples=["created_at", "{{ config['record_cursor'] }}"], - title="Cursor Field", + description='The location of the value on a record that will be used as a bookmark during sync. To ensure no data loss, the API must return records in ascending order based on the cursor field. 
Nested fields are not supported, so the field must be at the top level of the record. You can use a combination of Add Field and Remove Field transformations to move the nested field to the top.', + examples=['created_at', "{{ config['record_cursor'] }}"], + title='Cursor Field', ) datetime_format: str = Field( ..., - description="The datetime format of the Cursor Field. Use placeholders starting with \"%\" to describe the format the API is using. The following placeholders are available:\n * **%s**: Epoch unix timestamp - `1686218963`\n * **%a**: Weekday (abbreviated) - `Sun`\n * **%A**: Weekday (full) - `Sunday`\n * **%w**: Weekday (decimal) - `0` (Sunday), `6` (Saturday)\n * **%d**: Day of the month (zero-padded) - `01`, `02`, ..., `31`\n * **%b**: Month (abbreviated) - `Jan`\n * **%B**: Month (full) - `January`\n * **%m**: Month (zero-padded) - `01`, `02`, ..., `12`\n * **%y**: Year (without century, zero-padded) - `00`, `01`, ..., `99`\n * **%Y**: Year (with century) - `0001`, `0002`, ..., `9999`\n * **%H**: Hour (24-hour, zero-padded) - `00`, `01`, ..., `23`\n * **%I**: Hour (12-hour, zero-padded) - `01`, `02`, ..., `12`\n * **%p**: AM/PM indicator\n * **%M**: Minute (zero-padded) - `00`, `01`, ..., `59`\n * **%S**: Second (zero-padded) - `00`, `01`, ..., `59`\n * **%f**: Microsecond (zero-padded to 6 digits) - `000000`\n * **%z**: UTC offset - `(empty)`, `+0000`, `-04:00`\n * **%Z**: Time zone name - `(empty)`, `UTC`, `GMT`\n * **%j**: Day of the year (zero-padded) - `001`, `002`, ..., `366`\n * **%U**: Week number of the year (starting Sunday) - `00`, ..., `53`\n * **%W**: Week number of the year (starting Monday) - `00`, ..., `53`\n * **%c**: Date and time - `Tue Aug 16 21:30:00 1988`\n * **%x**: Date standard format - `08/16/1988`\n * **%X**: Time standard format - `21:30:00`\n * **%%**: Literal '%' character\n\n Some placeholders depend on the locale of the underlying system - in most cases this locale is configured as en/US. 
For more information see the [Python documentation](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes).\n", - examples=["%Y-%m-%dT%H:%M:%S.%f%z", "%Y-%m-%d", "%s"], - title="Cursor Field Datetime Format", + description='The datetime format of the Cursor Field. Use placeholders starting with "%" to describe the format the API is using. The following placeholders are available:\n * **%s**: Epoch unix timestamp - `1686218963`\n * **%a**: Weekday (abbreviated) - `Sun`\n * **%A**: Weekday (full) - `Sunday`\n * **%w**: Weekday (decimal) - `0` (Sunday), `6` (Saturday)\n * **%d**: Day of the month (zero-padded) - `01`, `02`, ..., `31`\n * **%b**: Month (abbreviated) - `Jan`\n * **%B**: Month (full) - `January`\n * **%m**: Month (zero-padded) - `01`, `02`, ..., `12`\n * **%y**: Year (without century, zero-padded) - `00`, `01`, ..., `99`\n * **%Y**: Year (with century) - `0001`, `0002`, ..., `9999`\n * **%H**: Hour (24-hour, zero-padded) - `00`, `01`, ..., `23`\n * **%I**: Hour (12-hour, zero-padded) - `01`, `02`, ..., `12`\n * **%p**: AM/PM indicator\n * **%M**: Minute (zero-padded) - `00`, `01`, ..., `59`\n * **%S**: Second (zero-padded) - `00`, `01`, ..., `59`\n * **%f**: Microsecond (zero-padded to 6 digits) - `000000`\n * **%z**: UTC offset - `(empty)`, `+0000`, `-04:00`\n * **%Z**: Time zone name - `(empty)`, `UTC`, `GMT`\n * **%j**: Day of the year (zero-padded) - `001`, `002`, ..., `366`\n * **%U**: Week number of the year (starting Sunday) - `00`, ..., `53`\n * **%W**: Week number of the year (starting Monday) - `00`, ..., `53`\n * **%c**: Date and time - `Tue Aug 16 21:30:00 1988`\n * **%x**: Date standard format - `08/16/1988`\n * **%X**: Time standard format - `21:30:00`\n * **%%**: Literal \'%\' character\n\n Some placeholders depend on the locale of the underlying system - in most cases this locale is configured as en/US. 
For more information see the [Python documentation](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes).\n', + examples=['%Y-%m-%dT%H:%M:%S.%f%z', '%Y-%m-%d', '%s'], + title='Cursor Field Datetime Format', ) start_datetime: Union[str, MinMaxDatetime] = Field( ..., - description="The datetime that determines the earliest record that should be synced.", - examples=["2020-01-1T00:00:00Z", "{{ config['start_time'] }}"], - title="Start Datetime", + description='The datetime that determines the earliest record that should be synced.', + examples=['2020-01-1T00:00:00Z', "{{ config['start_time'] }}"], + title='Start Datetime', ) cursor_granularity: Optional[str] = Field( None, - description="Smallest increment the datetime_format has (ISO 8601 duration) that is used to ensure the start of a slice does not overlap with the end of the previous one, e.g. for %Y-%m-%d the granularity should be P1D, for %Y-%m-%dT%H:%M:%SZ the granularity should be PT1S. Given this field is provided, `step` needs to be provided as well.", - examples=["PT1S"], - title="Cursor Granularity", + description='Smallest increment the datetime_format has (ISO 8601 duration) that is used to ensure the start of a slice does not overlap with the end of the previous one, e.g. for %Y-%m-%d the granularity should be P1D, for %Y-%m-%dT%H:%M:%SZ the granularity should be PT1S. Given this field is provided, `step` needs to be provided as well.', + examples=['PT1S'], + title='Cursor Granularity', ) end_datetime: Optional[Union[str, MinMaxDatetime]] = Field( None, - description="The datetime that determines the last record that should be synced. If not provided, `{{ now_utc() }}` will be used.", - examples=["2021-01-1T00:00:00Z", "{{ now_utc() }}", "{{ day_delta(-1) }}"], - title="End Datetime", + description='The datetime that determines the last record that should be synced. 
If not provided, `{{ now_utc() }}` will be used.', + examples=['2021-01-1T00:00:00Z', '{{ now_utc() }}', '{{ day_delta(-1) }}'], + title='End Datetime', ) end_time_option: Optional[RequestOption] = Field( None, - description="Optionally configures how the end datetime will be sent in requests to the source API.", - title="Inject End Time Into Outgoing HTTP Request", + description='Optionally configures how the end datetime will be sent in requests to the source API.', + title='Inject End Time Into Outgoing HTTP Request', ) is_data_feed: Optional[bool] = Field( None, - description="A data feed API is an API that does not allow filtering and paginates the content from the most recent to the least recent. Given this, the CDK needs to know when to stop paginating and this field will generate a stop condition for pagination.", - title="Whether the target API is formatted as a data feed", + description='A data feed API is an API that does not allow filtering and paginates the content from the most recent to the least recent. Given this, the CDK needs to know when to stop paginating and this field will generate a stop condition for pagination.', + title='Whether the target API is formatted as a data feed', ) lookback_window: Optional[str] = Field( None, - description="Time interval before the start_datetime to read data for, e.g. P1M for looking back one month.", - examples=["P1D", "P{{ config['lookback_days'] }}D"], - title="Lookback Window", + description='Time interval before the start_datetime to read data for, e.g. 
P1M for looking back one month.', + examples=['P1D', "P{{ config['lookback_days'] }}D"], + title='Lookback Window', ) partition_field_end: Optional[str] = Field( None, - description="Name of the partition start time field.", - examples=["ending_time"], - title="Partition Field End", + description='Name of the partition start time field.', + examples=['ending_time'], + title='Partition Field End', ) partition_field_start: Optional[str] = Field( None, - description="Name of the partition end time field.", - examples=["starting_time"], - title="Partition Field Start", + description='Name of the partition end time field.', + examples=['starting_time'], + title='Partition Field Start', ) start_time_option: Optional[RequestOption] = Field( None, - description="Optionally configures how the start datetime will be sent in requests to the source API.", - title="Inject Start Time Into Outgoing HTTP Request", + description='Optionally configures how the start datetime will be sent in requests to the source API.', + title='Inject Start Time Into Outgoing HTTP Request', ) step: Optional[str] = Field( None, - description="The size of the time window (ISO8601 duration). Given this field is provided, `cursor_granularity` needs to be provided as well.", - examples=["P1W", "{{ config['step_increment'] }}"], - title="Step", + description='The size of the time window (ISO8601 duration). 
Given this field is provided, `cursor_granularity` needs to be provided as well.', + examples=['P1W', "{{ config['step_increment'] }}"], + title='Step', ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') class DefaultErrorHandler(BaseModel): - type: Literal["DefaultErrorHandler"] + type: Literal['DefaultErrorHandler'] backoff_strategies: Optional[ List[ Union[ @@ -888,142 +896,144 @@ class DefaultErrorHandler(BaseModel): ] ] = Field( None, - description="List of backoff strategies to use to determine how long to wait before retrying a retryable request.", - title="Backoff Strategies", + description='List of backoff strategies to use to determine how long to wait before retrying a retryable request.', + title='Backoff Strategies', ) max_retries: Optional[int] = Field( 5, - description="The maximum number of time to retry a retryable request before giving up and failing.", + description='The maximum number of time to retry a retryable request before giving up and failing.', examples=[5, 0, 10], - title="Max Retry Count", + title='Max Retry Count', ) response_filters: Optional[List[HttpResponseFilter]] = Field( None, description="List of response filters to iterate on when deciding how to handle an error. 
When using an array of multiple filters, the filters will be applied sequentially and the response will be selected if it matches any of the filter's predicate.", - title="Response Filters", + title='Response Filters', ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') class DefaultPaginator(BaseModel): - type: Literal["DefaultPaginator"] - pagination_strategy: Union[CursorPagination, CustomPaginationStrategy, OffsetIncrement, PageIncrement] = Field( + type: Literal['DefaultPaginator'] + pagination_strategy: Union[ + CursorPagination, CustomPaginationStrategy, OffsetIncrement, PageIncrement + ] = Field( ..., - description="Strategy defining how records are paginated.", - title="Pagination Strategy", + description='Strategy defining how records are paginated.', + title='Pagination Strategy', ) decoder: Optional[JsonDecoder] = Field( None, - description="Component decoding the response so records can be extracted.", - title="Decoder", + description='Component decoding the response so records can be extracted.', + title='Decoder', ) page_size_option: Optional[RequestOption] = None page_token_option: Optional[Union[RequestOption, RequestPath]] = None - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') class DpathExtractor(BaseModel): - type: Literal["DpathExtractor"] + type: Literal['DpathExtractor'] field_path: List[str] = Field( ..., description='List of potentially nested fields describing the full path of the field to extract. Use "*" to extract all values from an array. 
See more info in the [docs](https://docs.airbyte.com/connector-development/config-based/understanding-the-yaml-file/record-selector).', examples=[ - ["data"], - ["data", "records"], - ["data", "{{ parameters.name }}"], - ["data", "*", "record"], + ['data'], + ['data', 'records'], + ['data', '{{ parameters.name }}'], + ['data', '*', 'record'], ], - title="Field Path", + title='Field Path', ) decoder: Optional[JsonDecoder] = Field( None, - description="Component decoding the response so records can be extracted.", - title="Decoder", + description='Component decoding the response so records can be extracted.', + title='Decoder', ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') class SessionTokenRequestApiKeyAuthenticator(BaseModel): - type: Literal["ApiKey"] + type: Literal['ApiKey'] inject_into: RequestOption = Field( ..., - description="Configure how the API Key will be sent in requests to the source API.", + description='Configure how the API Key will be sent in requests to the source API.', examples=[ - {"inject_into": "header", "field_name": "Authorization"}, - {"inject_into": "request_parameter", "field_name": "authKey"}, + {'inject_into': 'header', 'field_name': 'Authorization'}, + {'inject_into': 'request_parameter', 'field_name': 'authKey'}, ], - title="Inject API Key Into Outgoing HTTP Request", + title='Inject API Key Into Outgoing HTTP Request', ) class ListPartitionRouter(BaseModel): - type: Literal["ListPartitionRouter"] + type: Literal['ListPartitionRouter'] cursor_field: str = Field( ..., description='While iterating over list values, the name of field used to reference a list value. The partition value can be accessed with string interpolation. e.g. 
"{{ stream_partition[\'my_key\'] }}" where "my_key" is the value of the cursor_field.', - examples=["section", "{{ config['section_key'] }}"], - title="Current Partition Value Identifier", + examples=['section', "{{ config['section_key'] }}"], + title='Current Partition Value Identifier', ) values: Union[str, List[str]] = Field( ..., - description="The list of attributes being iterated over and used as input for the requests made to the source API.", - examples=[["section_a", "section_b", "section_c"], "{{ config['sections'] }}"], - title="Partition Values", + description='The list of attributes being iterated over and used as input for the requests made to the source API.', + examples=[['section_a', 'section_b', 'section_c'], "{{ config['sections'] }}"], + title='Partition Values', ) request_option: Optional[RequestOption] = Field( None, - description="A request option describing where the list value should be injected into and under what field name if applicable.", - title="Inject Partition Value Into Outgoing HTTP Request", + description='A request option describing where the list value should be injected into and under what field name if applicable.', + title='Inject Partition Value Into Outgoing HTTP Request', ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') class RecordSelector(BaseModel): - type: Literal["RecordSelector"] + type: Literal['RecordSelector'] extractor: Union[CustomRecordExtractor, DpathExtractor] record_filter: Optional[RecordFilter] = Field( None, - description="Responsible for filtering records to be emitted by the Source.", - title="Record Filter", + description='Responsible for filtering records to be emitted by the Source.', + title='Record Filter', ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') class Spec(BaseModel): - type: 
Literal["Spec"] + type: Literal['Spec'] connection_specification: Dict[str, Any] = Field( ..., - description="A connection specification describing how a the connector can be configured.", - title="Connection Specification", + description='A connection specification describing how a the connector can be configured.', + title='Connection Specification', ) documentation_url: Optional[str] = Field( None, description="URL of the connector's documentation page.", - examples=["https://docs.airbyte.com/integrations/sources/dremio"], - title="Documentation URL", + examples=['https://docs.airbyte.com/integrations/sources/dremio'], + title='Documentation URL', ) advanced_auth: Optional[AuthFlow] = Field( None, - description="Advanced specification for configuring the authentication flow.", - title="Advanced Auth", + description='Advanced specification for configuring the authentication flow.', + title='Advanced Auth', ) class CompositeErrorHandler(BaseModel): - type: Literal["CompositeErrorHandler"] + type: Literal['CompositeErrorHandler'] error_handlers: List[Union[CompositeErrorHandler, DefaultErrorHandler]] = Field( ..., - description="List of error handlers to iterate on to determine how to handle a failed response.", - title="Error Handlers", + description='List of error handlers to iterate on to determine how to handle a failed response.', + title='Error Handlers', ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') class DeclarativeSource(BaseModel): class Config: extra = Extra.forbid - type: Literal["DeclarativeSource"] + type: Literal['DeclarativeSource'] check: CheckStream streams: List[DeclarativeStream] version: str @@ -1032,7 +1042,7 @@ class Config: spec: Optional[Spec] = None metadata: Optional[Dict[str, Any]] = Field( None, - description="For internal Airbyte use only - DO NOT modify manually. 
Used by consumers of declarative manifests for storing related metadata.", + description='For internal Airbyte use only - DO NOT modify manually. Used by consumers of declarative manifests for storing related metadata.', ) @@ -1040,91 +1050,101 @@ class DeclarativeStream(BaseModel): class Config: extra = Extra.allow - type: Literal["DeclarativeStream"] + type: Literal['DeclarativeStream'] retriever: Union[CustomRetriever, SimpleRetriever] = Field( ..., - description="Component used to coordinate how records are extracted across stream slices and request pages.", - title="Retriever", + description='Component used to coordinate how records are extracted across stream slices and request pages.', + title='Retriever', ) - incremental_sync: Optional[Union[CustomIncrementalSync, DatetimeBasedCursor]] = Field( + incremental_sync: Optional[ + Union[CustomIncrementalSync, DatetimeBasedCursor] + ] = Field( None, - description="Component used to fetch data incrementally based on a time field in the data.", - title="Incremental Sync", + description='Component used to fetch data incrementally based on a time field in the data.', + title='Incremental Sync', + ) + name: Optional[str] = Field( + '', description='The stream name.', example=['Users'], title='Name' + ) + primary_key: Optional[PrimaryKey] = Field( + '', description='The primary key of the stream.', title='Primary Key' ) - name: Optional[str] = Field("", description="The stream name.", example=["Users"], title="Name") - primary_key: Optional[PrimaryKey] = Field("", description="The primary key of the stream.", title="Primary Key") schema_loader: Optional[Union[InlineSchemaLoader, JsonFileSchemaLoader]] = Field( None, - description="Component used to retrieve the schema for the current stream.", - title="Schema Loader", + description='Component used to retrieve the schema for the current stream.', + title='Schema Loader', ) - transformations: Optional[List[Union[AddFields, CustomTransformation, RemoveFields]]] = Field( + 
transformations: Optional[ + List[Union[AddFields, CustomTransformation, RemoveFields]] + ] = Field( None, - description="A list of transformations to be applied to each output record.", - title="Transformations", + description='A list of transformations to be applied to each output record.', + title='Transformations', ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') class SessionTokenAuthenticator(BaseModel): - type: Literal["SessionTokenAuthenticator"] + type: Literal['SessionTokenAuthenticator'] login_requester: HttpRequester = Field( ..., - description="Description of the request to perform to obtain a session token to perform data requests. The response body is expected to be a JSON object with a session token property.", + description='Description of the request to perform to obtain a session token to perform data requests. The response body is expected to be a JSON object with a session token property.', examples=[ { - "type": "HttpRequester", - "url_base": "https://my_api.com", - "path": "/login", - "authenticator": { - "type": "BasicHttpAuthenticator", - "username": "{{ config.username }}", - "password": "{{ config.password }}", + 'type': 'HttpRequester', + 'url_base': 'https://my_api.com', + 'path': '/login', + 'authenticator': { + 'type': 'BasicHttpAuthenticator', + 'username': '{{ config.username }}', + 'password': '{{ config.password }}', }, } ], - title="Login Requester", + title='Login Requester', ) session_token_path: List[str] = Field( ..., - description="The path in the response body returned from the login requester to the session token.", - examples=[["access_token"], ["result", "token"]], - title="Session Token Path", + description='The path in the response body returned from the login requester to the session token.', + examples=[['access_token'], ['result', 'token']], + title='Session Token Path', ) expiration_duration: Optional[str] = Field( 
None, - description="The duration in ISO 8601 duration notation after which the session token expires, starting from the time it was obtained. Omitting it will result in the session token being refreshed for every request.", - examples=["PT1H", "P1D"], - title="Expiration Duration", + description='The duration in ISO 8601 duration notation after which the session token expires, starting from the time it was obtained. Omitting it will result in the session token being refreshed for every request.', + examples=['PT1H', 'P1D'], + title='Expiration Duration', ) - request_authentication: Union[SessionTokenRequestApiKeyAuthenticator, SessionTokenRequestBearerAuthenticator] = Field( + request_authentication: Union[ + SessionTokenRequestApiKeyAuthenticator, SessionTokenRequestBearerAuthenticator + ] = Field( ..., - description="Authentication method to use for requests sent to the API, specifying how to inject the session token.", - title="Data Request Authentication", + description='Authentication method to use for requests sent to the API, specifying how to inject the session token.', + title='Data Request Authentication', ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') class HttpRequester(BaseModel): - type: Literal["HttpRequester"] + type: Literal['HttpRequester'] url_base: str = Field( ..., - description="Base URL of the API source. Do not put sensitive information (e.g. API tokens) into this field - Use the Authentication component for this.", + description='Base URL of the API source. Do not put sensitive information (e.g. 
API tokens) into this field - Use the Authentication component for this.', examples=[ - "https://connect.squareup.com/v2", + 'https://connect.squareup.com/v2', "{{ config['base_url'] or 'https://app.posthog.com'}}/api/", ], - title="API Base URL", + title='API Base URL', ) path: str = Field( ..., - description="Path the specific API endpoint that this stream represents. Do not put sensitive information (e.g. API tokens) into this field - Use the Authentication component for this.", + description='Path the specific API endpoint that this stream represents. Do not put sensitive information (e.g. API tokens) into this field - Use the Authentication component for this.', examples=[ - "/products", + '/products', "/quotes/{{ stream_partition['id'] }}/quote_line_groups", "/trades/{{ config['symbol_id'] }}/history", ], - title="URL Path", + title='URL Path', ) authenticator: Optional[ Union[ @@ -1139,92 +1159,96 @@ class HttpRequester(BaseModel): ] ] = Field( None, - description="Authentication method to use for requests sent to the API.", - title="Authenticator", + description='Authentication method to use for requests sent to the API.', + title='Authenticator', ) - error_handler: Optional[Union[DefaultErrorHandler, CustomErrorHandler, CompositeErrorHandler]] = Field( + error_handler: Optional[ + Union[DefaultErrorHandler, CustomErrorHandler, CompositeErrorHandler] + ] = Field( None, - description="Error handler component that defines how to handle errors.", - title="Error Handler", + description='Error handler component that defines how to handle errors.', + title='Error Handler', ) http_method: Optional[Union[str, HttpMethodEnum]] = Field( - "GET", - description="The HTTP method used to fetch data from the source (can be GET or POST).", - examples=["GET", "POST"], - title="HTTP Method", + 'GET', + description='The HTTP method used to fetch data from the source (can be GET or POST).', + examples=['GET', 'POST'], + title='HTTP Method', ) request_body_data: 
Optional[Union[str, Dict[str, str]]] = Field( None, - description="Specifies how to populate the body of the request with a non-JSON payload. Plain text will be sent as is, whereas objects will be converted to a urlencoded form.", + description='Specifies how to populate the body of the request with a non-JSON payload. Plain text will be sent as is, whereas objects will be converted to a urlencoded form.', examples=[ '[{"clause": {"type": "timestamp", "operator": 10, "parameters":\n [{"value": {{ stream_interval[\'start_time\'] | int * 1000 }} }]\n }, "orderBy": 1, "columnName": "Timestamp"}]/\n' ], - title="Request Body Payload (Non-JSON)", + title='Request Body Payload (Non-JSON)', ) request_body_json: Optional[Union[str, Dict[str, Any]]] = Field( None, - description="Specifies how to populate the body of the request with a JSON payload. Can contain nested objects.", + description='Specifies how to populate the body of the request with a JSON payload. Can contain nested objects.', examples=[ - {"sort_order": "ASC", "sort_field": "CREATED_AT"}, - {"key": "{{ config['value'] }}"}, - {"sort": {"field": "updated_at", "order": "ascending"}}, + {'sort_order': 'ASC', 'sort_field': 'CREATED_AT'}, + {'key': "{{ config['value'] }}"}, + {'sort': {'field': 'updated_at', 'order': 'ascending'}}, ], - title="Request Body JSON Payload", + title='Request Body JSON Payload', ) request_headers: Optional[Union[str, Dict[str, str]]] = Field( None, - description="Return any non-auth headers. Authentication headers will overwrite any overlapping headers returned from this method.", - examples=[{"Output-Format": "JSON"}, {"Version": "{{ config['version'] }}"}], - title="Request Headers", + description='Return any non-auth headers. 
Authentication headers will overwrite any overlapping headers returned from this method.', + examples=[{'Output-Format': 'JSON'}, {'Version': "{{ config['version'] }}"}], + title='Request Headers', ) request_parameters: Optional[Union[str, Dict[str, str]]] = Field( None, - description="Specifies the query parameters that should be set on an outgoing HTTP request given the inputs.", + description='Specifies the query parameters that should be set on an outgoing HTTP request given the inputs.', examples=[ - {"unit": "day"}, + {'unit': 'day'}, { - "query": 'last_event_time BETWEEN TIMESTAMP "{{ stream_interval.start_time }}" AND TIMESTAMP "{{ stream_interval.end_time }}"' + 'query': 'last_event_time BETWEEN TIMESTAMP "{{ stream_interval.start_time }}" AND TIMESTAMP "{{ stream_interval.end_time }}"' }, - {"searchIn": "{{ ','.join(config.get('search_in', [])) }}"}, - {"sort_by[asc]": "updated_at"}, + {'searchIn': "{{ ','.join(config.get('search_in', [])) }}"}, + {'sort_by[asc]': 'updated_at'}, ], - title="Query Parameters", + title='Query Parameters', ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') class ParentStreamConfig(BaseModel): - type: Literal["ParentStreamConfig"] + type: Literal['ParentStreamConfig'] parent_key: str = Field( ..., - description="The primary key of records from the parent stream that will be used during the retrieval of records for the current substream. This parent identifier field is typically a characteristic of the child records being extracted from the source API.", - examples=["id", "{{ config['parent_record_id'] }}"], - title="Parent Key", + description='The primary key of records from the parent stream that will be used during the retrieval of records for the current substream. 
This parent identifier field is typically a characteristic of the child records being extracted from the source API.', + examples=['id', "{{ config['parent_record_id'] }}"], + title='Parent Key', + ) + stream: DeclarativeStream = Field( + ..., description='Reference to the parent stream.', title='Parent Stream' ) - stream: DeclarativeStream = Field(..., description="Reference to the parent stream.", title="Parent Stream") partition_field: str = Field( ..., - description="While iterating over parent records during a sync, the parent_key value can be referenced by using this field.", - examples=["parent_id", "{{ config['parent_partition_field'] }}"], - title="Current Parent Key Value Identifier", + description='While iterating over parent records during a sync, the parent_key value can be referenced by using this field.', + examples=['parent_id', "{{ config['parent_partition_field'] }}"], + title='Current Parent Key Value Identifier', ) request_option: Optional[RequestOption] = Field( None, - description="A request option describing where the parent key value should be injected into and under what field name if applicable.", - title="Request Option", + description='A request option describing where the parent key value should be injected into and under what field name if applicable.', + title='Request Option', ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') class SimpleRetriever(BaseModel): - type: Literal["SimpleRetriever"] + type: Literal['SimpleRetriever'] record_selector: RecordSelector = Field( ..., - description="Component that describes how to extract records from a HTTP response.", + description='Component that describes how to extract records from a HTTP response.', ) requester: Union[CustomRequester, HttpRequester] = Field( ..., - description="Requester component that describes how to prepare HTTP requests to send to the source API.", + 
description='Requester component that describes how to prepare HTTP requests to send to the source API.', ) paginator: Optional[Union[DefaultPaginator, NoPagination]] = Field( None, @@ -1235,24 +1259,28 @@ class SimpleRetriever(BaseModel): CustomPartitionRouter, ListPartitionRouter, SubstreamPartitionRouter, - List[Union[CustomPartitionRouter, ListPartitionRouter, SubstreamPartitionRouter]], + List[ + Union[ + CustomPartitionRouter, ListPartitionRouter, SubstreamPartitionRouter + ] + ], ] ] = Field( [], - description="PartitionRouter component that describes how to partition the stream, enabling incremental syncs and checkpointing.", - title="Partition Router", + description='PartitionRouter component that describes how to partition the stream, enabling incremental syncs and checkpointing.', + title='Partition Router', ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') class SubstreamPartitionRouter(BaseModel): - type: Literal["SubstreamPartitionRouter"] + type: Literal['SubstreamPartitionRouter'] parent_stream_configs: List[ParentStreamConfig] = Field( ..., - description="Specifies which parent streams are being iterated over and how parent records should be used to partition the child stream data set.", - title="Parent Stream Configs", + description='Specifies which parent streams are being iterated over and how parent records should be used to partition the child stream data set.', + title='Parent Stream Configs', ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') CompositeErrorHandler.update_forward_refs() diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/http_requester.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/http_requester.py index d230bd04730a2..3887613ee6528 100644 --- 
a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/http_requester.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/http_requester.py @@ -383,7 +383,6 @@ def send_request( request_params: Optional[Mapping[str, Any]] = None, request_body_data: Optional[Union[Mapping[str, Any], str]] = None, request_body_json: Optional[Mapping[str, Any]] = None, - log_request: bool = False, log_formatter: Optional[Callable[[requests.Response], Any]] = None, ) -> Optional[requests.Response]: request = self._create_prepared_request( @@ -396,13 +395,12 @@ def send_request( data=self._request_body_data(stream_state, stream_slice, next_page_token, request_body_data), ) - response = self._send_with_retry(request, log_request=log_request, log_formatter=log_formatter) + response = self._send_with_retry(request, log_formatter=log_formatter) return self._validate_response(response) def _send_with_retry( self, request: requests.PreparedRequest, - log_request: bool = False, log_formatter: Optional[Callable[[requests.Response], Any]] = None, ) -> requests.Response: """ @@ -435,12 +433,11 @@ def _send_with_retry( user_backoff_handler = user_defined_backoff_handler(max_tries=max_tries)(self._send) # type: ignore # we don't pass in kwargs to the backoff handler backoff_handler = default_backoff_handler(max_tries=max_tries, factor=self._DEFAULT_RETRY_FACTOR) # backoff handlers wrap _send, so it will always return a response - return backoff_handler(user_backoff_handler)(request, log_request=log_request, log_formatter=log_formatter) # type: ignore + return backoff_handler(user_backoff_handler)(request, log_formatter=log_formatter) # type: ignore def _send( self, request: requests.PreparedRequest, - log_request: bool = False, log_formatter: Optional[Callable[[requests.Response], Any]] = None, ) -> requests.Response: """ @@ -466,9 +463,7 @@ def _send( ) response: requests.Response = self._session.send(request) self.logger.debug("Receiving response", extra={"headers": 
response.headers, "status": response.status_code, "body": response.text}) - if log_request: - if log_formatter is None: - raise ValueError("response_formatter must be provided if log_request is True") + if log_formatter: formatter = log_formatter self.message_repository.log_message( Level.DEBUG, diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/requester.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/requester.py index 91d20d54a2e73..3b8396756aa01 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/requester.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/requester.py @@ -135,11 +135,12 @@ def send_request( request_params: Optional[Mapping[str, Any]] = None, request_body_data: Optional[Union[Mapping[str, Any], str]] = None, request_body_json: Optional[Mapping[str, Any]] = None, - log_request: bool = False, log_formatter: Optional[Callable[[requests.Response], Any]] = None, ) -> Optional[requests.Response]: """ Sends a request and returns the response. Might return no response if the error handler chooses to ignore the response or throw an exception in case of an error. If path is set, the path configured on the requester itself is ignored. If header, params and body are set, they are merged with the ones configured on the requester itself. + + If a log formatter is provided, it's used to log the performed request and response. If it's not provided, no logging is performed. 
""" diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/retrievers/simple_retriever.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/retrievers/simple_retriever.py index 614d161817cc6..09b1a5f94413a 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/retrievers/simple_retriever.py @@ -404,7 +404,6 @@ def _fetch_next_page( request_body_json=self._request_body_json( stream_state=stream_state, stream_slice=stream_slice, next_page_token=next_page_token ), - log_request=True, log_formatter=lambda response: format_http_message( response, f"Stream '{self.name}' request", diff --git a/airbyte-cdk/python/unit_tests/sources/declarative/requesters/test_http_requester.py b/airbyte-cdk/python/unit_tests/sources/declarative/requesters/test_http_requester.py index 21ab5234d9ec8..b3a5bc772261f 100644 --- a/airbyte-cdk/python/unit_tests/sources/declarative/requesters/test_http_requester.py +++ b/airbyte-cdk/python/unit_tests/sources/declarative/requesters/test_http_requester.py @@ -594,9 +594,9 @@ def test_log_requests(should_log, status_code, should_throw): formatter.return_value = "formatted_response" if should_throw: with pytest.raises(DefaultBackoffException): - requester.send_request(log_request=should_log, log_formatter=formatter) + requester.send_request(log_formatter=formatter if should_log else None) else: - requester.send_request(log_request=should_log, log_formatter=formatter) + requester.send_request(log_formatter=formatter if should_log else None) if should_log: assert repository.log_message.call_args_list[0].args[1]() == "formatted_response" formatter.assert_called_once_with(response) diff --git a/airbyte-cdk/python/unit_tests/sources/declarative/retrievers/test_simple_retriever.py b/airbyte-cdk/python/unit_tests/sources/declarative/retrievers/test_simple_retriever.py index adfe89437b7b9..ebdc7a6201fd9 100644 --- 
a/airbyte-cdk/python/unit_tests/sources/declarative/retrievers/test_simple_retriever.py +++ b/airbyte-cdk/python/unit_tests/sources/declarative/retrievers/test_simple_retriever.py @@ -481,5 +481,5 @@ def test_emit_log_request_response_messages(mocker): retriever._fetch_next_page(stream_state={}, stream_slice={}) - assert requester.send_request.call_args_list[0][1]["log_request"] is True + assert requester.send_request.call_args_list[0][1]["log_formatter"] is not None assert requester.send_request.call_args_list[0][1]["log_formatter"](response) == format_http_message_mock.return_value From c6e02d6316caa0ee0dd6f110cf9322217d2ce6fc Mon Sep 17 00:00:00 2001 From: Joe Reuter Date: Wed, 2 Aug 2023 11:32:54 +0200 Subject: [PATCH 11/16] review comments --- .../test_connector_builder_handler.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_handler.py b/airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_handler.py index ae5ede18b0a99..7f6b3ab9977e3 100644 --- a/airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_handler.py +++ b/airbyte-cdk/python/unit_tests/connector_builder/test_connector_builder_handler.py @@ -571,8 +571,8 @@ def manifest_declarative_source(): def test_list_streams(manifest_declarative_source): manifest_declarative_source.streams.return_value = [ - create_mock_declarative_stream(create_mock_http_stream("a name", "https://a-url-base.com", "a-path")), - create_mock_declarative_stream(create_mock_http_stream("another name", "https://another-url-base.com", "another-path")), + create_mock_declarative_stream(create_mock_retriever("a name", "https://a-url-base.com", "a-path")), + create_mock_declarative_stream(create_mock_retriever("another name", "https://another-url-base.com", "another-path")), ] result = list_streams(manifest_declarative_source, {}) @@ -634,7 +634,7 @@ def test_list_streams_integration_test(): } -def 
create_mock_http_stream(name, url_base, path): +def create_mock_retriever(name, url_base, path): http_stream = mock.Mock(spec=SimpleRetriever, autospec=True) http_stream.name = name http_stream.requester = MagicMock() @@ -704,12 +704,12 @@ def _create_response(body, request): return response -def _create_page(response_body): +def _create_page_response(response_body): request = _create_request() return _create_response(response_body, request) -@patch.object(requests.Session, "send", side_effect=(_create_page({"result": [{"id": 0}, {"id": 1}],"_metadata": {"next": "next"}}), _create_page({"result": [{"id": 2}],"_metadata": {"next": "next"}})) * 10) +@patch.object(requests.Session, "send", side_effect=(_create_page_response({"result": [{"id": 0}, {"id": 1}],"_metadata": {"next": "next"}}), _create_page_response({"result": [{"id": 2}],"_metadata": {"next": "next"}})) * 10) def test_read_source(mock_http_stream): """ This test sort of acts as an integration test for the connector builder. @@ -750,7 +750,7 @@ def test_read_source(mock_http_stream): assert isinstance(s.retriever, SimpleRetrieverTestReadDecorator) -@patch.object(requests.Session, "send", side_effect=(_create_page({"result": [{"id": 0}, {"id": 1}],"_metadata": {"next": "next"}}), _create_page({"result": [{"id": 2}],"_metadata": {"next": "next"}}))) +@patch.object(requests.Session, "send", side_effect=(_create_page_response({"result": [{"id": 0}, {"id": 1}],"_metadata": {"next": "next"}}), _create_page_response({"result": [{"id": 2}],"_metadata": {"next": "next"}}))) def test_read_source_single_page_single_slice(mock_http_stream): max_records = 100 max_pages_per_slice = 1 From 04301130a609f544f5e4a078a81b5da6e217ab81 Mon Sep 17 00:00:00 2001 From: flash1293 Date: Wed, 2 Aug 2023 09:37:05 +0000 Subject: [PATCH 12/16] Automated Commit - Formatting Changes --- .../models/declarative_component_schema.py | 1012 ++++++++--------- 1 file changed, 492 insertions(+), 520 deletions(-) diff --git 
a/airbyte-cdk/python/airbyte_cdk/sources/declarative/models/declarative_component_schema.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/models/declarative_component_schema.py index b555cd0675361..11ffb81cfb40c 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/models/declarative_component_schema.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/models/declarative_component_schema.py @@ -15,12 +15,12 @@ class AddedFieldDefinition(BaseModel): - type: Literal['AddedFieldDefinition'] + type: Literal["AddedFieldDefinition"] path: List[str] = Field( ..., - description='List of strings defining the path where to add the value on the record.', - examples=[['segment_id'], ['metadata', 'segment_id']], - title='Path', + description="List of strings defining the path where to add the value on the record.", + examples=[["segment_id"], ["metadata", "segment_id"]], + title="Path", ) value: str = Field( ..., @@ -30,607 +30,601 @@ class AddedFieldDefinition(BaseModel): "{{ record['MetaData']['LastUpdatedTime'] }}", "{{ stream_partition['segment_id'] }}", ], - title='Value', + title="Value", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class AddFields(BaseModel): - type: Literal['AddFields'] + type: Literal["AddFields"] fields: List[AddedFieldDefinition] = Field( ..., - description='List of transformations (path and corresponding value) that will be added to the record.', - title='Fields', + description="List of transformations (path and corresponding value) that will be added to the record.", + title="Fields", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class AuthFlowType(Enum): - oauth2_0 = 'oauth2.0' - oauth1_0 = 'oauth1.0' + oauth2_0 = "oauth2.0" + oauth1_0 = "oauth1.0" class BasicHttpAuthenticator(BaseModel): - type: 
Literal['BasicHttpAuthenticator'] + type: Literal["BasicHttpAuthenticator"] username: str = Field( ..., - description='The username that will be combined with the password, base64 encoded and used to make requests. Fill it in the user inputs.', + description="The username that will be combined with the password, base64 encoded and used to make requests. Fill it in the user inputs.", examples=["{{ config['username'] }}", "{{ config['api_key'] }}"], - title='Username', + title="Username", ) password: Optional[str] = Field( - '', - description='The password that will be combined with the username, base64 encoded and used to make requests. Fill it in the user inputs.', - examples=["{{ config['password'] }}", ''], - title='Password', + "", + description="The password that will be combined with the username, base64 encoded and used to make requests. Fill it in the user inputs.", + examples=["{{ config['password'] }}", ""], + title="Password", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class BearerAuthenticator(BaseModel): - type: Literal['BearerAuthenticator'] + type: Literal["BearerAuthenticator"] api_token: str = Field( ..., - description='Token to inject as request header for authenticating with the API.', + description="Token to inject as request header for authenticating with the API.", examples=["{{ config['api_key'] }}", "{{ config['token'] }}"], - title='Bearer Token', + title="Bearer Token", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class CheckStream(BaseModel): - type: Literal['CheckStream'] + type: Literal["CheckStream"] stream_names: List[str] = Field( ..., - description='Names of the streams to try reading from when running a check operation.', - examples=[['users'], ['users', 'contacts']], - title='Stream Names', + description="Names of the 
streams to try reading from when running a check operation.", + examples=[["users"], ["users", "contacts"]], + title="Stream Names", ) class ConstantBackoffStrategy(BaseModel): - type: Literal['ConstantBackoffStrategy'] + type: Literal["ConstantBackoffStrategy"] backoff_time_in_seconds: Union[float, str] = Field( ..., - description='Backoff time in seconds.', + description="Backoff time in seconds.", examples=[30, 30.5, "{{ config['backoff_time'] }}"], - title='Backoff Time', + title="Backoff Time", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class CustomAuthenticator(BaseModel): class Config: extra = Extra.allow - type: Literal['CustomAuthenticator'] + type: Literal["CustomAuthenticator"] class_name: str = Field( ..., - description='Fully-qualified name of the class that will be implementing the custom authentication strategy. Has to be a sub class of DeclarativeAuthenticator. The format is `source_..`.', - examples=['source_railz.components.ShortLivedTokenAuthenticator'], - title='Class Name', + description="Fully-qualified name of the class that will be implementing the custom authentication strategy. Has to be a sub class of DeclarativeAuthenticator. The format is `source_..`.", + examples=["source_railz.components.ShortLivedTokenAuthenticator"], + title="Class Name", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class CustomBackoffStrategy(BaseModel): class Config: extra = Extra.allow - type: Literal['CustomBackoffStrategy'] + type: Literal["CustomBackoffStrategy"] class_name: str = Field( ..., - description='Fully-qualified name of the class that will be implementing the custom backoff strategy. 
The format is `source_..`.', - examples=['source_railz.components.MyCustomBackoffStrategy'], - title='Class Name', + description="Fully-qualified name of the class that will be implementing the custom backoff strategy. The format is `source_..`.", + examples=["source_railz.components.MyCustomBackoffStrategy"], + title="Class Name", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class CustomErrorHandler(BaseModel): class Config: extra = Extra.allow - type: Literal['CustomErrorHandler'] + type: Literal["CustomErrorHandler"] class_name: str = Field( ..., - description='Fully-qualified name of the class that will be implementing the custom error handler. The format is `source_..`.', - examples=['source_railz.components.MyCustomErrorHandler'], - title='Class Name', + description="Fully-qualified name of the class that will be implementing the custom error handler. The format is `source_..`.", + examples=["source_railz.components.MyCustomErrorHandler"], + title="Class Name", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class CustomIncrementalSync(BaseModel): class Config: extra = Extra.allow - type: Literal['CustomIncrementalSync'] + type: Literal["CustomIncrementalSync"] class_name: str = Field( ..., - description='Fully-qualified name of the class that will be implementing the custom incremental sync. The format is `source_..`.', - examples=['source_railz.components.MyCustomIncrementalSync'], - title='Class Name', + description="Fully-qualified name of the class that will be implementing the custom incremental sync. 
The format is `source_..`.", + examples=["source_railz.components.MyCustomIncrementalSync"], + title="Class Name", ) cursor_field: str = Field( ..., - description='The location of the value on a record that will be used as a bookmark during sync.', + description="The location of the value on a record that will be used as a bookmark during sync.", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class CustomPaginationStrategy(BaseModel): class Config: extra = Extra.allow - type: Literal['CustomPaginationStrategy'] + type: Literal["CustomPaginationStrategy"] class_name: str = Field( ..., - description='Fully-qualified name of the class that will be implementing the custom pagination strategy. The format is `source_..`.', - examples=['source_railz.components.MyCustomPaginationStrategy'], - title='Class Name', + description="Fully-qualified name of the class that will be implementing the custom pagination strategy. The format is `source_..`.", + examples=["source_railz.components.MyCustomPaginationStrategy"], + title="Class Name", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class CustomRecordExtractor(BaseModel): class Config: extra = Extra.allow - type: Literal['CustomRecordExtractor'] + type: Literal["CustomRecordExtractor"] class_name: str = Field( ..., - description='Fully-qualified name of the class that will be implementing the custom record extraction strategy. The format is `source_..`.', - examples=['source_railz.components.MyCustomRecordExtractor'], - title='Class Name', + description="Fully-qualified name of the class that will be implementing the custom record extraction strategy. 
The format is `source_..`.", + examples=["source_railz.components.MyCustomRecordExtractor"], + title="Class Name", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class CustomRequester(BaseModel): class Config: extra = Extra.allow - type: Literal['CustomRequester'] + type: Literal["CustomRequester"] class_name: str = Field( ..., - description='Fully-qualified name of the class that will be implementing the custom requester strategy. The format is `source_..`.', - examples=['source_railz.components.MyCustomRecordExtractor'], - title='Class Name', + description="Fully-qualified name of the class that will be implementing the custom requester strategy. The format is `source_..`.", + examples=["source_railz.components.MyCustomRecordExtractor"], + title="Class Name", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class CustomRetriever(BaseModel): class Config: extra = Extra.allow - type: Literal['CustomRetriever'] + type: Literal["CustomRetriever"] class_name: str = Field( ..., - description='Fully-qualified name of the class that will be implementing the custom retriever strategy. The format is `source_..`.', - examples=['source_railz.components.MyCustomRetriever'], - title='Class Name', + description="Fully-qualified name of the class that will be implementing the custom retriever strategy. 
The format is `source_..`.", + examples=["source_railz.components.MyCustomRetriever"], + title="Class Name", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class CustomPartitionRouter(BaseModel): class Config: extra = Extra.allow - type: Literal['CustomPartitionRouter'] + type: Literal["CustomPartitionRouter"] class_name: str = Field( ..., - description='Fully-qualified name of the class that will be implementing the custom partition router. The format is `source_..`.', - examples=['source_railz.components.MyCustomPartitionRouter'], - title='Class Name', + description="Fully-qualified name of the class that will be implementing the custom partition router. The format is `source_..`.", + examples=["source_railz.components.MyCustomPartitionRouter"], + title="Class Name", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class CustomTransformation(BaseModel): class Config: extra = Extra.allow - type: Literal['CustomTransformation'] + type: Literal["CustomTransformation"] class_name: str = Field( ..., - description='Fully-qualified name of the class that will be implementing the custom transformation. The format is `source_..`.', - examples=['source_railz.components.MyCustomTransformation'], - title='Class Name', + description="Fully-qualified name of the class that will be implementing the custom transformation. 
The format is `source_..`.", + examples=["source_railz.components.MyCustomTransformation"], + title="Class Name", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class RefreshTokenUpdater(BaseModel): refresh_token_name: Optional[str] = Field( - 'refresh_token', - description='The name of the property which contains the updated refresh token in the response from the token refresh endpoint.', - examples=['refresh_token'], - title='Refresh Token Property Name', + "refresh_token", + description="The name of the property which contains the updated refresh token in the response from the token refresh endpoint.", + examples=["refresh_token"], + title="Refresh Token Property Name", ) access_token_config_path: Optional[List[str]] = Field( - ['credentials', 'access_token'], - description='Config path to the access token. Make sure the field actually exists in the config.', - examples=[['credentials', 'access_token'], ['access_token']], - title='Config Path To Access Token', + ["credentials", "access_token"], + description="Config path to the access token. Make sure the field actually exists in the config.", + examples=[["credentials", "access_token"], ["access_token"]], + title="Config Path To Access Token", ) refresh_token_config_path: Optional[List[str]] = Field( - ['credentials', 'refresh_token'], - description='Config path to the access token. Make sure the field actually exists in the config.', - examples=[['credentials', 'refresh_token'], ['refresh_token']], - title='Config Path To Refresh Token', + ["credentials", "refresh_token"], + description="Config path to the access token. 
Make sure the field actually exists in the config.", + examples=[["credentials", "refresh_token"], ["refresh_token"]], + title="Config Path To Refresh Token", ) token_expiry_date_config_path: Optional[List[str]] = Field( - ['credentials', 'token_expiry_date'], - description='Config path to the expiry date. Make sure actually exists in the config.', - examples=[['credentials', 'token_expiry_date']], - title='Config Path To Expiry Date', + ["credentials", "token_expiry_date"], + description="Config path to the expiry date. Make sure actually exists in the config.", + examples=[["credentials", "token_expiry_date"]], + title="Config Path To Expiry Date", ) class OAuthAuthenticator(BaseModel): - type: Literal['OAuthAuthenticator'] + type: Literal["OAuthAuthenticator"] client_id: str = Field( ..., - description='The OAuth client ID. Fill it in the user inputs.', + description="The OAuth client ID. Fill it in the user inputs.", examples=["{{ config['client_id }}", "{{ config['credentials']['client_id }}"], - title='Client ID', + title="Client ID", ) client_secret: str = Field( ..., - description='The OAuth client secret. Fill it in the user inputs.', + description="The OAuth client secret. 
Fill it in the user inputs.", examples=[ "{{ config['client_secret }}", "{{ config['credentials']['client_secret }}", ], - title='Client Secret', + title="Client Secret", ) refresh_token: Optional[str] = Field( None, - description='Credential artifact used to get a new access token.', + description="Credential artifact used to get a new access token.", examples=[ "{{ config['refresh_token'] }}", "{{ config['credentials]['refresh_token'] }}", ], - title='Refresh Token', + title="Refresh Token", ) token_refresh_endpoint: str = Field( ..., - description='The full URL to call to obtain a new access token.', - examples=['https://connect.squareup.com/oauth2/token'], - title='Token Refresh Endpoint', + description="The full URL to call to obtain a new access token.", + examples=["https://connect.squareup.com/oauth2/token"], + title="Token Refresh Endpoint", ) access_token_name: Optional[str] = Field( - 'access_token', - description='The name of the property which contains the access token in the response from the token refresh endpoint.', - examples=['access_token'], - title='Access Token Property Name', + "access_token", + description="The name of the property which contains the access token in the response from the token refresh endpoint.", + examples=["access_token"], + title="Access Token Property Name", ) expires_in_name: Optional[str] = Field( - 'expires_in', - description='The name of the property which contains the expiry date in the response from the token refresh endpoint.', - examples=['expires_in'], - title='Token Expiry Property Name', + "expires_in", + description="The name of the property which contains the expiry date in the response from the token refresh endpoint.", + examples=["expires_in"], + title="Token Expiry Property Name", ) grant_type: Optional[str] = Field( - 'refresh_token', - description='Specifies the OAuth2 grant type. If set to refresh_token, the refresh_token needs to be provided as well. 
For client_credentials, only client id and secret are required. Other grant types are not officially supported.', - examples=['refresh_token', 'client_credentials'], - title='Grant Type', + "refresh_token", + description="Specifies the OAuth2 grant type. If set to refresh_token, the refresh_token needs to be provided as well. For client_credentials, only client id and secret are required. Other grant types are not officially supported.", + examples=["refresh_token", "client_credentials"], + title="Grant Type", ) refresh_request_body: Optional[Dict[str, Any]] = Field( None, - description='Body of the request sent to get a new access token.', + description="Body of the request sent to get a new access token.", examples=[ { - 'applicationId': "{{ config['application_id'] }}", - 'applicationSecret': "{{ config['application_secret'] }}", - 'token': "{{ config['token'] }}", + "applicationId": "{{ config['application_id'] }}", + "applicationSecret": "{{ config['application_secret'] }}", + "token": "{{ config['token'] }}", } ], - title='Refresh Request Body', + title="Refresh Request Body", ) scopes: Optional[List[str]] = Field( None, - description='List of scopes that should be granted to the access token.', - examples=[ - ['crm.list.read', 'crm.objects.contacts.read', 'crm.schema.contacts.read'] - ], - title='Scopes', + description="List of scopes that should be granted to the access token.", + examples=[["crm.list.read", "crm.objects.contacts.read", "crm.schema.contacts.read"]], + title="Scopes", ) token_expiry_date: Optional[str] = Field( None, - description='The access token expiry date.', - examples=['2023-04-06T07:12:10.421833+00:00', 1680842386], - title='Token Expiry Date', + description="The access token expiry date.", + examples=["2023-04-06T07:12:10.421833+00:00", 1680842386], + title="Token Expiry Date", ) token_expiry_date_format: Optional[str] = Field( None, - description='The format of the time to expiration datetime. 
Provide it if the time is returned as a date-time string instead of seconds.', - examples=['%Y-%m-%d %H:%M:%S.%f+00:00'], - title='Token Expiry Date Format', + description="The format of the time to expiration datetime. Provide it if the time is returned as a date-time string instead of seconds.", + examples=["%Y-%m-%d %H:%M:%S.%f+00:00"], + title="Token Expiry Date Format", ) refresh_token_updater: Optional[RefreshTokenUpdater] = Field( None, - description='When the token updater is defined, new refresh tokens, access tokens and the access token expiry date are written back from the authentication response to the config object. This is important if the refresh token can only used once.', - title='Token Updater', + description="When the token updater is defined, new refresh tokens, access tokens and the access token expiry date are written back from the authentication response to the config object. This is important if the refresh token can only used once.", + title="Token Updater", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class ExponentialBackoffStrategy(BaseModel): - type: Literal['ExponentialBackoffStrategy'] + type: Literal["ExponentialBackoffStrategy"] factor: Optional[Union[float, str]] = Field( 5, - description='Multiplicative constant applied on each retry.', - examples=[5, 5.5, '10'], - title='Factor', + description="Multiplicative constant applied on each retry.", + examples=[5, 5.5, "10"], + title="Factor", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class SessionTokenRequestBearerAuthenticator(BaseModel): - type: Literal['Bearer'] + type: Literal["Bearer"] class HttpMethodEnum(Enum): - GET = 'GET' - POST = 'POST' + GET = "GET" + POST = "POST" class Action(Enum): - SUCCESS = 'SUCCESS' - FAIL = 'FAIL' - RETRY = 'RETRY' - IGNORE = 'IGNORE' + 
SUCCESS = "SUCCESS" + FAIL = "FAIL" + RETRY = "RETRY" + IGNORE = "IGNORE" class HttpResponseFilter(BaseModel): - type: Literal['HttpResponseFilter'] + type: Literal["HttpResponseFilter"] action: Action = Field( ..., - description='Action to execute if a response matches the filter.', - examples=['SUCCESS', 'FAIL', 'RETRY', 'IGNORE'], - title='Action', + description="Action to execute if a response matches the filter.", + examples=["SUCCESS", "FAIL", "RETRY", "IGNORE"], + title="Action", ) error_message: Optional[str] = Field( None, - description='Error Message to display if the response matches the filter.', - title='Error Message', + description="Error Message to display if the response matches the filter.", + title="Error Message", ) error_message_contains: Optional[str] = Field( None, - description='Match the response if its error message contains the substring.', - example=['This API operation is not enabled for this site'], - title='Error Message Substring', + description="Match the response if its error message contains the substring.", + example=["This API operation is not enabled for this site"], + title="Error Message Substring", ) http_codes: Optional[List[int]] = Field( None, - description='Match the response if its HTTP code is included in this list.', + description="Match the response if its HTTP code is included in this list.", examples=[[420, 429], [500]], - title='HTTP Codes', + title="HTTP Codes", ) predicate: Optional[str] = Field( None, - description='Match the response if the predicate evaluates to true.', + description="Match the response if the predicate evaluates to true.", examples=[ "{{ 'Too much requests' in response }}", "{{ 'error_code' in response and response['error_code'] == 'ComplexityException' }}", ], - title='Predicate', + title="Predicate", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class InlineSchemaLoader(BaseModel): - 
type: Literal['InlineSchemaLoader'] + type: Literal["InlineSchemaLoader"] schema_: Optional[Dict[str, Any]] = Field( None, - alias='schema', + alias="schema", description='Describes a streams\' schema. Refer to the Data Types documentation for more details on which types are valid.', - title='Schema', + title="Schema", ) class JsonFileSchemaLoader(BaseModel): - type: Literal['JsonFileSchemaLoader'] + type: Literal["JsonFileSchemaLoader"] file_path: Optional[str] = Field( None, description="Path to the JSON file defining the schema. The path is relative to the connector module's root.", - example=['./schemas/users.json'], - title='File Path', + example=["./schemas/users.json"], + title="File Path", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class JsonDecoder(BaseModel): - type: Literal['JsonDecoder'] + type: Literal["JsonDecoder"] class MinMaxDatetime(BaseModel): - type: Literal['MinMaxDatetime'] + type: Literal["MinMaxDatetime"] datetime: str = Field( ..., - description='Datetime value.', - examples=['2021-01-01', '2021-01-01T00:00:00Z', "{{ config['start_time'] }}"], - title='Datetime', + description="Datetime value.", + examples=["2021-01-01", "2021-01-01T00:00:00Z", "{{ config['start_time'] }}"], + title="Datetime", ) datetime_format: Optional[str] = Field( - '', + "", description='Format of the datetime value. Defaults to "%Y-%m-%dT%H:%M:%S.%f%z" if left empty. Use placeholders starting with "%" to describe the format the API is using. 
The following placeholders are available:\n * **%s**: Epoch unix timestamp - `1686218963`\n * **%a**: Weekday (abbreviated) - `Sun`\n * **%A**: Weekday (full) - `Sunday`\n * **%w**: Weekday (decimal) - `0` (Sunday), `6` (Saturday)\n * **%d**: Day of the month (zero-padded) - `01`, `02`, ..., `31`\n * **%b**: Month (abbreviated) - `Jan`\n * **%B**: Month (full) - `January`\n * **%m**: Month (zero-padded) - `01`, `02`, ..., `12`\n * **%y**: Year (without century, zero-padded) - `00`, `01`, ..., `99`\n * **%Y**: Year (with century) - `0001`, `0002`, ..., `9999`\n * **%H**: Hour (24-hour, zero-padded) - `00`, `01`, ..., `23`\n * **%I**: Hour (12-hour, zero-padded) - `01`, `02`, ..., `12`\n * **%p**: AM/PM indicator\n * **%M**: Minute (zero-padded) - `00`, `01`, ..., `59`\n * **%S**: Second (zero-padded) - `00`, `01`, ..., `59`\n * **%f**: Microsecond (zero-padded to 6 digits) - `000000`, `000001`, ..., `999999`\n * **%z**: UTC offset - `(empty)`, `+0000`, `-04:00`\n * **%Z**: Time zone name - `(empty)`, `UTC`, `GMT`\n * **%j**: Day of the year (zero-padded) - `001`, `002`, ..., `366`\n * **%U**: Week number of the year (Sunday as first day) - `00`, `01`, ..., `53`\n * **%W**: Week number of the year (Monday as first day) - `00`, `01`, ..., `53`\n * **%c**: Date and time representation - `Tue Aug 16 21:30:00 1988`\n * **%x**: Date representation - `08/16/1988`\n * **%X**: Time representation - `21:30:00`\n * **%%**: Literal \'%\' character\n\n Some placeholders depend on the locale of the underlying system - in most cases this locale is configured as en/US. For more information see the [Python documentation](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes).\n', - examples=['%Y-%m-%dT%H:%M:%S.%f%z', '%Y-%m-%d', '%s'], - title='Datetime Format', + examples=["%Y-%m-%dT%H:%M:%S.%f%z", "%Y-%m-%d", "%s"], + title="Datetime Format", ) max_datetime: Optional[str] = Field( None, - description='Ceiling applied on the datetime value. 
Must be formatted with the datetime_format field.', - examples=['2021-01-01T00:00:00Z', '2021-01-01'], - title='Max Datetime', + description="Ceiling applied on the datetime value. Must be formatted with the datetime_format field.", + examples=["2021-01-01T00:00:00Z", "2021-01-01"], + title="Max Datetime", ) min_datetime: Optional[str] = Field( None, - description='Floor applied on the datetime value. Must be formatted with the datetime_format field.', - examples=['2010-01-01T00:00:00Z', '2010-01-01'], - title='Min Datetime', + description="Floor applied on the datetime value. Must be formatted with the datetime_format field.", + examples=["2010-01-01T00:00:00Z", "2010-01-01"], + title="Min Datetime", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class NoAuth(BaseModel): - type: Literal['NoAuth'] - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + type: Literal["NoAuth"] + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class NoPagination(BaseModel): - type: Literal['NoPagination'] + type: Literal["NoPagination"] class OAuthConfigSpecification(BaseModel): class Config: extra = Extra.allow - oauth_user_input_from_connector_config_specification: Optional[ - Dict[str, Any] - ] = Field( + oauth_user_input_from_connector_config_specification: Optional[Dict[str, Any]] = Field( None, description="OAuth specific blob. 
This is a Json Schema used to validate Json configurations used as input to OAuth.\nMust be a valid non-nested JSON that refers to properties from ConnectorSpecification.connectionSpecification\nusing special annotation 'path_in_connector_config'.\nThese are input values the user is entering through the UI to authenticate to the connector, that might also shared\nas inputs for syncing data via the connector.\nExamples:\nif no connector values is shared during oauth flow, oauth_user_input_from_connector_config_specification=[]\nif connector values such as 'app_id' inside the top level are used to generate the API url for the oauth flow,\n oauth_user_input_from_connector_config_specification={\n app_id: {\n type: string\n path_in_connector_config: ['app_id']\n }\n }\nif connector values such as 'info.app_id' nested inside another object are used to generate the API url for the oauth flow,\n oauth_user_input_from_connector_config_specification={\n app_id: {\n type: string\n path_in_connector_config: ['info', 'app_id']\n }\n }", examples=[ - {'app_id': {'type': 'string', 'path_in_connector_config': ['app_id']}}, + {"app_id": {"type": "string", "path_in_connector_config": ["app_id"]}}, { - 'app_id': { - 'type': 'string', - 'path_in_connector_config': ['info', 'app_id'], + "app_id": { + "type": "string", + "path_in_connector_config": ["info", "app_id"], } }, ], - title='OAuth user input', + title="OAuth user input", ) complete_oauth_output_specification: Optional[Dict[str, Any]] = Field( None, description="OAuth specific blob. 
This is a Json Schema used to validate Json configurations produced by the OAuth flows as they are\nreturned by the distant OAuth APIs.\nMust be a valid JSON describing the fields to merge back to `ConnectorSpecification.connectionSpecification`.\nFor each field, a special annotation `path_in_connector_config` can be specified to determine where to merge it,\nExamples:\n complete_oauth_output_specification={\n refresh_token: {\n type: string,\n path_in_connector_config: ['credentials', 'refresh_token']\n }\n }", examples=[ { - 'refresh_token': { - 'type': 'string,', - 'path_in_connector_config': ['credentials', 'refresh_token'], + "refresh_token": { + "type": "string,", + "path_in_connector_config": ["credentials", "refresh_token"], } } ], - title='OAuth output specification', + title="OAuth output specification", ) complete_oauth_server_input_specification: Optional[Dict[str, Any]] = Field( None, - description='OAuth specific blob. This is a Json Schema used to validate Json configurations persisted as Airbyte Server configurations.\nMust be a valid non-nested JSON describing additional fields configured by the Airbyte Instance or Workspace Admins to be used by the\nserver when completing an OAuth flow (typically exchanging an auth code for refresh token).\nExamples:\n complete_oauth_server_input_specification={\n client_id: {\n type: string\n },\n client_secret: {\n type: string\n }\n }', - examples=[ - {'client_id': {'type': 'string'}, 'client_secret': {'type': 'string'}} - ], - title='OAuth input specification', + description="OAuth specific blob. 
This is a Json Schema used to validate Json configurations persisted as Airbyte Server configurations.\nMust be a valid non-nested JSON describing additional fields configured by the Airbyte Instance or Workspace Admins to be used by the\nserver when completing an OAuth flow (typically exchanging an auth code for refresh token).\nExamples:\n complete_oauth_server_input_specification={\n client_id: {\n type: string\n },\n client_secret: {\n type: string\n }\n }", + examples=[{"client_id": {"type": "string"}, "client_secret": {"type": "string"}}], + title="OAuth input specification", ) complete_oauth_server_output_specification: Optional[Dict[str, Any]] = Field( None, description="OAuth specific blob. This is a Json Schema used to validate Json configurations persisted as Airbyte Server configurations that\nalso need to be merged back into the connector configuration at runtime.\nThis is a subset configuration of `complete_oauth_server_input_specification` that filters fields out to retain only the ones that\nare necessary for the connector to function with OAuth. 
(some fields could be used during oauth flows but not needed afterwards, therefore\nthey would be listed in the `complete_oauth_server_input_specification` but not `complete_oauth_server_output_specification`)\nMust be a valid non-nested JSON describing additional fields configured by the Airbyte Instance or Workspace Admins to be used by the\nconnector when using OAuth flow APIs.\nThese fields are to be merged back to `ConnectorSpecification.connectionSpecification`.\nFor each field, a special annotation `path_in_connector_config` can be specified to determine where to merge it,\nExamples:\n complete_oauth_server_output_specification={\n client_id: {\n type: string,\n path_in_connector_config: ['credentials', 'client_id']\n },\n client_secret: {\n type: string,\n path_in_connector_config: ['credentials', 'client_secret']\n }\n }", examples=[ { - 'client_id': { - 'type': 'string,', - 'path_in_connector_config': ['credentials', 'client_id'], + "client_id": { + "type": "string,", + "path_in_connector_config": ["credentials", "client_id"], }, - 'client_secret': { - 'type': 'string,', - 'path_in_connector_config': ['credentials', 'client_secret'], + "client_secret": { + "type": "string,", + "path_in_connector_config": ["credentials", "client_secret"], }, } ], - title='OAuth server output specification', + title="OAuth server output specification", ) class OffsetIncrement(BaseModel): - type: Literal['OffsetIncrement'] + type: Literal["OffsetIncrement"] page_size: Optional[Union[int, str]] = Field( None, - description='The number of records to include in each pages.', + description="The number of records to include in each pages.", examples=[100, "{{ config['page_size'] }}"], - title='Limit', + title="Limit", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class PageIncrement(BaseModel): - type: Literal['PageIncrement'] + type: Literal["PageIncrement"] page_size: 
Optional[int] = Field( None, - description='The number of records to include in each pages.', - examples=[100, '100'], - title='Page Size', + description="The number of records to include in each pages.", + examples=[100, "100"], + title="Page Size", ) start_from_page: Optional[int] = Field( 0, - description='Index of the first page to request.', + description="Index of the first page to request.", examples=[0, 1], - title='Start From Page', + title="Start From Page", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class PrimaryKey(BaseModel): __root__: Union[str, List[str], List[List[str]]] = Field( ..., - description='The stream field to be used to distinguish unique records. Can either be a single field, an array of fields representing a composite key, or an array of arrays representing a composite key where the fields are nested fields.', - examples=['id', ['code', 'type']], - title='Primary Key', + description="The stream field to be used to distinguish unique records. Can either be a single field, an array of fields representing a composite key, or an array of arrays representing a composite key where the fields are nested fields.", + examples=["id", ["code", "type"]], + title="Primary Key", ) class RecordFilter(BaseModel): - type: Literal['RecordFilter'] + type: Literal["RecordFilter"] condition: Optional[str] = Field( - '', - description='The predicate to filter a record. Records will be removed if evaluated to False.', + "", + description="The predicate to filter a record. 
Records will be removed if evaluated to False.", examples=[ "{{ record['created_at'] >= stream_interval['start_time'] }}", "{{ record.status in ['active', 'expired'] }}", ], ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class RemoveFields(BaseModel): - type: Literal['RemoveFields'] + type: Literal["RemoveFields"] field_pointers: List[List[str]] = Field( ..., - description='Array of paths defining the field to remove. Each item is an array whose field describe the path of a field to remove.', - examples=[['tags'], [['content', 'html'], ['content', 'plain_text']]], - title='Field Paths', + description="Array of paths defining the field to remove. Each item is an array whose field describe the path of a field to remove.", + examples=[["tags"], [["content", "html"], ["content", "plain_text"]]], + title="Field Paths", ) class RequestPath(BaseModel): - type: Literal['RequestPath'] + type: Literal["RequestPath"] class InjectInto(Enum): - request_parameter = 'request_parameter' - header = 'header' - body_data = 'body_data' - body_json = 'body_json' + request_parameter = "request_parameter" + header = "header" + body_data = "body_data" + body_json = "body_json" class RequestOption(BaseModel): - type: Literal['RequestOption'] + type: Literal["RequestOption"] field_name: str = Field( ..., - description='Configures which key should be used in the location that the descriptor is being injected into', - examples=['segment_id'], - title='Request Option', + description="Configures which key should be used in the location that the descriptor is being injected into", + examples=["segment_id"], + title="Request Option", ) inject_into: InjectInto = Field( ..., - description='Configures where the descriptor should be set on the HTTP requests. 
Note that request parameters that are already encoded in the URL path will not be duplicated.', - examples=['request_parameter', 'header', 'body_data', 'body_json'], - title='Inject Into', + description="Configures where the descriptor should be set on the HTTP requests. Note that request parameters that are already encoded in the URL path will not be duplicated.", + examples=["request_parameter", "header", "body_data", "body_json"], + title="Inject Into", ) @@ -642,191 +636,189 @@ class Config: class LegacySessionTokenAuthenticator(BaseModel): - type: Literal['LegacySessionTokenAuthenticator'] + type: Literal["LegacySessionTokenAuthenticator"] header: str = Field( ..., - description='The name of the session token header that will be injected in the request', - examples=['X-Session'], - title='Session Request Header', + description="The name of the session token header that will be injected in the request", + examples=["X-Session"], + title="Session Request Header", ) login_url: str = Field( ..., - description='Path of the login URL (do not include the base URL)', - examples=['session'], - title='Login Path', + description="Path of the login URL (do not include the base URL)", + examples=["session"], + title="Login Path", ) session_token: Optional[str] = Field( None, - description='Session token to use if using a pre-defined token. Not needed if authenticating with username + password pair', + description="Session token to use if using a pre-defined token. 
Not needed if authenticating with username + password pair", example=["{{ config['session_token'] }}"], - title='Session Token', + title="Session Token", ) session_token_response_key: str = Field( ..., - description='Name of the key of the session token to be extracted from the response', - examples=['id'], - title='Response Token Response Key', + description="Name of the key of the session token to be extracted from the response", + examples=["id"], + title="Response Token Response Key", ) username: Optional[str] = Field( None, - description='Username used to authenticate and obtain a session token', + description="Username used to authenticate and obtain a session token", examples=[" {{ config['username'] }}"], - title='Username', + title="Username", ) password: Optional[str] = Field( - '', - description='Password used to authenticate and obtain a session token', - examples=["{{ config['password'] }}", ''], - title='Password', + "", + description="Password used to authenticate and obtain a session token", + examples=["{{ config['password'] }}", ""], + title="Password", ) validate_session_url: str = Field( ..., - description='Path of the URL to use to validate that the session token is valid (do not include the base URL)', - examples=['user/current'], - title='Validate Session Path', + description="Path of the URL to use to validate that the session token is valid (do not include the base URL)", + examples=["user/current"], + title="Validate Session Path", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class WaitTimeFromHeader(BaseModel): - type: Literal['WaitTimeFromHeader'] + type: Literal["WaitTimeFromHeader"] header: str = Field( ..., - description='The name of the response header defining how long to wait before retrying.', - examples=['Retry-After'], - title='Response Header Name', + description="The name of the response header defining how long to wait 
before retrying.", + examples=["Retry-After"], + title="Response Header Name", ) regex: Optional[str] = Field( None, - description='Optional regex to apply on the header to extract its value. The regex should define a capture group defining the wait time.', - examples=['([-+]?\\d+)'], - title='Extraction Regex', + description="Optional regex to apply on the header to extract its value. The regex should define a capture group defining the wait time.", + examples=["([-+]?\\d+)"], + title="Extraction Regex", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class WaitUntilTimeFromHeader(BaseModel): - type: Literal['WaitUntilTimeFromHeader'] + type: Literal["WaitUntilTimeFromHeader"] header: str = Field( ..., - description='The name of the response header defining how long to wait before retrying.', - examples=['wait_time'], - title='Response Header', + description="The name of the response header defining how long to wait before retrying.", + examples=["wait_time"], + title="Response Header", ) min_wait: Optional[Union[float, str]] = Field( None, - description='Minimum time to wait before retrying.', - examples=[10, '60'], - title='Minimum Wait Time', + description="Minimum time to wait before retrying.", + examples=[10, "60"], + title="Minimum Wait Time", ) regex: Optional[str] = Field( None, - description='Optional regex to apply on the header to extract its value. The regex should define a capture group defining the wait time.', - examples=['([-+]?\\d+)'], - title='Extraction Regex', + description="Optional regex to apply on the header to extract its value. 
The regex should define a capture group defining the wait time.", + examples=["([-+]?\\d+)"], + title="Extraction Regex", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class ApiKeyAuthenticator(BaseModel): - type: Literal['ApiKeyAuthenticator'] + type: Literal["ApiKeyAuthenticator"] api_token: Optional[str] = Field( None, - description='The API key to inject in the request. Fill it in the user inputs.', + description="The API key to inject in the request. Fill it in the user inputs.", examples=["{{ config['api_key'] }}", "Token token={{ config['api_key'] }}"], - title='API Key', + title="API Key", ) header: Optional[str] = Field( None, - description='The name of the HTTP header that will be set to the API key. This setting is deprecated, use inject_into instead. Header and inject_into can not be defined at the same time.', - examples=['Authorization', 'Api-Token', 'X-Auth-Token'], - title='Header Name', + description="The name of the HTTP header that will be set to the API key. This setting is deprecated, use inject_into instead. Header and inject_into can not be defined at the same time.", + examples=["Authorization", "Api-Token", "X-Auth-Token"], + title="Header Name", ) inject_into: Optional[RequestOption] = Field( None, - description='Configure how the API Key will be sent in requests to the source API. Either inject_into or header has to be defined.', + description="Configure how the API Key will be sent in requests to the source API. 
Either inject_into or header has to be defined.", examples=[ - {'inject_into': 'header', 'field_name': 'Authorization'}, - {'inject_into': 'request_parameter', 'field_name': 'authKey'}, + {"inject_into": "header", "field_name": "Authorization"}, + {"inject_into": "request_parameter", "field_name": "authKey"}, ], - title='Inject API Key Into Outgoing HTTP Request', + title="Inject API Key Into Outgoing HTTP Request", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class AuthFlow(BaseModel): - auth_flow_type: Optional[AuthFlowType] = Field( - None, description='The type of auth to use', title='Auth flow type' - ) + auth_flow_type: Optional[AuthFlowType] = Field(None, description="The type of auth to use", title="Auth flow type") predicate_key: Optional[List[str]] = Field( None, - description='JSON path to a field in the connectorSpecification that should exist for the advanced auth to be applicable.', - examples=[['credentials', 'auth_type']], - title='Predicate key', + description="JSON path to a field in the connectorSpecification that should exist for the advanced auth to be applicable.", + examples=[["credentials", "auth_type"]], + title="Predicate key", ) predicate_value: Optional[str] = Field( None, - description='Value of the predicate_key fields for the advanced auth to be applicable.', - examples=['Oauth'], - title='Predicate value', + description="Value of the predicate_key fields for the advanced auth to be applicable.", + examples=["Oauth"], + title="Predicate value", ) oauth_config_specification: Optional[OAuthConfigSpecification] = None class CursorPagination(BaseModel): - type: Literal['CursorPagination'] + type: Literal["CursorPagination"] cursor_value: str = Field( ..., - description='Value of the cursor defining the next page to fetch.', + description="Value of the cursor defining the next page to fetch.", examples=[ - '{{ headers.link.next.cursor 
}}', + "{{ headers.link.next.cursor }}", "{{ last_records[-1]['key'] }}", "{{ response['nextPage'] }}", ], - title='Cursor Value', + title="Cursor Value", ) page_size: Optional[int] = Field( None, - description='The number of records to include in each pages.', + description="The number of records to include in each pages.", examples=[100], - title='Page Size', + title="Page Size", ) stop_condition: Optional[str] = Field( None, - description='Template string evaluating when to stop paginating.', + description="Template string evaluating when to stop paginating.", examples=[ - '{{ response.data.has_more is false }}', + "{{ response.data.has_more is false }}", "{{ 'next' not in headers['link'] }}", ], - title='Stop Condition', + title="Stop Condition", ) decoder: Optional[JsonDecoder] = Field( None, - description='Component decoding the response so records can be extracted.', - title='Decoder', + description="Component decoding the response so records can be extracted.", + title="Decoder", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class DatetimeBasedCursor(BaseModel): - type: Literal['DatetimeBasedCursor'] + type: Literal["DatetimeBasedCursor"] cursor_field: str = Field( ..., - description='The location of the value on a record that will be used as a bookmark during sync. To ensure no data loss, the API must return records in ascending order based on the cursor field. Nested fields are not supported, so the field must be at the top level of the record. You can use a combination of Add Field and Remove Field transformations to move the nested field to the top.', - examples=['created_at', "{{ config['record_cursor'] }}"], - title='Cursor Field', + description="The location of the value on a record that will be used as a bookmark during sync. To ensure no data loss, the API must return records in ascending order based on the cursor field. 
Nested fields are not supported, so the field must be at the top level of the record. You can use a combination of Add Field and Remove Field transformations to move the nested field to the top.", + examples=["created_at", "{{ config['record_cursor'] }}"], + title="Cursor Field", ) datetime_format: str = Field( ..., - description='The datetime format of the Cursor Field. Use placeholders starting with "%" to describe the format the API is using. The following placeholders are available:\n * **%s**: Epoch unix timestamp - `1686218963`\n * **%a**: Weekday (abbreviated) - `Sun`\n * **%A**: Weekday (full) - `Sunday`\n * **%w**: Weekday (decimal) - `0` (Sunday), `6` (Saturday)\n * **%d**: Day of the month (zero-padded) - `01`, `02`, ..., `31`\n * **%b**: Month (abbreviated) - `Jan`\n * **%B**: Month (full) - `January`\n * **%m**: Month (zero-padded) - `01`, `02`, ..., `12`\n * **%y**: Year (without century, zero-padded) - `00`, `01`, ..., `99`\n * **%Y**: Year (with century) - `0001`, `0002`, ..., `9999`\n * **%H**: Hour (24-hour, zero-padded) - `00`, `01`, ..., `23`\n * **%I**: Hour (12-hour, zero-padded) - `01`, `02`, ..., `12`\n * **%p**: AM/PM indicator\n * **%M**: Minute (zero-padded) - `00`, `01`, ..., `59`\n * **%S**: Second (zero-padded) - `00`, `01`, ..., `59`\n * **%f**: Microsecond (zero-padded to 6 digits) - `000000`\n * **%z**: UTC offset - `(empty)`, `+0000`, `-04:00`\n * **%Z**: Time zone name - `(empty)`, `UTC`, `GMT`\n * **%j**: Day of the year (zero-padded) - `001`, `002`, ..., `366`\n * **%U**: Week number of the year (starting Sunday) - `00`, ..., `53`\n * **%W**: Week number of the year (starting Monday) - `00`, ..., `53`\n * **%c**: Date and time - `Tue Aug 16 21:30:00 1988`\n * **%x**: Date standard format - `08/16/1988`\n * **%X**: Time standard format - `21:30:00`\n * **%%**: Literal \'%\' character\n\n Some placeholders depend on the locale of the underlying system - in most cases this locale is configured as en/US. 
For more information see the [Python documentation](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes).\n', - examples=['%Y-%m-%dT%H:%M:%S.%f%z', '%Y-%m-%d', '%s'], - title='Cursor Field Datetime Format', + description="The datetime format of the Cursor Field. Use placeholders starting with \"%\" to describe the format the API is using. The following placeholders are available:\n * **%s**: Epoch unix timestamp - `1686218963`\n * **%a**: Weekday (abbreviated) - `Sun`\n * **%A**: Weekday (full) - `Sunday`\n * **%w**: Weekday (decimal) - `0` (Sunday), `6` (Saturday)\n * **%d**: Day of the month (zero-padded) - `01`, `02`, ..., `31`\n * **%b**: Month (abbreviated) - `Jan`\n * **%B**: Month (full) - `January`\n * **%m**: Month (zero-padded) - `01`, `02`, ..., `12`\n * **%y**: Year (without century, zero-padded) - `00`, `01`, ..., `99`\n * **%Y**: Year (with century) - `0001`, `0002`, ..., `9999`\n * **%H**: Hour (24-hour, zero-padded) - `00`, `01`, ..., `23`\n * **%I**: Hour (12-hour, zero-padded) - `01`, `02`, ..., `12`\n * **%p**: AM/PM indicator\n * **%M**: Minute (zero-padded) - `00`, `01`, ..., `59`\n * **%S**: Second (zero-padded) - `00`, `01`, ..., `59`\n * **%f**: Microsecond (zero-padded to 6 digits) - `000000`\n * **%z**: UTC offset - `(empty)`, `+0000`, `-04:00`\n * **%Z**: Time zone name - `(empty)`, `UTC`, `GMT`\n * **%j**: Day of the year (zero-padded) - `001`, `002`, ..., `366`\n * **%U**: Week number of the year (starting Sunday) - `00`, ..., `53`\n * **%W**: Week number of the year (starting Monday) - `00`, ..., `53`\n * **%c**: Date and time - `Tue Aug 16 21:30:00 1988`\n * **%x**: Date standard format - `08/16/1988`\n * **%X**: Time standard format - `21:30:00`\n * **%%**: Literal '%' character\n\n Some placeholders depend on the locale of the underlying system - in most cases this locale is configured as en/US. 
For more information see the [Python documentation](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes).\n", + examples=["%Y-%m-%dT%H:%M:%S.%f%z", "%Y-%m-%d", "%s"], + title="Cursor Field Datetime Format", ) start_datetime: Union[str, MinMaxDatetime] = Field( ..., - description='The datetime that determines the earliest record that should be synced.', - examples=['2020-01-1T00:00:00Z', "{{ config['start_time'] }}"], - title='Start Datetime', + description="The datetime that determines the earliest record that should be synced.", + examples=["2020-01-1T00:00:00Z", "{{ config['start_time'] }}"], + title="Start Datetime", ) cursor_datetime_formats: Optional[List[str]] = Field( None, @@ -835,60 +827,60 @@ class DatetimeBasedCursor(BaseModel): ) cursor_granularity: Optional[str] = Field( None, - description='Smallest increment the datetime_format has (ISO 8601 duration) that is used to ensure the start of a slice does not overlap with the end of the previous one, e.g. for %Y-%m-%d the granularity should be P1D, for %Y-%m-%dT%H:%M:%SZ the granularity should be PT1S. Given this field is provided, `step` needs to be provided as well.', - examples=['PT1S'], - title='Cursor Granularity', + description="Smallest increment the datetime_format has (ISO 8601 duration) that is used to ensure the start of a slice does not overlap with the end of the previous one, e.g. for %Y-%m-%d the granularity should be P1D, for %Y-%m-%dT%H:%M:%SZ the granularity should be PT1S. Given this field is provided, `step` needs to be provided as well.", + examples=["PT1S"], + title="Cursor Granularity", ) end_datetime: Optional[Union[str, MinMaxDatetime]] = Field( None, - description='The datetime that determines the last record that should be synced. 
If not provided, `{{ now_utc() }}` will be used.', - examples=['2021-01-1T00:00:00Z', '{{ now_utc() }}', '{{ day_delta(-1) }}'], - title='End Datetime', + description="The datetime that determines the last record that should be synced. If not provided, `{{ now_utc() }}` will be used.", + examples=["2021-01-1T00:00:00Z", "{{ now_utc() }}", "{{ day_delta(-1) }}"], + title="End Datetime", ) end_time_option: Optional[RequestOption] = Field( None, - description='Optionally configures how the end datetime will be sent in requests to the source API.', - title='Inject End Time Into Outgoing HTTP Request', + description="Optionally configures how the end datetime will be sent in requests to the source API.", + title="Inject End Time Into Outgoing HTTP Request", ) is_data_feed: Optional[bool] = Field( None, - description='A data feed API is an API that does not allow filtering and paginates the content from the most recent to the least recent. Given this, the CDK needs to know when to stop paginating and this field will generate a stop condition for pagination.', - title='Whether the target API is formatted as a data feed', + description="A data feed API is an API that does not allow filtering and paginates the content from the most recent to the least recent. Given this, the CDK needs to know when to stop paginating and this field will generate a stop condition for pagination.", + title="Whether the target API is formatted as a data feed", ) lookback_window: Optional[str] = Field( None, - description='Time interval before the start_datetime to read data for, e.g. P1M for looking back one month.', - examples=['P1D', "P{{ config['lookback_days'] }}D"], - title='Lookback Window', + description="Time interval before the start_datetime to read data for, e.g. 
P1M for looking back one month.", + examples=["P1D", "P{{ config['lookback_days'] }}D"], + title="Lookback Window", ) partition_field_end: Optional[str] = Field( None, - description='Name of the partition start time field.', - examples=['ending_time'], - title='Partition Field End', + description="Name of the partition start time field.", + examples=["ending_time"], + title="Partition Field End", ) partition_field_start: Optional[str] = Field( None, - description='Name of the partition end time field.', - examples=['starting_time'], - title='Partition Field Start', + description="Name of the partition end time field.", + examples=["starting_time"], + title="Partition Field Start", ) start_time_option: Optional[RequestOption] = Field( None, - description='Optionally configures how the start datetime will be sent in requests to the source API.', - title='Inject Start Time Into Outgoing HTTP Request', + description="Optionally configures how the start datetime will be sent in requests to the source API.", + title="Inject Start Time Into Outgoing HTTP Request", ) step: Optional[str] = Field( None, - description='The size of the time window (ISO8601 duration). Given this field is provided, `cursor_granularity` needs to be provided as well.', - examples=['P1W', "{{ config['step_increment'] }}"], - title='Step', + description="The size of the time window (ISO8601 duration). 
Given this field is provided, `cursor_granularity` needs to be provided as well.", + examples=["P1W", "{{ config['step_increment'] }}"], + title="Step", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class DefaultErrorHandler(BaseModel): - type: Literal['DefaultErrorHandler'] + type: Literal["DefaultErrorHandler"] backoff_strategies: Optional[ List[ Union[ @@ -901,144 +893,142 @@ class DefaultErrorHandler(BaseModel): ] ] = Field( None, - description='List of backoff strategies to use to determine how long to wait before retrying a retryable request.', - title='Backoff Strategies', + description="List of backoff strategies to use to determine how long to wait before retrying a retryable request.", + title="Backoff Strategies", ) max_retries: Optional[int] = Field( 5, - description='The maximum number of time to retry a retryable request before giving up and failing.', + description="The maximum number of time to retry a retryable request before giving up and failing.", examples=[5, 0, 10], - title='Max Retry Count', + title="Max Retry Count", ) response_filters: Optional[List[HttpResponseFilter]] = Field( None, description="List of response filters to iterate on when deciding how to handle an error. 
When using an array of multiple filters, the filters will be applied sequentially and the response will be selected if it matches any of the filter's predicate.", - title='Response Filters', + title="Response Filters", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class DefaultPaginator(BaseModel): - type: Literal['DefaultPaginator'] - pagination_strategy: Union[ - CursorPagination, CustomPaginationStrategy, OffsetIncrement, PageIncrement - ] = Field( + type: Literal["DefaultPaginator"] + pagination_strategy: Union[CursorPagination, CustomPaginationStrategy, OffsetIncrement, PageIncrement] = Field( ..., - description='Strategy defining how records are paginated.', - title='Pagination Strategy', + description="Strategy defining how records are paginated.", + title="Pagination Strategy", ) decoder: Optional[JsonDecoder] = Field( None, - description='Component decoding the response so records can be extracted.', - title='Decoder', + description="Component decoding the response so records can be extracted.", + title="Decoder", ) page_size_option: Optional[RequestOption] = None page_token_option: Optional[Union[RequestOption, RequestPath]] = None - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class DpathExtractor(BaseModel): - type: Literal['DpathExtractor'] + type: Literal["DpathExtractor"] field_path: List[str] = Field( ..., description='List of potentially nested fields describing the full path of the field to extract. Use "*" to extract all values from an array. 
See more info in the [docs](https://docs.airbyte.com/connector-development/config-based/understanding-the-yaml-file/record-selector).', examples=[ - ['data'], - ['data', 'records'], - ['data', '{{ parameters.name }}'], - ['data', '*', 'record'], + ["data"], + ["data", "records"], + ["data", "{{ parameters.name }}"], + ["data", "*", "record"], ], - title='Field Path', + title="Field Path", ) decoder: Optional[JsonDecoder] = Field( None, - description='Component decoding the response so records can be extracted.', - title='Decoder', + description="Component decoding the response so records can be extracted.", + title="Decoder", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class SessionTokenRequestApiKeyAuthenticator(BaseModel): - type: Literal['ApiKey'] + type: Literal["ApiKey"] inject_into: RequestOption = Field( ..., - description='Configure how the API Key will be sent in requests to the source API.', + description="Configure how the API Key will be sent in requests to the source API.", examples=[ - {'inject_into': 'header', 'field_name': 'Authorization'}, - {'inject_into': 'request_parameter', 'field_name': 'authKey'}, + {"inject_into": "header", "field_name": "Authorization"}, + {"inject_into": "request_parameter", "field_name": "authKey"}, ], - title='Inject API Key Into Outgoing HTTP Request', + title="Inject API Key Into Outgoing HTTP Request", ) class ListPartitionRouter(BaseModel): - type: Literal['ListPartitionRouter'] + type: Literal["ListPartitionRouter"] cursor_field: str = Field( ..., description='While iterating over list values, the name of field used to reference a list value. The partition value can be accessed with string interpolation. e.g. 
"{{ stream_partition[\'my_key\'] }}" where "my_key" is the value of the cursor_field.', - examples=['section', "{{ config['section_key'] }}"], - title='Current Partition Value Identifier', + examples=["section", "{{ config['section_key'] }}"], + title="Current Partition Value Identifier", ) values: Union[str, List[str]] = Field( ..., - description='The list of attributes being iterated over and used as input for the requests made to the source API.', - examples=[['section_a', 'section_b', 'section_c'], "{{ config['sections'] }}"], - title='Partition Values', + description="The list of attributes being iterated over and used as input for the requests made to the source API.", + examples=[["section_a", "section_b", "section_c"], "{{ config['sections'] }}"], + title="Partition Values", ) request_option: Optional[RequestOption] = Field( None, - description='A request option describing where the list value should be injected into and under what field name if applicable.', - title='Inject Partition Value Into Outgoing HTTP Request', + description="A request option describing where the list value should be injected into and under what field name if applicable.", + title="Inject Partition Value Into Outgoing HTTP Request", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class RecordSelector(BaseModel): - type: Literal['RecordSelector'] + type: Literal["RecordSelector"] extractor: Union[CustomRecordExtractor, DpathExtractor] record_filter: Optional[RecordFilter] = Field( None, - description='Responsible for filtering records to be emitted by the Source.', - title='Record Filter', + description="Responsible for filtering records to be emitted by the Source.", + title="Record Filter", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class Spec(BaseModel): - type: 
Literal['Spec'] + type: Literal["Spec"] connection_specification: Dict[str, Any] = Field( ..., - description='A connection specification describing how a the connector can be configured.', - title='Connection Specification', + description="A connection specification describing how a the connector can be configured.", + title="Connection Specification", ) documentation_url: Optional[str] = Field( None, description="URL of the connector's documentation page.", - examples=['https://docs.airbyte.com/integrations/sources/dremio'], - title='Documentation URL', + examples=["https://docs.airbyte.com/integrations/sources/dremio"], + title="Documentation URL", ) advanced_auth: Optional[AuthFlow] = Field( None, - description='Advanced specification for configuring the authentication flow.', - title='Advanced Auth', + description="Advanced specification for configuring the authentication flow.", + title="Advanced Auth", ) class CompositeErrorHandler(BaseModel): - type: Literal['CompositeErrorHandler'] + type: Literal["CompositeErrorHandler"] error_handlers: List[Union[CompositeErrorHandler, DefaultErrorHandler]] = Field( ..., - description='List of error handlers to iterate on to determine how to handle a failed response.', - title='Error Handlers', + description="List of error handlers to iterate on to determine how to handle a failed response.", + title="Error Handlers", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class DeclarativeSource(BaseModel): class Config: extra = Extra.forbid - type: Literal['DeclarativeSource'] + type: Literal["DeclarativeSource"] check: CheckStream streams: List[DeclarativeStream] version: str @@ -1047,7 +1037,7 @@ class Config: spec: Optional[Spec] = None metadata: Optional[Dict[str, Any]] = Field( None, - description='For internal Airbyte use only - DO NOT modify manually. 
Used by consumers of declarative manifests for storing related metadata.', + description="For internal Airbyte use only - DO NOT modify manually. Used by consumers of declarative manifests for storing related metadata.", ) @@ -1055,101 +1045,91 @@ class DeclarativeStream(BaseModel): class Config: extra = Extra.allow - type: Literal['DeclarativeStream'] + type: Literal["DeclarativeStream"] retriever: Union[CustomRetriever, SimpleRetriever] = Field( ..., - description='Component used to coordinate how records are extracted across stream slices and request pages.', - title='Retriever', + description="Component used to coordinate how records are extracted across stream slices and request pages.", + title="Retriever", ) - incremental_sync: Optional[ - Union[CustomIncrementalSync, DatetimeBasedCursor] - ] = Field( + incremental_sync: Optional[Union[CustomIncrementalSync, DatetimeBasedCursor]] = Field( None, - description='Component used to fetch data incrementally based on a time field in the data.', - title='Incremental Sync', - ) - name: Optional[str] = Field( - '', description='The stream name.', example=['Users'], title='Name' - ) - primary_key: Optional[PrimaryKey] = Field( - '', description='The primary key of the stream.', title='Primary Key' + description="Component used to fetch data incrementally based on a time field in the data.", + title="Incremental Sync", ) + name: Optional[str] = Field("", description="The stream name.", example=["Users"], title="Name") + primary_key: Optional[PrimaryKey] = Field("", description="The primary key of the stream.", title="Primary Key") schema_loader: Optional[Union[InlineSchemaLoader, JsonFileSchemaLoader]] = Field( None, - description='Component used to retrieve the schema for the current stream.', - title='Schema Loader', + description="Component used to retrieve the schema for the current stream.", + title="Schema Loader", ) - transformations: Optional[ - List[Union[AddFields, CustomTransformation, RemoveFields]] - ] = 
Field( + transformations: Optional[List[Union[AddFields, CustomTransformation, RemoveFields]]] = Field( None, - description='A list of transformations to be applied to each output record.', - title='Transformations', + description="A list of transformations to be applied to each output record.", + title="Transformations", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class SessionTokenAuthenticator(BaseModel): - type: Literal['SessionTokenAuthenticator'] + type: Literal["SessionTokenAuthenticator"] login_requester: HttpRequester = Field( ..., - description='Description of the request to perform to obtain a session token to perform data requests. The response body is expected to be a JSON object with a session token property.', + description="Description of the request to perform to obtain a session token to perform data requests. The response body is expected to be a JSON object with a session token property.", examples=[ { - 'type': 'HttpRequester', - 'url_base': 'https://my_api.com', - 'path': '/login', - 'authenticator': { - 'type': 'BasicHttpAuthenticator', - 'username': '{{ config.username }}', - 'password': '{{ config.password }}', + "type": "HttpRequester", + "url_base": "https://my_api.com", + "path": "/login", + "authenticator": { + "type": "BasicHttpAuthenticator", + "username": "{{ config.username }}", + "password": "{{ config.password }}", }, } ], - title='Login Requester', + title="Login Requester", ) session_token_path: List[str] = Field( ..., - description='The path in the response body returned from the login requester to the session token.', - examples=[['access_token'], ['result', 'token']], - title='Session Token Path', + description="The path in the response body returned from the login requester to the session token.", + examples=[["access_token"], ["result", "token"]], + title="Session Token Path", ) expiration_duration: Optional[str] = 
Field( None, - description='The duration in ISO 8601 duration notation after which the session token expires, starting from the time it was obtained. Omitting it will result in the session token being refreshed for every request.', - examples=['PT1H', 'P1D'], - title='Expiration Duration', + description="The duration in ISO 8601 duration notation after which the session token expires, starting from the time it was obtained. Omitting it will result in the session token being refreshed for every request.", + examples=["PT1H", "P1D"], + title="Expiration Duration", ) - request_authentication: Union[ - SessionTokenRequestApiKeyAuthenticator, SessionTokenRequestBearerAuthenticator - ] = Field( + request_authentication: Union[SessionTokenRequestApiKeyAuthenticator, SessionTokenRequestBearerAuthenticator] = Field( ..., - description='Authentication method to use for requests sent to the API, specifying how to inject the session token.', - title='Data Request Authentication', + description="Authentication method to use for requests sent to the API, specifying how to inject the session token.", + title="Data Request Authentication", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class HttpRequester(BaseModel): - type: Literal['HttpRequester'] + type: Literal["HttpRequester"] url_base: str = Field( ..., - description='Base URL of the API source. Do not put sensitive information (e.g. API tokens) into this field - Use the Authentication component for this.', + description="Base URL of the API source. Do not put sensitive information (e.g. 
API tokens) into this field - Use the Authentication component for this.", examples=[ - 'https://connect.squareup.com/v2', + "https://connect.squareup.com/v2", "{{ config['base_url'] or 'https://app.posthog.com'}}/api/", ], - title='API Base URL', + title="API Base URL", ) path: str = Field( ..., - description='Path the specific API endpoint that this stream represents. Do not put sensitive information (e.g. API tokens) into this field - Use the Authentication component for this.', + description="Path the specific API endpoint that this stream represents. Do not put sensitive information (e.g. API tokens) into this field - Use the Authentication component for this.", examples=[ - '/products', + "/products", "/quotes/{{ stream_partition['id'] }}/quote_line_groups", "/trades/{{ config['symbol_id'] }}/history", ], - title='URL Path', + title="URL Path", ) authenticator: Optional[ Union[ @@ -1164,96 +1144,92 @@ class HttpRequester(BaseModel): ] ] = Field( None, - description='Authentication method to use for requests sent to the API.', - title='Authenticator', + description="Authentication method to use for requests sent to the API.", + title="Authenticator", ) - error_handler: Optional[ - Union[DefaultErrorHandler, CustomErrorHandler, CompositeErrorHandler] - ] = Field( + error_handler: Optional[Union[DefaultErrorHandler, CustomErrorHandler, CompositeErrorHandler]] = Field( None, - description='Error handler component that defines how to handle errors.', - title='Error Handler', + description="Error handler component that defines how to handle errors.", + title="Error Handler", ) http_method: Optional[Union[str, HttpMethodEnum]] = Field( - 'GET', - description='The HTTP method used to fetch data from the source (can be GET or POST).', - examples=['GET', 'POST'], - title='HTTP Method', + "GET", + description="The HTTP method used to fetch data from the source (can be GET or POST).", + examples=["GET", "POST"], + title="HTTP Method", ) request_body_data: 
Optional[Union[str, Dict[str, str]]] = Field( None, - description='Specifies how to populate the body of the request with a non-JSON payload. Plain text will be sent as is, whereas objects will be converted to a urlencoded form.', + description="Specifies how to populate the body of the request with a non-JSON payload. Plain text will be sent as is, whereas objects will be converted to a urlencoded form.", examples=[ '[{"clause": {"type": "timestamp", "operator": 10, "parameters":\n [{"value": {{ stream_interval[\'start_time\'] | int * 1000 }} }]\n }, "orderBy": 1, "columnName": "Timestamp"}]/\n' ], - title='Request Body Payload (Non-JSON)', + title="Request Body Payload (Non-JSON)", ) request_body_json: Optional[Union[str, Dict[str, Any]]] = Field( None, - description='Specifies how to populate the body of the request with a JSON payload. Can contain nested objects.', + description="Specifies how to populate the body of the request with a JSON payload. Can contain nested objects.", examples=[ - {'sort_order': 'ASC', 'sort_field': 'CREATED_AT'}, - {'key': "{{ config['value'] }}"}, - {'sort': {'field': 'updated_at', 'order': 'ascending'}}, + {"sort_order": "ASC", "sort_field": "CREATED_AT"}, + {"key": "{{ config['value'] }}"}, + {"sort": {"field": "updated_at", "order": "ascending"}}, ], - title='Request Body JSON Payload', + title="Request Body JSON Payload", ) request_headers: Optional[Union[str, Dict[str, str]]] = Field( None, - description='Return any non-auth headers. Authentication headers will overwrite any overlapping headers returned from this method.', - examples=[{'Output-Format': 'JSON'}, {'Version': "{{ config['version'] }}"}], - title='Request Headers', + description="Return any non-auth headers. 
Authentication headers will overwrite any overlapping headers returned from this method.", + examples=[{"Output-Format": "JSON"}, {"Version": "{{ config['version'] }}"}], + title="Request Headers", ) request_parameters: Optional[Union[str, Dict[str, str]]] = Field( None, - description='Specifies the query parameters that should be set on an outgoing HTTP request given the inputs.', + description="Specifies the query parameters that should be set on an outgoing HTTP request given the inputs.", examples=[ - {'unit': 'day'}, + {"unit": "day"}, { - 'query': 'last_event_time BETWEEN TIMESTAMP "{{ stream_interval.start_time }}" AND TIMESTAMP "{{ stream_interval.end_time }}"' + "query": 'last_event_time BETWEEN TIMESTAMP "{{ stream_interval.start_time }}" AND TIMESTAMP "{{ stream_interval.end_time }}"' }, - {'searchIn': "{{ ','.join(config.get('search_in', [])) }}"}, - {'sort_by[asc]': 'updated_at'}, + {"searchIn": "{{ ','.join(config.get('search_in', [])) }}"}, + {"sort_by[asc]": "updated_at"}, ], - title='Query Parameters', + title="Query Parameters", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class ParentStreamConfig(BaseModel): - type: Literal['ParentStreamConfig'] + type: Literal["ParentStreamConfig"] parent_key: str = Field( ..., - description='The primary key of records from the parent stream that will be used during the retrieval of records for the current substream. This parent identifier field is typically a characteristic of the child records being extracted from the source API.', - examples=['id', "{{ config['parent_record_id'] }}"], - title='Parent Key', - ) - stream: DeclarativeStream = Field( - ..., description='Reference to the parent stream.', title='Parent Stream' + description="The primary key of records from the parent stream that will be used during the retrieval of records for the current substream. 
This parent identifier field is typically a characteristic of the child records being extracted from the source API.", + examples=["id", "{{ config['parent_record_id'] }}"], + title="Parent Key", ) + stream: DeclarativeStream = Field(..., description="Reference to the parent stream.", title="Parent Stream") partition_field: str = Field( ..., - description='While iterating over parent records during a sync, the parent_key value can be referenced by using this field.', - examples=['parent_id', "{{ config['parent_partition_field'] }}"], - title='Current Parent Key Value Identifier', + description="While iterating over parent records during a sync, the parent_key value can be referenced by using this field.", + examples=["parent_id", "{{ config['parent_partition_field'] }}"], + title="Current Parent Key Value Identifier", ) request_option: Optional[RequestOption] = Field( None, - description='A request option describing where the parent key value should be injected into and under what field name if applicable.', - title='Request Option', + description="A request option describing where the parent key value should be injected into and under what field name if applicable.", + title="Request Option", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class SimpleRetriever(BaseModel): - type: Literal['SimpleRetriever'] + type: Literal["SimpleRetriever"] record_selector: RecordSelector = Field( ..., - description='Component that describes how to extract records from a HTTP response.', + description="Component that describes how to extract records from a HTTP response.", ) requester: Union[CustomRequester, HttpRequester] = Field( ..., - description='Requester component that describes how to prepare HTTP requests to send to the source API.', + description="Requester component that describes how to prepare HTTP requests to send to the source API.", ) paginator: 
Optional[Union[DefaultPaginator, NoPagination]] = Field( None, @@ -1264,28 +1240,24 @@ class SimpleRetriever(BaseModel): CustomPartitionRouter, ListPartitionRouter, SubstreamPartitionRouter, - List[ - Union[ - CustomPartitionRouter, ListPartitionRouter, SubstreamPartitionRouter - ] - ], + List[Union[CustomPartitionRouter, ListPartitionRouter, SubstreamPartitionRouter]], ] ] = Field( [], - description='PartitionRouter component that describes how to partition the stream, enabling incremental syncs and checkpointing.', - title='Partition Router', + description="PartitionRouter component that describes how to partition the stream, enabling incremental syncs and checkpointing.", + title="Partition Router", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class SubstreamPartitionRouter(BaseModel): - type: Literal['SubstreamPartitionRouter'] + type: Literal["SubstreamPartitionRouter"] parent_stream_configs: List[ParentStreamConfig] = Field( ..., - description='Specifies which parent streams are being iterated over and how parent records should be used to partition the child stream data set.', - title='Parent Stream Configs', + description="Specifies which parent streams are being iterated over and how parent records should be used to partition the child stream data set.", + title="Parent Stream Configs", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") CompositeErrorHandler.update_forward_refs() From dec1322d3c8b8d0747e538b6313ce32f17997a6b Mon Sep 17 00:00:00 2001 From: Joe Reuter Date: Wed, 2 Aug 2023 11:39:04 +0200 Subject: [PATCH 13/16] add comment --- .../sources/declarative/retrievers/simple_retriever.py | 1 + 1 file changed, 1 insertion(+) diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/retrievers/simple_retriever.py 
b/airbyte-cdk/python/airbyte_cdk/sources/declarative/retrievers/simple_retriever.py index 09b1a5f94413a..f269e35ebeaba 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/retrievers/simple_retriever.py @@ -268,6 +268,7 @@ def _fetch_next_page( ), ) + # This logic is similar to _read_pages in the HttpStream class. When making changes here, consider making changes there as well. def _read_pages( self, records_generator_fn: Callable[[Optional[requests.Response], Mapping[str, Any], Mapping[str, Any]], Iterable[StreamData]], From 0bbf02a6e1a6293971be51e2637f19ad7700f940 Mon Sep 17 00:00:00 2001 From: Joe Reuter Date: Wed, 2 Aug 2023 12:34:33 +0200 Subject: [PATCH 14/16] update unit test --- .../unit_tests/test_streams.py | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/airbyte-integrations/connectors/source-greenhouse/unit_tests/test_streams.py b/airbyte-integrations/connectors/source-greenhouse/unit_tests/test_streams.py index 33911d4e1f405..a7a9adaf72028 100644 --- a/airbyte-integrations/connectors/source-greenhouse/unit_tests/test_streams.py +++ b/airbyte-integrations/connectors/source-greenhouse/unit_tests/test_streams.py @@ -28,28 +28,28 @@ def create_response(headers): def test_next_page_token_has_next(applications_stream): headers = {"link": '; rel="next"'} response = create_response(headers) - next_page_token = applications_stream.retriever.next_page_token(response=response) + next_page_token = applications_stream.retriever._next_page_token(response=response) assert next_page_token == {"next_page_token": "https://harvest.greenhouse.io/v1/applications?per_page=100&since_id=123456789"} def test_next_page_token_has_not_next(applications_stream): response = create_response({}) - next_page_token = applications_stream.retriever.next_page_token(response=response) + next_page_token = 
applications_stream.retriever._next_page_token(response=response) assert next_page_token is None def test_request_params_next_page_token_is_not_none(applications_stream): response = create_response({"link": f'; rel="next"'}) - next_page_token = applications_stream.retriever.next_page_token(response=response) - request_params = applications_stream.retriever.request_params(next_page_token=next_page_token, stream_state={}) - path = applications_stream.retriever.path(next_page_token=next_page_token, stream_state={}) + next_page_token = applications_stream.retriever._next_page_token(response=response) + request_params = applications_stream.retriever._request_params(next_page_token=next_page_token, stream_state={}) + path = applications_stream.retriever._paginator_path() assert "applications?per_page=100&since_id=123456789" == path assert request_params == {"per_page": 100} def test_request_params_next_page_token_is_none(applications_stream): - request_params = applications_stream.retriever.request_params(stream_state={}) + request_params = applications_stream.retriever._request_params(stream_state={}) assert request_params == {"per_page": 100} @@ -138,7 +138,7 @@ def test_parse_response_expected_response(applications_stream): ] """ response._content = response_content - parsed_response = applications_stream.retriever.parse_response(response, stream_state={}) + parsed_response = applications_stream.retriever._parse_response(response, stream_state={}) records = [dict(record) for record in parsed_response] assert records == json.loads(response_content) @@ -148,7 +148,7 @@ def test_parse_response_empty_content(applications_stream): response = requests.Response() response.status_code = 200 response._content = b"[]" - parsed_response = applications_stream.retriever.parse_response(response, stream_state={}) + parsed_response = applications_stream.retriever._parse_response(response, stream_state={}) records = [record for record in parsed_response] assert records == [] @@ -164,7 
+164,7 @@ def test_ignore_403(applications_stream): response = requests.Response() response.status_code = 403 response._content = b"" - parsed_response = applications_stream.retriever.parse_response(response, stream_state={}) + parsed_response = applications_stream.retriever._parse_response(response, stream_state={}) records = [record for record in parsed_response] assert records == [] @@ -173,5 +173,5 @@ def test_retry_429(applications_stream): response = requests.Response() response.status_code = 429 response._content = b"{}" - should_retry = applications_stream.retriever.should_retry(response) + should_retry = applications_stream.retriever.requester._should_retry(response) assert should_retry is True From f327cf4247408609457a96047783eed2482e5530 Mon Sep 17 00:00:00 2001 From: Joe Reuter Date: Wed, 2 Aug 2023 12:35:49 +0200 Subject: [PATCH 15/16] fix unit tests --- airbyte-integrations/connectors/source-greenhouse/Dockerfile | 2 +- airbyte-integrations/connectors/source-greenhouse/metadata.yaml | 2 +- docs/integrations/sources/greenhouse.md | 1 + 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/airbyte-integrations/connectors/source-greenhouse/Dockerfile b/airbyte-integrations/connectors/source-greenhouse/Dockerfile index 2c6b0c5faeaa2..a6c69afd9724b 100644 --- a/airbyte-integrations/connectors/source-greenhouse/Dockerfile +++ b/airbyte-integrations/connectors/source-greenhouse/Dockerfile @@ -12,5 +12,5 @@ COPY main.py ./ ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.4.1 +LABEL io.airbyte.version=0.4.2 LABEL io.airbyte.name=airbyte/source-greenhouse diff --git a/airbyte-integrations/connectors/source-greenhouse/metadata.yaml b/airbyte-integrations/connectors/source-greenhouse/metadata.yaml index b51409411b44f..f1cf82a238d49 100644 --- a/airbyte-integrations/connectors/source-greenhouse/metadata.yaml +++ 
b/airbyte-integrations/connectors/source-greenhouse/metadata.yaml @@ -5,7 +5,7 @@ data: connectorSubtype: api connectorType: source definitionId: 59f1e50a-331f-4f09-b3e8-2e8d4d355f44 - dockerImageTag: 0.4.1 + dockerImageTag: 0.4.2 dockerRepository: airbyte/source-greenhouse githubIssueLabel: source-greenhouse icon: greenhouse.svg diff --git a/docs/integrations/sources/greenhouse.md b/docs/integrations/sources/greenhouse.md index 12cf6c8167bc0..a69a440e0a822 100644 --- a/docs/integrations/sources/greenhouse.md +++ b/docs/integrations/sources/greenhouse.md @@ -64,6 +64,7 @@ The Greenhouse connector should not run into Greenhouse API limitations under no | Version | Date | Pull Request | Subject | |:--------|:-----------|:---------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| 0.4.2 | 2023-08-02 | [27773](https://github.com/airbytehq/airbyte/pull/27773) | Update CDK version | | 0.4.1 | 2023-06-28 | [27773](https://github.com/airbytehq/airbyte/pull/27773) | Update following state breaking changes | | 0.4.0 | 2023-04-26 | [25332](https://github.com/airbytehq/airbyte/pull/25332) | Add new streams: `ActivityFeed`, `Approvals`, `Disciplines`, `Eeoc`, `EmailTemplates`, `Offices`, `ProspectPools`, `Schools`, `Tags`, `UserPermissions`, `UserRoles` | | 0.3.1 | 2023-03-06 | [23231](https://github.com/airbytehq/airbyte/pull/23231) | Publish using low-code CDK Beta version | From 655c9e85641330cfb63657abf8f64d8affb7988c Mon Sep 17 00:00:00 2001 From: Joe Reuter Date: Wed, 2 Aug 2023 12:38:57 +0200 Subject: [PATCH 16/16] update PR name --- docs/integrations/sources/greenhouse.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/integrations/sources/greenhouse.md b/docs/integrations/sources/greenhouse.md index a69a440e0a822..40f41158557a9 100644 --- a/docs/integrations/sources/greenhouse.md 
+++ b/docs/integrations/sources/greenhouse.md @@ -64,7 +64,7 @@ The Greenhouse connector should not run into Greenhouse API limitations under no | Version | Date | Pull Request | Subject | |:--------|:-----------|:---------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| 0.4.2 | 2023-08-02 | [27773](https://github.com/airbytehq/airbyte/pull/27773) | Update CDK version | +| 0.4.2 | 2023-08-02 | [28969](https://github.com/airbytehq/airbyte/pull/28969) | Update CDK version | | 0.4.1 | 2023-06-28 | [27773](https://github.com/airbytehq/airbyte/pull/27773) | Update following state breaking changes | | 0.4.0 | 2023-04-26 | [25332](https://github.com/airbytehq/airbyte/pull/25332) | Add new streams: `ActivityFeed`, `Approvals`, `Disciplines`, `Eeoc`, `EmailTemplates`, `Offices`, `ProspectPools`, `Schools`, `Tags`, `UserPermissions`, `UserRoles` | | 0.3.1 | 2023-03-06 | [23231](https://github.com/airbytehq/airbyte/pull/23231) | Publish using low-code CDK Beta version |