From 6518175d91b837e64eed5f68e00f0bf2d2200d25 Mon Sep 17 00:00:00 2001 From: John Lee Date: Tue, 23 Aug 2016 17:25:55 -0700 Subject: [PATCH] convert all strings and code to use unicode in Python client (#371) --- .../src/main/python/pygenie/jobs/core.py | 49 ++++++++++++++++--- .../src/main/python/pygenie/jobs/hadoop.py | 4 ++ .../src/main/python/pygenie/jobs/hive.py | 6 ++- .../src/main/python/pygenie/jobs/pig.py | 7 ++- .../src/main/python/pygenie/jobs/presto.py | 4 ++ .../src/main/python/pygenie/jobs/sqoop.py | 4 ++ .../src/main/python/pygenie/jobs/utils.py | 9 ++-- genie-client/src/main/python/pygenie/utils.py | 31 +++++++++++- genie-client/src/main/python/setup.py | 2 +- 9 files changed, 101 insertions(+), 15 deletions(-) diff --git a/genie-client/src/main/python/pygenie/jobs/core.py b/genie-client/src/main/python/pygenie/jobs/core.py index 5efeaf201cc..dad1e97f029 100644 --- a/genie-client/src/main/python/pygenie/jobs/core.py +++ b/genie-client/src/main/python/pygenie/jobs/core.py @@ -15,8 +15,10 @@ from collections import defaultdict from ..conf import GenieConf -from ..utils import (is_str, +from ..utils import (convert_to_unicode, + is_str, str_to_list, + unicodify, uuid_str) from .utils import (add_to_repr, arg_list, @@ -41,6 +43,12 @@ def __init__(self, class_name=None): self.__repr_list = list() def __repr__(self): + return str(self) + + def __str__(self): + return unicode(self).encode('utf-8') + + def __unicode__(self): return '.'.join(self.repr_list) @staticmethod @@ -73,12 +81,15 @@ def append(self, func_name=None, args=None, kwargs=None): def args_to_str(self, args): """Convert args tuple to string.""" - return ', '.join([ - '{qu}{val}{qu}' \ - .format(val=a, - qu=self.__quote(a) if is_str(a) else '') - for a in args - ]) if args is not None else '' + if args is not None: + results = list() + for arg in [convert_to_unicode(a) for a in args]: + results.append('{qu}{val}{qu}' \ + .format(val=arg, + qu=self.__quote(arg) if is_str(arg) else '')) + return ', '.join(results) + + return '' def kwargs_to_str(self, kwargs): """Convert kwargs dict to string.""" @@ -163,7 +174,13 @@ def __init__(self, conf=None): self._cluster_tag_mapping['default'] = self.default_cluster_tags def __repr__(self): - return str(self.repr_obj) + return str(self) + + def __str__(self): + return unicode(self).encode('utf-8') + + def __unicode__(self): + return unicode(self.repr_obj) def _add_dependency(self, dep): """ @@ -174,6 +191,7 @@ def _add_dependency(self, dep): if dep not in self._dependencies: self._dependencies.append(dep) + @unicodify def _add_cluster_tag(self, tags, priority=0): """ Add a cluster tag to level. The priority is the level of precedence when @@ -185,6 +203,7 @@ def _add_cluster_tag(self, tags, priority=0): # negate priority so can do sorted(self._cluster_tag_mapping.keys()) self._cluster_tag_mapping[-int(priority)].extend(tags) + @unicodify def _set_command_option(self, flag, name, value=None): """ Convenience method for storing an option which can later be used @@ -198,6 +217,7 @@ def _set_command_option(self, flag, name, value=None): self._command_options[flag][name] = value + @unicodify @arg_list @add_to_repr('append') def applications(self, _application_ids): @@ -250,6 +270,7 @@ def archive(self, archive): self._archive = archive return self + @unicodify @add_to_repr('append') def cluster_tags(self, cluster_tags): """ @@ -290,6 +311,7 @@ def cmd_args(self): raise GenieJobError('should not try to access core GenieJob ' \ 'constructed command arguments') + @unicodify @arg_string @add_to_repr('overwrite') def command_arguments(self, _command_arguments): @@ -307,6 +329,7 @@ def command_arguments(self, _command_arguments): :py:class:`GenieJob`: self """ + @unicodify @arg_list @add_to_repr('append') def command_tags(self, _command_tags): @@ -349,6 +372,7 @@ def dependencies(self, _dependencies): :py:class:`GenieJob`: self """ + @unicodify @arg_string @add_to_repr('overwrite') def description(self, _description): @@ -376,6 +400,7 @@ def disable_archive(self): return self.archive(False) + @unicodify @arg_string @add_to_repr('overwrite') def email(self, _email): @@ -500,6 +525,7 @@ def get(self, attr, default=None): return self.to_dict().get(attr, default) + @unicodify @arg_string @add_to_repr('overwrite') def group(self, _group): @@ -518,6 +544,7 @@ def group(self, _group): :py:class:`GenieJob`: self """ + @unicodify @arg_string @add_to_repr('overwrite') def job_id(self, _job_id): @@ -540,6 +567,7 @@ def job_id(self, _job_id): :py:class:`GenieJob`: self """ + @unicodify @arg_string @add_to_repr('overwrite') def job_name(self, _job_name): @@ -557,6 +585,7 @@ def job_name(self, _job_name): :py:class:`GenieJob`: self """ + @unicodify @arg_string @add_to_repr('overwrite') def job_version(self, _job_version): @@ -574,6 +603,7 @@ def job_version(self, _job_version): :py:class:`GenieJob`: self """ + @unicodify @add_to_repr('append') def parameter(self, name, value): """ @@ -625,6 +655,7 @@ def parameters(self, **kwargs): return self + @unicodify @add_to_repr('overwrite') def setup_file(self, setup_file): """ @@ -655,6 +686,7 @@ def setup_file(self, setup_file): return self + @unicodify @arg_list @add_to_repr('append') def tags(self, _tags): @@ -737,6 +769,7 @@ def to_json(self): return json.dumps(self.to_dict(), sort_keys=True, indent=4) + @unicodify @arg_string @add_to_repr('overwrite') def username(self, _username): diff --git a/genie-client/src/main/python/pygenie/jobs/hadoop.py b/genie-client/src/main/python/pygenie/jobs/hadoop.py index 11b0f809954..f69f134a542 100644 --- a/genie-client/src/main/python/pygenie/jobs/hadoop.py +++ b/genie-client/src/main/python/pygenie/jobs/hadoop.py @@ -11,6 +11,7 @@ import logging import os +from ..utils import unicodify from .core import GenieJob from .utils import (add_to_repr, arg_string) @@ -59,6 +60,7 @@ def command(self, script): return self.script(script) + @unicodify @add_to_repr('append') def property(self, name, value): """ @@ -85,6 +87,7 @@ def property(self, name, value): return self + @unicodify @arg_string @add_to_repr('overwrite') def property_file(self, _property_file): @@ -110,6 +113,7 @@ def property_file(self, _property_file): return self + @unicodify @arg_string @add_to_repr('overwrite') def script(self, _script): diff --git a/genie-client/src/main/python/pygenie/jobs/hive.py b/genie-client/src/main/python/pygenie/jobs/hive.py index fd3dc6d42c4..5dca9d77604 100644 --- a/genie-client/src/main/python/pygenie/jobs/hive.py +++ b/genie-client/src/main/python/pygenie/jobs/hive.py @@ -11,6 +11,7 @@ import logging import os +from ..utils import unicodify from .core import GenieJob from .utils import (add_to_repr, arg_string, @@ -64,7 +65,7 @@ def cmd_args(self): params_str = ' '.join([ '-d {qu}{name}={value}{qu}' \ - .format(name=k, value=v, qu='"' if ' ' in str(v) else '') \ + .format(name=k, value=v, qu='"' if ' ' in unicode(v) else '') \ for k, v in self._parameters.items() ]) @@ -100,6 +101,7 @@ def headers(self): self.tags('headers') return self.hiveconf('hive.cli.print.header', 'true') + @unicodify @add_to_repr('append') def hiveconf(self, name, value): """ @@ -131,6 +133,7 @@ def property(self, name, value): """Alias for :py:meth:`HiveJob.hiveconf`""" return self.hiveconf(name, value) + @unicodify @arg_string @add_to_repr('overwrite') def property_file(self, _property_file): @@ -162,6 +165,7 @@ def query(self, script): return self.script(script) + @unicodify @arg_string @add_to_repr('overwrite') def script(self, _script): diff --git a/genie-client/src/main/python/pygenie/jobs/pig.py b/genie-client/src/main/python/pygenie/jobs/pig.py index 6bf1a0ead91..5ec9a4bc276 100644 --- a/genie-client/src/main/python/pygenie/jobs/pig.py +++ b/genie-client/src/main/python/pygenie/jobs/pig.py @@ -11,6 +11,7 @@ import logging import os +from ..utils import unicodify from .core import GenieJob from .utils import (add_to_repr, arg_list, @@ -71,7 +72,7 @@ def cmd_args(self): params_str = ' '.join([ '-p {qu}{name}={value}{qu}' \ - .format(name=k, value=v, qu='"' if ' ' in str(v) else '') \ + .format(name=k, value=v, qu='"' if ' ' in unicode(v) else '') \ for k, v in self._parameters.items() ]) @@ -92,6 +93,7 @@ def cmd_args(self): params=params_str) \ .strip() + @unicodify @arg_list @add_to_repr('append') def parameter_file(self, _parameter_files): @@ -119,6 +121,7 @@ def parameter_file(self, _parameter_files): return self + @unicodify @add_to_repr('append') def property(self, name, value): """ @@ -146,6 +149,7 @@ def property(self, name, value): return self + @unicodify @arg_string @add_to_repr('overwrite') def property_file(self, _property_file): @@ -172,6 +176,7 @@ def property_file(self, _property_file): return self + @unicodify @arg_string @add_to_repr('overwrite') def script(self, _script): diff --git a/genie-client/src/main/python/pygenie/jobs/presto.py b/genie-client/src/main/python/pygenie/jobs/presto.py index 90537b4d62f..8d9a9f86c4c 100644 --- a/genie-client/src/main/python/pygenie/jobs/presto.py +++ b/genie-client/src/main/python/pygenie/jobs/presto.py @@ -23,6 +23,7 @@ import logging import os +from ..utils import unicodify from .core import GenieJob from .utils import (add_to_repr, arg_string, @@ -106,6 +107,7 @@ def headers(self): return self.option('output-format', 'CSV_HEADER') + @unicodify @add_to_repr('append') def option(self, name, value=None): """ @@ -139,6 +141,7 @@ def query(self, script): return self.script(script) + @unicodify @arg_string @add_to_repr('overwrite') def script(self, _script): @@ -159,6 +162,7 @@ def script(self, _script): :py:class:`PrestoJob`: self """ + @unicodify @add_to_repr('append') def session(self, name, value): """ diff --git a/genie-client/src/main/python/pygenie/jobs/sqoop.py b/genie-client/src/main/python/pygenie/jobs/sqoop.py index f3747d4f08b..2c1f042d0fa 100644 --- a/genie-client/src/main/python/pygenie/jobs/sqoop.py +++ b/genie-client/src/main/python/pygenie/jobs/sqoop.py @@ -12,6 +12,7 @@ from collections import defaultdict +from ..utils import unicodify from .core import GenieJob from .utils import add_to_repr @@ -76,6 +77,7 @@ def cmd_args(self): options=' '.join(opts_list)) \ .strip() + @unicodify @add_to_repr('overwrite') def cmd(self, cmd): """ @@ -98,6 +100,7 @@ def cmd(self, cmd): return self + @unicodify @add_to_repr('append') def option(self, name, value=None): """ @@ -125,6 +128,7 @@ def option(self, name, value=None): return self + @unicodify @add_to_repr('append') def property(self, name, value, flag='-D'): """ diff --git a/genie-client/src/main/python/pygenie/jobs/utils.py b/genie-client/src/main/python/pygenie/jobs/utils.py index 6fe0eb14f62..44a784805ad 100644 --- a/genie-client/src/main/python/pygenie/jobs/utils.py +++ b/genie-client/src/main/python/pygenie/jobs/utils.py @@ -17,7 +17,8 @@ from .running import RunningJob -from ..utils import (is_str, +from ..utils import (convert_to_unicode, + is_str, str_to_list) from ..exceptions import GenieJobNotFoundError @@ -163,7 +164,7 @@ def wrapper(*args, **kwargs): assert is_str(value), \ '{}() argument value should be a string'.format(func.__name__) - setattr(self, attr_name, value) + setattr(self, attr_name, convert_to_unicode(value)) return func(*args, **kwargs) or self @@ -199,7 +200,7 @@ def generate_job_id(job_id, return_success=True, conf=None): return running_job.job_id id_parts = running_job.job_id.split('-') if id_parts[-1].isdigit(): - id_parts[-1] = str(int(id_parts[-1]) + 1) + id_parts[-1] = unicode(int(id_parts[-1]) + 1) else: id_parts.append('1') job_id = '-'.join(id_parts) @@ -221,6 +222,8 @@ def is_attachment(dependency): def is_file(path): """Checks if path is to a file.""" + path = convert_to_unicode(path) + return path is not None and \ (os.path.isfile(path) \ or path.startswith('s3://') \ diff --git a/genie-client/src/main/python/pygenie/utils.py b/genie-client/src/main/python/pygenie/utils.py index 8ba2ff2d3d6..1d691371814 100644 --- a/genie-client/src/main/python/pygenie/utils.py +++ b/genie-client/src/main/python/pygenie/utils.py @@ -16,6 +16,8 @@ import time import uuid +from functools import wraps + import requests from .auth import AuthHandler @@ -97,6 +99,33 @@ def call(url, method='get', headers=None, raise_not_status=None, return resp +def convert_to_unicode(value): + """Convert value to unicode.""" + + if is_str(value) and not isinstance(value, unicode): + return value.decode('utf-8') + + return value + + +def unicodify(func): + """ + Decorator to convert all string args and kwargs to unicode. + """ + + @wraps(func) + def wrapper(*args, **kwargs): + """Wraps func.""" + + args = tuple(convert_to_unicode(i) for i in args) + kwargs = {convert_to_unicode(key): convert_to_unicode(value) \ + for key, value in kwargs.items()} + + return func(*args, **kwargs) + + return wrapper + + def dttm_to_epoch(date_str, frmt='%Y-%m-%dT%H:%M:%SZ'): """Convert a date string to epoch seconds.""" @@ -146,4 +175,4 @@ def uuid_str(): str: A unique id. """ - return str(uuid.uuid1()) + return unicode(uuid.uuid1()) diff --git a/genie-client/src/main/python/setup.py b/genie-client/src/main/python/setup.py index 82568d7485c..869281d5f44 100644 --- a/genie-client/src/main/python/setup.py +++ b/genie-client/src/main/python/setup.py @@ -26,7 +26,7 @@ setup( name='nflx-genie-client', - version='3.0.32', + version='3.0.33', author='Netflix Inc.', author_email='genieoss@googlegroups.com', keywords='genie hadoop cloud netflix client bigdata presto',