From b1a4ffc2b97ba940e82d44a3b0f780c5a8d8aaf6 Mon Sep 17 00:00:00 2001 From: Tarun Goyal Date: Fri, 15 Feb 2019 11:51:02 +0530 Subject: [PATCH 01/69] SDK-335: GCP creds schema changes (#263) --- qds_sdk/cloud/gcp_cloud.py | 154 +++++++++++++++---------------------- tests/test_clusterv2.py | 29 +++---- 2 files changed, 74 insertions(+), 109 deletions(-) diff --git a/qds_sdk/cloud/gcp_cloud.py b/qds_sdk/cloud/gcp_cloud.py index 92413bd2..20897c8c 100644 --- a/qds_sdk/cloud/gcp_cloud.py +++ b/qds_sdk/cloud/gcp_cloud.py @@ -12,19 +12,16 @@ def __init__(self): self.storage_config = {} def set_cloud_config(self, - compute_client_id=None, - compute_project_id=None, - compute_client_email=None, - compute_private_key_id=None, - compute_private_key=None, + qsa_client_id=None, + customer_project_id=None, + qsa_client_email=None, + qsa_private_key_id=None, + qsa_private_key=None, + comp_client_email=None, + inst_client_email=None, use_account_compute_creds=None, gcp_region=None, gcp_zone=None, - storage_client_id=None, - storage_project_id=None, - storage_client_email=None, - storage_private_key_id=None, - storage_private_key=None, storage_disk_size_in_gb=None, storage_disk_count=None, storage_disk_type=None, @@ -34,31 +31,25 @@ def set_cloud_config(self, ''' Args: - compute_client_id: Compute client id for gcp cluster + qsa_client_id: Compute client id for gcp cluster - compute_project_id: Compute project id for gcp cluster + customer_project_id: Compute project id for gcp cluster - compute_client_email: Compute client email for gcp cluster + qsa_client_email: Compute client email for gcp cluster - compute_private_key_id: Compute private key id for gcp cluster + qsa_private_key_id: Compute private key id for gcp cluster - compute_private_key: Compute private key for gcp cluster + qsa_private_key: Compute private key for gcp cluster + + comp_client_email: Client compute service account email + + inst_client_email: Client storage/instance service account email use_account_compute_creds: Set it to true to use the account's compute credentials for all clusters of the account.The default value is false gcp_region: Region for gcp cluster - storage_client_id: Storage client id for gcp cluster - - storage_project_id: Storage project id for gcp cluster - - storage_client_email: Storage client email for gcp cluster - - storage_private_key_id: Storage private key id for gcp cluster - - storage_private_key: Storage private key for gcp cluster - bastion_node_public_dns: public dns name of the bastion node. Required only if cluster is in a private subnet. 
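To make the renamed credential schema concrete, a minimal usage sketch (assuming the class in this module is exported as GcpCloud; every value below is a placeholder, not a real credential):

    from qds_sdk.cloud.gcp_cloud import GcpCloud

    cloud = GcpCloud()
    # One Qubole service account (qsa_*) plus the customer's project id and
    # per-purpose client emails replace the old compute_*/storage_* key pairs.
    cloud.set_cloud_config(
        qsa_client_id="qsa-client-id",
        customer_project_id="customer-project",
        qsa_client_email="qsa@qubole-project.iam.gserviceaccount.com",
        qsa_private_key_id="key-id",
        qsa_private_key="-----BEGIN PRIVATE KEY-----...",
        comp_client_email="compute-sa@customer-project.iam.gserviceaccount.com",
        inst_client_email="storage-sa@customer-project.iam.gserviceaccount.com",
        gcp_region="us-central1",
        gcp_zone="us-central1-a")
    assert cloud.compute_config["qsa_client_id"] == "qsa-client-id"
    assert cloud.storage_config["inst_client_email"].startswith("storage-sa")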
@@ -67,26 +58,27 @@ def set_cloud_config(self, subnet_id: Subnet id for gcp cluster ''' - self.set_compute_config(use_account_compute_creds, compute_client_id, compute_project_id, compute_client_email, - compute_private_key_id, compute_private_key) + self.set_compute_config(use_account_compute_creds, qsa_client_id, customer_project_id, qsa_client_email, + qsa_private_key_id, qsa_private_key, comp_client_email) self.set_location(gcp_region, gcp_zone) self.set_network_config(bastion_node_public_dns, vpc_id, subnet_id) - self.set_storage_config(storage_client_id, storage_project_id, storage_client_email, storage_private_key_id, - storage_private_key, storage_disk_size_in_gb, storage_disk_count, storage_disk_type) + self.set_storage_config(inst_client_email, storage_disk_size_in_gb, storage_disk_count, storage_disk_type) def set_compute_config(self, use_account_compute_creds=None, - compute_client_id=None, - compute_project_id=None, - compute_client_email=None, - compute_private_key_id=None, - compute_private_key=None): + qsa_client_id=None, + customer_project_id=None, + qsa_client_email=None, + qsa_private_key_id=None, + qsa_private_key=None, + comp_client_email=None): self.compute_config['use_account_compute_creds'] = use_account_compute_creds - self.compute_config['compute_client_id'] = compute_client_id - self.compute_config['compute_project_id'] = compute_project_id - self.compute_config['compute_client_email'] = compute_client_email - self.compute_config['compute_private_key_id'] = compute_private_key_id - self.compute_config['compute_private_key'] = compute_private_key + self.compute_config['qsa_client_id'] = qsa_client_id + self.compute_config['customer_project_id'] = customer_project_id + self.compute_config['qsa_client_email'] = qsa_client_email + self.compute_config['qsa_private_key_id'] = qsa_private_key_id + self.compute_config['qsa_private_key'] = qsa_private_key + self.compute_config['comp_client_email'] = comp_client_email def set_location(self, gcp_region=None, @@ -104,38 +96,27 @@ def set_network_config(self, self.network_config['subnet'] = subnet_id def set_storage_config(self, - storage_client_id=None, - storage_project_id=None, - storage_client_email=None, - storage_private_key_id=None, - storage_private_key=None, + inst_client_email=None, storage_disk_size_in_gb=None, storage_disk_count=None, storage_disk_type=None ): - self.storage_config['storage_client_id'] = storage_client_id - self.storage_config['storage_project_id'] = storage_project_id - self.storage_config['storage_client_email'] = storage_client_email - self.storage_config['storage_private_key_id'] = storage_private_key_id - self.storage_config['storage_private_key'] = storage_private_key + self.storage_config['inst_client_email'] = inst_client_email self.storage_config['disk_size_in_gb'] = storage_disk_size_in_gb self.storage_config['disk_count'] = storage_disk_count self.storage_config['disk_type'] = storage_disk_type def set_cloud_config_from_arguments(self, arguments): - self.set_cloud_config(compute_client_id=arguments.compute_client_id, - compute_project_id=arguments.compute_project_id, - compute_client_email=arguments.compute_client_email, - compute_private_key_id=arguments.compute_private_key_id, - compute_private_key=arguments.compute_private_key, + self.set_cloud_config(qsa_client_id=arguments.qsa_client_id, + customer_project_id=arguments.customer_project_id, + qsa_client_email=arguments.qsa_client_email, + qsa_private_key_id=arguments.qsa_private_key_id, + qsa_private_key=arguments.qsa_private_key, + 
inst_client_email=arguments.inst_client_email, + comp_client_email=arguments.comp_client_email, use_account_compute_creds=arguments.use_account_compute_creds, gcp_region=arguments.gcp_region, gcp_zone=arguments.gcp_zone, - storage_client_id=arguments.storage_client_id, - storage_project_id=arguments.storage_project_id, - storage_client_email=arguments.storage_client_email, - storage_private_key_id=arguments.storage_private_key_id, - storage_private_key=arguments.storage_private_key, storage_disk_size_in_gb=arguments.storage_disk_size_in_gb, storage_disk_count=arguments.storage_disk_count, storage_disk_type=arguments.storage_disk_type, @@ -157,26 +138,30 @@ def create_parser(self, argparser): action="store_false", default=None, help="to disable account compute credentials") - compute_config.add_argument("--compute-client-id", - dest="compute_client_id", + compute_config.add_argument("--qsa-client-id", + dest="qsa_client_id", default=None, - help="compute client id for gcp cluster") - compute_config.add_argument("--compute-project-id", - dest="compute_project_id", + help="qsa client id for gcp cluster") + compute_config.add_argument("--customer-project-id", + dest="customer_project_id", default=None, - help="compute project id for gcp cluster") - compute_config.add_argument("--compute-client-email", - dest="compute_client_email", + help="customer project id for gcp cluster") + compute_config.add_argument("--qsa-client-email", + dest="qsa_client_email", default=None, - help="compute client email for gcp cluster") - compute_config.add_argument("--compute-private-key-id", - dest="compute_private_key_id", + help="qsa client email for gcp cluster") + compute_config.add_argument("--qsa-private-key-id", + dest="qsa_private_key_id", default=None, - help="compute private key id for gcp cluster") - compute_config.add_argument("--compute-private-key", - dest="compute_private_key", + help="qsa private key id for gcp cluster") + compute_config.add_argument("--qsa-private-key", + dest="qsa_private_key", default=None, - help="compute private key for gcp cluster") + help="qsa private key for gcp cluster") + compute_config.add_argument("--compute-client-email", + dest="comp_client_email", + default=None, + help="client compute service account email") # location settings parser location_group = argparser.add_argument_group("location config settings") @@ -201,26 +186,11 @@ def create_parser(self, argparser): # storage config settings parser storage_config = argparser.add_argument_group("storage config settings") - storage_config.add_argument("--storage-client-id", - dest="storage_client_id", - default=None, - help="storage client id for gcp cluster") - storage_config.add_argument("--storage-project-id", - dest="storage_project_id", - default=None, - help="storage project id for gcp cluster") + storage_config.add_argument("--storage-client-email", - dest="storage_client_email", - default=None, - help="storage client email for gcp cluster") - storage_config.add_argument("--storage-private-key-id", - dest="storage_private_key_id", - default=None, - help="storage private key id for gcp cluster") - storage_config.add_argument("--storage-private-key", - dest="storage_private_key", + dest="inst_client_email", default=None, - help="storage private key for gcp cluster") + help="client storage service account email") storage_config.add_argument("--storage-disk-size-in-gb", dest="storage_disk_size_in_gb", default=None, diff --git a/tests/test_clusterv2.py b/tests/test_clusterv2.py index e8b4de78..f07a350b 100644 --- 
a/tests/test_clusterv2.py +++ b/tests/test_clusterv2.py @@ -313,24 +313,23 @@ def test_oracle_opc_network_config(self): def test_gcp_compute_config(self): sys.argv = ['qds.py', '--version', 'v2', '--cloud', 'GCP', 'cluster', 'create', '--label', 'test_label', - '--compute-client-id', 'xxx11', '--compute-project-id', 'www11', '--compute-client-email', - 'yyyy11', '--compute-private-key-id', 'zzz22', '--compute-private-key', 'aaa'] + '--qsa-client-id', 'xxx11', '--customer-project-id', 'www11', '--qsa-client-email', + 'yyyy11', '--qsa-private-key-id', 'zzz22', '--qsa-private-key', 'aaa'] Qubole.cloud = None print_command() Connection._api_call = Mock(return_value={}) qds.main() Connection._api_call.assert_called_with('POST', 'clusters', {'cloud_config': {'compute_config': - {'compute_private_key_id': 'zzz22', - 'compute_private_key': 'aaa', - 'compute_client_email': 'yyyy11', - 'compute_project_id': 'www11', - 'compute_client_id': 'xxx11'}}, + {'qsa_private_key_id': 'zzz22', + 'qsa_private_key': 'aaa', + 'qsa_client_email': 'yyyy11', + 'customer_project_id': 'www11', + 'qsa_client_id': 'xxx11'}}, 'cluster_info': {'label': ['test_label']}}) def test_gcp_storage_config(self): sys.argv = ['qds.py', '--version', 'v2', '--cloud', 'GCP', 'cluster', 'create', '--label', 'test_label', - '--storage-client-id', 'xxx11', '--storage-project-id', 'yyyy11', '--storage-client-email', 'www11', - '--storage-private-key-id', 'zzz22', '--storage-private-key', 'aaa', '--storage-disk-size-in-gb', 'aaa', + '--storage-client-email', 'aaa', '--storage-disk-size-in-gb', 'aaa', '--storage-disk-count', 'bbb', '--storage-disk-type', 'ccc' ] Qubole.cloud = None print_command() @@ -338,11 +337,7 @@ def test_gcp_storage_config(self): qds.main() Connection._api_call.assert_called_with('POST', 'clusters', {'cloud_config': {'storage_config': - {'storage_private_key_id': 'zzz22', - 'storage_private_key': 'aaa', - 'storage_client_email': 'www11', - 'storage_project_id': 'yyyy11', - 'storage_client_id': 'xxx11', + {'inst_client_email': 'aaa', 'disk_size_in_gb': 'aaa', 'disk_count': 'bbb', 'disk_type': 'ccc'}}, @@ -653,7 +648,7 @@ def test_oracle_opc_cloud_config(self): def test_gcp_cloud_config(self): sys.argv = ['qds.py', '--version', 'v2', '--cloud', 'GCP', 'cluster', 'update', '123', '--gcp-region', 'xxx', '--subnet-id', 'abc-subnet', - '--storage-client-id', 'xxx11', '--compute-client-id', 'yyyy11'] + '--storage-client-email', 'xxx11', '--qsa-client-id', 'yyyy11'] Qubole.cloud = None print_command() Connection._api_call = Mock(return_value={}) @@ -661,8 +656,8 @@ def test_gcp_cloud_config(self): Connection._api_call.assert_called_with('PUT', 'clusters/123', {'cloud_config': {'network_config': {'subnet': 'abc-subnet'}, - 'compute_config': {'compute_client_id': 'yyyy11'}, - 'storage_config': {'storage_client_id': 'xxx11'}, + 'compute_config': {'qsa_client_id': 'yyyy11'}, + 'storage_config': {'inst_client_email': 'xxx11'}, 'location': {'region': 'xxx'} } }) From 3631d0e4d6b01677c25653092019faddbb794fc5 Mon Sep 17 00:00:00 2001 From: abhijithshankar93 <31067437+abhijithshankar93@users.noreply.github.com> Date: Thu, 21 Feb 2019 15:57:20 +0530 Subject: [PATCH 02/69] [SDK-338] Added option to enable Rubix (#265) --- bin/qds.py | 3 ++- qds_sdk/cluster.py | 21 ++++++++++++++++++--- qds_sdk/engine.py | 24 ++++++++++++++++++++---- tests/test_cluster.py | 37 +++++++++++++++++++++++++++++++++++++ tests/test_clusterv2.py | 14 +++++++++----- 5 files changed, 86 insertions(+), 13 deletions(-) diff --git a/bin/qds.py b/bin/qds.py index 
24a1b13b..fac8b439 100755 --- a/bin/qds.py +++ b/bin/qds.py @@ -314,7 +314,8 @@ def _create_cluster_info(arguments, api_version): is_ha=arguments.is_ha, env_name=arguments.env_name, python_version=arguments.python_version, - r_version=arguments.r_version) + r_version=arguments.r_version, + enable_rubix=arguments.enable_rubix) else: cluster_info = ClusterInfo(arguments.label, arguments.aws_access_key_id, diff --git a/qds_sdk/cluster.py b/qds_sdk/cluster.py index 504d9976..e6666227 100755 --- a/qds_sdk/cluster.py +++ b/qds_sdk/cluster.py @@ -304,6 +304,17 @@ def _parse_create_update(cls, args, action, api_version): dest="ebs_volume_size", type=int, help="Size of each EBS volume, in GB",) + enable_rubix_group = hadoop_group.add_mutually_exclusive_group() + enable_rubix_group.add_argument("--enable-rubix", + dest="enable_rubix", + action="store_true", + default=None, + help="Enable rubix for cluster", ) + enable_rubix_group.add_argument("--no-enable-rubix", + dest="enable_rubix", + action="store_false", + default=None, + help="Do not enable rubix for cluster", ) hadoop2 = hadoop_group.add_mutually_exclusive_group() hadoop2.add_argument("--use-hadoop2", @@ -1034,7 +1045,8 @@ def set_cluster_info(self, aws_access_key_id=None, is_ha=None, env_name=None, python_version=None, - r_version=None): + r_version=None, + enable_rubix=None): """ Kwargs: @@ -1159,6 +1171,7 @@ def set_cluster_info(self, aws_access_key_id=None, `r_version`: Version of R for environment. (For Spark clusters) + `enable_rubix`: Enable rubix on the cluster (For Presto clusters) """ self.disallow_cluster_termination = disallow_cluster_termination @@ -1169,7 +1182,7 @@ def set_cluster_info(self, aws_access_key_id=None, node_base_cooldown_period, node_spot_cooldown_period, root_volume_size) self.set_ec2_settings(aws_access_key_id, aws_secret_access_key, aws_region, aws_availability_zone, vpc_id, subnet_id, master_elastic_ip, bastion_node_public_dns, role_instance_profile) - self.set_hadoop_settings(custom_config, use_hbase, use_hadoop2, use_spark, use_qubole_placement_policy, is_ha) + self.set_hadoop_settings(custom_config, use_hbase, use_hadoop2, use_spark, use_qubole_placement_policy, is_ha, enable_rubix) self.set_spot_instance_settings(maximum_bid_price_percentage, timeout_for_request, maximum_spot_instance_percentage) self.set_stable_spot_instance_settings(stable_maximum_bid_price_percentage, stable_timeout_for_request, stable_allow_fallback) self.set_spot_block_settings(spot_block_duration) @@ -1230,13 +1243,15 @@ def set_hadoop_settings(self, custom_config=None, use_hadoop2=None, use_spark=None, use_qubole_placement_policy=None, - is_ha=None): + is_ha=None, + enable_rubix=None): self.hadoop_settings['custom_config'] = custom_config self.hadoop_settings['use_hbase'] = use_hbase self.hadoop_settings['use_hadoop2'] = use_hadoop2 self.hadoop_settings['use_spark'] = use_spark self.hadoop_settings['use_qubole_placement_policy'] = use_qubole_placement_policy self.hadoop_settings['is_ha'] = is_ha + self.hadoop_settings['enable_rubix'] = enable_rubix def set_spot_instance_settings(self, maximum_bid_price_percentage=None, timeout_for_request=None, diff --git a/qds_sdk/engine.py b/qds_sdk/engine.py index 811eaee5..ec041b44 100644 --- a/qds_sdk/engine.py +++ b/qds_sdk/engine.py @@ -28,7 +28,8 @@ def set_engine_config(self, dbtap_id=None, fernet_key=None, overrides=None, - is_ha=None): + is_ha=None, + enable_rubix=None): ''' Args: @@ -60,10 +61,11 @@ def set_engine_config(self, is_ha: Enabling HA config for cluster is_deeplearning : this 
is a deeplearning cluster config + enable_rubix: Enable rubix on the cluster ''' - self.set_hadoop_settings(custom_hadoop_config, use_qubole_placement_policy, is_ha, fairscheduler_config_xml, default_pool) + self.set_hadoop_settings(custom_hadoop_config, use_qubole_placement_policy, is_ha, fairscheduler_config_xml, default_pool, enable_rubix) self.set_presto_settings(presto_version, custom_presto_config) self.set_spark_settings(spark_version, custom_spark_config) self.set_airflow_settings(dbtap_id, fernet_key, overrides) @@ -81,11 +83,13 @@ def set_hadoop_settings(self, use_qubole_placement_policy=None, is_ha=None, fairscheduler_config_xml=None, - default_pool=None): + default_pool=None, + enable_rubix=None): self.hadoop_settings['custom_hadoop_config'] = custom_hadoop_config self.hadoop_settings['use_qubole_placement_policy'] = use_qubole_placement_policy self.hadoop_settings['is_ha'] = is_ha self.set_fairscheduler_settings(fairscheduler_config_xml, default_pool) + self.hadoop_settings['enable_rubix'] = enable_rubix def set_presto_settings(self, presto_version=None, @@ -123,7 +127,8 @@ def set_engine_config_settings(self, arguments): custom_spark_config=arguments.custom_spark_config, dbtap_id=arguments.dbtap_id, fernet_key=arguments.fernet_key, - overrides=arguments.overrides) + overrides=arguments.overrides, + enable_rubix=arguments.enable_rubix) @staticmethod def engine_parser(argparser): @@ -153,6 +158,17 @@ def engine_parser(argparser): default=None, help="Do not use Qubole Block Placement policy" + " for clusters with spot nodes", ) + enable_rubix_group = hadoop_settings_group.add_mutually_exclusive_group() + enable_rubix_group.add_argument("--enable-rubix", + dest="enable_rubix", + action="store_true", + default=None, + help="Enable rubix for cluster", ) + enable_rubix_group.add_argument("--no-enable-rubix", + dest="enable_rubix", + action="store_false", + default=None, + help="Do not enable rubix for cluster", ) fairscheduler_group = argparser.add_argument_group( "fairscheduler configuration options") diff --git a/tests/test_cluster.py b/tests/test_cluster.py index d16764ea..b06e1c08 100644 --- a/tests/test_cluster.py +++ b/tests/test_cluster.py @@ -1672,6 +1672,43 @@ def test_root_volume_size_invalid_v13(self): qds.main() + def test_use_enable_rubix_v13(self): + sys.argv = ['qds.py', '--version', 'v1.3', 'cluster', 'create', '--label', 'test_label', + '--access-key-id', 'aki', '--secret-access-key', 'sak', + '--enable-rubix'] + print_command() + Connection._api_call = Mock(return_value={}) + qds.main() + Connection._api_call.assert_called_with('POST', 'clusters', + {'label': ['test_label'], + 'ec2_settings': {'compute_secret_key': 'sak', + 'compute_access_key': 'aki'}, + 'hadoop_settings': {'enable_rubix': True}, + }) + + def test_no_use_enable_rubix_v13(self): + sys.argv = ['qds.py', '--version', 'v1.3', 'cluster', 'create', '--label', 'test_label', + '--access-key-id', 'aki', '--secret-access-key', 'sak', + '--no-enable-rubix'] + print_command() + Connection._api_call = Mock(return_value={}) + qds.main() + Connection._api_call.assert_called_with('POST', 'clusters', + {'label': ['test_label'], + 'ec2_settings': {'compute_secret_key': 'sak', + 'compute_access_key': 'aki'}, + 'hadoop_settings': {'enable_rubix': False}, + }) + + @unittest.skipIf(sys.version_info < (2, 7, 0), "Known failure on Python 2.6") + def test_conflict_enable_rubix_v13(self): + sys.argv = ['qds.py', '--version', 'v1.3', 'cluster', 'create', '--label', 'test_label', + '--access-key-id', 'aki', 
'--secret-access-key', 'sak', + '--enable-rubix', '--no-enable-rubix'] + print_command() + with self.assertRaises(SystemExit): + qds.main() + class TestClusterUpdate(QdsCliTestCase): def test_minimal(self): sys.argv = ['qds.py', 'cluster', 'update', '123'] diff --git a/tests/test_clusterv2.py b/tests/test_clusterv2.py index f07a350b..ef56ec04 100644 --- a/tests/test_clusterv2.py +++ b/tests/test_clusterv2.py @@ -372,7 +372,7 @@ def test_presto_engine_config(self): temp.write("config.properties:\na=1\nb=2".encode("utf8")) temp.flush() sys.argv = ['qds.py', '--version', 'v2', 'cluster', 'create', '--label', 'test_label', - '--flavour', 'presto', '--presto-custom-config', temp.name] + '--flavour', 'presto', '--enable-rubix' , '--presto-custom-config', temp.name] Qubole.cloud = None print_command() Connection._api_call = Mock(return_value={}) @@ -381,7 +381,10 @@ def test_presto_engine_config(self): {'engine_config': {'flavour': 'presto', 'presto_settings': { - 'custom_presto_config': 'config.properties:\na=1\nb=2'}}, + 'custom_presto_config': 'config.properties:\na=1\nb=2'}, + 'hadoop_settings':{ + 'enable_rubix': True + }}, 'cluster_info': {'label': ['test_label']}}) def test_spark_engine_config(self): @@ -667,8 +670,8 @@ def test_engine_config(self): temp.write("a=1\nb=2".encode("utf8")) temp.flush() sys.argv = ['qds.py', '--version', 'v2', 'cluster', 'update', '123', - '--use-qubole-placement-policy', '--custom-hadoop-config', - temp.name] + '--use-qubole-placement-policy', '--enable-rubix', + '--custom-hadoop-config',temp.name] Qubole.cloud = None print_command() Connection._api_call = Mock(return_value={}) @@ -676,7 +679,8 @@ def test_engine_config(self): Connection._api_call.assert_called_with('PUT', 'clusters/123', {'engine_config': {'hadoop_settings': {'use_qubole_placement_policy': True, - 'custom_hadoop_config': 'a=1\nb=2'}} + 'custom_hadoop_config': 'a=1\nb=2', + 'enable_rubix': True}} }) def test_cluster_info(self): From 62468d2eea002889606db72a62904dad790500cd Mon Sep 17 00:00:00 2001 From: akaranjkar-qu <41927856+akaranjkar-qu@users.noreply.github.com> Date: Thu, 28 Feb 2019 21:23:04 -0800 Subject: [PATCH 03/69] SDK-323: GCP changes for preemptible nodes (#266) --- qds_sdk/cloud/gcp_cloud.py | 71 ++++++++++++++++++++++++++++++++++++-- tests/test_clusterv2.py | 40 +++++++++++++++++++++ 2 files changed, 108 insertions(+), 3 deletions(-) diff --git a/qds_sdk/cloud/gcp_cloud.py b/qds_sdk/cloud/gcp_cloud.py index 20897c8c..a2033246 100644 --- a/qds_sdk/cloud/gcp_cloud.py +++ b/qds_sdk/cloud/gcp_cloud.py @@ -10,6 +10,7 @@ def __init__(self): self.location = {} self.network_config = {} self.storage_config = {} + self.cluster_composition = {} def set_cloud_config(self, qsa_client_id=None, @@ -27,7 +28,12 @@ def set_cloud_config(self, storage_disk_type=None, bastion_node_public_dns=None, vpc_id=None, - subnet_id=None): + subnet_id=None, + master_preemptible=None, + min_nodes_preemptible=None, + min_nodes_preemptible_percentage=None, + autoscaling_nodes_preemptible=None, + autoscaling_nodes_preemptible_percentage=None): ''' Args: @@ -56,6 +62,16 @@ def set_cloud_config(self, vpc_id: Vpc id for gcp cluster subnet_id: Subnet id for gcp cluster + + master_preemptible: if the master node is preemptible + + min_nodes_preemptible: if the min nodes are preemptible + + min_nodes_preemptible_percentage: percentage of min nodes that are preemptible + + autoscaling_nodes_preemptible: if the autoscaling nodes are preemptible + + autoscaling_nodes_preemptible_percentage: percentage of autoscaling 
nodes that are preemptible ''' self.set_compute_config(use_account_compute_creds, qsa_client_id, customer_project_id, qsa_client_email, @@ -63,6 +79,8 @@ def set_cloud_config(self, self.set_location(gcp_region, gcp_zone) self.set_network_config(bastion_node_public_dns, vpc_id, subnet_id) self.set_storage_config(inst_client_email, storage_disk_size_in_gb, storage_disk_count, storage_disk_type) + self.set_cluster_composition(master_preemptible, min_nodes_preemptible, min_nodes_preemptible_percentage, + autoscaling_nodes_preemptible, autoscaling_nodes_preemptible_percentage) def set_compute_config(self, use_account_compute_creds=None, @@ -106,6 +124,21 @@ def set_storage_config(self, self.storage_config['disk_count'] = storage_disk_count self.storage_config['disk_type'] = storage_disk_type + def set_cluster_composition(self, + master_preemptible=None, + min_nodes_preemptible=None, + min_nodes_preemptible_percentage=None, + autoscaling_nodes_preemptible=None, + autoscaling_nodes_preemptible_percentage=None): + self.cluster_composition['master'] = {} + self.cluster_composition['master']['preemptible'] = master_preemptible + self.cluster_composition['min_nodes'] = {} + self.cluster_composition['min_nodes']['preemptible'] = min_nodes_preemptible + self.cluster_composition['min_nodes']['percentage'] = min_nodes_preemptible_percentage + self.cluster_composition['autoscaling_nodes'] = {} + self.cluster_composition['autoscaling_nodes']['preemptible'] = autoscaling_nodes_preemptible + self.cluster_composition['autoscaling_nodes']['percentage'] = autoscaling_nodes_preemptible_percentage + def set_cloud_config_from_arguments(self, arguments): self.set_cloud_config(qsa_client_id=arguments.qsa_client_id, customer_project_id=arguments.customer_project_id, @@ -122,7 +155,12 @@ def set_cloud_config_from_arguments(self, arguments): storage_disk_type=arguments.storage_disk_type, bastion_node_public_dns=arguments.bastion_node_public_dns, vpc_id=arguments.vpc_id, - subnet_id=arguments.subnet_id) + subnet_id=arguments.subnet_id, + master_preemptible=arguments.master_preemptible, + min_nodes_preemptible=arguments.min_nodes_preemptible, + min_nodes_preemptible_percentage=arguments.min_nodes_preemptible_percentage, + autoscaling_nodes_preemptible=arguments.autoscaling_nodes_preemptible, + autoscaling_nodes_preemptible_percentage=arguments.autoscaling_nodes_preemptible_percentage) def create_parser(self, argparser): # compute settings parser @@ -202,4 +240,31 @@ def create_parser(self, argparser): storage_config.add_argument("--storage-disk-type", dest="storage_disk_type", default=None, - help="disk type for gcp cluster") \ No newline at end of file + help="disk type for gcp cluster") + # cluster composition settings parser + cluster_composition = argparser.add_argument_group("cluster composition settings") + cluster_composition.add_argument("--master-preemptible", + dest="master_preemptible", + action="store_true", + default=None, + help="if the master node is preemptible") + cluster_composition.add_argument("--min-nodes-preemptible", + dest="min_nodes_preemptible", + action="store_true", + default=None, + help="if the min nodes are preemptible") + cluster_composition.add_argument("--min-nodes-preemptible-percentage", + dest="min_nodes_preemptible_percentage", + type=int, + default=None, + help="percentage of min nodes that are preemptible") + cluster_composition.add_argument("--autoscaling-nodes-preemptible", + dest="autoscaling_nodes_preemptible", + action="store_true", + default=None, + help="if the autoscaling 
nodes are preemptible") + cluster_composition.add_argument("--autoscaling-nodes-preemptible-percentage", + dest="autoscaling_nodes_preemptible_percentage", + type=int, + default=None, + help="percentage of autoscaling nodes that are preemptible") diff --git a/tests/test_clusterv2.py b/tests/test_clusterv2.py index ef56ec04..1b33f4e2 100644 --- a/tests/test_clusterv2.py +++ b/tests/test_clusterv2.py @@ -367,6 +367,46 @@ def test_gcp_location_config(self): 'zone': 'yyy'}}, 'cluster_info': {'label': ['test_label']}}) + def test_gcp_cluster_composition(self): + sys.argv = ['qds.py', '--version', 'v2', '--cloud', 'GCP', 'cluster', 'create', '--label', 'test_label', + '--master-preemptible', + '--min-nodes-preemptible', '--min-nodes-preemptible-percentage', '50', + '--autoscaling-nodes-preemptible', '--autoscaling-nodes-preemptible-percentage', '75'] + Qubole.cloud = None + print_command() + Connection._api_call = Mock(return_value={}) + qds.main() + Connection._api_call.assert_called_with('POST', 'clusters', + { + 'cloud_config': { + 'cluster_composition': { + 'master': { + 'preemptible': True + }, + 'min_nodes': { + 'preemptible': True, + 'percentage': 50 + }, + 'autoscaling_nodes': { + 'preemptible': True, + 'percentage': 75 + } + } + }, + 'cluster_info': { + 'label': ['test_label'] + } + }) + + def test_gcp_cluster_composition_invalid(self): + sys.argv = ['qds.py', '--version', 'v2', '--cloud', 'GCP', 'cluster', 'create', '--label', 'test_label', + '--master-preemptible', + '--min-nodes-preemptible', '--min-nodes-preemptible-percentage', 'invalid_value'] + Qubole.cloud = None + print_command() + with self.assertRaises(SystemExit): + qds.main() + def test_presto_engine_config(self): with tempfile.NamedTemporaryFile() as temp: temp.write("config.properties:\na=1\nb=2".encode("utf8")) From 756d5174cd041d6edc13883719e1486786cf756c Mon Sep 17 00:00:00 2001 From: mcarlsen Date: Tue, 19 Mar 2019 14:39:00 +0100 Subject: [PATCH 04/69] fixes #175. utf-8 errors in Python 3 (#208) Caused by block reads chopping multibyte utf-8 sequences in half. --- qds_sdk/commands.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/qds_sdk/commands.py b/qds_sdk/commands.py index d3c96def..80e48609 100755 --- a/qds_sdk/commands.py +++ b/qds_sdk/commands.py @@ -1375,9 +1375,9 @@ def _read_iteratively(key_instance, fp, delim): else: import io if isinstance(fp, io.TextIOBase): - fp.buffer.write(data.decode('utf-8').replace(chr(1), delim).encode('utf8')) + fp.buffer.write(data.replace(bytes([1]), delim.encode('utf8'))) elif isinstance(fp, io.BufferedIOBase) or isinstance(fp, io.RawIOBase): - fp.write(data.decode('utf8').replace(chr(1), delim).encode('utf8')) + fp.write(data.replace(bytes([1]), delim.encode('utf8'))) else: # Can this happen? Don't know what's the right thing to do in this case. 
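# Why the switch to a bytes-level replace fixes the bug (illustrative values,
# not part of the patch): a block read can end in the middle of a multibyte
# UTF-8 sequence, so decoding each block on its own raised UnicodeDecodeError:
#     data = "é".encode("utf8")           # b'\xc3\xa9': one char, two bytes
#     data[:1].decode("utf-8")            # UnicodeDecodeError: unexpected end of data
#     data[:1].replace(bytes([1]), b",")  # fine; the bytes are never decoded, so a
#                                         # split sequence passes through intact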
pass From 8afa7fe487e4e79fef07a11ad5f0cb8f846a73ed Mon Sep 17 00:00:00 2001 From: Joy Lal Chattaraj Date: Tue, 19 Mar 2019 19:12:53 +0530 Subject: [PATCH 05/69] SDK-344: Enable use of port argument for DbTaps #267 --- qds_sdk/dbtaps.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/qds_sdk/dbtaps.py b/qds_sdk/dbtaps.py index c5ded3c6..84941c21 100644 --- a/qds_sdk/dbtaps.py +++ b/qds_sdk/dbtaps.py @@ -72,8 +72,7 @@ def parsers(): help="Username") edit.add_argument("--password", dest="password", help="Password") - edit.add_argument("--port", dest="port", - help="Database Port") + edit.add_argument("--port", dest="port", help="Database Port") edit.add_argument("--type", dest="type", choices=["mysql","vertica","mongo","postgresql","redshift","sqlserver"], help="Type of database") edit.add_argument("--location", dest="location", choices=["us-east-1", "us-west-2", "ap-southeast-1", "eu-west-1", "on-premise"], @@ -107,7 +106,8 @@ def create(args): db_user=args.user, db_passwd=args.password, db_type=args.type, - db_location=args.location) + db_location=args.location, + port=args.port) return json.dumps(dbtap.attributes, sort_keys=True, indent=4) @@ -148,6 +148,8 @@ def edit(args): options["db_type"] = args.type if args.location is not None: options["db_location"] = args.location + if args.port is not None: + options["port"] = args.port tap = tap.edit(**options) return json.dumps(tap.attributes, sort_keys=True, indent=4) From c7b9cb6560e964b0e6d78b423b11dc8864cf75a3 Mon Sep 17 00:00:00 2001 From: Timofei Korostelev Date: Tue, 19 Mar 2019 06:52:01 -0700 Subject: [PATCH 06/69] Added exception instead of "pass" to avoid silently skipping of file write (#205) --- qds_sdk/commands.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/qds_sdk/commands.py b/qds_sdk/commands.py index 80e48609..299160f7 100755 --- a/qds_sdk/commands.py +++ b/qds_sdk/commands.py @@ -1379,8 +1379,7 @@ def _read_iteratively(key_instance, fp, delim): elif isinstance(fp, io.BufferedIOBase) or isinstance(fp, io.RawIOBase): fp.write(data.replace(bytes([1]), delim.encode('utf8'))) else: - # Can this happen? Don't know what's the right thing to do in this case. - pass + raise ValueError('Only subclasses of io.TextIOBase or io.BufferedIOBase supported') except StopIteration: # Stream closes itself when the exception is raised return From ac95aafb3b576d11322d09686e55695bd1e1a1b1 Mon Sep 17 00:00:00 2001 From: Aaditya Sharma Date: Tue, 26 Mar 2019 17:15:17 +0530 Subject: [PATCH 07/69] SDK-339 : Added comments on how to use List command. (#269) --- qds_sdk/commands.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/qds_sdk/commands.py b/qds_sdk/commands.py index 299160f7..2fa9cc27 100755 --- a/qds_sdk/commands.py +++ b/qds_sdk/commands.py @@ -76,6 +76,20 @@ def is_success(status): def list(cls, **kwargs): """ List a command by issuing a GET request to the /command endpoint + + Args: + `**kwargs`: Various parameters can be used to filter the commands such as: + * command_type - HiveQuery, PrestoQuery, etc. The types should be in title case. 
+ * status - failed, success, etc + * name + * command_id + * qbol_user_id + * command_source + * page + * cluster_label + * session_id, etc + + For example - Command.list(command_type = "HiveQuery", status = "success") """ conn = Qubole.agent() params = {} From 70b075e9d71fb36ac7c8c7c3ae5fd09c8bebfa8a Mon Sep 17 00:00:00 2001 From: akaranjkar-qu <41927856+akaranjkar-qu@users.noreply.github.com> Date: Mon, 1 Apr 2019 04:11:21 -0700 Subject: [PATCH 08/69] SDK-345: Add custom resource group and start-stop related params for Azure clusters (#268) --- qds_sdk/cloud/azure_cloud.py | 11 ++++-- qds_sdk/clusterv2.py | 68 ++++++++++++++++++++++++++++++++++-- tests/test_clusterv2.py | 53 ++++++++++++++++++++++++++++ 3 files changed, 128 insertions(+), 4 deletions(-) diff --git a/qds_sdk/cloud/azure_cloud.py b/qds_sdk/cloud/azure_cloud.py index 682c58d4..435e4993 100755 --- a/qds_sdk/cloud/azure_cloud.py +++ b/qds_sdk/cloud/azure_cloud.py @@ -29,7 +29,8 @@ def set_cloud_config(self, vnet_resource_group_name=None, master_elastic_ip=None, master_static_nic_name=None, - master_static_public_ip_name=None): + master_static_public_ip_name=None, + resource_group_name=None): ''' Args: @@ -74,6 +75,7 @@ def set_cloud_config(self, master_static_public_ip_name: Name of Static Public Ip address that has to be attached to cluster's master node + resource_group_name: Resource group for cluster ''' self.set_compute_config(use_account_compute_creds, compute_tenant_id, @@ -86,6 +88,7 @@ def set_cloud_config(self, self.set_storage_config(storage_access_key, storage_account_name, disk_storage_account_name, disk_storage_account_resource_group_name) + self.resource_group_name = resource_group_name def set_compute_config(self, use_account_compute_creds=None, @@ -147,7 +150,8 @@ def set_cloud_config_from_arguments(self, arguments): subnet_name=arguments.subnet_name, vnet_resource_group_name=arguments.vnet_resource_group_name, master_static_nic_name=arguments.master_static_nic_name, - master_static_public_ip_name=arguments.master_static_public_ip_name) + master_static_public_ip_name=arguments.master_static_public_ip_name, + resource_group_name=arguments.resource_group_name) def create_parser(self, argparser): # compute settings parser @@ -204,6 +208,9 @@ def create_parser(self, argparser): network_config_group.add_argument("--master-static-public-ip-name", dest="master_static_public_ip_name", help="name of public IP to be attached to master node") + network_config_group.add_argument("--resource-group-name", + dest="resource_group_name", + help="resource group for cluster") # storage config settings parser storage_config = argparser.add_argument_group("storage config settings") storage_config.add_argument("--storage-access-key", diff --git a/qds_sdk/clusterv2.py b/qds_sdk/clusterv2.py index 95e7905a..eb3f07f6 100755 --- a/qds_sdk/clusterv2.py +++ b/qds_sdk/clusterv2.py @@ -105,7 +105,11 @@ def get_cluster_create_clone_update(arguments, action): image_uri_overrides=arguments.image_uri_overrides, env_name=arguments.env_name, python_version=arguments.python_version, - r_version=arguments.r_version) + r_version=arguments.r_version, + disable_cluster_pause=arguments.disable_cluster_pause, + paused_cluster_timeout_mins=arguments.paused_cluster_timeout_mins, + disable_autoscale_node_pause=arguments.disable_autoscale_node_pause, + paused_autoscale_node_timeout_mins=arguments.paused_autoscale_node_timeout_mins) # This will set cloud config settings cloud_config = Qubole.get_cloud() @@ -193,7 +197,11 @@ def set_cluster_info(self, 
image_uri_overrides=None, env_name=None, python_version=None, - r_version=None): + r_version=None, + disable_cluster_pause=None, + paused_cluster_timeout_mins=None, + disable_autoscale_node_pause=None, + paused_autoscale_node_timeout_mins=None): """ Args: @@ -290,6 +298,14 @@ def set_cluster_info(self, `r_version`: Version of R for environment. (For Spark clusters) + `disable_cluster_pause`: Disable cluster pause + + `paused_cluster_timeout_mins`: Paused cluster timeout in mins + + `disable_autoscale_node_pause`: Disable autoscale node pause + + `paused_autoscale_node_timeout_mins`: Paused autoscale node timeout in mins + Doc: For getting details about arguments http://docs.qubole.com/en/latest/rest-api/cluster_api/create-new-cluster.html#parameters @@ -327,6 +343,8 @@ def set_cluster_info(self, self.set_monitoring(enable_ganglia_monitoring, datadog_api_token, datadog_app_token) self.set_internal(image_uri_overrides) self.set_env_settings(env_name, python_version, r_version) + self.set_start_stop_settings(disable_cluster_pause, paused_cluster_timeout_mins, + disable_autoscale_node_pause, paused_autoscale_node_timeout_mins) def set_datadog_setting(self, datadog_api_token=None, @@ -392,6 +410,20 @@ def set_env_settings(self, env_name=None, python_version=None, r_version=None): self.cluster_info['env_settings']['python_version'] = python_version self.cluster_info['env_settings']['r_version'] = r_version + def set_start_stop_settings(self, + disable_cluster_pause=None, + paused_cluster_timeout_mins=None, + disable_autoscale_node_pause=None, + paused_autoscale_node_timeout_mins=None): + if disable_cluster_pause is not None: + disable_cluster_pause = int(disable_cluster_pause) + self.cluster_info['disable_cluster_pause'] = disable_cluster_pause + self.cluster_info['paused_cluster_timeout_mins'] = paused_cluster_timeout_mins + if disable_autoscale_node_pause is not None: + disable_autoscale_node_pause = int(disable_autoscale_node_pause) + self.cluster_info['disable_autoscale_node_pause'] = disable_autoscale_node_pause + self.cluster_info['paused_autoscale_node_timeout_mins'] = paused_autoscale_node_timeout_mins + @staticmethod def list_info_parser(argparser, action): argparser.add_argument("--id", dest="cluster_id", @@ -641,6 +673,38 @@ def cluster_info_parser(argparser, action): default=None, help="version of R in environment") + start_stop_group = argparser.add_argument_group("start stop settings") + start_stop_group.add_argument("--disable-cluster-pause", + dest="disable_cluster_pause", + action='store_true', + default=None, + help="disable cluster pause") + start_stop_group.add_argument("--no-disable-cluster-pause", + dest="disable_cluster_pause", + action='store_false', + default=None, + help="disable cluster pause") + start_stop_group.add_argument("--paused-cluster-timeout", + dest="paused_cluster_timeout_mins", + default=None, + type=int, + help="paused cluster timeout in min") + start_stop_group.add_argument("--disable-autoscale-node-pause", + dest="disable_autoscale_node_pause", + action='store_true', + default=None, + help="disable autoscale node pause") + start_stop_group.add_argument("--no-disable-autoscale-node-pause", + dest="disable_autoscale_node_pause", + action='store_false', + default=None, + help="disable autoscale node pause") + start_stop_group.add_argument("--paused-autoscale-node-timeout", + dest="paused_autoscale_node_timeout_mins", + default=None, + type=int, + help="paused autoscale node timeout in min") + class ClusterV2(Resource): rest_entity_path = "clusters" diff 
--git a/tests/test_clusterv2.py b/tests/test_clusterv2.py index 1b33f4e2..0f6f94b8 100644 --- a/tests/test_clusterv2.py +++ b/tests/test_clusterv2.py @@ -259,6 +259,19 @@ def test_azure_master_static_pip(self): 'master_static_public_ip_name':'pip1'}}, 'cluster_info': {'label': ['test_label']}}) + def test_azure_resource_group_name(self): + sys.argv = ['qds.py', '--version', 'v2', '--cloud', 'AZURE', 'cluster', 'create', '--label', 'test_label', + '--resource-group-name', 'testrg'] + Qubole.cloud = None + print_command() + Connection._api_call = Mock(return_value={}) + qds.main() + Connection._api_call.assert_called_with('POST', 'clusters', + {'cloud_config': { + 'resource_group_name': 'testrg' + }, + 'cluster_info': {'label': ['test_label']}}) + def test_oracle_opc_compute_config(self): sys.argv = ['qds.py', '--version', 'v2', '--cloud', 'ORACLE_OPC', 'cluster', 'create', '--label', 'test_label', '--username', 'testusername', '--password', 'testpassword', @@ -585,6 +598,46 @@ def test_root_disk_size_invalid_v2(self): qds.main() + def test_disable_start_stop(self): + sys.argv = ['qds.py', '--version', 'v2', 'cluster', 'create', '--label', 'test_label', + '--disable-cluster-pause', '--disable-autoscale-node-pause'] + print_command() + Connection._api_call = Mock(return_value={}) + qds.main() + Connection._api_call.assert_called_with('POST', 'clusters', + { + 'cluster_info': { + 'label': ['test_label'], + 'disable_cluster_pause': 1, + 'disable_autoscale_node_pause': 1 + } + }) + def test_start_stop_timeouts(self): + sys.argv = ['qds.py', '--version', 'v2', 'cluster', 'create', '--label', 'test_label', + '--no-disable-cluster-pause', '--paused-cluster-timeout', '30', + '--no-disable-autoscale-node-pause', '--paused-autoscale-node-timeout', '60'] + print_command() + Connection._api_call = Mock(return_value={}) + qds.main() + Connection._api_call.assert_called_with('POST', 'clusters', + { + 'cluster_info': { + 'label': ['test_label'], + 'disable_cluster_pause': 0, + 'paused_cluster_timeout_mins': 30, + 'disable_autoscale_node_pause': 0, + 'paused_autoscale_node_timeout_mins': 60 + } + }) + + def test_start_stop_timeouts_invalid(self): + sys.argv = ['qds.py', '--version', 'v2', 'cluster', 'create', '--label', 'test_label', + '--paused-cluster-timeout', 'invalid_value', '--paused-autoscale-node-timeout', 'invalid_value'] + print_command() + with self.assertRaises(SystemExit): + qds.main() + + class TestClusterUpdate(QdsCliTestCase): def test_minimal(self): sys.argv = ['qds.py', '--version', 'v2', 'cluster', 'update', '123'] From 7fb3292532b61d2c6e5e8fcaab62ae7873cc9c07 Mon Sep 17 00:00:00 2001 From: Aaditya Sharma Date: Fri, 19 Apr 2019 16:21:56 +0530 Subject: [PATCH 09/69] SDK-340 : Support for Airflow Version and Python Version (#270) --- qds_sdk/engine.py | 24 ++++++++++++++++++++++-- tests/test_clusterv2.py | 21 +++++++++++++++++++++ 2 files changed, 43 insertions(+), 2 deletions(-) diff --git a/qds_sdk/engine.py b/qds_sdk/engine.py index ec041b44..3070f699 100644 --- a/qds_sdk/engine.py +++ b/qds_sdk/engine.py @@ -28,6 +28,8 @@ def set_engine_config(self, dbtap_id=None, fernet_key=None, overrides=None, + airflow_version=None, + airflow_python_version=None, is_ha=None, enable_rubix=None): ''' @@ -59,6 +61,10 @@ def set_engine_config(self, overrides: Airflow configuration to override the default settings.Use the following syntax for overrides:
<section>.<property>=<value>\n
.=... + airflow_version: The airflow version. + + airflow_python_version: The python version for the environment on the cluster. + is_ha: Enabling HA config for cluster is_deeplearning : this is a deeplearning cluster config enable_rubix: Enable rubix on the cluster @@ -68,7 +74,7 @@ def set_engine_config(self, self.set_hadoop_settings(custom_hadoop_config, use_qubole_placement_policy, is_ha, fairscheduler_config_xml, default_pool, enable_rubix) self.set_presto_settings(presto_version, custom_presto_config) self.set_spark_settings(spark_version, custom_spark_config) - self.set_airflow_settings(dbtap_id, fernet_key, overrides) + self.set_airflow_settings(dbtap_id, fernet_key, overrides, airflow_version, airflow_python_version) def set_fairscheduler_settings(self, fairscheduler_config_xml=None, @@ -106,10 +112,14 @@ def set_spark_settings(self, def set_airflow_settings(self, dbtap_id=None, fernet_key=None, - overrides=None): + overrides=None, + airflow_version="1.10.0", + airflow_python_version="2.7"): self.airflow_settings['dbtap_id'] = dbtap_id self.airflow_settings['fernet_key'] = fernet_key self.airflow_settings['overrides'] = overrides + self.airflow_settings['version'] = airflow_version + self.airflow_settings['airflow_python_version'] = airflow_python_version def set_engine_config_settings(self, arguments): custom_hadoop_config = util._read_file(arguments.custom_hadoop_config_file) @@ -128,6 +138,8 @@ def set_engine_config_settings(self, arguments): dbtap_id=arguments.dbtap_id, fernet_key=arguments.fernet_key, overrides=arguments.overrides, + airflow_version=arguments.airflow_version, + airflow_python_version=arguments.airflow_python_version, enable_rubix=arguments.enable_rubix) @staticmethod @@ -215,4 +227,12 @@ def engine_parser(argparser): dest="overrides", default=None, help="overrides for airflow cluster", ) + airflow_settings_group.add_argument("--airflow-version", + dest="airflow_version", + default=None, + help="airflow version for airflow cluster", ) + airflow_settings_group.add_argument("--airflow-python-version", + dest="airflow_python_version", + default=None, + help="python environment version for airflow cluster", ) diff --git a/tests/test_clusterv2.py b/tests/test_clusterv2.py index 0f6f94b8..dadd5817 100644 --- a/tests/test_clusterv2.py +++ b/tests/test_clusterv2.py @@ -457,6 +457,27 @@ def test_spark_engine_config(self): 'custom_spark_config': 'spark-overrides'}}, 'cluster_info': {'label': ['test_label'],}}) + def test_airflow_engine_config(self): + with tempfile.NamedTemporaryFile() as temp: + temp.write("config.properties:\na=1\nb=2".encode("utf8")) + temp.flush() + sys.argv = ['qds.py', '--version', 'v2', 'cluster', 'create', '--label', 'test_label', + '--flavour', 'airflow', '--dbtap-id', '1', '--fernet-key', '-1', '--overrides', 'airflow_overrides', '--airflow-version', '1.10.0', '--airflow-python-version', '2.7'] + Qubole.cloud = None + print_command() + Connection._api_call = Mock(return_value={}) + qds.main() + Connection._api_call.assert_called_with('POST', 'clusters', + {'engine_config': + {'flavour': 'airflow', + 'airflow_settings': { + 'dbtap_id': '1', + 'fernet_key': '-1', + 'overrides': 'airflow_overrides', + 'version': '1.10.0', + 'airflow_python_version': '2.7' + }}, + 'cluster_info': {'label': ['test_label'],}}) def test_persistent_security_groups_v2(self): sys.argv = ['qds.py', '--version', 'v2', 'cluster', 'create', '--label', 'test_label', From 0db727c18365640d8e7da8dc368fb307cf0faf82 Mon Sep 17 00:00:00 2001 From: Joy Lal Chattaraj Date: Wed, 8 
May 2019 11:04:40 +0530 Subject: [PATCH 10/69] SDK-350 MultiCloud Fix for Script Location Parameter (#272) --- .travis.yml | 2 +- qds_sdk/commands.py | 27 +++++++++------------------ qds_sdk/util.py | 8 ++++++++ tests/test_command.py | 29 +++++++++++++++++++++++++++++ 4 files changed, 47 insertions(+), 19 deletions(-) diff --git a/.travis.yml b/.travis.yml index 2607b4b0..883a43ed 100644 --- a/.travis.yml +++ b/.travis.yml @@ -10,7 +10,7 @@ install: - pip install urllib3==1.22 requests - "python setup.py install" - pip install mock - - pip install pytest + - pip install pytest==3.2.0 - if [[ $TRAVIS_PYTHON_VERSION == 2.6 ]]; then pip install unittest2; fi # command to run tests script: py.test diff --git a/qds_sdk/commands.py b/qds_sdk/commands.py index 2fa9cc27..fe0ac9a3 100755 --- a/qds_sdk/commands.py +++ b/qds_sdk/commands.py @@ -9,9 +9,7 @@ from qds_sdk.resource import Resource from qds_sdk.exception import ParseError from qds_sdk.account import Account -from qds_sdk.util import GentleOptionParser -from qds_sdk.util import OptionParsingError -from qds_sdk.util import OptionParsingExit +from qds_sdk.util import GentleOptionParser, OptionParsingError, OptionParsingExit, _is_cloud_url from optparse import SUPPRESS_HELP import boto @@ -407,8 +405,7 @@ def parse(cls, args): "Both query and script_location cannot be specified", cls.optparser.format_help()) - if ((options.script_location.find("s3://") != 0) and - (options.script_location.find("s3n://") != 0)): + if not _is_cloud_url(options.script_location): # script location is local file @@ -493,8 +490,7 @@ def parse(cls, args): "Both query and script_location cannot be specified", cls.optparser.format_help()) - if ((options.script_location.find("s3://") != 0) and - (options.script_location.find("s3n://") != 0)): + if not _is_cloud_url(options.script_location): # script location is local file @@ -627,9 +623,8 @@ def validate_script_location(cls, options): else: raise ParseError("Invalid program type %s. Please choose one from python, scala, R or sql." 
% str(fileExtension), cls.optparser.format_help()) - - if ((options.script_location.find("s3://") != 0) and - (options.script_location.find("s3n://") != 0)): + + if not _is_cloud_url(options.script_location): # script location is local file so set the program as the text from the file @@ -749,8 +744,7 @@ def parse(cls, args): "Both query and script_location cannot be specified", cls.optparser.format_help()) - if ((options.script_location.find("s3://") != 0) and - (options.script_location.find("s3n://") != 0)): + if not _is_cloud_url(options.script_location): # script location is local file try: @@ -912,8 +906,7 @@ def parse(cls, args): "Both script and script_location cannot be specified", cls.optparser.format_help()) - if ((options.script_location.find("s3://") != 0) and - (options.script_location.find("s3n://") != 0)): + if not _is_cloud_url(options.script_location): # script location is local file @@ -1011,8 +1004,7 @@ def parse(cls, args): "Both script and script_location cannot be specified", cls.optparser.format_help()) - if ((options.script_location.find("s3://") != 0) and - (options.script_location.find("s3n://") != 0)): + if not _is_cloud_url(options.script_location): # script location is local file @@ -1333,8 +1325,7 @@ def parse(cls, args): "Both query and script_location cannot be specified", cls.optparser.format_help()) - if ((options.script_location.find("s3://") != 0) and - (options.script_location.find("s3n://") != 0)): + if not _is_cloud_url(options.script_location): # script location is local file diff --git a/qds_sdk/util.py b/qds_sdk/util.py index 1efe6865..b79eb70a 100755 --- a/qds_sdk/util.py +++ b/qds_sdk/util.py @@ -143,6 +143,7 @@ def underscore(word): return re.sub(r'\B((?<=[a-z])[A-Z]|[A-Z](?=[a-z]))', r'_\1', word).lower() + def _make_minimal(dictionary): """ This function removes all the keys whose value is either None or an empty @@ -159,6 +160,7 @@ def _make_minimal(dictionary): new_dict[key] = value return new_dict + def _read_file(file_path): file_content = None if file_path is not None: @@ -170,3 +172,9 @@ def _read_file(file_path): raise IOError("Unable to read %s: %s\n" % (file_path, str(e))) return file_content + +def _is_cloud_url(file_path): + cloud_prefixes = ('oci://', 'oraclebmc://', 'wasb://', 'gs://', 's3://', + 's3n://', 's3a://', 'swift://', 'adl://', 'abfs://', 'abfss://') + return file_path.startswith(cloud_prefixes) + diff --git a/tests/test_command.py b/tests/test_command.py index 80cefc8e..55198415 100644 --- a/tests/test_command.py +++ b/tests/test_command.py @@ -1,6 +1,7 @@ from __future__ import print_function import sys import os +import pytest if sys.version_info > (2, 7, 0): import unittest else: @@ -2054,5 +2055,33 @@ def test_result_failed_more_than_two_arguments(self): qds.main() +@pytest.mark.parametrize("script_location", [ + 'oci://some_path/file', 'oraclebmc://some_path/file', 'wasb://some_path/file', + 'gs://some_path/file', 's3://some_path/file', 's3n://some_path/file', + 's3a://some_path/file', 'swift://some_path/file', 'adl://some_path/file', + 'abfs://some_path/file', 'abfss://some_path/file']) +def test_submit_script_location_multi_cloud(script_location): + os.environ['QDS_API_TOKEN'] = 'dummy_token' + os.environ['QDS_API_URL'] = 'https://qds.api.url/api' + sys.argv = ['qds.py', 'hivecmd', 'submit', '--script_location', script_location, + '--tags', 'abc,def'] + print_command() + Connection._api_call = Mock(return_value={'id': 1234}) + qds.main() + Connection._api_call.assert_called_with('POST', 'commands', + {'macros': 
None, + 'hive_version': None, + 'label': None, + 'tags': ["abc", "def"], + 'sample_size': None, + 'name': None, + 'query': None, + 'command_type': 'HiveCommand', + 'can_notify': False, + 'script_location': script_location, + 'retry': 0, + 'pool': None}) + + if __name__ == '__main__': unittest.main() From d1cccf4bd8fd60c23edfd880934d987c8e486b5a Mon Sep 17 00:00:00 2001 From: Sumit Maheshwari Date: Tue, 14 May 2019 15:20:29 +0530 Subject: [PATCH 11/69] Release Version 1.11.1 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 176cab70..c5486fa4 100644 --- a/setup.py +++ b/setup.py @@ -12,7 +12,7 @@ def read(fname): setup( name="qds_sdk", - version="1.11.0", + version="1.11.1", author="Qubole", author_email="dev@qubole.com", description=("Python SDK for coding to the Qubole Data Service API"), From 838819e186ce5a4860a8f0b0218cc97c52a814cd Mon Sep 17 00:00:00 2001 From: Sumit Maheshwari Date: Tue, 14 May 2019 15:54:51 +0530 Subject: [PATCH 12/69] SDK-XXX: Update readme --- README.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index 46a6ba14..3551794d 100644 --- a/README.rst +++ b/README.rst @@ -21,9 +21,9 @@ The SDK is available on `PyPI `_. From source ~~~~~~~~~~~ -* Download the source code: +* Get the source code: - - Either clone the project: ``git clone git@github.com:qubole/qds-sdk-py.git`` + - Either clone the project: ``git clone git@github.com:qubole/qds-sdk-py.git`` and checkout latest release tag from `Releases `_. - Or download one of the releases from https://github.com/qubole/qds-sdk-py/releases From 6951fe2b7691a337b7c1f19c9967f7e4f9710b91 Mon Sep 17 00:00:00 2001 From: Pulkit Chawla Date: Tue, 21 May 2019 11:01:03 +0530 Subject: [PATCH 13/69] SDK-352 : Add storage config params - block_volume_count and block_volume_size (#274) * SDK-352 : Add storage config params - block_volume_count and block_volume_size --- qds_sdk/cloud/oracle_bmc_cloud.py | 34 ++++++++++++++++++++++++++----- tests/test_clusterv2.py | 7 +++++-- 2 files changed, 34 insertions(+), 7 deletions(-) diff --git a/qds_sdk/cloud/oracle_bmc_cloud.py b/qds_sdk/cloud/oracle_bmc_cloud.py index f71c9445..edefe0a4 100755 --- a/qds_sdk/cloud/oracle_bmc_cloud.py +++ b/qds_sdk/cloud/oracle_bmc_cloud.py @@ -29,7 +29,9 @@ def set_cloud_config(self, storage_tenant_id=None, storage_user_id=None, storage_key_finger_print=None, - storage_api_private_rsa_key=None): + storage_api_private_rsa_key=None, + block_volume_count=None, + block_volume_size=None): ''' Args: @@ -66,6 +68,10 @@ def set_cloud_config(self, storage_api_private_rsa_key: storage api private rsa key for oracle cluster + block_volume_count: count of block volumes to be mounted to an instance as reserved disks + + block_volume_size: It is the size (in GB) of each block volume to be mounted to an instance as reserved disk + ''' self.set_compute_config(use_account_compute_creds, compute_tenant_id, @@ -75,7 +81,8 @@ def set_cloud_config(self, self.set_network_config(vcn_id, subnet_id, compartment_id, image_id, availability_domain_info_map) self.set_storage_config(storage_tenant_id, storage_user_id, - storage_key_finger_print, storage_api_private_rsa_key) + storage_key_finger_print, storage_api_private_rsa_key, block_volume_count, + block_volume_size) def set_compute_config(self, use_account_compute_creds=None, @@ -115,11 +122,15 @@ def set_storage_config(self, storage_tenant_id=None, storage_user_id=None, storage_key_finger_print=None, - 
storage_api_private_rsa_key=None): + storage_api_private_rsa_key=None, + block_volume_count=None, + block_volume_size=None): self.storage_config['storage_tenant_id'] = storage_tenant_id self.storage_config['storage_user_id'] = storage_user_id self.storage_config['storage_key_finger_print'] = storage_key_finger_print self.storage_config['storage_api_private_rsa_key'] = storage_api_private_rsa_key + self.storage_config['block_volume_count'] = block_volume_count + self.storage_config['block_volume_size'] = block_volume_size def set_cloud_config_from_arguments(self, arguments): self.set_cloud_config(compute_tenant_id=arguments.compute_tenant_id, @@ -137,7 +148,10 @@ def set_cloud_config_from_arguments(self, arguments): storage_tenant_id=arguments.storage_tenant_id, storage_user_id=arguments.storage_user_id, storage_key_finger_print=arguments.storage_key_finger_print, - storage_api_private_rsa_key=arguments.storage_api_private_rsa_key) + storage_api_private_rsa_key=arguments.storage_api_private_rsa_key, + block_volume_count=arguments.block_volume_count, + block_volume_size=arguments.block_volume_size + ) def create_parser(self, argparser): # compute settings parser @@ -215,4 +229,14 @@ def create_parser(self, argparser): storage_config.add_argument("--storage-api-private-rsa-key", dest="storage_api_private_rsa_key", default=None, - help="storage api private rsa key for oracle cluster") \ No newline at end of file + help="storage api private rsa key for oracle cluster") + storage_config.add_argument("--block-volume-count", + dest="block_volume_count", + default=None, + help="count of block volumes to be mounted to an instance as reserved disks", + type=int) + storage_config.add_argument("--block-volume-size", + dest="block_volume_size", + default=None, + help="size (in GB) of each block volume to be mounted to an instance", + type=int) \ No newline at end of file diff --git a/tests/test_clusterv2.py b/tests/test_clusterv2.py index dadd5817..c1e0b413 100644 --- a/tests/test_clusterv2.py +++ b/tests/test_clusterv2.py @@ -114,7 +114,8 @@ def test_oracle_bmc_compute_config(self): def test_oracle_bmc_storage_config(self): sys.argv = ['qds.py', '--version', 'v2', '--cloud', 'ORACLE_BMC', 'cluster', 'create', '--label', 'test_label', '--storage-tenant-id', 'xxx11', '--storage-user-id', 'yyyy11', '--storage-key-finger-print', - 'zzz22', '--storage-api-private-rsa-key', 'aaa'] + 'zzz22', '--storage-api-private-rsa-key', 'aaa', '--block-volume-count', '1', + '--block-volume-size', '100'] Qubole.cloud = None print_command() Connection._api_call = Mock(return_value={}) @@ -124,7 +125,9 @@ def test_oracle_bmc_storage_config(self): {'storage_key_finger_print': 'zzz22', 'storage_api_private_rsa_key': 'aaa', 'storage_user_id': 'yyyy11', - 'storage_tenant_id': 'xxx11'}}, + 'storage_tenant_id': 'xxx11', + 'block_volume_count': 1, + 'block_volume_size': 100}}, 'cluster_info': {'label': ['test_label']}}) def test_oracle_bmc_network_config(self): From f8e3548862ca31850dc439209b4a74050c1eddce Mon Sep 17 00:00:00 2001 From: Joy Lal Chattaraj Date: Tue, 21 May 2019 12:57:44 +0530 Subject: [PATCH 14/69] SDK-354 Default to S3 Signature Version V4 (#273) Signature Version V2 will be deprecated by AWS which was the default for boto, configured it to use v4 signature for s3 authentication. 
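The change amounts to opting in through boto's global config before any S3 connection is made. A standalone sketch of the same setup (assuming boto 2.x; the credentials are placeholders, and an explicit host is needed because boto 2.x refuses SigV4 requests without one):

    import boto

    # Opt in to Signature Version 4 before connecting.
    if not boto.config.has_section('s3'):
        boto.config.add_section('s3')
    boto.config.set('s3', 'use-sigv4', 'True')

    # The patch falls back to s3.amazonaws.com when the account reports
    # no region endpoint; any concrete regional endpoint also works here.
    conn = boto.connect_s3(aws_access_key_id='placeholder-access-key',
                           aws_secret_access_key='placeholder-secret-key',
                           host='s3.amazonaws.com')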
--- qds_sdk/commands.py | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/qds_sdk/commands.py b/qds_sdk/commands.py index fe0ac9a3..fd715fd2 100755 --- a/qds_sdk/commands.py +++ b/qds_sdk/commands.py @@ -299,19 +299,17 @@ def get_results(self, fp=sys.stdout, inline=True, delim=None, fetch=True, qlog=N pass else: if fetch: + if not boto.config.has_section('s3'): + boto.config.add_section('s3') + boto.config.set('s3', 'use-sigv4', 'True') storage_credentials = conn.get(Account.credentials_rest_entity_path) - if storage_credentials['region_endpoint'] is not None: - boto_conn = boto.connect_s3(aws_access_key_id=storage_credentials['storage_access_key'], - aws_secret_access_key=storage_credentials['storage_secret_key'], - security_token = storage_credentials['session_token'], - host = storage_credentials['region_endpoint']) - else: - boto_conn = boto.connect_s3(aws_access_key_id=storage_credentials['storage_access_key'], - aws_secret_access_key=storage_credentials['storage_secret_key'], - security_token=storage_credentials['session_token']) - + host = storage_credentials['region_endpoint'] if storage_credentials['region_endpoint'] else "s3.amazonaws.com" + boto_conn = boto.connect_s3(aws_access_key_id=storage_credentials['storage_access_key'], + aws_secret_access_key=storage_credentials['storage_secret_key'], + security_token=storage_credentials['session_token'], + host=host) log.info("Starting download from result locations: [%s]" % ",".join(r['result_location'])) - #fetch latest value of num_result_dir + # fetch latest value of num_result_dir num_result_dir = Command.find(self.id).num_result_dir # If column/header names are not able to fetch then use include header as true From 35fe3343d7bfc760a96e92bc79c7a84f6d3c7fc5 Mon Sep 17 00:00:00 2001 From: Joy Lal Chattaraj Date: Tue, 21 May 2019 13:37:41 +0530 Subject: [PATCH 15/69] Release Version 1.12.0 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index c5486fa4..12559c75 100644 --- a/setup.py +++ b/setup.py @@ -12,7 +12,7 @@ def read(fname): setup( name="qds_sdk", - version="1.11.1", + version="1.12.0", author="Qubole", author_email="dev@qubole.com", description=("Python SDK for coding to the Qubole Data Service API"), From 87989ceeca94c90b511c22b2c22898755cdeb814 Mon Sep 17 00:00:00 2001 From: Aaditya Sharma Date: Wed, 19 Jun 2019 15:04:00 +0530 Subject: [PATCH 16/69] SDK-109 Proper Error Message when results unavailable (#276) --- qds_sdk/connection.py | 5 ++++- qds_sdk/exception.py | 5 +++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/qds_sdk/connection.py b/qds_sdk/connection.py index ede6c412..0398acb5 100644 --- a/qds_sdk/connection.py +++ b/qds_sdk/connection.py @@ -152,9 +152,12 @@ def _handle_error(response): elif code == 422: sys.stderr.write(response.text + "\n") raise ResourceInvalid(response) - elif code in (449, 502, 503, 504): + elif code in (502, 503, 504): sys.stderr.write(response.text + "\n") raise RetryWithDelay(response) + elif code == 449: + sys.stderr.write(response.text + "\n") + raise RetryWithDelay(response, "Data requested is unavailable. 
Retrying ...") elif 401 <= code < 500: sys.stderr.write(response.text + "\n") raise ClientError(response) diff --git a/qds_sdk/exception.py b/qds_sdk/exception.py index 5e361902..ef06a335 100644 --- a/qds_sdk/exception.py +++ b/qds_sdk/exception.py @@ -11,8 +11,9 @@ def __init__(self, message, usage): class Error(Exception): """A general error derived from Exception.""" - def __init__(self, request): - Exception.__init__(self, request.text) + def __init__(self, request, message = ""): + response = message if message else request.text + Exception.__init__(self, response) self.request = request From 915bdea725bc1c5de06caac5270dc24dfc8ffb50 Mon Sep 17 00:00:00 2001 From: Joy Lal Chattaraj Date: Mon, 29 Jul 2019 19:35:27 +0530 Subject: [PATCH 17/69] fix: dev: SDK-364: Oracle Multi AD changes required in qds-sdk --- .travis.yml | 1 + qds_sdk/cloud/oracle_bmc_cloud.py | 16 ++++++++++------ tests/test_clusterv2.py | 6 +++--- 3 files changed, 14 insertions(+), 9 deletions(-) diff --git a/.travis.yml b/.travis.yml index 883a43ed..0ef565bc 100644 --- a/.travis.yml +++ b/.travis.yml @@ -15,3 +15,4 @@ install: # command to run tests script: py.test env: BOTO_CONFIG=/tmp/nowhere +dist: trusty diff --git a/qds_sdk/cloud/oracle_bmc_cloud.py b/qds_sdk/cloud/oracle_bmc_cloud.py index edefe0a4..2f0ba903 100755 --- a/qds_sdk/cloud/oracle_bmc_cloud.py +++ b/qds_sdk/cloud/oracle_bmc_cloud.py @@ -1,5 +1,6 @@ from qds_sdk.cloud.cloud import Cloud import json +import ast class OracleBmcCloud(Cloud): ''' @@ -112,11 +113,8 @@ def set_network_config(self, self.network_config['subnet_id'] = subnet_id self.network_config['compartment_id'] = compartment_id self.network_config['image_id'] = image_id - if availability_domain_info_map and availability_domain_info_map.strip(): - try: - self.network_config['availability_domain_info_map'] = json.loads(availability_domain_info_map.strip()) - except Exception as e: - raise Exception("Invalid JSON string for availability domain info map: %s" % e.message) + if availability_domain_info_map: + self.network_config['availability_domain_info_map'] = availability_domain_info_map def set_storage_config(self, storage_tenant_id=None, @@ -133,6 +131,12 @@ def set_storage_config(self, self.storage_config['block_volume_size'] = block_volume_size def set_cloud_config_from_arguments(self, arguments): + if arguments.availability_domain_info_map: + try: + arguments.availability_domain_info_map = ast.literal_eval(arguments.availability_domain_info_map) + assert isinstance(arguments.availability_domain_info_map, list) + except Exception as e: + raise Exception("Invalid List format for availability_domain_info_map: %s" % e.message) self.set_cloud_config(compute_tenant_id=arguments.compute_tenant_id, compute_user_id=arguments.compute_user_id, compute_key_finger_print=arguments.compute_key_finger_print, @@ -239,4 +243,4 @@ def create_parser(self, argparser): dest="block_volume_size", default=None, help="size (in GB) of each block volume to be mounted to an instance", - type=int) \ No newline at end of file + type=int) diff --git a/tests/test_clusterv2.py b/tests/test_clusterv2.py index c1e0b413..eec62ade 100644 --- a/tests/test_clusterv2.py +++ b/tests/test_clusterv2.py @@ -148,7 +148,7 @@ def test_oracle_bmc_network_config(self): def test_oracle_bmc_network_config_az_info_map(self): sys.argv = ['qds.py', '--version', 'v2', '--cloud', 'ORACLE_BMC', 'cluster', 'create', '--label', 'test_label', '--compartment-id', 'abc-compartment', '--image-id', 'abc-image', '--vcn-id', 'vcn-1', - 
'--availability-domain-info-map', '{"availability_domain": "AD-1", "subnet_id": "subnet-1"}'] + '--availability-domain-info-map', str([{"availability_domain": "AD-1", "subnet_id": "subnet-1"}])] Qubole.cloud = None print_command() Connection._api_call = Mock(return_value={}) @@ -159,8 +159,8 @@ def test_oracle_bmc_network_config_az_info_map(self): 'compartment_id': 'abc-compartment', 'image_id': 'abc-image', 'availability_domain_info_map': - {'availability_domain': 'AD-1', - 'subnet_id': 'subnet-1'}}}, + [{'availability_domain': 'AD-1', + 'subnet_id': 'subnet-1'}]}}, 'cluster_info': {'label': ['test_label']}}) def test_oracle_bmc_location_config(self): From c5c035b6f0cea0feeaead4dddb01b2147d58d016 Mon Sep 17 00:00:00 2001 From: Ben Roubicek Date: Tue, 30 Jul 2019 23:38:53 -0700 Subject: [PATCH 18/69] Update requests and urllib library versions (#280) This fixes security vulnerabilities in current versions of these packages. --- .travis.yml | 3 +-- requirements.txt | 8 ++++++++ setup.py | 2 +- 3 files changed, 10 insertions(+), 3 deletions(-) create mode 100644 requirements.txt diff --git a/.travis.yml b/.travis.yml index 0ef565bc..7478fed1 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,13 +1,12 @@ language: python python: - - "2.6" - "2.7" - "3.3" - "3.4" - "3.5" # command to install dependencies install: - - pip install urllib3==1.22 requests + - pip install -r requirements.txt - "python setup.py install" - pip install mock - pip install pytest==3.2.0 diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 00000000..54393b3e --- /dev/null +++ b/requirements.txt @@ -0,0 +1,8 @@ +boto==2.45.0 +certifi==2019.6.16 +chardet==3.0.4 +idna==2.8 +inflection==0.3.1 +requests==2.21.0 +six==1.12.0 +urllib3==1.24.3 diff --git a/setup.py b/setup.py index 12559c75..6d878420 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ import sys from setuptools import setup -INSTALL_REQUIRES = ['requests >=1.0.3', 'boto >=2.45.0', 'six >=1.2.0', 'urllib3 >= 1.0.2', 'inflection >= 0.3.1'] +INSTALL_REQUIRES = ['requests >=2.21.0', 'boto >=2.45.0', 'six >=1.12.0', 'urllib3 >= 1.24.3', 'inflection >= 0.3.1'] if sys.version_info < (2, 7, 0): INSTALL_REQUIRES.append('argparse>=1.1') From 76613276f9e273fa0e037994cf3f9e2583aa2169 Mon Sep 17 00:00:00 2001 From: Joy Lal Chattaraj Date: Wed, 31 Jul 2019 12:30:52 +0530 Subject: [PATCH 19/69] SDK-366 Drop Support for Python 2.6 (#281) --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 6d878420..211c8ee5 100644 --- a/setup.py +++ b/setup.py @@ -23,13 +23,13 @@ def read(fname): scripts=['bin/qds.py'], install_requires=INSTALL_REQUIRES, long_description=read('README.rst'), + python_requires='>=2.7', classifiers=[ "Environment :: Console", "Intended Audience :: Developers", "License :: OSI Approved :: Apache Software License", "Programming Language :: Python", "Programming Language :: Python :: 2", - "Programming Language :: Python :: 2.6", "Programming Language :: Python :: 2.7", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.3", From 2d72f4be156746914c3a3aaae4bf1d68e6603079 Mon Sep 17 00:00:00 2001 From: Joy Lal Chattaraj Date: Wed, 31 Jul 2019 13:13:47 +0530 Subject: [PATCH 20/69] Release Version 1.13.0 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 211c8ee5..60d183c1 100644 --- a/setup.py +++ b/setup.py @@ -12,7 +12,7 @@ def read(fname): setup( name="qds_sdk", - version="1.12.0", + version="1.13.0", 
author="Qubole", author_email="dev@qubole.com", description=("Python SDK for coding to the Qubole Data Service API"), From 22880b24b10fe24b3a9bd638c374d1d2a7505f58 Mon Sep 17 00:00:00 2001 From: Sanket Saurav Date: Wed, 7 Aug 2019 18:57:41 +0530 Subject: [PATCH 21/69] DeepSource.io Integeration (#275) --- .deepsource.toml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 .deepsource.toml diff --git a/.deepsource.toml b/.deepsource.toml new file mode 100644 index 00000000..f464e01a --- /dev/null +++ b/.deepsource.toml @@ -0,0 +1,14 @@ +version = 1 + +test_patterns = [ + 'tests/**' +] + +exclude_patterns = [ + 'example/**' +] + +[[analyzers]] +name = 'python' +enabled = true +runtime_version = '2.x.x' From 488e3e8432af4b068b75bb09556d8c7ea9423727 Mon Sep 17 00:00:00 2001 From: Joy Lal Chattaraj Date: Sat, 10 Aug 2019 23:13:04 +0530 Subject: [PATCH 22/69] DeepSource Integeration for Commit checks and Test Coverage (#282) --- .deepsource.toml | 5 +++++ .travis.yml | 10 +++++++--- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/.deepsource.toml b/.deepsource.toml index f464e01a..41837d03 100644 --- a/.deepsource.toml +++ b/.deepsource.toml @@ -12,3 +12,8 @@ exclude_patterns = [ name = 'python' enabled = true runtime_version = '2.x.x' + +# Test coverage analyzer +[[analyzers]] +name = "test-coverage" +enabled = true \ No newline at end of file diff --git a/.travis.yml b/.travis.yml index 7478fed1..f2104f70 100644 --- a/.travis.yml +++ b/.travis.yml @@ -9,9 +9,13 @@ install: - pip install -r requirements.txt - "python setup.py install" - pip install mock - - pip install pytest==3.2.0 + - pip install pytest==3.2.0 pytest-cov==2.6.0 - if [[ $TRAVIS_PYTHON_VERSION == 2.6 ]]; then pip install unittest2; fi + - curl https://deepsource.io/cli | sh # command to run tests -script: py.test -env: BOTO_CONFIG=/tmp/nowhere +script: + - py.test --cov=./ --cov-report xml + - ./bin/deepsource report --analyzer test-coverage --key python --value-file ./coverage.xml +env: + - BOTO_CONFIG=/tmp/nowhere DEEPSOURCE_DSN=https://c9d4fb28ce6f41798861936c25b0361e@deepsource.io dist: trusty From baca76980de71305aa4ed8930983e824e6b866af Mon Sep 17 00:00:00 2001 From: shekharsaurabh <37484772+shekharsaurabh@users.noreply.github.com> Date: Fri, 11 Oct 2019 15:38:10 +0530 Subject: [PATCH 23/69] SDK-375: Added support for Qubole-trace-Id (#285) Added support for Qubole-trace-Id --- qds_sdk/connection.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/qds_sdk/connection.py b/qds_sdk/connection.py index 0398acb5..df552da8 100644 --- a/qds_sdk/connection.py +++ b/qds_sdk/connection.py @@ -5,6 +5,7 @@ import json import pkg_resources from requests.adapters import HTTPAdapter +from datetime import datetime try: from requests.packages.urllib3.poolmanager import PoolManager except ImportError: @@ -130,7 +131,13 @@ def _handle_error(response): if 200 <= code < 400: return - + + if 'X-Qubole-Trace-Id' in response.headers: + now = datetime.now() + time = now.strftime('%Y-%m-%d %H:%M:%S') + format_list = [time,response.headers['X-Qubole-Trace-Id']] + sys.stderr.write("[{}] Request ID is: {}. 
Please share it with Qubole Support team for any assistance".format(*format_list) + "\n") + if code == 400: sys.stderr.write(response.text + "\n") raise BadRequest(response) From 3dc5bc426a0370fe08fcea1fde697faa7ec1628e Mon Sep 17 00:00:00 2001 From: saiyam1712 <56955417+saiyam1712@users.noreply.github.com> Date: Mon, 4 Nov 2019 11:37:00 +0530 Subject: [PATCH 24/69] SDK-381 support for spark streaming cluster (#289) support for spark streaming cluster --- qds_sdk/engine.py | 2 +- tests/test_clusterv2.py | 19 ++++++++++++++++++- 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/qds_sdk/engine.py b/qds_sdk/engine.py index 3070f699..ee55e50d 100644 --- a/qds_sdk/engine.py +++ b/qds_sdk/engine.py @@ -147,7 +147,7 @@ def engine_parser(argparser): engine_group = argparser.add_argument_group("engine settings") engine_group.add_argument("--flavour", dest="flavour", - choices=["hadoop", "hadoop2", "presto", "spark", "hbase", "airflow", "deeplearning"], + choices=["hadoop", "hadoop2", "presto", "spark", "sparkstreaming", "hbase", "airflow", "deeplearning"], default=None, help="Set engine flavour") diff --git a/tests/test_clusterv2.py b/tests/test_clusterv2.py index eec62ade..4e5dd0ff 100644 --- a/tests/test_clusterv2.py +++ b/tests/test_clusterv2.py @@ -460,6 +460,23 @@ def test_spark_engine_config(self): 'custom_spark_config': 'spark-overrides'}}, 'cluster_info': {'label': ['test_label'],}}) + def test_sparkstreaming_engine_config(self): + with tempfile.NamedTemporaryFile() as temp: + temp.write("config.properties:\na=1\nb=2".encode("utf8")) + temp.flush() + sys.argv = ['qds.py', '--version', 'v2', 'cluster', 'create', '--label', 'test_label', + '--flavour', 'sparkstreaming', '--custom-spark-config', 'spark-overrides'] + Qubole.cloud = None + print_command() + Connection._api_call = Mock(return_value={}) + qds.main() + Connection._api_call.assert_called_with('POST', 'clusters', + {'engine_config': + {'flavour': 'sparkstreaming', + 'spark_settings': { + 'custom_spark_config': 'spark-overrides'}}, + 'cluster_info': {'label': ['test_label'],}}) + def test_airflow_engine_config(self): with tempfile.NamedTemporaryFile() as temp: temp.write("config.properties:\na=1\nb=2".encode("utf8")) @@ -787,7 +804,7 @@ def test_engine_config(self): temp.write("a=1\nb=2".encode("utf8")) temp.flush() sys.argv = ['qds.py', '--version', 'v2', 'cluster', 'update', '123', - '--use-qubole-placement-policy', '--enable-rubix', + '--use-qubole-placement-policy', '--enable-rubix', '--custom-hadoop-config',temp.name] Qubole.cloud = None print_command() From 880d26ed07c51d98c86cc80820caab6d24b2f8fe Mon Sep 17 00:00:00 2001 From: Joy Lal Chattaraj Date: Mon, 4 Nov 2019 17:52:32 +0530 Subject: [PATCH 25/69] Release Version 1.13.1 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 60d183c1..fba96107 100644 --- a/setup.py +++ b/setup.py @@ -12,7 +12,7 @@ def read(fname): setup( name="qds_sdk", - version="1.13.0", + version="1.13.1", author="Qubole", author_email="dev@qubole.com", description=("Python SDK for coding to the Qubole Data Service API"), From 2266e965a8c0865858559bba896e3d86da6b0a8d Mon Sep 17 00:00:00 2001 From: Joy Lal Chattaraj Date: Mon, 4 Nov 2019 18:01:05 +0530 Subject: [PATCH 26/69] Revert "Update requests and urllib library versions (#280)" --- .travis.yml | 2 +- requirements.txt | 8 -------- setup.py | 2 +- 3 files changed, 2 insertions(+), 10 deletions(-) delete mode 100644 requirements.txt diff --git a/.travis.yml b/.travis.yml index 
f2104f70..9b46d2cc 100644 --- a/.travis.yml +++ b/.travis.yml @@ -6,7 +6,7 @@ python: - "3.5" # command to install dependencies install: - - pip install -r requirements.txt + - pip install urllib3==1.22 requests - "python setup.py install" - pip install mock - pip install pytest==3.2.0 pytest-cov==2.6.0 diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 54393b3e..00000000 --- a/requirements.txt +++ /dev/null @@ -1,8 +0,0 @@ -boto==2.45.0 -certifi==2019.6.16 -chardet==3.0.4 -idna==2.8 -inflection==0.3.1 -requests==2.21.0 -six==1.12.0 -urllib3==1.24.3 diff --git a/setup.py b/setup.py index fba96107..259aa5ed 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ import sys from setuptools import setup -INSTALL_REQUIRES = ['requests >=2.21.0', 'boto >=2.45.0', 'six >=1.12.0', 'urllib3 >= 1.24.3', 'inflection >= 0.3.1'] +INSTALL_REQUIRES = ['requests >=1.0.3', 'boto >=2.45.0', 'six >=1.2.0', 'urllib3 >= 1.0.2', 'inflection >= 0.3.1'] if sys.version_info < (2, 7, 0): INSTALL_REQUIRES.append('argparse>=1.1') From 4292d8cc556a27da0d23e349b39ac005b826ef36 Mon Sep 17 00:00:00 2001 From: Joy Lal Chattaraj Date: Mon, 9 Dec 2019 17:58:41 +0530 Subject: [PATCH 27/69] SDK-383: Add retries to get calls in case of 500 Response (#292) (cherry picked from commit 99544a9166cb9de8d86f218412bdee137111d881) --- qds_sdk/connection.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/qds_sdk/connection.py b/qds_sdk/connection.py index df552da8..dbec341b 100644 --- a/qds_sdk/connection.py +++ b/qds_sdk/connection.py @@ -50,11 +50,11 @@ def __init__(self, auth, rest_url, skip_ssl_cert_check, reuse=True): self.session_with_retries = requests.Session() self.session_with_retries.mount('https://', MyAdapter(max_retries=3)) - @retry((RetryWithDelay, requests.Timeout), tries=6, delay=30, backoff=2) + @retry((RetryWithDelay, requests.Timeout, ServerError), tries=6, delay=30, backoff=2) def get_raw(self, path, params=None): return self._api_call_raw("GET", path, params=params) - @retry((RetryWithDelay, requests.Timeout), tries=6, delay=30, backoff=2) + @retry((RetryWithDelay, requests.Timeout, ServerError), tries=6, delay=30, backoff=2) def get(self, path, params=None): return self._api_call("GET", path, params=params) From b7dac341001d6eb271f4da7c6dbcda714a286456 Mon Sep 17 00:00:00 2001 From: Manzoor Husain Date: Wed, 11 Dec 2019 11:14:58 +0530 Subject: [PATCH 28/69] Release Version 1.13.2 (cherry picked from commit d10719bf542ca4b4eaf87a81bea1791a06abda71) --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 259aa5ed..3b9ed249 100644 --- a/setup.py +++ b/setup.py @@ -12,7 +12,7 @@ def read(fname): setup( name="qds_sdk", - version="1.13.1", + version="1.13.2", author="Qubole", author_email="dev@qubole.com", description=("Python SDK for coding to the Qubole Data Service API"), From 05f89943ad731e948e2f2ba9334857e7e4dfca70 Mon Sep 17 00:00:00 2001 From: Tarun Goyal Date: Thu, 17 Oct 2019 13:23:00 +0530 Subject: [PATCH 29/69] SDK-376: Adding support for multiple API versions in clusters (#284) This adds Support for multiple API versions in clusters --- bin/qds.py | 2 +- qds_sdk/cluster_cmd_line.py | 110 +++++ qds_sdk/cluster_info_factory.py | 17 + qds_sdk/cluster_info_v22.py | 752 ++++++++++++++++++++++++++++++++ qds_sdk/clusterv2.py | 200 +++------ qds_sdk/qubole.py | 1 + tests/test_clusterv2.py | 4 +- tests/test_clusterv22.py | 197 +++++++++ 8 files changed, 1136 insertions(+), 147 deletions(-) create mode 100644 
qds_sdk/cluster_cmd_line.py create mode 100644 qds_sdk/cluster_info_factory.py create mode 100644 qds_sdk/cluster_info_v22.py create mode 100644 tests/test_clusterv22.py diff --git a/bin/qds.py b/bin/qds.py index fac8b439..4952763a 100755 --- a/bin/qds.py +++ b/bin/qds.py @@ -15,7 +15,7 @@ from qds_sdk.nezha import NezhaCmdLine from qds_sdk.user import UserCmdLine from qds_sdk.template import TemplateCmdLine -from qds_sdk.clusterv2 import ClusterCmdLine +from qds_sdk.cluster_cmd_line import ClusterCmdLine from qds_sdk.sensors import * import os import sys diff --git a/qds_sdk/cluster_cmd_line.py b/qds_sdk/cluster_cmd_line.py new file mode 100644 index 00000000..8978c162 --- /dev/null +++ b/qds_sdk/cluster_cmd_line.py @@ -0,0 +1,110 @@ +from qds_sdk.cluster_info_factory import ClusterInfoFactory +from qds_sdk.clusterv2 import ClusterV2 +from qds_sdk.qubole import Qubole +from qds_sdk.resource import Resource +from qds_sdk.cloud.cloud import Cloud +from qds_sdk.engine import Engine +from qds_sdk import util +import argparse +import json + +class ClusterCmdLine: + + @staticmethod + def parsers(action): + argparser = argparse.ArgumentParser( + prog="qds.py cluster", + description="Cluster Operations for Qubole Data Service.") + subparsers = argparser.add_subparsers(title="Cluster operations") + if Qubole.version is not None: + ClusterV2.api_version = Qubole.version + if action == "create": + create = subparsers.add_parser("create", help="Create a new cluster") + ClusterCmdLine.create_update_clone_parser(create, action="create") + create.set_defaults(func=ClusterV2.create) + + if action == "update": + update = subparsers.add_parser("update", help="Update the settings of an existing cluster") + ClusterCmdLine.create_update_clone_parser(update, action="update") + update.set_defaults(func=ClusterV2.update) + + if action == "clone": + clone = subparsers.add_parser("clone", help="Clone a cluster from an existing one") + ClusterCmdLine.create_update_clone_parser(clone, action="clone") + clone.set_defaults(func=ClusterV2.clone) + + if action == "list": + li = subparsers.add_parser("list", help="list clusters from existing clusters depending upon state") + ClusterCmdLine.list_parser(li, action="list") + li.set_defaults(func=ClusterV2.list) + return argparser + + @staticmethod + def list_parser(subparser, action=None, ): + + # cluster info parser + cluster_info_cls = ClusterInfoFactory.get_cluster_info_cls() + cluster_info_cls.list_info_parser(subparser, action) + + @staticmethod + def create_update_clone_parser(subparser, action=None): + # cloud config parser + cloud = Qubole.get_cloud() + cloud.create_parser(subparser) + + # cluster info parser + cluster_info_cls = ClusterInfoFactory.get_cluster_info_cls() + cluster_info_cls.cluster_info_parser(subparser, action) + + # engine config parser + Engine.engine_parser(subparser) + + @staticmethod + def run(args): + parser = ClusterCmdLine.parsers(args[0]) + arguments = parser.parse_args(args) + if args[0] in ["create", "clone", "update"]: + ClusterCmdLine.get_cluster_create_clone_update(arguments, args[0]) + else: + return arguments.func(arguments.label, arguments.cluster_id, arguments.state, + arguments.page, arguments.per_page) + + @staticmethod + def get_cluster_create_clone_update(arguments, action): + + # This will set cluster info and monitoring settings + cluster_info_cls = ClusterInfoFactory.get_cluster_info_cls() + cluster_info = cluster_info_cls(arguments.label) + cluster_info.set_cluster_info_from_arguments(arguments) + + # This will set cloud 
config settings + cloud_config = Qubole.get_cloud() + cloud_config.set_cloud_config_from_arguments(arguments) + + # This will set engine settings + engine_config = Engine(flavour=arguments.flavour) + engine_config.set_engine_config_settings(arguments) + cluster_request = ClusterCmdLine.get_cluster_request_parameters(cluster_info, cloud_config, engine_config) + + action = action + if action == "create": + return arguments.func(cluster_request) + else: + return arguments.func(arguments.cluster_id_label, cluster_request) + + @staticmethod + def get_cluster_request_parameters(cluster_info, cloud_config, engine_config): + ''' + Use this to return final minimal request from cluster_info, cloud_config or engine_config objects + Alternatively call util._make_minimal if only one object needs to be implemented + ''' + + cluster_request = {} + cloud_config = util._make_minimal(cloud_config.__dict__) + if bool(cloud_config): cluster_request['cloud_config'] = cloud_config + + engine_config = util._make_minimal(engine_config.__dict__) + if bool(engine_config): cluster_request['engine_config'] = engine_config + + cluster_request.update(util._make_minimal(cluster_info.__dict__)) + return cluster_request \ No newline at end of file diff --git a/qds_sdk/cluster_info_factory.py b/qds_sdk/cluster_info_factory.py new file mode 100644 index 00000000..923e499c --- /dev/null +++ b/qds_sdk/cluster_info_factory.py @@ -0,0 +1,17 @@ +from qds_sdk.qubole import Qubole +from qds_sdk.clusterv2 import ClusterInfoV2 +from qds_sdk.cluster_info_v22 import ClusterInfoV22 + + +class ClusterInfoFactory: + + @staticmethod + def get_cluster_info_cls(api_version=None): + if api_version is None: + api_version = Qubole.version + if api_version == "v2": + return ClusterInfoV2 + elif api_version == "v2.2": + return ClusterInfoV22 + else: + return ClusterInfoV2 diff --git a/qds_sdk/cluster_info_v22.py b/qds_sdk/cluster_info_v22.py new file mode 100644 index 00000000..1af9cdf2 --- /dev/null +++ b/qds_sdk/cluster_info_v22.py @@ -0,0 +1,752 @@ +import json + +from qds_sdk import util + + +def str2bool(v): + return v.lower() in ("yes", "true", "t", "1") + + +class ClusterInfoV22(object): + """ + qds_sdk.ClusterInfoV2 is the class which stores information about a cluster_info. + You can use objects of this class to create/update/clone a cluster. + """ + + def __init__(self, label): + """ + Args: + `label`: A list of labels that identify the cluster. At least one label + must be provided when creating a cluster. 
+ """ + self.cluster_info = {'label': label} + self.monitoring = {} + self.internal = {} # right now not supported + + def set_cluster_info_from_arguments(self, arguments): + customer_ssh_key = util._read_file(arguments.customer_ssh_key_file) + self.set_cluster_info(disallow_cluster_termination=arguments.disallow_cluster_termination, + enable_ganglia_monitoring=arguments.enable_ganglia_monitoring, + datadog_api_token=arguments.datadog_api_token, + datadog_app_token=arguments.datadog_app_token, + node_bootstrap=arguments.node_bootstrap_file, + master_instance_type=arguments.master_instance_type, + slave_instance_type=arguments.slave_instance_type, + min_nodes=arguments.initial_nodes, + max_nodes=arguments.max_nodes, + node_base_cooldown_period=arguments.node_base_cooldown_period, + node_spot_cooldown_period=arguments.node_spot_cooldown_period, + custom_tags=arguments.custom_tags, + heterogeneous_config=arguments.heterogeneous_config, + idle_cluster_timeout=arguments.idle_cluster_timeout, + disk_count=arguments.count, + disk_type=arguments.disk_type, + disk_size=arguments.size, + root_disk_size=arguments.root_disk_size, + upscaling_config=arguments.upscaling_config, + enable_encryption=arguments.encrypted_ephemerals, + customer_ssh_key=customer_ssh_key, + image_uri_overrides=arguments.image_uri_overrides, + env_name=arguments.env_name, + python_version=arguments.python_version, + r_version=arguments.r_version, + disable_cluster_pause=arguments.disable_cluster_pause, + paused_cluster_timeout_mins=arguments.paused_cluster_timeout_mins, + disable_autoscale_node_pause=arguments.disable_autoscale_node_pause, + paused_autoscale_node_timeout_mins=arguments.paused_autoscale_node_timeout_mins) + + self.set_composition(master_type=arguments.master_type, + master_spot_block_duration=arguments.master_spot_block_duration, + master_maximum_bid_price_percentage=arguments.master_maximum_bid_price_percentage, + master_timeout_for_request=arguments.master_timeout_for_request, + master_spot_fallback=arguments.master_spot_fallback, + min_ondemand_percentage=arguments.min_ondemand_percentage, + min_spot_block_percentage=arguments.min_spot_block_percentage, + min_spot_block_duration=arguments.min_spot_block_duration, + min_spot_percentage=arguments.min_spot_percentage, + min_maximum_bid_price_percentage=arguments.min_maximum_bid_price_percentage, + min_timeout_for_request=arguments.min_timeout_for_request, + min_spot_fallback=arguments.min_spot_fallback, + autoscaling_ondemand_percentage=arguments.autoscaling_ondemand_percentage, + autoscaling_spot_block_percentage=arguments.autoscaling_spot_block_percentage, + autoscaling_spot_percentage=arguments.autoscaling_spot_percentage, + autoscaling_spot_block_duration=arguments.autoscaling_spot_block_duration, + autoscaling_maximum_bid_price_percentage=arguments.autoscaling_maximum_bid_price_percentage, + autoscaling_timeout_for_request=arguments.autoscaling_timeout_for_request, + autoscaling_spot_fallback=arguments.autoscaling_spot_fallback) + + def set_cluster_info(self, + disallow_cluster_termination=None, + enable_ganglia_monitoring=None, + datadog_api_token=None, + datadog_app_token=None, + node_bootstrap=None, + master_instance_type=None, + slave_instance_type=None, + min_nodes=None, + max_nodes=None, + node_base_cooldown_period=None, + node_spot_cooldown_period=None, + custom_tags=None, + heterogeneous_config=None, + idle_cluster_timeout=None, + disk_count=None, + disk_type=None, + disk_size=None, + root_disk_size=None, + upscaling_config=None, + 
enable_encryption=None, + customer_ssh_key=None, + cluster_name=None, + force_tunnel=None, + image_uri_overrides=None, + env_name=None, + python_version=None, + r_version=None, + disable_cluster_pause=None, + paused_cluster_timeout_mins=None, + disable_autoscale_node_pause=None, + paused_autoscale_node_timeout_mins=None): + """ + Args: + + `disallow_cluster_termination`: Set this to True if you don't want + qubole to auto-terminate idle clusters. Use this option with + extreme caution. + + `enable_ganglia_monitoring`: Set this to True if you want to enable + ganglia monitoring for the cluster. + + `node_bootstrap`: name of the node bootstrap file for this + cluster. It should be in stored in S3 at + /scripts/hadoop/ + + `master_instance_type`: The instance type to use for the Hadoop master + node. + + `slave_instance_type`: The instance type to use for the Hadoop slave + nodes. + + `min_nodes`: Number of nodes to start the cluster with. + + `max_nodes`: Maximum number of nodes the cluster may be auto-scaled up + to. + + `node_base_cooldown_period`: Time for which an on-demand node waits before termination (Unit: minutes) + + `node_spot_cooldown_period`: Time for which a spot node waits before termination (Unit: minutes) + + `disk_count`: Number of EBS volumes to attach + to each instance of the cluster. + + `disk_type`: Type of the EBS volume. Valid + values are 'standard' (magnetic) and 'ssd'. + + `disk_size`: Size of each EBS volume, in GB. + + `root_disk_size`: Size of root volume, in GB. + + `enable_encryption`: Encrypt the ephemeral drives on the instance. + + `customer_ssh_key`: SSH key to use to login to the instances. + + `idle_cluster_timeout`: The buffer time (range in 0-6 hrs) after a cluster goes idle + and gets terminated, given cluster auto termination is on and no cluster specific + timeout has been set (default is 2 hrs) + + `heterogeneous_config` : Configuring heterogeneous nodes in Hadoop 2 and Spark clusters. + It implies that slave nodes can be of different instance types + + `custom_tags` : Custom tags to be set on all instances + of the cluster. Specified as JSON object (key-value pairs) + + `datadog_api_token` : Specify the Datadog API token to use the Datadog monitoring service + + `datadog_app_token` : Specify the Datadog APP token to use the Datadog monitoring service + + `image_uri_overrides` : Override the image name provided + + `env_name`: Name of python and R environment. (For Spark clusters) + + `python_version`: Version of Python for environment. (For Spark clusters) + + `r_version`: Version of R for environment. 
(For Spark clusters) + + `disable_cluster_pause`: Disable cluster pause + + `paused_cluster_timeout_mins`: Paused cluster timeout in mins + + `disable_autoscale_node_pause`: Disable autoscale node pause + + `paused_autoscale_node_timeout_mins`: Paused autoscale node timeout in mins + + Doc: For getting details about arguments + http://docs.qubole.com/en/latest/rest-api/cluster_api/create-new-cluster.html#parameters + + """ + self.cluster_info['master_instance_type'] = master_instance_type + self.cluster_info['slave_instance_type'] = slave_instance_type + self.cluster_info['min_nodes'] = min_nodes + self.cluster_info['max_nodes'] = max_nodes + self.cluster_info['cluster_name'] = cluster_name + self.cluster_info['node_bootstrap'] = node_bootstrap + self.cluster_info['disallow_cluster_termination'] = disallow_cluster_termination + self.cluster_info['force_tunnel'] = force_tunnel + self.cluster_info['node_base_cooldown_period'] = node_base_cooldown_period + self.cluster_info['node_volatile_cooldown_period'] = node_spot_cooldown_period + self.cluster_info['customer_ssh_key'] = customer_ssh_key + if custom_tags and custom_tags.strip(): + try: + self.cluster_info['custom_tags'] = json.loads(custom_tags.strip()) + except Exception as e: + raise Exception( + "Invalid JSON string for custom ec2 tags: %s" % e.message) + + self.cluster_info['heterogeneous_config'] = heterogeneous_config + self.cluster_info['idle_cluster_timeout'] = idle_cluster_timeout + self.cluster_info['rootdisk'] = {} + self.cluster_info['rootdisk']['size'] = root_disk_size + self.set_data_disk(disk_size, disk_count, disk_type, + upscaling_config, enable_encryption) + self.set_monitoring(enable_ganglia_monitoring, + datadog_api_token, datadog_app_token) + self.set_internal(image_uri_overrides) + self.set_env_settings(env_name, python_version, r_version) + self.set_start_stop_settings(disable_cluster_pause, paused_cluster_timeout_mins, + disable_autoscale_node_pause, paused_autoscale_node_timeout_mins) + + def set_composition(self, + master_type="ondemand", + master_spot_block_duration=None, + master_maximum_bid_price_percentage=None, + master_timeout_for_request=None, + master_spot_fallback=None, + min_ondemand_percentage=None, + min_spot_block_percentage=None, + min_spot_block_duration=None, + min_spot_percentage=None, + min_maximum_bid_price_percentage=None, + min_timeout_for_request=None, + min_spot_fallback=None, + autoscaling_ondemand_percentage=None, + autoscaling_spot_block_percentage=None, + autoscaling_spot_percentage=None, + autoscaling_spot_block_duration=None, + autoscaling_maximum_bid_price_percentage=None, + autoscaling_timeout_for_request=None, + autoscaling_spot_fallback=None): + + self.cluster_info["composition"] = {} + + self.set_master_config(master_type, + master_spot_block_duration, + master_maximum_bid_price_percentage, + master_timeout_for_request, + master_spot_fallback) + + self.set_min_config(min_ondemand_percentage, + min_spot_block_percentage, + min_spot_block_duration, + min_spot_percentage, + min_maximum_bid_price_percentage, + min_timeout_for_request, + min_spot_fallback) + + self.set_autoscaling_config(autoscaling_ondemand_percentage, + autoscaling_spot_block_percentage, + autoscaling_spot_block_duration, + autoscaling_spot_percentage, + autoscaling_maximum_bid_price_percentage, + autoscaling_timeout_for_request, + autoscaling_spot_fallback) + + def set_master_config(self, + master_type, + master_spot_block_duration, + master_maximum_bid_price_percentage, + master_timeout_for_request, + 
master_spot_fallback): + self.cluster_info["composition"]["master"] = {"nodes": []} + if master_type == "ondemand": + self.set_master_ondemand(100) + elif master_type == "spot": + self.set_master_spot(100, master_maximum_bid_price_percentage, + master_timeout_for_request, master_spot_fallback) + elif master_type == "spotblock": + self.set_master_spot_block( + 100, master_spot_block_duration) + + def set_min_config(self, + min_ondemand_percentage, + min_spot_block_percentage, + min_spot_block_duration, + min_spot_percentage, + min_maximum_bid_price_percentage, + min_timeout_for_request, + min_spot_fallback): + self.cluster_info["composition"]["min_nodes"] = {"nodes": []} + if not min_ondemand_percentage and not min_spot_block_percentage and not min_spot_percentage: + self.set_min_ondemand(100) + else: + if min_ondemand_percentage: + self.set_min_ondemand(min_ondemand_percentage) + if min_spot_block_percentage: + self.set_min_spot_block( + min_spot_block_percentage, min_spot_block_duration) + if min_spot_percentage: + self.set_min_spot(min_spot_percentage, min_maximum_bid_price_percentage, + min_timeout_for_request, min_spot_fallback) + + def set_autoscaling_config(self, + autoscaling_ondemand_percentage, + autoscaling_spot_block_percentage, + autoscaling_spot_block_duration, + autoscaling_spot_percentage, + autoscaling_maximum_bid_price_percentage, + autoscaling_timeout_for_request, + autoscaling_spot_fallback): + self.cluster_info["composition"]["autoscaling_nodes"] = {"nodes": []} + if not autoscaling_ondemand_percentage and not autoscaling_spot_block_percentage and not autoscaling_spot_percentage: + self.set_autoscaling_ondemand(50) + self.set_autoscaling_spot(50, 100, 1, 'ondemand') + else: + if autoscaling_ondemand_percentage: + self.set_autoscaling_ondemand(autoscaling_ondemand_percentage) + if autoscaling_spot_block_percentage: + self.set_autoscaling_spot_block(autoscaling_spot_block_percentage, + autoscaling_spot_block_duration) + if autoscaling_spot_percentage: + self.set_autoscaling_spot(autoscaling_spot_percentage, autoscaling_maximum_bid_price_percentage, + autoscaling_timeout_for_request, autoscaling_spot_fallback) + + def set_master_ondemand(self, master_ondemand_percentage=None): + ondemand = {"percentage": master_ondemand_percentage, "type": "ondemand"} + self.cluster_info["composition"]["master"]["nodes"].append(ondemand) + + def set_master_spot_block(self, master_spot_block_percentage=None, master_spot_block_duration=120): + spot_block = {"percentage": master_spot_block_percentage, + "type": "spotblock", + "timeout": master_spot_block_duration} + self.cluster_info["composition"]["master"]["nodes"].append(spot_block) + + def set_master_spot(self, master_spot_percentage=None, master_maximum_bid_price_percentage=100, + master_timeout_for_request=1, master_spot_fallback=None): + spot = {"percentage": master_spot_percentage, + "type": "spot", + "maximum_bid_price_percentage": master_maximum_bid_price_percentage, + "timeout_for_request": master_timeout_for_request, + "fallback": master_spot_fallback + } + self.cluster_info["composition"]["master"]["nodes"].append(spot) + + def set_min_ondemand(self, min_ondemand_percentage=None): + ondemand = {"percentage": min_ondemand_percentage, "type": "ondemand"} + self.cluster_info["composition"]["min_nodes"]["nodes"].append(ondemand) + + def set_min_spot_block(self, min_spot_block_percentage=None, min_spot_block_duration=120): + spot_block = {"percentage": min_spot_block_percentage, + "type": "spotblock", + "timeout": 
min_spot_block_duration} + self.cluster_info["composition"]["min_nodes"]["nodes"].append(spot_block) + + def set_min_spot(self, min_spot_percentage=None, min_maximum_bid_price_percentage=100, + min_timeout_for_request=1, min_spot_fallback=None): + spot = {"percentage": min_spot_percentage, + "type": "spot", + "maximum_bid_price_percentage": min_maximum_bid_price_percentage, + "timeout_for_request": min_timeout_for_request, + "fallback": min_spot_fallback + } + self.cluster_info["composition"]["min_nodes"]["nodes"].append(spot) + + def set_autoscaling_ondemand(self, autoscaling_ondemand_percentage=None): + ondemand = { + "percentage": autoscaling_ondemand_percentage, "type": "ondemand"} + self.cluster_info["composition"]["autoscaling_nodes"]["nodes"].append(ondemand) + + def set_autoscaling_spot_block(self, autoscaling_spot_block_percentage=None, autoscaling_spot_block_duration=120): + spot_block = {"percentage": autoscaling_spot_block_percentage, + "type": "spotblock", + "timeout": autoscaling_spot_block_duration} + self.cluster_info["composition"]["autoscaling_nodes"]["nodes"].append(spot_block) + + def set_autoscaling_spot(self, autoscaling_spot_percentage=None, autoscaling_maximum_bid_price_percentage=100, + autoscaling_timeout_for_request=1, autoscaling_spot_fallback=None): + spot = {"percentage": autoscaling_spot_percentage, + "type": "spot", + "maximum_bid_price_percentage": autoscaling_maximum_bid_price_percentage, + "timeout_for_request": autoscaling_timeout_for_request, + "fallback": autoscaling_spot_fallback + } + self.cluster_info["composition"]["autoscaling_nodes"]["nodes"].append(spot) + + def set_datadog_setting(self, + datadog_api_token=None, + datadog_app_token=None): + self.monitoring['datadog'] = {} + self.monitoring['datadog']['datadog_api_token'] = datadog_api_token + self.monitoring['datadog']['datadog_app_token'] = datadog_app_token + + def set_monitoring(self, + enable_ganglia_monitoring=None, + datadog_api_token=None, + datadog_app_token=None): + self.monitoring['ganglia'] = enable_ganglia_monitoring + self.set_datadog_setting(datadog_api_token, datadog_app_token) + + def set_data_disk(self, + disk_size=None, + disk_count=None, + disk_type=None, + upscaling_config=None, + enable_encryption=None): + self.cluster_info['datadisk'] = {} + self.cluster_info['datadisk']['size'] = disk_size + self.cluster_info['datadisk']['count'] = disk_count + self.cluster_info['datadisk']['type'] = disk_type + self.cluster_info['datadisk']['upscaling_config'] = upscaling_config + self.cluster_info['datadisk']['encryption'] = enable_encryption + + def set_internal(self, image_uri_overrides=None): + self.internal['image_uri_overrides'] = image_uri_overrides + + def set_env_settings(self, env_name=None, python_version=None, r_version=None): + self.cluster_info['env_settings'] = {} + self.cluster_info['env_settings']['name'] = env_name + self.cluster_info['env_settings']['python_version'] = python_version + self.cluster_info['env_settings']['r_version'] = r_version + + def set_start_stop_settings(self, + disable_cluster_pause=None, + paused_cluster_timeout_mins=None, + disable_autoscale_node_pause=None, + paused_autoscale_node_timeout_mins=None): + if disable_cluster_pause is not None: + disable_cluster_pause = int(disable_cluster_pause) + self.cluster_info['disable_cluster_pause'] = disable_cluster_pause + self.cluster_info['paused_cluster_timeout_mins'] = paused_cluster_timeout_mins + if disable_autoscale_node_pause is not None: + disable_autoscale_node_pause = 
int(disable_autoscale_node_pause) + self.cluster_info['disable_autoscale_node_pause'] = disable_autoscale_node_pause + self.cluster_info['paused_autoscale_node_timeout_mins'] = paused_autoscale_node_timeout_mins + + @staticmethod + def list_info_parser(argparser, action): + argparser.add_argument("--id", dest="cluster_id", + help="show cluster with this id") + + argparser.add_argument("--label", dest="label", + help="show cluster with this label") + argparser.add_argument("--state", dest="state", + choices=['invalid', 'up', 'down', + 'pending', 'terminating'], + help="State of the cluster") + argparser.add_argument("--page", dest="page", + type=int, + help="Page number") + argparser.add_argument("--per-page", dest="per_page", + type=int, + help="Number of clusters to be retrieved per page") + + @staticmethod + def cluster_info_parser(argparser, action): + create_required = False + label_required = False + if action == "create": + create_required = True + elif action == "update": + argparser.add_argument("cluster_id_label", + help="id/label of the cluster to update") + elif action == "clone": + argparser.add_argument("cluster_id_label", + help="id/label of the cluster to update") + label_required = True + + argparser.add_argument("--label", dest="label", + nargs="+", required=(create_required or label_required), + help="list of labels for the cluster" + + " (atleast one label is required)") + cluster_info = argparser.add_argument_group("cluster_info") + cluster_info.add_argument("--master-instance-type", + dest="master_instance_type", + help="instance type to use for the hadoop" + + " master node") + cluster_info.add_argument("--slave-instance-type", + dest="slave_instance_type", + help="instance type to use for the hadoop" + + " slave nodes") + cluster_info.add_argument("--min-nodes", + dest="initial_nodes", + type=int, + help="number of nodes to start the" + + " cluster with", ) + cluster_info.add_argument("--max-nodes", + dest="max_nodes", + type=int, + help="maximum number of nodes the cluster" + + " may be auto-scaled up to") + cluster_info.add_argument("--idle-cluster-timeout", + dest="idle_cluster_timeout", + help="cluster termination timeout for idle cluster") + cluster_info.add_argument("--node-bootstrap-file", + dest="node_bootstrap_file", + help="""name of the node bootstrap file for this cluster. 
It + should be in stored in S3 at + /scripts/hadoop/NODE_BOOTSTRAP_FILE + """, ) + cluster_info.add_argument("--root-disk-size", + dest="root_disk_size", + type=int, + help="size of the root volume in GB") + termination = cluster_info.add_mutually_exclusive_group() + termination.add_argument("--disallow-cluster-termination", + dest="disallow_cluster_termination", + action="store_true", + default=None, + help="don't auto-terminate idle clusters," + + " use this with extreme caution", ) + termination.add_argument("--allow-cluster-termination", + dest="disallow_cluster_termination", + action="store_false", + default=None, + help="auto-terminate idle clusters,") + + node_cooldown_period_group = argparser.add_argument_group( + "node cooldown period settings") + node_cooldown_period_group.add_argument("--node-base-cooldown-period", + dest="node_base_cooldown_period", + type=int, + help="Cooldown period for on-demand nodes" + + " unit: minutes") + node_cooldown_period_group.add_argument("--node-spot-cooldown-period", + dest="node_spot_cooldown_period", + type=int, + help="Cooldown period for spot nodes" + + " unit: minutes") + cluster_info.add_argument("--customer-ssh-key", + dest="customer_ssh_key_file", + help="location for ssh key to use to" + + " login to the instance") + cluster_info.add_argument("--custom-tags", + dest="custom_tags", + help="""Custom tags to be set on all instances + of the cluster. Specified as JSON object (key-value pairs) + e.g. --custom-ec2-tags '{"key1":"value1", "key2":"value2"}' + """, ) + + # datadisk settings + datadisk_group = argparser.add_argument_group("data disk settings") + datadisk_group.add_argument("--count", + dest="count", + type=int, + help="Number of EBS volumes to attach to" + + " each instance of the cluster", ) + datadisk_group.add_argument("--disk-type", + dest="disk_type", + choices=["standard", "gp2"], + help="Type of the volume attached to the instances. Valid values are " + + "'standard' (magnetic) and 'gp2' (ssd).") + datadisk_group.add_argument("--size", + dest="size", + type=int, + help="Size of each EBS volume, in GB", ) + datadisk_group.add_argument("--upscaling-config", + dest="upscaling_config", + help="Upscaling config to be attached with the instances.", ) + ephemerals = datadisk_group.add_mutually_exclusive_group() + ephemerals.add_argument("--encrypted-ephemerals", + dest="encrypted_ephemerals", + action="store_true", + default=None, + help="encrypt the ephemeral drives on" + + " the instance", ) + ephemerals.add_argument("--no-encrypted-ephemerals", + dest="encrypted_ephemerals", + action="store_false", + default=None, + help="don't encrypt the ephemeral drives on" + + " the instance", ) + + cluster_info.add_argument("--heterogeneous-config", + dest="heterogeneous_config", + help="heterogeneous config for the cluster") + + composition_group = argparser.add_argument_group("cluster composition settings") + composition_group.add_argument("--master-type", + dest="master_type", + choices=["ondemand", "spot", "spotblock"], + default="ondemand", + help="type of master nodes. 
Valid values are: ('ondemand', 'spot', 'spotblock')" + + "default: ondemand") + composition_group.add_argument("--master-spot-block-duration", + dest="master_spot_block_duration", + type=int, + default=120, + help="spot block duration unit: minutes") + composition_group.add_argument("--master-maximum-bid-price-percentage", + dest="master_maximum_bid_price_percentage", + type=int, + default=100, + help="maximum value to bid for master spot instances" + + " expressed as a percentage of the base" + + " price for the master instance types") + composition_group.add_argument("--master-timeout-for-request", + dest="master_timeout_for_request", + type=int, + default=1, + help="timeout for a master spot instance request, unit: minutes") + composition_group.add_argument("--master-spot-fallback", + dest="master_spot_fallback", + choices=["ondemand", None], + default=None, + help="whether to fallback to on-demand instances for master nodes" + + " if spot instances aren't available") + composition_group.add_argument("--min-ondemand-percentage", + dest="min_ondemand_percentage", + type=int, + help="percentage of ondemand nodes in min config") + composition_group.add_argument("--min-spot-block-percentage", + dest="min_spot_block_percentage", + type=int, + help="percentage of spot block nodes in min config") + composition_group.add_argument("--min-spot-percentage", + dest="min_spot_percentage", + type=int, + help="percentage of spot nodes in min config") + composition_group.add_argument("--min-spot-block-duration", + dest="min_spot_block_duration", + type=int, + default=120, + help="spot block duration unit: minutes") + composition_group.add_argument("--min-maximum-bid-price-percentage", + dest="min_maximum_bid_price_percentage", + type=int, + default=100, + help="maximum value to bid for min spot instances" + + " expressed as a percentage of the base" + + " price for the master instance types") + composition_group.add_argument("--min-timeout-for-request", + dest="min_timeout_for_request", + type=int, + default=1, + help="timeout for a min spot instance request, unit: minutes") + composition_group.add_argument("--min-spot-fallback", + dest="min_spot_fallback", + choices=["ondemand", None], + default=None, + help="whether to fallback to on-demand instances for min nodes" + + " if spot instances aren't available") + + composition_group.add_argument("--autoscaling-ondemand-percentage", + dest="autoscaling_ondemand_percentage", + type=int, + help="percentage of ondemand nodes in autoscaling config") + composition_group.add_argument("--autoscaling-spot-block-percentage", + dest="autoscaling_spot_block_percentage", + type=int, + help="percentage of spot block nodes in autoscaling config") + composition_group.add_argument("--autoscaling-spot-percentage", + dest="autoscaling_spot_percentage", + type=int, + help="percentage of spot nodes in autoscaling config") + composition_group.add_argument("--autoscaling-spot-block-duration", + dest="autoscaling_spot_block_duration", + type=int, + default=120, + help="spot block duration unit: minutes") + composition_group.add_argument("--autoscaling-maximum-bid-price-percentage", + dest="autoscaling_maximum_bid_price_percentage", + type=int, + default=100, + help="maximum value to bid for autoscaling spot instances" + + " expressed as a percentage of the base" + + " price for the master instance types") + composition_group.add_argument("--autoscaling-timeout-for-request", + dest="autoscaling_timeout_for_request", + type=int, + default=1, + help="timeout for a autoscaling spot 
instance request, unit: minutes") + composition_group.add_argument("--autoscaling-spot-fallback", + dest="autoscaling_spot_fallback", + choices=["ondemand", None], + default=None, + help="whether to fallback to on-demand instances for autoscaling nodes" + + " if spot instances aren't available") + + # monitoring settings + monitoring_group = argparser.add_argument_group("monitoring settings") + ganglia = monitoring_group.add_mutually_exclusive_group() + ganglia.add_argument("--enable-ganglia-monitoring", + dest="enable_ganglia_monitoring", + action="store_true", + default=None, + help="enable ganglia monitoring for the" + + " cluster", ) + ganglia.add_argument("--disable-ganglia-monitoring", + dest="enable_ganglia_monitoring", + action="store_false", + default=None, + help="disable ganglia monitoring for the" + + " cluster", ) + + datadog_group = argparser.add_argument_group("datadog settings") + datadog_group.add_argument("--datadog-api-token", + dest="datadog_api_token", + default=None, + help="fernet key for airflow cluster", ) + datadog_group.add_argument("--datadog-app-token", + dest="datadog_app_token", + default=None, + help="overrides for airflow cluster", ) + + internal_group = argparser.add_argument_group("internal settings") + internal_group.add_argument("--image-overrides", + dest="image_uri_overrides", + default=None, + help="overrides for image", ) + + env_group = argparser.add_argument_group("environment settings") + env_group.add_argument("--env-name", + dest="env_name", + default=None, + help="name of Python and R environment") + env_group.add_argument("--python-version", + dest="python_version", + default=None, + help="version of Python in environment") + env_group.add_argument("--r-version", + dest="r_version", + default=None, + help="version of R in environment") + + start_stop_group = argparser.add_argument_group("start stop settings") + start_stop_group.add_argument("--disable-cluster-pause", + dest="disable_cluster_pause", + action='store_true', + default=None, + help="disable cluster pause") + start_stop_group.add_argument("--no-disable-cluster-pause", + dest="disable_cluster_pause", + action='store_false', + default=None, + help="disable cluster pause") + start_stop_group.add_argument("--paused-cluster-timeout", + dest="paused_cluster_timeout_mins", + default=None, + type=int, + help="paused cluster timeout in min") + start_stop_group.add_argument("--disable-autoscale-node-pause", + dest="disable_autoscale_node_pause", + action='store_true', + default=None, + help="disable autoscale node pause") + start_stop_group.add_argument("--no-disable-autoscale-node-pause", + dest="disable_autoscale_node_pause", + action='store_false', + default=None, + help="disable autoscale node pause") + start_stop_group.add_argument("--paused-autoscale-node-timeout", + dest="paused_autoscale_node_timeout_mins", + default=None, + type=int, + help="paused autoscale node timeout in min") diff --git a/qds_sdk/clusterv2.py b/qds_sdk/clusterv2.py index eb3f07f6..957a6f94 100755 --- a/qds_sdk/clusterv2.py +++ b/qds_sdk/clusterv2.py @@ -6,143 +6,10 @@ import argparse import json + def str2bool(v): return v.lower() in ("yes", "true", "t", "1") -class ClusterCmdLine: - - @staticmethod - def parsers(action): - argparser = argparse.ArgumentParser( - prog="qds.py cluster", - description="Cluster Operations for Qubole Data Service.") - subparsers = argparser.add_subparsers(title="Cluster operations") - - if action == "create": - create = subparsers.add_parser("create", help="Create a new cluster") - 
ClusterCmdLine.create_update_clone_parser(create, action="create") - create.set_defaults(func=ClusterV2.create) - - if action == "update": - update = subparsers.add_parser("update", help="Update the settings of an existing cluster") - ClusterCmdLine.create_update_clone_parser(update, action="update") - update.set_defaults(func=ClusterV2.update) - - if action == "clone": - clone = subparsers.add_parser("clone", help="Clone a cluster from an existing one") - ClusterCmdLine.create_update_clone_parser(clone, action="clone") - clone.set_defaults(func=ClusterV2.clone) - - if action == "list": - li = subparsers.add_parser("list", help="list clusters from existing clusters depending upon state") - ClusterCmdLine.list_parser(li, action="list") - li.set_defaults(func=ClusterV2.list) - return argparser - - @staticmethod - def list_parser(subparser, action=None): - - # cluster info parser - ClusterInfoV2.list_info_parser(subparser, action) - - @staticmethod - def create_update_clone_parser(subparser, action=None): - # cloud config parser - cloud = Qubole.get_cloud() - cloud.create_parser(subparser) - - # cluster info parser - ClusterInfoV2.cluster_info_parser(subparser, action) - - # engine config parser - Engine.engine_parser(subparser) - - @staticmethod - def run(args): - parser = ClusterCmdLine.parsers(args[0]) - arguments = parser.parse_args(args) - if args[0] in ["create", "clone", "update"]: - ClusterCmdLine.get_cluster_create_clone_update(arguments, args[0]) - else: - return arguments.func(arguments.label, arguments.cluster_id, arguments.state, - arguments.page, arguments.per_page) - - @staticmethod - def get_cluster_create_clone_update(arguments, action): - customer_ssh_key = util._read_file(arguments.customer_ssh_key_file) - # This will set cluster info and monitoring settings - cluster_info = ClusterInfoV2(arguments.label) - cluster_info.set_cluster_info(disallow_cluster_termination=arguments.disallow_cluster_termination, - enable_ganglia_monitoring=arguments.enable_ganglia_monitoring, - datadog_api_token=arguments.datadog_api_token, - datadog_app_token=arguments.datadog_app_token, - node_bootstrap=arguments.node_bootstrap_file, - master_instance_type=arguments.master_instance_type, - slave_instance_type=arguments.slave_instance_type, - min_nodes=arguments.initial_nodes, - max_nodes=arguments.max_nodes, - slave_request_type=arguments.slave_request_type, - fallback_to_ondemand=arguments.fallback_to_ondemand, - node_base_cooldown_period=arguments.node_base_cooldown_period, - node_spot_cooldown_period=arguments.node_spot_cooldown_period, - custom_tags=arguments.custom_tags, - heterogeneous_config=arguments.heterogeneous_config, - maximum_bid_price_percentage=arguments.maximum_bid_price_percentage, - timeout_for_request=arguments.timeout_for_request, - maximum_spot_instance_percentage=arguments.maximum_spot_instance_percentage, - stable_maximum_bid_price_percentage=arguments.stable_maximum_bid_price_percentage, - stable_timeout_for_request=arguments.stable_timeout_for_request, - stable_spot_fallback=arguments.stable_spot_fallback, - spot_block_duration=arguments.spot_block_duration, - idle_cluster_timeout=arguments.idle_cluster_timeout, - disk_count=arguments.count, - disk_type=arguments.disk_type, - disk_size=arguments.size, - root_disk_size=arguments.root_disk_size, - upscaling_config=arguments.upscaling_config, - enable_encryption=arguments.encrypted_ephemerals, - customer_ssh_key=customer_ssh_key, - image_uri_overrides=arguments.image_uri_overrides, - env_name=arguments.env_name, - 
python_version=arguments.python_version, - r_version=arguments.r_version, - disable_cluster_pause=arguments.disable_cluster_pause, - paused_cluster_timeout_mins=arguments.paused_cluster_timeout_mins, - disable_autoscale_node_pause=arguments.disable_autoscale_node_pause, - paused_autoscale_node_timeout_mins=arguments.paused_autoscale_node_timeout_mins) - - # This will set cloud config settings - cloud_config = Qubole.get_cloud() - cloud_config.set_cloud_config_from_arguments(arguments) - - # This will set engine settings - engine_config = Engine(flavour=arguments.flavour) - engine_config.set_engine_config_settings(arguments) - - cluster_request = ClusterCmdLine.get_cluster_request_parameters(cluster_info, cloud_config, engine_config) - - action = action - if action == "create": - return arguments.func(cluster_request) - else: - return arguments.func(arguments.cluster_id_label, cluster_request) - - @staticmethod - def get_cluster_request_parameters(cluster_info, cloud_config, engine_config): - ''' - Use this to return final minimal request from cluster_info, cloud_config or engine_config objects - Alternatively call util._make_minimal if only one object needs to be implemented - ''' - - cluster_request = {} - cloud_config = util._make_minimal(cloud_config.__dict__) - if bool(cloud_config): cluster_request['cloud_config'] = cloud_config - - engine_config = util._make_minimal(engine_config.__dict__) - if bool(engine_config): cluster_request['engine_config'] = engine_config - - cluster_request.update(util._make_minimal(cluster_info.__dict__)) - return cluster_request class ClusterInfoV2(object): """ @@ -159,7 +26,48 @@ def __init__(self, label): self.cluster_info = {} self.cluster_info['label'] = label self.monitoring = {} - self.internal = {} # right now not supported + self.internal = {} # right now not supported + + def set_cluster_info_from_arguments(self, arguments): + customer_ssh_key = util._read_file(arguments.customer_ssh_key_file) + self.set_cluster_info(disallow_cluster_termination=arguments.disallow_cluster_termination, + enable_ganglia_monitoring=arguments.enable_ganglia_monitoring, + datadog_api_token=arguments.datadog_api_token, + datadog_app_token=arguments.datadog_app_token, + node_bootstrap=arguments.node_bootstrap_file, + master_instance_type=arguments.master_instance_type, + slave_instance_type=arguments.slave_instance_type, + min_nodes=arguments.initial_nodes, + max_nodes=arguments.max_nodes, + slave_request_type=arguments.slave_request_type, + fallback_to_ondemand=arguments.fallback_to_ondemand, + node_base_cooldown_period=arguments.node_base_cooldown_period, + node_spot_cooldown_period=arguments.node_spot_cooldown_period, + custom_tags=arguments.custom_tags, + heterogeneous_config=arguments.heterogeneous_config, + maximum_bid_price_percentage=arguments.maximum_bid_price_percentage, + timeout_for_request=arguments.timeout_for_request, + maximum_spot_instance_percentage=arguments.maximum_spot_instance_percentage, + stable_maximum_bid_price_percentage=arguments.stable_maximum_bid_price_percentage, + stable_timeout_for_request=arguments.stable_timeout_for_request, + stable_spot_fallback=arguments.stable_spot_fallback, + spot_block_duration=arguments.spot_block_duration, + idle_cluster_timeout=arguments.idle_cluster_timeout, + disk_count=arguments.count, + disk_type=arguments.disk_type, + disk_size=arguments.size, + root_disk_size=arguments.root_disk_size, + upscaling_config=arguments.upscaling_config, + enable_encryption=arguments.encrypted_ephemerals, + 
customer_ssh_key=customer_ssh_key, + image_uri_overrides=arguments.image_uri_overrides, + env_name=arguments.env_name, + python_version=arguments.python_version, + r_version=arguments.r_version, + disable_cluster_pause=arguments.disable_cluster_pause, + paused_cluster_timeout_mins=arguments.paused_cluster_timeout_mins, + disable_autoscale_node_pause=arguments.disable_autoscale_node_pause, + paused_autoscale_node_timeout_mins=arguments.paused_autoscale_node_timeout_mins) def set_cluster_info(self, disallow_cluster_termination=None, @@ -320,7 +228,7 @@ def set_cluster_info(self, self.cluster_info['force_tunnel'] = force_tunnel self.cluster_info['fallback_to_ondemand'] = fallback_to_ondemand self.cluster_info['node_base_cooldown_period'] = node_base_cooldown_period - self.cluster_info['node_spot_cooldown_period'] = node_spot_cooldown_period + self.cluster_info['node_volatile_cooldown_period'] = node_spot_cooldown_period self.cluster_info['customer_ssh_key'] = customer_ssh_key if custom_tags and custom_tags.strip(): try: @@ -336,8 +244,10 @@ def set_cluster_info(self, self.cluster_info['rootdisk'] = {} self.cluster_info['rootdisk']['size'] = root_disk_size - self.set_spot_instance_settings(maximum_bid_price_percentage, timeout_for_request, maximum_spot_instance_percentage) - self.set_stable_spot_bid_settings(stable_maximum_bid_price_percentage, stable_timeout_for_request, stable_spot_fallback) + self.set_spot_instance_settings(maximum_bid_price_percentage, timeout_for_request, + maximum_spot_instance_percentage) + self.set_stable_spot_bid_settings(stable_maximum_bid_price_percentage, stable_timeout_for_request, + stable_spot_fallback) self.set_spot_block_settings(spot_block_duration) self.set_data_disk(disk_size, disk_count, disk_type, upscaling_config, enable_encryption) self.set_monitoring(enable_ganglia_monitoring, datadog_api_token, datadog_app_token) @@ -705,16 +615,18 @@ def cluster_info_parser(argparser, action): type=int, help="paused autoscale node timeout in min") -class ClusterV2(Resource): +class ClusterV2(Resource): rest_entity_path = "clusters" + api_version = "v2" @classmethod def create(cls, cluster_info): """ Create a new cluster using information provided in `cluster_info`. """ - conn = Qubole.agent(version="v2") + + conn = Qubole.agent(version=cls.api_version) return conn.post(cls.rest_entity_path, data=cluster_info) @classmethod @@ -723,7 +635,7 @@ def update(cls, cluster_id_label, cluster_info): Update the cluster with id/label `cluster_id_label` using information provided in `cluster_info`. """ - conn = Qubole.agent(version="v2") + conn = Qubole.agent(version=cls.api_version) return conn.put(cls.element_path(cluster_id_label), data=cluster_info) @classmethod @@ -732,7 +644,7 @@ def clone(cls, cluster_id_label, cluster_info): Update the cluster with id/label `cluster_id_label` using information provided in `cluster_info`. 
""" - conn = Qubole.agent(version="v2") + conn = Qubole.agent(version=cls.api_version) return conn.post(cls.element_path(cluster_id_label) + '/clone', data=cluster_info) @classmethod @@ -758,7 +670,7 @@ def list(cls, label=None, cluster_id=None, state=None, page=None, per_page=None) if per_page: params['per_page'] = per_page params = None if not params else params - conn = Qubole.agent(version="v2") + conn = Qubole.agent(version=cls.api_version) cluster_list = conn.get(cls.rest_entity_path) if state is None: # return the complete list since state is None @@ -776,5 +688,5 @@ def show(cls, cluster_id_label): """ Show information about the cluster with id/label `cluster_id_label`. """ - conn = Qubole.agent(version="v2") + conn = Qubole.agent(version=cls.api_version) return conn.get(cls.element_path(cluster_id_label)) diff --git a/qds_sdk/qubole.py b/qds_sdk/qubole.py index 06be4da5..d8b842a5 100644 --- a/qds_sdk/qubole.py +++ b/qds_sdk/qubole.py @@ -121,3 +121,4 @@ def get_cloud_object(cls, cloud_name): elif cloud_name.lower() == "gcp": import qds_sdk.cloud.gcp_cloud return qds_sdk.cloud.gcp_cloud.GcpCloud() + diff --git a/tests/test_clusterv2.py b/tests/test_clusterv2.py index 4e5dd0ff..34dd8433 100644 --- a/tests/test_clusterv2.py +++ b/tests/test_clusterv2.py @@ -600,7 +600,7 @@ def test_node_spot_cooldown_period_v2(self): qds.main() Connection._api_call.assert_called_with('POST', 'clusters', {'cluster_info': {'label': ['test_label'], - 'node_spot_cooldown_period': 15}}) + 'node_volatile_cooldown_period': 15}}) def test_node_spot_cooldown_period_invalid_v2(self): sys.argv = ['qds.py', '--version', 'v2', 'cluster', 'create', '--label', 'test_label', @@ -872,7 +872,7 @@ def test_node_spot_cooldown_period_v2(self): Connection._api_call = Mock(return_value={}) qds.main() Connection._api_call.assert_called_with('PUT', 'clusters/123', - {'cluster_info': {'node_spot_cooldown_period': 15}}) + {'cluster_info': {'node_volatile_cooldown_period': 15}}) def test_node_spot_cooldown_period_invalid_v2(self): sys.argv = ['qds.py', '--version', 'v2', 'cluster', 'update', '123', diff --git a/tests/test_clusterv22.py b/tests/test_clusterv22.py new file mode 100644 index 00000000..e05128dd --- /dev/null +++ b/tests/test_clusterv22.py @@ -0,0 +1,197 @@ +from __future__ import print_function +import sys +import os + +if sys.version_info > (2, 7, 0): + import unittest +else: + import unittest2 as unittest +from mock import Mock, ANY +import tempfile + +sys.path.append(os.path.join(os.path.dirname(__file__), '../bin')) +import qds +from qds_sdk.connection import Connection +from test_base import print_command +from test_base import QdsCliTestCase +from qds_sdk.cloud.cloud import Cloud +from qds_sdk.qubole import Qubole + + +class TestClusterCreate(QdsCliTestCase): + # default cluster composition + def test_cluster_info(self): + sys.argv = ['qds.py', '--version', 'v2.2', 'cluster', 'create', '--label', 'test_label', + '--compute-access-key', 'aki', '--compute-secret-key', 'sak', '--min-nodes', '3', + '--max-nodes', '5', '--disallow-cluster-termination', '--enable-ganglia-monitoring', + '--node-bootstrap-file', 'test_file_name', '--master-instance-type', + 'm1.xlarge', '--slave-instance-type', 'm1.large', '--encrypted-ephemerals'] + Qubole.cloud = None + print_command() + Connection._api_call = Mock(return_value={}) + qds.main() + Connection._api_call.assert_called_with('POST', 'clusters', { + 'cloud_config': {'compute_config': {'compute_secret_key': 'sak', 'compute_access_key': 'aki'}}, + 'monitoring': {'ganglia': 
True}, + 'cluster_info': {'master_instance_type': 'm1.xlarge', 'node_bootstrap': 'test_file_name', + 'slave_instance_type': 'm1.large', 'label': ['test_label'], + 'disallow_cluster_termination': True, 'max_nodes': 5, 'min_nodes': 3, + 'composition': {'min_nodes': {'nodes': [{'percentage': 100, 'type': 'ondemand'}]}, + 'master': {'nodes': [{'percentage': 100, 'type': 'ondemand'}]}, + 'autoscaling_nodes': {'nodes': [{'percentage': 50, 'type': 'ondemand'}, + {'timeout_for_request': 1, + 'percentage': 50, 'type': 'spot', + 'fallback': 'ondemand', + 'maximum_bid_price_percentage': 100}]}}, + 'datadisk': {'encryption': True}}}) + + def test_od_od_od(self): + sys.argv = ['qds.py', '--version', 'v2.2', 'cluster', 'create', '--label', 'test_label', + '--master-type', 'ondemand', '--min-ondemand-percentage', '100', + '--autoscaling-ondemand-percentage', '100'] + Qubole.cloud = None + print_command() + Connection._api_call = Mock(return_value={}) + qds.main() + Connection._api_call.assert_called_with('POST', 'clusters', {'cluster_info': { + 'composition': {'min_nodes': {'nodes': [{'percentage': 100, 'type': 'ondemand'}]}, + 'master': {'nodes': [{'percentage': 100, 'type': 'ondemand'}]}, + 'autoscaling_nodes': {'nodes': [{'percentage': 100, 'type': 'ondemand'}]}}, + 'label': ['test_label']}}) + + def test_od_od_odspot(self): + sys.argv = ['qds.py', '--version', 'v2.2', 'cluster', 'create', '--label', 'test_label', + '--master-type', 'ondemand', '--min-ondemand-percentage', '100', + '--autoscaling-ondemand-percentage', + '50', '--autoscaling-spot-percentage', '50', '--autoscaling-maximum-bid-price-percentage', '50', + '--autoscaling-timeout-for-request', '3', '--autoscaling-spot-fallback', 'ondemand'] + Qubole.cloud = None + print_command() + Connection._api_call = Mock(return_value={}) + qds.main() + Connection._api_call.assert_called_with('POST', 'clusters', {'cluster_info': { + 'composition': {'min_nodes': {'nodes': [{'percentage': 100, 'type': 'ondemand'}]}, + 'master': {'nodes': [{'percentage': 100, 'type': 'ondemand'}]}, 'autoscaling_nodes': { + 'nodes': [{'percentage': 50, 'type': 'ondemand'}, + {'timeout_for_request': 3, 'percentage': 50, 'type': 'spot', 'fallback': 'ondemand', + 'maximum_bid_price_percentage': 50}]}}, 'label': ['test_label']}}) + + def test_od_od_odspot_nofallback(self): + sys.argv = ['qds.py', '--version', 'v2.2', 'cluster', 'create', '--label', 'test_label', + '--master-type', 'ondemand', '--min-ondemand-percentage', '100', + '--autoscaling-ondemand-percentage', + '50', '--autoscaling-spot-percentage', '50', '--autoscaling-maximum-bid-price-percentage', '50', + '--autoscaling-timeout-for-request', '3', '--autoscaling-spot-fallback', None] + Qubole.cloud = None + print_command() + Connection._api_call = Mock(return_value={}) + qds.main() + Connection._api_call.assert_called_with('POST', 'clusters', {'cluster_info': { + 'composition': {'min_nodes': {'nodes': [{'percentage': 100, 'type': 'ondemand'}]}, + 'master': {'nodes': [{'percentage': 100, 'type': 'ondemand'}]}, 'autoscaling_nodes': { + 'nodes': [{'percentage': 50, 'type': 'ondemand'}, + {'timeout_for_request': 3, 'percentage': 50, 'type': 'spot', 'fallback': None, + 'maximum_bid_price_percentage': 50}]}}, 'label': ['test_label']}}) + + def test_od_od_spotblock(self): + sys.argv = ['qds.py', '--version', 'v2.2', 'cluster', 'create', '--label', 'test_label', + '--master-type', 'ondemand', '--min-ondemand-percentage', '100', + '--autoscaling-spot-block-percentage', + '100', '--autoscaling-spot-block-duration', '60'] + 
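+        # (Editorial aside, based on the assertion below: the two spot block
+        # flags above collapse into a single autoscaling node entry,
+        #   {'percentage': 100, 'type': 'spotblock', 'timeout': 60}
+        # i.e. --autoscaling-spot-block-duration becomes the node's 'timeout'.)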
Qubole.cloud = None + print_command() + Connection._api_call = Mock(return_value={}) + qds.main() + Connection._api_call.assert_called_with('POST', 'clusters', {'cluster_info': { + 'composition': {'min_nodes': {'nodes': [{'percentage': 100, 'type': 'ondemand'}]}, + 'master': {'nodes': [{'percentage': 100, 'type': 'ondemand'}]}, + 'autoscaling_nodes': {'nodes': [{'percentage': 100, 'type': 'spotblock', 'timeout': 60}]}}, + 'label': ['test_label']}}) + + def test_od_od_spotblockspot(self): + sys.argv = ['qds.py', '--version', 'v2.2', 'cluster', 'create', '--label', 'test_label', + '--master-type', 'ondemand', '--min-ondemand-percentage', '100', + '--autoscaling-spot-block-percentage', + '50', '--autoscaling-spot-block-duration', '60', '--autoscaling-spot-percentage', '50', + '--autoscaling-maximum-bid-price-percentage', '50', + '--autoscaling-timeout-for-request', '3', '--autoscaling-spot-fallback', None] + Qubole.cloud = None + print_command() + Connection._api_call = Mock(return_value={}) + qds.main() + Connection._api_call.assert_called_with('POST', 'clusters', {'cluster_info': { + 'composition': {'min_nodes': {'nodes': [{'percentage': 100, 'type': 'ondemand'}]}, + 'master': {'nodes': [{'percentage': 100, 'type': 'ondemand'}]}, 'autoscaling_nodes': { + 'nodes': [{'percentage': 50, 'type': 'spotblock', 'timeout': 60}, + {'timeout_for_request': 3, 'percentage': 50, 'type': 'spot', 'fallback': None, + 'maximum_bid_price_percentage': 50}]}}, 'label': ['test_label']}}) + + def test_od_od_spot(self): + sys.argv = ['qds.py', '--version', 'v2.2', 'cluster', 'create', '--label', 'test_label', + '--master-type', 'ondemand', '--min-ondemand-percentage', '100', '--autoscaling-spot-percentage', + '100', + '--autoscaling-maximum-bid-price-percentage', '50', '--autoscaling-timeout-for-request', '3', + '--autoscaling-spot-fallback', None] + Qubole.cloud = None + print_command() + Connection._api_call = Mock(return_value={}) + qds.main() + Connection._api_call.assert_called_with('POST', 'clusters', {'cluster_info': { + 'composition': {'min_nodes': {'nodes': [{'percentage': 100, 'type': 'ondemand'}]}, + 'master': {'nodes': [{'percentage': 100, 'type': 'ondemand'}]}, 'autoscaling_nodes': { + 'nodes': [{'timeout_for_request': 3, 'percentage': 100, 'type': 'spot', 'fallback': None, + 'maximum_bid_price_percentage': 50}]}}, 'label': ['test_label']}}) + + def test_od_spot_spot(self): + sys.argv = ['qds.py', '--version', 'v2.2', 'cluster', 'create', '--label', 'test_label', + '--master-type', 'ondemand', '--min-spot-percentage', '100', + '--min-maximum-bid-price-percentage', '50', '--min-timeout-for-request', '3', + '--min-spot-fallback', None, '--autoscaling-spot-percentage', '100', + '--autoscaling-maximum-bid-price-percentage', '50', '--autoscaling-timeout-for-request', '3', + '--autoscaling-spot-fallback', None] + Qubole.cloud = None + print_command() + Connection._api_call = Mock(return_value={}) + qds.main() + Connection._api_call.assert_called_with('POST', 'clusters', {'cluster_info': {'composition': {'min_nodes': { + 'nodes': [{'timeout_for_request': 3, 'percentage': 100, 'type': 'spot', 'fallback': None, + 'maximum_bid_price_percentage': 50}]}, 'master': { + 'nodes': [{'percentage': 100, 'type': 'ondemand'}]}, 'autoscaling_nodes': {'nodes': [ + {'timeout_for_request': 3, 'percentage': 100, 'type': 'spot', 'fallback': None, + 'maximum_bid_price_percentage': 50}]}}, 'label': ['test_label']}}) + + def test_spotblock_spotblock_spotblock(self): + sys.argv = ['qds.py', '--version', 'v2.2', 'cluster', 
'create', '--label', 'test_label', + '--master-type', 'spotblock', '--master-spot-block-duration', '60', '--min-spot-block-percentage', + '100', '--min-spot-block-duration', '60', '--autoscaling-spot-block-percentage', + '100', '--autoscaling-spot-block-duration', '60'] + Qubole.cloud = None + print_command() + Connection._api_call = Mock(return_value={}) + qds.main() + Connection._api_call.assert_called_with('POST', 'clusters', {'cluster_info': { + 'composition': {'min_nodes': {'nodes': [{'percentage': 100, 'type': 'spotblock', 'timeout': 60}]}, + 'master': {'nodes': [{'percentage': 100, 'type': 'spotblock', 'timeout': 60}]}, + 'autoscaling_nodes': {'nodes': [{'percentage': 100, 'type': 'spotblock', 'timeout': 60}]}}, + 'label': ['test_label']}}) + + def test_spot_spot_spot(self): + sys.argv = ['qds.py', '--version', 'v2.2', 'cluster', 'create', '--label', 'test_label', + '--master-type', 'spot', '--master-maximum-bid-price-percentage', '50', + '--master-timeout-for-request', '3', + '--master-spot-fallback', None, '--min-spot-percentage', '100', + '--min-maximum-bid-price-percentage', '50', '--min-timeout-for-request', '3', + '--min-spot-fallback', None, '--autoscaling-spot-percentage', '100', + '--autoscaling-maximum-bid-price-percentage', '50', '--autoscaling-timeout-for-request', '3', + '--autoscaling-spot-fallback', None] + Qubole.cloud = None + print_command() + Connection._api_call = Mock(return_value={}) + qds.main() + Connection._api_call.assert_called_with('POST', 'clusters', {'cluster_info': {'composition': {'min_nodes': { + 'nodes': [{'timeout_for_request': 3, 'percentage': 100, 'type': 'spot', 'fallback': None, + 'maximum_bid_price_percentage': 50}]}, 'master': {'nodes': [ + {'timeout_for_request': 3, 'percentage': 100, 'type': 'spot', 'fallback': None, + 'maximum_bid_price_percentage': 50}]}, 'autoscaling_nodes': {'nodes': [ + {'timeout_for_request': 3, 'percentage': 100, 'type': 'spot', 'fallback': None, + 'maximum_bid_price_percentage': 50}]}}, 'label': ['test_label']}}) From 3937ba6b41f755655d4222cfe6b60e63f2170220 Mon Sep 17 00:00:00 2001 From: Tarun Goyal Date: Tue, 12 Nov 2019 15:51:35 +0530 Subject: [PATCH 30/69] SDK-376: Revert the refactoring of cluster_cmd_line (#286) Revert the refactoring of cluster_cmd_line --- bin/qds.py | 2 +- qds_sdk/cluster_cmd_line.py | 110 ------------------------------ qds_sdk/cluster_info_factory.py | 17 ----- qds_sdk/clusterv2.py | 114 ++++++++++++++++++++++++++++++++ 4 files changed, 115 insertions(+), 128 deletions(-) delete mode 100644 qds_sdk/cluster_cmd_line.py delete mode 100644 qds_sdk/cluster_info_factory.py diff --git a/bin/qds.py b/bin/qds.py index 4952763a..fac8b439 100755 --- a/bin/qds.py +++ b/bin/qds.py @@ -15,7 +15,7 @@ from qds_sdk.nezha import NezhaCmdLine from qds_sdk.user import UserCmdLine from qds_sdk.template import TemplateCmdLine -from qds_sdk.cluster_cmd_line import ClusterCmdLine +from qds_sdk.clusterv2 import ClusterCmdLine from qds_sdk.sensors import * import os import sys diff --git a/qds_sdk/cluster_cmd_line.py b/qds_sdk/cluster_cmd_line.py deleted file mode 100644 index 8978c162..00000000 --- a/qds_sdk/cluster_cmd_line.py +++ /dev/null @@ -1,110 +0,0 @@ -from qds_sdk.cluster_info_factory import ClusterInfoFactory -from qds_sdk.clusterv2 import ClusterV2 -from qds_sdk.qubole import Qubole -from qds_sdk.resource import Resource -from qds_sdk.cloud.cloud import Cloud -from qds_sdk.engine import Engine -from qds_sdk import util -import argparse -import json - -class ClusterCmdLine: - - @staticmethod - def 
parsers(action): - argparser = argparse.ArgumentParser( - prog="qds.py cluster", - description="Cluster Operations for Qubole Data Service.") - subparsers = argparser.add_subparsers(title="Cluster operations") - if Qubole.version is not None: - ClusterV2.api_version = Qubole.version - if action == "create": - create = subparsers.add_parser("create", help="Create a new cluster") - ClusterCmdLine.create_update_clone_parser(create, action="create") - create.set_defaults(func=ClusterV2.create) - - if action == "update": - update = subparsers.add_parser("update", help="Update the settings of an existing cluster") - ClusterCmdLine.create_update_clone_parser(update, action="update") - update.set_defaults(func=ClusterV2.update) - - if action == "clone": - clone = subparsers.add_parser("clone", help="Clone a cluster from an existing one") - ClusterCmdLine.create_update_clone_parser(clone, action="clone") - clone.set_defaults(func=ClusterV2.clone) - - if action == "list": - li = subparsers.add_parser("list", help="list clusters from existing clusters depending upon state") - ClusterCmdLine.list_parser(li, action="list") - li.set_defaults(func=ClusterV2.list) - return argparser - - @staticmethod - def list_parser(subparser, action=None, ): - - # cluster info parser - cluster_info_cls = ClusterInfoFactory.get_cluster_info_cls() - cluster_info_cls.list_info_parser(subparser, action) - - @staticmethod - def create_update_clone_parser(subparser, action=None): - # cloud config parser - cloud = Qubole.get_cloud() - cloud.create_parser(subparser) - - # cluster info parser - cluster_info_cls = ClusterInfoFactory.get_cluster_info_cls() - cluster_info_cls.cluster_info_parser(subparser, action) - - # engine config parser - Engine.engine_parser(subparser) - - @staticmethod - def run(args): - parser = ClusterCmdLine.parsers(args[0]) - arguments = parser.parse_args(args) - if args[0] in ["create", "clone", "update"]: - ClusterCmdLine.get_cluster_create_clone_update(arguments, args[0]) - else: - return arguments.func(arguments.label, arguments.cluster_id, arguments.state, - arguments.page, arguments.per_page) - - @staticmethod - def get_cluster_create_clone_update(arguments, action): - - # This will set cluster info and monitoring settings - cluster_info_cls = ClusterInfoFactory.get_cluster_info_cls() - cluster_info = cluster_info_cls(arguments.label) - cluster_info.set_cluster_info_from_arguments(arguments) - - # This will set cloud config settings - cloud_config = Qubole.get_cloud() - cloud_config.set_cloud_config_from_arguments(arguments) - - # This will set engine settings - engine_config = Engine(flavour=arguments.flavour) - engine_config.set_engine_config_settings(arguments) - cluster_request = ClusterCmdLine.get_cluster_request_parameters(cluster_info, cloud_config, engine_config) - - action = action - if action == "create": - return arguments.func(cluster_request) - else: - return arguments.func(arguments.cluster_id_label, cluster_request) - - @staticmethod - def get_cluster_request_parameters(cluster_info, cloud_config, engine_config): - ''' - Use this to return final minimal request from cluster_info, cloud_config or engine_config objects - Alternatively call util._make_minimal if only one object needs to be implemented - ''' - - cluster_request = {} - cloud_config = util._make_minimal(cloud_config.__dict__) - if bool(cloud_config): cluster_request['cloud_config'] = cloud_config - - engine_config = util._make_minimal(engine_config.__dict__) - if bool(engine_config): cluster_request['engine_config'] = 
engine_config - - cluster_request.update(util._make_minimal(cluster_info.__dict__)) - return cluster_request \ No newline at end of file diff --git a/qds_sdk/cluster_info_factory.py b/qds_sdk/cluster_info_factory.py deleted file mode 100644 index 923e499c..00000000 --- a/qds_sdk/cluster_info_factory.py +++ /dev/null @@ -1,17 +0,0 @@ -from qds_sdk.qubole import Qubole -from qds_sdk.clusterv2 import ClusterInfoV2 -from qds_sdk.cluster_info_v22 import ClusterInfoV22 - - -class ClusterInfoFactory: - - @staticmethod - def get_cluster_info_cls(api_version=None): - if api_version is None: - api_version = Qubole.version - if api_version == "v2": - return ClusterInfoV2 - elif api_version == "v2.2": - return ClusterInfoV22 - else: - return ClusterInfoV2 diff --git a/qds_sdk/clusterv2.py b/qds_sdk/clusterv2.py index 957a6f94..e65d6570 100755 --- a/qds_sdk/clusterv2.py +++ b/qds_sdk/clusterv2.py @@ -2,6 +2,7 @@ from qds_sdk.resource import Resource from qds_sdk.cloud.cloud import Cloud from qds_sdk.engine import Engine +from qds_sdk.cluster_info_v22 import ClusterInfoV22 from qds_sdk import util import argparse import json @@ -10,6 +11,119 @@ def str2bool(v): return v.lower() in ("yes", "true", "t", "1") +class ClusterCmdLine: + + @staticmethod + def parsers(action): + argparser = argparse.ArgumentParser( + prog="qds.py cluster", + description="Cluster Operations for Qubole Data Service.") + subparsers = argparser.add_subparsers(title="Cluster operations") + if Qubole.version is not None: + ClusterV2.api_version = Qubole.version + if action == "create": + create = subparsers.add_parser("create", help="Create a new cluster") + ClusterCmdLine.create_update_clone_parser(create, action="create") + create.set_defaults(func=ClusterV2.create) + + if action == "update": + update = subparsers.add_parser("update", help="Update the settings of an existing cluster") + ClusterCmdLine.create_update_clone_parser(update, action="update") + update.set_defaults(func=ClusterV2.update) + + if action == "clone": + clone = subparsers.add_parser("clone", help="Clone a cluster from an existing one") + ClusterCmdLine.create_update_clone_parser(clone, action="clone") + clone.set_defaults(func=ClusterV2.clone) + + if action == "list": + li = subparsers.add_parser("list", help="list clusters from existing clusters depending upon state") + ClusterCmdLine.list_parser(li, action="list") + li.set_defaults(func=ClusterV2.list) + return argparser + + @staticmethod + def list_parser(subparser, action=None, ): + + # cluster info parser + cluster_info_cls = ClusterInfoFactory.get_cluster_info_cls() + cluster_info_cls.list_info_parser(subparser, action) + + @staticmethod + def create_update_clone_parser(subparser, action=None): + # cloud config parser + cloud = Qubole.get_cloud() + cloud.create_parser(subparser) + + # cluster info parser + cluster_info_cls = ClusterInfoFactory.get_cluster_info_cls() + cluster_info_cls.cluster_info_parser(subparser, action) + + # engine config parser + Engine.engine_parser(subparser) + + @staticmethod + def run(args): + parser = ClusterCmdLine.parsers(args[0]) + arguments = parser.parse_args(args) + if args[0] in ["create", "clone", "update"]: + ClusterCmdLine.get_cluster_create_clone_update(arguments, args[0]) + else: + return arguments.func(arguments.label, arguments.cluster_id, arguments.state, + arguments.page, arguments.per_page) + + @staticmethod + def get_cluster_create_clone_update(arguments, action): + + # This will set cluster info and monitoring settings + cluster_info_cls = 
ClusterInfoFactory.get_cluster_info_cls() + cluster_info = cluster_info_cls(arguments.label) + cluster_info.set_cluster_info_from_arguments(arguments) + + # This will set cloud config settings + cloud_config = Qubole.get_cloud() + cloud_config.set_cloud_config_from_arguments(arguments) + + # This will set engine settings + engine_config = Engine(flavour=arguments.flavour) + engine_config.set_engine_config_settings(arguments) + cluster_request = ClusterCmdLine.get_cluster_request_parameters(cluster_info, cloud_config, engine_config) + + action = action + if action == "create": + return arguments.func(cluster_request) + else: + return arguments.func(arguments.cluster_id_label, cluster_request) + + @staticmethod + def get_cluster_request_parameters(cluster_info, cloud_config, engine_config): + ''' + Use this to return final minimal request from cluster_info, cloud_config or engine_config objects + Alternatively call util._make_minimal if only one object needs to be implemented + ''' + + cluster_request = {} + cloud_config = util._make_minimal(cloud_config.__dict__) + if bool(cloud_config): cluster_request['cloud_config'] = cloud_config + + engine_config = util._make_minimal(engine_config.__dict__) + if bool(engine_config): cluster_request['engine_config'] = engine_config + + cluster_request.update(util._make_minimal(cluster_info.__dict__)) + return cluster_request + +class ClusterInfoFactory: + + @staticmethod + def get_cluster_info_cls(api_version=None): + if api_version is None: + api_version = Qubole.version + if api_version == "v2": + return ClusterInfoV2 + elif api_version == "v2.2": + return ClusterInfoV22 + else: + return ClusterInfoV2 class ClusterInfoV2(object): """ From 1f51003ed4e18c7a84707c5c1570b664f48b4abf Mon Sep 17 00:00:00 2001 From: satyabolnedi <57746175+satyabolnedi@users.noreply.github.com> Date: Fri, 29 Nov 2019 13:29:31 +0530 Subject: [PATCH 31/69] ACM-4728: Add HS2 cluster creation from qds-sdk (#290) Added support for creating HS2 cluster --- qds_sdk/clusterv2.py | 11 +++++++++-- qds_sdk/engine.py | 2 +- tests/test_clusterv2.py | 19 +++++++++++++++++++ 3 files changed, 29 insertions(+), 3 deletions(-) diff --git a/qds_sdk/clusterv2.py b/qds_sdk/clusterv2.py index e65d6570..e0e4cb0c 100755 --- a/qds_sdk/clusterv2.py +++ b/qds_sdk/clusterv2.py @@ -181,7 +181,8 @@ def set_cluster_info_from_arguments(self, arguments): disable_cluster_pause=arguments.disable_cluster_pause, paused_cluster_timeout_mins=arguments.paused_cluster_timeout_mins, disable_autoscale_node_pause=arguments.disable_autoscale_node_pause, - paused_autoscale_node_timeout_mins=arguments.paused_autoscale_node_timeout_mins) + paused_autoscale_node_timeout_mins=arguments.paused_autoscale_node_timeout_mins, + parent_cluster_id=arguments.parent_cluster_id) def set_cluster_info(self, disallow_cluster_termination=None, @@ -223,7 +224,8 @@ def set_cluster_info(self, disable_cluster_pause=None, paused_cluster_timeout_mins=None, disable_autoscale_node_pause=None, - paused_autoscale_node_timeout_mins=None): + paused_autoscale_node_timeout_mins=None, + parent_cluster_id=None): """ Args: @@ -357,6 +359,7 @@ def set_cluster_info(self, self.cluster_info['rootdisk'] = {} self.cluster_info['rootdisk']['size'] = root_disk_size + self.cluster_info['parent_cluster_id'] = parent_cluster_id self.set_spot_instance_settings(maximum_bid_price_percentage, timeout_for_request, maximum_spot_instance_percentage) @@ -515,6 +518,10 @@ def cluster_info_parser(argparser, action): dest="root_disk_size", type=int, help="size of the root 
volume in GB") + cluster_info.add_argument("--parent-cluster-id", + dest="parent_cluster_id", + type=int, + help="Id of the parent cluster this hs2 cluster is attached to") termination = cluster_info.add_mutually_exclusive_group() termination.add_argument("--disallow-cluster-termination", dest="disallow_cluster_termination", diff --git a/qds_sdk/engine.py b/qds_sdk/engine.py index ee55e50d..22893df8 100644 --- a/qds_sdk/engine.py +++ b/qds_sdk/engine.py @@ -147,7 +147,7 @@ def engine_parser(argparser): engine_group = argparser.add_argument_group("engine settings") engine_group.add_argument("--flavour", dest="flavour", - choices=["hadoop", "hadoop2", "presto", "spark", "sparkstreaming", "hbase", "airflow", "deeplearning"], + choices=["hadoop", "hadoop2", "hs2", "presto", "spark", "sparkstreaming", "hbase", "airflow", "deeplearning"], default=None, help="Set engine flavour") diff --git a/tests/test_clusterv2.py b/tests/test_clusterv2.py index 34dd8433..9acc5dc5 100644 --- a/tests/test_clusterv2.py +++ b/tests/test_clusterv2.py @@ -443,6 +443,25 @@ def test_presto_engine_config(self): }}, 'cluster_info': {'label': ['test_label']}}) + def test_hs2_engine_config(self): + with tempfile.NamedTemporaryFile() as temp: + temp.write("config.properties:\na=1\nb=2".encode("utf8")) + temp.flush() + sys.argv = ['qds.py', '--version', 'v2', 'cluster', 'create', '--label', + 'test_label', '--flavour', 'hs2', '--node-bootstrap-file', 'test_file_name', '--slave-instance-type', 'c1.xlarge', '--min-nodes', '3', '--parent-cluster-id', '1'] + Qubole.cloud = None + print_command() + Connection._api_call = Mock(return_value={}) + qds.main() + Connection._api_call.assert_called_with('POST', 'clusters', + {'engine_config': + {'flavour': 'hs2'}, + 'cluster_info': {'label': ['test_label'], + 'parent_cluster_id': 1, + 'min_nodes': 3, + 'node_bootstrap': 'test_file_name', + 'slave_instance_type': 'c1.xlarge' }}) + def test_spark_engine_config(self): with tempfile.NamedTemporaryFile() as temp: temp.write("config.properties:\na=1\nb=2".encode("utf8")) From 1efa8cd69adadf66007b9239f1770c12a580b9de Mon Sep 17 00:00:00 2001 From: Tarun Goyal Date: Tue, 10 Dec 2019 15:23:43 +0530 Subject: [PATCH 32/69] SDK-361: Support for cluster image version clusters API (#291) * SDK changes for adding cluster image version support --- qds_sdk/cluster_info_v22.py | 21 +++++++++++++++++++-- qds_sdk/clusterv2.py | 14 ++++++++++++-- qds_sdk/engine.py | 2 +- tests/test_clusterv2.py | 18 ++++++++++++++++++ tests/test_clusterv22.py | 16 ++++++++++++++++ 5 files changed, 66 insertions(+), 5 deletions(-) diff --git a/qds_sdk/cluster_info_v22.py b/qds_sdk/cluster_info_v22.py index 1af9cdf2..32aa52d8 100644 --- a/qds_sdk/cluster_info_v22.py +++ b/qds_sdk/cluster_info_v22.py @@ -53,7 +53,9 @@ def set_cluster_info_from_arguments(self, arguments): disable_cluster_pause=arguments.disable_cluster_pause, paused_cluster_timeout_mins=arguments.paused_cluster_timeout_mins, disable_autoscale_node_pause=arguments.disable_autoscale_node_pause, - paused_autoscale_node_timeout_mins=arguments.paused_autoscale_node_timeout_mins) + paused_autoscale_node_timeout_mins=arguments.paused_autoscale_node_timeout_mins, + parent_cluster_id=arguments.parent_cluster_id, + image_version=arguments.image_version) self.set_composition(master_type=arguments.master_type, master_spot_block_duration=arguments.master_spot_block_duration, @@ -106,7 +108,9 @@ def set_cluster_info(self, disable_cluster_pause=None, paused_cluster_timeout_mins=None, disable_autoscale_node_pause=None, - 
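+                         # (Editorial aside: the two parameters appended to this
+                         #  signature, parent_cluster_id and image_version, surface on
+                         #  the CLI as --parent-cluster-id and --image-version; per the
+                         #  tests further down, image_version is sent as
+                         #  'cluster_image_version' in the cluster_info payload.)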
paused_autoscale_node_timeout_mins=None): + paused_autoscale_node_timeout_mins=None, + parent_cluster_id=None, + image_version=None): """ Args: @@ -180,6 +184,10 @@ def set_cluster_info(self, `paused_autoscale_node_timeout_mins`: Paused autoscale node timeout in mins + `parent_cluster_id`: parent cluster id for HS2 cluster + + `image_version`: cluster image version + Doc: For getting details about arguments http://docs.qubole.com/en/latest/rest-api/cluster_api/create-new-cluster.html#parameters @@ -206,6 +214,8 @@ def set_cluster_info(self, self.cluster_info['idle_cluster_timeout'] = idle_cluster_timeout self.cluster_info['rootdisk'] = {} self.cluster_info['rootdisk']['size'] = root_disk_size + self.cluster_info['parent_cluster_id'] = parent_cluster_id + self.cluster_info['cluster_image_version'] = image_version self.set_data_disk(disk_size, disk_count, disk_type, upscaling_config, enable_encryption) self.set_monitoring(enable_ganglia_monitoring, @@ -498,6 +508,13 @@ def cluster_info_parser(argparser, action): dest="root_disk_size", type=int, help="size of the root volume in GB") + cluster_info.add_argument("--parent-cluster-id", + dest="parent_cluster_id", + type=int, + help="Id of the parent cluster this hs2 cluster is attached to") + cluster_info.add_argument("--image-version", + dest="image_version", + help="cluster image version") termination = cluster_info.add_mutually_exclusive_group() termination.add_argument("--disallow-cluster-termination", dest="disallow_cluster_termination", diff --git a/qds_sdk/clusterv2.py b/qds_sdk/clusterv2.py index e0e4cb0c..6e20ef20 100755 --- a/qds_sdk/clusterv2.py +++ b/qds_sdk/clusterv2.py @@ -182,7 +182,8 @@ def set_cluster_info_from_arguments(self, arguments): paused_cluster_timeout_mins=arguments.paused_cluster_timeout_mins, disable_autoscale_node_pause=arguments.disable_autoscale_node_pause, paused_autoscale_node_timeout_mins=arguments.paused_autoscale_node_timeout_mins, - parent_cluster_id=arguments.parent_cluster_id) + parent_cluster_id=arguments.parent_cluster_id, + image_version=arguments.image_version) def set_cluster_info(self, disallow_cluster_termination=None, @@ -225,7 +226,8 @@ def set_cluster_info(self, paused_cluster_timeout_mins=None, disable_autoscale_node_pause=None, paused_autoscale_node_timeout_mins=None, - parent_cluster_id=None): + parent_cluster_id=None, + image_version=None): """ Args: @@ -330,6 +332,10 @@ def set_cluster_info(self, `paused_autoscale_node_timeout_mins`: Paused autoscale node timeout in mins + `parent_cluster_id`: parent cluster id for HS2 cluster + + `image_version`: cluster image version + Doc: For getting details about arguments http://docs.qubole.com/en/latest/rest-api/cluster_api/create-new-cluster.html#parameters @@ -360,6 +366,7 @@ def set_cluster_info(self, self.cluster_info['rootdisk'] = {} self.cluster_info['rootdisk']['size'] = root_disk_size self.cluster_info['parent_cluster_id'] = parent_cluster_id + self.cluster_info['cluster_image_version'] = image_version self.set_spot_instance_settings(maximum_bid_price_percentage, timeout_for_request, maximum_spot_instance_percentage) @@ -522,6 +529,9 @@ def cluster_info_parser(argparser, action): dest="parent_cluster_id", type=int, help="Id of the parent cluster this hs2 cluster is attached to") + cluster_info.add_argument("--image-version", + dest="image_version", + help="cluster image version") termination = cluster_info.add_mutually_exclusive_group() termination.add_argument("--disallow-cluster-termination", dest="disallow_cluster_termination", diff --git 
a/qds_sdk/engine.py b/qds_sdk/engine.py index 22893df8..24a8289b 100644 --- a/qds_sdk/engine.py +++ b/qds_sdk/engine.py @@ -147,7 +147,7 @@ def engine_parser(argparser): engine_group = argparser.add_argument_group("engine settings") engine_group.add_argument("--flavour", dest="flavour", - choices=["hadoop", "hadoop2", "hs2", "presto", "spark", "sparkstreaming", "hbase", "airflow", "deeplearning"], + choices=["hadoop", "hadoop2", "hs2", "hive", "presto", "spark", "sparkstreaming", "hbase", "airflow", "deeplearning"], default=None, help="Set engine flavour") diff --git a/tests/test_clusterv2.py b/tests/test_clusterv2.py index 9acc5dc5..0c149e50 100644 --- a/tests/test_clusterv2.py +++ b/tests/test_clusterv2.py @@ -572,6 +572,24 @@ def test_image_override(self): 'internal':{'image_uri_overrides': 'test/image1'} }) + def test_image_version_v2(self): + sys.argv = ['qds.py', '--version', 'v2', 'cluster', 'create', '--label', + 'test_label', '--flavour', 'hadoop2', '--slave-instance-type', 'c1.xlarge', '--min-nodes', '3', '--image-version', '1.latest'] + Qubole.cloud = None + print_command() + Connection._api_call = Mock(return_value={}) + qds.main() + Connection._api_call.assert_called_with('POST', 'clusters', + {'engine_config': + {'flavour': 'hadoop2'}, + 'cluster_info': {'label': ['test_label'], + 'min_nodes': 3, + 'slave_instance_type': 'c1.xlarge', + 'cluster_image_version': '1.latest'}}) + + + + def test_spot_block_duration_v2(self): sys.argv = ['qds.py', '--version', 'v2', 'cluster', 'create', '--label', 'test_label', '--spot-block-duration', '120'] diff --git a/tests/test_clusterv22.py b/tests/test_clusterv22.py index e05128dd..5b1e4ee7 100644 --- a/tests/test_clusterv22.py +++ b/tests/test_clusterv22.py @@ -195,3 +195,19 @@ def test_spot_spot_spot(self): 'maximum_bid_price_percentage': 50}]}, 'autoscaling_nodes': {'nodes': [ {'timeout_for_request': 3, 'percentage': 100, 'type': 'spot', 'fallback': None, 'maximum_bid_price_percentage': 50}]}}, 'label': ['test_label']}}) + + def test_image_version_v22(self): + sys.argv = ['qds.py', '--version', 'v2.2', 'cluster', 'create', '--label', + 'test_label', '--flavour', 'hive', '--slave-instance-type', 'c1.xlarge', '--min-nodes', '3', '--image-version', '1.latest'] + Qubole.cloud = None + print_command() + Connection._api_call = Mock(return_value={}) + qds.main() + Connection._api_call.assert_called_with('POST', 'clusters', + {'engine_config': + {'flavour': 'hive'}, + 'cluster_info': {'label': ['test_label'], + 'min_nodes': 3, + 'slave_instance_type': 'c1.xlarge', + 'cluster_image_version': '1.latest', + 'composition': {'min_nodes': {'nodes': [{'percentage': 100, 'type': 'ondemand'}]}, 'master': {'nodes': [{'percentage': 100, 'type': 'ondemand'}]}, 'autoscaling_nodes': {'nodes': [{'percentage': 50, 'type': 'ondemand'}, {'timeout_for_request': 1, 'percentage': 50, 'type': 'spot', 'fallback': 'ondemand', 'maximum_bid_price_percentage': 100}]}}, 'label': ['test_label']}}) From d22bcd24a4ee2cb286eba55ed84939283d164705 Mon Sep 17 00:00:00 2001 From: shekharsaurabh <37484772+shekharsaurabh@users.noreply.github.com> Date: Thu, 30 Jan 2020 16:41:34 +0530 Subject: [PATCH 33/69] SDK-386: Add Retries and Delay Options for API Rate Limiting (#295) --- bin/qds.py | 23 +++++++++++++++++- qds_sdk/connection.py | 50 ++++++++++++++++++++++++++++++++------- qds_sdk/qubole.py | 51 ++++++++++++++++++++++++++++------------ qds_sdk/retry.py | 24 ------------------- tests/test_cluster.py | 8 +++---- tests/test_clusterv2.py | 2 +- tests/test_connection.py | 48 
++++++++++++++++++++++++++++++++++++++++++++++++
 7 files changed, 152 insertions(+), 54 deletions(-)
 delete mode 100644 qds_sdk/retry.py
 create mode 100644 tests/test_connection.py

diff --git a/bin/qds.py b/bin/qds.py
index fac8b439..26692dbf 100755
--- a/bin/qds.py
+++ b/bin/qds.py
@@ -582,6 +582,18 @@ def main():
                          default=os.getenv('CLOUD_PROVIDER'),
                          help="cloud", choices=["AWS", "AZURE", "ORACLE_BMC", "ORACLE_OPC", "GCP"])
 
+    optparser.add_option("--base_retry_delay", dest="base_retry_delay",
+                         type=int,
+                         default=os.getenv('QDS_BASE_RETRY_DELAY'),
+                         help="base sleep interval for exponential backoff in case of "
+                              "retryable exceptions. Defaults to 10s.")
+
+    optparser.add_option("--max_retries", dest="max_retries",
+                         type=int,
+                         default=os.getenv('QDS_MAX_RETRIES'),
+                         help="Number of re-attempts for an api-call in case of "
+                              "retryable exceptions. Defaults to 5.")
+
     optparser.add_option("-v", dest="verbose", action="store_true",
                          default=False,
                          help="verbose mode - info level logging")
@@ -613,6 +625,12 @@ def main():
     if options.poll_interval is None:
         options.poll_interval = 5
 
+    if options.max_retries is None:
+        options.max_retries = 5
+
+    if options.base_retry_delay is None:
+        options.base_retry_delay = 10
+
     if options.cloud_name is None:
         options.cloud_name = "AWS"
 
@@ -626,7 +644,10 @@ def main():
                     version=options.api_version,
                     poll_interval=options.poll_interval,
                     skip_ssl_cert_check=options.skip_ssl_cert_check,
-                    cloud_name=options.cloud_name)
+                    cloud_name=options.cloud_name,
+                    base_retry_delay=options.base_retry_delay,
+                    max_retries=options.max_retries
+                    )
 
     if len(args) < 1:
         sys.stderr.write("Missing first argument containing subcommand\n")
diff --git a/qds_sdk/connection.py b/qds_sdk/connection.py
index dbec341b..eea6fe32 100644
--- a/qds_sdk/connection.py
+++ b/qds_sdk/connection.py
@@ -3,6 +3,7 @@
 import logging
 import ssl
 import json
+import time
 import pkg_resources
 from requests.adapters import HTTPAdapter
 from datetime import datetime
@@ -10,8 +11,8 @@
     from requests.packages.urllib3.poolmanager import PoolManager
 except ImportError:
     from urllib3.poolmanager import PoolManager
-from qds_sdk.retry import retry
 from qds_sdk.exception import *
+from functools import wraps
 
 log = logging.getLogger("qds_connection")
 
@@ -34,7 +35,9 @@ def init_poolmanager(self, connections, maxsize,block=False):
 
 class Connection:
 
-    def __init__(self, auth, rest_url, skip_ssl_cert_check, reuse=True):
+    def __init__(self, auth, rest_url, skip_ssl_cert_check,
+                 reuse=True, max_retries=5,
+                 base_retry_delay=10):
         self.auth = auth
         self.rest_url = rest_url
         self.skip_ssl_cert_check = skip_ssl_cert_check
@@ -42,6 +45,8 @@ def __init__(self, auth, rest_url, skip_ssl_cert_check, reuse=True):
                         'Content-Type': 'application/json'}
 
         self.reuse = reuse
+        self.max_retries = max_retries
+        self.base_retry_delay = base_retry_delay
         if reuse:
             self.session = requests.Session()
             self.session.mount('https://', MyAdapter())
@@ -50,20 +55,46 @@ def __init__(self, auth, rest_url, skip_ssl_cert_check, reuse=True):
             self.session_with_retries = requests.Session()
             self.session_with_retries.mount('https://', MyAdapter(max_retries=3))
 
-    @retry((RetryWithDelay, requests.Timeout, ServerError), tries=6, delay=30, backoff=2)
+    def retry(ExceptionToCheck, tries=5, delay=10, backoff=2):
+        def deco_retry(f):
+            @wraps(f)
+            def f_retry(self, *args, **kwargs):
+                if hasattr(self, 'max_retries'):
+                    mtries, mdelay = self.max_retries, self.base_retry_delay
+                else:
+                    mtries, mdelay = tries, delay
+                while mtries >= 1:
+                    try:
+                        return f(self, *args, **kwargs)
+                    except ExceptionToCheck as e:
+                        logger = logging.getLogger("retry")
+                        msg = "%s, Retrying in %d seconds..." % (e.__class__.__name__,
+                                                                 mdelay)
+                        logger.info(msg)
+                        time.sleep(mdelay)
+                        mtries -= 1
+                        mdelay *= backoff
+                return f(self, *args, **kwargs)
+            return f_retry  # true decorator
+        return deco_retry
+
+    @retry((RetryWithDelay, requests.Timeout))
     def get_raw(self, path, params=None):
         return self._api_call_raw("GET", path, params=params)
 
-    @retry((RetryWithDelay, requests.Timeout, ServerError), tries=6, delay=30, backoff=2)
+    @retry((RetryWithDelay, requests.Timeout, ServerError))
     def get(self, path, params=None):
         return self._api_call("GET", path, params=params)
 
+    @retry((RetryWithDelay, requests.Timeout))
     def put(self, path, data=None):
         return self._api_call("PUT", path, data)
 
+    @retry((RetryWithDelay, requests.Timeout))
     def post(self, path, data=None):
         return self._api_call("POST", path, data)
 
+    @retry((RetryWithDelay, requests.Timeout))
     def delete(self, path, data=None):
         return self._api_call("DELETE", path, data)
 
@@ -111,10 +142,8 @@ def _api_call(self, req_type, path, data=None, params=None):
     @staticmethod
     def _handle_error(response):
         """Raise exceptions in response to any http errors
-
         Args:
             response: A Response object
-
         Raises:
             BadRequest: if HTTP error code 400 returned.
             UnauthorizedAccess: if HTTP error code 401 returned.
@@ -134,8 +163,8 @@ def _handle_error(response):
 
         if 'X-Qubole-Trace-Id' in response.headers:
             now = datetime.now()
-            time = now.strftime('%Y-%m-%d %H:%M:%S')
-            format_list = [time,response.headers['X-Qubole-Trace-Id']]
+            time_now = now.strftime('%Y-%m-%d %H:%M:%S')
+            format_list = [time_now, response.headers['X-Qubole-Trace-Id']]
             sys.stderr.write("[{}] Request ID is: {}. Please share it with Qubole Support team for any assistance".format(*format_list) + "\n")
 
         if code == 400:
@@ -164,7 +193,10 @@ def _handle_error(response):
             raise RetryWithDelay(response)
         elif code == 449:
             sys.stderr.write(response.text + "\n")
-            raise RetryWithDelay(response, "Data requested is unavailable. Retrying ...")
+            raise RetryWithDelay(response, "Data requested is unavailable. Retrying...")
+        elif code == 429:
+            sys.stderr.write(response.text + "\n")
+            raise RetryWithDelay(response, "Too many requests. Retrying...")
         elif 401 <= code < 500:
             sys.stderr.write(response.text + "\n")
             raise ClientError(response)
diff --git a/qds_sdk/qubole.py b/qds_sdk/qubole.py
index d8b842a5..df830a2c 100644
--- a/qds_sdk/qubole.py
+++ b/qds_sdk/qubole.py
@@ -21,6 +21,8 @@ class Qubole:
     """
 
     MIN_POLL_INTERVAL = 1
+    RETRIES_CAP = 5
+    MAX_RETRY_DELAY = 10
 
     _auth = None
     api_token = None
@@ -31,22 +33,24 @@ class Qubole:
     cloud_name = None
     cached_agent = None
     cloud = None
+    base_retry_delay = None
+    max_retries = None
 
     @classmethod
     def configure(cls, api_token,
                   api_url="https://api.qubole.com/api/", version="v1.2",
-                  poll_interval=5, skip_ssl_cert_check=False, cloud_name="AWS"):
+                  poll_interval=5, skip_ssl_cert_check=False, cloud_name="AWS",
+                  base_retry_delay=10, max_retries=5):
         """
         Set parameters governing interaction with QDS
-
         Args:
             `api_token`: authorization token for QDS. required
-
             `api_url`: the base URL for QDS API. configurable for testing only
-
             `version`: QDS REST api version. Will be used throughout unless overridden in Qubole.agent(..)
-
             `poll_interval`: interval in secs when polling QDS for events
+            `base_retry_delay`: interval in secs to sleep in between successive retries
+            `max_retries`: maximum number of times to retry an api call in case
+                      of retryable exceptions.
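+
+        Illustrative usage (the token value is a placeholder)::
+
+            Qubole.configure(api_token="<api_token>",
+                             base_retry_delay=10, max_retries=5)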
""" cls._auth = QuboleAuth(api_token) @@ -61,14 +65,28 @@ def configure(cls, api_token, cls.skip_ssl_cert_check = skip_ssl_cert_check cls.cloud_name = cloud_name.lower() cls.cached_agent = None - + if base_retry_delay > Qubole.MAX_RETRY_DELAY: + log.warn("Sleep between successive retries cannot be greater than" + " %s seconds." + " Setting it to" + " %s seconds.\n" + % (Qubole.MAX_RETRY_DELAY, Qubole.MAX_RETRY_DELAY)) + cls.base_retry_delay = Qubole.MAX_RETRY_DELAY + else: + cls.base_retry_delay = base_retry_delay + if max_retries > Qubole.RETRIES_CAP: + log.warn("Maximum retries cannot be greater than %s." + " Setting it to" + " default - %s.\n" % (Qubole.RETRIES_CAP, Qubole.RETRIES_CAP)) + cls.max_retries = Qubole.RETRIES_CAP + else: + cls.max_retries = max_retries @classmethod def agent(cls, version=None): """ Returns: a connection object to make REST calls to QDS - optionally override the `version` of the REST endpoint for advanced features available only in the newer version of the API available for certain resource end points eg: /v1.3/cluster. When version is @@ -76,19 +94,23 @@ def agent(cls, version=None): """ reuse_cached_agent = True if version: - log.debug("api version changed to %s" % version) - cls.rest_url = '/'.join([cls.baseurl.rstrip('/'), version]) - reuse_cached_agent = False + log.debug("api version changed to %s" % version) + cls.rest_url = '/'.join([cls.baseurl.rstrip('/'), version]) + reuse_cached_agent = False else: - cls.rest_url = '/'.join([cls.baseurl.rstrip('/'), cls.version]) + cls.rest_url = '/'.join([cls.baseurl.rstrip('/'), cls.version]) if cls.api_token is None: raise ConfigError("No API Token specified - please supply one via Qubole.configure()") if not reuse_cached_agent: - uncached_agent = Connection(cls._auth, cls.rest_url, cls.skip_ssl_cert_check) - return uncached_agent + uncached_agent = Connection(cls._auth, cls.rest_url, + cls.skip_ssl_cert_check, + True, cls.max_retries, cls.base_retry_delay) + return uncached_agent if cls.cached_agent is None: - cls.cached_agent = Connection(cls._auth, cls.rest_url, cls.skip_ssl_cert_check) + cls.cached_agent = Connection(cls._auth, cls.rest_url, + cls.skip_ssl_cert_check, + True, cls.max_retries, cls.base_retry_delay) return cls.cached_agent @@ -121,4 +143,3 @@ def get_cloud_object(cls, cloud_name): elif cloud_name.lower() == "gcp": import qds_sdk.cloud.gcp_cloud return qds_sdk.cloud.gcp_cloud.GcpCloud() - diff --git a/qds_sdk/retry.py b/qds_sdk/retry.py deleted file mode 100644 index 1676317a..00000000 --- a/qds_sdk/retry.py +++ /dev/null @@ -1,24 +0,0 @@ -import time -import logging -from functools import wraps - -log = logging.getLogger("retry") - - -def retry(ExceptionToCheck, tries=4, delay=3, backoff=2): - def deco_retry(f): - @wraps(f) - def f_retry(*args, **kwargs): - mtries, mdelay = tries, delay - while mtries > 1: - try: - return f(*args, **kwargs) - except ExceptionToCheck as e: - msg = "%s, Retrying in %d seconds..." 
% (e.__class__.__name__, mdelay) - log.info(msg) - time.sleep(mdelay) - mtries -= 1 - mdelay *= backoff - return f(*args, **kwargs) - return f_retry # true decorator - return deco_retry diff --git a/tests/test_cluster.py b/tests/test_cluster.py index b06e1c08..f08dc4a2 100644 --- a/tests/test_cluster.py +++ b/tests/test_cluster.py @@ -109,7 +109,7 @@ def test_connection(self): Connection.__init__ = Mock(return_value=None) Connection._api_call = Mock(return_value={}) qds.main() - Connection.__init__.assert_called_with(ANY, 'https://qds.api.url/api/v1.2', ANY) + Connection.__init__.assert_called_with(ANY, 'https://qds.api.url/api/v1.2', ANY, ANY, ANY, ANY) def test_connection_v13(self): sys.argv = ['qds.py', '--version', 'v1.3', 'cluster', 'list'] @@ -117,7 +117,7 @@ def test_connection_v13(self): Connection.__init__ = Mock(return_value=None) Connection._api_call = Mock(return_value={}) qds.main() - Connection.__init__.assert_called_with(ANY, 'https://qds.api.url/api/v1.3', ANY) + Connection.__init__.assert_called_with(ANY, 'https://qds.api.url/api/v1.3', ANY, ANY, ANY, ANY) class TestClusterShow(QdsCliTestCase): @@ -128,7 +128,7 @@ def test_connection(self): Connection.__init__ = Mock(return_value=None) Connection._api_call = Mock(return_value={}) qds.main() - Connection.__init__.assert_called_with(ANY, 'https://qds.api.url/api/v1.2', ANY) + Connection.__init__.assert_called_with(ANY, 'https://qds.api.url/api/v1.2', ANY, ANY, ANY, ANY) def test_connection_v13(self): sys.argv = ['qds.py', '--version', 'v1.3', 'cluster', 'list', '--label', 'test_label'] @@ -136,7 +136,7 @@ def test_connection_v13(self): Connection.__init__ = Mock(return_value=None) Connection._api_call = Mock(return_value={}) qds.main() - Connection.__init__.assert_called_with(ANY, 'https://qds.api.url/api/v1.3', ANY) + Connection.__init__.assert_called_with(ANY, 'https://qds.api.url/api/v1.3', ANY, ANY, ANY, ANY) class TestClusterDelete(QdsCliTestCase): def test_success(self): diff --git a/tests/test_clusterv2.py b/tests/test_clusterv2.py index 0c149e50..aef0f50d 100644 --- a/tests/test_clusterv2.py +++ b/tests/test_clusterv2.py @@ -1050,7 +1050,7 @@ def test_connection(self): Connection.__init__ = Mock(return_value=None) Connection._api_call = Mock(return_value={}) qds.main() - Connection.__init__.assert_called_with(ANY, 'https://qds.api.url/api/v2', ANY) + Connection.__init__.assert_called_with(ANY, 'https://qds.api.url/api/v2', ANY, ANY, ANY, ANY) class TestClusterStatus(QdsCliTestCase): diff --git a/tests/test_connection.py b/tests/test_connection.py new file mode 100644 index 00000000..ae7486a1 --- /dev/null +++ b/tests/test_connection.py @@ -0,0 +1,48 @@ +import sys +import os + +if sys.version_info > (2, 7, 0): + import unittest +else: + import unittest2 as unittest +from mock import * + +sys.path.append(os.path.join(os.path.dirname(__file__), '../bin')) +import qds +from mock import Mock, ANY +from qds_sdk.connection import Connection +from test_base import print_command +from test_base import QdsCliTestCase + + +class TestConnection(QdsCliTestCase): + + #Test with correct values set + def test_connection_object(self): + sys.argv = ['qds.py', '--max_retries', '3', '--base_retry_delay', '2', 'cluster', 'list'] + print_command() + Connection.__init__ = Mock(return_value=None) + Connection._api_call = Mock(return_value={}) + qds.main() + Connection.__init__.assert_called_with(ANY, ANY, ANY, ANY, 3, 2) + + #Test with incorrect values + def test_connection_override(self): + sys.argv = ['qds.py', '--max_retries', '15', 
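+                    # values above the caps (RETRIES_CAP = 5 and MAX_RETRY_DELAY = 10
+                    # in qds_sdk/qubole.py) are clamped, as the assertion below expects: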
'--base_retry_delay', '15', 'cluster', 'list']
+        print_command()
+        Connection.__init__ = Mock(return_value=None)
+        Connection._api_call = Mock(return_value={})
+        qds.main()
+        Connection.__init__.assert_called_with(ANY, ANY, ANY, ANY, 5, 10)
+
+    #Test with no values given should set default
+    def test_connection_default(self):
+        sys.argv = ['qds.py', 'cluster', 'list']
+        print_command()
+        Connection.__init__ = Mock(return_value=None)
+        Connection._api_call = Mock(return_value={})
+        qds.main()
+        Connection.__init__.assert_called_with(ANY, ANY, ANY, ANY, 5, 10)
+
+if __name__ == '__main__':
+    unittest.main()

From ff6dccb9b2bf55ff4b7f7a5a8f301668b285c581 Mon Sep 17 00:00:00 2001
From: vasantchaitanyamahipala
Date: Wed, 29 Jan 2020 16:29:00 +0530
Subject: [PATCH 34/69] new: dev: ACM-6382: Added support for parent cluster label in HS2 clusters (#301)

Co-authored-by: vasantm7 <60360689+vasantm7@users.noreply.github.com>
---
 qds_sdk/clusterv2.py    |  6 ++++++
 tests/test_clusterv2.py | 18 ++++++++++++++++++
 2 files changed, 24 insertions(+)

diff --git a/qds_sdk/clusterv2.py b/qds_sdk/clusterv2.py
index 6e20ef20..08a7d821 100755
--- a/qds_sdk/clusterv2.py
+++ b/qds_sdk/clusterv2.py
@@ -183,6 +183,7 @@ def set_cluster_info_from_arguments(self, arguments):
                               disable_autoscale_node_pause=arguments.disable_autoscale_node_pause,
                               paused_autoscale_node_timeout_mins=arguments.paused_autoscale_node_timeout_mins,
                               parent_cluster_id=arguments.parent_cluster_id,
+                              parent_cluster_label=arguments.parent_cluster_label,
                               image_version=arguments.image_version)
 
     def set_cluster_info(self,
@@ -227,6 +228,7 @@ def set_cluster_info(self,
                          disable_autoscale_node_pause=None,
                          paused_autoscale_node_timeout_mins=None,
                          parent_cluster_id=None,
+                         parent_cluster_label=None,
                          image_version=None):
         """
         Args:
@@ -366,6 +368,7 @@ def set_cluster_info(self,
         self.cluster_info['rootdisk'] = {}
         self.cluster_info['rootdisk']['size'] = root_disk_size
         self.cluster_info['parent_cluster_id'] = parent_cluster_id
+        self.cluster_info['parent_cluster_label'] = parent_cluster_label
         self.cluster_info['cluster_image_version'] = image_version
 
         self.set_spot_instance_settings(maximum_bid_price_percentage, timeout_for_request,
@@ -529,6 +532,9 @@ def cluster_info_parser(argparser, action):
                                   dest="parent_cluster_id",
                                   type=int,
                                   help="Id of the parent cluster this hs2 cluster is attached to")
+        cluster_info.add_argument("--parent-cluster-label",
+                                  dest="parent_cluster_label",
+                                  help="Label of the parent cluster this hs2 cluster is attached to")
         cluster_info.add_argument("--image-version",
                                   dest="image_version",
                                   help="cluster image version")
diff --git a/tests/test_clusterv2.py b/tests/test_clusterv2.py
index aef0f50d..38074410 100644
--- a/tests/test_clusterv2.py
+++ b/tests/test_clusterv2.py
@@ -461,6 +461,24 @@ def test_hs2_engine_config(self):
                                                          'min_nodes': 3,
                                                          'node_bootstrap': 'test_file_name',
                                                          'slave_instance_type': 'c1.xlarge' }})
+    def test_hs2_parent_cluster_label(self):
+        with tempfile.NamedTemporaryFile() as temp:
+            temp.write("config.properties:\na=1\nb=2".encode("utf8"))
+            temp.flush()
+            sys.argv = ['qds.py', '--version', 'v2', 'cluster', 'create', '--label',
+                        'test_label', '--flavour', 'hs2', '--node-bootstrap-file', 'test_file_name', '--slave-instance-type', 'c1.xlarge', '--min-nodes', '3', '--parent-cluster-label', 'parent_cluster_label']
+            Qubole.cloud = None
+            print_command()
+            Connection._api_call = Mock(return_value={})
+            qds.main()
+            Connection._api_call.assert_called_with('POST', 'clusters',
+                                                    {'engine_config':
+                                                         {'flavour': 'hs2'},
+                                                     'cluster_info': 
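+                                                     # (Editorial aside, based on the assertion below:
+                                                     # --parent-cluster-label flows through unchanged as
+                                                     # 'parent_cluster_label' under cluster_info, alongside the
+                                                     # 'parent_cluster_id' added in the earlier HS2 patch.)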
{'label': ['test_label'], + 'parent_cluster_label': 'parent_cluster_label', + 'min_nodes': 3, + 'node_bootstrap': 'test_file_name', + 'slave_instance_type': 'c1.xlarge' }}) def test_spark_engine_config(self): with tempfile.NamedTemporaryFile() as temp: From 439b88229bd33bcd026c8509b388120decdfa7d6 Mon Sep 17 00:00:00 2001 From: shekharsaurabh <37484772+shekharsaurabh@users.noreply.github.com> Date: Fri, 7 Feb 2020 11:37:09 +0530 Subject: [PATCH 35/69] SDK-386: Add New Exception Class for 429 errors (#302) --- qds_sdk/connection.py | 12 ++++++------ qds_sdk/exception.py | 6 ++++++ 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/qds_sdk/connection.py b/qds_sdk/connection.py index eea6fe32..3ec4c0a8 100644 --- a/qds_sdk/connection.py +++ b/qds_sdk/connection.py @@ -78,23 +78,23 @@ def f_retry(self, *args, **kwargs): return f_retry # true decorator return deco_retry - @retry((RetryWithDelay, requests.Timeout)) + @retry((RetryWithDelay, requests.Timeout, ServerError, ApiThrottledRetry)) def get_raw(self, path, params=None): return self._api_call_raw("GET", path, params=params) - @retry((RetryWithDelay, requests.Timeout, ServerError)) + @retry((RetryWithDelay, requests.Timeout, ServerError, ApiThrottledRetry)) def get(self, path, params=None): return self._api_call("GET", path, params=params) - @retry((RetryWithDelay, requests.Timeout)) + @retry(ApiThrottledRetry) def put(self, path, data=None): return self._api_call("PUT", path, data) - @retry((RetryWithDelay, requests.Timeout)) + @retry(ApiThrottledRetry) def post(self, path, data=None): return self._api_call("POST", path, data) - @retry((RetryWithDelay, requests.Timeout)) + @retry(ApiThrottledRetry) def delete(self, path, data=None): return self._api_call("DELETE", path, data) @@ -196,7 +196,7 @@ def _handle_error(response): raise RetryWithDelay(response, "Data requested is unavailable. Retrying...") elif code == 429: sys.stderr.write(response.text + "\n") - raise RetryWithDelay(response, "Too many requests. Retrying...") + raise ApiThrottledRetry(response, "Too many requests. 
Retrying...") elif 401 <= code < 500: sys.stderr.write(response.text + "\n") raise ClientError(response) diff --git a/qds_sdk/exception.py b/qds_sdk/exception.py index ef06a335..5f545a30 100644 --- a/qds_sdk/exception.py +++ b/qds_sdk/exception.py @@ -85,3 +85,9 @@ class MethodNotAllowed(ClientError): """An error raised when a method is not allowed.""" # 405 Method Not Allowed pass + + +class ApiThrottledRetry(ClientError): + """An error raised when upstream requests are throttled.""" + # 429 Too Many Requests + pass From 033a2bcf54bd1a16559abc708ffebfc91508a3c9 Mon Sep 17 00:00:00 2001 From: Harshit Sharma Date: Thu, 13 Feb 2020 17:27:41 +0530 Subject: [PATCH 36/69] SDK-385: SDK support for Quest (#297) --- bin/qds.py | 13 +- qds_sdk/quest.py | 789 ++++++++++++++++++++++++++++++++++++++++++++ tests/test_quest.py | 100 ++++++ 3 files changed, 900 insertions(+), 2 deletions(-) create mode 100644 qds_sdk/quest.py create mode 100644 tests/test_quest.py diff --git a/bin/qds.py b/bin/qds.py index 26692dbf..48b74218 100755 --- a/bin/qds.py +++ b/bin/qds.py @@ -17,6 +17,7 @@ from qds_sdk.template import TemplateCmdLine from qds_sdk.clusterv2 import ClusterCmdLine from qds_sdk.sensors import * +from qds_sdk.quest import QuestCmdLine import os import sys import traceback @@ -88,6 +89,8 @@ " action --help\n" "\nScheduler subcommand:\n" " scheduler --help\n" + "\nQuest subcommand:\n" + " quest --help\n" "\nTemplate subcommand:\n" " template --help\n" "\nAccount subcommand:\n" @@ -554,6 +557,10 @@ def templatemain(args): result = TemplateCmdLine.run(args) print(result) +def questmain(args): + result = QuestCmdLine.run(args) + print(result) + def main(): optparser = OptionParser(usage=usage_str) @@ -604,7 +611,7 @@ def main(): optparser.disable_interspersed_args() (options, args) = optparser.parse_args() - + if options.chatty: logging.basicConfig(level=logging.DEBUG) elif options.verbose: @@ -698,11 +705,13 @@ def main(): return usermain(args) if a0 == "template": return templatemain(args) + if a0 == "quest": + return questmain(args) cmdset = set(CommandClasses.keys()) sys.stderr.write("First command must be one of <%s>\n" % "|".join(cmdset.union(["cluster", "action", "scheduler", "report", - "dbtap", "role", "group", "app", "account", "nezha", "user", "template"]))) + "dbtap", "role", "group", "app", "account", "nezha", "user", "template", "quest"]))) usage(optparser) diff --git a/qds_sdk/quest.py b/qds_sdk/quest.py new file mode 100644 index 00000000..d522d69c --- /dev/null +++ b/qds_sdk/quest.py @@ -0,0 +1,789 @@ +""" +The quest module contains the base definition for +a generic quest commands. 
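+
+Example (a minimal sketch; the token, pipeline name, and code below are
+illustrative and assume a reachable QDS endpoint):
+
+    from qds_sdk.qubole import Qubole
+    from qds_sdk.quest import QuestCode
+
+    Qubole.configure(api_token='<AUTH_TOKEN>')
+    # BYOC (create_type=3): create a draft pipeline, attach properties,
+    # then save the code against it.
+    QuestCode.create_pipeline(pipeline_name='demo_pipeline',
+                              cluster_label='spark',
+                              code='print("hello")',
+                              language='python')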
+""" +from qds_sdk.actions import * +import json +from qds_sdk.qubole import Qubole +from qds_sdk.resource import Resource +from argparse import ArgumentParser + +log = logging.getLogger("qds_quest") + +# Pattern matcher for s3 path +_URI_RE = re.compile(r's3://([^/]+)/?(.*)') + + +class QuestCmdLine: + """qds_sdk.QuestCmdLine is the interface used by qds.py.""" + + @staticmethod + def parsers(): + argparser = ArgumentParser(prog="qds.py quest", + description="Quest client for Qubole Data Service.") + subparsers = argparser.add_subparsers() + + # Create + create = subparsers.add_parser("create", help="Create a new pipeline") + create.add_argument("--create-type", dest="create_type", required=True, + help="create_type=1 for assisted, " + "create_type=2 for jar, create_type=3 for code") + create.add_argument("--pipeline-name", dest="name", required=True, + help="Name of pipeline") + create.add_argument("--description", dest="description", default=None, + help="Pipeline description"), + create.add_argument("--cluster-label", dest="cluster_label", + default="default", help="Cluster label") + create.add_argument("-c", "--code", dest="code", help="query string") + create.add_argument("-f", "--script-location", dest="script_location", + help="Path where code to run is stored. " + "local file path") + create.add_argument("-l", "--language", dest="language", + help="Language for bring your own code, " + "valid values are python and scala") + create.add_argument("--jar-path", dest="jar_path", + help="Location of Jar") + create.add_argument("--user-arguments", dest="user_arguments", + help="Additional user arguments") + create.add_argument("--main-class-name", dest="main_class_name", + help="class name of your jar file. " + "Required for create_type=2(BYOJ)") + create.add_argument("--command-line-options", + dest="command_line_options", + help="command line options on property page.") + create.set_defaults(func=QuestCmdLine.create) + + # Update/Edit + update_properties = subparsers.add_parser("update-property", + help="Update properties of " + "a existing pipeline") + update_properties.add_argument("--pipeline-id", + dest="pipeline_id", + required=True, + help='Id of pipeline which need to be updated') + update_properties.add_argument("--cluster-label", dest="cluster_label", + help="Update cluster label.") + update_properties.add_argument("--command-line-options", dest="command_line_options", + help="command line options on property page.") + update_properties.add_argument("--can-retry", dest="can_retry", + help="can retry true or false") + update_properties.set_defaults(func=QuestCmdLine.update_properties) + update_code = subparsers.add_parser("update-code", + help="Update code of a existing pipeline") + update_code.add_argument( + "-c", "--code", dest="code", help="query string") + update_code.add_argument("-f", "--script-location", dest="script_location", + help="Path where code to run is stored. local file path") + update_code.set_defaults(func=QuestCmdLine.update_code) + update_code.add_argument( + "--jar-path", + dest="jar_path", + help="Location of Jar") + update_code.add_argument("--user-arguments", dest="user_arguments", + help="Additional user arguments") + update_code.add_argument("--main-class-name", dest="main_class_name", + help="class name of your jar file. 
" + "Required for create_type=2(BYOJ)") + update_code.add_argument("--language", dest="language", + help="language of code scala or python") + update_code.add_argument("--pipeline-id", dest="pipeline_id", required=True, + help='Id of pipeline which need to be updated') + + # Pipeline Util (Utility for start, pause, clone, edit, delete, + # archive) + delete = subparsers.add_parser("delete", help="Delete Pipeline") + delete.add_argument("--pipeline-id", dest="pipeline_id", required=True, + help='Id of pipeline which need to be started') + delete.set_defaults(func=QuestCmdLine.delete) + status = subparsers.add_parser("status", help="Status of Pipeline") + status.add_argument("--pipeline-id", dest="pipeline_id", required=True, + help='Id of pipeline which need to be started') + status.set_defaults(func=QuestCmdLine.status) + start = subparsers.add_parser("start", help="Start Pipeline") + start.add_argument("--pipeline-id", dest="pipeline_id", required=True, + help='Id of pipeline which need to be started') + start.set_defaults(func=QuestCmdLine.start) + pause = subparsers.add_parser("pause", help="pause Pipeline") + pause.add_argument("--pipeline-id", dest="pipeline_id", required=True, + help='Id of pipeline which need to be started') + pause.set_defaults(func=QuestCmdLine.pause) + clone = subparsers.add_parser("clone", help="clone Pipeline") + clone.add_argument("--pipeline-id", dest="pipeline_id", required=True, + help='Id of pipeline which need to be started') + clone.set_defaults(func=QuestCmdLine.clone) + archive = subparsers.add_parser("archive", help="archive Pipeline") + archive.add_argument("--pipeline-id", dest="pipeline_id", required=True, + help='Id of pipeline which need to be started') + archive.set_defaults(func=QuestCmdLine.archive) + health = subparsers.add_parser("health", help="health of Pipeline") + health.add_argument("--pipeline-id", dest="pipeline_id", required=True, + help='Id of pipeline which need to be started') + health.set_defaults(func=QuestCmdLine.health) + # list + index = subparsers.add_parser("list", help="list of Pipeline.") + index.add_argument("--pipeline-status", dest="status", required=True, + help='Id of pipeline which need to be started. ' + 'Valid values = [active, archive, all, draft] ') + index.set_defaults(func=QuestCmdLine.index) + return argparser + + @staticmethod + def run(args): + """ + Commandline method to run pipeline. + :param args: + :return: + """ + parser = QuestCmdLine.parsers() + parsed = parser.parse_args(args) + return parsed.func(parsed) + + @staticmethod + def delete(args): + """ + Commandline method to delete pipeline. + :param args: + :return: + """ + response = Quest.delete(args.pipeline_id) + return json.dumps( + response, default=lambda o: o.attributes, sort_keys=True, indent=4) + + @staticmethod + def pause(args): + """ + Commandline method to pause pipeline. + :param args: + :return: + """ + response = Quest.pause(args.pipeline_id) + return json.dumps( + response, default=lambda o: o.attributes, sort_keys=True, indent=4) + + @staticmethod + def archive(args): + """ + commandline method to archive active pipeline. 
+ :param args: + :return: + """ + response = Quest.archive(args.pipeline_id) + return json.dumps( + response, default=lambda o: o.attributes, sort_keys=True, indent=4) + + @staticmethod + def clone(args): + """ + Commandline method to clone pipeline + :param args: + :return: + """ + response = Quest.clone(args.pipeline_id) + return json.dumps(response, default=lambda o: o.attributes, sort_keys=True, indent=4) + + @staticmethod + def status(args): + """ + CommandLine method to get pipeline status + :param args: + :return: + """ + response = Quest.get_status(args.pipeline_id) + return json.dumps( + response, default=lambda o: o.attributes, sort_keys=True, indent=4) + + @staticmethod + def health(args): + """ + Commandline method to get health of pipeline. + :param args: + :return: + """ + response = Quest.get_health(args.pipeline_id) + return json.dumps( + response, default=lambda o: o.attributes, sort_keys=True, indent=4) + + @staticmethod + def start(args): + """ + Commandline method to start pipeline. + :param args: + :return: + """ + response = Quest.start(args.pipeline_id) + return json.dumps(response, sort_keys=True, indent=4) + + @staticmethod + def index(args): + """ + Commandline method to list pipeline. + :param args: + :return: + """ + pipelinelist = Quest.list(args.status) + return json.dumps( + pipelinelist, default=lambda o: o.attributes, sort_keys=True, indent=4) + + @staticmethod + def create(args): + """ + Commandline method to create pipeline. + :param args: + :return: + """ + pipeline = None + if int(args.create_type) == 2: + pipeline = QuestJar.create_pipeline(pipeline_name=args.name, + jar_path=args.jar_path, + main_class_name=args.main_class_name, + cluster_label=args.cluster_label, + user_arguments=args.user_arguments, + command_line_options=args.command_line_options) + elif int(args.create_type) == 3: + if args.code: + pipeline = QuestCode.create_pipeline(pipeline_name=args.name, + cluster_label=args.cluster_label, + code=args.code, + file_path=args.script_location, + language=args.language, + user_arguments=args.user_arguments, + command_line_options=args.command_line_options) + elif args.script_location: + pipeline = QuestCode.create_pipeline(pipeline_name=args.name, + cluster_label=args.cluster_label, + code=args.code, + file_path=args.script_location, + language=args.language, + user_arguments=args.user_arguments, + command_line_options=args.command_line_options) + + return json.dumps(pipeline) + + @staticmethod + def update_properties(args): + """ + Commandline method to update pipeline properties. 
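+
+        Example (a sketch; the id and label values are illustrative)::
+
+            QuestCmdLine.run(['update-property', '--pipeline-id', '153',
+                              '--cluster-label', 'spark'])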
+        :param args:
+        :return:
+        """
+        params = args.__dict__
+        log.debug(params)
+        Quest.add_property(pipeline_id=args.pipeline_id,
+                           cluster_label=args.cluster_label,
+                           can_retry=args.can_retry,
+                           command_line_options=args.command_line_options)
+
+    @staticmethod
+    def update_code(args):
+        """
+        Commandline method to update the code or jar path of a pipeline.
+        :param args:
+        :return:
+        """
+        if args.jar_path or args.main_class_name:
+            response = QuestJar.save_code(pipeline_id=args.pipeline_id,
+                                          code=args.code,
+                                          file_path=args.script_location,
+                                          language=args.language,
+                                          jar_path=args.jar_path,
+                                          user_arguments=args.user_arguments,
+                                          main_class_name=args.main_class_name)
+        elif args.code or args.script_location:
+            response = QuestCode.save_code(pipeline_id=args.pipeline_id,
+                                           code=args.code,
+                                           file_path=args.script_location,
+                                           language=args.language,
+                                           jar_path=args.jar_path,
+                                           user_arguments=args.user_arguments,
+                                           main_class_name=args.main_class_name)
+        else:
+            raise ParseError(
+                "Provide code, script location or jar path to update the pipeline.")
+        return json.dumps(response, sort_keys=True, indent=4)
+
+
+class Quest(Resource):
+    """qds_sdk.Quest is the base Qubole Quest class."""
+
+    """ all commands use the /pipelines endpoint"""
+
+    rest_entity_path = "pipelines"
+    pipeline_id = None
+    pipeline_name = None
+    pipeline_code = None
+    jar_path = None
+
+    @staticmethod
+    def get_pipeline_id(response):
+        return str(response.get('data').get('id'))
+
+    @staticmethod
+    def list(status=None):
+        """
+        Method to list pipelines, filtered by status.
+        :param status: Valid values - all, draft, archive, active.
+        :return: List of pipelines in json format.
+        """
+        if status is None or status.lower() == 'all':
+            params = {"filter": "draft,archive,active"}
+        else:
+            params = {"filter": status.lower()}
+        conn = Qubole.agent()
+        url_path = Quest.rest_entity_path
+        pipeline_list = conn.get(url_path, params)
+        return pipeline_list
+
+    @classmethod
+    def create(cls, pipeline_name, create_type, **kwargs):
+        """
+        Create a pipeline object by issuing a POST
+        request to the /pipelines?mode=wizard endpoint
+        Note - this creates the pipeline in draft mode
+
+        Args:
+            pipeline_name: Name to be given.
create_type: 1->Assisted, 2->Code, 3->Jar
+            **kwargs: keyword arguments specific to create type
+
+        Returns:
+            response
+        """
+        conn = Qubole.agent()
+        data = {"data": {
+            "attributes":
+                {"name": pipeline_name, "status": "DRAFT",
+                 "create_type": create_type},
+            "type": "pipelines"}
+        }
+        url = Quest.rest_entity_path + "?mode=wizard"
+        response = conn.post(url, data)
+        cls.pipeline_id = Quest.get_pipeline_id(response)
+        cls.pipeline_name = pipeline_name
+        return response
+
+    @staticmethod
+    def start(pipeline_id):
+        """
+        Method to start Pipeline
+        :param pipeline_id: id of the pipeline to be started
+        :return: response
+        """
+        conn = Qubole.agent()
+        url = Quest.rest_entity_path + "/" + pipeline_id + "/start"
+        response = conn.put(url)
+        pipeline_status = Quest.get_status(pipeline_id)
+        while pipeline_status == 'waiting':
+            log.info("Pipeline is in waiting state....")
+            time.sleep(10)
+            pipeline_status = Quest.get_status(pipeline_id)
+        log.debug("State of pipeline is %s", pipeline_status)
+        return response
+
+    @staticmethod
+    def add_property(pipeline_id,
+                     cluster_label,
+                     checkpoint_location=None,
+                     output_mode=None,
+                     trigger_interval=None,
+                     can_retry=True,
+                     command_line_options=None):
+        """
+        Method to add properties to a pipeline
+        :param can_retry:
+        :param pipeline_id:
+        :param cluster_label:
+        :param checkpoint_location:
+        :param trigger_interval:
+        :param output_mode:
+        :param command_line_options:
+        :return:
+        """
+        conn = Qubole.agent()
+        if command_line_options is None:
+            command_line_options = """--conf spark.driver.extraLibraryPath=/usr/lib/hadoop2/lib/native\n--conf spark.eventLog.compress=true\n--conf spark.eventLog.enabled=true\n--conf spark.sql.streaming.qubole.enableStreamingEvents=true\n--conf spark.qubole.event.enabled=true"""
+        data = {"data": {"attributes": {
+            "cluster_label": cluster_label,
+            "can_retry": can_retry,
+            "checkpoint_location": checkpoint_location,
+            "trigger_interval": trigger_interval,
+            "output_mode": output_mode,
+            "command_line_options": command_line_options
+        },
+            "type": "pipeline/properties"
+        }
+        }
+        log.info("Data {}".format(data))
+        url = Quest.rest_entity_path + "/" + pipeline_id + "/properties"
+        response = conn.put(url, data)
+        log.debug(response)
+        return response
+
+    @classmethod
+    def save_code(cls, pipeline_id,
+                  code=None,
+                  file_path=None,
+                  language=None,
+                  jar_path=None,
+                  main_class_name=None,
+                  user_arguments=None):
+        """
+        :param file_path:
+        :param code:
+        :param language:
+        :param user_arguments:
+        :param pipeline_id:
+        :param jar_path:
+        :param main_class_name:
+        :return:
+        """
+        data = None
+        if cls.create_type == 2:
+            if jar_path is None or main_class_name is None:
+                raise ParseError(
+                    "Provide jar path and main class name for BYOJ mode.")
+            else:
+                cls.jar_path = jar_path
+                data = {"data": {
+                    "attributes": {"create_type": cls.create_type,
+                                   "user_arguments": str(user_arguments),
+                                   "jar_path": str(jar_path),
+                                   "main_class_name": str(main_class_name)}}}
+
+        elif cls.create_type == 3:
+            if code or file_path:
+                try:
+                    if file_path:
+                        with open(file_path, 'r') as f:
+                            code = f.read()
+                    else:
+                        code = code
+                except IOError as e:
+                    raise ParseError("Unable to open script location or script "
+                                     "location and code both are empty. 
", + e.message) + cls.pipeline_code = code + data = {"data": { + "attributes": {"create_type": cls.create_type, "user_arguments": str(user_arguments), + "code": str(code), "language": str(language)}}} + + else: + raise ParseError( + "Provide code or file location for BYOC mode.") + + conn = Qubole.agent() + url = cls.rest_entity_path + "/" + str(pipeline_id) + "/save_code" + response = conn.put(url, data) + log.debug(response) + return response + + @staticmethod + def get_health(pipeline_id): + """ + Get Pipeline Health + :param pipeline_id: + :return: + """ + conn = Qubole.agent() + url = Quest.rest_entity_path + "/" + pipeline_id + response = conn.get(url) + log.info(response) + return response.get("data").get("attributes").get("health") + + @staticmethod + def clone(pipeline_id): + """ + Method to clone pipeline + :param pipeline_id: + :return: + """ + url = Quest.rest_entity_path + "/" + pipeline_id + "/duplicate" + log.info("Cloning pipeline with id {}".format(pipeline_id)) + conn = Qubole.agent() + return conn.post(url) + + @staticmethod + def pause(pipeline_id): + """ + Method to pause pipeline + :param pipeline_id: + :return: + """ + url = Quest.rest_entity_path + "/" + pipeline_id + "/pause" + log.info("Pausing pipeline with id {}".format(pipeline_id)) + conn = Qubole.agent() + return conn.put(url) + + @staticmethod + def archive(pipeline_id): + """ + Method to Archive pipeline + :param pipeline_id: + :return: + """ + url = Quest.rest_entity_path + "/" + pipeline_id + "/archive" + log.info("Archiving pipeline with id {}".format(pipeline_id)) + conn = Qubole.agent() + return conn.put(url) + + @staticmethod + def get_status(pipeline_id): + """ + Get pipeline status + :param pipeline_id: + :return: + """ + conn = Qubole.agent() + url = Quest.rest_entity_path + "/" + pipeline_id + response = conn.get(url) + log.debug(response) + return response.get("data").get( + "attributes").get("pipeline_instance_status") + + @staticmethod + def delete(pipeline_id): + """ + Method to delete pipeline + :param pipeline_id: + :return: + """ + conn = Qubole.agent() + url = Quest.rest_entity_path + "/" + pipeline_id + "/delete" + log.info("Deleting Pipeline with id: {}".format(pipeline_id)) + response = conn.put(url) + log.info(response) + return response + + @staticmethod + def edit_pipeline_name(pipeline_id, pipeline_name): + """ + Method to edit pipeline name (Required in case of cloning) + :param pipeline_id: + :param pipeline_name: + :return: + """ + conn = Qubole.agent() + url = Quest.rest_entity_path + "/" + pipeline_id + data = { + "data": { + "attributes": { + "name": pipeline_name}, + "type": "pipelines"}} + return conn.put(url, data) + + @staticmethod + def set_alert(pipeline_id, channel_id): + """ + + :param pipeline_id: + :param channel_id: List of channel's id + :return: + """ + data = { + "data": {"attributes": { + "event_type": "error", + "notification_channels": [channel_id], + "can_notify": True}, + "type": "pipeline/alerts" + } + } + conn = Qubole.agent() + url = Quest.rest_entity_path + "/" + pipeline_id + "/alerts" + return conn.put(url, data) + + @staticmethod + def get_code(pipeline_id): + """ + Get pipeline code + :param pipeline_id: + :return: + """ + url = Quest.rest_entity_path + "/" + pipeline_id + conn = Qubole.agent() + reponse = conn.get(url) + code = reponse.get("meta")["command_details"]["code"] + return code + + +class QuestCode(Quest): + create_type = 3 + + @staticmethod + def create_pipeline(pipeline_name, + cluster_label, + code=None, + file_path=None, + 
language=None, + can_retry=True, + channel_id=None, + command_line_options=None, + user_arguments=None): + """ + Method to create pipeline in BYOC mode in one go. + :param file_path: + :param code: + :param command_line_options: + :param user_arguments: + :param pipeline_name: + :param cluster_label: + :param language: + :param can_retry: + :param channel_id: + :return: + """ + QuestCode.create(pipeline_name, QuestCode.create_type) + pipeline_id = QuestCode.pipeline_id + response = QuestCode.add_property(pipeline_id, cluster_label, + can_retry=can_retry, + command_line_options=command_line_options) + log.debug(response) + response = QuestCode.save_code(pipeline_id, + code=code, + file_path=file_path, + language=language, + user_arguments=user_arguments) + if channel_id: + response = Quest.set_alert(pipeline_id, channel_id) + log.info(response) + return response + + +class QuestJar(Quest): + create_type = 2 + + @staticmethod + def create_pipeline(pipeline_name, + jar_path, + cluster_label, + main_class_name, + channel_id=None, + can_retry=True, + command_line_options=None, + user_arguments=None): + """ + Method to create pipeline in BYOJ mode + :param pipeline_name: + :param jar_path: + :param cluster_label: + :param main_class_name: + :param channel_id: + :param can_retry: + :param command_line_options: + :param user_arguments: + :return: + """ + QuestJar.create(pipeline_name, QuestJar.create_type) + pipeline_id = QuestJar.pipeline_id + QuestJar.add_property(pipeline_id, + cluster_label, + can_retry=can_retry, + command_line_options=command_line_options) + QuestJar.save_code(pipeline_id, + jar_path=jar_path, + main_class_name=main_class_name, + user_arguments=user_arguments) + QuestJar.jar_path = jar_path + if channel_id: + response = Quest.set_alert(pipeline_id, channel_id) + log.info(response) + return QuestJar + + +class QuestAssisted(Quest): + create_type = 1 + + @staticmethod + def add_source(): + """Method to add source.""" + pass + + @staticmethod + def add_sink(): + """Method to add sink.""" + pass + + @staticmethod + def create_pipeline(): + """Parent Method to create end to end pipeline.""" + pass + + @staticmethod + def add_operator(): + """Parent method to add operator""" + pass + + @staticmethod + def _select_operator(): + """Method to add select operator.""" + pass + + @staticmethod + def _filter_operator(): + """Method to add filter operator.""" + pass + + @staticmethod + def _watermark_operator(): + """Method to add watermark operator""" + pass + + @staticmethod + def _window_group_operator(): + """Method to add window group operator""" + pass + + @staticmethod + def _source_kafka(): + """Method to as kafka as source.""" + pass + + @staticmethod + def _source_kinesis(): + """Method to add kinesis as source.""" + pass + + @staticmethod + def _source_s3(): + """Method to add s3 as source.""" + pass + + @staticmethod + def _source_google_storage(): + """Method to add google storage as source.""" + pass + + @staticmethod + def _sink_kafka(): + """Method to add kafka as sink.""" + pass + + @staticmethod + def _sink_s3(): + """Method to add s3 as sink.""" + pass + + @staticmethod + def _sink_hive(): + """method to add hive as sink.""" + pass + + @staticmethod + def _sink_snowflake(): + """Method to add Snowflake as sink""" + pass + + @staticmethod + def _sink_google_storage(): + """Method to add google storage as sink""" + pass + + @staticmethod + def _sink_BigQuery(): + """Method to add BigQuery as sink.""" + pass + + @staticmethod + def add_registry(): + """Method to add 
registry.""" + pass + + @staticmethod + def switch_from_assisted(): + """Method to switch to Assisted from BYOC or BYOJ mode.""" + pass diff --git a/tests/test_quest.py b/tests/test_quest.py new file mode 100644 index 00000000..17ae4274 --- /dev/null +++ b/tests/test_quest.py @@ -0,0 +1,100 @@ +from __future__ import print_function +from test_base import QdsCliTestCase +from test_base import print_command +from qds_sdk.quest import QuestCode +from qds_sdk.connection import Connection +import qds +from mock import * +import sys +import os + +if sys.version_info > (2, 7, 0): + import unittest +else: + import unittest2 as unittest + + +sys.path.append(os.path.join(os.path.dirname(__file__), '../bin')) + + +class TestQuestList(QdsCliTestCase): + def test_list_pipeline(self): + sys.argv = ['qds.py', 'quest', 'list', '--pipeline-status', 'draft'] + print_command() + Connection._api_call = Mock(return_value={}) + params = {'filter': "draft"} + qds.main() + Connection._api_call.assert_called_with( + "GET", "pipelines", params=params) + + def test_pause_pipeline(self): + sys.argv = ['qds.py', 'quest', 'pause', '--pipeline-id', '153'] + print_command() + Connection._api_call = Mock(return_value={}) + qds.main() + Connection._api_call.assert_called_with( + "PUT", "pipelines/153/pause", None) + + def test_clone_pipeline(self): + sys.argv = ['qds.py', 'quest', 'clone', '--pipeline-id', '153'] + print_command() + Connection._api_call = Mock(return_value={}) + qds.main() + Connection._api_call.assert_called_with( + "POST", "pipelines/153/duplicate", None) + + def test_archive_pipeline(self): + sys.argv = ['qds.py', 'quest', 'archive', '--pipeline-id', '153'] + print_command() + Connection._api_call = Mock(return_value={}) + qds.main() + Connection._api_call.assert_called_with( + "PUT", "pipelines/153/archive", None) + + def test_delete_pipeline(self): + sys.argv = ['qds.py', 'quest', 'delete', '--pipeline-id', '153'] + print_command() + Connection._api_call = Mock(return_value={}) + qds.main() + Connection._api_call.assert_called_with( + "PUT", "pipelines/153/delete", None) + + def test_create_pipeline(self): + sys.argv = ['qds.py', 'quest', 'create', '--create-type', '3', '--pipeline-name', 'test_pipeline_name', + '--cluster-label', 'spark', '-c', 'print("hello")', '--language', 'python', '--user-arguments', 'users_argument'] + print_command() + d1 = {"data": {"attributes": {"name": "test_pipeline_name", "status": "DRAFT", "create_type": 3}, + "type": "pipelines"}} + response = {"relationships": {"nodes": [], "alerts": []}, "included": [], + "meta": {"command_details": {"code": "print(\"hello\")", "language": "python"}, + "properties": {"checkpoint_location": None, "trigger_interval": None, + "command_line_options": """--conf spark.driver.extraLibraryPath=/usr/lib/hadoop2/lib/native\n--conf spark.eventLog.compress=true\n--conf spark.eventLog.enabled=true\n--conf spark.sql.streaming.qubole.enableStreamingEvents=true\n--conf spark.qubole.event.enabled=true""", + "cluster_label": "spark", "jar_path": None, + "user_arguments": "users_argument", "main_class_name": None, "can_retry": True, + "is_monitoring_enabled": True}, "query_hist": None, "cluster_id": None}, + "data": {"id": 1, "type": "pipeline", + "attributes": {"name": "test_pipeline_name", "description": None, "status": "draft", + "created_at": "2020-02-10T14:02:20Z", "updated_at": "2020-02-11T11:05:40Z", + "cluster_label": "spark", + "owner_name": "eam-airflow", "pipeline_instance_status": "draft", + "create_type": 3, "health": "UNKNOWN"}}} + + 
QuestCode.pipeline_id = '1' + QuestCode.pipeline_code = """print("helloworld")""" + QuestCode.pipeline_name = "test_pipeline_name" + d2 = {"data": {"attributes": {"cluster_label": "spark", "can_retry": True, + "checkpoint_location": None, + "trigger_interval": None, "output_mode": None, + "command_line_options": """--conf spark.driver.extraLibraryPath=/usr/lib/hadoop2/lib/native\n--conf spark.eventLog.compress=true\n--conf spark.eventLog.enabled=true\n--conf spark.sql.streaming.qubole.enableStreamingEvents=true\n--conf spark.qubole.event.enabled=true"""}, + "type": "pipeline/properties"}} + d3 = {"data": { + "attributes": {"create_type": 3, "user_arguments": "users_argument", "code": """print("hello")""", + "language": "python"}}} + Connection._api_call = Mock(return_value=response, any_order=False) + qds.main() + Connection._api_call.assert_has_calls( + [call("POST", "pipelines?mode=wizard", d1), call("PUT", "pipelines/1/properties", d2), + call("PUT", "pipelines/1/save_code", d3)]) + +if __name__ == '__main__': + unittest.main() From 9171a499b627f3575c61266cb14ce7d74b0be0f6 Mon Sep 17 00:00:00 2001 From: Joy Lal Chattaraj <8450903+chattarajoy@users.noreply.github.com> Date: Tue, 18 Feb 2020 15:14:13 +0530 Subject: [PATCH 37/69] Release Version 1.14.0 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 3b9ed249..245d2692 100644 --- a/setup.py +++ b/setup.py @@ -12,7 +12,7 @@ def read(fname): setup( name="qds_sdk", - version="1.13.2", + version="1.14.0", author="Qubole", author_email="dev@qubole.com", description=("Python SDK for coding to the Qubole Data Service API"), From 46ead9a80c61230297a35cdcc254fea3ba9c7dcd Mon Sep 17 00:00:00 2001 From: Swapnil Kumar Date: Wed, 19 Feb 2020 15:13:07 +0530 Subject: [PATCH 38/69] SDK-388: Support for spot allocation strategy on Cluster (#293) --- qds_sdk/cluster_info_v22.py | 44 ++++++++++++++++++++++++++++++------- tests/test_clusterv22.py | 32 +++++++++++++-------------- 2 files changed, 52 insertions(+), 24 deletions(-) diff --git a/qds_sdk/cluster_info_v22.py b/qds_sdk/cluster_info_v22.py index 32aa52d8..43e699f6 100644 --- a/qds_sdk/cluster_info_v22.py +++ b/qds_sdk/cluster_info_v22.py @@ -68,6 +68,7 @@ def set_cluster_info_from_arguments(self, arguments): min_spot_percentage=arguments.min_spot_percentage, min_maximum_bid_price_percentage=arguments.min_maximum_bid_price_percentage, min_timeout_for_request=arguments.min_timeout_for_request, + min_spot_allocation_strategy=arguments.min_spot_allocation_strategy, min_spot_fallback=arguments.min_spot_fallback, autoscaling_ondemand_percentage=arguments.autoscaling_ondemand_percentage, autoscaling_spot_block_percentage=arguments.autoscaling_spot_block_percentage, @@ -75,6 +76,7 @@ def set_cluster_info_from_arguments(self, arguments): autoscaling_spot_block_duration=arguments.autoscaling_spot_block_duration, autoscaling_maximum_bid_price_percentage=arguments.autoscaling_maximum_bid_price_percentage, autoscaling_timeout_for_request=arguments.autoscaling_timeout_for_request, + autoscaling_spot_allocation_strategy=arguments.autoscaling_spot_allocation_strategy, autoscaling_spot_fallback=arguments.autoscaling_spot_fallback) def set_cluster_info(self, @@ -237,6 +239,7 @@ def set_composition(self, min_spot_percentage=None, min_maximum_bid_price_percentage=None, min_timeout_for_request=None, + min_spot_allocation_strategy=None, min_spot_fallback=None, autoscaling_ondemand_percentage=None, autoscaling_spot_block_percentage=None, @@ -244,6 +247,7 @@ def 
set_composition(self, autoscaling_spot_block_duration=None, autoscaling_maximum_bid_price_percentage=None, autoscaling_timeout_for_request=None, + autoscaling_spot_allocation_strategy=None, autoscaling_spot_fallback=None): self.cluster_info["composition"] = {} @@ -260,6 +264,7 @@ def set_composition(self, min_spot_percentage, min_maximum_bid_price_percentage, min_timeout_for_request, + min_spot_allocation_strategy, min_spot_fallback) self.set_autoscaling_config(autoscaling_ondemand_percentage, @@ -268,6 +273,7 @@ def set_composition(self, autoscaling_spot_percentage, autoscaling_maximum_bid_price_percentage, autoscaling_timeout_for_request, + autoscaling_spot_allocation_strategy, autoscaling_spot_fallback) def set_master_config(self, @@ -293,6 +299,7 @@ def set_min_config(self, min_spot_percentage, min_maximum_bid_price_percentage, min_timeout_for_request, + min_spot_allocation_strategy, min_spot_fallback): self.cluster_info["composition"]["min_nodes"] = {"nodes": []} if not min_ondemand_percentage and not min_spot_block_percentage and not min_spot_percentage: @@ -305,7 +312,8 @@ def set_min_config(self, min_spot_block_percentage, min_spot_block_duration) if min_spot_percentage: self.set_min_spot(min_spot_percentage, min_maximum_bid_price_percentage, - min_timeout_for_request, min_spot_fallback) + min_timeout_for_request, min_spot_allocation_strategy, + min_spot_fallback) def set_autoscaling_config(self, autoscaling_ondemand_percentage, @@ -314,11 +322,12 @@ def set_autoscaling_config(self, autoscaling_spot_percentage, autoscaling_maximum_bid_price_percentage, autoscaling_timeout_for_request, + autoscaling_spot_allocation_strategy, autoscaling_spot_fallback): self.cluster_info["composition"]["autoscaling_nodes"] = {"nodes": []} if not autoscaling_ondemand_percentage and not autoscaling_spot_block_percentage and not autoscaling_spot_percentage: self.set_autoscaling_ondemand(50) - self.set_autoscaling_spot(50, 100, 1, 'ondemand') + self.set_autoscaling_spot(50, 100, 1, None, 'ondemand') else: if autoscaling_ondemand_percentage: self.set_autoscaling_ondemand(autoscaling_ondemand_percentage) @@ -326,8 +335,11 @@ def set_autoscaling_config(self, self.set_autoscaling_spot_block(autoscaling_spot_block_percentage, autoscaling_spot_block_duration) if autoscaling_spot_percentage: - self.set_autoscaling_spot(autoscaling_spot_percentage, autoscaling_maximum_bid_price_percentage, - autoscaling_timeout_for_request, autoscaling_spot_fallback) + self.set_autoscaling_spot(autoscaling_spot_percentage, + autoscaling_maximum_bid_price_percentage, + autoscaling_timeout_for_request, + autoscaling_spot_allocation_strategy, + autoscaling_spot_fallback) def set_master_ondemand(self, master_ondemand_percentage=None): ondemand = {"percentage": master_ondemand_percentage, "type": "ondemand"} @@ -360,11 +372,13 @@ def set_min_spot_block(self, min_spot_block_percentage=None, min_spot_block_dura self.cluster_info["composition"]["min_nodes"]["nodes"].append(spot_block) def set_min_spot(self, min_spot_percentage=None, min_maximum_bid_price_percentage=100, - min_timeout_for_request=1, min_spot_fallback=None): + min_timeout_for_request=1, min_spot_allocation_strategy=None, + min_spot_fallback=None): spot = {"percentage": min_spot_percentage, "type": "spot", "maximum_bid_price_percentage": min_maximum_bid_price_percentage, "timeout_for_request": min_timeout_for_request, + "allocation_strategy": min_spot_allocation_strategy, "fallback": min_spot_fallback } self.cluster_info["composition"]["min_nodes"]["nodes"].append(spot) @@ 
-380,12 +394,16 @@ def set_autoscaling_spot_block(self, autoscaling_spot_block_percentage=None, aut "timeout": autoscaling_spot_block_duration} self.cluster_info["composition"]["autoscaling_nodes"]["nodes"].append(spot_block) - def set_autoscaling_spot(self, autoscaling_spot_percentage=None, autoscaling_maximum_bid_price_percentage=100, - autoscaling_timeout_for_request=1, autoscaling_spot_fallback=None): + def set_autoscaling_spot(self, autoscaling_spot_percentage=None, + autoscaling_maximum_bid_price_percentage=100, + autoscaling_timeout_for_request=1, + autoscaling_spot_allocation_strategy=None, + autoscaling_spot_fallback=None): spot = {"percentage": autoscaling_spot_percentage, "type": "spot", "maximum_bid_price_percentage": autoscaling_maximum_bid_price_percentage, "timeout_for_request": autoscaling_timeout_for_request, + "allocation_strategy": autoscaling_spot_allocation_strategy, "fallback": autoscaling_spot_fallback } self.cluster_info["composition"]["autoscaling_nodes"]["nodes"].append(spot) @@ -653,7 +671,11 @@ def cluster_info_parser(argparser, action): default=None, help="whether to fallback to on-demand instances for min nodes" + " if spot instances aren't available") - + composition_group.add_argument("--min-spot-allocation-strategy", + dest="min_spot_allocation_strategy", + choices=["lowestPrice", "capacityOptimized", None], + default=None, + help="allocation strategy for min spot nodes") composition_group.add_argument("--autoscaling-ondemand-percentage", dest="autoscaling_ondemand_percentage", type=int, @@ -689,6 +711,12 @@ def cluster_info_parser(argparser, action): default=None, help="whether to fallback to on-demand instances for autoscaling nodes" + " if spot instances aren't available") + composition_group.add_argument("--autoscaling-spot-allocation-strategy", + dest="autoscaling_spot_allocation_strategy", + choices=["lowestPrice", "capacityOptimized", None], + default=None, + help="allocation strategy for autoscaling" + + " spot nodes") # monitoring settings monitoring_group = argparser.add_argument_group("monitoring settings") diff --git a/tests/test_clusterv22.py b/tests/test_clusterv22.py index 5b1e4ee7..ab4c03ec 100644 --- a/tests/test_clusterv22.py +++ b/tests/test_clusterv22.py @@ -42,7 +42,7 @@ def test_cluster_info(self): {'timeout_for_request': 1, 'percentage': 50, 'type': 'spot', 'fallback': 'ondemand', - 'maximum_bid_price_percentage': 100}]}}, + 'maximum_bid_price_percentage': 100, 'allocation_strategy': None}]}}, 'datadisk': {'encryption': True}}}) def test_od_od_od(self): @@ -64,7 +64,7 @@ def test_od_od_odspot(self): '--master-type', 'ondemand', '--min-ondemand-percentage', '100', '--autoscaling-ondemand-percentage', '50', '--autoscaling-spot-percentage', '50', '--autoscaling-maximum-bid-price-percentage', '50', - '--autoscaling-timeout-for-request', '3', '--autoscaling-spot-fallback', 'ondemand'] + '--autoscaling-timeout-for-request', '3', '--autoscaling-spot-fallback', 'ondemand', '--autoscaling-spot-allocation-strategy', 'lowestPrice'] Qubole.cloud = None print_command() Connection._api_call = Mock(return_value={}) @@ -74,14 +74,14 @@ def test_od_od_odspot(self): 'master': {'nodes': [{'percentage': 100, 'type': 'ondemand'}]}, 'autoscaling_nodes': { 'nodes': [{'percentage': 50, 'type': 'ondemand'}, {'timeout_for_request': 3, 'percentage': 50, 'type': 'spot', 'fallback': 'ondemand', - 'maximum_bid_price_percentage': 50}]}}, 'label': ['test_label']}}) + 'maximum_bid_price_percentage': 50, 'allocation_strategy': 'lowestPrice'}]}}, 'label': 
['test_label']}}) def test_od_od_odspot_nofallback(self): sys.argv = ['qds.py', '--version', 'v2.2', 'cluster', 'create', '--label', 'test_label', '--master-type', 'ondemand', '--min-ondemand-percentage', '100', '--autoscaling-ondemand-percentage', '50', '--autoscaling-spot-percentage', '50', '--autoscaling-maximum-bid-price-percentage', '50', - '--autoscaling-timeout-for-request', '3', '--autoscaling-spot-fallback', None] + '--autoscaling-timeout-for-request', '3', '--autoscaling-spot-fallback', None, '--autoscaling-spot-allocation-strategy', 'capacityOptimized'] Qubole.cloud = None print_command() Connection._api_call = Mock(return_value={}) @@ -91,7 +91,7 @@ def test_od_od_odspot_nofallback(self): 'master': {'nodes': [{'percentage': 100, 'type': 'ondemand'}]}, 'autoscaling_nodes': { 'nodes': [{'percentage': 50, 'type': 'ondemand'}, {'timeout_for_request': 3, 'percentage': 50, 'type': 'spot', 'fallback': None, - 'maximum_bid_price_percentage': 50}]}}, 'label': ['test_label']}}) + 'maximum_bid_price_percentage': 50, 'allocation_strategy': 'capacityOptimized'}]}}, 'label': ['test_label']}}) def test_od_od_spotblock(self): sys.argv = ['qds.py', '--version', 'v2.2', 'cluster', 'create', '--label', 'test_label', @@ -114,7 +114,7 @@ def test_od_od_spotblockspot(self): '--autoscaling-spot-block-percentage', '50', '--autoscaling-spot-block-duration', '60', '--autoscaling-spot-percentage', '50', '--autoscaling-maximum-bid-price-percentage', '50', - '--autoscaling-timeout-for-request', '3', '--autoscaling-spot-fallback', None] + '--autoscaling-timeout-for-request', '3', '--autoscaling-spot-fallback', None, '--autoscaling-spot-allocation-strategy', 'capacityOptimized'] Qubole.cloud = None print_command() Connection._api_call = Mock(return_value={}) @@ -124,14 +124,14 @@ def test_od_od_spotblockspot(self): 'master': {'nodes': [{'percentage': 100, 'type': 'ondemand'}]}, 'autoscaling_nodes': { 'nodes': [{'percentage': 50, 'type': 'spotblock', 'timeout': 60}, {'timeout_for_request': 3, 'percentage': 50, 'type': 'spot', 'fallback': None, - 'maximum_bid_price_percentage': 50}]}}, 'label': ['test_label']}}) + 'maximum_bid_price_percentage': 50, 'allocation_strategy': 'capacityOptimized'}]}}, 'label': ['test_label']}}) def test_od_od_spot(self): sys.argv = ['qds.py', '--version', 'v2.2', 'cluster', 'create', '--label', 'test_label', '--master-type', 'ondemand', '--min-ondemand-percentage', '100', '--autoscaling-spot-percentage', '100', '--autoscaling-maximum-bid-price-percentage', '50', '--autoscaling-timeout-for-request', '3', - '--autoscaling-spot-fallback', None] + '--autoscaling-spot-fallback', None, '--autoscaling-spot-allocation-strategy', 'lowestPrice'] Qubole.cloud = None print_command() Connection._api_call = Mock(return_value={}) @@ -140,7 +140,7 @@ def test_od_od_spot(self): 'composition': {'min_nodes': {'nodes': [{'percentage': 100, 'type': 'ondemand'}]}, 'master': {'nodes': [{'percentage': 100, 'type': 'ondemand'}]}, 'autoscaling_nodes': { 'nodes': [{'timeout_for_request': 3, 'percentage': 100, 'type': 'spot', 'fallback': None, - 'maximum_bid_price_percentage': 50}]}}, 'label': ['test_label']}}) + 'maximum_bid_price_percentage': 50, 'allocation_strategy': 'lowestPrice'}]}}, 'label': ['test_label']}}) def test_od_spot_spot(self): sys.argv = ['qds.py', '--version', 'v2.2', 'cluster', 'create', '--label', 'test_label', @@ -148,17 +148,17 @@ def test_od_spot_spot(self): '--min-maximum-bid-price-percentage', '50', '--min-timeout-for-request', '3', '--min-spot-fallback', None, 
'--autoscaling-spot-percentage', '100', '--autoscaling-maximum-bid-price-percentage', '50', '--autoscaling-timeout-for-request', '3', - '--autoscaling-spot-fallback', None] + '--autoscaling-spot-fallback', None, '--autoscaling-spot-allocation-strategy', 'capacityOptimized', '--min-spot-allocation-strategy', 'lowestPrice'] Qubole.cloud = None print_command() Connection._api_call = Mock(return_value={}) qds.main() Connection._api_call.assert_called_with('POST', 'clusters', {'cluster_info': {'composition': {'min_nodes': { 'nodes': [{'timeout_for_request': 3, 'percentage': 100, 'type': 'spot', 'fallback': None, - 'maximum_bid_price_percentage': 50}]}, 'master': { + 'maximum_bid_price_percentage': 50, 'allocation_strategy': 'lowestPrice'}]}, 'master': { 'nodes': [{'percentage': 100, 'type': 'ondemand'}]}, 'autoscaling_nodes': {'nodes': [ {'timeout_for_request': 3, 'percentage': 100, 'type': 'spot', 'fallback': None, - 'maximum_bid_price_percentage': 50}]}}, 'label': ['test_label']}}) + 'maximum_bid_price_percentage': 50, 'allocation_strategy': 'capacityOptimized'}]}}, 'label': ['test_label']}}) def test_spotblock_spotblock_spotblock(self): sys.argv = ['qds.py', '--version', 'v2.2', 'cluster', 'create', '--label', 'test_label', @@ -183,18 +183,18 @@ def test_spot_spot_spot(self): '--min-maximum-bid-price-percentage', '50', '--min-timeout-for-request', '3', '--min-spot-fallback', None, '--autoscaling-spot-percentage', '100', '--autoscaling-maximum-bid-price-percentage', '50', '--autoscaling-timeout-for-request', '3', - '--autoscaling-spot-fallback', None] + '--autoscaling-spot-fallback', None, '--autoscaling-spot-allocation-strategy', 'lowestPrice', '--min-spot-allocation-strategy', 'capacityOptimized'] Qubole.cloud = None print_command() Connection._api_call = Mock(return_value={}) qds.main() Connection._api_call.assert_called_with('POST', 'clusters', {'cluster_info': {'composition': {'min_nodes': { 'nodes': [{'timeout_for_request': 3, 'percentage': 100, 'type': 'spot', 'fallback': None, - 'maximum_bid_price_percentage': 50}]}, 'master': {'nodes': [ + 'maximum_bid_price_percentage': 50, 'allocation_strategy': 'capacityOptimized'}]}, 'master': {'nodes': [ {'timeout_for_request': 3, 'percentage': 100, 'type': 'spot', 'fallback': None, 'maximum_bid_price_percentage': 50}]}, 'autoscaling_nodes': {'nodes': [ {'timeout_for_request': 3, 'percentage': 100, 'type': 'spot', 'fallback': None, - 'maximum_bid_price_percentage': 50}]}}, 'label': ['test_label']}}) + 'maximum_bid_price_percentage': 50, 'allocation_strategy': 'lowestPrice'}]}}, 'label': ['test_label']}}) def test_image_version_v22(self): sys.argv = ['qds.py', '--version', 'v2.2', 'cluster', 'create', '--label', @@ -210,4 +210,4 @@ def test_image_version_v22(self): 'min_nodes': 3, 'slave_instance_type': 'c1.xlarge', 'cluster_image_version': '1.latest', - 'composition': {'min_nodes': {'nodes': [{'percentage': 100, 'type': 'ondemand'}]}, 'master': {'nodes': [{'percentage': 100, 'type': 'ondemand'}]}, 'autoscaling_nodes': {'nodes': [{'percentage': 50, 'type': 'ondemand'}, {'timeout_for_request': 1, 'percentage': 50, 'type': 'spot', 'fallback': 'ondemand', 'maximum_bid_price_percentage': 100}]}}, 'label': ['test_label']}}) + 'composition': {'min_nodes': {'nodes': [{'percentage': 100, 'type': 'ondemand'}]}, 'master': {'nodes': [{'percentage': 100, 'type': 'ondemand'}]}, 'autoscaling_nodes': {'nodes': [{'percentage': 50, 'type': 'ondemand'}, {'timeout_for_request': 1, 'percentage': 50, 'type': 'spot', 'fallback': 'ondemand', 
'maximum_bid_price_percentage': 100, 'allocation_strategy': None}]}}, 'label': ['test_label']}}) From 68657f8610681b65538f6a35e4ce2cd07a82d8d5 Mon Sep 17 00:00:00 2001 From: Anupam-pandey Date: Mon, 24 Feb 2020 13:54:02 +0530 Subject: [PATCH 39/69] SDK-392: Support for Mlflow Cluster Type (#306) --- qds_sdk/engine.py | 46 ++++-- tests/test_clusterv2.py | 312 ++++++++++++++++++++++------------------ 2 files changed, 204 insertions(+), 154 deletions(-) diff --git a/qds_sdk/engine.py b/qds_sdk/engine.py index 24a8289b..b7594b3e 100644 --- a/qds_sdk/engine.py +++ b/qds_sdk/engine.py @@ -13,8 +13,9 @@ def __init__(self, flavour=None): self.hadoop_settings = {} self.presto_settings = {} self.spark_settings = {} - self.airflow_settings ={} + self.airflow_settings = {} self.engine_config = {} + self.mlflow_settings = {} def set_engine_config(self, custom_hadoop_config=None, @@ -31,7 +32,8 @@ def set_engine_config(self, airflow_version=None, airflow_python_version=None, is_ha=None, - enable_rubix=None): + enable_rubix=None, + mlflow_version=None): ''' Args: @@ -68,13 +70,16 @@ def set_engine_config(self, is_ha: Enabling HA config for cluster is_deeplearning : this is a deeplearning cluster config enable_rubix: Enable rubix on the cluster + mlflow_version : this is the version of the mlflow cluster ''' - self.set_hadoop_settings(custom_hadoop_config, use_qubole_placement_policy, is_ha, fairscheduler_config_xml, default_pool, enable_rubix) + self.set_hadoop_settings(custom_hadoop_config, use_qubole_placement_policy, is_ha, fairscheduler_config_xml, + default_pool, enable_rubix) self.set_presto_settings(presto_version, custom_presto_config) self.set_spark_settings(spark_version, custom_spark_config) self.set_airflow_settings(dbtap_id, fernet_key, overrides, airflow_version, airflow_python_version) + self.set_mlflow_settings(mlflow_version) def set_fairscheduler_settings(self, fairscheduler_config_xml=None, @@ -121,11 +126,15 @@ def set_airflow_settings(self, self.airflow_settings['version'] = airflow_version self.airflow_settings['airflow_python_version'] = airflow_python_version + def set_mlflow_settings(self, + mlflow_version="1.5"): + self.mlflow_settings['version'] = mlflow_version + def set_engine_config_settings(self, arguments): custom_hadoop_config = util._read_file(arguments.custom_hadoop_config_file) fairscheduler_config_xml = util._read_file(arguments.fairscheduler_config_xml_file) custom_presto_config = util._read_file(arguments.presto_custom_config_file) - is_deeplearning=False + is_deeplearning = False self.set_engine_config(custom_hadoop_config=custom_hadoop_config, use_qubole_placement_policy=arguments.use_qubole_placement_policy, @@ -140,14 +149,16 @@ def set_engine_config_settings(self, arguments): overrides=arguments.overrides, airflow_version=arguments.airflow_version, airflow_python_version=arguments.airflow_python_version, - enable_rubix=arguments.enable_rubix) + enable_rubix=arguments.enable_rubix, + mlflow_version=arguments.mlflow_version) @staticmethod def engine_parser(argparser): engine_group = argparser.add_argument_group("engine settings") engine_group.add_argument("--flavour", dest="flavour", - choices=["hadoop", "hadoop2", "hs2", "hive", "presto", "spark", "sparkstreaming", "hbase", "airflow", "deeplearning"], + choices=["hadoop", "hadoop2", "hs2", "hive", "presto", "spark", "sparkstreaming", + "hbase", "airflow", "deeplearning", "mlflow"], default=None, help="Set engine flavour") @@ -172,15 +183,15 @@ def engine_parser(argparser): " for clusters with spot nodes", ) 
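For reference, a minimal sketch of how the new flavour and version flow
through these setters (the version value is illustrative):

    engine = Engine(flavour="mlflow")
    engine.set_engine_config(mlflow_version="1.5")
    # engine.mlflow_settings is now {'version': '1.5'}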
enable_rubix_group = hadoop_settings_group.add_mutually_exclusive_group() enable_rubix_group.add_argument("--enable-rubix", - dest="enable_rubix", - action="store_true", - default=None, - help="Enable rubix for cluster", ) + dest="enable_rubix", + action="store_true", + default=None, + help="Enable rubix for cluster", ) enable_rubix_group.add_argument("--no-enable-rubix", - dest="enable_rubix", - action="store_false", - default=None, - help="Do not enable rubix for cluster", ) + dest="enable_rubix", + action="store_false", + default=None, + help="Do not enable rubix for cluster", ) fairscheduler_group = argparser.add_argument_group( "fairscheduler configuration options") @@ -236,3 +247,10 @@ def engine_parser(argparser): default=None, help="python environment version for airflow cluster", ) + mlflow_settings_group = argparser.add_argument_group("mlflow settings") + + mlflow_settings_group.add_argument("--mlflow-version", + dest="mlflow_version", + default=None, + help="mlflow version for mlflow cluster", ) + diff --git a/tests/test_clusterv2.py b/tests/test_clusterv2.py index 38074410..0c791908 100644 --- a/tests/test_clusterv2.py +++ b/tests/test_clusterv2.py @@ -1,12 +1,14 @@ from __future__ import print_function import sys import os + if sys.version_info > (2, 7, 0): import unittest else: import unittest2 as unittest from mock import Mock, ANY import tempfile + sys.path.append(os.path.join(os.path.dirname(__file__), '../bin')) import qds from qds_sdk.connection import Connection @@ -15,30 +17,31 @@ from qds_sdk.cloud.cloud import Cloud from qds_sdk.qubole import Qubole + class TestClusterCreate(QdsCliTestCase): def test_minimal(self): sys.argv = ['qds.py', '--version', 'v2', 'cluster', 'create', '--label', 'test_label', - '--compute-access-key', 'aki', '--compute-secret-key', 'sak'] + '--compute-access-key', 'aki', '--compute-secret-key', 'sak'] Qubole.cloud = None print_command() Connection._api_call = Mock(return_value={}) qds.main() Connection._api_call.assert_called_with('POST', 'clusters', - {'cluster_info': - {'label': ['test_label'] - }, - 'cloud_config': { - 'compute_config': { - 'compute_secret_key': 'sak', - 'compute_access_key': 'aki'}} - }) + {'cluster_info': + {'label': ['test_label'] + }, + 'cloud_config': { + 'compute_config': { + 'compute_secret_key': 'sak', + 'compute_access_key': 'aki'}} + }) def test_cluster_info(self): sys.argv = ['qds.py', '--version', 'v2', 'cluster', 'create', '--label', 'test_label', '--compute-access-key', 'aki', '--compute-secret-key', 'sak', '--min-nodes', '3', '--max-nodes', '5', '--disallow-cluster-termination', '--enable-ganglia-monitoring', '--node-bootstrap-file', 'test_file_name', '--master-instance-type', - 'm1.xlarge','--slave-instance-type', 'm1.large', '--encrypted-ephemerals'] + 'm1.xlarge', '--slave-instance-type', 'm1.large', '--encrypted-ephemerals'] Qubole.cloud = None print_command() Connection._api_call = Mock(return_value={}) @@ -64,25 +67,25 @@ def test_aws_compute_config(self): Connection._api_call.assert_called_with('POST', 'clusters', {'cloud_config': { 'compute_config': {'use_account_compute_creds': True}}, - 'cluster_info': {'label': ['test_label']}}) - + 'cluster_info': {'label': ['test_label']}}) def test_aws_network_config(self): - sys.argv = ['qds.py', '--version', 'v2', 'cluster', 'create', '--label', 'test_label', - '--enable-account-compute-creds', '--vpc-id', 'vpc-12345678', '--subnet-id', 'subnet-12345678', - '--bastion-node-public-dns', 'dummydns','--persistent-security-groups', - 
'foopsg','--master-elastic-ip', "10.10.10.10"] - Qubole.cloud = None - print_command() - Connection._api_call = Mock(return_value={}) - qds.main() - Connection._api_call.assert_called_with('POST', 'clusters', {'cloud_config': {'compute_config': {'use_account_compute_creds': True}, - 'network_config': {'subnet_id': 'subnet-12345678', - 'vpc_id': 'vpc-12345678', - 'master_elastic_ip': '10.10.10.10', - 'persistent_security_groups': 'foopsg', - 'bastion_node_public_dns': 'dummydns'}}, - 'cluster_info': {'label': ['test_label']}}) + sys.argv = ['qds.py', '--version', 'v2', 'cluster', 'create', '--label', 'test_label', + '--enable-account-compute-creds', '--vpc-id', 'vpc-12345678', '--subnet-id', 'subnet-12345678', + '--bastion-node-public-dns', 'dummydns', '--persistent-security-groups', + 'foopsg', '--master-elastic-ip', "10.10.10.10"] + Qubole.cloud = None + print_command() + Connection._api_call = Mock(return_value={}) + qds.main() + Connection._api_call.assert_called_with('POST', 'clusters', + {'cloud_config': {'compute_config': {'use_account_compute_creds': True}, + 'network_config': {'subnet_id': 'subnet-12345678', + 'vpc_id': 'vpc-12345678', + 'master_elastic_ip': '10.10.10.10', + 'persistent_security_groups': 'foopsg', + 'bastion_node_public_dns': 'dummydns'}}, + 'cluster_info': {'label': ['test_label']}}) def test_aws_location_config(self): sys.argv = ['qds.py', '--version', 'v2', 'cluster', 'create', '--label', 'test_label', @@ -92,9 +95,9 @@ def test_aws_location_config(self): Connection._api_call = Mock(return_value={}) qds.main() Connection._api_call.assert_called_with('POST', 'clusters', {'cloud_config': {'location': { - 'aws_availability_zone': 'us-east-1a', - 'aws_region': 'us-east-1'}}, - 'cluster_info': {'label': ['test_label']}}) + 'aws_availability_zone': 'us-east-1a', + 'aws_region': 'us-east-1'}}, + 'cluster_info': {'label': ['test_label']}}) def test_oracle_bmc_compute_config(self): sys.argv = ['qds.py', '--version', 'v2', '--cloud', 'ORACLE_BMC', 'cluster', 'create', '--label', 'test_label', @@ -105,10 +108,11 @@ def test_oracle_bmc_compute_config(self): Connection._api_call = Mock(return_value={}) qds.main() Connection._api_call.assert_called_with('POST', 'clusters', {'cloud_config': {'compute_config': - {'compute_key_finger_print': 'zzz22', - 'compute_api_private_rsa_key': 'aaa', - 'compute_user_id': 'yyyy11', - 'compute_tenant_id': 'xxx11'}}, + { + 'compute_key_finger_print': 'zzz22', + 'compute_api_private_rsa_key': 'aaa', + 'compute_user_id': 'yyyy11', + 'compute_tenant_id': 'xxx11'}}, 'cluster_info': {'label': ['test_label']}}) def test_oracle_bmc_storage_config(self): @@ -133,7 +137,7 @@ def test_oracle_bmc_storage_config(self): def test_oracle_bmc_network_config(self): sys.argv = ['qds.py', '--version', 'v2', '--cloud', 'ORACLE_BMC', 'cluster', 'create', '--label', 'test_label', '--compartment-id', 'abc-compartment', '--image-id', 'abc-image', '--vcn-id', 'vcn-1', - '--subnet-id', 'subnet-1' ] + '--subnet-id', 'subnet-1'] Qubole.cloud = None print_command() Connection._api_call = Mock(return_value={}) @@ -160,7 +164,7 @@ def test_oracle_bmc_network_config_az_info_map(self): 'image_id': 'abc-image', 'availability_domain_info_map': [{'availability_domain': 'AD-1', - 'subnet_id': 'subnet-1'}]}}, + 'subnet_id': 'subnet-1'}]}}, 'cluster_info': {'label': ['test_label']}}) def test_oracle_bmc_location_config(self): @@ -243,7 +247,7 @@ def test_azure_master_static_nic(self): 'network_config': {'vnet_resource_group_name': 'vnetresname', 'subnet_name': 'testsubnet', 
'vnet_name': 'testvnet', - 'master_static_nic_name':'nic1'}}, + 'master_static_nic_name': 'nic1'}}, 'cluster_info': {'label': ['test_label']}}) def test_azure_master_static_pip(self): @@ -259,7 +263,7 @@ def test_azure_master_static_pip(self): 'network_config': {'vnet_resource_group_name': 'vnetresname', 'subnet_name': 'testsubnet', 'vnet_name': 'testvnet', - 'master_static_public_ip_name':'pip1'}}, + 'master_static_public_ip_name': 'pip1'}}, 'cluster_info': {'label': ['test_label']}}) def test_azure_resource_group_name(self): @@ -273,7 +277,7 @@ def test_azure_resource_group_name(self): {'cloud_config': { 'resource_group_name': 'testrg' }, - 'cluster_info': {'label': ['test_label']}}) + 'cluster_info': {'label': ['test_label']}}) def test_oracle_opc_compute_config(self): sys.argv = ['qds.py', '--version', 'v2', '--cloud', 'ORACLE_OPC', 'cluster', 'create', '--label', 'test_label', @@ -302,8 +306,8 @@ def test_oracle_opc_storage_config(self): Connection._api_call.assert_called_with('POST', 'clusters', {'cluster_info': {'label': ['test_label'], - 'datadisk': {'count': 1, 'size': 100} - }, + 'datadisk': {'count': 1, 'size': 100} + }, 'cloud_config': {'storage_config': {'storage_username': 'testusername', @@ -336,17 +340,18 @@ def test_gcp_compute_config(self): Connection._api_call = Mock(return_value={}) qds.main() Connection._api_call.assert_called_with('POST', 'clusters', {'cloud_config': {'compute_config': - {'qsa_private_key_id': 'zzz22', - 'qsa_private_key': 'aaa', - 'qsa_client_email': 'yyyy11', - 'customer_project_id': 'www11', - 'qsa_client_id': 'xxx11'}}, + { + 'qsa_private_key_id': 'zzz22', + 'qsa_private_key': 'aaa', + 'qsa_client_email': 'yyyy11', + 'customer_project_id': 'www11', + 'qsa_client_id': 'xxx11'}}, 'cluster_info': {'label': ['test_label']}}) def test_gcp_storage_config(self): sys.argv = ['qds.py', '--version', 'v2', '--cloud', 'GCP', 'cluster', 'create', '--label', 'test_label', '--storage-client-email', 'aaa', '--storage-disk-size-in-gb', 'aaa', - '--storage-disk-count', 'bbb', '--storage-disk-type', 'ccc' ] + '--storage-disk-count', 'bbb', '--storage-disk-type', 'ccc'] Qubole.cloud = None print_command() Connection._api_call = Mock(return_value={}) @@ -361,7 +366,7 @@ def test_gcp_storage_config(self): def test_gcp_network_config(self): sys.argv = ['qds.py', '--version', 'v2', '--cloud', 'GCP', 'cluster', 'create', '--label', 'test_label', - '--vpc-id', 'vpc-1', '--subnet-id', 'subnet-1' ] + '--vpc-id', 'vpc-1', '--subnet-id', 'subnet-1'] Qubole.cloud = None print_command() Connection._api_call = Mock(return_value={}) @@ -428,7 +433,7 @@ def test_presto_engine_config(self): temp.write("config.properties:\na=1\nb=2".encode("utf8")) temp.flush() sys.argv = ['qds.py', '--version', 'v2', 'cluster', 'create', '--label', 'test_label', - '--flavour', 'presto', '--enable-rubix' , '--presto-custom-config', temp.name] + '--flavour', 'presto', '--enable-rubix', '--presto-custom-config', temp.name] Qubole.cloud = None print_command() Connection._api_call = Mock(return_value={}) @@ -438,7 +443,7 @@ def test_presto_engine_config(self): {'flavour': 'presto', 'presto_settings': { 'custom_presto_config': 'config.properties:\na=1\nb=2'}, - 'hadoop_settings':{ + 'hadoop_settings': { 'enable_rubix': True }}, 'cluster_info': {'label': ['test_label']}}) @@ -448,7 +453,8 @@ def test_hs2_engine_config(self): temp.write("config.properties:\na=1\nb=2".encode("utf8")) temp.flush() sys.argv = ['qds.py', '--version', 'v2', 'cluster', 'create', '--label', - 'test_label', '--flavour', 'hs2', 
'--node-bootstrap-file', 'test_file_name', '--slave-instance-type', 'c1.xlarge', '--min-nodes', '3', '--parent-cluster-id', '1'] + 'test_label', '--flavour', 'hs2', '--node-bootstrap-file', 'test_file_name', + '--slave-instance-type', 'c1.xlarge', '--min-nodes', '3', '--parent-cluster-id', '1'] Qubole.cloud = None print_command() Connection._api_call = Mock(return_value={}) @@ -456,17 +462,20 @@ def test_hs2_engine_config(self): Connection._api_call.assert_called_with('POST', 'clusters', {'engine_config': {'flavour': 'hs2'}, - 'cluster_info': {'label': ['test_label'], - 'parent_cluster_id': 1, - 'min_nodes': 3, - 'node_bootstrap': 'test_file_name', - 'slave_instance_type': 'c1.xlarge' }}) + 'cluster_info': {'label': ['test_label'], + 'parent_cluster_id': 1, + 'min_nodes': 3, + 'node_bootstrap': 'test_file_name', + 'slave_instance_type': 'c1.xlarge'}}) + def test_hs2_parent_cluster_label(self): with tempfile.NamedTemporaryFile() as temp: temp.write("config.properties:\na=1\nb=2".encode("utf8")) temp.flush() sys.argv = ['qds.py', '--version', 'v2', 'cluster', 'create', '--label', - 'test_label', '--flavour', 'hs2', '--node-bootstrap-file', 'test_file_name', '--slave-instance-type', 'c1.xlarge', '--min-nodes', '3', '--parent-cluster-label', 'parent_cluster_label'] + 'test_label', '--flavour', 'hs2', '--node-bootstrap-file', 'test_file_name', + '--slave-instance-type', 'c1.xlarge', '--min-nodes', '3', '--parent-cluster-label', + 'parent_cluster_label'] Qubole.cloud = None print_command() Connection._api_call = Mock(return_value={}) @@ -478,7 +487,7 @@ def test_hs2_parent_cluster_label(self): 'parent_cluster_label': 'parent_cluster_label', 'min_nodes': 3, 'node_bootstrap': 'test_file_name', - 'slave_instance_type': 'c1.xlarge' }}) + 'slave_instance_type': 'c1.xlarge'}}) def test_spark_engine_config(self): with tempfile.NamedTemporaryFile() as temp: @@ -495,7 +504,7 @@ def test_spark_engine_config(self): {'flavour': 'spark', 'spark_settings': { 'custom_spark_config': 'spark-overrides'}}, - 'cluster_info': {'label': ['test_label'],}}) + 'cluster_info': {'label': ['test_label'], }}) def test_sparkstreaming_engine_config(self): with tempfile.NamedTemporaryFile() as temp: @@ -508,18 +517,19 @@ def test_sparkstreaming_engine_config(self): Connection._api_call = Mock(return_value={}) qds.main() Connection._api_call.assert_called_with('POST', 'clusters', - {'engine_config': - {'flavour': 'sparkstreaming', - 'spark_settings': { - 'custom_spark_config': 'spark-overrides'}}, - 'cluster_info': {'label': ['test_label'],}}) + {'engine_config': + {'flavour': 'sparkstreaming', + 'spark_settings': { + 'custom_spark_config': 'spark-overrides'}}, + 'cluster_info': {'label': ['test_label'], }}) def test_airflow_engine_config(self): with tempfile.NamedTemporaryFile() as temp: temp.write("config.properties:\na=1\nb=2".encode("utf8")) temp.flush() sys.argv = ['qds.py', '--version', 'v2', 'cluster', 'create', '--label', 'test_label', - '--flavour', 'airflow', '--dbtap-id', '1', '--fernet-key', '-1', '--overrides', 'airflow_overrides', '--airflow-version', '1.10.0', '--airflow-python-version', '2.7'] + '--flavour', 'airflow', '--dbtap-id', '1', '--fernet-key', '-1', '--overrides', + 'airflow_overrides', '--airflow-version', '1.10.0', '--airflow-python-version', '2.7'] Qubole.cloud = None print_command() Connection._api_call = Mock(return_value={}) @@ -534,7 +544,22 @@ def test_airflow_engine_config(self): 'version': '1.10.0', 'airflow_python_version': '2.7' }}, - 'cluster_info': {'label': ['test_label'],}}) + 
'cluster_info': {'label': ['test_label'], }}) + + def test_mlflow_engine_config(self): + sys.argv = ['qds.py', '--version', 'v2', 'cluster', 'create', '--label', 'test_label', + '--flavour', 'mlflow', '--mlflow-version', '1.5'] + Qubole.cloud = None + print_command() + Connection._api_call = Mock(return_value={}) + qds.main() + Connection._api_call.assert_called_with('POST', 'clusters', + {'engine_config': + {'flavour': 'mlflow', + 'mlflow_settings': { + 'version': '1.5' + }}, + 'cluster_info': {'label': ['test_label'], }}) def test_persistent_security_groups_v2(self): sys.argv = ['qds.py', '--version', 'v2', 'cluster', 'create', '--label', 'test_label', @@ -586,13 +611,14 @@ def test_image_override(self): Connection._api_call = Mock(return_value={}) qds.main() Connection._api_call.assert_called_with('POST', 'clusters', - {'cluster_info':{'label': ['test_label']}, - 'internal':{'image_uri_overrides': 'test/image1'} - }) + {'cluster_info': {'label': ['test_label']}, + 'internal': {'image_uri_overrides': 'test/image1'} + }) def test_image_version_v2(self): sys.argv = ['qds.py', '--version', 'v2', 'cluster', 'create', '--label', - 'test_label', '--flavour', 'hadoop2', '--slave-instance-type', 'c1.xlarge', '--min-nodes', '3', '--image-version', '1.latest'] + 'test_label', '--flavour', 'hadoop2', '--slave-instance-type', 'c1.xlarge', '--min-nodes', '3', + '--image-version', '1.latest'] Qubole.cloud = None print_command() Connection._api_call = Mock(return_value={}) @@ -600,13 +626,10 @@ def test_image_version_v2(self): Connection._api_call.assert_called_with('POST', 'clusters', {'engine_config': {'flavour': 'hadoop2'}, - 'cluster_info': {'label': ['test_label'], - 'min_nodes': 3, - 'slave_instance_type': 'c1.xlarge', - 'cluster_image_version': '1.latest'}}) - - - + 'cluster_info': {'label': ['test_label'], + 'min_nodes': 3, + 'slave_instance_type': 'c1.xlarge', + 'cluster_image_version': '1.latest'}}) def test_spot_block_duration_v2(self): sys.argv = ['qds.py', '--version', 'v2', 'cluster', 'create', '--label', 'test_label', @@ -632,7 +655,7 @@ def test_slave_request_type_spotblock_v2(self): def test_node_base_cooldown_period_v2(self): sys.argv = ['qds.py', '--version', 'v2', 'cluster', 'create', '--label', 'test_label', - '--node-base-cooldown-period', '10'] + '--node-base-cooldown-period', '10'] print_command() Connection._api_call = Mock(return_value={}) qds.main() @@ -642,14 +665,14 @@ def test_node_base_cooldown_period_v2(self): def test_node_base_cooldown_period_invalid_v2(self): sys.argv = ['qds.py', '--version', 'v2', 'cluster', 'create', '--label', 'test_label', - '--node-base-cooldown-period', 'invalid_value'] + '--node-base-cooldown-period', 'invalid_value'] print_command() with self.assertRaises(SystemExit): qds.main() def test_node_spot_cooldown_period_v2(self): sys.argv = ['qds.py', '--version', 'v2', 'cluster', 'create', '--label', 'test_label', - '--node-spot-cooldown-period', '15'] + '--node-spot-cooldown-period', '15'] print_command() Connection._api_call = Mock(return_value={}) qds.main() @@ -659,22 +682,22 @@ def test_node_spot_cooldown_period_v2(self): def test_node_spot_cooldown_period_invalid_v2(self): sys.argv = ['qds.py', '--version', 'v2', 'cluster', 'create', '--label', 'test_label', - '--node-spot-cooldown-period', 'invalid_value'] + '--node-spot-cooldown-period', 'invalid_value'] print_command() with self.assertRaises(SystemExit): qds.main() def test_env_settings_v2(self): sys.argv = ['qds.py', '--version', 'v2', 'cluster', 'create', '--label', 'test_label', - 
'--env-name', 'test_env', '--python-version', '2.7', '--r-version', '3.3'] + '--env-name', 'test_env', '--python-version', '2.7', '--r-version', '3.3'] print_command() Connection._api_call = Mock(return_value={}) qds.main() Connection._api_call.assert_called_with('POST', 'clusters', {'cluster_info': {'label': ['test_label'], - 'env_settings': {'name':'test_env', - 'python_version':'2.7', - 'r_version':'3.3'}}}) + 'env_settings': {'name': 'test_env', + 'python_version': '2.7', + 'r_version': '3.3'}}}) def test_root_disk_size_v2(self): sys.argv = ['qds.py', '--version', 'v2', 'cluster', 'create', '--label', 'test_label', @@ -693,7 +716,6 @@ def test_root_disk_size_invalid_v2(self): with self.assertRaises(SystemExit): qds.main() - def test_disable_start_stop(self): sys.argv = ['qds.py', '--version', 'v2', 'cluster', 'create', '--label', 'test_label', '--disable-cluster-pause', '--disable-autoscale-node-pause'] @@ -708,6 +730,7 @@ def test_disable_start_stop(self): 'disable_autoscale_node_pause': 1 } }) + def test_start_stop_timeouts(self): sys.argv = ['qds.py', '--version', 'v2', 'cluster', 'create', '--label', 'test_label', '--no-disable-cluster-pause', '--paused-cluster-timeout', '30', @@ -757,10 +780,10 @@ def test_aws_cloud_config(self): 'location': {'aws_region': 'us-east-1'}, 'network_config': - {'bastion_node_public_dns': 'dummydns'}} + { + 'bastion_node_public_dns': 'dummydns'}} }) - def test_azure_cloud_config(self): sys.argv = ['qds.py', '--version', 'v2', '--cloud', 'AZURE', 'cluster', 'update', '123', '--vnet-name', 'testvnet', '--storage-account-name', 'test_account_name', @@ -770,9 +793,12 @@ def test_azure_cloud_config(self): Connection._api_call = Mock(return_value={}) qds.main() Connection._api_call.assert_called_with('PUT', 'clusters/123', {'cloud_config': {'compute_config': - {'compute_subscription_id': 'testsubscriptionid'}, - 'storage_config': {'storage_account_name': 'test_account_name'}, - 'network_config': {'vnet_name': 'testvnet'}}}) + { + 'compute_subscription_id': 'testsubscriptionid'}, + 'storage_config': { + 'storage_account_name': 'test_account_name'}, + 'network_config': { + 'vnet_name': 'testvnet'}}}) def test_azure_master_static_nic(self): sys.argv = ['qds.py', '--version', 'v2', '--cloud', 'AZURE', 'cluster', 'update', '123', @@ -787,7 +813,7 @@ def test_azure_master_static_nic(self): 'network_config': {'vnet_resource_group_name': 'vnetresname', 'subnet_name': 'testsubnet', 'vnet_name': 'testvnet', - 'master_static_nic_name':'nic1'}}}) + 'master_static_nic_name': 'nic1'}}}) def test_azure_master_static_pip(self): sys.argv = ['qds.py', '--version', 'v2', '--cloud', 'AZURE', 'cluster', 'update', '123', @@ -802,7 +828,7 @@ def test_azure_master_static_pip(self): 'network_config': {'vnet_resource_group_name': 'vnetresname', 'subnet_name': 'testsubnet', 'vnet_name': 'testvnet', - 'master_static_public_ip_name':'pip1'}}}) + 'master_static_public_ip_name': 'pip1'}}}) def test_oracle_bmc_cloud_config(self): sys.argv = ['qds.py', '--version', 'v2', '--cloud', 'ORACLE_BMC', 'cluster', 'update', '123', @@ -812,14 +838,16 @@ def test_oracle_bmc_cloud_config(self): print_command() Connection._api_call = Mock(return_value={}) qds.main() - Connection._api_call.assert_called_with('PUT', 'clusters/123', {'cloud_config': - {'network_config': - {'compartment_id': 'abc-compartment'}, - 'compute_config': {'compute_user_id': 'yyyy11'}, - 'storage_config': {'storage_tenant_id': 'xxx11'}, - 'location': {'region': 'us-phoenix-1'} - } - }) + 
Connection._api_call.assert_called_with('PUT', 'clusters/123', {'cloud_config': + {'network_config': + {'compartment_id': 'abc-compartment'}, + 'compute_config': { + 'compute_user_id': 'yyyy11'}, + 'storage_config': { + 'storage_tenant_id': 'xxx11'}, + 'location': {'region': 'us-phoenix-1'} + } + }) def test_oracle_opc_cloud_config(self): sys.argv = ['qds.py', '--version', 'v2', '--cloud', 'ORACLE_OPC', 'cluster', 'update', '123', @@ -829,13 +857,15 @@ def test_oracle_opc_cloud_config(self): print_command() Connection._api_call = Mock(return_value={}) qds.main() - Connection._api_call.assert_called_with('PUT', 'clusters/123', {'cloud_config': - {'network_config': - {'acl': 'acl_1'}, - 'compute_config': {'rest_api_endpoint': 'rest_api_endpoint_1'}, - 'storage_config': {'storage_rest_api_endpoint': 'storage_rest_api_endpoint_1'} - } - }) + Connection._api_call.assert_called_with('PUT', 'clusters/123', {'cloud_config': + {'network_config': + {'acl': 'acl_1'}, + 'compute_config': { + 'rest_api_endpoint': 'rest_api_endpoint_1'}, + 'storage_config': { + 'storage_rest_api_endpoint': 'storage_rest_api_endpoint_1'} + } + }) def test_gcp_cloud_config(self): sys.argv = ['qds.py', '--version', 'v2', '--cloud', 'GCP', 'cluster', 'update', '123', @@ -845,14 +875,16 @@ def test_gcp_cloud_config(self): print_command() Connection._api_call = Mock(return_value={}) qds.main() - Connection._api_call.assert_called_with('PUT', 'clusters/123', {'cloud_config': - {'network_config': - {'subnet': 'abc-subnet'}, - 'compute_config': {'qsa_client_id': 'yyyy11'}, - 'storage_config': {'inst_client_email': 'xxx11'}, - 'location': {'region': 'xxx'} - } - }) + Connection._api_call.assert_called_with('PUT', 'clusters/123', {'cloud_config': + {'network_config': + {'subnet': 'abc-subnet'}, + 'compute_config': { + 'qsa_client_id': 'yyyy11'}, + 'storage_config': { + 'inst_client_email': 'xxx11'}, + 'location': {'region': 'xxx'} + } + }) def test_engine_config(self): with tempfile.NamedTemporaryFile() as temp: @@ -860,17 +892,18 @@ def test_engine_config(self): temp.flush() sys.argv = ['qds.py', '--version', 'v2', 'cluster', 'update', '123', '--use-qubole-placement-policy', '--enable-rubix', - '--custom-hadoop-config',temp.name] + '--custom-hadoop-config', temp.name] Qubole.cloud = None print_command() Connection._api_call = Mock(return_value={}) qds.main() - Connection._api_call.assert_called_with('PUT', 'clusters/123', {'engine_config': - {'hadoop_settings': - {'use_qubole_placement_policy': True, - 'custom_hadoop_config': 'a=1\nb=2', - 'enable_rubix': True}} - }) + Connection._api_call.assert_called_with('PUT', 'clusters/123', {'engine_config': + {'hadoop_settings': + { + 'use_qubole_placement_policy': True, + 'custom_hadoop_config': 'a=1\nb=2', + 'enable_rubix': True}} + }) def test_cluster_info(self): sys.argv = ['qds.py', '--version', 'v2', 'cluster', 'update', '123', @@ -906,7 +939,7 @@ def test_slave_request_type_spotblock_v2(self): def test_node_base_cooldown_period_v2(self): sys.argv = ['qds.py', '--version', 'v2', 'cluster', 'update', '123', - '--node-base-cooldown-period', '10'] + '--node-base-cooldown-period', '10'] print_command() Connection._api_call = Mock(return_value={}) qds.main() @@ -915,14 +948,14 @@ def test_node_base_cooldown_period_v2(self): def test_node_base_cooldown_period_invalid_v2(self): sys.argv = ['qds.py', '--version', 'v2', 'cluster', 'update', '123', - '--node-base-cooldown-period', 'invalid_value'] + '--node-base-cooldown-period', 'invalid_value'] print_command() with 
self.assertRaises(SystemExit): qds.main() def test_node_spot_cooldown_period_v2(self): sys.argv = ['qds.py', '--version', 'v2', 'cluster', 'update', '123', - '--node-spot-cooldown-period', '15'] + '--node-spot-cooldown-period', '15'] print_command() Connection._api_call = Mock(return_value={}) qds.main() @@ -931,7 +964,7 @@ def test_node_spot_cooldown_period_v2(self): def test_node_spot_cooldown_period_invalid_v2(self): sys.argv = ['qds.py', '--version', 'v2', 'cluster', 'update', '123', - '--node-spot-cooldown-period', 'invalid_value'] + '--node-spot-cooldown-period', 'invalid_value'] print_command() with self.assertRaises(SystemExit): qds.main() @@ -944,7 +977,7 @@ def test_root_disk_size_v2(self): qds.main() Connection._api_call.assert_called_with('PUT', 'clusters/123', {'cluster_info': { - 'rootdisk': {'size': 100}}}) + 'rootdisk': {'size': 100}}}) def test_root_disk_size_invalid_v2(self): sys.argv = ['qds.py', '--version', 'v2', 'cluster', 'update', '123', @@ -955,7 +988,6 @@ def test_root_disk_size_invalid_v2(self): class TestClusterClone(QdsCliTestCase): - def test_minimal(self): sys.argv = ['qds.py', '--version', 'v2', 'cluster', 'clone', '1234', '--label', 'test_label1', 'test_label2'] Qubole.cloud = None @@ -963,10 +995,11 @@ def test_minimal(self): Connection._api_call = Mock(return_value={}) qds.main() Connection._api_call.assert_called_with('POST', 'clusters/1234/clone', {'cluster_info': - {'label': ['test_label1', 'test_label2']}}) + {'label': ['test_label1', + 'test_label2']}}) -class TestClusterList(QdsCliTestCase): +class TestClusterList(QdsCliTestCase): def test_id(self): sys.argv = ['qds.py', '--version', 'v2', 'cluster', 'list', '--id', '123'] print_command() @@ -993,7 +1026,7 @@ def test_state_up(self): sys.argv = ['qds.py', '--version', 'v2', 'cluster', 'list', '--state', 'up'] Qubole.cloud = None print_command() - Connection._api_call = Mock(return_value=[{"cluster" : {"state" : "up"}}]) + Connection._api_call = Mock(return_value=[{"cluster": {"state": "up"}}]) qds.main() Connection._api_call.assert_called_with('GET', 'clusters', params=None) @@ -1001,7 +1034,7 @@ def test_state_down(self): sys.argv = ['qds.py', '--version', 'v2', 'cluster', 'list', '--state', 'down'] Qubole.cloud = None print_command() - Connection._api_call = Mock(return_value=[{"cluster" : {"state" : "down"}}]) + Connection._api_call = Mock(return_value=[{"cluster": {"state": "down"}}]) qds.main() Connection._api_call.assert_called_with('GET', 'clusters', params=None) @@ -1009,7 +1042,7 @@ def test_state_terminating(self): sys.argv = ['qds.py', '--version', 'v2', 'cluster', 'list', '--state', 'terminating'] Qubole.cloud = None print_command() - Connection._api_call = Mock(return_value=[{"cluster" : {"state" : "terminating"}}]) + Connection._api_call = Mock(return_value=[{"cluster": {"state": "terminating"}}]) qds.main() Connection._api_call.assert_called_with('GET', 'clusters', params=None) @@ -1017,7 +1050,7 @@ def test_state_pending(self): sys.argv = ['qds.py', '--version', 'v2', 'cluster', 'list', '--state', 'pending'] Qubole.cloud = None print_command() - Connection._api_call = Mock(return_value=[{"cluster" : {"state" : "pending"}}]) + Connection._api_call = Mock(return_value=[{"cluster": {"state": "pending"}}]) qds.main() Connection._api_call.assert_called_with('GET', 'clusters', params=None) @@ -1025,7 +1058,7 @@ def test_state_invalid(self): sys.argv = ['qds.py', '--version', 'v2', 'cluster', 'list', '--state', 'invalid'] Qubole.cloud = None print_command() - Connection._api_call = 
Mock(return_value=[{"cluster" : {"state" : "invalid"}}]) + Connection._api_call = Mock(return_value=[{"cluster": {"state": "invalid"}}]) qds.main() Connection._api_call.assert_called_with('GET', 'clusters', params=None) @@ -1034,7 +1067,7 @@ def test_page(self): Qubole.cloud = None params = {"page": 2} print_command() - Connection._api_call = Mock(return_value=[{"cluster" : {"state" : "up"}}]) + Connection._api_call = Mock(return_value=[{"cluster": {"state": "up"}}]) qds.main() Connection._api_call.assert_called_with('GET', 'clusters', params=params) @@ -1049,7 +1082,7 @@ def test_per_page(self): Qubole.cloud = None params = {"per_page": 5} print_command() - Connection._api_call = Mock(return_value=[{"cluster" : {"state" : "up"}}]) + Connection._api_call = Mock(return_value=[{"cluster": {"state": "up"}}]) qds.main() Connection._api_call.assert_called_with('GET', 'clusters', params=params) @@ -1061,7 +1094,6 @@ def test_per_page_invalid(self): class TestClusterShow(QdsCliTestCase): - def test_connection(self): sys.argv = ['qds.py', '--version', 'v2', 'cluster', 'list', '--label', 'test_label'] print_command() @@ -1070,8 +1102,8 @@ def test_connection(self): qds.main() Connection.__init__.assert_called_with(ANY, 'https://qds.api.url/api/v2', ANY, ANY, ANY, ANY) -class TestClusterStatus(QdsCliTestCase): +class TestClusterStatus(QdsCliTestCase): def test_status_api(self): sys.argv = ['qds.py', '--version', 'v2', 'cluster', 'status', '123'] Qubole.cloud = None From c02dd7cab300a4dc0241a093007febf410659ed8 Mon Sep 17 00:00:00 2001 From: Joy Lal Chattaraj <8450903+chattarajoy@users.noreply.github.com> Date: Thu, 27 Feb 2020 14:43:00 +0530 Subject: [PATCH 40/69] Validate a JSON response before checking response code (#308) --- qds_sdk/connection.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/qds_sdk/connection.py b/qds_sdk/connection.py index 3ec4c0a8..c456a4e1 100644 --- a/qds_sdk/connection.py +++ b/qds_sdk/connection.py @@ -131,12 +131,12 @@ def _api_call_raw(self, req_type, path, data=None, params=None): else: raise NotImplemented + self._validate_json(r) self._handle_error(r) return r def _api_call(self, req_type, path, data=None, params=None): response = self._api_call_raw(req_type, path, data=data, params=params) - self._validate_json(response) return response.json() @staticmethod From b44b2a8535fa610b55a75a10e75bdd6ab81c36a9 Mon Sep 17 00:00:00 2001 From: Joy Lal Chattaraj <8450903+chattarajoy@users.noreply.github.com> Date: Mon, 2 Mar 2020 18:38:36 +0530 Subject: [PATCH 41/69] Refactor Connection Adapter name (#309) --- qds_sdk/connection.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/qds_sdk/connection.py b/qds_sdk/connection.py index c456a4e1..f1328fc7 100644 --- a/qds_sdk/connection.py +++ b/qds_sdk/connection.py @@ -22,9 +22,9 @@ """ -class MyAdapter(HTTPAdapter): +class RequestAdapter(HTTPAdapter): def __init__(self, *args, **kwargs): - super(MyAdapter, self).__init__(*args, **kwargs) + super(RequestAdapter, self).__init__(*args, **kwargs) def init_poolmanager(self, connections, maxsize,block=False): self.poolmanager = PoolManager(num_pools=connections, @@ -49,11 +49,11 @@ def __init__(self, auth, rest_url, skip_ssl_cert_check, self.base_retry_delay = base_retry_delay if reuse: self.session = requests.Session() - self.session.mount('https://', MyAdapter()) + self.session.mount('https://', RequestAdapter()) # retries for get requests self.session_with_retries = requests.Session() - self.session_with_retries.mount('https://', 
MyAdapter(max_retries=3)) + self.session_with_retries.mount('https://', RequestAdapter(max_retries=3)) def retry(ExceptionToCheck, tries=5, delay=10, backoff=2): def deco_retry(f): @@ -107,7 +107,7 @@ def _api_call_raw(self, req_type, path, data=None, params=None): else: x = requests x_with_retries = requests.Session() - x_with_retries.mount('https://', MyAdapter(max_retries=3)) + x_with_retries.mount('https://', RequestAdapter(max_retries=3)) kwargs = {'headers': self._headers, 'auth': self.auth, 'verify': not self.skip_ssl_cert_check} @@ -131,12 +131,12 @@ def _api_call_raw(self, req_type, path, data=None, params=None): else: raise NotImplemented - self._validate_json(r) self._handle_error(r) return r def _api_call(self, req_type, path, data=None, params=None): response = self._api_call_raw(req_type, path, data=data, params=params) + self._validate_json(response) return response.json() @staticmethod From 9495e28cc06bbfa8e4265600029724e48dc7b873 Mon Sep 17 00:00:00 2001 From: Tarun Gavara Date: Thu, 12 Mar 2020 12:22:29 +0530 Subject: [PATCH 42/69] new: usr: JUPY-567, SDK-394: Add SDK bindings for JupyterNotebookCommand (#304) * changes * changes * changes * add tests * fix tests * changes to make scheduler create from command line work * Remove non ASCII characters Co-Authored-By: Joy Lal Chattaraj <8450903+chattarajoy@users.noreply.github.com> * Remove redundant parenthesis Co-Authored-By: Joy Lal Chattaraj <8450903+chattarajoy@users.noreply.github.com> * Update qds_sdk/commands.py Co-Authored-By: Joy Lal Chattaraj <8450903+chattarajoy@users.noreply.github.com> * changes * changes * add support for macros * add name option * add support for tags * add support for notify and timeout * update tests * add support for pool * update tests * add support for retry and retry_delay * update tests * add support for jupyter_notebook_id * update tests * replace notebook id with uuid * remove support for uuid * seperate validate json input method * fix style * some more style fixes * changes Co-authored-by: Gavara Tarun Co-authored-by: Joy Lal Chattaraj <8450903+chattarajoy@users.noreply.github.com> --- bin/qds.py | 5 +- qds_sdk/commands.py | 86 +++++++++++++++ qds_sdk/scheduler.py | 2 +- tests/test_command.py | 250 ++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 340 insertions(+), 3 deletions(-) diff --git a/bin/qds.py b/bin/qds.py index 48b74218..d56e2fdc 100755 --- a/bin/qds.py +++ b/bin/qds.py @@ -35,7 +35,8 @@ "shellcmd": ShellCommand, "dbexportcmd": DbExportCommand, "dbimportcmd": DbImportCommand, - "prestocmd": PrestoCommand + "prestocmd": PrestoCommand, + "jupyternotebookcmd": JupyterNotebookCommand } SensorClasses = { @@ -46,7 +47,7 @@ usage_str = ( "Usage: qds.py [options] \n" "\nCommand subcommands:\n" - " \n" + " \n" " submit [cmd-specific-args .. ] : submit cmd & print id\n" " run [cmd-specific-args .. ] : submit cmd & wait. 
print results\n"
     " check : id -> print the cmd object for this id\n"
diff --git a/qds_sdk/commands.py b/qds_sdk/commands.py
index fd715fd2..1d04bcd9 100755
--- a/qds_sdk/commands.py
+++ b/qds_sdk/commands.py
@@ -1347,6 +1347,82 @@ def parse(cls, args):
         v["command_type"] = "DbTapQueryCommand"
         return v
 
+
+class JupyterNotebookCommand(Command):
+    usage = "jupyternotebookcmd [options]"
+
+    optparser = GentleOptionParser(usage=usage)
+    optparser.add_option("--path", dest="path",
+                         help="Path including name of the Jupyter notebook to \
+                              be run with extension.")
+    optparser.add_option("--cluster-label", dest="label",
+                         help="Label of the cluster on which this command \
+                              should be run. If this parameter is not specified \
+                              then label = 'default' is used.")
+    optparser.add_option("--arguments", dest="arguments",
+                         help="Valid JSON to be sent to the notebook. Specify \
+                              the parameters in notebooks and pass the parameter value \
+                              using the JSON format. key is the parameter's name and \
+                              value is the parameter's value. Supported types in \
+                              parameters are string, integer, float and boolean.")
+    optparser.add_option("--macros", dest="macros",
+                         help="expressions to expand macros used in query")
+    optparser.add_option("--name", dest="name", help="Assign a name to this query")
+    optparser.add_option("--tags", dest="tags",
+                         help="comma-separated list of tags to be associated with \
+                              the query ( e.g. tag1 tag1,tag2 )")
+    optparser.add_option("--notify", action="store_true", dest="can_notify",
+                         default=False, help="sends an email on command completion")
+    optparser.add_option("--timeout", dest="timeout", type="int",
+                         help="Timeout for command execution in seconds")
+    optparser.add_option("--retry", dest="retry", choices=['1', '2', '3'],
+                         help="Number of retries for a job")
+    optparser.add_option("--retry-delay", dest="retry_delay", type="int",
+                         help="Time interval between the retries when a job fails.")
+    optparser.add_option("--pool", dest="pool",
+                         help="Specify the Fairscheduler pool name for the \
+                              command to use")
+    optparser.add_option("--print-logs", action="store_true", dest="print_logs",
+                         default=False, help="Fetch logs and print them to stderr.")
+    optparser.add_option("--print-logs-live", action="store_true",
+                         dest="print_logs_live", default=False, help="Fetch logs \
+                         and print them to stderr while command is running.")
+
+    @classmethod
+    def parse(cls, args):
+        """
+        Parse command line arguments to construct a dictionary of command
+        parameters that can be used to create a command
+
+        Args:
+            `args`: sequence of arguments
+
+        Returns:
+            Dictionary that can be used in create method
+
+        Raises:
+            ParseError: when the arguments are not correct
+        """
+        try:
+            options, args = cls.optparser.parse_args(args)
+            if options.path is None:
+                raise ParseError("Notebook Path must be specified",
+                                 cls.optparser.format_help())
+            if options.arguments is not None:
+                validate_json_input(options.arguments, 'Arguments', cls)
+            if options.macros is not None:
+                options.macros = validate_json_input(options.macros, 'Macros', cls)
+            if options.retry is not None:
+                options.retry = int(options.retry)
+        except OptionParsingError as e:
+            raise ParseError(e.msg, cls.optparser.format_help())
+        except OptionParsingExit as e:
+            return None
+
+        params = vars(options)
+        params["command_type"] = "JupyterNotebookCommand"
+        return params
+
 class SignalHandler:
     """
     Catch terminate signals to allow graceful termination of run()
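As an illustration (not part of the patch): a minimal sketch of driving the new command type from Python, assuming the generic Command.create/run plumbing that the other command classes in this file use; the API token, notebook path, and cluster label below are hypothetical.

    from qds_sdk.qubole import Qubole
    from qds_sdk.commands import JupyterNotebookCommand

    Qubole.configure(api_token='<your_api_token>')  # hypothetical token
    # Roughly equivalent to:
    #   qds.py jupyternotebookcmd run --path folder/file.ipynb
    #          --cluster-label demo-cluster --arguments '{"key1": "val1"}'
    cmd = JupyterNotebookCommand.run(path='folder/file.ipynb',
                                     label='demo-cluster',
                                     arguments='{"key1": "val1"}')
    print(cmd.status)

@@ -1367,6 +1443,16 @@ def handler(self, signum, frame):
         if signum in self.term_signals: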
self.received_term_signal = True + +def validate_json_input(string, option_type, cls): + """Converts String to JSON and throws ParseError if string is not valid JSON""" + + try: + return json.loads(string) + except ValueError as e: + raise ParseError("Given %s is not valid JSON: %s" % (option_type, str(e)), + cls.optparser.format_help()) + def _read_iteratively(key_instance, fp, delim): key_instance.open_read() while True: diff --git a/qds_sdk/scheduler.py b/qds_sdk/scheduler.py index b4d414ab..4ac56665 100644 --- a/qds_sdk/scheduler.py +++ b/qds_sdk/scheduler.py @@ -118,7 +118,7 @@ def filter_fields(schedule, fields): def create(args): with open(args.data) as f: spec = json.load(f) - schedule = Scheduler(spec) + schedule = Scheduler.create(**spec) return json.dumps(schedule.attributes, sort_keys=True, indent=4) @staticmethod diff --git a/tests/test_command.py b/tests/test_command.py index 55198415..f8caf745 100644 --- a/tests/test_command.py +++ b/tests/test_command.py @@ -141,6 +141,13 @@ def test_dbtapquerycmd(self): qds.main() Connection._api_call.assert_called_with("GET", "commands/123", params={'include_query_properties': 'false'}) + def test_jupyternotebookcmd(self): + sys.argv = ['qds.py', 'jupyternotebookcmd', 'check', '123'] + print_command() + Connection._api_call = Mock(return_value={}) + qds.main() + Connection._api_call.assert_called_with("GET", "commands/123", params={'include_query_properties': 'false'}) + def test_includequeryproperty(self): sys.argv = ['qds.py', 'hivecmd', 'check', '123', 'true'] print_command() @@ -224,6 +231,14 @@ def test_dbtapquerycmd(self): Connection._api_call.assert_called_with("PUT", "commands/123", {'status': 'kill'}) + def test_jupyternotebookcmd(self): + sys.argv = ['qds.py', 'jupyternotebookcmd', 'cancel', '123'] + print_command() + Connection._api_call = Mock(return_value={'kill_succeeded': True}) + qds.main() + Connection._api_call.assert_called_with("PUT", "commands/123", + {'status': 'kill'}) + class TestCommandGetJobs(QdsCliTestCase): @@ -2029,6 +2044,241 @@ def test_submit_with_valid_local_script_location(self): 'command_type': 'DbTapQueryCommand', 'can_notify': False}) +class TestJupyterNotebookCommand(QdsCliTestCase): + + def test_submit_none(self): + sys.argv = ['qds.py', 'jupyternotebookcmd', 'submit'] + print_command() + with self.assertRaises(qds_sdk.exception.ParseError): + qds.main() + + def test_submit_no_path(self): + sys.argv = ['qds.py', 'jupyternotebookcmd', 'submit', '--cluster-label', 'demo-cluster'] + print_command() + with self.assertRaises(qds_sdk.exception.ParseError): + qds.main() + + def test_submit_improper_macros(self): + sys.argv = ['qds.py', 'jupyternotebookcmd', 'submit', '--path', 'folder/file', + '--macros', '{"key1"}'] + print_command() + with self.assertRaises(qds_sdk.exception.ParseError): + qds.main() + + def test_submit_improper_arguments(self): + sys.argv = ['qds.py', 'jupyternotebookcmd', 'submit', '--path', 'folder/file', + '--arguments', '{"key1"}'] + print_command() + with self.assertRaises(qds_sdk.exception.ParseError): + qds.main() + + def test_submit_retry_more_than_3(self): + sys.argv = ['qds.py', 'jupyternotebookcmd', 'submit', '--path', 'folder/file', + '--retry', '4'] + print_command() + with self.assertRaises(qds_sdk.exception.ParseError): + qds.main() + + def test_submit_cluster_label(self): + sys.argv = ['qds.py', 'jupyternotebookcmd', 'submit', '--path', 'folder/file', + '--cluster-label', 'demo-cluster'] + print_command() + Connection._api_call = Mock(return_value={'id': 1234}) + 
qds.main() + Connection._api_call.assert_called_with('POST', 'commands', + {'retry': None, + 'name': None, + 'tags': None, + 'label': 'demo-cluster', + 'macros': None, + 'arguments': None, + 'timeout': None, + 'path': 'folder/file', + 'retry_delay': None, + 'command_type': 'JupyterNotebookCommand', + 'can_notify': False, + 'pool': None}) + + def test_submit_macros(self): + sys.argv = ['qds.py', 'jupyternotebookcmd', 'submit', '--path', 'folder/file', + '--macros', '[{"key1":"11","key2":"22"}, {"key3":"key1+key2"}]'] + print_command() + Connection._api_call = Mock(return_value={'id': 1234}) + qds.main() + Connection._api_call.assert_called_with('POST', 'commands', + {'retry': None, + 'name': None, + 'tags': None, + 'label': None, + 'macros': [{"key1":"11","key2":"22"}, {"key3":"key1+key2"}], + 'arguments': None, + 'timeout': None, + 'path': 'folder/file', + 'retry_delay': None, + 'command_type': 'JupyterNotebookCommand', + 'can_notify': False, + 'pool': None}) + + def test_submit_arguments(self): + sys.argv = ['qds.py', 'jupyternotebookcmd', 'submit', '--path', 'folder/file', + '--arguments', '{"key1":"val1", "key2":"val2"}'] + print_command() + Connection._api_call = Mock(return_value={'id': 1234}) + qds.main() + Connection._api_call.assert_called_with('POST', 'commands', + {'retry': None, + 'name': None, + 'tags': None, + 'label': None, + 'macros': None, + 'arguments': '{"key1":"val1", "key2":"val2"}', + 'timeout': None, + 'path': 'folder/file', + 'retry_delay': None, + 'command_type': 'JupyterNotebookCommand', + 'can_notify': False, + 'pool': None}) + + def test_submit_tags(self): + sys.argv = ['qds.py', 'jupyternotebookcmd', 'submit', '--path', 'folder/file', + '--tags', 'abc,def'] + print_command() + Connection._api_call = Mock(return_value={'id': 1234}) + qds.main() + Connection._api_call.assert_called_with('POST', 'commands', + {'retry': None, + 'name': None, + 'tags': ['abc', 'def'], + 'label': None, + 'macros': None, + 'arguments': None, + 'timeout': None, + 'path': 'folder/file', + 'retry_delay': None, + 'command_type': 'JupyterNotebookCommand', + 'can_notify': False, + 'pool': None}) + + def test_submit_name(self): + sys.argv = ['qds.py', 'jupyternotebookcmd', 'submit', '--path', 'folder/file', + '--name', 'demo'] + print_command() + Connection._api_call = Mock(return_value={'id': 1234}) + qds.main() + Connection._api_call.assert_called_with('POST', 'commands', + {'retry': None, + 'name': 'demo', + 'tags': None, + 'label': None, + 'macros': None, + 'arguments': None, + 'timeout': None, + 'path': 'folder/file', + 'retry_delay': None, + 'command_type': 'JupyterNotebookCommand', + 'can_notify': False, + 'pool': None}) + + def test_submit_notify(self): + sys.argv = ['qds.py', 'jupyternotebookcmd', 'submit', '--path', 'folder/file', + '--notify'] + print_command() + Connection._api_call = Mock(return_value={'id': 1234}) + qds.main() + Connection._api_call.assert_called_with('POST', 'commands', + {'retry': None, + 'name': None, + 'tags': None, + 'label': None, + 'macros': None, + 'arguments': None, + 'timeout': None, + 'path': 'folder/file', + 'retry_delay': None, + 'command_type': 'JupyterNotebookCommand', + 'can_notify': True, + 'pool': None}) + + def test_submit_timeout(self): + sys.argv = ['qds.py', 'jupyternotebookcmd', 'submit', '--path', 'folder/file', + '--timeout', '10'] + print_command() + Connection._api_call = Mock(return_value={'id': 1234}) + qds.main() + Connection._api_call.assert_called_with('POST', 'commands', + {'retry': None, + 'name': None, + 'tags': None, + 
'label': None,
+                                                 'macros': None,
+                                                 'arguments': None,
+                                                 'timeout': 10,
+                                                 'path': 'folder/file',
+                                                 'retry_delay': None,
+                                                 'command_type': 'JupyterNotebookCommand',
+                                                 'can_notify': False,
+                                                 'pool': None})
+
+    def test_submit_pool(self):
+        sys.argv = ['qds.py', 'jupyternotebookcmd', 'submit', '--path', 'folder/file',
+                    '--pool', 'batch']
+        print_command()
+        Connection._api_call = Mock(return_value={'id': 1234})
+        qds.main()
+        Connection._api_call.assert_called_with('POST', 'commands',
+                                                {'retry': None,
+                                                 'name': None,
+                                                 'tags': None,
+                                                 'label': None,
+                                                 'macros': None,
+                                                 'arguments': None,
+                                                 'timeout': None,
+                                                 'path': 'folder/file',
+                                                 'retry_delay': None,
+                                                 'command_type': 'JupyterNotebookCommand',
+                                                 'can_notify': False,
+                                                 'pool': 'batch'})
+
+    def test_submit_retry(self):
+        sys.argv = ['qds.py', 'jupyternotebookcmd', 'submit', '--path', 'folder/file',
+                    '--retry', '1']
+        print_command()
+        Connection._api_call = Mock(return_value={'id': 1234})
+        qds.main()
+        Connection._api_call.assert_called_with('POST', 'commands',
+                                                {'retry': 1,
+                                                 'name': None,
+                                                 'tags': None,
+                                                 'label': None,
+                                                 'macros': None,
+                                                 'arguments': None,
+                                                 'timeout': None,
+                                                 'path': 'folder/file',
+                                                 'retry_delay': None,
+                                                 'command_type': 'JupyterNotebookCommand',
+                                                 'can_notify': False,
+                                                 'pool': None})
+
+    def test_submit_retry_delay(self):
+        sys.argv = ['qds.py', 'jupyternotebookcmd', 'submit', '--path', 'folder/file',
+                    '--retry-delay', '2']
+        print_command()
+        Connection._api_call = Mock(return_value={'id': 1234})
+        qds.main()
+        Connection._api_call.assert_called_with('POST', 'commands',
+                                                {'retry': None,
+                                                 'name': None,
+                                                 'tags': None,
+                                                 'label': None,
+                                                 'macros': None,
+                                                 'arguments': None,
+                                                 'timeout': None,
+                                                 'path': 'folder/file',
+                                                 'retry_delay': 2,
+                                                 'command_type': 'JupyterNotebookCommand',
+                                                 'can_notify': False,
+                                                 'pool': None})
+
 class TestGetResultsCommand(QdsCliTestCase):
     def test_result_with_enable_header_true(self):
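An aside on the helper these tests exercise: a tiny sketch of validate_json_input's contract (illustrative only; ParseError lives in qds_sdk.exception).

    from qds_sdk.commands import JupyterNotebookCommand, validate_json_input
    from qds_sdk.exception import ParseError

    # Valid JSON is parsed and returned as a Python object...
    macros = validate_json_input('[{"key1": "11"}]', 'Macros', JupyterNotebookCommand)
    # ...while malformed JSON surfaces as a ParseError carrying the parser's help text.
    try:
        validate_json_input('{"key1"}', 'Macros', JupyterNotebookCommand)
    except ParseError:
        pass

From f2023d7c9a83aa6554b131c8a3775484feac07a0 Mon Sep 17 00:00:00 2001
From: Joy Lal Chattaraj <8450903+chattarajoy@users.noreply.github.com>
Date: Wed, 18 Mar 2020 18:31:11 +0530
Subject: [PATCH 43/69] Release Version 1.15.0

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 245d2692..5a2f1736 100644
--- a/setup.py
+++ b/setup.py
@@ -12,7 +12,7 @@ def read(fname):
 setup(
     name="qds_sdk",
-    version="1.14.0",
+    version="1.15.0",
     author="Qubole",
     author_email="dev@qubole.com",
     description=("Python SDK for coding to the Qubole Data Service API"),

From ed4347b9dc4f1c0891a1e891b9cc0bafe87e5961 Mon Sep 17 00:00:00 2001
From: shekharsaurabh <37484772+shekharsaurabh@users.noreply.github.com>
Date: Mon, 23 Mar 2020 13:20:48 +0530
Subject: [PATCH 44/69] SDK-386: Changing retry conditions for 503 response (#311)

---
 bin/qds.py            |  4 ++--
 qds_sdk/connection.py | 33 ++++++++++++++++++++-----------
 qds_sdk/exception.py  |  5 +++--
 qds_sdk/qubole.py     |  2 +-
 4 files changed, 28 insertions(+), 16 deletions(-)

diff --git a/bin/qds.py b/bin/qds.py
index d56e2fdc..bc434023 100755
--- a/bin/qds.py
+++ b/bin/qds.py
@@ -600,7 +600,7 @@ def main():
                          type=int,
                          default=os.getenv('QDS_MAX_RETRIES'),
                          help="Number of re-attempts for an api-call in case of "
-                         " retryable exceptions. Defaults to 5.")
+                         " retryable exceptions.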
Defaults to 6.") optparser.add_option("-v", dest="verbose", action="store_true", default=False, @@ -634,7 +634,7 @@ def main(): options.poll_interval = 5 if options.max_retries is None: - options.max_retries = 5 + options.max_retries = 6 if options.base_retry_delay is None: options.base_retry_delay = 10 diff --git a/qds_sdk/connection.py b/qds_sdk/connection.py index f1328fc7..3cdb4a88 100644 --- a/qds_sdk/connection.py +++ b/qds_sdk/connection.py @@ -36,7 +36,7 @@ def init_poolmanager(self, connections, maxsize,block=False): class Connection: def __init__(self, auth, rest_url, skip_ssl_cert_check, - reuse=True, max_retries=5, + reuse=True, max_retries=6, base_retry_delay=10): self.auth = auth self.rest_url = rest_url @@ -55,7 +55,7 @@ def __init__(self, auth, rest_url, skip_ssl_cert_check, self.session_with_retries = requests.Session() self.session_with_retries.mount('https://', RequestAdapter(max_retries=3)) - def retry(ExceptionToCheck, tries=5, delay=10, backoff=2): + def retry(ExceptionToCheck, tries=6, delay=10, backoff=2): def deco_retry(f): @wraps(f) def f_retry(self, *args, **kwargs): @@ -78,23 +78,23 @@ def f_retry(self, *args, **kwargs): return f_retry # true decorator return deco_retry - @retry((RetryWithDelay, requests.Timeout, ServerError, ApiThrottledRetry)) + @retry((RetryWithDelay, requests.Timeout, ServerError, AlwaysRetryWithDelay)) def get_raw(self, path, params=None): return self._api_call_raw("GET", path, params=params) - @retry((RetryWithDelay, requests.Timeout, ServerError, ApiThrottledRetry)) + @retry((RetryWithDelay, requests.Timeout, ServerError, AlwaysRetryWithDelay)) def get(self, path, params=None): return self._api_call("GET", path, params=params) - @retry(ApiThrottledRetry) + @retry(AlwaysRetryWithDelay) def put(self, path, data=None): return self._api_call("PUT", path, data) - @retry(ApiThrottledRetry) + @retry(AlwaysRetryWithDelay) def post(self, path, data=None): return self._api_call("POST", path, data) - @retry(ApiThrottledRetry) + @retry(AlwaysRetryWithDelay) def delete(self, path, data=None): return self._api_call("DELETE", path, data) @@ -188,15 +188,15 @@ def _handle_error(response): elif code == 422: sys.stderr.write(response.text + "\n") raise ResourceInvalid(response) - elif code in (502, 503, 504): + elif code in (502, 504): sys.stderr.write(response.text + "\n") raise RetryWithDelay(response) elif code == 449: sys.stderr.write(response.text + "\n") - raise RetryWithDelay(response, "Data requested is unavailable. Retrying...") - elif code == 429: + raise RetryWithDelay(response, Connection._get_error_message(code)) + elif code in (429, 503): sys.stderr.write(response.text + "\n") - raise ApiThrottledRetry(response, "Too many requests. Retrying...") + raise AlwaysRetryWithDelay(response, Connection._get_error_message(code)) elif 401 <= code < 500: sys.stderr.write(response.text + "\n") raise ClientError(response) @@ -214,3 +214,14 @@ def _validate_json(response): except Exception as e: sys.stderr.write("Error: {0}\nInvalid Response from Server, please contact Qubole Support".format(str(e))) raise ServerError(response) + + @staticmethod + def _get_error_message(code): + if code == 429: + return "Too many requests. Retrying..." + elif code == 449: + return "Data requested is unavailable. Retrying..." + elif code == 503: + return "Service Unavailable. Retrying..." 
+            else:
+                return ''
diff --git a/qds_sdk/exception.py b/qds_sdk/exception.py
index 5f545a30..ec8f83cb 100644
--- a/qds_sdk/exception.py
+++ b/qds_sdk/exception.py
@@ -87,7 +87,8 @@ class MethodNotAllowed(ClientError):
     pass
 
 
-class ApiThrottledRetry(ClientError):
-    """An error raised when upstream requests are throttled."""
+class AlwaysRetryWithDelay(ClientError):
+    """An error that will be retried across all HTTP methods"""  # 429 Too Many Requests
+    # 503 Service Unavailable
     pass
diff --git a/qds_sdk/qubole.py b/qds_sdk/qubole.py
index df830a2c..cf87a681 100644
--- a/qds_sdk/qubole.py
+++ b/qds_sdk/qubole.py
@@ -40,7 +40,7 @@ class Qubole:
     def configure(cls, api_token,
                   api_url="https://api.qubole.com/api/", version="v1.2",
                   poll_interval=5, skip_ssl_cert_check=False, cloud_name="AWS",
-                  base_retry_delay=10, max_retries=5):
+                  base_retry_delay=10, max_retries=6):
         """
         Set parameters governing interaction with QDS
         Args:
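For scale, a back-of-the-envelope sketch of what the new defaults mean in practice (illustrative only; it assumes the retry decorator keeps its usual pattern of sleeping, then doubling the delay, between attempts):

    # tries=6, delay=10, backoff=2: five sleeps separate the six attempts
    delays = [10 * 2 ** i for i in range(5)]  # [10, 20, 40, 80, 160] seconds
    print(sum(delays))                        # 310 seconds of cumulative back-off

From 8e07683db338aeee66c4075811184d82cd52b12f Mon Sep 17 00:00:00 2001
From: santosh
Date: Mon, 30 Mar 2020 17:06:37 +0530
Subject: [PATCH 45/69] SDK-397: Add DBTap Config for MlFlow Cluster (#313)

---
 qds_sdk/engine.py       | 16 ++++++++++++----
 tests/test_clusterv2.py |  5 +++--
 2 files changed, 15 insertions(+), 6 deletions(-)

diff --git a/qds_sdk/engine.py b/qds_sdk/engine.py
index b7594b3e..c84ca366 100644
--- a/qds_sdk/engine.py
+++ b/qds_sdk/engine.py
@@ -33,7 +33,8 @@ def set_engine_config(self,
                           airflow_python_version=None,
                           is_ha=None,
                           enable_rubix=None,
-                          mlflow_version=None):
+                          mlflow_version=None,
+                          mlflow_dbtap_id=None):
         '''
 
         Args:
@@ -79,7 +80,7 @@ def set_engine_config(self,
         self.set_presto_settings(presto_version, custom_presto_config)
         self.set_spark_settings(spark_version, custom_spark_config)
         self.set_airflow_settings(dbtap_id, fernet_key, overrides, airflow_version, airflow_python_version)
-        self.set_mlflow_settings(mlflow_version)
+        self.set_mlflow_settings(mlflow_version, mlflow_dbtap_id)
 
     def set_fairscheduler_settings(self,
                                    fairscheduler_config_xml=None,
@@ -127,8 +128,10 @@ def set_airflow_settings(self,
         self.airflow_settings['airflow_python_version'] = airflow_python_version
 
     def set_mlflow_settings(self,
-                            mlflow_version="1.5"):
+                            mlflow_version="1.7",
+                            mlflow_dbtap_id=None):
         self.mlflow_settings['version'] = mlflow_version
+        self.mlflow_settings['dbtap_id'] = mlflow_dbtap_id
 
     def set_engine_config_settings(self, arguments):
         custom_hadoop_config = util._read_file(arguments.custom_hadoop_config_file)
@@ -150,7 +153,8 @@ def set_engine_config_settings(self, arguments):
                                airflow_version=arguments.airflow_version,
                                airflow_python_version=arguments.airflow_python_version,
                                enable_rubix=arguments.enable_rubix,
-                               mlflow_version=arguments.mlflow_version)
+                               mlflow_version=arguments.mlflow_version,
+                               mlflow_dbtap_id=arguments.mlflow_dbtap_id)
 
     @staticmethod
     def engine_parser(argparser):
@@ -253,4 +257,8 @@ def engine_parser(argparser):
                                            dest="mlflow_version",
                                            default=None,
                                            help="mlflow version for mlflow cluster", )
+        mlflow_settings_group.add_argument("--mlflow-dbtap-id",
+                                           dest="mlflow_dbtap_id",
+                                           default=None,
+                                           help="dbtap id for mlflow cluster", )
 
diff --git a/tests/test_clusterv2.py b/tests/test_clusterv2.py
index 078108ff..17859456 100644
--- a/tests/test_clusterv2.py
+++ b/tests/test_clusterv2.py
@@ -548,7 +548,7 @@ def test_airflow_engine_config(self):
     def test_mlflow_engine_config(self):
         sys.argv = ['qds.py', '--version', 'v2', 'cluster', 'create', '--label', 'test_label',
-                    '--flavour', 'mlflow', '--mlflow-version', '1.5']
+                    '--flavour', 'mlflow', '--mlflow-version',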
'1.7', '--mlflow-dbtap-id', '-1'] Qubole.cloud = None print_command() Connection._api_call = Mock(return_value={}) @@ -557,7 +557,8 @@ def test_mlflow_engine_config(self): {'engine_config': {'flavour': 'mlflow', 'mlflow_settings': { - 'version': '1.5' + 'version': '1.7', + 'dbtap_id': '-1' }}, 'cluster_info': {'label': ['test_label'], }}) From 702095f90e54958545288f17952e2538c6dc70a5 Mon Sep 17 00:00:00 2001 From: satyabolnedi <57746175+satyabolnedi@users.noreply.github.com> Date: Wed, 1 Apr 2020 12:43:15 +0530 Subject: [PATCH 46/69] SDK-387: Add Support for Hive Version (#303) --- qds_sdk/engine.py | 18 +++++++++++++++++- tests/test_clusterv2.py | 20 ++++++++++++++++++++ 2 files changed, 37 insertions(+), 1 deletion(-) diff --git a/qds_sdk/engine.py b/qds_sdk/engine.py index c84ca366..534c940a 100644 --- a/qds_sdk/engine.py +++ b/qds_sdk/engine.py @@ -11,6 +11,7 @@ class Engine: def __init__(self, flavour=None): self.flavour = flavour self.hadoop_settings = {} + self.hive_settings = {} self.presto_settings = {} self.spark_settings = {} self.airflow_settings = {} @@ -26,6 +27,7 @@ def set_engine_config(self, custom_presto_config=None, spark_version=None, custom_spark_config=None, + hive_version=None, dbtap_id=None, fernet_key=None, overrides=None, @@ -56,6 +58,8 @@ def set_engine_config(self, custom_spark_config: Specify the custom Spark configuration overrides + hive_version: Version of hive to be used in cluster + dbtap_id: ID of the data store inside QDS fernet_key: Encryption key for sensitive information inside airflow database. @@ -75,8 +79,10 @@ def set_engine_config(self, ''' - self.set_hadoop_settings(custom_hadoop_config, use_qubole_placement_policy, is_ha, fairscheduler_config_xml, + self.set_hadoop_settings(custom_hadoop_config, use_qubole_placement_policy, + is_ha, fairscheduler_config_xml, default_pool, enable_rubix) + self.set_hive_settings(hive_version) self.set_presto_settings(presto_version, custom_presto_config) self.set_spark_settings(spark_version, custom_spark_config) self.set_airflow_settings(dbtap_id, fernet_key, overrides, airflow_version, airflow_python_version) @@ -103,6 +109,10 @@ def set_hadoop_settings(self, self.set_fairscheduler_settings(fairscheduler_config_xml, default_pool) self.hadoop_settings['enable_rubix'] = enable_rubix + def set_hive_settings(self, + hive_version=None): + self.hive_settings['hive_version'] = hive_version + def set_presto_settings(self, presto_version=None, custom_presto_config=None): @@ -147,6 +157,7 @@ def set_engine_config_settings(self, arguments): custom_presto_config=custom_presto_config, spark_version=arguments.spark_version, custom_spark_config=arguments.custom_spark_config, + hive_version=arguments.hive_version, dbtap_id=arguments.dbtap_id, fernet_key=arguments.fernet_key, overrides=arguments.overrides, @@ -218,6 +229,11 @@ def engine_parser(argparser): dest="presto_custom_config_file", help="location of file containg custom" + " presto configuration overrides") + hive_settings_group = argparser.add_argument_group("hive version settings") + hive_settings_group.add_argument("--hive_version", + dest="hive_version", + default=None, + help="Version of hive for the cluster",) spark_settings_group = argparser.add_argument_group("spark settings") spark_settings_group.add_argument("--spark-version", diff --git a/tests/test_clusterv2.py b/tests/test_clusterv2.py index 078108ff..17859456 100644 --- a/tests/test_clusterv2.py +++ b/tests/test_clusterv2.py @@ -562,6 +562,26 @@ def test_mlflow_engine_config(self): }}, 'cluster_info': 
{'label': ['test_label'], }})
 
+    def test_hive_engine_config(self):
+        with tempfile.NamedTemporaryFile() as temp:
+            temp.write("config.properties:\na=1\nb=2".encode("utf8"))
+            temp.flush()
+            sys.argv = ['qds.py', '--version', 'v2', 'cluster', 'create', '--label', 'test_label',
+                        '--flavour', 'hadoop2', '--hive_version', '2.3']
+            Qubole.cloud = None
+            print_command()
+            Connection._api_call = Mock(return_value={})
+            qds.main()
+            Connection._api_call.assert_called_with('POST', 'clusters',
+                                                    {'engine_config':
+                                                         {'flavour': 'hadoop2',
+                                                          'hive_settings': {
+                                                              'hive_version': '2.3'
+                                                          }},
+                                                     'cluster_info': {'label': ['test_label'],}})
+
+
+
     def test_persistent_security_groups_v2(self):
         sys.argv = ['qds.py', '--version', 'v2', 'cluster', 'create', '--label', 'test_label',
                     '--persistent-security-groups', 'sg1, sg2']
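To make the new knob concrete, a small sketch of how hive_version flows into the hive_settings payload the test above asserts on (illustrative only, assuming qds_sdk is installed at this patch level):

    from qds_sdk.engine import Engine

    engine = Engine(flavour='hadoop2')
    engine.set_engine_config(hive_version='2.3')
    # engine.hive_settings now holds {'hive_version': '2.3'}, which is what
    # ends up under engine_config -> hive_settings in the cluster create call.
    print(engine.hive_settings)

From 35dcae627234298f1c4e2c36151353e78bb6b8a5 Mon Sep 17 00:00:00 2001
From: Aaditya Sharma
Date: Tue, 14 Apr 2020 12:45:51 +0530
Subject: [PATCH 47/69] QUEST-608: Replaced Quest by Pipelines (#314)

Co-authored-by: Aaditya Sharma
---
 bin/qds.py                         |  12 +--
 qds_sdk/{quest.py => pipelines.py} | 128 ++++++++++++++---------------
 tests/test_quest.py                |  20 ++---
 3 files changed, 80 insertions(+), 80 deletions(-)
 rename qds_sdk/{quest.py => pipelines.py} (87%)

diff --git a/bin/qds.py b/bin/qds.py
index bc434023..7f015f98 100755
--- a/bin/qds.py
+++ b/bin/qds.py
@@ -17,7 +17,7 @@
 from qds_sdk.template import TemplateCmdLine
 from qds_sdk.clusterv2 import ClusterCmdLine
 from qds_sdk.sensors import *
-from qds_sdk.quest import QuestCmdLine
+from qds_sdk.pipelines import PipelinesCmdLine
 import os
 import sys
 import traceback
@@ -90,8 +90,8 @@
     " action --help\n"
     "\nScheduler subcommand:\n"
     " scheduler --help\n"
-    "\nQuest subcommand:\n"
-    " quest --help\n"
+    "\nPipelines subcommand:\n"
+    " pipelines --help\n"
     "\nTemplate subcommand:\n"
     " template --help\n"
     "\nAccount subcommand:\n"
@@ -559,7 +559,7 @@ def templatemain(args):
     print(result)
 
 def questmain(args):
-    result = QuestCmdLine.run(args)
+    result = PipelinesCmdLine.run(args)
     print(result)
 
@@ -706,13 +706,13 @@ def main():
         return usermain(args)
     if a0 == "template":
         return templatemain(args)
-    if a0 == "quest":
+    if a0 == "pipelines":
         return questmain(args)
 
     cmdset = set(CommandClasses.keys())
     sys.stderr.write("First command must be one of <%s>\n" %
                      "|".join(cmdset.union(["cluster", "action", "scheduler", "report",
-                                            "dbtap", "role", "group", "app", "account", "nezha", "user", "template", "quest"])))
+                                            "dbtap", "role", "group", "app", "account", "nezha", "user", "template", "pipelines"])))
     usage(optparser)
 
diff --git a/qds_sdk/quest.py b/qds_sdk/pipelines.py
similarity index 87%
rename from qds_sdk/quest.py
rename to qds_sdk/pipelines.py
index d522d69c..f1fa500d 100644
--- a/qds_sdk/quest.py
+++ b/qds_sdk/pipelines.py
@@ -1,6 +1,6 @@
 """
-The quest module contains the base definition for
-a generic quest commands.
+The Pipelines module contains the base definition for
+a generic Pipelines commands.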
""" from qds_sdk.actions import * import json @@ -14,13 +14,13 @@ _URI_RE = re.compile(r's3://([^/]+)/?(.*)') -class QuestCmdLine: - """qds_sdk.QuestCmdLine is the interface used by qds.py.""" +class PipelinesCmdLine: + """qds_sdk.PipelinesCmdLine is the interface used by qds.py.""" @staticmethod def parsers(): - argparser = ArgumentParser(prog="qds.py quest", - description="Quest client for Qubole Data Service.") + argparser = ArgumentParser(prog="qds.py pipelines", + description="Pipelines client for Qubole Data Service.") subparsers = argparser.add_subparsers() # Create @@ -51,7 +51,7 @@ def parsers(): create.add_argument("--command-line-options", dest="command_line_options", help="command line options on property page.") - create.set_defaults(func=QuestCmdLine.create) + create.set_defaults(func=PipelinesCmdLine.create) # Update/Edit update_properties = subparsers.add_parser("update-property", @@ -67,14 +67,14 @@ def parsers(): help="command line options on property page.") update_properties.add_argument("--can-retry", dest="can_retry", help="can retry true or false") - update_properties.set_defaults(func=QuestCmdLine.update_properties) + update_properties.set_defaults(func=PipelinesCmdLine.update_properties) update_code = subparsers.add_parser("update-code", help="Update code of a existing pipeline") update_code.add_argument( "-c", "--code", dest="code", help="query string") update_code.add_argument("-f", "--script-location", dest="script_location", help="Path where code to run is stored. local file path") - update_code.set_defaults(func=QuestCmdLine.update_code) + update_code.set_defaults(func=PipelinesCmdLine.update_code) update_code.add_argument( "--jar-path", dest="jar_path", @@ -94,37 +94,37 @@ def parsers(): delete = subparsers.add_parser("delete", help="Delete Pipeline") delete.add_argument("--pipeline-id", dest="pipeline_id", required=True, help='Id of pipeline which need to be started') - delete.set_defaults(func=QuestCmdLine.delete) + delete.set_defaults(func=PipelinesCmdLine.delete) status = subparsers.add_parser("status", help="Status of Pipeline") status.add_argument("--pipeline-id", dest="pipeline_id", required=True, help='Id of pipeline which need to be started') - status.set_defaults(func=QuestCmdLine.status) + status.set_defaults(func=PipelinesCmdLine.status) start = subparsers.add_parser("start", help="Start Pipeline") start.add_argument("--pipeline-id", dest="pipeline_id", required=True, help='Id of pipeline which need to be started') - start.set_defaults(func=QuestCmdLine.start) + start.set_defaults(func=PipelinesCmdLine.start) pause = subparsers.add_parser("pause", help="pause Pipeline") pause.add_argument("--pipeline-id", dest="pipeline_id", required=True, help='Id of pipeline which need to be started') - pause.set_defaults(func=QuestCmdLine.pause) + pause.set_defaults(func=PipelinesCmdLine.pause) clone = subparsers.add_parser("clone", help="clone Pipeline") clone.add_argument("--pipeline-id", dest="pipeline_id", required=True, help='Id of pipeline which need to be started') - clone.set_defaults(func=QuestCmdLine.clone) + clone.set_defaults(func=PipelinesCmdLine.clone) archive = subparsers.add_parser("archive", help="archive Pipeline") archive.add_argument("--pipeline-id", dest="pipeline_id", required=True, help='Id of pipeline which need to be started') - archive.set_defaults(func=QuestCmdLine.archive) + archive.set_defaults(func=PipelinesCmdLine.archive) health = subparsers.add_parser("health", help="health of Pipeline") health.add_argument("--pipeline-id", 
dest="pipeline_id", required=True, help='Id of pipeline which need to be started') - health.set_defaults(func=QuestCmdLine.health) + health.set_defaults(func=PipelinesCmdLine.health) # list index = subparsers.add_parser("list", help="list of Pipeline.") index.add_argument("--pipeline-status", dest="status", required=True, help='Id of pipeline which need to be started. ' 'Valid values = [active, archive, all, draft] ') - index.set_defaults(func=QuestCmdLine.index) + index.set_defaults(func=PipelinesCmdLine.index) return argparser @staticmethod @@ -134,7 +134,7 @@ def run(args): :param args: :return: """ - parser = QuestCmdLine.parsers() + parser = PipelinesCmdLine.parsers() parsed = parser.parse_args(args) return parsed.func(parsed) @@ -145,7 +145,7 @@ def delete(args): :param args: :return: """ - response = Quest.delete(args.pipeline_id) + response = Pipelines.delete(args.pipeline_id) return json.dumps( response, default=lambda o: o.attributes, sort_keys=True, indent=4) @@ -156,7 +156,7 @@ def pause(args): :param args: :return: """ - response = Quest.pause(args.pipeline_id) + response = Pipelines.pause(args.pipeline_id) return json.dumps( response, default=lambda o: o.attributes, sort_keys=True, indent=4) @@ -167,7 +167,7 @@ def archive(args): :param args: :return: """ - response = Quest.archive(args.pipeline_id) + response = Pipelines.archive(args.pipeline_id) return json.dumps( response, default=lambda o: o.attributes, sort_keys=True, indent=4) @@ -178,7 +178,7 @@ def clone(args): :param args: :return: """ - response = Quest.clone(args.pipeline_id) + response = Pipelines.clone(args.pipeline_id) return json.dumps(response, default=lambda o: o.attributes, sort_keys=True, indent=4) @staticmethod @@ -188,7 +188,7 @@ def status(args): :param args: :return: """ - response = Quest.get_status(args.pipeline_id) + response = Pipelines.get_status(args.pipeline_id) return json.dumps( response, default=lambda o: o.attributes, sort_keys=True, indent=4) @@ -199,7 +199,7 @@ def health(args): :param args: :return: """ - response = Quest.get_health(args.pipeline_id) + response = Pipelines.get_health(args.pipeline_id) return json.dumps( response, default=lambda o: o.attributes, sort_keys=True, indent=4) @@ -210,7 +210,7 @@ def start(args): :param args: :return: """ - response = Quest.start(args.pipeline_id) + response = Pipelines.start(args.pipeline_id) return json.dumps(response, sort_keys=True, indent=4) @staticmethod @@ -220,7 +220,7 @@ def index(args): :param args: :return: """ - pipelinelist = Quest.list(args.status) + pipelinelist = Pipelines.list(args.status) return json.dumps( pipelinelist, default=lambda o: o.attributes, sort_keys=True, indent=4) @@ -233,7 +233,7 @@ def create(args): """ pipeline = None if int(args.create_type) == 2: - pipeline = QuestJar.create_pipeline(pipeline_name=args.name, + pipeline = PipelinesJar.create_pipeline(pipeline_name=args.name, jar_path=args.jar_path, main_class_name=args.main_class_name, cluster_label=args.cluster_label, @@ -241,7 +241,7 @@ def create(args): command_line_options=args.command_line_options) elif int(args.create_type) == 3: if args.code: - pipeline = QuestCode.create_pipeline(pipeline_name=args.name, + pipeline = PipelinesCode.create_pipeline(pipeline_name=args.name, cluster_label=args.cluster_label, code=args.code, file_path=args.script_location, @@ -249,7 +249,7 @@ def create(args): user_arguments=args.user_arguments, command_line_options=args.command_line_options) elif args.script_location: - pipeline = 
QuestCode.create_pipeline(pipeline_name=args.name, + pipeline = PipelinesCode.create_pipeline(pipeline_name=args.name, cluster_label=args.cluster_label, code=args.code, file_path=args.script_location, @@ -268,7 +268,7 @@ def update_properties(args): """ params = args.__dict__ log.debug(params) - Quest.add_property(pipeline_id=args.pipeline_id, + Pipelines.add_property(pipeline_id=args.pipeline_id, cluster_label=args.cluster_label, can_retry=args.can_retry, command_line_options=args.command_line_options) @@ -281,7 +281,7 @@ def update_code(args): :return: """ if args.jar_path or args.main_class_name: - response = QuestJar.save_code(pipeline_id=args.pipeline_id, + response = PipelinesJar.save_code(pipeline_id=args.pipeline_id, code=args.code, file_path=args.script_location, language=args.language, @@ -289,7 +289,7 @@ def update_code(args): user_arguments=args.user_arguments, main_class_name=args.main_class_name) elif args.code or args.script_location: - response = QuestCode.save_code(pipeline_id=args.pipeline_id, + response = PipelinesCode.save_code(pipeline_id=args.pipeline_id, code=args.code, file_path=args.script_location, language=args.language, @@ -299,8 +299,8 @@ def update_code(args): return json.dumps(response, sort_keys=True, indent=4) -class Quest(Resource): - """qds_sdk.Quest is the base Qubole Quest class.""" +class Pipelines(Resource): + """qds_sdk.Pipelines is the base Qubole Pipelines class.""" """ all commands use the /pipelines endpoint""" @@ -326,7 +326,7 @@ def list(status=None): else: params = {"filter": status.lower()} conn = Qubole.agent() - url_path = Quest.rest_entity_path + url_path = Pipelines.rest_entity_path pipeline_list = conn.get(url_path, params) return pipeline_list @@ -352,9 +352,9 @@ def create(cls, pipeline_name, create_type, **kwargs): "create_type": create_type}, "type": "pipelines"} } - url = Quest.rest_entity_path + "?mode=wizard" + url = Pipelines.rest_entity_path + "?mode=wizard" response = conn.post(url, data) - cls.pipeline_id = Quest.get_pipline_id(response) + cls.pipeline_id = Pipelines.get_pipline_id(response) cls.pipeline_name = pipeline_name @staticmethod @@ -365,9 +365,9 @@ def start(pipeline_id): :return: response """ conn = Qubole.agent() - url = Quest.rest_entity_path + "/" + pipeline_id + "/start" + url = Pipelines.rest_entity_path + "/" + pipeline_id + "/start" response = conn.put(url) - pipeline_status = Quest.get_status(pipeline_id) + pipeline_status = Pipelines.get_status(pipeline_id) while pipeline_status == 'waiting': log.info("Pipeline is in waiting state....") time.sleep(10) @@ -410,7 +410,7 @@ def add_property(pipeline_id, } } log.info("Data {}".format(data)) - url = Quest.rest_entity_path + "/" + pipeline_id + "/properties" + url = Pipelines.rest_entity_path + "/" + pipeline_id + "/properties" response = conn.put(url, data) log.debug(response) return response @@ -481,7 +481,7 @@ def get_health(pipeline_id): :return: """ conn = Qubole.agent() - url = Quest.rest_entity_path + "/" + pipeline_id + url = Pipelines.rest_entity_path + "/" + pipeline_id response = conn.get(url) log.info(response) return response.get("data").get("attributes").get("health") @@ -493,7 +493,7 @@ def clone(pipeline_id): :param pipeline_id: :return: """ - url = Quest.rest_entity_path + "/" + pipeline_id + "/duplicate" + url = Pipelines.rest_entity_path + "/" + pipeline_id + "/duplicate" log.info("Cloning pipeline with id {}".format(pipeline_id)) conn = Qubole.agent() return conn.post(url) @@ -505,7 +505,7 @@ def pause(pipeline_id): :param pipeline_id: 
:return: """ - url = Quest.rest_entity_path + "/" + pipeline_id + "/pause" + url = Pipelines.rest_entity_path + "/" + pipeline_id + "/pause" log.info("Pausing pipeline with id {}".format(pipeline_id)) conn = Qubole.agent() return conn.put(url) @@ -517,7 +517,7 @@ def archive(pipeline_id): :param pipeline_id: :return: """ - url = Quest.rest_entity_path + "/" + pipeline_id + "/archive" + url = Pipelines.rest_entity_path + "/" + pipeline_id + "/archive" log.info("Archiving pipeline with id {}".format(pipeline_id)) conn = Qubole.agent() return conn.put(url) @@ -530,7 +530,7 @@ def get_status(pipeline_id): :return: """ conn = Qubole.agent() - url = Quest.rest_entity_path + "/" + pipeline_id + url = Pipelines.rest_entity_path + "/" + pipeline_id response = conn.get(url) log.debug(response) return response.get("data").get( @@ -544,7 +544,7 @@ def delete(pipeline_id): :return: """ conn = Qubole.agent() - url = Quest.rest_entity_path + "/" + pipeline_id + "/delete" + url = Pipelines.rest_entity_path + "/" + pipeline_id + "/delete" log.info("Deleting Pipeline with id: {}".format(pipeline_id)) response = conn.put(url) log.info(response) @@ -559,7 +559,7 @@ def edit_pipeline_name(pipeline_id, pipeline_name): :return: """ conn = Qubole.agent() - url = Quest.rest_entity_path + "/" + pipeline_id + url = Pipelines.rest_entity_path + "/" + pipeline_id data = { "data": { "attributes": { @@ -584,7 +584,7 @@ def set_alert(pipeline_id, channel_id): } } conn = Qubole.agent() - url = Quest.rest_entity_path + "/" + pipeline_id + "/alerts" + url = Pipelines.rest_entity_path + "/" + pipeline_id + "/alerts" return conn.put(url, data) @staticmethod @@ -594,14 +594,14 @@ def get_code(pipeline_id): :param pipeline_id: :return: """ - url = Quest.rest_entity_path + "/" + pipeline_id + url = Pipelines.rest_entity_path + "/" + pipeline_id conn = Qubole.agent() reponse = conn.get(url) code = reponse.get("meta")["command_details"]["code"] return code -class QuestCode(Quest): +class PipelinesCode(Pipelines): create_type = 3 @staticmethod @@ -627,24 +627,24 @@ def create_pipeline(pipeline_name, :param channel_id: :return: """ - QuestCode.create(pipeline_name, QuestCode.create_type) - pipeline_id = QuestCode.pipeline_id - response = QuestCode.add_property(pipeline_id, cluster_label, + PipelinesCode.create(pipeline_name, PipelinesCode.create_type) + pipeline_id = PipelinesCode.pipeline_id + response = PipelinesCode.add_property(pipeline_id, cluster_label, can_retry=can_retry, command_line_options=command_line_options) log.debug(response) - response = QuestCode.save_code(pipeline_id, + response = PipelinesCode.save_code(pipeline_id, code=code, file_path=file_path, language=language, user_arguments=user_arguments) if channel_id: - response = Quest.set_alert(pipeline_id, channel_id) + response = Pipelines.set_alert(pipeline_id, channel_id) log.info(response) return response -class QuestJar(Quest): +class PipelinesJar(Pipelines): create_type = 2 @staticmethod @@ -668,24 +668,24 @@ def create_pipeline(pipeline_name, :param user_arguments: :return: """ - QuestJar.create(pipeline_name, QuestJar.create_type) - pipeline_id = QuestJar.pipeline_id - QuestJar.add_property(pipeline_id, + PipelinesJar.create(pipeline_name, PipelinesJar.create_type) + pipeline_id = PipelinesJar.pipeline_id + PipelinesJar.add_property(pipeline_id, cluster_label, can_retry=can_retry, command_line_options=command_line_options) - QuestJar.save_code(pipeline_id, + PipelinesJar.save_code(pipeline_id, jar_path=jar_path, main_class_name=main_class_name, 
user_arguments=user_arguments) - QuestJar.jar_path = jar_path + PipelinesJar.jar_path = jar_path if channel_id: - response = Quest.set_alert(pipeline_id, channel_id) + response = Pipelines.set_alert(pipeline_id, channel_id) log.info(response) - return QuestJar + return PipelinesJar -class QuestAssisted(Quest): +class PipelinesAssisted(Pipelines): create_type = 1 @staticmethod diff --git a/tests/test_quest.py b/tests/test_quest.py index 17ae4274..0ee50385 100644 --- a/tests/test_quest.py +++ b/tests/test_quest.py @@ -1,7 +1,7 @@ from __future__ import print_function from test_base import QdsCliTestCase from test_base import print_command -from qds_sdk.quest import QuestCode +from qds_sdk.pipelines import PipelinesCode from qds_sdk.connection import Connection import qds from mock import * @@ -19,7 +19,7 @@ class TestQuestList(QdsCliTestCase): def test_list_pipeline(self): - sys.argv = ['qds.py', 'quest', 'list', '--pipeline-status', 'draft'] + sys.argv = ['qds.py', 'pipelines', 'list', '--pipeline-status', 'draft'] print_command() Connection._api_call = Mock(return_value={}) params = {'filter': "draft"} @@ -28,7 +28,7 @@ def test_list_pipeline(self): "GET", "pipelines", params=params) def test_pause_pipeline(self): - sys.argv = ['qds.py', 'quest', 'pause', '--pipeline-id', '153'] + sys.argv = ['qds.py', 'pipelines', 'pause', '--pipeline-id', '153'] print_command() Connection._api_call = Mock(return_value={}) qds.main() @@ -36,7 +36,7 @@ def test_pause_pipeline(self): "PUT", "pipelines/153/pause", None) def test_clone_pipeline(self): - sys.argv = ['qds.py', 'quest', 'clone', '--pipeline-id', '153'] + sys.argv = ['qds.py', 'pipelines', 'clone', '--pipeline-id', '153'] print_command() Connection._api_call = Mock(return_value={}) qds.main() @@ -44,7 +44,7 @@ def test_clone_pipeline(self): "POST", "pipelines/153/duplicate", None) def test_archive_pipeline(self): - sys.argv = ['qds.py', 'quest', 'archive', '--pipeline-id', '153'] + sys.argv = ['qds.py', 'pipelines', 'archive', '--pipeline-id', '153'] print_command() Connection._api_call = Mock(return_value={}) qds.main() @@ -52,7 +52,7 @@ def test_archive_pipeline(self): "PUT", "pipelines/153/archive", None) def test_delete_pipeline(self): - sys.argv = ['qds.py', 'quest', 'delete', '--pipeline-id', '153'] + sys.argv = ['qds.py', 'pipelines', 'delete', '--pipeline-id', '153'] print_command() Connection._api_call = Mock(return_value={}) qds.main() @@ -60,7 +60,7 @@ def test_delete_pipeline(self): "PUT", "pipelines/153/delete", None) def test_create_pipeline(self): - sys.argv = ['qds.py', 'quest', 'create', '--create-type', '3', '--pipeline-name', 'test_pipeline_name', + sys.argv = ['qds.py', 'pipelines', 'create', '--create-type', '3', '--pipeline-name', 'test_pipeline_name', '--cluster-label', 'spark', '-c', 'print("hello")', '--language', 'python', '--user-arguments', 'users_argument'] print_command() d1 = {"data": {"attributes": {"name": "test_pipeline_name", "status": "DRAFT", "create_type": 3}, @@ -79,9 +79,9 @@ def test_create_pipeline(self): "owner_name": "eam-airflow", "pipeline_instance_status": "draft", "create_type": 3, "health": "UNKNOWN"}}} - QuestCode.pipeline_id = '1' - QuestCode.pipeline_code = """print("helloworld")""" - QuestCode.pipeline_name = "test_pipeline_name" + PipelinesCode.pipeline_id = '1' + PipelinesCode.pipeline_code = """print("helloworld")""" + PipelinesCode.pipeline_name = "test_pipeline_name" d2 = {"data": {"attributes": {"cluster_label": "spark", "can_retry": True, "checkpoint_location": None, "trigger_interval": 
None, "output_mode": None, From 041151cefd4cc2f8ee3d184c2f108e0a33ea922b Mon Sep 17 00:00:00 2001 From: Joy Lal Chattaraj <8450903+chattarajoy@users.noreply.github.com> Date: Tue, 14 Apr 2020 19:04:13 +0530 Subject: [PATCH 48/69] Release Version 1.15.1 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 5a2f1736..b80add4b 100644 --- a/setup.py +++ b/setup.py @@ -12,7 +12,7 @@ def read(fname): setup( name="qds_sdk", - version="1.15.0", + version="1.15.1", author="Qubole", author_email="dev@qubole.com", description=("Python SDK for coding to the Qubole Data Service API"), From ce5745c2fdf3d314c119a7d4d893c385da8bfdfb Mon Sep 17 00:00:00 2001 From: Tarun Gavara Date: Wed, 15 Apr 2020 13:20:29 +0530 Subject: [PATCH 49/69] SDK-404: Add --skip-upload-to-source option in JupyterNotebookCommand (#315) --- qds_sdk/commands.py | 3 +++ tests/test_command.py | 31 +++++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+) diff --git a/qds_sdk/commands.py b/qds_sdk/commands.py index 1d04bcd9..99cc14ef 100755 --- a/qds_sdk/commands.py +++ b/qds_sdk/commands.py @@ -1387,6 +1387,9 @@ class JupyterNotebookCommand(Command): optparser.add_option("--print-logs-live", action="store_true", dest="print_logs_live", default=False, help="Fetch logs \ and print them to stderr while command is running.") + optparser.add_option("--skip-upload-to-source", action="store_false", + dest="upload_to_source", default=True, help="Do not \ + upload notebook to source after completion of execution") @classmethod def parse(cls, args): diff --git a/tests/test_command.py b/tests/test_command.py index f8caf745..bbbf83a6 100644 --- a/tests/test_command.py +++ b/tests/test_command.py @@ -2096,6 +2096,7 @@ def test_submit_cluster_label(self): 'path': 'folder/file', 'retry_delay': None, 'command_type': 'JupyterNotebookCommand', + 'upload_to_source': True, 'can_notify': False, 'pool': None}) @@ -2116,6 +2117,7 @@ def test_submit_macros(self): 'path': 'folder/file', 'retry_delay': None, 'command_type': 'JupyterNotebookCommand', + 'upload_to_source': True, 'can_notify': False, 'pool': None}) @@ -2136,6 +2138,7 @@ def test_submit_arguments(self): 'path': 'folder/file', 'retry_delay': None, 'command_type': 'JupyterNotebookCommand', + 'upload_to_source': True, 'can_notify': False, 'pool': None}) @@ -2156,6 +2159,7 @@ def test_submit_tags(self): 'path': 'folder/file', 'retry_delay': None, 'command_type': 'JupyterNotebookCommand', + 'upload_to_source': True, 'can_notify': False, 'pool': None}) @@ -2176,6 +2180,7 @@ def test_submit_name(self): 'path': 'folder/file', 'retry_delay': None, 'command_type': 'JupyterNotebookCommand', + 'upload_to_source': True, 'can_notify': False, 'pool': None}) @@ -2196,6 +2201,7 @@ def test_submit_notify(self): 'path': 'folder/file', 'retry_delay': None, 'command_type': 'JupyterNotebookCommand', + 'upload_to_source': True, 'can_notify': True, 'pool': None}) @@ -2216,6 +2222,7 @@ def test_submit_timeout(self): 'path': 'folder/file', 'retry_delay': None, 'command_type': 'JupyterNotebookCommand', + 'upload_to_source': True, 'can_notify': False, 'pool': None}) @@ -2236,9 +2243,31 @@ def test_submit_pool(self): 'path': 'folder/file', 'retry_delay': None, 'command_type': 'JupyterNotebookCommand', + 'upload_to_source': True, 'can_notify': False, 'pool': 'batch'}) + def test_submit_skip_upload_to_source(self): + sys.argv = ['qds.py', 'jupyternotebookcmd', 'submit', '--path', 'folder/file', + '--skip-upload-to-source'] + print_command() + Connection._api_call = 
Mock(return_value={'id': 1234}) + qds.main() + Connection._api_call.assert_called_with('POST', 'commands', + {'retry': None, + 'name': None, + 'tags': None, + 'label': None, + 'macros': None, + 'arguments': None, + 'timeout': None, + 'path': 'folder/file', + 'retry_delay': None, + 'command_type': 'JupyterNotebookCommand', + 'upload_to_source': False, + 'can_notify': False, + 'pool': None}) + def test_submit_retry(self): sys.argv = ['qds.py', 'jupyternotebookcmd', 'submit', '--path', 'folder/file', '--retry', '1'] @@ -2256,6 +2285,7 @@ def test_submit_retry(self): 'path': 'folder/file', 'retry_delay': None, 'command_type': 'JupyterNotebookCommand', + 'upload_to_source': True, 'can_notify': False, 'pool': None}) @@ -2276,6 +2306,7 @@ def test_submit_retry_delay(self): 'path': 'folder/file', 'retry_delay': 2, 'command_type': 'JupyterNotebookCommand', + 'upload_to_source': True, 'can_notify': False, 'pool': None}) From 8bf79f9e4511c656d1a388c310672b0b19179b01 Mon Sep 17 00:00:00 2001 From: Anmol Dhingra Date: Wed, 15 Apr 2020 13:43:01 +0530 Subject: [PATCH 50/69] SDK-401: Fix inflection version upgrade on basis of python version (#316) --- setup.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index b80add4b..e35de3bc 100644 --- a/setup.py +++ b/setup.py @@ -2,9 +2,14 @@ import sys from setuptools import setup -INSTALL_REQUIRES = ['requests >=1.0.3', 'boto >=2.45.0', 'six >=1.2.0', 'urllib3 >= 1.0.2', 'inflection >= 0.3.1'] +INSTALL_REQUIRES = ['requests >=2.21.0', 'boto >=2.45.0', 'six >=1.12.0', + 'urllib3 >= 1.24.3'] if sys.version_info < (2, 7, 0): INSTALL_REQUIRES.append('argparse>=1.1') +if sys.version_info < (3, 5): + INSTALL_REQUIRES.append('inflection==0.3.1') +else: + INSTALL_REQUIRES.append('inflection>=0.3.1') def read(fname): From e10933e4534419417b233fc72e1064cb1d1de81d Mon Sep 17 00:00:00 2001 From: shekharsaurabh <37484772+shekharsaurabh@users.noreply.github.com> Date: Tue, 21 Apr 2020 11:10:58 +0530 Subject: [PATCH 51/69] SDK-386: Increase number of default retries for HTTP Requests (#317) --- bin/qds.py | 4 ++-- qds_sdk/connection.py | 4 ++-- qds_sdk/qubole.py | 4 ++-- tests/test_connection.py | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/bin/qds.py b/bin/qds.py index 7f015f98..ea8deb3e 100755 --- a/bin/qds.py +++ b/bin/qds.py @@ -600,7 +600,7 @@ def main(): type=int, default=os.getenv('QDS_MAX_RETRIES'), help="Number of re-attempts for an api-call in case of " - " retryable exceptions. Defaults to 6.") + " retryable exceptions. 
Defaults to 7.") optparser.add_option("-v", dest="verbose", action="store_true", default=False, @@ -634,7 +634,7 @@ def main(): options.poll_interval = 5 if options.max_retries is None: - options.max_retries = 6 + options.max_retries = 7 if options.base_retry_delay is None: options.base_retry_delay = 10 diff --git a/qds_sdk/connection.py b/qds_sdk/connection.py index 3cdb4a88..324c27f5 100644 --- a/qds_sdk/connection.py +++ b/qds_sdk/connection.py @@ -36,7 +36,7 @@ def init_poolmanager(self, connections, maxsize,block=False): class Connection: def __init__(self, auth, rest_url, skip_ssl_cert_check, - reuse=True, max_retries=6, + reuse=True, max_retries=7, base_retry_delay=10): self.auth = auth self.rest_url = rest_url @@ -55,7 +55,7 @@ def __init__(self, auth, rest_url, skip_ssl_cert_check, self.session_with_retries = requests.Session() self.session_with_retries.mount('https://', RequestAdapter(max_retries=3)) - def retry(ExceptionToCheck, tries=6, delay=10, backoff=2): + def retry(ExceptionToCheck, tries=7, delay=10, backoff=2): def deco_retry(f): @wraps(f) def f_retry(self, *args, **kwargs): diff --git a/qds_sdk/qubole.py b/qds_sdk/qubole.py index cf87a681..659516af 100644 --- a/qds_sdk/qubole.py +++ b/qds_sdk/qubole.py @@ -21,7 +21,7 @@ class Qubole: """ MIN_POLL_INTERVAL = 1 - RETRIES_CAP = 5 + RETRIES_CAP = 7 MAX_RETRY_DELAY = 10 _auth = None @@ -40,7 +40,7 @@ class Qubole: def configure(cls, api_token, api_url="https://api.qubole.com/api/", version="v1.2", poll_interval=5, skip_ssl_cert_check=False, cloud_name="AWS", - base_retry_delay=10, max_retries=6): + base_retry_delay=10, max_retries=7): """ Set parameters governing interaction with QDS Args: diff --git a/tests/test_connection.py b/tests/test_connection.py index ae7486a1..4be80912 100644 --- a/tests/test_connection.py +++ b/tests/test_connection.py @@ -33,7 +33,7 @@ def test_connection_override(self): Connection.__init__ = Mock(return_value=None) Connection._api_call = Mock(return_value={}) qds.main() - Connection.__init__.assert_called_with(ANY, ANY, ANY, ANY, 5, 10) + Connection.__init__.assert_called_with(ANY, ANY, ANY, ANY, 7, 10) #Test with no values given should set default def test_connection_default(self): @@ -42,7 +42,7 @@ def test_connection_default(self): Connection.__init__ = Mock(return_value=None) Connection._api_call = Mock(return_value={}) qds.main() - Connection.__init__.assert_called_with(ANY, ANY, ANY, ANY, 5, 10) + Connection.__init__.assert_called_with(ANY, ANY, ANY, ANY, 7, 10) if __name__ == '__main__': unittest.main() From 5b6694c914ac116382fd0a9a43eddb1ad3361649 Mon Sep 17 00:00:00 2001 From: Joy Lal Chattaraj <8450903+chattarajoy@users.noreply.github.com> Date: Wed, 22 Apr 2020 10:51:31 +0530 Subject: [PATCH 52/69] Release Version 1.15.2 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index e35de3bc..78576f6e 100644 --- a/setup.py +++ b/setup.py @@ -17,7 +17,7 @@ def read(fname): setup( name="qds_sdk", - version="1.15.1", + version="1.15.2", author="Qubole", author_email="dev@qubole.com", description=("Python SDK for coding to the Qubole Data Service API"), From 053843a58011d001e5384df0870ef4f2d21b3c12 Mon Sep 17 00:00:00 2001 From: Joy Lal Chattaraj <8450903+chattarajoy@users.noreply.github.com> Date: Wed, 22 Apr 2020 10:56:08 +0530 Subject: [PATCH 53/69] Revert "SDK-401: Fix inflection version upgrade on basis of python version (#316)" This reverts commit 8bf79f9e4511c656d1a388c310672b0b19179b01. 
--- setup.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/setup.py b/setup.py index 78576f6e..33fe153d 100644 --- a/setup.py +++ b/setup.py @@ -2,14 +2,9 @@ import sys from setuptools import setup -INSTALL_REQUIRES = ['requests >=2.21.0', 'boto >=2.45.0', 'six >=1.12.0', - 'urllib3 >= 1.24.3'] +INSTALL_REQUIRES = ['requests >=1.0.3', 'boto >=2.45.0', 'six >=1.2.0', 'urllib3 >= 1.0.2', 'inflection >= 0.3.1'] if sys.version_info < (2, 7, 0): INSTALL_REQUIRES.append('argparse>=1.1') -if sys.version_info < (3, 5): - INSTALL_REQUIRES.append('inflection==0.3.1') -else: - INSTALL_REQUIRES.append('inflection>=0.3.1') def read(fname): From 1645310d553c0c2b08897ceeb59120227697d350 Mon Sep 17 00:00:00 2001 From: Anmol Dhingra Date: Wed, 15 Apr 2020 13:43:01 +0530 Subject: [PATCH 54/69] SDK-401: Fix inflection version upgrade on basis of python version (#316) --- setup.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 33fe153d..0cd1a4be 100644 --- a/setup.py +++ b/setup.py @@ -2,9 +2,14 @@ import sys from setuptools import setup -INSTALL_REQUIRES = ['requests >=1.0.3', 'boto >=2.45.0', 'six >=1.2.0', 'urllib3 >= 1.0.2', 'inflection >= 0.3.1'] +INSTALL_REQUIRES = ['requests >=1.0.3', 'boto >=2.45.0', 'six >=1.2.0', 'urllib3 >= 1.0.2'] + if sys.version_info < (2, 7, 0): INSTALL_REQUIRES.append('argparse>=1.1') +if sys.version_info < (3, 5): + INSTALL_REQUIRES.append('inflection==0.3.1') +else: + INSTALL_REQUIRES.append('inflection>=0.3.1') def read(fname): From 8040972befa7087b705fc8f0fc93855170fba8c0 Mon Sep 17 00:00:00 2001 From: juhi gupta <32215750+juhi-09@users.noreply.github.com> Date: Tue, 5 May 2020 09:53:48 +0530 Subject: [PATCH 55/69] SDK-405: Add autoscaling spotblock fallback capabilities to ClusterV2 (#318) --- qds_sdk/cluster_info_v22.py | 28 +++++++++++++++++++++------- tests/test_clusterv22.py | 14 +++++++------- 2 files changed, 28 insertions(+), 14 deletions(-) diff --git a/qds_sdk/cluster_info_v22.py b/qds_sdk/cluster_info_v22.py index 43e699f6..9f946a3f 100644 --- a/qds_sdk/cluster_info_v22.py +++ b/qds_sdk/cluster_info_v22.py @@ -77,7 +77,8 @@ def set_cluster_info_from_arguments(self, arguments): autoscaling_maximum_bid_price_percentage=arguments.autoscaling_maximum_bid_price_percentage, autoscaling_timeout_for_request=arguments.autoscaling_timeout_for_request, autoscaling_spot_allocation_strategy=arguments.autoscaling_spot_allocation_strategy, - autoscaling_spot_fallback=arguments.autoscaling_spot_fallback) + autoscaling_spot_fallback=arguments.autoscaling_spot_fallback, + autoscaling_spot_block_fallback=arguments.autoscaling_spot_block_fallback) def set_cluster_info(self, disallow_cluster_termination=None, @@ -248,7 +249,8 @@ def set_composition(self, autoscaling_maximum_bid_price_percentage=None, autoscaling_timeout_for_request=None, autoscaling_spot_allocation_strategy=None, - autoscaling_spot_fallback=None): + autoscaling_spot_fallback=None, + autoscaling_spot_block_fallback=None): self.cluster_info["composition"] = {} @@ -274,7 +276,8 @@ def set_composition(self, autoscaling_maximum_bid_price_percentage, autoscaling_timeout_for_request, autoscaling_spot_allocation_strategy, - autoscaling_spot_fallback) + autoscaling_spot_fallback, + autoscaling_spot_block_fallback) def set_master_config(self, master_type, @@ -323,7 +326,8 @@ def set_autoscaling_config(self, autoscaling_maximum_bid_price_percentage, autoscaling_timeout_for_request, autoscaling_spot_allocation_strategy, - autoscaling_spot_fallback): + 
autoscaling_spot_fallback, + autoscaling_spot_block_fallback): self.cluster_info["composition"]["autoscaling_nodes"] = {"nodes": []} if not autoscaling_ondemand_percentage and not autoscaling_spot_block_percentage and not autoscaling_spot_percentage: self.set_autoscaling_ondemand(50) @@ -333,7 +337,8 @@ def set_autoscaling_config(self, self.set_autoscaling_ondemand(autoscaling_ondemand_percentage) if autoscaling_spot_block_percentage: self.set_autoscaling_spot_block(autoscaling_spot_block_percentage, - autoscaling_spot_block_duration) + autoscaling_spot_block_duration, + autoscaling_spot_block_fallback) if autoscaling_spot_percentage: self.set_autoscaling_spot(autoscaling_spot_percentage, autoscaling_maximum_bid_price_percentage, @@ -388,10 +393,13 @@ def set_autoscaling_ondemand(self, autoscaling_ondemand_percentage=None): "percentage": autoscaling_ondemand_percentage, "type": "ondemand"} self.cluster_info["composition"]["autoscaling_nodes"]["nodes"].append(ondemand) - def set_autoscaling_spot_block(self, autoscaling_spot_block_percentage=None, autoscaling_spot_block_duration=120): + def set_autoscaling_spot_block(self, autoscaling_spot_block_percentage=None, + autoscaling_spot_block_duration=120, + autoscaling_spot_block_fallback=None): spot_block = {"percentage": autoscaling_spot_block_percentage, "type": "spotblock", - "timeout": autoscaling_spot_block_duration} + "timeout": autoscaling_spot_block_duration, + "fallback": autoscaling_spot_block_fallback} self.cluster_info["composition"]["autoscaling_nodes"]["nodes"].append(spot_block) def set_autoscaling_spot(self, autoscaling_spot_percentage=None, @@ -693,6 +701,12 @@ def cluster_info_parser(argparser, action): type=int, default=120, help="spot block duration unit: minutes") + composition_group.add_argument("--autoscaling-spot-block-fallback", + dest="autoscaling_spot_block_fallback", + choices=["ondemand", None], + default=None, + help="whether to fallback to on-demand instances for autoscaling" + + " nodes if spot block instances aren't available") composition_group.add_argument("--autoscaling-maximum-bid-price-percentage", dest="autoscaling_maximum_bid_price_percentage", type=int, diff --git a/tests/test_clusterv22.py b/tests/test_clusterv22.py index ab4c03ec..c188936b 100644 --- a/tests/test_clusterv22.py +++ b/tests/test_clusterv22.py @@ -97,7 +97,7 @@ def test_od_od_spotblock(self): sys.argv = ['qds.py', '--version', 'v2.2', 'cluster', 'create', '--label', 'test_label', '--master-type', 'ondemand', '--min-ondemand-percentage', '100', '--autoscaling-spot-block-percentage', - '100', '--autoscaling-spot-block-duration', '60'] + '100', '--autoscaling-spot-block-duration', '60', '--autoscaling-spot-block-fallback', 'ondemand'] Qubole.cloud = None print_command() Connection._api_call = Mock(return_value={}) @@ -105,15 +105,15 @@ def test_od_od_spotblock(self): Connection._api_call.assert_called_with('POST', 'clusters', {'cluster_info': { 'composition': {'min_nodes': {'nodes': [{'percentage': 100, 'type': 'ondemand'}]}, 'master': {'nodes': [{'percentage': 100, 'type': 'ondemand'}]}, - 'autoscaling_nodes': {'nodes': [{'percentage': 100, 'type': 'spotblock', 'timeout': 60}]}}, + 'autoscaling_nodes': {'nodes': [{'percentage': 100, 'type': 'spotblock', 'timeout': 60, 'fallback': 'ondemand'}]}}, 'label': ['test_label']}}) def test_od_od_spotblockspot(self): sys.argv = ['qds.py', '--version', 'v2.2', 'cluster', 'create', '--label', 'test_label', '--master-type', 'ondemand', '--min-ondemand-percentage', '100', 
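                    # This patch's new knob, exercised in the lines just below:
                    # spot block autoscaling nodes may now declare an explicit
                    # on-demand fallback via '--autoscaling-spot-block-fallback'.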
'--autoscaling-spot-block-percentage', - '50', '--autoscaling-spot-block-duration', '60', '--autoscaling-spot-percentage', '50', - '--autoscaling-maximum-bid-price-percentage', '50', + '50', '--autoscaling-spot-block-duration', '60', '--autoscaling-spot-block-fallback', None, + '--autoscaling-spot-percentage', '50', '--autoscaling-maximum-bid-price-percentage', '50', '--autoscaling-timeout-for-request', '3', '--autoscaling-spot-fallback', None, '--autoscaling-spot-allocation-strategy', 'capacityOptimized'] Qubole.cloud = None print_command() @@ -122,7 +122,7 @@ def test_od_od_spotblockspot(self): Connection._api_call.assert_called_with('POST', 'clusters', {'cluster_info': { 'composition': {'min_nodes': {'nodes': [{'percentage': 100, 'type': 'ondemand'}]}, 'master': {'nodes': [{'percentage': 100, 'type': 'ondemand'}]}, 'autoscaling_nodes': { - 'nodes': [{'percentage': 50, 'type': 'spotblock', 'timeout': 60}, + 'nodes': [{'percentage': 50, 'type': 'spotblock', 'timeout': 60, 'fallback': None}, {'timeout_for_request': 3, 'percentage': 50, 'type': 'spot', 'fallback': None, 'maximum_bid_price_percentage': 50, 'allocation_strategy': 'capacityOptimized'}]}}, 'label': ['test_label']}}) @@ -164,7 +164,7 @@ def test_spotblock_spotblock_spotblock(self): sys.argv = ['qds.py', '--version', 'v2.2', 'cluster', 'create', '--label', 'test_label', '--master-type', 'spotblock', '--master-spot-block-duration', '60', '--min-spot-block-percentage', '100', '--min-spot-block-duration', '60', '--autoscaling-spot-block-percentage', - '100', '--autoscaling-spot-block-duration', '60'] + '100', '--autoscaling-spot-block-duration', '60', '--autoscaling-spot-block-fallback', None] Qubole.cloud = None print_command() Connection._api_call = Mock(return_value={}) @@ -172,7 +172,7 @@ def test_spotblock_spotblock_spotblock(self): Connection._api_call.assert_called_with('POST', 'clusters', {'cluster_info': { 'composition': {'min_nodes': {'nodes': [{'percentage': 100, 'type': 'spotblock', 'timeout': 60}]}, 'master': {'nodes': [{'percentage': 100, 'type': 'spotblock', 'timeout': 60}]}, - 'autoscaling_nodes': {'nodes': [{'percentage': 100, 'type': 'spotblock', 'timeout': 60}]}}, + 'autoscaling_nodes': {'nodes': [{'percentage': 100, 'type': 'spotblock', 'timeout': 60, 'fallback': None}]}}, 'label': ['test_label']}}) def test_spot_spot_spot(self): From ba1fbc52153e0b5fcf56c47093cdb1b8d753e0c0 Mon Sep 17 00:00:00 2001 From: surajg-q <57791187+surajg-q@users.noreply.github.com> Date: Wed, 6 May 2020 14:28:07 +0530 Subject: [PATCH 56/69] ACM-6842: Ability to Create Cluster with HS2 on master (#320) --- qds_sdk/engine.py | 26 +++++++++++++++++++++++--- tests/test_clusterv22.py | 19 +++++++++++++++++++ 2 files changed, 42 insertions(+), 3 deletions(-) diff --git a/qds_sdk/engine.py b/qds_sdk/engine.py index 534c940a..61c863d9 100644 --- a/qds_sdk/engine.py +++ b/qds_sdk/engine.py @@ -28,6 +28,8 @@ def set_engine_config(self, spark_version=None, custom_spark_config=None, hive_version=None, + is_hs2=None, + hs2_thrift_port=None, dbtap_id=None, fernet_key=None, overrides=None, @@ -60,6 +62,10 @@ def set_engine_config(self, hive_version: Version of hive to be used in cluster + is_hs2: Enable HS2 on master + + hs2_thrift_port: Thrift port HS2 on master will run on + dbtap_id: ID of the data store inside QDS fernet_key: Encryption key for sensitive information inside airflow database. 
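As the new test at the end of this patch shows, both settings travel inside
engine_config; a sketch in the tests' own sys.argv style, with an illustrative
label and port:

    sys.argv = ['qds.py', '--version', 'v2.2', 'cluster', 'create',
                '--label', 'hs2_cluster', '--is_hs2', 'true',
                '--hs2_thrift_port', '10001']
    # serialized as {'engine_config': {'hive_settings':
    #                {'is_hs2': 'true', 'hs2_thrift_port': '10001'}}, ...}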
@@ -82,7 +88,7 @@ def set_engine_config(self, self.set_hadoop_settings(custom_hadoop_config, use_qubole_placement_policy, is_ha, fairscheduler_config_xml, default_pool, enable_rubix) - self.set_hive_settings(hive_version) + self.set_hive_settings(hive_version, is_hs2, hs2_thrift_port) self.set_presto_settings(presto_version, custom_presto_config) self.set_spark_settings(spark_version, custom_spark_config) self.set_airflow_settings(dbtap_id, fernet_key, overrides, airflow_version, airflow_python_version) @@ -110,8 +116,12 @@ def set_hadoop_settings(self, self.hadoop_settings['enable_rubix'] = enable_rubix def set_hive_settings(self, - hive_version=None): + hive_version=None, + is_hs2=None, + hs2_thrift_port=None): self.hive_settings['hive_version'] = hive_version + self.hive_settings['is_hs2'] = is_hs2 + self.hive_settings['hs2_thrift_port'] = hs2_thrift_port def set_presto_settings(self, presto_version=None, @@ -158,6 +168,8 @@ def set_engine_config_settings(self, arguments): spark_version=arguments.spark_version, custom_spark_config=arguments.custom_spark_config, hive_version=arguments.hive_version, + is_hs2=arguments.is_hs2, + hs2_thrift_port=arguments.hs2_thrift_port, dbtap_id=arguments.dbtap_id, fernet_key=arguments.fernet_key, overrides=arguments.overrides, @@ -229,11 +241,19 @@ def engine_parser(argparser): dest="presto_custom_config_file", help="location of file containg custom" + " presto configuration overrides") - hive_settings_group = argparser.add_argument_group("hive version settings") + hive_settings_group = argparser.add_argument_group("hive settings") hive_settings_group.add_argument("--hive_version", dest="hive_version", default=None, help="Version of hive for the cluster",) + hive_settings_group.add_argument("--is_hs2", + dest="is_hs2", + default=None, + help="Enable hs2 on master", ) + hive_settings_group.add_argument("--hs2_thrift_port", + dest="hs2_thrift_port", + default=None, + help="thrift port hs2 master will run on", ) spark_settings_group = argparser.add_argument_group("spark settings") spark_settings_group.add_argument("--spark-version", diff --git a/tests/test_clusterv22.py b/tests/test_clusterv22.py index c188936b..928ace03 100644 --- a/tests/test_clusterv22.py +++ b/tests/test_clusterv22.py @@ -211,3 +211,22 @@ def test_image_version_v22(self): 'slave_instance_type': 'c1.xlarge', 'cluster_image_version': '1.latest', 'composition': {'min_nodes': {'nodes': [{'percentage': 100, 'type': 'ondemand'}]}, 'master': {'nodes': [{'percentage': 100, 'type': 'ondemand'}]}, 'autoscaling_nodes': {'nodes': [{'percentage': 50, 'type': 'ondemand'}, {'timeout_for_request': 1, 'percentage': 50, 'type': 'spot', 'fallback': 'ondemand', 'maximum_bid_price_percentage': 100, 'allocation_strategy': None}]}}, 'label': ['test_label']}}) + + def test_hive_settings(self): + sys.argv = ['qds.py', '--version', 'v2.2', 'cluster', 'create', '--label', 'test_label', + '--is_hs2', 'true', '--hs2_thrift_port', '10001'] + print_command() + Connection._api_call = Mock(return_value={}) + qds.main() + print("Suraj") + print(Connection._api_call) + Connection._api_call.assert_called_with('POST', 'clusters', + {'engine_config': {'hive_settings': {'is_hs2': 'true', 'hs2_thrift_port': '10001'}}, + 'cluster_info': {'label': ['test_label'], + 'composition': {'master': {'nodes': [{'percentage': 100, 'type': 'ondemand'}]}, + 'min_nodes': {'nodes': [{'percentage': 100, 'type': 'ondemand'}]}, + 'autoscaling_nodes': {'nodes': [{'percentage': 50, 'type': 'ondemand'}, + {'percentage': 50, 'type': 'spot', 
'maximum_bid_price_percentage': 100, 'timeout_for_request': 1, 'allocation_strategy': None, 'fallback': 'ondemand'}]}}}}) + +if __name__ == '__main__': + unittest.main() From 1311b399258733bd72714cbd61faef2de518ed97 Mon Sep 17 00:00:00 2001 From: Tarun Gavara Date: Wed, 3 Jun 2020 10:59:18 +0530 Subject: [PATCH 57/69] SDK-407, JUPY-884: Add '--upload-to-source' param for JupyterNotebookCommand (#321) --- qds_sdk/commands.py | 17 +++++++++++--- tests/test_command.py | 52 +++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 64 insertions(+), 5 deletions(-) diff --git a/qds_sdk/commands.py b/qds_sdk/commands.py index 99cc14ef..0966a9e2 100755 --- a/qds_sdk/commands.py +++ b/qds_sdk/commands.py @@ -1387,9 +1387,10 @@ class JupyterNotebookCommand(Command): optparser.add_option("--print-logs-live", action="store_true", dest="print_logs_live", default=False, help="Fetch logs \ and print them to stderr while command is running.") - optparser.add_option("--skip-upload-to-source", action="store_false", - dest="upload_to_source", default=True, help="Do not \ - upload notebook to source after completion of execution") + optparser.add_option("--upload-to-source", dest="upload_to_source", default='true', + help="Upload notebook to source after completion of \ + execution. Specify the value as either 'true' or 'false'.\ + Default value is 'true'.") @classmethod def parse(cls, args): @@ -1417,6 +1418,16 @@ def parse(cls, args): options.macros = validate_json_input(options.macros, 'Macros', cls) if options.retry is not None: options.retry = int(options.retry) + if options.upload_to_source is not None: + options.upload_to_source = options.upload_to_source.lower() + if options.upload_to_source == 'true': + options.upload_to_source = True + elif options.upload_to_source == 'false': + options.upload_to_source = False + else: + msg = "Upload to Source parameter takes a value of either 'true' \ + or 'false' only." 
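                # For reference: optparse hands the option over as a string;
                # after lowercasing, only 'true'/'false' are accepted, so
                # '--upload-to-source True' maps to True and anything else
                # raises ParseError below.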
+ raise ParseError(msg, cls.optparser.format_help()) except OptionParsingError as e: raise ParseError(e.msg, cls.optparser.format_help()) except OptionParsingExit as e: diff --git a/tests/test_command.py b/tests/test_command.py index bbbf83a6..84c8955a 100644 --- a/tests/test_command.py +++ b/tests/test_command.py @@ -2247,9 +2247,50 @@ def test_submit_pool(self): 'can_notify': False, 'pool': 'batch'}) - def test_submit_skip_upload_to_source(self): + def test_submit_no_upload_to_source(self): + sys.argv = ['qds.py', 'jupyternotebookcmd', 'submit', '--path', 'folder/file'] + print_command() + Connection._api_call = Mock(return_value={'id': 1234}) + qds.main() + Connection._api_call.assert_called_with('POST', 'commands', + {'retry': None, + 'name': None, + 'tags': None, + 'label': None, + 'macros': None, + 'arguments': None, + 'timeout': None, + 'path': 'folder/file', + 'retry_delay': None, + 'command_type': 'JupyterNotebookCommand', + 'upload_to_source': True, + 'can_notify': False, + 'pool': None}) + + def test_submit_upload_to_source(self): + sys.argv = ['qds.py', 'jupyternotebookcmd', 'submit', '--path', 'folder/file', + '--upload-to-source', 'True'] + print_command() + Connection._api_call = Mock(return_value={'id': 1234}) + qds.main() + Connection._api_call.assert_called_with('POST', 'commands', + {'retry': None, + 'name': None, + 'tags': None, + 'label': None, + 'macros': None, + 'arguments': None, + 'timeout': None, + 'path': 'folder/file', + 'retry_delay': None, + 'command_type': 'JupyterNotebookCommand', + 'upload_to_source': True, + 'can_notify': False, + 'pool': None}) + + def test_submit_upload_to_source_false(self): sys.argv = ['qds.py', 'jupyternotebookcmd', 'submit', '--path', 'folder/file', - '--skip-upload-to-source'] + '--upload-to-source', 'False'] print_command() Connection._api_call = Mock(return_value={'id': 1234}) qds.main() @@ -2268,6 +2309,13 @@ def test_submit_skip_upload_to_source(self): 'can_notify': False, 'pool': None}) + def test_submit_upload_to_source_wrong_param(self): + sys.argv = ['qds.py', 'jupyternotebookcmd', 'submit', '--path', 'folder/file', + '--upload-to-source', 'wrong'] + print_command() + with self.assertRaises(qds_sdk.exception.ParseError): + qds.main() + def test_submit_retry(self): sys.argv = ['qds.py', 'jupyternotebookcmd', 'submit', '--path', 'folder/file', '--retry', '1'] From 88c42db9d3cb228b6c900d62511a8327e50c95e5 Mon Sep 17 00:00:00 2001 From: Joy Lal Chattaraj <8450903+chattarajoy@users.noreply.github.com> Date: Wed, 3 Jun 2020 14:08:53 +0530 Subject: [PATCH 58/69] Release Version 1.16.0 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 0cd1a4be..3c9a565a 100644 --- a/setup.py +++ b/setup.py @@ -17,7 +17,7 @@ def read(fname): setup( name="qds_sdk", - version="1.15.2", + version="1.16.0", author="Qubole", author_email="dev@qubole.com", description=("Python SDK for coding to the Qubole Data Service API"), From 8c74f40abb0de85a54e0f5a0763dc62303fda6c4 Mon Sep 17 00:00:00 2001 From: siddharth1001 Date: Fri, 19 Jun 2020 08:37:11 +0530 Subject: [PATCH 59/69] add payload changes --- qds_sdk/pipelines.py | 36 +++++++++++++++++++++++++++++------- 1 file changed, 29 insertions(+), 7 deletions(-) diff --git a/qds_sdk/pipelines.py b/qds_sdk/pipelines.py index f1fa500d..1438dbc7 100644 --- a/qds_sdk/pipelines.py +++ b/qds_sdk/pipelines.py @@ -346,13 +346,35 @@ def create(cls, pipeline_name, create_type, **kwargs): response """ conn = Qubole.agent() - data = {"data": { - "attributes": - 
{"name": pipeline_name, "status": "DRAFT", - "create_type": create_type}, - "type": "pipelines"} - } - url = Pipelines.rest_entity_path + "?mode=wizard" + url = Pipelines.rest_entity_path + if create_type == 1: + data = {"data": { + "attributes": + {"name": pipeline_name, + "status": "DRAFT", + "create_type": create_type}, + "type": "pipeline"} + } + url = url + "?mode=wizard" + else: + data = { + "data": { + "type": "pipeline", + "attributes": { + "name": pipeline_name, + "create_type": create_type, + "properties": { + "cluster_label": kwargs['cluster_label'], + "can_retry": True, + "command_line_options": "command_line_options", + "user_arguments": "optional-args", + "code" : kwargs['code']", + "language": "scala" + } + } + } + } + response = conn.post(url, data) cls.pipeline_id = Pipelines.get_pipline_id(response) cls.pipeline_name = pipeline_name From 3541de4be34106e9e7d960d3056ecf92d483791c Mon Sep 17 00:00:00 2001 From: siddharth1001 Date: Fri, 19 Jun 2020 09:26:33 +0530 Subject: [PATCH 60/69] typo --- qds_sdk/pipelines.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/qds_sdk/pipelines.py b/qds_sdk/pipelines.py index 1438dbc7..694113e5 100644 --- a/qds_sdk/pipelines.py +++ b/qds_sdk/pipelines.py @@ -368,7 +368,7 @@ def create(cls, pipeline_name, create_type, **kwargs): "can_retry": True, "command_line_options": "command_line_options", "user_arguments": "optional-args", - "code" : kwargs['code']", + "code" : kwargs['code'], "language": "scala" } } From 9156ef74f27d6dffc9fdb9be1e93ba8b9375ce13 Mon Sep 17 00:00:00 2001 From: siddharth1001 Date: Fri, 19 Jun 2020 10:23:29 +0530 Subject: [PATCH 61/69] changes --- qds_sdk/pipelines.py | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/qds_sdk/pipelines.py b/qds_sdk/pipelines.py index 694113e5..d3778978 100644 --- a/qds_sdk/pipelines.py +++ b/qds_sdk/pipelines.py @@ -339,7 +339,7 @@ def create(cls, pipeline_name, create_type, **kwargs): Args: pipeline_name: Name to be given. 
- create_type: 1->Assisted, 2->Code, 3->Jar + create_type: 1->Assisted, 2->Jar, 3->Code **kwargs: keyword arguments specific to create type Returns: @@ -347,6 +347,8 @@ def create(cls, pipeline_name, create_type, **kwargs): """ conn = Qubole.agent() url = Pipelines.rest_entity_path + if create_type is None: + raise ParseError("Provide create_type for Pipeline.") if create_type == 1: data = {"data": { "attributes": @@ -365,11 +367,22 @@ def create(cls, pipeline_name, create_type, **kwargs): "create_type": create_type, "properties": { "cluster_label": kwargs['cluster_label'], - "can_retry": True, - "command_line_options": "command_line_options", - "user_arguments": "optional-args", + "can_retry": kwargs['can_retry'], + "command_line_options": kwargs['command_line_options'], + "user_arguments": kwargs['user_arguments'], "code" : kwargs['code'], - "language": "scala" + "language": kwargs['language'] + } + } + }, + "relationships": { + "alerts": { + "data": { + "type": "pipeline/alerts", + "attributes": { + "can_notify": kwargs['can_notify'], + "notification_channels": kwargs['channel_ids'] + } } } } From 98bcbbe5b93f08b4bc72856e462230db1aaa51fd Mon Sep 17 00:00:00 2001 From: siddharth1001 Date: Fri, 19 Jun 2020 10:38:20 +0530 Subject: [PATCH 62/69] refactor --- qds_sdk/pipelines.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/qds_sdk/pipelines.py b/qds_sdk/pipelines.py index d3778978..cd42f890 100644 --- a/qds_sdk/pipelines.py +++ b/qds_sdk/pipelines.py @@ -373,15 +373,15 @@ def create(cls, pipeline_name, create_type, **kwargs): "code" : kwargs['code'], "language": kwargs['language'] } - } - }, - "relationships": { - "alerts": { - "data": { - "type": "pipeline/alerts", - "attributes": { - "can_notify": kwargs['can_notify'], - "notification_channels": kwargs['channel_ids'] + }, + "relationships": { + "alerts": { + "data": { + "type": "pipeline/alerts", + "attributes": { + "can_notify": kwargs['can_notify'], + "notification_channels": kwargs['channel_ids'] + } } } } From e1a48305eb3ce7a404d578e1da90ab755effbcc0 Mon Sep 17 00:00:00 2001 From: siddharth1001 Date: Fri, 19 Jun 2020 10:56:14 +0530 Subject: [PATCH 63/69] return resp --- qds_sdk/pipelines.py | 1 + 1 file changed, 1 insertion(+) diff --git a/qds_sdk/pipelines.py b/qds_sdk/pipelines.py index cd42f890..5597a780 100644 --- a/qds_sdk/pipelines.py +++ b/qds_sdk/pipelines.py @@ -391,6 +391,7 @@ def create(cls, pipeline_name, create_type, **kwargs): response = conn.post(url, data) cls.pipeline_id = Pipelines.get_pipline_id(response) cls.pipeline_name = pipeline_name + return response @staticmethod def start(pipeline_id): From 8d4bace647bfb5d34ec449be8baeb3892934a7de Mon Sep 17 00:00:00 2001 From: siddharth1001 Date: Fri, 19 Jun 2020 13:48:58 +0530 Subject: [PATCH 64/69] changes --- qds_sdk/pipelines.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/qds_sdk/pipelines.py b/qds_sdk/pipelines.py index 5597a780..b2fe92c6 100644 --- a/qds_sdk/pipelines.py +++ b/qds_sdk/pipelines.py @@ -348,8 +348,8 @@ def create(cls, pipeline_name, create_type, **kwargs): conn = Qubole.agent() url = Pipelines.rest_entity_path if create_type is None: - raise ParseError("Provide create_type for Pipeline.") - if create_type == 1: + raise ParseError("Provide create_type for Pipeline.", None) + if not kwargs || create_type == 1: data = {"data": { "attributes": {"name": pipeline_name, From e3d702522841a58b5c7c9539a7307c57f5bc4dfb Mon Sep 17 00:00:00 2001 From: siddharth1001 Date: Fri, 19 Jun 2020 14:09:39 
+0530 Subject: [PATCH 65/69] use or --- qds_sdk/pipelines.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/qds_sdk/pipelines.py b/qds_sdk/pipelines.py index b2fe92c6..7404d640 100644 --- a/qds_sdk/pipelines.py +++ b/qds_sdk/pipelines.py @@ -349,7 +349,7 @@ def create(cls, pipeline_name, create_type, **kwargs): url = Pipelines.rest_entity_path if create_type is None: raise ParseError("Provide create_type for Pipeline.", None) - if not kwargs || create_type == 1: + if not kwargs or create_type == 1: data = {"data": { "attributes": {"name": pipeline_name, From 9c68f94430beacc5318f2255563813b6764bfaed Mon Sep 17 00:00:00 2001 From: siddharth1001 Date: Fri, 19 Jun 2020 15:04:08 +0530 Subject: [PATCH 66/69] pipeline --- tests/test_quest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_quest.py b/tests/test_quest.py index 0ee50385..71dc75ed 100644 --- a/tests/test_quest.py +++ b/tests/test_quest.py @@ -64,7 +64,7 @@ def test_create_pipeline(self): '--cluster-label', 'spark', '-c', 'print("hello")', '--language', 'python', '--user-arguments', 'users_argument'] print_command() d1 = {"data": {"attributes": {"name": "test_pipeline_name", "status": "DRAFT", "create_type": 3}, - "type": "pipelines"}} + "type": "pipeline"}} response = {"relationships": {"nodes": [], "alerts": []}, "included": [], "meta": {"command_details": {"code": "print(\"hello\")", "language": "python"}, "properties": {"checkpoint_location": None, "trigger_interval": None, From e1e0e5c3107b866a8347cfc6652b7856cd048355 Mon Sep 17 00:00:00 2001 From: siddharth1001 Date: Fri, 19 Jun 2020 15:19:32 +0530 Subject: [PATCH 67/69] changes --- qds_sdk/pipelines.py | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/qds_sdk/pipelines.py b/qds_sdk/pipelines.py index 7404d640..b91b0bd9 100644 --- a/qds_sdk/pipelines.py +++ b/qds_sdk/pipelines.py @@ -350,12 +350,15 @@ def create(cls, pipeline_name, create_type, **kwargs): if create_type is None: raise ParseError("Provide create_type for Pipeline.", None) if not kwargs or create_type == 1: - data = {"data": { - "attributes": - {"name": pipeline_name, - "status": "DRAFT", - "create_type": create_type}, - "type": "pipeline"} + data = { + "data": { + "attributes":{ + "name": pipeline_name, + "status": "DRAFT", + "create_type": create_type + }, + "type": "pipeline" + } } url = url + "?mode=wizard" else: @@ -369,9 +372,7 @@ def create(cls, pipeline_name, create_type, **kwargs): "cluster_label": kwargs['cluster_label'], "can_retry": kwargs['can_retry'], "command_line_options": kwargs['command_line_options'], - "user_arguments": kwargs['user_arguments'], - "code" : kwargs['code'], - "language": kwargs['language'] + "user_arguments": kwargs['user_arguments'] } }, "relationships": { @@ -387,7 +388,13 @@ def create(cls, pipeline_name, create_type, **kwargs): } } } - + if create_type == 2: + data['data']['attributes']['properties']['jar_path'] = kwargs['jar_path'] + data['data']['attributes']['properties']['main_class_name'] = kwargs['main_class_name'] + elif create_type == 3: + data['data']['attributes']['properties']['code'] = kwargs['code'] + data['data']['attributes']['properties']['language'] = kwargs['language'] + response = conn.post(url, data) cls.pipeline_id = Pipelines.get_pipline_id(response) cls.pipeline_name = pipeline_name From 5c7387337e9fd0db573211e83c9e2662ed1703a4 Mon Sep 17 00:00:00 2001 From: siddharth1001 Date: Fri, 19 Jun 2020 16:17:35 +0530 Subject: [PATCH 68/69] lint --- qds_sdk/pipelines.py 
| 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/qds_sdk/pipelines.py b/qds_sdk/pipelines.py index b91b0bd9..5f90267a 100644 --- a/qds_sdk/pipelines.py +++ b/qds_sdk/pipelines.py @@ -347,13 +347,13 @@ def create(cls, pipeline_name, create_type, **kwargs): """ conn = Qubole.agent() url = Pipelines.rest_entity_path - if create_type is None: + if create_type is None: raise ParseError("Provide create_type for Pipeline.", None) if not kwargs or create_type == 1: data = { "data": { - "attributes":{ - "name": pipeline_name, + "attributes": { + "name": pipeline_name, "status": "DRAFT", "create_type": create_type }, @@ -389,11 +389,15 @@ def create(cls, pipeline_name, create_type, **kwargs): } } if create_type == 2: - data['data']['attributes']['properties']['jar_path'] = kwargs['jar_path'] - data['data']['attributes']['properties']['main_class_name'] = kwargs['main_class_name'] + data['data']['attributes']['properties']['jar_path'] = \ + kwargs['jar_path'] + data['data']['attributes']['properties']['main_class_name'] = \ + kwargs['main_class_name'] elif create_type == 3: - data['data']['attributes']['properties']['code'] = kwargs['code'] - data['data']['attributes']['properties']['language'] = kwargs['language'] + data['data']['attributes']['properties']['code'] = \ + kwargs['code'] + data['data']['attributes']['properties']['language'] = \ + kwargs['language'] response = conn.post(url, data) cls.pipeline_id = Pipelines.get_pipline_id(response) From 7729a18b8a367e3f505b0ab876a77d99b7973bdc Mon Sep 17 00:00:00 2001 From: siddharth1001 Date: Mon, 22 Jun 2020 12:52:42 +0530 Subject: [PATCH 69/69] use kwargs.get --- qds_sdk/pipelines.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/qds_sdk/pipelines.py b/qds_sdk/pipelines.py index 5f90267a..ffe6f64d 100644 --- a/qds_sdk/pipelines.py +++ b/qds_sdk/pipelines.py @@ -369,10 +369,10 @@ def create(cls, pipeline_name, create_type, **kwargs): "name": pipeline_name, "create_type": create_type, "properties": { - "cluster_label": kwargs['cluster_label'], - "can_retry": kwargs['can_retry'], - "command_line_options": kwargs['command_line_options'], - "user_arguments": kwargs['user_arguments'] + "cluster_label": kwargs.get('cluster_label'), + "can_retry": kwargs.get('can_retry'), + "command_line_options": kwargs.get('command_line_options'), + "user_arguments": kwargs.get('user_arguments') } }, "relationships": { @@ -380,8 +380,8 @@ def create(cls, pipeline_name, create_type, **kwargs): "data": { "type": "pipeline/alerts", "attributes": { - "can_notify": kwargs['can_notify'], - "notification_channels": kwargs['channel_ids'] + "can_notify": kwargs.get('can_notify'), + "notification_channels": kwargs.get('channel_ids') } } } @@ -390,14 +390,14 @@ def create(cls, pipeline_name, create_type, **kwargs): } if create_type == 2: data['data']['attributes']['properties']['jar_path'] = \ - kwargs['jar_path'] + kwargs.get('jar_path') data['data']['attributes']['properties']['main_class_name'] = \ - kwargs['main_class_name'] + kwargs.get('main_class_name') elif create_type == 3: data['data']['attributes']['properties']['code'] = \ - kwargs['code'] + kwargs.get('code') data['data']['attributes']['properties']['language'] = \ - kwargs['language'] + kwargs.get('language') response = conn.post(url, data) cls.pipeline_id = Pipelines.get_pipline_id(response)
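With the series applied, Pipelines.create() builds a JSON:API-style body, and
only the wizard flow (create_type == 1, or a call with no kwargs) keeps the
"?mode=wizard" query. A minimal sketch of the body it converges on for a code
pipeline (create_type == 3); every value here is illustrative:

    attributes = {
        "name": "demo",
        "create_type": 3,
        "properties": {
            "cluster_label": "spark",       # kwargs.get('cluster_label')
            "can_retry": True,              # kwargs.get('can_retry')
            "command_line_options": None,   # kwargs.get('command_line_options')
            "user_arguments": None,         # kwargs.get('user_arguments')
            "code": 'print("hello")',       # create_type 3 only; a jar
            "language": "python",           # pipeline (create_type 2) carries
        },                                  # jar_path/main_class_name instead
    }
    data = {"data": {"type": "pipeline", "attributes": attributes}}
    # plus an "alerts" relationship ("pipeline/alerts") carrying can_notify
    # and notification_channels, per the final hunks above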