diff --git a/.env b/.env
new file mode 100644
index 0000000..11b77b5
--- /dev/null
+++ b/.env
@@ -0,0 +1,18 @@
+PROFILE=default
+VERSION=latest
+REGION=
+CATALOG=AwsDataCatalog
+WORKGROUP=primary
+QUERY_OUTPUT=
+AWS_DEFAULT_MAX_CALLS_PER_SECOND_LEVEL1QUERIES=5
+AWS_DEFAULT_MAX_BURST_CAPACITY_LEVEL1QUERIES=10
+AWS_DEFAULT_MAX_CALLS_PER_SECOND_LEVEL2QUERIES=5
+AWS_DEFAULT_MAX_BURST_CAPACITY_LEVEL2QUERIES=20
+AWS_DEFAULT_MAX_CALLS_PER_SECOND_LEVEL3QUERIES=20
+AWS_DEFAULT_MAX_BURST_CAPACITY_LEVEL3QUERIES=40
+AWS_DEFAULT_MAX_CALLS_PER_SECOND_LEVEL4QUERIES=20
+AWS_DEFAULT_MAX_BURST_CAPACITY_LEVEL4QUERIES=80
+AWS_DEFAULT_MAX_CALLS_PER_SECOND_LEVEL5QUERIES=100
+AWS_DEFAULT_MAX_BURST_CAPACITY_LEVEL5QUERIES=200
+AWS_DEFAULT_SIMULTANEOUS_DDL_QUERIES=20
+AWS_DEFAULT_SIMULTANEOUS_DML_QUERIES=20
\ No newline at end of file
diff --git a/LICENSE.md b/LICENSE.md
new file mode 100755
index 0000000..d6a8227
--- /dev/null
+++ b/LICENSE.md
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2021 Francois Chaumont
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
\ No newline at end of file
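The per-level limits in this `.env` file are enforced client-side in src/AWS/Athena.php: before each API call the toolkit sleeps for one second divided by the level's max calls per second. A minimal sketch of that pacing follows; the loop body and `$namedQueryIds` are hypothetical, but the `usleep` formula is the one the class uses:

```php
<?php
// level 2 operations (GetNamedQuery, ...) default to 5 calls per second
$maxCallsPerSecond = 5;

foreach ($namedQueryIds as $id) {
    // sleep 1/5 s (200,000 µs) before each call so the observed
    // call rate never exceeds the configured limit
    usleep(intval(1000000 / $maxCallsPerSecond));

    // ... issue one GetNamedQuery call for $id here ...
}
```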
diff --git a/README.md b/README.md
new file mode 100755
index 0000000..886053e
--- /dev/null
+++ b/README.md
@@ -0,0 +1,122 @@
+# Toolkit for AWS Athena API
+
+[![contributions welcome](https://img.shields.io/badge/contributions-welcome-brightgreen.svg?style=flat)](https://github.com/FrancoisChaumont/aws-athena-api-tools/issues)
+![GitHub release](https://img.shields.io/github/release/FrancoisChaumont/aws-athena-api-tools.svg)
+[![GitHub issues](https://img.shields.io/github/issues/FrancoisChaumont/aws-athena-api-tools.svg)](https://github.com/FrancoisChaumont/aws-athena-api-tools/issues)
+[![GitHub stars](https://img.shields.io/github/stars/FrancoisChaumont/aws-athena-api-tools.svg)](https://github.com/FrancoisChaumont/aws-athena-api-tools/stargazers)
+![Github All Releases](https://img.shields.io/github/downloads/FrancoisChaumont/aws-athena-api-tools/total.svg)
+
+## Introduction
+**What does it do?** It allows you to do the following from the command line:
+- create/drop a database
+- execute a single query
+- execute multiple queries simultaneously while remaining within your max rate limits
+- create partitions on non-Hive or Hive formatted data
+- get the current state of one or multiple queries
+- stop a running query
+- delete metadata files
+- create a named query
+- list & detail named queries
+- list & detail databases
+- list & detail database tables
+
+## Requirements
+- [PHP](https://www.php.net/releases/7_4_0.php) ^7.4
+- [aws/aws-sdk-php](https://github.com/aws/aws-sdk-php) ^3.175
+- [vlucas/phpdotenv](https://github.com/vlucas/phpdotenv) ^5.3
+- [Composer](https://getcomposer.org)
+- [AWS CLI](https://docs.aws.amazon.com/cli/latest/userguide/cli-chap-install.html)
+- AWS_ACCESS_KEY_ID & AWS_SECRET_ACCESS_KEY¹
+
+> ¹ The SDK should detect the credentials from environment variables (AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY), an AWS credentials INI file in your HOME directory, AWS Identity and Access Management (IAM) instance profile credentials, or other credential providers
+
+## Installation
+Download a copy of this repository and run the following:
+```
+composer install
+```
+
+## Configuration
+Modify the following variables inside the [.env](.env) file to set the default values used when the related options are omitted:
+- `PROFILE`: AWS profile from ~/.aws/credentials
+- `VERSION`: AWS web service version
+- `REGION`: AWS region to connect to
+- `CATALOG`: Athena data source catalog
+- `WORKGROUP`: Athena workgroup
+- `QUERY_OUTPUT`: S3 bucket for query results
+- `AWS_DEFAULT_MAX_CALLS_PER_SECOND_LEVEL1QUERIES`¹: level 1 queries max calls per second
+- `AWS_DEFAULT_MAX_BURST_CAPACITY_LEVEL1QUERIES`¹⁺⁰: level 1 queries max burst capacity
+- `AWS_DEFAULT_MAX_CALLS_PER_SECOND_LEVEL2QUERIES`²: level 2 queries max calls per second
+- `AWS_DEFAULT_MAX_BURST_CAPACITY_LEVEL2QUERIES`²⁺⁰: level 2 queries max burst capacity
+- `AWS_DEFAULT_MAX_CALLS_PER_SECOND_LEVEL3QUERIES`³: level 3 queries max calls per second
+- `AWS_DEFAULT_MAX_BURST_CAPACITY_LEVEL3QUERIES`³⁺⁰: level 3 queries max burst capacity
+- `AWS_DEFAULT_MAX_CALLS_PER_SECOND_LEVEL4QUERIES`⁴: level 4 queries max calls per second
+- `AWS_DEFAULT_MAX_BURST_CAPACITY_LEVEL4QUERIES`⁴⁺⁰: level 4 queries max burst capacity
+- `AWS_DEFAULT_MAX_CALLS_PER_SECOND_LEVEL5QUERIES`⁵: level 5 queries max calls per second
+- `AWS_DEFAULT_MAX_BURST_CAPACITY_LEVEL5QUERIES`⁵⁺⁰: level 5 queries max burst capacity
+- `AWS_DEFAULT_SIMULTANEOUS_DDL_QUERIES`⁶: max simultaneous DDL queries
+- `AWS_DEFAULT_SIMULTANEOUS_DML_QUERIES`⁷: max simultaneous DML queries
+
+¹ BatchGetNamedQuery, ListNamedQueries, ListQueryExecutions
+² CreateNamedQuery, DeleteNamedQuery, GetNamedQuery
+³ BatchGetQueryExecution
+⁴ StartQueryExecution, StopQueryExecution
+⁵ GetQueryExecution, GetQueryResults - `a value higher than 2 will exceed the max rate limit`
+⁶ create table, create table add partition
+⁷ select, create table as (CTAS)
+
+⁰ max burst capacity not yet implemented
+
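As an illustration, here is a minimal sketch (not part of the toolkit) of how these defaults can be wired up once the values above are filled in; `FC\AWS\Athena` and its alternate constructor `newDefaultWithDML` are defined in src/AWS/Athena.php, the rest is standard AWS SDK and phpdotenv usage:

```php
<?php
require __DIR__ . '/vendor/autoload.php';

// load the .env defaults into $_ENV
Dotenv\Dotenv::createImmutable(__DIR__)->load();

// plain AWS SDK Athena client built from the defaults
$client = new Aws\Athena\AthenaClient([
    'profile' => $_ENV['PROFILE'],  // from ~/.aws/credentials
    'version' => $_ENV['VERSION'],  // e.g. "latest"
    'region'  => $_ENV['REGION'],
]);

// toolkit wrapper: paces the API calls and caps simultaneous DML queries
$athena = FC\AWS\Athena::newDefaultWithDML(
    $client,
    (int) $_ENV['AWS_DEFAULT_SIMULTANEOUS_DML_QUERIES']
);
```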
+## Important
+- Make sure to double every `%` inside query files that is not part of a parameter passed to the query, or it will be consumed by `sprintf` as a conversion specification (see the sketch below)
+
+Example passing year + month to constitute the table name:
+```sql
+SELECT DATE_FORMAT(FROM_UNIXTIME(1614716423), '%%Y-%%m-%%d %%H:%%i:%%S')
+FROM database.table_name_%1$s%2$s
+LIMIT 1
+```
+
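A minimal sketch of the substitution behind that warning, assuming the file contents and the date parts are passed straight to `sprintf`: the positional placeholders receive the arguments, while each `%%` collapses to the literal `%` that Athena's `DATE_FORMAT` expects.

```php
<?php
// contents of the query file above
$template = "SELECT DATE_FORMAT(FROM_UNIXTIME(1614716423), '%%Y-%%m-%%d %%H:%%i:%%S')\n"
          . "FROM database.table_name_%1\$s%2\$s\n"
          . "LIMIT 1";

// %1$s/%2$s are replaced by the arguments, %% becomes a literal %
echo sprintf($template, '2015', '01');
// SELECT DATE_FORMAT(FROM_UNIXTIME(1614716423), '%Y-%m-%d %H:%i:%S')
// FROM database.table_name_201501
// LIMIT 1
```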
+## The tools
+See tools [documentation](READMEs/README.tools.md) for more details.
+
+## Testing
+See tests [documentation](READMEs/README.tests.md) for more details.
+
+## AWS documentation
+- AWS SDK [Basic Usage](https://docs.aws.amazon.com/sdk-for-php/v3/developer-guide/getting-started_basic-usage.html)
+- AWS SDK [API documentation for Athena](https://docs.aws.amazon.com/aws-sdk-php/v3/api/namespace-Aws.Athena.html)
+- AWS SDK for PHP v3 [Getting Started](https://docs.aws.amazon.com/sdk-for-php/v3/developer-guide/getting-started_index.html)
+- AWS Athena [Service Limits](https://docs.aws.amazon.com/athena/latest/ug/service-limits.html)
+- List of [AWS regions](http://docs.aws.amazon.com/general/latest/gr/rande.html)
+- Data [Partitioning](https://docs.aws.amazon.com/athena/latest/ug/partitions.html)
+
+## TODO
+Methods:
+- BatchGetNamedQuery
+- BatchGetQueryExecution
+- CreateDataCatalog
+- CreatePreparedStatement
+- CreateWorkGroup
+- DeleteDataCatalog
+- DeletePreparedStatement
+- DeleteWorkGroup
+- GetDataCatalog
+- GetPreparedStatement
+- GetQueryResults
+- GetWorkGroup
+- ListDataCatalogs
+- ListEngineVersions
+- ListPreparedStatements
+- ListQueryExecutions
+- ListTagsForResource
+- ListWorkGroups
+- TagResource
+- UntagResource
+- UpdateDataCatalog
+- UpdatePreparedStatement
+- UpdateWorkGroup
+
+Others:
+- implement burst capacity?
diff --git a/READMEs/README.tests.md b/READMEs/README.tests.md
new file mode 100644
index 0000000..bfd1ba5
--- /dev/null
+++ b/READMEs/README.tests.md
@@ -0,0 +1,45 @@
+## Tests
+This [test](../tests/test.sh) script lets you test every tool in this library.
+
+Make sure to read **Requirements**, **Installation** and **Configuration** first.
+**For safety, a confirmation to delete data on S3 and drop databases/tables is required at start.**
+
+Tested on Ubuntu 20.04 running PHP 7.4.
+
+It requires the database `sampledb` and performs the following:
+1. list database `sampledb`
+2. create a new database
+3. create test data by extracting from the `sampledb.elb_logs` table and creating tables holding one day of data each
+4. create a table for test data spanning multiple days
+5. create day partitions on the test data tables
+6. select data from the multiple-day table
+7. select several days of data from the single-day tables
+8. display query result files on S3
+9. delete metadata files
+10. display query result files on S3 without metadata files
+11. detail the tables in the database
+12. drop the database and all its tables
+13. delete the data from S3
+14. create a named query
+15. detail the named query
+16. delete the named query
+
+Usage:
+```shell
+/bin/bash test.sh \
+    -d DATABASE_TO_CREATE \
+    -y YEAR_OF_DATA_TO_EXTRACT \
+    -m MONTH_OF_DATA_TO_EXTRACT
+```
+
+Example:
+```shell
+/bin/bash test.sh \
+    -d aws_athena_api_tools_tests \
+    -y 2015 \
+    -m 01
+```
+
+Output:
+See the expected [output](../tests/output.txt) for more details.
diff --git a/READMEs/README.tools.md b/READMEs/README.tools.md
new file mode 100644
index 0000000..75cbe83
--- /dev/null
+++ b/READMEs/README.tools.md
@@ -0,0 +1,101 @@
+## The tools
+**Create/Drop a database**
+See [usage](../tools/usage/database.usage.php) or
+```
+php database.php -h/--help
+```
+
+**Execute a single query**
+select | create table [as] | create view | create database | drop table ...
+
+See [usage](../tools/usage/query.usage.php) or
+```
+php query.php -h/--help
+```
+
+**Execute queries for each day in the given date range within the max rate limit**
+select | create table [as] | create view | create database | drop table ...
+
+Examples: [query-daily.sql](../examples/query-daily.sql)
+
+See [usage](../tools/usage/query-daily.usage.php) or
+```
+php query-daily.php -h/--help
+```
+
+**Execute queries for each month in the given date range within the max rate limit**
+select | create table [as] | create view | create database | drop table ...
+
+Examples:
+[create-table.sql](../examples/create-table.sql),
+[create-table-partitioned.sql](../examples/create-table-partitioned.sql),
+[create-table-partitioned-hive.sql](../examples/create-table-partitioned-hive.sql),
+[drop-table.sql](../examples/drop-table.sql),
+[query-monthly.sql](../examples/query-monthly.sql)
+
+See [usage](../tools/usage/query-monthly.usage.php) or
+```
+php query-monthly.php -h/--help
+```
+
+**Create day partitions on a table (non-Hive formatted data)**
+See [usage](../tools/usage/partitions-daily.usage.php) or
+```
+php partitions-daily.php -h/--help
+```
+
+**Create day partitions on a table (Hive formatted data)**
+See [usage](../tools/usage/partitions-daily-hive.usage.php) or
+```
+php partitions-daily-hive.php -h/--help
+```
+
+**Get the execution state of a query (running, failed, succeeded, ...)**
+See [usage](../tools/usage/state.usage.php) or
+```
+php state.php -h/--help
+```
+
+**Get the execution state of queries listed in a file (running, failed, succeeded, ...)**
+Example: [query-ids-list.txt](../examples/query-ids-list.txt)
+
+See [usage](../tools/usage/state-from-list.usage.php) or
+```
+php state-from-list.php -h/--help
+```
+
+**Stop a running query**
+See [usage](../tools/usage/stop.usage.php) or
+```
+php stop.php -h/--help
+```
+
+**Delete metadata files recursively from an S3 location (bucket/prefixes)**
+See [usage](../tools/usage/delete-metadata-files.usage.sh) or
+```
+delete-metadata-files.sh -h/--help
+```
+
+**Create or delete a named query**
+See [usage](../tools/usage/named-query.usage.php) or
+```
+php named-query.php -h/--help
+```
+
+**Detail one or all named queries and output in JSON format**
+See [usage](../tools/usage/list-named-queries.usage.php) or
+```
+php list-named-queries.php -h/--help
+```
+
+**Detail one or all databases and output in JSON format**
+See [usage](../tools/usage/list-databases.usage.php) or
+```
+php list-databases.php -h/--help
+```
+
+**Detail one or all tables of a database and output in JSON format**
+See [usage](../tools/usage/list-tables.usage.php) or
+```
+php list-tables.php -h/--help
+```
diff --git a/composer.json b/composer.json
new file mode 100755
index 0000000..450e53d
--- /dev/null
+++ 
b/composer.json @@ -0,0 +1,20 @@ +{ + "name": "francoischaumont/aws-athena-api-tools", + "description": "Toolkit for AWS Athena using AWS SDK for PHP v3", + "authors": [ + { + "name": "Francois Chaumont", + "role": "main developer" + } + ], + "require": { + "php": "^7.4", + "aws/aws-sdk-php": "^3.175", + "vlucas/phpdotenv": "^5.3" + }, + "autoload": { + "psr-4": { + "FC\\AWS\\": "src/AWS/" + } + } +} diff --git a/composer.lock b/composer.lock new file mode 100755 index 0000000..15c76fd --- /dev/null +++ b/composer.lock @@ -0,0 +1,1010 @@ +{ + "_readme": [ + "This file locks the dependencies of your project to a known state", + "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies", + "This file is @generated automatically" + ], + "content-hash": "3c6b0a9cc8fb327c91636b9c692688a7", + "packages": [ + { + "name": "aws/aws-sdk-php", + "version": "3.175.0", + "source": { + "type": "git", + "url": "https://github.com/aws/aws-sdk-php.git", + "reference": "31baa9e0c4330f01eb74b8a7ef086e9d34f8391e" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/aws/aws-sdk-php/zipball/31baa9e0c4330f01eb74b8a7ef086e9d34f8391e", + "reference": "31baa9e0c4330f01eb74b8a7ef086e9d34f8391e", + "shasum": "" + }, + "require": { + "ext-json": "*", + "ext-pcre": "*", + "ext-simplexml": "*", + "guzzlehttp/guzzle": "^5.3.3|^6.2.1|^7.0", + "guzzlehttp/promises": "^1.4.0", + "guzzlehttp/psr7": "^1.7.0", + "mtdowling/jmespath.php": "^2.6", + "php": ">=5.5" + }, + "require-dev": { + "andrewsville/php-token-reflection": "^1.4", + "aws/aws-php-sns-message-validator": "~1.0", + "behat/behat": "~3.0", + "doctrine/cache": "~1.4", + "ext-dom": "*", + "ext-openssl": "*", + "ext-pcntl": "*", + "ext-sockets": "*", + "nette/neon": "^2.3", + "paragonie/random_compat": ">= 2", + "phpunit/phpunit": "^4.8.35|^5.4.3", + "psr/cache": "^1.0", + "psr/simple-cache": "^1.0", + "sebastian/comparator": "^1.2.3" + }, + "suggest": { + "aws/aws-php-sns-message-validator": "To validate incoming SNS notifications", + "doctrine/cache": "To use the DoctrineCacheAdapter", + "ext-curl": "To send requests using cURL", + "ext-openssl": "Allows working with CloudFront private distributions and verifying received SNS messages", + "ext-sockets": "To use client-side monitoring" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "3.0-dev" + } + }, + "autoload": { + "psr-4": { + "Aws\\": "src/" + }, + "files": [ + "src/functions.php" + ] + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "Apache-2.0" + ], + "authors": [ + { + "name": "Amazon Web Services", + "homepage": "http://aws.amazon.com" + } + ], + "description": "AWS SDK for PHP - Use Amazon Web Services in your PHP project", + "homepage": "http://aws.amazon.com/sdkforphp", + "keywords": [ + "amazon", + "aws", + "cloud", + "dynamodb", + "ec2", + "glacier", + "s3", + "sdk" + ], + "support": { + "forum": "https://forums.aws.amazon.com/forum.jspa?forumID=80", + "issues": "https://github.com/aws/aws-sdk-php/issues", + "source": "https://github.com/aws/aws-sdk-php/tree/3.175.0" + }, + "time": "2021-03-19T18:13:22+00:00" + }, + { + "name": "graham-campbell/result-type", + "version": "v1.0.1", + "source": { + "type": "git", + "url": "https://github.com/GrahamCampbell/Result-Type.git", + "reference": "7e279d2cd5d7fbb156ce46daada972355cea27bb" + }, + "dist": { + "type": "zip", + "url": 
"https://api.github.com/repos/GrahamCampbell/Result-Type/zipball/7e279d2cd5d7fbb156ce46daada972355cea27bb", + "reference": "7e279d2cd5d7fbb156ce46daada972355cea27bb", + "shasum": "" + }, + "require": { + "php": "^7.0|^8.0", + "phpoption/phpoption": "^1.7.3" + }, + "require-dev": { + "phpunit/phpunit": "^6.5|^7.5|^8.5|^9.0" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "1.0-dev" + } + }, + "autoload": { + "psr-4": { + "GrahamCampbell\\ResultType\\": "src/" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "Graham Campbell", + "email": "graham@alt-three.com" + } + ], + "description": "An Implementation Of The Result Type", + "keywords": [ + "Graham Campbell", + "GrahamCampbell", + "Result Type", + "Result-Type", + "result" + ], + "support": { + "issues": "https://github.com/GrahamCampbell/Result-Type/issues", + "source": "https://github.com/GrahamCampbell/Result-Type/tree/v1.0.1" + }, + "funding": [ + { + "url": "https://github.com/GrahamCampbell", + "type": "github" + }, + { + "url": "https://tidelift.com/funding/github/packagist/graham-campbell/result-type", + "type": "tidelift" + } + ], + "time": "2020-04-13T13:17:36+00:00" + }, + { + "name": "guzzlehttp/guzzle", + "version": "7.2.0", + "source": { + "type": "git", + "url": "https://github.com/guzzle/guzzle.git", + "reference": "0aa74dfb41ae110835923ef10a9d803a22d50e79" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/guzzle/guzzle/zipball/0aa74dfb41ae110835923ef10a9d803a22d50e79", + "reference": "0aa74dfb41ae110835923ef10a9d803a22d50e79", + "shasum": "" + }, + "require": { + "ext-json": "*", + "guzzlehttp/promises": "^1.4", + "guzzlehttp/psr7": "^1.7", + "php": "^7.2.5 || ^8.0", + "psr/http-client": "^1.0" + }, + "provide": { + "psr/http-client-implementation": "1.0" + }, + "require-dev": { + "ext-curl": "*", + "php-http/client-integration-tests": "^3.0", + "phpunit/phpunit": "^8.5.5 || ^9.3.5", + "psr/log": "^1.1" + }, + "suggest": { + "ext-curl": "Required for CURL handler support", + "ext-intl": "Required for Internationalized Domain Name (IDN) support", + "psr/log": "Required for using the Log middleware" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "7.1-dev" + } + }, + "autoload": { + "psr-4": { + "GuzzleHttp\\": "src/" + }, + "files": [ + "src/functions_include.php" + ] + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "Michael Dowling", + "email": "mtdowling@gmail.com", + "homepage": "https://github.com/mtdowling" + }, + { + "name": "Márk Sági-Kazár", + "email": "mark.sagikazar@gmail.com", + "homepage": "https://sagikazarmark.hu" + } + ], + "description": "Guzzle is a PHP HTTP client library", + "homepage": "http://guzzlephp.org/", + "keywords": [ + "client", + "curl", + "framework", + "http", + "http client", + "psr-18", + "psr-7", + "rest", + "web service" + ], + "support": { + "issues": "https://github.com/guzzle/guzzle/issues", + "source": "https://github.com/guzzle/guzzle/tree/7.2.0" + }, + "funding": [ + { + "url": "https://github.com/GrahamCampbell", + "type": "github" + }, + { + "url": "https://github.com/Nyholm", + "type": "github" + }, + { + "url": "https://github.com/alexeyshockov", + "type": "github" + }, + { + "url": "https://github.com/gmponos", + "type": "github" + } + ], + "time": "2020-10-10T11:47:56+00:00" + }, + { 
+ "name": "guzzlehttp/promises", + "version": "1.4.1", + "source": { + "type": "git", + "url": "https://github.com/guzzle/promises.git", + "reference": "8e7d04f1f6450fef59366c399cfad4b9383aa30d" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/guzzle/promises/zipball/8e7d04f1f6450fef59366c399cfad4b9383aa30d", + "reference": "8e7d04f1f6450fef59366c399cfad4b9383aa30d", + "shasum": "" + }, + "require": { + "php": ">=5.5" + }, + "require-dev": { + "symfony/phpunit-bridge": "^4.4 || ^5.1" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "1.4-dev" + } + }, + "autoload": { + "psr-4": { + "GuzzleHttp\\Promise\\": "src/" + }, + "files": [ + "src/functions_include.php" + ] + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "Michael Dowling", + "email": "mtdowling@gmail.com", + "homepage": "https://github.com/mtdowling" + } + ], + "description": "Guzzle promises library", + "keywords": [ + "promise" + ], + "support": { + "issues": "https://github.com/guzzle/promises/issues", + "source": "https://github.com/guzzle/promises/tree/1.4.1" + }, + "time": "2021-03-07T09:25:29+00:00" + }, + { + "name": "guzzlehttp/psr7", + "version": "1.7.0", + "source": { + "type": "git", + "url": "https://github.com/guzzle/psr7.git", + "reference": "53330f47520498c0ae1f61f7e2c90f55690c06a3" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/guzzle/psr7/zipball/53330f47520498c0ae1f61f7e2c90f55690c06a3", + "reference": "53330f47520498c0ae1f61f7e2c90f55690c06a3", + "shasum": "" + }, + "require": { + "php": ">=5.4.0", + "psr/http-message": "~1.0", + "ralouphie/getallheaders": "^2.0.5 || ^3.0.0" + }, + "provide": { + "psr/http-message-implementation": "1.0" + }, + "require-dev": { + "ext-zlib": "*", + "phpunit/phpunit": "~4.8.36 || ^5.7.27 || ^6.5.14 || ^7.5.20 || ^8.5.8 || ^9.3.10" + }, + "suggest": { + "laminas/laminas-httphandlerrunner": "Emit PSR-7 responses" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "1.7-dev" + } + }, + "autoload": { + "psr-4": { + "GuzzleHttp\\Psr7\\": "src/" + }, + "files": [ + "src/functions_include.php" + ] + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "Michael Dowling", + "email": "mtdowling@gmail.com", + "homepage": "https://github.com/mtdowling" + }, + { + "name": "Tobias Schultze", + "homepage": "https://github.com/Tobion" + } + ], + "description": "PSR-7 message implementation that also provides common utility methods", + "keywords": [ + "http", + "message", + "psr-7", + "request", + "response", + "stream", + "uri", + "url" + ], + "support": { + "issues": "https://github.com/guzzle/psr7/issues", + "source": "https://github.com/guzzle/psr7/tree/1.7.0" + }, + "time": "2020-09-30T07:37:11+00:00" + }, + { + "name": "mtdowling/jmespath.php", + "version": "2.6.0", + "source": { + "type": "git", + "url": "https://github.com/jmespath/jmespath.php.git", + "reference": "42dae2cbd13154083ca6d70099692fef8ca84bfb" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/jmespath/jmespath.php/zipball/42dae2cbd13154083ca6d70099692fef8ca84bfb", + "reference": "42dae2cbd13154083ca6d70099692fef8ca84bfb", + "shasum": "" + }, + "require": { + "php": "^5.4 || ^7.0 || ^8.0", + "symfony/polyfill-mbstring": "^1.17" + }, + "require-dev": { + "composer/xdebug-handler": "^1.4", + 
"phpunit/phpunit": "^4.8.36 || ^7.5.15" + }, + "bin": [ + "bin/jp.php" + ], + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "2.6-dev" + } + }, + "autoload": { + "psr-4": { + "JmesPath\\": "src/" + }, + "files": [ + "src/JmesPath.php" + ] + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "Michael Dowling", + "email": "mtdowling@gmail.com", + "homepage": "https://github.com/mtdowling" + } + ], + "description": "Declaratively specify how to extract elements from a JSON document", + "keywords": [ + "json", + "jsonpath" + ], + "support": { + "issues": "https://github.com/jmespath/jmespath.php/issues", + "source": "https://github.com/jmespath/jmespath.php/tree/2.6.0" + }, + "time": "2020-07-31T21:01:56+00:00" + }, + { + "name": "phpoption/phpoption", + "version": "1.7.5", + "source": { + "type": "git", + "url": "https://github.com/schmittjoh/php-option.git", + "reference": "994ecccd8f3283ecf5ac33254543eb0ac946d525" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/schmittjoh/php-option/zipball/994ecccd8f3283ecf5ac33254543eb0ac946d525", + "reference": "994ecccd8f3283ecf5ac33254543eb0ac946d525", + "shasum": "" + }, + "require": { + "php": "^5.5.9 || ^7.0 || ^8.0" + }, + "require-dev": { + "bamarni/composer-bin-plugin": "^1.4.1", + "phpunit/phpunit": "^4.8.35 || ^5.7.27 || ^6.5.6 || ^7.0 || ^8.0 || ^9.0" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "1.7-dev" + } + }, + "autoload": { + "psr-4": { + "PhpOption\\": "src/PhpOption/" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "Apache-2.0" + ], + "authors": [ + { + "name": "Johannes M. Schmitt", + "email": "schmittjoh@gmail.com" + }, + { + "name": "Graham Campbell", + "email": "graham@alt-three.com" + } + ], + "description": "Option Type for PHP", + "keywords": [ + "language", + "option", + "php", + "type" + ], + "support": { + "issues": "https://github.com/schmittjoh/php-option/issues", + "source": "https://github.com/schmittjoh/php-option/tree/1.7.5" + }, + "funding": [ + { + "url": "https://github.com/GrahamCampbell", + "type": "github" + }, + { + "url": "https://tidelift.com/funding/github/packagist/phpoption/phpoption", + "type": "tidelift" + } + ], + "time": "2020-07-20T17:29:33+00:00" + }, + { + "name": "psr/http-client", + "version": "1.0.1", + "source": { + "type": "git", + "url": "https://github.com/php-fig/http-client.git", + "reference": "2dfb5f6c5eff0e91e20e913f8c5452ed95b86621" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/php-fig/http-client/zipball/2dfb5f6c5eff0e91e20e913f8c5452ed95b86621", + "reference": "2dfb5f6c5eff0e91e20e913f8c5452ed95b86621", + "shasum": "" + }, + "require": { + "php": "^7.0 || ^8.0", + "psr/http-message": "^1.0" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "1.0.x-dev" + } + }, + "autoload": { + "psr-4": { + "Psr\\Http\\Client\\": "src/" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "PHP-FIG", + "homepage": "http://www.php-fig.org/" + } + ], + "description": "Common interface for HTTP clients", + "homepage": "https://github.com/php-fig/http-client", + "keywords": [ + "http", + "http-client", + "psr", + "psr-18" + ], + "support": { + "source": "https://github.com/php-fig/http-client/tree/master" + }, + "time": 
"2020-06-29T06:28:15+00:00" + }, + { + "name": "psr/http-message", + "version": "1.0.1", + "source": { + "type": "git", + "url": "https://github.com/php-fig/http-message.git", + "reference": "f6561bf28d520154e4b0ec72be95418abe6d9363" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/php-fig/http-message/zipball/f6561bf28d520154e4b0ec72be95418abe6d9363", + "reference": "f6561bf28d520154e4b0ec72be95418abe6d9363", + "shasum": "" + }, + "require": { + "php": ">=5.3.0" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "1.0.x-dev" + } + }, + "autoload": { + "psr-4": { + "Psr\\Http\\Message\\": "src/" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "PHP-FIG", + "homepage": "http://www.php-fig.org/" + } + ], + "description": "Common interface for HTTP messages", + "homepage": "https://github.com/php-fig/http-message", + "keywords": [ + "http", + "http-message", + "psr", + "psr-7", + "request", + "response" + ], + "support": { + "source": "https://github.com/php-fig/http-message/tree/master" + }, + "time": "2016-08-06T14:39:51+00:00" + }, + { + "name": "ralouphie/getallheaders", + "version": "3.0.3", + "source": { + "type": "git", + "url": "https://github.com/ralouphie/getallheaders.git", + "reference": "120b605dfeb996808c31b6477290a714d356e822" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/ralouphie/getallheaders/zipball/120b605dfeb996808c31b6477290a714d356e822", + "reference": "120b605dfeb996808c31b6477290a714d356e822", + "shasum": "" + }, + "require": { + "php": ">=5.6" + }, + "require-dev": { + "php-coveralls/php-coveralls": "^2.1", + "phpunit/phpunit": "^5 || ^6.5" + }, + "type": "library", + "autoload": { + "files": [ + "src/getallheaders.php" + ] + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "Ralph Khattar", + "email": "ralph.khattar@gmail.com" + } + ], + "description": "A polyfill for getallheaders.", + "support": { + "issues": "https://github.com/ralouphie/getallheaders/issues", + "source": "https://github.com/ralouphie/getallheaders/tree/develop" + }, + "time": "2019-03-08T08:55:37+00:00" + }, + { + "name": "symfony/polyfill-ctype", + "version": "v1.22.1", + "source": { + "type": "git", + "url": "https://github.com/symfony/polyfill-ctype.git", + "reference": "c6c942b1ac76c82448322025e084cadc56048b4e" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/symfony/polyfill-ctype/zipball/c6c942b1ac76c82448322025e084cadc56048b4e", + "reference": "c6c942b1ac76c82448322025e084cadc56048b4e", + "shasum": "" + }, + "require": { + "php": ">=7.1" + }, + "suggest": { + "ext-ctype": "For best performance" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-main": "1.22-dev" + }, + "thanks": { + "name": "symfony/polyfill", + "url": "https://github.com/symfony/polyfill" + } + }, + "autoload": { + "psr-4": { + "Symfony\\Polyfill\\Ctype\\": "" + }, + "files": [ + "bootstrap.php" + ] + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "Gert de Pagter", + "email": "BackEndTea@gmail.com" + }, + { + "name": "Symfony Community", + "homepage": "https://symfony.com/contributors" + } + ], + "description": "Symfony polyfill for ctype functions", + "homepage": "https://symfony.com", + "keywords": [ + "compatibility", + "ctype", + "polyfill", 
+ "portable" + ], + "support": { + "source": "https://github.com/symfony/polyfill-ctype/tree/v1.22.1" + }, + "funding": [ + { + "url": "https://symfony.com/sponsor", + "type": "custom" + }, + { + "url": "https://github.com/fabpot", + "type": "github" + }, + { + "url": "https://tidelift.com/funding/github/packagist/symfony/symfony", + "type": "tidelift" + } + ], + "time": "2021-01-07T16:49:33+00:00" + }, + { + "name": "symfony/polyfill-mbstring", + "version": "v1.22.1", + "source": { + "type": "git", + "url": "https://github.com/symfony/polyfill-mbstring.git", + "reference": "5232de97ee3b75b0360528dae24e73db49566ab1" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/symfony/polyfill-mbstring/zipball/5232de97ee3b75b0360528dae24e73db49566ab1", + "reference": "5232de97ee3b75b0360528dae24e73db49566ab1", + "shasum": "" + }, + "require": { + "php": ">=7.1" + }, + "suggest": { + "ext-mbstring": "For best performance" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-main": "1.22-dev" + }, + "thanks": { + "name": "symfony/polyfill", + "url": "https://github.com/symfony/polyfill" + } + }, + "autoload": { + "psr-4": { + "Symfony\\Polyfill\\Mbstring\\": "" + }, + "files": [ + "bootstrap.php" + ] + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "Nicolas Grekas", + "email": "p@tchwork.com" + }, + { + "name": "Symfony Community", + "homepage": "https://symfony.com/contributors" + } + ], + "description": "Symfony polyfill for the Mbstring extension", + "homepage": "https://symfony.com", + "keywords": [ + "compatibility", + "mbstring", + "polyfill", + "portable", + "shim" + ], + "support": { + "source": "https://github.com/symfony/polyfill-mbstring/tree/v1.22.1" + }, + "funding": [ + { + "url": "https://symfony.com/sponsor", + "type": "custom" + }, + { + "url": "https://github.com/fabpot", + "type": "github" + }, + { + "url": "https://tidelift.com/funding/github/packagist/symfony/symfony", + "type": "tidelift" + } + ], + "time": "2021-01-22T09:19:47+00:00" + }, + { + "name": "symfony/polyfill-php80", + "version": "v1.22.1", + "source": { + "type": "git", + "url": "https://github.com/symfony/polyfill-php80.git", + "reference": "dc3063ba22c2a1fd2f45ed856374d79114998f91" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/symfony/polyfill-php80/zipball/dc3063ba22c2a1fd2f45ed856374d79114998f91", + "reference": "dc3063ba22c2a1fd2f45ed856374d79114998f91", + "shasum": "" + }, + "require": { + "php": ">=7.1" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-main": "1.22-dev" + }, + "thanks": { + "name": "symfony/polyfill", + "url": "https://github.com/symfony/polyfill" + } + }, + "autoload": { + "psr-4": { + "Symfony\\Polyfill\\Php80\\": "" + }, + "files": [ + "bootstrap.php" + ], + "classmap": [ + "Resources/stubs" + ] + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "Ion Bazan", + "email": "ion.bazan@gmail.com" + }, + { + "name": "Nicolas Grekas", + "email": "p@tchwork.com" + }, + { + "name": "Symfony Community", + "homepage": "https://symfony.com/contributors" + } + ], + "description": "Symfony polyfill backporting some PHP 8.0+ features to lower PHP versions", + "homepage": "https://symfony.com", + "keywords": [ + "compatibility", + "polyfill", + "portable", + "shim" + ], + "support": { + "source": 
"https://github.com/symfony/polyfill-php80/tree/v1.22.1" + }, + "funding": [ + { + "url": "https://symfony.com/sponsor", + "type": "custom" + }, + { + "url": "https://github.com/fabpot", + "type": "github" + }, + { + "url": "https://tidelift.com/funding/github/packagist/symfony/symfony", + "type": "tidelift" + } + ], + "time": "2021-01-07T16:49:33+00:00" + }, + { + "name": "vlucas/phpdotenv", + "version": "v5.3.0", + "source": { + "type": "git", + "url": "https://github.com/vlucas/phpdotenv.git", + "reference": "b3eac5c7ac896e52deab4a99068e3f4ab12d9e56" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/vlucas/phpdotenv/zipball/b3eac5c7ac896e52deab4a99068e3f4ab12d9e56", + "reference": "b3eac5c7ac896e52deab4a99068e3f4ab12d9e56", + "shasum": "" + }, + "require": { + "ext-pcre": "*", + "graham-campbell/result-type": "^1.0.1", + "php": "^7.1.3 || ^8.0", + "phpoption/phpoption": "^1.7.4", + "symfony/polyfill-ctype": "^1.17", + "symfony/polyfill-mbstring": "^1.17", + "symfony/polyfill-php80": "^1.17" + }, + "require-dev": { + "bamarni/composer-bin-plugin": "^1.4.1", + "ext-filter": "*", + "phpunit/phpunit": "^7.5.20 || ^8.5.14 || ^9.5.1" + }, + "suggest": { + "ext-filter": "Required to use the boolean validator." + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "5.3-dev" + } + }, + "autoload": { + "psr-4": { + "Dotenv\\": "src/" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "BSD-3-Clause" + ], + "authors": [ + { + "name": "Graham Campbell", + "email": "graham@alt-three.com", + "homepage": "https://gjcampbell.co.uk/" + }, + { + "name": "Vance Lucas", + "email": "vance@vancelucas.com", + "homepage": "https://vancelucas.com/" + } + ], + "description": "Loads environment variables from `.env` to `getenv()`, `$_ENV` and `$_SERVER` automagically.", + "keywords": [ + "dotenv", + "env", + "environment" + ], + "support": { + "issues": "https://github.com/vlucas/phpdotenv/issues", + "source": "https://github.com/vlucas/phpdotenv/tree/v5.3.0" + }, + "funding": [ + { + "url": "https://github.com/GrahamCampbell", + "type": "github" + }, + { + "url": "https://tidelift.com/funding/github/packagist/vlucas/phpdotenv", + "type": "tidelift" + } + ], + "time": "2021-01-20T15:23:13+00:00" + } + ], + "packages-dev": [], + "aliases": [], + "minimum-stability": "stable", + "stability-flags": [], + "prefer-stable": false, + "prefer-lowest": false, + "platform": { + "php": "^7.4" + }, + "platform-dev": [], + "plugin-api-version": "2.0.0" +} diff --git a/examples/create-table-partitioned-hive.sql b/examples/create-table-partitioned-hive.sql new file mode 100644 index 0000000..aad2c2f --- /dev/null +++ b/examples/create-table-partitioned-hive.sql @@ -0,0 +1,6 @@ +CREATE EXTERNAL TABLE IF NOT EXISTS database_name.table_name_%1$s%2$s ( + column1 string +) +PARTITIONED BY (day string) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' +LOCATION 's3://BUCKET/PREFIXES/year=%1$s/month=%2$s' \ No newline at end of file diff --git a/examples/create-table-partitioned.sql b/examples/create-table-partitioned.sql new file mode 100644 index 0000000..00e4bf4 --- /dev/null +++ b/examples/create-table-partitioned.sql @@ -0,0 +1,6 @@ +CREATE EXTERNAL TABLE IF NOT EXISTS database_name.table_name_%1$s%2$s ( + column1 string +) +PARTITIONED BY (day string) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' +LOCATION 's3://BUCKET/PREFIXES/%1$s/%2$s' \ No newline at end of file diff --git a/examples/create-table.sql 
b/examples/create-table.sql new file mode 100644 index 0000000..d3d9676 --- /dev/null +++ b/examples/create-table.sql @@ -0,0 +1,5 @@ +CREATE EXTERNAL TABLE IF NOT EXISTS database_name.table_name_%1$s%2$s ( + column1 string +) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' +LOCATION 's3://BUCKET/PREFIXES/%1$s/%2$s' \ No newline at end of file diff --git a/examples/drop-table.sql b/examples/drop-table.sql new file mode 100644 index 0000000..af704fb --- /dev/null +++ b/examples/drop-table.sql @@ -0,0 +1 @@ +DROP TABLE IF EXISTS database_name.table_name_%1$s%2$s \ No newline at end of file diff --git a/examples/named-query.sql b/examples/named-query.sql new file mode 100644 index 0000000..2e2c239 --- /dev/null +++ b/examples/named-query.sql @@ -0,0 +1,4 @@ +SELECT * +FROM sampledb.elb_logs +ORDER BY request_timestamp DESC +LIMIT 352 \ No newline at end of file diff --git a/examples/query-daily.sql b/examples/query-daily.sql new file mode 100644 index 0000000..4e2664b --- /dev/null +++ b/examples/query-daily.sql @@ -0,0 +1,4 @@ +SELECT * +FROM database.table_name_%1$s%2$s +WHERE day = '%3$s' +LIMIT 1 \ No newline at end of file diff --git a/examples/query-ids-list.txt b/examples/query-ids-list.txt new file mode 100644 index 0000000..c05a0e9 --- /dev/null +++ b/examples/query-ids-list.txt @@ -0,0 +1,10 @@ +Queries for project X +9007743a-4b26-4090-91bb-51a232eb4643 01 +73c7735e-cf89-455e-8391-afa36efa6cf8 2019-12 + +Queries for project Y +ATTENTION 3053043c-8493-4f4d-a05c-1b5d7553d2be + +Miscellaneous +87234c79-c668-478e-ae2a-5adfb0e67891 re-run after failure +dbe16904-b0ce-4d59-8709-c4424510539e blablabla diff --git a/examples/query-monthly.sql b/examples/query-monthly.sql new file mode 100644 index 0000000..6d3cc5b --- /dev/null +++ b/examples/query-monthly.sql @@ -0,0 +1,3 @@ +SELECT * +FROM database.table_name_%1$s%2$s +LIMIT 1 \ No newline at end of file diff --git a/examples/query-single.sql b/examples/query-single.sql new file mode 100644 index 0000000..db2a9d0 --- /dev/null +++ b/examples/query-single.sql @@ -0,0 +1,3 @@ +SELECT * +FROM sampledb.elb_logs +LIMIT 352 \ No newline at end of file diff --git a/src/AWS/Athena.php b/src/AWS/Athena.php new file mode 100755 index 0000000..98f0777 --- /dev/null +++ b/src/AWS/Athena.php @@ -0,0 +1,637 @@ + BatchGetNamedQuery, ListNamedQueries, ListQueryExecutions */ + private int $level1QueriesMaxCalls; + /** @var int $level2QueriesMaxCalls max calls per second -> CreateNamedQuery, DeleteNamedQuery, GetNamedQuery */ + private int $level2QueriesMaxCalls; + /** @var int $level3QueriesMaxCalls max calls per second -> BatchGetQueryExecution */ + private int $level3QueriesMaxCalls; + /** @var int $level4QueriesMaxCalls max calls per second -> StartQueryExecution, StopQueryExecution */ + private int $level4QueriesMaxCalls; + /** @var int $level5QueriesMaxCalls max calls per second -> GetQueryExecution, GetQueryResults */ + private int $level5QueriesMaxCalls; + /** @var int $level1QueriesBurstCapacity burst capacity -> BatchGetNamedQuery, ListNamedQueries, ListQueryExecutions */ + private int $level1QueriesBurstCapacity; + /** @var int $level2QueriesBurstCapacity burst capacity -> CreateNamedQuery, DeleteNamedQuery, GetNamedQuery */ + private int $level2QueriesBurstCapacity; + /** @var int $level3QueriesBurstCapacity burst capacity -> BatchGetQueryExecution */ + private int $level3QueriesBurstCapacity; + /** @var int $level4QueriesBurstCapacity burst capacity -> StartQueryExecution, StopQueryExecution */ + private int $level4QueriesBurstCapacity; + /** 
@var int $level5QueriesBurstCapacity burst capacity -> GetQueryExecution, GetQueryResults */ + private int $level5QueriesBurstCapacity; + /** @var int $ddlQueriesSimultaneous max simultaneous DDL queries */ + private int $ddlQueriesSimultaneous; + /** @var int $dmlQueriesSimultaneous max simultaneous DML queries */ + private int $dmlQueriesSimultaneous; + + private \Aws\Athena\AthenaClient $athenaClient; + private array $queries = []; + + /** + * Reset the query details (IDs and states) + * + * @param boolean $force true to force reset with existing running queries + * @return boolean false if force flag is not set and there is at least one running query, true otherwise + */ + public function resetQueries(bool $force = false): bool + { + if (!$force && array_search(self::QUERY_STATE_RUNNING, array_column($this->queries, 'state')) !== false) { + return false; + } + + $this->queries = []; + return true; + } + + /** + * Get a copy of the array containing query IDs and states + * + * @return array + */ + public function getQueries(): array { return $this->queries; } + + /** + * Get the total of query executions + * + * @return integer + */ + public function getTotalQueries(): int { return sizeof($this->queries); } + + /** + * Get a stale query state (not current, see getQueryCurrentState) from a query detail array (identical to query['state']) + * + * @param array $query single query details + * @return string query state (RUNNING, QUEUED, SUCCEEDED, FAILED or CANCELLED) + */ + public function getQueryStaleState(array $query): string { return $query['state']; } + + /** + * Get a stale query id from a query detail array (identical to query['execId']) + * + * @param array $query single query details + * @return string query id + */ + public function getQueryStaleId(array $query): string { return $query['execId']; } + + /** + * Constructor + * level 1 queries: BatchGetNamedQuery, ListNamedQueries, ListQueryExecutions + * level 2 queries: CreateNamedQuery, DeleteNamedQuery, GetNamedQuery + * level 3 queries: BatchGetQueryExecution + * level 4 queries: StartQueryExecution, StopQueryExecution + * level 5 queries: GetQueryExecution, GetQueryResults + * DDL queries: create table, create table add partition + * DML queries: select, create table as (CTAS) + * + * @param \Aws\Athena\AthenaClient $athenaClient + * @param integer $level1QueriesMaxCalls max calls per second -> BatchGetNamedQuery, ListNamedQueries, ListQueryExecutions + * @param integer $level2QueriesMaxCalls max calls per second -> CreateNamedQuery, DeleteNamedQuery, GetNamedQuery + * @param integer $level3QueriesMaxCalls max calls per second -> BatchGetQueryExecution + * @param integer $level4QueriesMaxCalls max calls per second -> StartQueryExecution, StopQueryExecution + * @param integer $level5QueriesMaxCalls max calls per second -> GetQueryExecution, GetQueryResults + * @param integer $level1QueriesBurstCapacity burst capacity -> BatchGetNamedQuery, ListNamedQueries, ListQueryExecutions + * @param integer $level2QueriesBurstCapacity burst capacity -> CreateNamedQuery, DeleteNamedQuery, GetNamedQuery + * @param integer $level3QueriesBurstCapacity burst capacity -> BatchGetQueryExecution + * @param integer $level4QueriesBurstCapacity burst capacity -> StartQueryExecution, StopQueryExecution + * @param integer $level5QueriesBurstCapacity burst capacity -> GetQueryExecution, GetQueryResults + * @param integer $ddlQueriesSimultaneous max simultaneous DDL queries -> create table, create table add partition + * @param integer 
$dmlQueriesSimultaneous max simultaneous DML queries -> select, create table as (CTAS)
+     */
+    public function __construct(
+        \Aws\Athena\AthenaClient $athenaClient,
+        int $level1QueriesMaxCalls = self::AWS_DEFAULT_MAX_CALLS_PER_SECOND_LEVEL1QUERIES,
+        int $level2QueriesMaxCalls = self::AWS_DEFAULT_MAX_CALLS_PER_SECOND_LEVEL2QUERIES,
+        int $level3QueriesMaxCalls = self::AWS_DEFAULT_MAX_CALLS_PER_SECOND_LEVEL3QUERIES,
+        int $level4QueriesMaxCalls = self::AWS_DEFAULT_MAX_CALLS_PER_SECOND_LEVEL4QUERIES,
+        int $level5QueriesMaxCalls = self::AWS_DEFAULT_MAX_CALLS_PER_SECOND_LEVEL5QUERIES,
+        int $level1QueriesBurstCapacity = self::AWS_DEFAULT_MAX_BURST_CAPACITY_LEVEL1QUERIES,
+        int $level2QueriesBurstCapacity = self::AWS_DEFAULT_MAX_BURST_CAPACITY_LEVEL2QUERIES,
+        int $level3QueriesBurstCapacity = self::AWS_DEFAULT_MAX_BURST_CAPACITY_LEVEL3QUERIES,
+        int $level4QueriesBurstCapacity = self::AWS_DEFAULT_MAX_BURST_CAPACITY_LEVEL4QUERIES,
+        int $level5QueriesBurstCapacity = self::AWS_DEFAULT_MAX_BURST_CAPACITY_LEVEL5QUERIES,
+        int $ddlQueriesSimultaneous = self::AWS_DEFAULT_SIMULTANEOUS_DDL_QUERIES,
+        int $dmlQueriesSimultaneous = self::AWS_DEFAULT_SIMULTANEOUS_DML_QUERIES)
+    {
+        for ($i = 1; $i <= 5; $i++) {
+            // variable variables: the names of the constructor arguments for this level
+            $qmv = "level".$i."QueriesMaxCalls";
+            $qbc = "level".$i."QueriesBurstCapacity";
+
+            // fall back to the AWS defaults for values lower than 1, then store on the matching property
+            if (${$qmv} < 1) { ${$qmv} = constant('self::AWS_DEFAULT_MAX_CALLS_PER_SECOND_LEVEL'.$i.'QUERIES'); }
+            $this->{$qmv} = ${$qmv};
+
+            if (${$qbc} < 1) { ${$qbc} = constant('self::AWS_DEFAULT_MAX_BURST_CAPACITY_LEVEL'.$i.'QUERIES'); }
+            $this->{$qbc} = ${$qbc};
+        }
+
+        if ($ddlQueriesSimultaneous < 1) { $this->ddlQueriesSimultaneous = self::AWS_DEFAULT_SIMULTANEOUS_DDL_QUERIES; }
+        else { $this->ddlQueriesSimultaneous = $ddlQueriesSimultaneous; }
+        if ($dmlQueriesSimultaneous < 1) { $this->dmlQueriesSimultaneous = self::AWS_DEFAULT_SIMULTANEOUS_DML_QUERIES; }
+        else { $this->dmlQueriesSimultaneous = $dmlQueriesSimultaneous; }
+
+        $this->athenaClient = $athenaClient;
+    }
+
+    /**
+     * Constructor for default values except simultaneous DDL queries
+     * DDL queries: create table, create table add partition
+     *
+     * @param \Aws\Athena\AthenaClient $athenaClient
+     * @param integer $ddlQueriesSimultaneous
+     * @return \FC\AWS\Athena
+     */
+    public static function newDefaultWithDDL(\Aws\Athena\AthenaClient $athenaClient, int $ddlQueriesSimultaneous): \FC\AWS\Athena
+    {
+        $instance = new self($athenaClient);
+        $instance->ddlQueriesSimultaneous = $ddlQueriesSimultaneous;
+
+        return $instance;
+    }
+
+    /**
+     * Constructor for default values except simultaneous DML queries
+     * DML queries: select, create table as (CTAS)
+     *
+     * @param \Aws\Athena\AthenaClient $athenaClient
+     * @param integer $dmlQueriesSimultaneous
+     * @return \FC\AWS\Athena
+     */
+    public static function newDefaultWithDML(\Aws\Athena\AthenaClient $athenaClient, int $dmlQueriesSimultaneous): \FC\AWS\Athena
+    {
+        $instance = new self($athenaClient);
+        $instance->dmlQueriesSimultaneous = $dmlQueriesSimultaneous;
+
+        return $instance;
+    }
+
+    /**
+     * Execute a query script
+     *
+     * @param string $queryScript the query script (UTF-8 formatted)
+     * @param string $queryOutputLocation the output location on S3 for the query result
+     * @param string $catalogName name of the source catalog
+     * @param string $workgroup name of the workgroup
+     * @return string ID of the query started
+     */
+    public function startQuery(string $queryScript, string $queryOutputLocation, string $catalogName, string $workgroup): string
+    {
+        // verify that the query string max length is not exceeded
+        $queryLength = strlen($queryScript);
+        if ($queryLength > self::QUERY_MAX_LENGTH) {
+            throw new \Exception(sprintf("Query string max length exceeded: max length = %s, current query length = %s", number_format(self::QUERY_MAX_LENGTH), number_format($queryLength)));
+        }
+
+        // pause execution to stay under the limits
+        usleep(intval(1000000 / $this->level4QueriesMaxCalls));
+
+        // start query execution
+        $query = $this->athenaClient->StartQueryExecution([
+            'QueryString' => $queryScript,
+            'ResultConfiguration' => [
+                'OutputLocation' => $queryOutputLocation
+            ],
+            'QueryExecutionContext' => [
+                'Catalog' => $catalogName
+            ],
+            'WorkGroup' => $workgroup
+        ]);
+
+        $execId = $query->get('QueryExecutionId');
+        $this->queries[] = [ 'execId' => $execId, 'state' => self::QUERY_STATE_RUNNING ];
+
+        return $execId;
+    }
+
+    /**
+     * Get the current state of a query
+     *
+     * @param string $queryId query ID
+     * @param string $executionTime readable execution time (set by reference)
+     * @param string $reason reason of failure (set by reference)
+     * @return string query state (RUNNING, QUEUED, SUCCEEDED, FAILED or CANCELLED)
+     */
+    public function getQueryCurrentState(string $queryId, string &$executionTime = null, string &$reason = null): string
+    {
+        // pause execution to stay under the limits
+        usleep(intval(1000000 / $this->level5QueriesMaxCalls));
+
+        $e = $this->athenaClient->getQueryExecution([ 'QueryExecutionId' => $queryId ]);
+
+        if ($e['QueryExecution']['Status']['State'] == self::QUERY_STATE_FAILED && isset($e['QueryExecution']['Status']['StateChangeReason'])) {
+            $reason = $e['QueryExecution']['Status']['StateChangeReason'];
+        }
+
+        if (isset($e['QueryExecution']['Statistics']['EngineExecutionTimeInMillis'])) {
+            $executionTimeInSeconds = intval(intval($e['QueryExecution']['Statistics']['EngineExecutionTimeInMillis']) / 1000);
+            $executionTime = gmdate("H:i:s", $executionTimeInSeconds);
+        }
+
+        return $e['QueryExecution']['Status']['State'];
+    }
+
+    /**
+     * Check if the limit set for simultaneous running queries has been reached
+     *
+     * @param string $queryType query type, either DDL or DML (see class constants)
+     * @return boolean true if reached, false otherwise
+     */
+    public function isQueryLimitReached(string $queryType): bool
+    {
+        // queryType must be DDL or DML (see class constants)
+        if ($queryType == self::QUERY_TYPE_DDL) {
+            $limit = $this->ddlQueriesSimultaneous;
+        } elseif ($queryType == self::QUERY_TYPE_DML) {
+            $limit = $this->dmlQueriesSimultaneous;
+        } else {
+            throw new \Exception(sprintf("Query type must be either of these: %s or %s, %s given", self::QUERY_TYPE_DDL, self::QUERY_TYPE_DML, $queryType));
+        }
+
+        $this->updateQueriesState();
+
+        $running = 0;
+        foreach (array_column($this->queries, 'state') as $state) {
+            if (!in_array($state, self::QUERY_STOP_STATES)) {
+                $running++;
+            }
+        }
+        return ($running >= $limit);
+    }
+
+    /**
+     * Check if there are running queries
+     *
+     * @return boolean true if at least one query is still running, false otherwise
+     */
+    public function existRunningQuery(): bool
+    {
+        $this->updateQueriesState();
+
+        foreach (array_column($this->queries, 'state') as $state) {
+            if (!in_array($state, self::QUERY_STOP_STATES)) {
+                return true;
+            }
+        }
+
+        return false;
+    }
+
+    /**
+     * Update the queries state
+     *
+     * @return void
+     */
+    private function updateQueriesState(): void
+    {
+        for ($i = 0, $imax = sizeof($this->queries); $i < $imax; $i++) {
+            if (!in_array($this->queries[$i]['state'], self::QUERY_STOP_STATES)) {
+                $this->queries[$i]['state'] = $this->getQueryCurrentState($this->queries[$i]['execId']);
+            }
+        }
+    }
+
+    /**
+     * Stop a query
+     * The resulting state must be confirmed using getQueryCurrentState
+     *
+     * @param string $queryId query ID
+     * @return void
+     */
+    public function StopQuery(string $queryId): void
+    {
+        // pause execution to stay under the limits
+        usleep(intval(1000000 / $this->level4QueriesMaxCalls));
+
+        $this->athenaClient->stopQueryExecution([ 'QueryExecutionId' => $queryId ]);
+        if (($key = array_search($queryId, array_column($this->queries, 'execId'))) !== false) {
+            $this->queries[$key]['state'] = self::QUERY_STATE_CANCELLED;
+        }
+    }
+
+    /**
+     * Stop all queries
+     *
+     * @return void
+     */
+    public function StopAllQueries(): void
+    {
+        for ($i = 0, $imax = sizeof($this->queries); $i < $imax; $i++) {
+            if (!in_array($this->queries[$i]['state'], self::QUERY_STOP_STATES)) {
+                // get current query state
+                $this->queries[$i]['state'] = $this->getQueryCurrentState($this->queries[$i]['execId']);
+
+                if (!in_array($this->queries[$i]['state'], self::QUERY_STOP_STATES)) {
+                    // pause execution to stay under the limits
+                    usleep(intval(1000000 / $this->level4QueriesMaxCalls));
+
+                    $this->athenaClient->stopQueryExecution([ 'QueryExecutionId' => $this->queries[$i]['execId'] ]);
+                    $this->queries[$i]['state'] = self::QUERY_STATE_CANCELLED;
+                }
+            }
+        }
+    }
+
+    /**
+     * Create a named query
+     *
+     * @param string $queryScript the query script (UTF-8 formatted)
+     * @param string $database database to which the query belongs
+     * @param string $queryName name for the query
+     * @param string $queryDescription description for the query
+     * @return string named query id
+     */
+    public function createNamedQuery(string $queryScript, string $database, string $queryName, string $queryDescription = ''): string
+    {
+        // pause execution to stay under the limits
+        usleep(intval(1000000 / $this->level2QueriesMaxCalls));
+
+        $r = $this->athenaClient->createNamedQuery([
+            'QueryString' => $queryScript,
+            'Database' => $database,
+            'Name' => $queryName,
+            'Description' => $queryDescription
+        ]);
+
+        if (!isset($r['NamedQueryId'])) {
+            throw new \Exception("Failed to create named query: " . self::getErrorMessage($r));
+        }
+
+        return $r['NamedQueryId'];
+    }
+
+    /**
+     * Delete a named query
+     *
+     * @param string $namedQueryId Id of the named query
+     * @return void
+     */
+    public function deleteNamedQuery(string $namedQueryId): void
+    {
+        // pause execution to stay under the limits
+        usleep(intval(1000000 / $this->level2QueriesMaxCalls));
+
+        $r = $this->athenaClient->deleteNamedQuery([
+            'NamedQueryId' => $namedQueryId
+        ]);
+
+        if (is_array($r) && sizeof((array)$r) != 0) {
+            throw new \Exception("Failed to delete named query: " . self::getErrorMessage($r));
+        }
+    }
+
+    /**
+     * Retrieve details of a named query
+     *
+     * @param string $namedQueryId Id of the named query
+     * @return array associative array with database, description, name, id, query script, workgroup
+     */
+    public function getNamedQueryDetails(string $namedQueryId): array
+    {
+        // pause execution to stay under the limits
+        usleep(intval(1000000 / $this->level2QueriesMaxCalls));
+
+        $r = $this->athenaClient->getNamedQuery([
+            'NamedQueryId' => $namedQueryId
+        ]);
+
+        if (!isset($r['NamedQuery'])) {
+            throw new \Exception("Failed to retrieve named query details: " . self::getErrorMessage($r));
+        }
+
+        return (array)$r['NamedQuery'];
+    }
+
+    /**
+     * List named queries, one page per call
+     *
+     * @param string $nextPaginationToken pagination token returned by the previous call, empty string for the first page (updated by reference)
+     * @return array
+     */
+    public function listNamedQueries(string &$nextPaginationToken): array
+    {
+        $namedQueriesList = [];
+
+        if ($nextPaginationToken != '') {
+            // pause execution to stay under the limits
+            // usleep(intval(1000000 / $this->level1QueriesMaxCalls));
+            sleep(10); // IMPROVE: forced 10 second wait between calls to avoid max rate exceeded error
+
+            $r = $this->athenaClient->listNamedQueries([
+                'NextToken' => $nextPaginationToken
+            ]);
+        } else {
+            $r = $this->athenaClient->listNamedQueries();
+        }
+
+        if (!isset($r['NamedQueryIds'])) {
+            throw new \Exception("Failed to retrieve named queries list: " . self::getErrorMessage($r));
+        } else {
+            $namedQueriesList = array_merge($namedQueriesList, $r['NamedQueryIds']);
+        }
+
+        if (isset($r['NextToken']) && $r['NextToken'] != '') {
+            $nextPaginationToken = $r['NextToken'];
+        } else {
+            $nextPaginationToken = '';
+        }
+
+        return $namedQueriesList;
+    }
+
+    /**
+     * List all databases inside a source catalog
+     *
+     * @param string $catalogName name of the source catalog
+     * @return array list of database names
+     */
+    public function listAllDatabases(string $catalogName): array
+    {
+        $nextPaginationToken = '';
+        $databaseList = [];
+
+        do {
+            if ($nextPaginationToken != '') {
+                $r = $this->athenaClient->listDatabases([
+                    'CatalogName' => $catalogName,
+                    'NextToken' => $nextPaginationToken
+                ]);
+            } else {
+                $r = $this->athenaClient->listDatabases([
+                    'CatalogName' => $catalogName
+                ]);
+            }
+
+            if (!isset($r['DatabaseList'])) {
+                throw new \Exception("Failed to retrieve database list: " . self::getErrorMessage($r));
+            } else {
+                foreach ($r['DatabaseList'] as $databaseDetails) {
+                    if (isset($databaseDetails['Name'])) { $databaseList[] = $databaseDetails['Name']; }
+                }
+            }
+
+            if (isset($r['NextToken']) && $r['NextToken'] != '') {
+                $nextPaginationToken = $r['NextToken'];
+            } else {
+                $nextPaginationToken = '';
+            }
+        } while ($nextPaginationToken != '');
+
+        return $databaseList;
+    }
+
+    /**
+     * Retrieve database details
+     *
+     * @param string $database database name
+     * @param string $catalogName name of the source catalog
+     * @return array database details
+     */
+    public function getDatabaseDetails(string $database, string $catalogName): array
+    {
+        $r = $this->athenaClient->getDatabase([
+            'CatalogName' => $catalogName,
+            'DatabaseName' => $database
+        ]);
+
+        if (!isset($r['Database'])) {
+            throw new \Exception("Failed to retrieve database details: " . self::getErrorMessage($r));
+
+    /**
+     * Retrieve database details
+     *
+     * @param string $database database name
+     * @param string $catalogName name of the source catalog
+     * @return array database details
+     */
+    public function getDatabaseDetails(string $database, string $catalogName): array
+    {
+        $r = $this->athenaClient->getDatabase([
+            'CatalogName' => $catalogName,
+            'DatabaseName' => $database
+        ]);
+
+        if (!isset($r['Database'])) {
+            throw new \Exception("Failed to retrieve database details: " . self::getErrorMessage($r));
+        }
+
+        return (array)($r['Database']);
+    }
+
+    /**
+     * Retrieve database tables details
+     *
+     * @param string $database database name
+     * @param string $catalogName name of the source catalog
+     * @param string $tableNameRegex pattern-matching expression for table names to include (empty for all tables)
+     * @return array tables details
+     */
+    public function listAllTablesDetails(string $database, string $catalogName, string $tableNameRegex = ''): array
+    {
+        $nextPaginationToken = '';
+        $tablesDetails = [];
+
+        do {
+            if ($nextPaginationToken != '') {
+                $r = $this->athenaClient->listTableMetadata([
+                    'CatalogName' => $catalogName,
+                    'DatabaseName' => $database,
+                    'Expression' => $tableNameRegex,
+                    'NextToken' => $nextPaginationToken
+                ]);
+            } else {
+                $r = $this->athenaClient->listTableMetadata([
+                    'CatalogName' => $catalogName,
+                    'DatabaseName' => $database,
+                    'Expression' => $tableNameRegex
+                ]);
+            }
+
+            if (!isset($r['TableMetadataList'])) {
+                throw new \Exception("Failed to retrieve tables details list: " . self::getErrorMessage($r));
+            } else {
+                $tablesDetails = array_merge($tablesDetails, $r['TableMetadataList']);
+            }
+
+            if (isset($r['NextToken']) && $r['NextToken'] != '') {
+                $nextPaginationToken = $r['NextToken'];
+            } else {
+                $nextPaginationToken = '';
+            }
+        } while ($nextPaginationToken != '');
+
+        return $tablesDetails;
+    }
+
+    /**
+     * Retrieve table details
+     *
+     * @param string $database database name
+     * @param string $table table name
+     * @param string $catalogName name of the source catalog
+     * @return array table details
+     */
+    public function getTableDetails(string $database, string $table, string $catalogName): array
+    {
+        $r = $this->athenaClient->getTableMetadata([
+            'CatalogName' => $catalogName,
+            'DatabaseName' => $database,
+            'TableName' => $table
+        ]);
+
+        if (!isset($r['TableMetadata'])) {
+            throw new \Exception("Failed to retrieve table details: " . self::getErrorMessage($r));
+        }
+
+        return (array)($r['TableMetadata']);
+    }
+
+    /**
+     * Retrieve error message from an API call result
+     *
+     * @param array|\Aws\Result $results results from previous API call
+     * @return string error message
+     */
+    private static function getErrorMessage($results): string
+    {
+        // no array type hint: callers pass \Aws\Result objects, which only
+        // implement ArrayAccess and would trigger a TypeError otherwise
+        return isset($results['Message']) ? $results['Message'] : "no error detail";
+    }
+}
+
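Every CLI tool later in this diff drives the class above the same way: build an `\Aws\Athena\AthenaClient`, wrap it in `\FC\AWS\Athena` with the rate limits from `.env`, start a query, then poll until it reaches a stop state. The following is a minimal standalone sketch of that loop; the autoloader path, profile, region, bucket, and `SELECT 1` query are placeholder assumptions, and the `s3://` output location is illustrative (the tools resolve it through `sprintf(QUERY_OUTPUT_LOCATION, ...)`, whose format string is defined elsewhere in the repository):
```
<?php
// Hedged usage sketch for the FC\AWS\Athena wrapper above -- not shipped code.
require __DIR__ . '/vendor/autoload.php';

$client = new \Aws\Athena\AthenaClient([
    'profile' => 'default',
    'version' => 'latest',
    'region'  => 'us-east-1',
]);

// Constructor arguments mirror instantiateAthena() in tools/imports/common.php:
// the client, five calls-per-second/burst-capacity pairs (levels 1 to 5),
// then the max simultaneous DDL and DML query counts (.env defaults shown).
$athena = new \FC\AWS\Athena($client, 5, 10, 5, 20, 20, 40, 20, 80, 100, 200, 20, 20);

// start a query and remember its execution id
$execId = $athena->startQuery(
    'SELECT 1',                      // query script
    's3://my-bucket/query-output/',  // hypothetical result location
    'AwsDataCatalog',                // catalog
    'primary'                        // workgroup
);

// poll until the query reaches a stop state, as tools/query.php does
do {
    $executionTime = '';
    $failureReason = '';
    $state = $athena->getQueryCurrentState($execId, $executionTime, $failureReason);
} while (!in_array($state, \FC\AWS\Athena::QUERY_STOP_STATES));

echo $execId . "\t" . $state . PHP_EOL;
```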
diff --git a/tests/configuration.sh b/tests/configuration.sh new file mode 100644 index 0000000..0b714c0 --- /dev/null +++ b/tests/configuration.sh @@ -0,0 +1,62 @@
+#!/bin/bash
+
+# read .env file
+# note: $DIR is expected to be set by the sourcing script (see tests/test.sh)
+if [[ ! -f $DIR/../.env.dev ]]; then
+    if [[ ! -f $DIR/../.env ]]; then
+        echo "Missing configuration variable file!"
+        exit 1
+    else
+        ENV="$DIR/../.env"
+    fi
+else
+    ENV="$DIR/../.env.dev"
+fi
+
+# retrieve configuration variables
+set -o allexport
+source "$ENV"
+set +o allexport
+
+AWS_PROFILE=$PROFILE
+OUTPUT_BUCKET=$QUERY_OUTPUT
+
+# unset all variables
+unset PROFILE
+unset VERSION
+unset REGION
+unset CATALOG
+unset WORKGROUP
+unset QUERY_OUTPUT
+unset AWS_DEFAULT_MAX_CALLS_PER_SECOND_LEVEL1QUERIES
+unset AWS_DEFAULT_MAX_BURST_CAPACITY_LEVEL1QUERIES
+unset AWS_DEFAULT_MAX_CALLS_PER_SECOND_LEVEL2QUERIES
+unset AWS_DEFAULT_MAX_BURST_CAPACITY_LEVEL2QUERIES
+unset AWS_DEFAULT_MAX_CALLS_PER_SECOND_LEVEL3QUERIES
+unset AWS_DEFAULT_MAX_BURST_CAPACITY_LEVEL3QUERIES
+unset AWS_DEFAULT_MAX_CALLS_PER_SECOND_LEVEL4QUERIES
+unset AWS_DEFAULT_MAX_BURST_CAPACITY_LEVEL4QUERIES
+unset AWS_DEFAULT_MAX_CALLS_PER_SECOND_LEVEL5QUERIES
+unset AWS_DEFAULT_MAX_BURST_CAPACITY_LEVEL5QUERIES
+unset AWS_DEFAULT_SIMULTANEOUS_DDL_QUERIES
+unset AWS_DEFAULT_SIMULTANEOUS_DML_QUERIES
+
+DATABASE=''
+YYYY=''
+MM=''
+
+if [[ "$1" == "" ]]; then echo "Missing parameters!"; exit 1; fi
+
+while [ -n "$1" ]; do
+    case "$1" in
+        -d | --database) DATABASE="$2"; shift ;;
+        -y | --year) YYYY="$2"; shift ;;
+        -m | --month) MM="$2"; shift ;;
+
+        *) ;;
+    esac
+    shift
+done
+
+if [[ $DATABASE == '' ]]; then echo "Missing parameter DATABASE!"; exit 1; fi
+if [[ $YYYY == '' ]]; then echo "Missing parameter YEAR!"; exit 1; fi
+if [[ $MM == '' ]]; then echo "Missing parameter MONTH!"; exit 1; fi
diff --git a/tests/create-data.sql b/tests/create-data.sql new file mode 100644 index 0000000..6d954a0 --- /dev/null +++ b/tests/create-data.sql @@ -0,0 +1,10 @@
+CREATE TABLE _DATABASE_.sampledb_%1$s%2$s%3$s
+WITH (
+    external_location = 's3://_OUTPUT_BUCKET_/_DATABASE_/sampledb/%1$s/%2$s/%3$s',
+    format = 'PARQUET',
+    parquet_compression = 'SNAPPY'
+) AS
+SELECT *
+FROM sampledb.elb_logs
+WHERE DATE_FORMAT(FROM_ISO8601_TIMESTAMP(request_timestamp), '%%Y-%%m-%%d') = '%1$s-%2$s-%3$s'
+LIMIT 100
\ No newline at end of file
diff --git a/tests/create-table.sql b/tests/create-table.sql new file mode 100644 index 0000000..4d05f30 --- /dev/null +++ b/tests/create-table.sql @@ -0,0 +1,26 @@
+CREATE EXTERNAL TABLE _DATABASE_.sampledb_%1$s%2$s (
+    request_timestamp string,
+    elb_name string,
+    request_ip string,
+    request_port int,
+    backend_ip string,
+    backend_port int,
+    request_processing_time double,
+    backend_processing_time double,
+    client_response_time double,
+    elb_response_code string,
+    backend_response_code string,
+    received_bytes bigint,
+    sent_bytes bigint,
+    request_verb string,
+    url string,
+    protocol string,
+    user_agent string,
+    ssl_cipher string,
+    ssl_protocol string
+)
+PARTITIONED BY (day string)
+ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
+WITH SERDEPROPERTIES (
+    'serialization.format' = '1'
+) LOCATION 's3://_OUTPUT_BUCKET_/_DATABASE_/sampledb/'
diff --git a/tests/output.txt b/tests/output.txt new file mode 100644 index 0000000..9721460 --- /dev/null +++ b/tests/output.txt @@ -0,0 +1,234 @@
+The following critical operations are about to be performed:
+ - drop database aws_athena_api_tools_tests
+ - delete data under s3://QUERY_OUTPUT/aws_athena_api_tools_tests/
+Do you agree? 
(Y/N): Y + +[ cleaning previous test data ] + +[ list sampledb database ] +{"Name":"sampledb","Description":"Sample database","Parameters":{"CreatedBy":"Athena","EXTERNAL":"TRUE"}} + +[ create database ] +Executing query: +6f8f8d12-bd40-439c-92eb-703df57effe7 +Query result: +6f8f8d12-bd40-439c-92eb-703df57effe7 SUCCEEDED 00:00:00 + +[ create data ] +Executing queries: +a5d0c97a-45a7-41d9-bdac-6f34bebcb7fd 2015-01-01 +8c8b6b9b-5ddc-4525-8dfa-a94ee635e56d 2015-01-02 +47bda107-03a2-4e1a-8630-51f9735ddd71 2015-01-03 +ba63262a-a3d5-44bf-8ca5-0adf9767ae6f 2015-01-04 +349711e7-a0f3-4eff-8485-f42a8a2257f6 2015-01-05 + +Displaying queries result: +a5d0c97a-45a7-41d9-bdac-6f34bebcb7fd 2015-01-01 SUCCEEDED 00:00:02 +8c8b6b9b-5ddc-4525-8dfa-a94ee635e56d 2015-01-02 SUCCEEDED 00:00:01 +47bda107-03a2-4e1a-8630-51f9735ddd71 2015-01-03 SUCCEEDED 00:00:01 +ba63262a-a3d5-44bf-8ca5-0adf9767ae6f 2015-01-04 SUCCEEDED 00:00:01 +349711e7-a0f3-4eff-8485-f42a8a2257f6 2015-01-05 SUCCEEDED 00:00:01 + +[ create table for created data ] +Executing queries: +da54ec8f-2dc5-4d39-90d3-3dede9f7a33f 2015-01 + +Displaying queries result: +da54ec8f-2dc5-4d39-90d3-3dede9f7a33f 2015-01 SUCCEEDED 00:00:00 + +[ create day partitions ] +Executing queries: +e5bf9a1b-236c-46d0-9e4c-ceab8da4f062 2015-01-01 +ceb28ec6-0b35-4f91-9ef7-f78610f3e1ba 2015-01-02 +cd2822b6-d8c2-47e7-97f7-84c748ecf320 2015-01-03 +2f753fc4-d00e-4bd6-8d1a-6f1e04d8e296 2015-01-04 +30303218-25d8-4895-b31f-dda81e5c8763 2015-01-05 + +Displaying queries result: +e5bf9a1b-236c-46d0-9e4c-ceab8da4f062 2015-01-01 SUCCEEDED 00:00:00 +ceb28ec6-0b35-4f91-9ef7-f78610f3e1ba 2015-01-02 SUCCEEDED 00:00:00 +cd2822b6-d8c2-47e7-97f7-84c748ecf320 2015-01-03 SUCCEEDED 00:00:00 +2f753fc4-d00e-4bd6-8d1a-6f1e04d8e296 2015-01-04 SUCCEEDED 00:00:00 +30303218-25d8-4895-b31f-dda81e5c8763 2015-01-05 SUCCEEDED 00:00:00 + +[ query ] +SELECT request_timestamp, elb_name, request_ip, request_port FROM aws_athena_api_tools_tests.sampledb_201501 LIMIT 5 + +[ get query state ] +953ca04e-d4f2-450d-b9b9-bd7fc30075f8 SUCCEEDED 00:00:00 + +[ stop query ] +Stopping query... 
+953ca04e-d4f2-450d-b9b9-bd7fc30075f8 SUCCEEDED 00:00:00 + +[ display query results ] +"request_timestamp","elb_name","request_ip","request_port" +"2015-01-04T16:00:01.206255Z","elb_demo_007","240.127.144.192","1230" +"2015-01-04T16:00:01.612598Z","elb_demo_002","250.94.140.192","5727" +"2015-01-04T16:00:02.793335Z","elb_demo_009","247.99.30.109","24344" +"2015-01-04T16:00:03.068897Z","elb_demo_005","243.86.30.190","17123" +"2015-01-04T16:00:03.470121Z","elb_demo_005","243.227.105.90","29529" + +[ query daily ] +SELECT * FROM aws_athena_api_tools_tests.sampledb_%1$s%2$s WHERE day = '%3$s' +Executing queries: +3c7f27d3-c04b-4196-aba2-c43c63ff7885 2015-01-02 +c3cf6828-d2e7-4173-a605-fba5cbdad131 2015-01-03 + +Displaying queries result: +3c7f27d3-c04b-4196-aba2-c43c63ff7885 2015-01-02 SUCCEEDED 00:00:00 +c3cf6828-d2e7-4173-a605-fba5cbdad131 2015-01-03 SUCCEEDED 00:00:00 + +[ display query results on s3 ] +2021-03-19 21:42:46 34430 aws_athena_api_tools_tests/query-daily/3c7f27d3-c04b-4196-aba2-c43c63ff7885.csv +2021-03-19 21:42:46 1166 aws_athena_api_tools_tests/query-daily/3c7f27d3-c04b-4196-aba2-c43c63ff7885.csv.metadata +2021-03-19 21:42:45 34459 aws_athena_api_tools_tests/query-daily/c3cf6828-d2e7-4173-a605-fba5cbdad131.csv +2021-03-19 21:42:46 1166 aws_athena_api_tools_tests/query-daily/c3cf6828-d2e7-4173-a605-fba5cbdad131.csv.metadata + +[ delete metadata files ] +delete: s3://QUERY_OUTPUT/aws_athena_api_tools_tests/query-daily/3c7f27d3-c04b-4196-aba2-c43c63ff7885.csv.metadata +delete: s3://QUERY_OUTPUT/aws_athena_api_tools_tests/query-daily/c3cf6828-d2e7-4173-a605-fba5cbdad131.csv.metadata + +[ display query results on s3 without metadata files ] +2021-03-19 21:42:46 34430 aws_athena_api_tools_tests/query-daily/3c7f27d3-c04b-4196-aba2-c43c63ff7885.csv +2021-03-19 21:42:45 34459 aws_athena_api_tools_tests/query-daily/c3cf6828-d2e7-4173-a605-fba5cbdad131.csv + +[ list database table like '*db_201501 ] +{ + "Name": "sampledb_201501", + "CreateTime": "2021-03-20T02:42:31+00:00", + "LastAccessTime": "1970-01-01T00:00:00+00:00", + "TableType": "EXTERNAL_TABLE", + "Columns": [ + { + "Name": "request_timestamp", + "Type": "string" + }, + { + "Name": "elb_name", + "Type": "string" + }, + { + "Name": "request_ip", + "Type": "string" + }, + { + "Name": "request_port", + "Type": "int" + }, + { + "Name": "backend_ip", + "Type": "string" + }, + { + "Name": "backend_port", + "Type": "int" + }, + { + "Name": "request_processing_time", + "Type": "double" + }, + { + "Name": "backend_processing_time", + "Type": "double" + }, + { + "Name": "client_response_time", + "Type": "double" + }, + { + "Name": "elb_response_code", + "Type": "string" + }, + { + "Name": "backend_response_code", + "Type": "string" + }, + { + "Name": "received_bytes", + "Type": "bigint" + }, + { + "Name": "sent_bytes", + "Type": "bigint" + }, + { + "Name": "request_verb", + "Type": "string" + }, + { + "Name": "url", + "Type": "string" + }, + { + "Name": "protocol", + "Type": "string" + }, + { + "Name": "user_agent", + "Type": "string" + }, + { + "Name": "ssl_cipher", + "Type": "string" + }, + { + "Name": "ssl_protocol", + "Type": "string" + } + ], + "PartitionKeys": [ + { + "Name": "day", + "Type": "string" + } + ], + "Parameters": { + "EXTERNAL": "TRUE", + "inputformat": "org.apache.hadoop.mapred.TextInputFormat", + "location": "s3:\/\/QUERY_OUTPUT\/aws_athena_api_tools_tests\/sampledb", + "outputformat": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat", + "serde.param.serialization.format": "1", + 
"serde.serialization.lib": "org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe", + "transient_lastDdlTime": "1616208150" + } +} + +[ drop database + tables ] +Executing query: +0d736813-c213-4194-bf52-adba42d1439f +Query result: +0d736813-c213-4194-bf52-adba42d1439f SUCCEEDED 00:00:00 + +[ delete created data from s3 ] +delete: s3://QUERY_OUTPUT/aws_athena_api_tools_tests/query-daily/3c7f27d3-c04b-4196-aba2-c43c63ff7885.csv +delete: s3://QUERY_OUTPUT/aws_athena_api_tools_tests/query-daily/c3cf6828-d2e7-4173-a605-fba5cbdad131.csv +delete: s3://QUERY_OUTPUT/aws_athena_api_tools_tests/query/953ca04e-d4f2-450d-b9b9-bd7fc30075f8.csv.metadata +delete: s3://QUERY_OUTPUT/aws_athena_api_tools_tests/query/953ca04e-d4f2-450d-b9b9-bd7fc30075f8.csv +delete: s3://QUERY_OUTPUT/aws_athena_api_tools_tests/sampledb/2015/01/02/20210320_024222_00031_6t2y2_80ab7184-0316-4a55-9fc3-ff7d03689cb7 +delete: s3://QUERY_OUTPUT/aws_athena_api_tools_tests/query/c15b8ff2-f75c-4d51-86a0-3c717489bb4e.csv.metadata +delete: s3://QUERY_OUTPUT/aws_athena_api_tools_tests/sampledb/2015/01/04/20210320_024225_00032_jempt_4cd2986a-74b1-44fa-8c32-31cddcfd6413 +delete: s3://QUERY_OUTPUT/aws_athena_api_tools_tests/sampledb/2015/01/03/20210320_024223_00029_davdh_041e9c20-402f-4687-a80c-d2383297bedd +delete: s3://QUERY_OUTPUT/aws_athena_api_tools_tests/sampledb/2015/01/05/20210320_024227_00094_ucpqg_80cc32cb-1466-4062-8c99-362264792735 +delete: s3://QUERY_OUTPUT/aws_athena_api_tools_tests/sampledb/2015/01/01/20210320_024221_00013_kx6py_17c9132e-c671-4dec-89ec-f2563de6872a + +[ create named query ] +SELECT * FROM elb_logs ORDER BY request_timestamp DESC LIMIT 352 +3a62f803-1393-4123-a4bf-d112bdd21b94 + +[ list that named query ] +{ + "Name": "log sample", + "Description": "get the latest 352 log records", + "Database": "sampledb", + "QueryString": "SELECT * FROM elb_logs ORDER BY request_timestamp DESC LIMIT 352\n", + "NamedQueryId": "3a62f803-1393-4123-a4bf-d112bdd21b94", + "WorkGroup": "primary" +} + +[ delete named query ] +Deleting named query: +3a62f803-1393-4123-a4bf-d112bdd21b94 +Deleted successfully + +Done. diff --git a/tests/test.sh b/tests/test.sh new file mode 100755 index 0000000..be7f5b2 --- /dev/null +++ b/tests/test.sh @@ -0,0 +1,220 @@ +#!/bin/bash + +# +# CONFIGURATION +# + +DIR=$( cd $( dirname "$0" ) > /dev/null 2>&1 && pwd ) +source $DIR/configuration.sh + +# +# DATABASE & TABLES +# + +echo "The following critical operations are about to be performed:" +echo -e "\t- drop database $DATABASE" +echo -e "\t- delete data under s3://$OUTPUT_BUCKET/$DATABASE/" +echo -n "Do you agree? (Y/N): " +read confirm +if [[ $confirm != 'Y' ]] && [[ $confirm != 'y' ]]; then + echo "Bye." + exit +fi + +# remove previous data, database, tables +echo "[ cleaning previous test data ]" +php ../tools/database.php \ + -d -t \ + -n $DATABASE \ +> /dev/null && \ +aws s3 --profile $AWS_PROFILE rm s3://$OUTPUT_BUCKET/$DATABASE/ --recursive + +echo + +# list sampledb database +echo "[ list sampledb database ]" +php ../tools/list-databases.php -n sampledb + +if [[ $? -eq 1 ]]; then echo "Sampledb not present, exiting"; exit; fi + +echo + +# create database +echo "[ create database ]" +php ../tools/database.php \ + -c \ + -n $DATABASE + +if [[ $? -eq 1 ]]; then echo "Command failed, exiting"; exit; fi + +echo + +# create data +echo "[ create data ]" +sed "s/_DATABASE_/$DATABASE/g;s/_OUTPUT_BUCKET_/$OUTPUT_BUCKET/g" create-data.sql \ +| php ../tools/query-daily.php \ + -b $YYYY-$MM-01 \ + -e $YYYY-$MM-05 \ + -u 2 \ + -x DML + +if [[ $? 
-eq 1 ]]; then echo "Command failed, exiting"; exit; fi + +echo + +# create table for created data +echo "[ create table for created data ]" +sed "s/_DATABASE_/$DATABASE/g;s/_OUTPUT_BUCKET_/$OUTPUT_BUCKET/g" create-table.sql \ +| php ../tools/query-monthly.php \ + -b $YYYY-$MM \ + -x DDL + +if [[ $? -eq 1 ]]; then echo "Command failed, exiting"; exit; fi + +echo + +# create day partitions +echo "[ create day partitions ]" +php ../tools/partitions-daily.php \ + -t $DATABASE.sampledb_$YYYY$MM \ + -d $OUTPUT_BUCKET/$DATABASE/sampledb/$YYYY/$MM \ + -y $YYYY \ + -m $MM \ + -e 05 + +if [[ $? -eq 1 ]]; then echo "Command failed, exiting"; exit; fi + +echo + +# single query +echo "[ query ]" +query="SELECT request_timestamp, elb_name, request_ip, request_port FROM $DATABASE.sampledb_$YYYY$MM LIMIT 5" +echo "$query" +id=$(echo "$query" \ +| php ../tools/query.php \ + -o $OUTPUT_BUCKET/$DATABASE/query \ + -s \ +) && \ +echo && \ +if [[ $? -eq 0 ]]; then + # get query state + echo "[ get query state ]" + php ../tools/state.php -i "$id" + + echo + + # stop query + echo "[ stop query ]" + php ../tools/stop.php -i "$id" + + echo + + # display query results + echo "[ display query results ]" + aws s3 --profile $AWS_PROFILE cp s3://$OUTPUT_BUCKET/$DATABASE/query/$id.csv - +else + echo "Command failed, exiting" + exit +fi + +echo + +# query daily +echo "[ query daily ]" +query="SELECT * FROM $DATABASE.sampledb_%1\$s%2\$s WHERE day = '%3\$s'" +echo "$query" +echo "$query" \ +| php ../tools/query-daily.php \ + -b $YYYY-$MM-02 \ + -e $YYYY-$MM-03 \ + -x DML \ + -o $OUTPUT_BUCKET/$DATABASE/query-daily + +if [[ $? -eq 1 ]]; then echo "Command failed, exiting"; exit; fi + +echo + +# display query results on s3 +echo "[ display query results on s3 ]" +aws s3 --profile $AWS_PROFILE ls s3://$OUTPUT_BUCKET/$DATABASE/query-daily/ --recursive + +echo + +# delete metadata files +echo "[ delete metadata files ]" +/bin/bash ../tools/delete-metadata-files.sh \ + -b $OUTPUT_BUCKET \ + -f $DATABASE/query-daily + +if [[ $? -eq 1 ]]; then echo "Command failed, exiting"; exit; fi + +echo + +# display query results on s3 +echo "[ display query results on s3 without metadata files ]" +aws s3 --profile $AWS_PROFILE ls s3://$OUTPUT_BUCKET/$DATABASE/query-daily/ --recursive + +echo + +# list tables +echo "[ list database table like '*db_"$YYYY$MM" ]" +php ../tools/list-tables.php \ + -n $DATABASE \ + -t '*db_'$YYYY$MM + +if [[ $? -eq 1 ]]; then echo "Command failed, exiting"; exit; fi + +echo + +# drop database + all tables +echo "[ drop database + tables ]" +php ../tools/database.php \ + -d -t \ + -n $DATABASE + +if [[ $? -eq 1 ]]; then echo "Command failed, exiting"; exit; fi + +echo + +# delete data from s3 +echo "[ delete created data from s3 ]" +aws s3 --profile $AWS_PROFILE rm s3://$OUTPUT_BUCKET/$DATABASE/ --recursive + +echo + +# +# NAMED QUERY +# + +# create named query +echo "[ create named query ]" +query="SELECT * FROM elb_logs ORDER BY request_timestamp DESC LIMIT 352" +echo "$query" +id=$(echo "$query" \ +| php ../tools/named-query.php \ + -s -c \ + -n sampledb \ + -j 'log sample' \ + -z 'get the latest 352 log records' \ +) && echo "$id" + +if [[ $? -eq 1 ]]; then echo "Command failed, exiting"; exit; fi + +echo + +# list named queries +echo "[ list that named query ]" +php ../tools/list-named-queries.php -i "$id" + +if [[ $? -eq 1 ]]; then echo "Command failed, exiting"; exit; fi + +echo + +# delete named query +echo "[ delete named query ]" +php ../tools/named-query.php -d -i "$id" + +echo + +echo "Done." 
+ diff --git a/tools/database.php b/tools/database.php new file mode 100755 index 0000000..170052d --- /dev/null +++ b/tools/database.php @@ -0,0 +1,83 @@ +!"); } + else { $database = $options[OPTION_DATABASE]; } + + if (isset($options[OPTION_CREATE])) { + $sqlAction = CREATE; + } else { + if (!isset($options[OPTION_DROP])) { throw new \Exception("Either option or must be provided!"); } + else { + $sqlAction = DROP; + if (isset($options[OPTION_CASCADE])) { $cascade = CASCADE; } + } + } + + if (!isset($options[OPTION_OUTPUT])) { $output = DEFAULT_QUERY_OUTPUT; } + else { $output = $options[OPTION_OUTPUT]; } + + if (!isset($options[OPTION_CATALOG])) { $catalog = DEFAULT_CATALOG; } + else { $catalog = $options[OPTION_CATALOG]; } + + if (!isset($options[OPTION_WORKGROUP])) { $workgroup = DEFAULT_WORKGROUP; } + else { $workgroup = $options[OPTION_WORKGROUP]; } + + // AWS Athena client configuration + $awsConfig = getAwsConfig($options); + + // init Athena object + $athena = instantiateAthena(new \Aws\Athena\AthenaClient($awsConfig)); + + // output location on S3 for query results + $queryOutputLocation = sprintf(QUERY_OUTPUT_LOCATION, $output); + + // build query + $query = "$sqlAction DATABASE $database $cascade"; + + // start query execution + echo "Executing query:\n"; + $execId = $athena->startQuery($query, $queryOutputLocation, $catalog, $workgroup); + + // display the executing query details + print $execId . PHP_EOL; + + $exit = 0; + $loop = true; + do { + // get the current state of that query + $executionTime = ''; + $failureReason = ''; + $state = $athena->getQueryCurrentState($execId, $executionTime, $failureReason); + + if (in_array($state, \FC\AWS\Athena::QUERY_STOP_STATES)) { + $loop = false; + print "Query result:\n"; + print $execId . "\t" . $state . ($executionTime != '' ? "\t" . $executionTime : '') . ($state == \FC\AWS\Athena::QUERY_STATE_FAILED ? "\t" . $failureReason : '') . PHP_EOL; + if ($state != \FC\AWS\Athena::QUERY_STATE_SUCCEEDED) { $exit = 1; } + } + } while ($loop); + + exit($exit); + +} catch (\Exception $e) { + echo $e->getMessage() . PHP_EOL; + exit(1); +} diff --git a/tools/delete-metadata-files.sh b/tools/delete-metadata-files.sh new file mode 100755 index 0000000..ee18fb6 --- /dev/null +++ b/tools/delete-metadata-files.sh @@ -0,0 +1,43 @@ +#!/bin/bash + +# Delete metadata files recursively from an S3 location (bucket/prefixes) + +DIR=$( cd $( dirname "$0" ) > /dev/null 2>&1 && pwd ) +source $DIR/usage/delete-metadata-files.usage.sh + +if [[ ! -f $DIR/../.env.dev ]]; then + if [[ ! -f $DIR/../.env ]]; then + echo -e "Missing configuration variable file!" 
+ exit 1 + else + ENV="$DIR/../.env" + fi +else + ENV="$DIR/../.env.dev" +fi + +# retrieve configuration variables +set -o allexport +source $ENV +set +o allexport + +profile=$PROFILE + +if [[ "$1" == "" ]]; then display_usage; exit; fi + +while [ -n "$1" ]; do + case "$1" in + -h | --help) display_usage; exit ;; + + -b | --bucket) bucket="$2"; shift ;; + -f | --prefix) prefix="$2"; shift ;; + -p | --profile) profile="$2"; shift ;; + + *) ;; + esac + shift +done + +while read -r file; do + aws s3 --profile $profile rm s3://$bucket/$file; +done <<< $(aws s3 --profile $profile ls s3://$bucket/$prefix/ --recursive | grep '.metadata$' | awk '{print $4}') diff --git a/tools/imports/common.php b/tools/imports/common.php new file mode 100644 index 0000000..a469cca --- /dev/null +++ b/tools/imports/common.php @@ -0,0 +1,139 @@ +load()) == 0) { + throw new \Exception("Make sure they were not previously loaded."); + } + + define('DEFAULT_PROFILE', $_ENV['PROFILE']); + define('DEFAULT_VERSION', $_ENV['VERSION']); + define('DEFAULT_REGION', $_ENV['REGION']); + define('DEFAULT_CATALOG', $_ENV['CATALOG']); + define('DEFAULT_WORKGROUP', $_ENV['WORKGROUP']); + define('DEFAULT_QUERY_OUTPUT', $_ENV['QUERY_OUTPUT']); + + define('DEFAULT_AWS_DEFAULT_MAX_CALLS_PER_SECOND_LEVEL1QUERIES', $_ENV['AWS_DEFAULT_MAX_CALLS_PER_SECOND_LEVEL1QUERIES']); + define('DEFAULT_AWS_DEFAULT_MAX_BURST_CAPACITY_LEVEL1QUERIES', $_ENV['AWS_DEFAULT_MAX_BURST_CAPACITY_LEVEL1QUERIES']); + define('DEFAULT_AWS_DEFAULT_MAX_CALLS_PER_SECOND_LEVEL2QUERIES', $_ENV['AWS_DEFAULT_MAX_CALLS_PER_SECOND_LEVEL2QUERIES']); + define('DEFAULT_AWS_DEFAULT_MAX_BURST_CAPACITY_LEVEL2QUERIES', $_ENV['AWS_DEFAULT_MAX_BURST_CAPACITY_LEVEL2QUERIES']); + define('DEFAULT_AWS_DEFAULT_MAX_CALLS_PER_SECOND_LEVEL3QUERIES', $_ENV['AWS_DEFAULT_MAX_CALLS_PER_SECOND_LEVEL3QUERIES']); + define('DEFAULT_AWS_DEFAULT_MAX_BURST_CAPACITY_LEVEL3QUERIES', $_ENV['AWS_DEFAULT_MAX_BURST_CAPACITY_LEVEL3QUERIES']); + define('DEFAULT_AWS_DEFAULT_MAX_CALLS_PER_SECOND_LEVEL4QUERIES', $_ENV['AWS_DEFAULT_MAX_CALLS_PER_SECOND_LEVEL4QUERIES']); + define('DEFAULT_AWS_DEFAULT_MAX_BURST_CAPACITY_LEVEL4QUERIES', $_ENV['AWS_DEFAULT_MAX_BURST_CAPACITY_LEVEL4QUERIES']); + define('DEFAULT_AWS_DEFAULT_MAX_CALLS_PER_SECOND_LEVEL5QUERIES', $_ENV['AWS_DEFAULT_MAX_CALLS_PER_SECOND_LEVEL5QUERIES']); + define('DEFAULT_AWS_DEFAULT_MAX_BURST_CAPACITY_LEVEL5QUERIES', $_ENV['AWS_DEFAULT_MAX_BURST_CAPACITY_LEVEL5QUERIES']); + define('DEFAULT_AWS_DEFAULT_SIMULTANEOUS_DDL_QUERIES', $_ENV['AWS_DEFAULT_SIMULTANEOUS_DDL_QUERIES']); + define('DEFAULT_AWS_DEFAULT_SIMULTANEOUS_DML_QUERIES', $_ENV['AWS_DEFAULT_SIMULTANEOUS_DML_QUERIES']); + +} catch (\Exception $e) { + echo "Failed to load configuration variables from file: " . $e->getMessage(); + exit(1); +} + +/** + * Set options and display usage + * + * @param string $shortOpts list of short options + * @param array $longOpts array of long options + * @param string $usage usage details to display + * @return array options + */ +function setOptions(string $shortOpts, array $longOpts, string $usage): array +{ + $options = getopt($shortOpts, $longOpts); + if (isset($options['h']) || isset($options['help'])) { + print $usage; + exit; + } + + return $options; +} + +/** + * Initialize AWS configuration options + * + * @param array $options + * @return void + */ +function initAwsConfigOptions(array $options): void +{ + define('OPTION_PROFILE', isset($options['p']) ? 'p' : 'profile'); + define('OPTION_VERSION', isset($options['v']) ? 
'v' : 'version'); + define('OPTION_REGION', isset($options['r']) ? 'r' : 'region'); +} + +/** + * Initialize AWS configuration + * + * @param array $options + * @return array associative array with profile, version and region initialized according to provided options + */ +function getAwsConfig(array $options): array +{ + return [ + 'profile' => (isset($options[OPTION_PROFILE]) && $options[OPTION_PROFILE] != '' ? $options[OPTION_PROFILE] : DEFAULT_PROFILE), + 'version' => (isset($options[OPTION_VERSION]) && $options[OPTION_VERSION] != '' ? $options[OPTION_VERSION] : DEFAULT_VERSION), + 'region' => (isset($options[OPTION_REGION]) && $options[OPTION_REGION] != '' ? $options[OPTION_REGION] : DEFAULT_REGION) + ]; +} + +function instantiateAthena(\Aws\Athena\AthenaClient $athenaClient, int $maxDDL = null, int $maxDML = null): \FC\AWS\Athena +{ + return new \FC\AWS\Athena( + $athenaClient, + DEFAULT_AWS_DEFAULT_MAX_CALLS_PER_SECOND_LEVEL1QUERIES, + DEFAULT_AWS_DEFAULT_MAX_BURST_CAPACITY_LEVEL1QUERIES, + DEFAULT_AWS_DEFAULT_MAX_CALLS_PER_SECOND_LEVEL2QUERIES, + DEFAULT_AWS_DEFAULT_MAX_BURST_CAPACITY_LEVEL2QUERIES, + DEFAULT_AWS_DEFAULT_MAX_CALLS_PER_SECOND_LEVEL3QUERIES, + DEFAULT_AWS_DEFAULT_MAX_BURST_CAPACITY_LEVEL3QUERIES, + DEFAULT_AWS_DEFAULT_MAX_CALLS_PER_SECOND_LEVEL4QUERIES, + DEFAULT_AWS_DEFAULT_MAX_BURST_CAPACITY_LEVEL4QUERIES, + DEFAULT_AWS_DEFAULT_MAX_CALLS_PER_SECOND_LEVEL5QUERIES, + DEFAULT_AWS_DEFAULT_MAX_BURST_CAPACITY_LEVEL5QUERIES, + is_null($maxDDL) ? DEFAULT_AWS_DEFAULT_SIMULTANEOUS_DDL_QUERIES : $maxDDL, + is_null($maxDML) ? DEFAULT_AWS_DEFAULT_SIMULTANEOUS_DML_QUERIES : $maxDML + ); +} + +/** + * Date&Time validator for all formats + * + * @param $datetime date&time, date, time (full or partial), can be integer or string + * @param string $format date/time format + * @return bool true if validated, false otherwise + */ +function validateDateTime($datetime, string $format = 'Y-m-d H:i:s'): bool +{ + $d = \DateTime::createFromFormat($format, $datetime); + + return $d && $d->format($format) == $datetime; +} + diff --git a/tools/list-databases.php b/tools/list-databases.php new file mode 100755 index 0000000..26eaf8a --- /dev/null +++ b/tools/list-databases.php @@ -0,0 +1,40 @@ +getDatabaseDetails($options[OPTION_DATABASE], $catalog)) . PHP_EOL; + } else { + foreach($athena->listAllDatabases($catalog) as $databaseName) { + echo json_encode($athena->getDatabaseDetails($databaseName, $catalog)) . PHP_EOL; + } + } + + exit(0); + +} catch (\Exception $e) { + echo $e->getMessage() . PHP_EOL; + exit(1); +} diff --git a/tools/list-named-queries.php b/tools/list-named-queries.php new file mode 100755 index 0000000..99187b3 --- /dev/null +++ b/tools/list-named-queries.php @@ -0,0 +1,39 @@ +getNamedQueryDetails($options[OPTION_ID]), JSON_PRETTY_PRINT) . PHP_EOL; + } else { + $nextPaginationToken = ''; + do { + foreach($athena->listNamedQueries($nextPaginationToken) as $namedQueryId) { + echo json_encode($athena->getNamedQueryDetails($namedQueryId), JSON_PRETTY_PRINT) . PHP_EOL; + } + } while ($nextPaginationToken != ''); + } + + exit(0); + +} catch (\Exception $e) { + echo $e->getMessage() . 
PHP_EOL; + exit(1); +} diff --git a/tools/list-tables.php b/tools/list-tables.php new file mode 100755 index 0000000..bd85122 --- /dev/null +++ b/tools/list-tables.php @@ -0,0 +1,44 @@ +!"); } + else { $database = $options[OPTION_DATABASE]; } + + if (!isset($options[OPTION_CATALOG])) { $catalog = DEFAULT_CATALOG; } + else { $catalog = $options[OPTION_CATALOG]; } + + $tables = ''; + if (isset($options[OPTION_TABLE])) { + $tables = $options[OPTION_TABLE]; + } + + // AWS Athena client configuration + $awsConfig = getAwsConfig($options); + + // init Athena object + $athena = instantiateAthena(new \Aws\Athena\AthenaClient($awsConfig)); + + // detail one or all tables + foreach($athena->listAllTablesDetails($database, $catalog, $tables) as $tableDetails) { + echo json_encode($tableDetails, JSON_PRETTY_PRINT) . PHP_EOL; + } + + exit(0); + +} catch (\Exception $e) { + echo $e->getMessage() . PHP_EOL; + exit(1); +} diff --git a/tools/named-query.php b/tools/named-query.php new file mode 100755 index 0000000..5a97a0b --- /dev/null +++ b/tools/named-query.php @@ -0,0 +1,80 @@ +!"); } + $database = $options[OPTION_DATABASE]; + + if (!isset($options[OPTION_NAME])) { throw new \Exception("Missing option !"); } + $name = $options[OPTION_NAME]; + + if (!isset($options[OPTION_DESCRIPTION])) { throw new \Exception("Missing option !"); } + else { $description = $options[OPTION_DESCRIPTION]; } + } else { + if (!isset($options[OPTION_DELETE])) { throw new \Exception("Either option or must be provided!"); } + else { + $sqlAction = DELETE; + + if (!isset($options[OPTION_ID])) { throw new \Exception("Missing option !"); } + $id = $options[OPTION_ID]; + } + } + + // AWS Athena client configuration + $awsConfig = getAwsConfig($options); + + // init Athena object + $athena = instantiateAthena(new \Aws\Athena\AthenaClient($awsConfig)); + + // create or delete named query + if ($sqlAction == CREATE) { + if (!isset($options[OPTION_SILENT])) { echo "Creating named query:" . PHP_EOL; } + $queryId = $athena->createNamedQuery($query, $database, $name, $description); + + if (isset($options[OPTION_SILENT])) { print $queryId; } + else { print $queryId . PHP_EOL; } + + if (!isset($options[OPTION_SILENT])) { print "Created successfully" . PHP_EOL; } + } elseif ($sqlAction == DELETE) { + echo "Deleting named query:" . PHP_EOL; + print $id . PHP_EOL; + $athena->deleteNamedQuery($id); + print "Deleted successfully" . PHP_EOL; + } + + exit(0); + +} catch (\Exception $e) { + echo $e->getMessage() . 
PHP_EOL; + exit(1); +} diff --git a/tools/partitions-daily-hive.php b/tools/partitions-daily-hive.php new file mode 100755 index 0000000..780fa6b --- /dev/null +++ b/tools/partitions-daily-hive.php @@ -0,0 +1,68 @@ +!"); } + $table = $options[OPTION_TABLE]; + + if (!isset($options[OPTION_OUTPUT])) { $output = DEFAULT_QUERY_OUTPUT; } + else { $output = $options[OPTION_OUTPUT]; } + + if (!isset($options[OPTION_CATALOG])) { $catalog = DEFAULT_CATALOG; } + else { $catalog = $options[OPTION_CATALOG]; } + + if (!isset($options[OPTION_WORKGROUP])) { $workgroup = DEFAULT_WORKGROUP; } + else { $workgroup = $options[OPTION_WORKGROUP]; } + + // AWS Athena client configuration + $awsConfig = getAwsConfig($options); + + // init Athena object + $athena = instantiateAthena(new \Aws\Athena\AthenaClient($awsConfig)); + + $query = sprintf("MSCK REPAIR TABLE %s", $table); + + // output location for query results + $queryOutputLocation = sprintf(QUERY_OUTPUT_LOCATION, $output); + + // execute all queries + echo "Executing query:\n"; + $execId = $athena->startQuery($query, $queryOutputLocation, $catalog, $workgroup); + + // display the executing query details + print $execId . PHP_EOL; + + $exit = 0; + $loop = true; + do { + // get the current state of that query + $executionTime = ''; + $failureReason = ''; + $state = $athena->getQueryCurrentState($execId, $executionTime, $failureReason); + + if (in_array($state, \FC\AWS\Athena::QUERY_STOP_STATES)) { + $loop = false; + print "Query result:\n"; + print $execId . "\t" . $state . ($executionTime != '' ? "\t" . $executionTime : '') . ($state == \FC\AWS\Athena::QUERY_STATE_FAILED ? "\t" . $failureReason : '') . PHP_EOL; + if ($state != \FC\AWS\Athena::QUERY_STATE_SUCCEEDED) { $exit = 1; } + } + } while ($loop); + + exit($exit); + +} catch (\Exception $e) { + echo $e->getMessage() . PHP_EOL; + exit(1); +} diff --git a/tools/partitions-daily.php b/tools/partitions-daily.php new file mode 100755 index 0000000..846c082 --- /dev/null +++ b/tools/partitions-daily.php @@ -0,0 +1,117 @@ +!"); } + $table = $options[OPTION_TABLE]; + + if (!isset($options[OPTION_DATA])) { throw new \Exception("Missing !"); } + $data = $options[OPTION_DATA]; + + if (!isset($options[OPTION_YEAR])) { throw new \Exception("Missing !"); } + $year = intval($options[OPTION_YEAR]); + + if (!isset($options[OPTION_MONTH])) { throw new \Exception("Missing !"); } + $month = substr("00" . (string)intval($options[OPTION_MONTH]), -2); + + $maxDays = cal_days_in_month(CAL_GREGORIAN, intval($month), $year); + if (!($maxDays > 0)) { throw new \Exception("Max days for the month must superior to 0"); } + + if (!isset($options[OPTION_BEGIN])) { $startDate = '01'; } + else { $startDate = $options[OPTION_BEGIN]; } + if (intval($startDate) == 0) { throw new \Exception("Invalid start date: $startDate"); } + + if (!isset($options[OPTION_END])) { $endDate = $maxDays; } + else { + $endDate = $options[OPTION_END]; + if (intval($endDate) == 0) { throw new \Exception("Invalid end date: $endDate"); } + } + + if ($endDate < $startDate) { throw new \Exception("End date $endDate must be posterior to start date $startDate!"); } + + if (!isset($options[OPTION_OUTPUT])) { $output = DEFAULT_QUERY_OUTPUT; } + else { $output = $options[OPTION_OUTPUT]; } + + $column = (isset($options[OPTION_COLUMN]) && $options[OPTION_COLUMN] != '' ? 
$options[OPTION_COLUMN] : DEFAULT_PARTITION_COLUMN); + + if (!isset($options[OPTION_CATALOG])) { $catalog = DEFAULT_CATALOG; } + else { $catalog = $options[OPTION_CATALOG]; } + + if (!isset($options[OPTION_WORKGROUP])) { $workgroup = DEFAULT_WORKGROUP; } + else { $workgroup = $options[OPTION_WORKGROUP]; } + + // AWS Athena client configuration + $awsConfig = getAwsConfig($options); + + // init Athena object + $athena = instantiateAthena(new \Aws\Athena\AthenaClient($awsConfig)); + + $query = "ALTER TABLE $table ADD PARTITION ($column='%s') location 's3://$data/%s/'"; + + // output location for query results + $queryOutputLocation = sprintf(QUERY_OUTPUT_LOCATION, $output); + + // execute all queries + echo "Executing queries:\n"; + for ($d = intval($startDate); $d <= intval($endDate); $d++) { + // execute a query + $day = substr('0'.$d, -2); + $queryScript = sprintf($query, $day, $day); + $execId = $athena->startQuery($queryScript, $queryOutputLocation, $catalog, $workgroup); + $execDetails[$execId] = "$year-$month-$day"; + + // display the executing query details + print $execId . "\t" . "$year-$month-$day" . PHP_EOL; + + // check if the limit of max simultaneous running queries is reached + // and only resume the execution once within that limit + while ($athena->isQueryLimitReached(\FC\AWS\Athena::QUERY_TYPE_DDL)) {} + } + + echo PHP_EOL; + + $exit = 0; + + // display query state once done running + echo "Displaying queries result:\n"; + foreach ($athena->getQueries() as $query) { + $loop = true; + + do { + // get query id from the query array line + $queryId = $athena->getQueryStaleId($query); + + // get the current state of that query + $executionTime = ''; + $failureReason = ''; + $state = $athena->getQueryCurrentState($queryId, $executionTime, $failureReason); + + if (in_array($state, \FC\AWS\Athena::QUERY_STOP_STATES)) { + $loop = false; + print $queryId . "\t" . $execDetails[$queryId] . "\t" . $state . ($executionTime != '' ? "\t" . $executionTime : '') . ($state == \FC\AWS\Athena::QUERY_STATE_FAILED ? "\t" . $failureReason : '') . PHP_EOL; + + if ($state != \FC\AWS\Athena::QUERY_STATE_SUCCEEDED) { + $exit = 1; + } + } + } while ($loop); + } + + exit($exit); + +} catch (\Exception $e) { + echo $e->getMessage() . PHP_EOL; + exit(1); +} diff --git a/tools/query-daily.php b/tools/query-daily.php new file mode 100755 index 0000000..f7bc36f --- /dev/null +++ b/tools/query-daily.php @@ -0,0 +1,140 @@ +!"); } + $startDate = $options[OPTION_BEGIN]; + if (!preg_match('/[0-9]{4}-[0-9]{2}-[0-9]{2}/', $startDate) || !validateDateTime($startDate, DATE_FORMAT)) { throw new \Exception("Wrong format! Accepted format YYYY-MM-DD"); } + $startDate = \DateTime::createFromFormat(DATE_FORMAT, $startDate); + + if (!isset($options[OPTION_END])) { $endDate = clone $startDate; } + else { + $endDate = $options[OPTION_END]; + if (!preg_match('/[0-9]{4}-[0-9]{2}-[0-9]{2}/', $endDate) || !validateDateTime($endDate, DATE_FORMAT)) { throw new \Exception("Wrong format! 
Accepted format YYYY-MM-DD"); } + $endDate = \DateTime::createFromFormat(DATE_FORMAT, $endDate); + + if ($startDate > $endDate) { throw new \Exception("Date error: must be posterior or equal to !"); } + } + + // interval in days + $interval = $startDate->diff($endDate)->days; + + if (!isset($options[OPTION_OUTPUT])) { $output = DEFAULT_QUERY_OUTPUT; } + else { $output = $options[OPTION_OUTPUT]; } + + if (!isset($options[OPTION_QUERYTYPE])) { throw new \Exception("Missing "); } + $queryType = strtoupper($options[OPTION_QUERYTYPE]); + if (!in_array($queryType, \FC\AWS\Athena::QUERY_TYPES)) { + throw new \Exception(sprintf("Query type must be either %s or %s, %s given!", \FC\AWS\Athena::QUERY_TYPE_DDL, \FC\AWS\Athena::QUERY_TYPE_DML, $queryType)); + } + + if (isset($options[OPTION_MAXQUERY])) { $maxquery = $options[OPTION_MAXQUERY]; } + else { $maxquery = constant("\FC\AWS\Athena::AWS_DEFAULT_SIMULTANEOUS_" . $queryType . "_QUERIES"); } + + if (!isset($options[OPTION_CATALOG])) { $catalog = DEFAULT_CATALOG; } + else { $catalog = $options[OPTION_CATALOG]; } + + if (!isset($options[OPTION_WORKGROUP])) { $workgroup = DEFAULT_WORKGROUP; } + else { $workgroup = $options[OPTION_WORKGROUP]; } + + // AWS Athena client configuration + $awsConfig = getAwsConfig($options); + + // init Athena object + $athena = instantiateAthena( + new \Aws\Athena\AthenaClient($awsConfig), + $queryType == \FC\AWS\Athena::QUERY_TYPE_DDL ? $maxquery : null, + $queryType == \FC\AWS\Athena::QUERY_TYPE_DML ? $maxquery : null, + ); + + // output location on S3 for query results + $queryOutputLocation = sprintf(QUERY_OUTPUT_LOCATION, $output); + + // execute all queries + if (!isset($options[OPTION_DISPLAY])) { echo "Executing queries:\n"; } + $currentDate = clone $startDate; + $execDetails = []; + do { + // build query + $y = $currentDate->format('Y'); + $m = $currentDate->format('m'); + $d = $currentDate->format('d'); + $query = sprintf(QUERY, $y, $m, $d); + + if (isset($options[OPTION_DISPLAY])) { + echo "$query"; + exit(0); + } + + // start query execution + $execId = $athena->startQuery($query, $queryOutputLocation, $catalog, $workgroup); + $execDetails[$execId] = "$y-$m-$d"; + + // display the executing query details + print $execId . "\t" . $currentDate->format(DATE_FORMAT) . PHP_EOL; + + // check if the limit of max simultaneous running queries is reached + // and only resume the execution once within that limit + while ($athena->isQueryLimitReached($queryType)) {} + + $currentDate->add(new \DateInterval('P1D')); + } while ($currentDate <= $endDate); + + echo PHP_EOL; + + $exit = 0; + + // display query state once done running + echo "Displaying queries result:\n"; + foreach ($athena->getQueries() as $query) { + $loop = true; + + do { + // get query id from the query array line + $queryId = $athena->getQueryStaleId($query); + + // get the current state of that query + $executionTime = ''; + $failureReason = ''; + $state = $athena->getQueryCurrentState($queryId, $executionTime, $failureReason); + + if (in_array($state, \FC\AWS\Athena::QUERY_STOP_STATES)) { + $loop = false; + print $queryId . "\t" . $execDetails[$queryId] . "\t" . $state . ($executionTime != '' ? "\t" . $executionTime : '') . ($state == \FC\AWS\Athena::QUERY_STATE_FAILED ? "\t" . $failureReason : '') . PHP_EOL; + + if ($state != \FC\AWS\Athena::QUERY_STATE_SUCCEEDED) { + $exit = 1; + } + } + } while ($loop); + } + + exit($exit); + +} catch (\Exception $e) { + echo $e->getMessage() . 
PHP_EOL; + exit(1); +} diff --git a/tools/query-monthly.php b/tools/query-monthly.php new file mode 100755 index 0000000..5f7bcd5 --- /dev/null +++ b/tools/query-monthly.php @@ -0,0 +1,145 @@ +!"); } + $startDate = $options[OPTION_BEGIN]; + if (!preg_match('/[0-9]{4}-[0-9]{2}/', $startDate) || !validateDateTime($startDate, 'Y-m')) { throw new \Exception("Wrong format! Accepted format YYYY-MM"); } + $startDate = explode('-', $startDate); + $startDate[YEAR_IDX] = intval($startDate[YEAR_IDX]); + $startDate[MONTH_IDX] = intval($startDate[MONTH_IDX]); + + if (!isset($options[OPTION_END])) { $endDate = $startDate; } + else { + $endDate = $options[OPTION_END]; + if (!preg_match('/[0-9]{4}-[0-9]{2}/', $endDate) || !validateDateTime($endDate, 'Y-m')) { throw new \Exception("Wrong format! Accepted format YYYY-MM"); } + $endDate = explode('-', $endDate); + $endDate[YEAR_IDX] = intval($endDate[YEAR_IDX]); + $endDate[MONTH_IDX] = intval($endDate[MONTH_IDX]); + + if (($endDate[YEAR_IDX] == $startDate[YEAR_IDX] && $endDate[MONTH_IDX] < $startDate[MONTH_IDX]) || $endDate[YEAR_IDX] < $startDate[YEAR_IDX]) { + throw new \Exception("Date error: must be posterior or equal to !"); + } + } + + if (!isset($options[OPTION_OUTPUT])) { $output = DEFAULT_QUERY_OUTPUT; } + else { $output = $options[OPTION_OUTPUT]; } + + if (!isset($options[OPTION_QUERYTYPE])) { throw new \Exception("Missing "); } + $queryType = strtoupper($options[OPTION_QUERYTYPE]); + if (!in_array($queryType, \FC\AWS\Athena::QUERY_TYPES)) { + throw new \Exception(sprintf("Query type must be either %s or %s, %s given!", \FC\AWS\Athena::QUERY_TYPE_DDL, \FC\AWS\Athena::QUERY_TYPE_DML, $queryType)); + } + + if (isset($options[OPTION_MAXQUERY])) { $maxquery = $options[OPTION_MAXQUERY]; } + else { $maxquery = constant("\FC\AWS\Athena::AWS_DEFAULT_SIMULTANEOUS_" . $queryType . "_QUERIES"); } + + if (!isset($options[OPTION_CATALOG])) { $catalog = DEFAULT_CATALOG; } + else { $catalog = $options[OPTION_CATALOG]; } + + if (!isset($options[OPTION_WORKGROUP])) { $workgroup = DEFAULT_WORKGROUP; } + else { $workgroup = $options[OPTION_WORKGROUP]; } + + // AWS Athena client configuration + $awsConfig = getAwsConfig($options); + + // init Athena object + $athena = instantiateAthena( + new \Aws\Athena\AthenaClient($awsConfig), + $queryType == \FC\AWS\Athena::QUERY_TYPE_DDL ? $maxquery : null, + $queryType == \FC\AWS\Athena::QUERY_TYPE_DML ? $maxquery : null, + ); + + // output location on S3 for query results + $queryOutputLocation = sprintf(QUERY_OUTPUT_LOCATION, $output); + + // execute all queries + if (!isset($options[OPTION_DISPLAY])) { echo "Executing queries:\n"; } + $execDetails = []; + $m = $startDate[MONTH_IDX]; + for ($y = $startDate[YEAR_IDX]; $y <= $endDate[YEAR_IDX]; $y++) { + for (; $m <= 12; $m++) { + if ($y == $endDate[YEAR_IDX] && $m > $endDate[MONTH_IDX]) { break; } + + // build query + $month = substr('00' . $m, -2); + $query = sprintf(QUERY, $y, $month); + + if (isset($options[OPTION_DISPLAY])) { + echo "$query"; + exit(0); + } + + // start query execution + $execId = $athena->startQuery($query, $queryOutputLocation, $catalog, $workgroup); + $execDetails[$execId] = "$y-$month"; + + // display the executing query details + print $execId . "\t" . "$y-$month" . 
PHP_EOL; + + // check if the limit of max simultaneous running queries is reached + // and only resume the execution once within that limit + while ($athena->isQueryLimitReached($queryType)) {} + } + + $m = 1; + } + + echo PHP_EOL; + + $exit = 0; + + // display query state once done running + echo "Displaying queries result:\n"; + foreach ($athena->getQueries() as $query) { + $loop = true; + + do { + // get query id from the query array line + $queryId = $athena->getQueryStaleId($query); + + // get the current state of that query + $executionTime = ''; + $failureReason = ''; + $state = $athena->getQueryCurrentState($queryId, $executionTime, $failureReason); + + if (in_array($state, \FC\AWS\Athena::QUERY_STOP_STATES)) { + $loop = false; + print $queryId . "\t" . $execDetails[$queryId] . "\t" . $state . ($executionTime != '' ? "\t" . $executionTime : '') . ($state == \FC\AWS\Athena::QUERY_STATE_FAILED ? "\t" . $failureReason : '') . PHP_EOL; + + if ($state != \FC\AWS\Athena::QUERY_STATE_SUCCEEDED) { + $exit = 1; + } + } + } while ($loop); + } + + exit($exit); + +} catch (\Exception $e) { + echo $e->getMessage() . PHP_EOL; + exit(1); +} diff --git a/tools/query.php b/tools/query.php new file mode 100755 index 0000000..1cb8cb5 --- /dev/null +++ b/tools/query.php @@ -0,0 +1,93 @@ +startQuery($query, $queryOutputLocation, $catalog, $workgroup); + + // display the executing query details + if (!isset($options[OPTION_SILENT])) { print $execId . PHP_EOL; } + + $exit = 0; + $loop = true; + do { + // get the current state of that query + $executionTime = ''; + $failureReason = ''; + $state = $athena->getQueryCurrentState($execId, $executionTime, $failureReason); + + if (in_array($state, \FC\AWS\Athena::QUERY_STOP_STATES)) { + $loop = false; + + if (!isset($options[OPTION_SILENT])) { + print "Query result:" . PHP_EOL; + print $execId . "\t" . $state . ($executionTime != '' ? "\t" . $executionTime : '') . ($state == \FC\AWS\Athena::QUERY_STATE_FAILED ? "\t" . $failureReason : '') . PHP_EOL; + } + + if ($state != \FC\AWS\Athena::QUERY_STATE_SUCCEEDED) { + if (isset($options[OPTION_SILENT])) { + print ''; + } + $exit = 1; + } else { + if (isset($options[OPTION_SILENT])) { + print $execId; + } else { + print ''; + } + } + } + } while ($loop); + + exit($exit); + +} catch (\Exception $e) { + echo $e->getMessage() . PHP_EOL; + exit(1); +} diff --git a/tools/state-from-list.php b/tools/state-from-list.php new file mode 100755 index 0000000..3c9c012 --- /dev/null +++ b/tools/state-from-list.php @@ -0,0 +1,53 @@ +!"); } + $list = $options[OPTION_LIST]; + + $awsConfig = getAwsConfig($options); + + // init Athena object + $athena = instantiateAthena(new \Aws\Athena\AthenaClient($awsConfig)); + + $file = fopen($list,"r"); + + while(!feof($file)) { + $line = trim(fgets($file)); + if ($line !== false) { + $matches = []; + + if (!preg_match(sprintf('/%s/', ID_FORMAT), $line, $matches)) { + print $line . PHP_EOL; + } else { + if (isset($matches[0]) && $matches[0] != '') { + $queryID = $matches[0]; + $comments = trim(str_replace($queryID, '', $line)); + $executionTime = ''; + $failureReason = ''; + $state = $athena->getQueryCurrentState($queryID, $executionTime, $failureReason); + + print $queryID . "\t" . $state . ($executionTime != '' ? "\t" . $executionTime : '') . ($state == \FC\AWS\Athena::QUERY_STATE_FAILED ? "\t" . $failureReason : '') . "\t" . $comments . PHP_EOL; + } + } + } + } + + fclose($file); + exit(0); + +} catch (\Exception $e) { + echo $e->getMessage() . 
"\n"; + fclose($file); + exit(1); +} + diff --git a/tools/state.php b/tools/state.php new file mode 100755 index 0000000..3fa8bfe --- /dev/null +++ b/tools/state.php @@ -0,0 +1,38 @@ +!"); } + $id = $options[OPTION_ID]; + if (!preg_match(sprintf('/^%s$/', ID_FORMAT), $id)) { + throw new \Exception("Wrong format! Accepted format " . ID_FORMAT); + } + + // AWS Athena client configuration + $awsConfig = getAwsConfig($options); + + // init Athena object + $athena = instantiateAthena(new \Aws\Athena\AthenaClient($awsConfig)); + + // display query state + $executionTime = ''; + $failureReason = ''; + $state = $athena->getQueryCurrentState($id, $executionTime, $failureReason); + print $id . "\t" . $state . ($executionTime != '' ? "\t" . $executionTime : '') . ($state == \FC\AWS\Athena::QUERY_STATE_FAILED ? "\t" . $failureReason : '') . PHP_EOL; + + exit(0); + +} catch (\Exception $e) { + echo $e->getMessage() . PHP_EOL; + exit(1); +} + diff --git a/tools/stop.php b/tools/stop.php new file mode 100755 index 0000000..7f5528a --- /dev/null +++ b/tools/stop.php @@ -0,0 +1,42 @@ +!"); } + $id = $options[OPTION_ID]; + if (!preg_match(sprintf('/^%s$/', ID_FORMAT), $id)) { + throw new \Exception("Wrong format! Accepted format " . ID_FORMAT); + } + + // AWS Athena client configuration + $awsConfig = getAwsConfig($options); + + // init Athena object + $athena = instantiateAthena(new \Aws\Athena\AthenaClient($awsConfig)); + + // stop query + print "Stopping query...\n"; + $athena->StopQuery($id); + + // display query state + $executionTime = ''; + $failureReason = ''; + $state = $athena->getQueryCurrentState($id, $executionTime, $failureReason); + print $id . "\t" . $state . ($executionTime != '' ? "\t" . $executionTime : '') . ($state == \FC\AWS\Athena::QUERY_STATE_FAILED ? "\t" . $failureReason : '') . PHP_EOL; + + exit(0); + +} catch (\Exception $e) { + echo $e->getMessage() . PHP_EOL; + exit(1); +} + diff --git a/tools/usage/database.usage.php b/tools/usage/database.usage.php new file mode 100644 index 0000000..5bd09a3 --- /dev/null +++ b/tools/usage/database.usage.php @@ -0,0 +1,95 @@ + is replaced < %%1\$s >, < MONTH > is replaced by < %%2\$s > and < DAY > is replaced by < %%3\$s > + + php %1\$s -q PATH_TO_SCRIPT -b START_DATE [-e END_DATE] [-u MAX_SIMULTANEOUS_QUERIES] [-o QUERY_OUTPUT_PATH] -x QUERY_TYPE [-g WORKGROUP_NAME] [-k CATALOG_NAME] [-p AWS_PROFILE] [-v AWS_VERSION] [-r AWS_REGION] [-w] + php %1\$s --script PATH_TO_SCRIPT --begin START_DATE [--end END_DATE] [--maxquery MAX_SIMULTANEOUS_QUERIES] [--output QUERY_OUTPUT_PATH] --querytype QUERY_TYPE [--workgroup WORKGROUP_NAME] [--catalog CATALOG_NAME] [--profile AWS_PROFILE] [--version AWS_VERSION] [--region AWS_REGION] [--display] + +Example: + echo "SELECT * FROM database.table_%%1\\\$s%%2\\\$s WHERE day = '%%3\\\$s'" \ + | php %1\$s \ + -b start_date \ + -x DML + + php %1\$s \ + -q ../../examples/query-daily.sql \ + -b start_date \ + -e end_date \ + -u 4 \ + -o query_output_path \ + -x DML \ + -g primary \ + -k AwsDataCatalog \ + -p default \ + -v latest \ + -r us-east-1 +EOS, INCLUDED_FILE))); + +$shortOpts = 'q:b:e:u:o:x:g:k:p:v:r:wh'; +$longOpts = [ + 'script:', + 'begin:', + 'end:', + 'maxquery:', + 'output:', + 'querytype:', + 'workgroup:', + 'catalog:', + 'profile:', + 'version:', + 'region:', + 'display', + 'help' +]; + +// set options +$options = setOptions($shortOpts, $longOpts, USAGE); + +define('OPTION_SCRIPT', isset($options['q']) ? 'q' : 'script'); +define('OPTION_BEGIN', isset($options['b']) ? 
'b' : 'begin'); +define('OPTION_END', isset($options['e']) ? 'e' : 'end'); +define('OPTION_MAXQUERY', isset($options['u']) ? 'u' : 'maxquery'); +define('OPTION_OUTPUT', isset($options['o']) ? 'o' : 'output'); +define('OPTION_QUERYTYPE', isset($options['x']) ? 'x' : 'querytype'); +define('OPTION_WORKGROUP', isset($options['g']) ? 'g' : 'workgroup'); +define('OPTION_CATALOG', isset($options['k']) ? 'k' : 'catalog'); +define('OPTION_DISPLAY', isset($options['w']) ? 'w' : 'display'); + +// Initialize AWS configuration options +initAwsConfigOptions($options); diff --git a/tools/usage/query-monthly.usage.php b/tools/usage/query-monthly.usage.php new file mode 100644 index 0000000..203a665 --- /dev/null +++ b/tools/usage/query-monthly.usage.php @@ -0,0 +1,99 @@ + is replaced < %%1\$s > and < MONTH > is replaced by < %%2\$s > + + php %1\$s -q PATH_TO_SCRIPT -b START_DATE [-e END_DATE] [-u MAX_SIMULTANEOUS_QUERIES] [-o QUERY_OUTPUT_PATH] -x QUERY_TYPE [-g WORKGROUP_NAME] [-k CATALOG_NAME] [-p AWS_PROFILE] [-v AWS_VERSION] [-r AWS_REGION] [-w] + php %1\$s --script PATH_TO_SCRIPT --begin START_DATE [--end END_DATE] [--maxquery MAX_SIMULTANEOUS_QUERIES] [--output QUERY_OUTPUT_PATH] --querytype QUERY_TYPE [--workgroup WORKGROUP_NAME] [--catalog CATALOG_NAME] [--profile AWS_PROFILE] [--version AWS_VERSION] [--region AWS_REGION] [--display] + +Example: + echo "SELECT * FROM database.table_%%1\\\$s%%2\\\$s" \ + | php %1\$s \ + -b start_date \ + -x DML + + php %1\$s \ + -q ../../examples/create-table.sql|drop-table.sql \ + -b start_date \ + -e end_date \ + -u 4 \ + -o query_output_path \ + -x DML \ + -g primary \ + -k AwsDataCatalog \ + -p default \ + -v latest \ + -r us-east-1 +EOS, INCLUDED_FILE))); + +$shortOpts = 'q:b:e:u:o:x:g:k:p:v:r:wh'; +$longOpts = [ + 'script:', + 'begin:', + 'end:', + 'maxquery:', + 'output:', + 'querytype:', + 'workgroup:', + 'catalog:', + 'profile:', + 'version:', + 'region:', + 'display', + 'help' +]; + +// set options +$options = setOptions($shortOpts, $longOpts, USAGE); + +define('OPTION_SCRIPT', isset($options['q']) ? 'q' : 'script'); +define('OPTION_BEGIN', isset($options['b']) ? 'b' : 'begin'); +define('OPTION_END', isset($options['e']) ? 'e' : 'end'); +define('OPTION_MAXQUERY', isset($options['u']) ? 'u' : 'maxquery'); +define('OPTION_OUTPUT', isset($options['o']) ? 'o' : 'output'); +define('OPTION_QUERYTYPE', isset($options['x']) ? 'x' : 'querytype'); +define('OPTION_WORKGROUP', isset($options['g']) ? 'g' : 'workgroup'); +define('OPTION_CATALOG', isset($options['k']) ? 'k' : 'catalog'); +define('OPTION_DISPLAY', isset($options['w']) ? 'w' : 'display'); + +// Initialize AWS configuration options +initAwsConfigOptions($options); diff --git a/tools/usage/query.usage.php b/tools/usage/query.usage.php new file mode 100644 index 0000000..c1105a3 --- /dev/null +++ b/tools/usage/query.usage.php @@ -0,0 +1,80 @@ +