Skip to content
This repository has been archived by the owner on Apr 30, 2022. It is now read-only.

add exporting table files #120

Merged
merged 18 commits into from
Oct 19, 2018
27 changes: 27 additions & 0 deletions FOR_ANALYSTS.md
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,33 @@ data = quandl.get_table('ZACKS/FC', paginate=True, ticker=['AAPL', 'MSFT'], per_

In this query we are asking for more pages of data, `ticker` values of either `AAPL` or `MSFT` and a `per_end_date` that is greater than or equal to `2015-01-01`. We are also filtering the returned columns on `ticker`, `per_end_date` and `comp_name` rather than all available columns. The output format is `pandas`.

Download table data as a zip file. You can download all the table data in a data table in a single call. The following will download the entire F1 table data as a zip file to your current working directory:

```python
import quandl
quandl.export_table('MER/F1')
```

You can also specify where to download the zip file:

```python
import quandl
quandl.export_table('MER/F1', filename='/my/path/MER_F1_DB.zip')
```

Note that if you are downloading the whole table, it will take longer to generate the zip file.

You can also specify what data you want to download with filters and parameters:

```python
import quandl
quandl.export_table('MER/F1', params={'compnumber': '39102', 'mapcode':'-5370','reporttype': 'A', 'qopts': {'columns': ['reportdate', 'amount']}})
```

After the download is finished, the path of the downloaded zip file is printed.

Generating the zip file can take a while; a message is printed while the file is being generated. Once the file has been generated, the download of the zip file starts automatically.

#### Available parameters:

The following additional parameters can be specified for a datatable call:
Expand Down
28 changes: 28 additions & 0 deletions FOR_DEVELOPERS.md
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,34 @@ while True:
break
```

Download table data as a zip file. You can download all the table data in a data table in a single call. The following will download the entire F1 table data as a zip file to your current working directory:

```python
import quandl
quandl.export_table('MER/F1')
```

You can also specify where to download the zip file:

```python
import quandl
quandl.export_table('MER/F1', filename='/my/path/MER_F1_DB.zip')
```

Note that if you are downloading the whole table, it will take longer to generate the zip file.

You can also specify what data you want to download with filters and parameters:

```python
import quandl
quandl.export_table('MER/F1', params={'compnumber': '39102', 'mapcode':'-5370','reporttype': 'A', 'qopts': {'columns': ['reportdate', 'amount']}})
```

After the download is finished, the path of the downloaded zip file is printed.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are you still returning the filename? I think you changed the method to only print the filename.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

updated


Generating the zip file can take a while; a message is printed while the file is being generated. Once the file has been generated, the download of the zip file starts automatically.


### Download Entire Database (Bulk Download)

To get the url for downloading all dataset data in a database:
Expand Down
20 changes: 20 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,26 @@ The following are instructions for running our tests:
`python setup.py install`
4. Run the following command to test the plugin in all versions of python we support:
`tox`

Once you have all required packages installed, you can run tests locally with:

Running all tests locally

```python
python -W always setup.py -q test
```

Running an individual test

```python
python -m unittest test.[test file name].[class name].[individual test name]
```

Example:

```python
python -m unittest -v test.test_datatable.ExportDataTableTest.test_download_get_file_info
```

## Recommended Usage

Expand Down
1 change: 1 addition & 0 deletions quandl/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,5 @@
from .model.merged_dataset import MergedDataset
from .get import get
from .bulkdownload import bulkdownload
from .export_table import export_table
from .get_table import get_table
22 changes: 22 additions & 0 deletions quandl/export_table.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
from quandl.errors.quandl_error import InvalidRequestError
from .utils.api_key_util import ApiKeyUtil
from .model.datatable import Datatable
from .message import Message


def export_table(datatable_code, **kwargs):
    """Export a datatable as a zip file and save it locally.

    :param str datatable_code: Code of the datatable to export, such as MER/F1
    :param str filename: Destination file or folder for the zip file.
        Defaults to '.', the current working directory
    :param str api_key: API key for the request; the keyword arguments are
        handed to ApiKeyUtil.init_api_key_from_args before the download starts
    :param kwargs: Every keyword left after ``filename`` has been removed is
        passed to Datatable.download_file as its ``params`` argument
    :raises InvalidRequestError: If the ``authtoken`` keyword is supplied
    :returns: Whatever Datatable.download_file returns
    """
    # ``authtoken`` is rejected outright so that callers move to ``api_key``.
    if 'authtoken' in kwargs:
        raise InvalidRequestError(Message.ERROR_AUTHTOKEN_NOT_SUPPORTED)

    ApiKeyUtil.init_api_key_from_args(kwargs)

    # ``filename`` is an option of this function rather than a table filter,
    # so take it out before the remaining keywords are forwarded.
    destination = kwargs.pop('filename', '.')
    table = Datatable(datatable_code)
    return table.download_file(destination, params=kwargs)
1 change: 1 addition & 0 deletions quandl/message.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,4 @@ class Message:
quandl.get_table() call. For more information see our documentation: \
https://github.com/quandl/quandl-python/blob/master/FOR_ANALYSTS.md#things-to-note'
WARN_PARAMS_NOT_SUPPORTED = '%s will no longer supported. Please use %s instead'
LONG_GENERATION_TIME = 'We are generating the zip file now, please wait...'
80 changes: 79 additions & 1 deletion quandl/model/datatable.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,28 @@
try:
from urllib.request import urlopen
from urllib.parse import urlencode
except ImportError:
from urllib import urlopen
from urllib import urlencode

from time import sleep
import os

from quandl.connection import Connection
from quandl.util import Util
from quandl.errors.quandl_error import QuandlError
from quandl.operations.get import GetOperation
from quandl.operations.list import ListOperation
from quandl.util import Util

from .model_base import ModelBase
from quandl.message import Message
from .data import Data


class Datatable(GetOperation, ListOperation, ModelBase):
BULK_CHUNK_SIZE = 16 * 1024
WAIT_GENERATION_INTERVAL = 30
IS_FILE_READY = False

@classmethod
def get_path(cls):
Expand All @@ -14,3 +31,64 @@ def get_path(cls):
def data(self, **options):
    """Return the result of Data.page for this datatable.

    The keyword options are first passed through Util.convert_options and
    the converted options are then forwarded to Data.page.
    """
    return Data.page(self, **Util.convert_options(**options))

def download_file(self, file_or_folder_path, **options):
    """Request an export of this datatable and save the resulting zip file.

    Calls ``_request_file_info`` repeatedly until it signals, through
    ``self.IS_FILE_READY``, that the export was ready and has been
    downloaded.

    :param str file_or_folder_path: Target file, or folder, for the zip file
    :param options: May contain a ``params`` dict; its entries are forwarded
        as keyword arguments to ``_request_file_info``
    :raises QuandlError: If ``file_or_folder_path`` is not a ``str``
    """
    if not isinstance(file_or_folder_path, str):
        raise QuandlError(Message.ERROR_FOLDER_ISSUE)

    request_params = options.setdefault('params', {})

    # Clear any flag left over from an earlier call on this instance. Without
    # this, a stale True would end the loop below after a single request even
    # though the new export has not been downloaded yet.
    self.IS_FILE_READY = False

    # NOTE(review): a reviewer suggested having _request_file_info return the
    # ready status and looping on that return value instead of on this flag.
    while not self.IS_FILE_READY:
        # _request_file_info downloads the file and sets IS_FILE_READY when
        # the export is ready; otherwise it prints a notice and sleeps for
        # WAIT_GENERATION_INTERVAL seconds before returning.
        self._request_file_info(file_or_folder_path, **request_params)

def _request_file_info(self, file_or_folder_path, **options):
    """Query the export status once and download the file if it is ready.

    Sends a GET request to the export URL and reads the
    ``['datatable_bulk_download']['file']`` entry of the JSON response.
    When its ``status`` is ``'fresh'``, ``IS_FILE_READY`` is set and the
    file behind ``link`` is downloaded. Otherwise a notice is printed and
    the call sleeps for ``WAIT_GENERATION_INTERVAL`` seconds.

    :param str file_or_folder_path: Destination passed on to
        ``_download_file_with_link``
    :param options: Forwarded both to ``_download_request_path`` and to
        ``Connection.request``
    """
    request_url = self._download_request_path(**options)
    table_code = self.code

    response = Connection.request('get', request_url, **options)
    export_file = response.json()['datatable_bulk_download']['file']

    if export_file['status'] != 'fresh':
        # The zip file is not ready yet: tell the user and wait.
        print(Message.LONG_GENERATION_TIME)
        sleep(self.WAIT_GENERATION_INTERVAL)
        return

    download_link = export_file['link']
    self.IS_FILE_READY = True
    self._download_file_with_link(file_or_folder_path, download_link, table_code)

def _download_file_with_link(self, file_or_folder_path, file_link, code_name):
    """Stream the zip file at ``file_link`` to disk and print where it went.

    :param str file_or_folder_path: Destination. If it is an existing
        directory, the file is written inside it under the name built from
        ``code_name`` with every '/' replaced by '_' plus a ``.zip``
        extension; otherwise the value is used as the file path itself
    :param str file_link: URL the zip file is read from
    :param str code_name: Datatable code, such as MER/F1
    """
    file_path = file_or_folder_path
    if os.path.isdir(file_or_folder_path):
        file_path = os.path.join(
            file_or_folder_path,
            '{}.{}'.format(code_name.replace('/', '_'), 'zip'))

    res = urlopen(file_link)
    # Close the response explicitly so the connection is released even when
    # opening or writing the target file fails. try/finally is used instead
    # of ``with`` because the Python 2 ``urllib.urlopen`` fallback imported by
    # this module does not return a context manager.
    try:
        with open(file_path, 'wb') as fd:
            # Copy in BULK_CHUNK_SIZE pieces so a large export never has to
            # fit in memory at once.
            while True:
                chunk = res.read(self.BULK_CHUNK_SIZE)
                if not chunk:
                    break
                fd.write(chunk)
    finally:
        res.close()

    print(file_path)

def _download_request_path(self, **options):
    """Build the URL path used to request an export of this datatable.

    The result is the default path with this datatable's code substituted
    for ``id``, followed by ``.json?qopts.export=true&`` and, when a
    non-empty ``params`` option is given, those params URL-encoded.

    :param options: May contain a ``params`` dict to append to the query
    :returns: The path and query string as a single string
    """
    resource_path = Util.constructed_path(self.default_path(), {'id': self.code})
    export_url = resource_path + '.json?qopts.export=true&'

    query_params = options.get('params', {})
    if query_params:
        export_url = export_url + urlencode(query_params)

    return export_url
58 changes: 58 additions & 0 deletions test/test_datatable.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,10 @@
try:
from urllib.parse import urlparse
from urllib.parse import parse_qs
except ImportError:
from urlparse import urlparse
from cgi import parse_qs

import re
import unittest
import httpretty
Expand All @@ -6,6 +13,8 @@
from quandl.model.datatable import Datatable
from mock import patch, call
from test.factories.datatable import DatatableFactory
from quandl.api_config import ApiConfig
from quandl.errors.quandl_error import (InternalServerError, QuandlError)


class GetDatatableDatasetTest(unittest.TestCase):
Expand Down Expand Up @@ -48,3 +57,52 @@ def test_dataset_column_names_match_expected(self):
metadata = Datatable('ZACKS/FC').data_fields()
six.assertCountEqual(self,
metadata, [u'datatable_code', u'id', u'name', u'vendor_code'])


class ExportDataTableTest(unittest.TestCase):
    """Tests for the datatable export (zip download) helpers on Datatable."""

    @classmethod
    def setUpClass(cls):
        # Intercept every datatables API call and answer with a factory-built
        # datatable payload.
        httpretty.enable()
        built = DatatableFactory.build(vendor_code='AUSBS', datatable_code='D')
        datatable = {'datatable': built}
        url_pattern = re.compile('https://www.quandl.com/api/v3/datatables/*')
        httpretty.register_uri(httpretty.GET, url_pattern, body=json.dumps(datatable))
        cls.datatable_instance = Datatable(datatable['datatable'])

    @classmethod
    def tearDownClass(cls):
        httpretty.disable()
        httpretty.reset()

    def setUp(self):
        # A fresh AUSBS/D datatable and known API settings for every test.
        attributes = DatatableFactory.build(vendor_code='AUSBS', datatable_code='D')
        code = attributes['vendor_code'] + '/' + attributes['datatable_code']
        self.datatable = Datatable(code, attributes)
        ApiConfig.api_key = 'api_token'
        ApiConfig.api_version = '2015-04-09'

    def test_download_get_file_info(self):
        parsed_url = urlparse(self.datatable._download_request_path())
        self.assertEqual(parsed_url.path, 'datatables/AUSBS/D.json')
        expected_query = {'qopts.export': ['true']}
        self.assertDictEqual(parse_qs(parsed_url.query), expected_query)

    def test_bulk_download_raises_exception_when_no_path(self):
        with self.assertRaises(QuandlError):
            self.datatable.download_file(None)

    def test_bulk_download_table_raises_exception_when_error_response(self):
        # Re-register the datatables endpoint so that it answers with a 500.
        error_body = json.dumps(
            {'quandl_error': {'code': 'QEMx01', 'message': 'something went wrong'}})
        url_pattern = re.compile('https://www.quandl.com/api/v3/datatables/*')
        httpretty.register_uri(httpretty.GET, url_pattern, body=error_body, status=500)
        with self.assertRaises(InternalServerError):
            self.datatable.download_file('.')