Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added "resolve" option to "resource/package.to_zip" #556

Merged
merged 6 commits into from
Nov 28, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 12 additions & 2 deletions docs/build/api-reference/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -1531,14 +1531,19 @@ Create a copy of the package
#### <big>to\_zip</big>

```python
| to_zip(target, encoder_class=None)
| to_zip(target, *, resolve=[], encoder_class=None)
```

Save package to a zip

**Arguments**:

- `target` _str_ - target path
- `resolve` _str[]_ - Data sources to resolve.
  For "inline" data it means saving them as CSV and including them in the ZIP.
  For "remote" data it means downloading them and including them in the ZIP.
  For example, `resolve=["inline", "remote"]`
- `encoder_class` _object_ - json encoder class


**Raises**:
Expand Down Expand Up @@ -6110,14 +6115,19 @@ Convert resource to File
#### <big>to\_zip</big>

```python
| to_zip(target, encoder_class=None)
| to_zip(target, *, resolve=[], encoder_class=None)
```

Save resource to a zip

**Arguments**:

- `target` _str_ - target path
- `resolve` _str[]_ - Data sources to resolve.
  For "inline" data it means saving them as CSV and including them in the ZIP.
  For "remote" data it means downloading them and including them in the ZIP.
  For example, `resolve=["inline", "remote"]`
- `encoder_class` _object_ - json encoder class


**Raises**:
Expand Down
78 changes: 63 additions & 15 deletions frictionless/package.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import json
import glob
import zipfile
import tempfile
from copy import deepcopy
from .exception import FrictionlessException
from .metadata import Metadata
Expand Down Expand Up @@ -382,33 +383,80 @@ def to_copy(self):
trusted=self.__trusted,
)

# NOTE: support multipart
def to_zip(self, target, encoder_class=None):
# TODO: support multipart
# TODO: there is 100% duplication with resource.to_zip
def to_zip(self, target, *, resolve=[], encoder_class=None):
"""Save package to a zip

Parameters:
target (str): target path
resolve (str[]): Data sources to resolve.
For "inline" data it means saving them as CSV and including into ZIP.
For "remote" data it means downloading them and including into ZIP.
For example, `resolve=["inline", "remote"]`
encoder_class (object): json encoder class

Raises:
FrictionlessException: on any error
"""
try:
with zipfile.ZipFile(target, "w") as zip:
descriptor = self.copy()
for resource in self.resources:
if resource.inline:
continue
if resource.remote:
continue
package_descriptor = self.to_dict()
for index, resource in enumerate(self.resources):
descriptor = package_descriptor["resources"][index]

# Multipart data
if resource.multipart:
continue
if not helpers.is_safe_path(resource.path):
continue
zip.write(resource.source, resource.path)
descriptor = json.dumps(
descriptor, indent=2, ensure_ascii=False, cls=encoder_class
note = "Zipping multipart resource is not yet supported"
raise FrictionlessException(errors.ResourceError(note=note))

# Inline data
elif resource.inline:
if "inline" in resolve:
path = f"{resource.name}.csv"
descriptor["path"] = path
del descriptor["data"]
with tempfile.NamedTemporaryFile() as file:
resource.write(file.name, format="csv")
zip.write(file.name, path)
elif not isinstance(resource.data, list):
note = f"Use resolve argument to zip {resource.data}"
raise FrictionlessException(errors.ResourceError(note=note))

# Remote data
elif resource.remote:
if "remote" in resolve:
path = f"{resource.name}.{resource.format}"
descriptor["path"] = path
with tempfile.NamedTemporaryFile() as file:
byte_stream = resource.read_byte_stream()
while True:
chunk = byte_stream.read(1024)
if not chunk:
break
file.write(chunk)
file.flush()
zip.write(file.name, path)

# Local Data
else:
path = resource.path
if not helpers.is_safe_path(path):
path = f"{resource.name}.{resource.format}"
descriptor["path"] = path
zip.write(resource.source, path)

# Metadata
zip.writestr(
"datapackage.json",
json.dumps(
package_descriptor,
indent=2,
ensure_ascii=False,
cls=encoder_class,
),
)
zip.writestr("datapackage.json", descriptor)

except Exception as exception:
error = errors.PackageError(note=str(exception))
raise FrictionlessException(error) from exception
Expand Down
77 changes: 62 additions & 15 deletions frictionless/resource.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import json
import petl
import zipfile
import tempfile
import warnings
from copy import deepcopy
from importlib import import_module
Expand Down Expand Up @@ -791,33 +792,79 @@ def to_file(self, **options):
options.setdefault("control", self.control)
return module.File(**options)

# NOTE: support multipart
def to_zip(self, target, encoder_class=None):
# TODO: support multipart
# TODO: there is 100% duplication with package.to_zip
def to_zip(self, target, *, resolve=[], encoder_class=None):
"""Save resource to a zip

Parameters:
target (str): target path
resolve (str[]): Data sources to resolve.
For "inline" data it means saving them as CSV and including into ZIP.
For "remote" data it means downloading them and including into ZIP.
For example, `resolve=["inline", "remote"]`
encoder_class (object): json encoder class

Raises:
FrictionlessException: on any error
"""
try:
with zipfile.ZipFile(target, "w") as zip:
descriptor = self.copy()
for resource in [self]:
if resource.inline:
continue
if resource.remote:
continue
descriptor = self.to_dict()

# Multipart data
if resource.multipart:
continue
if not helpers.is_safe_path(resource.path):
continue
zip.write(resource.source, resource.path)
descriptor = json.dumps(
descriptor, indent=2, ensure_ascii=False, cls=encoder_class
)
zip.writestr("dataresource.json", descriptor)
note = "Zipping multipart resource is not yet supported"
raise FrictionlessException(errors.ResourceError(note=note))

# Inline data
elif resource.inline:
if "inline" in resolve:
path = f"{resource.name}.csv"
descriptor["path"] = path
del descriptor["data"]
with tempfile.NamedTemporaryFile() as file:
resource.write(file.name, format="csv")
zip.write(file.name, path)
elif not isinstance(resource.data, list):
note = f"Use resolve argument to zip {resource.data}"
raise FrictionlessException(errors.ResourceError(note=note))

# Remote data
elif resource.remote:
if "remote" in resolve:
path = f"{resource.name}.{resource.format}"
descriptor["path"] = path
with tempfile.NamedTemporaryFile() as file:
byte_stream = resource.read_byte_stream()
while True:
chunk = byte_stream.read(1024)
if not chunk:
break
file.write(chunk)
file.flush()
zip.write(file.name, path)

# Local Data
else:
path = resource.path
if not helpers.is_safe_path(path):
path = f"{resource.name}.{resource.format}"
descriptor["path"] = path
zip.write(resource.source, path)

# Metadata
zip.writestr(
"dataresource.json",
json.dumps(
descriptor,
indent=2,
ensure_ascii=False,
cls=encoder_class,
),
)

except (IOError, zipfile.BadZipfile, zipfile.LargeZipFile) as exception:
error = errors.ResourceError(note=str(exception))
raise FrictionlessException(error) from exception
Expand Down
12 changes: 9 additions & 3 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,18 +12,24 @@
# Fixtures


# TODO: create the same for other databases
@pytest.fixture
def database_url(tmpdir):
path = str(tmpdir.join("database.db"))
conn = sqlite3.connect(path)
conn.execute("CREATE TABLE data (id INTEGER PRIMARY KEY, name TEXT)")
conn.execute("INSERT INTO data VALUES (1, 'english'), (2, '中国人')")
conn.execute("CREATE TABLE 'table' (id INTEGER PRIMARY KEY, name TEXT)")
conn.execute("INSERT INTO 'table' VALUES (1, 'english'), (2, '中国人')")
conn.commit()
yield "sqlite:///%s" % path
conn.close()


# TODO: create the same for other databases
@pytest.fixture
def sqlite_url(tmpdir):
path = str(tmpdir.join("database.db"))
return "sqlite:///%s" % path


# Settings


Expand Down
Loading