diff --git a/docs/build/api-reference/README.md b/docs/build/api-reference/README.md
index e88ceae216..af51fa4870 100644
--- a/docs/build/api-reference/README.md
+++ b/docs/build/api-reference/README.md
@@ -1531,7 +1531,7 @@ Create a copy of the package
#### to\_zip
```python
- | to_zip(target, encoder_class=None)
+ | to_zip(target, *, resolve=[], encoder_class=None)
```
Save package to a zip
@@ -1539,6 +1539,11 @@ Save package to a zip
**Arguments**:
- `target` _str_ - target path
+- `resolve` _str[]_ - Data sources to resolve.
+ For "inline" data it means saving them as CSV files and including them in the ZIP.
+ For "remote" data it means downloading them and including them in the ZIP.
+ For example, `resolve=["inline", "remote"]`
+- `encoder_class` _object_ - JSON encoder class
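+
+For example, a minimal sketch mirroring the test suite (the target file name is illustrative):
+
+```python
+from frictionless import Package, Resource
+
+# Inline data has no file on disk, so it must be resolved to be zipped
+package = Package(resources=[Resource(name="table", data=[["id"], [1], [2]])])
+package.to_zip("package.zip", resolve=["inline"])
+```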
**Raises**:
@@ -6110,7 +6115,7 @@ Convert resource to File
#### to\_zip
```python
- | to_zip(target, encoder_class=None)
+ | to_zip(target, *, resolve=[], encoder_class=None)
```
Save resource to a zip
@@ -6118,6 +6123,11 @@ Save resource to a zip
**Arguments**:
- `target` _str_ - target path
+- `resolve` _str[]_ - Data sources to resolve.
+ For "inline" data it means saving them as CSV files and including them in the ZIP.
+ For "remote" data it means downloading them and including them in the ZIP.
+ For example, `resolve=["inline", "remote"]`
+- `encoder_class` _object_ - JSON encoder class
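+
+For example, a minimal sketch (the URL is illustrative):
+
+```python
+from frictionless import Resource
+
+# Remote data is downloaded and packed into the archive when resolved
+resource = Resource(path="https://example.com/table.csv")
+resource.to_zip("resource.zip", resolve=["remote"])
+```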
**Raises**:
diff --git a/frictionless/package.py b/frictionless/package.py
index d53b9b3e1b..2b517f687e 100644
--- a/frictionless/package.py
+++ b/frictionless/package.py
@@ -2,6 +2,7 @@
import json
import glob
import zipfile
+import tempfile
from copy import deepcopy
from .exception import FrictionlessException
from .metadata import Metadata
@@ -382,33 +383,80 @@ def to_copy(self):
trusted=self.__trusted,
)
- # NOTE: support multipart
- def to_zip(self, target, encoder_class=None):
+ # TODO: support multipart
+ # TODO: there is 100% duplication with resource.to_zip
+ def to_zip(self, target, *, resolve=[], encoder_class=None):
"""Save package to a zip
Parameters:
target (str): target path
+ resolve (str[]): Data sources to resolve.
+ For "inline" data it means saving them as CSV files and including them in the ZIP.
+ For "remote" data it means downloading them and including them in the ZIP.
+ For example, `resolve=["inline", "remote"]`
+ encoder_class (object): JSON encoder class
Raises:
FrictionlessException: on any error
"""
try:
with zipfile.ZipFile(target, "w") as zip:
- descriptor = self.copy()
- for resource in self.resources:
- if resource.inline:
- continue
- if resource.remote:
- continue
+ package_descriptor = self.to_dict()
+ for index, resource in enumerate(self.resources):
+ descriptor = package_descriptor["resources"][index]
+
+ # Multipart data
if resource.multipart:
- continue
- if not helpers.is_safe_path(resource.path):
- continue
- zip.write(resource.source, resource.path)
- descriptor = json.dumps(
- descriptor, indent=2, ensure_ascii=False, cls=encoder_class
+ note = "Zipping multipart resource is not yet supported"
+ raise FrictionlessException(errors.ResourceError(note=note))
+
+ # Inline data
+ elif resource.inline:
+ if "inline" in resolve:
+ path = f"{resource.name}.csv"
+ descriptor["path"] = path
+ del descriptor["data"]
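+ # Materialize the inline data as a temporary CSV file and pack it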
+ with tempfile.NamedTemporaryFile() as file:
+ resource.write(file.name, format="csv")
+ zip.write(file.name, path)
+ elif not isinstance(resource.data, list):
+ note = f"Use resolve argument to zip {resource.data}"
+ raise FrictionlessException(errors.ResourceError(note=note))
+
+ # Remote data
+ elif resource.remote:
+ if "remote" in resolve:
+ path = f"{resource.name}.{resource.format}"
+ descriptor["path"] = path
+ with tempfile.NamedTemporaryFile() as file:
+ byte_stream = resource.read_byte_stream()
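+ # Stream the remote file to disk in 1KB chunks to keep memory use flat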
+ while True:
+ chunk = byte_stream.read(1024)
+ if not chunk:
+ break
+ file.write(chunk)
+ file.flush()
+ zip.write(file.name, path)
+
+ # Local data
+ else:
+ path = resource.path
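+ # Unsafe (e.g. absolute) paths are rewritten to the archive root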
+ if not helpers.is_safe_path(path):
+ path = f"{resource.name}.{resource.format}"
+ descriptor["path"] = path
+ zip.write(resource.source, path)
+
+ # Metadata
+ zip.writestr(
+ "datapackage.json",
+ json.dumps(
+ package_descriptor,
+ indent=2,
+ ensure_ascii=False,
+ cls=encoder_class,
+ ),
)
- zip.writestr("datapackage.json", descriptor)
+
except Exception as exception:
error = errors.PackageError(note=str(exception))
raise FrictionlessException(error) from exception
diff --git a/frictionless/resource.py b/frictionless/resource.py
index e14e435b20..31940182db 100644
--- a/frictionless/resource.py
+++ b/frictionless/resource.py
@@ -3,6 +3,7 @@
import json
import petl
import zipfile
+import tempfile
import warnings
from copy import deepcopy
from importlib import import_module
@@ -791,33 +792,79 @@ def to_file(self, **options):
options.setdefault("control", self.control)
return module.File(**options)
- # NOTE: support multipart
- def to_zip(self, target, encoder_class=None):
+ # TODO: support multipart
+ # TODO: there is 100% duplication with package.to_zip
+ def to_zip(self, target, *, resolve=[], encoder_class=None):
"""Save resource to a zip
Parameters:
target (str): target path
+ resolve (str[]): Data sources to resolve.
+ For "inline" data it means saving them as CSV files and including them in the ZIP.
+ For "remote" data it means downloading them and including them in the ZIP.
+ For example, `resolve=["inline", "remote"]`
+ encoder_class (object): JSON encoder class
Raises:
FrictionlessException: on any error
"""
try:
with zipfile.ZipFile(target, "w") as zip:
- descriptor = self.copy()
for resource in [self]:
- if resource.inline:
- continue
- if resource.remote:
- continue
+ descriptor = self.to_dict()
+
+ # Multipart data
if resource.multipart:
- continue
- if not helpers.is_safe_path(resource.path):
- continue
- zip.write(resource.source, resource.path)
- descriptor = json.dumps(
- descriptor, indent=2, ensure_ascii=False, cls=encoder_class
- )
- zip.writestr("dataresource.json", descriptor)
+ note = "Zipping multipart resource is not yet supported"
+ raise FrictionlessException(errors.ResourceError(note=note))
+
+ # Inline data
+ elif resource.inline:
+ if "inline" in resolve:
+ path = f"{resource.name}.csv"
+ descriptor["path"] = path
+ del descriptor["data"]
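+ # Materialize the inline data as a temporary CSV file and pack it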
+ with tempfile.NamedTemporaryFile() as file:
+ resource.write(file.name, format="csv")
+ zip.write(file.name, path)
+ elif not isinstance(resource.data, list):
+ note = f"Use resolve argument to zip {resource.data}"
+ raise FrictionlessException(errors.ResourceError(note=note))
+
+ # Remote data
+ elif resource.remote:
+ if "remote" in resolve:
+ path = f"{resource.name}.{resource.format}"
+ descriptor["path"] = path
+ with tempfile.NamedTemporaryFile() as file:
+ byte_stream = resource.read_byte_stream()
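+ # Stream the remote file to disk in 1KB chunks to keep memory use flat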
+ while True:
+ chunk = byte_stream.read(1024)
+ if not chunk:
+ break
+ file.write(chunk)
+ file.flush()
+ zip.write(file.name, path)
+
+ # Local data
+ else:
+ path = resource.path
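+ # Unsafe (e.g. absolute) paths are rewritten to the archive root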
+ if not helpers.is_safe_path(path):
+ path = f"{resource.name}.{resource.format}"
+ descriptor["path"] = path
+ zip.write(resource.source, path)
+
+ # Metadata
+ zip.writestr(
+ "dataresource.json",
+ json.dumps(
+ descriptor,
+ indent=2,
+ ensure_ascii=False,
+ cls=encoder_class,
+ ),
+ )
+
except (IOError, zipfile.BadZipfile, zipfile.LargeZipFile) as exception:
error = errors.ResourceError(note=str(exception))
raise FrictionlessException(error) from exception
diff --git a/tests/conftest.py b/tests/conftest.py
index 57e21f5282..bd285a2140 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -12,18 +12,24 @@
# Fixtures
-# TODO: create the same for other databases
@pytest.fixture
def database_url(tmpdir):
path = str(tmpdir.join("database.db"))
conn = sqlite3.connect(path)
- conn.execute("CREATE TABLE data (id INTEGER PRIMARY KEY, name TEXT)")
- conn.execute("INSERT INTO data VALUES (1, 'english'), (2, '中国人')")
+ conn.execute("CREATE TABLE 'table' (id INTEGER PRIMARY KEY, name TEXT)")
+ conn.execute("INSERT INTO 'table' VALUES (1, 'english'), (2, '中国人')")
conn.commit()
yield "sqlite:///%s" % path
conn.close()
+# TODO: create the same for other databases
+@pytest.fixture
+def sqlite_url(tmpdir):
+ path = str(tmpdir.join("database.db"))
+ return "sqlite:///%s" % path
+
+
# Settings
diff --git a/tests/plugins/test_sql.py b/tests/plugins/test_sql.py
index 1267d7f3d0..fbec6bc844 100644
--- a/tests/plugins/test_sql.py
+++ b/tests/plugins/test_sql.py
@@ -13,7 +13,7 @@
def test_table_sql(database_url):
- dialect = SqlDialect(table="data")
+ dialect = SqlDialect(table="table")
with Table(database_url, dialect=dialect) as table:
assert table.schema == {
"fields": [
@@ -27,14 +27,14 @@ def test_table_sql(database_url):
def test_table_sql_order_by(database_url):
- dialect = SqlDialect(table="data", order_by="id")
+ dialect = SqlDialect(table="table", order_by="id")
with Table(database_url, dialect=dialect) as table:
assert table.header == ["id", "name"]
assert table.read_data() == [[1, "english"], [2, "中国人"]]
def test_table_sql_order_by_desc(database_url):
- dialect = SqlDialect(table="data", order_by="id desc")
+ dialect = SqlDialect(table="table", order_by="id desc")
with Table(database_url, dialect=dialect) as table:
assert table.header == ["id", "name"]
assert table.read_data() == [[2, "中国人"], [1, "english"]]
@@ -50,7 +50,7 @@ def test_table_sql_table_is_required_error(database_url):
def test_table_sql_headers_false(database_url):
- dialect = SqlDialect(table="data")
+ dialect = SqlDialect(table="table")
with Table(database_url, dialect=dialect, headers=False) as table:
assert table.header == []
assert table.read_data() == [["id", "name"], [1, "english"], [2, "中国人"]]
@@ -69,8 +69,8 @@ def test_table_sql_write(database_url):
# Storage
-def test_storage_types(database_url):
- engine = sa.create_engine(database_url)
+def test_storage_types(sqlite_url):
+ engine = sa.create_engine(sqlite_url)
prefix = "prefix_"
# Export/Import
@@ -126,8 +126,8 @@ def test_storage_types(database_url):
storage.delete_package(target.resource_names)
-def test_storage_integrity(database_url):
- engine = sa.create_engine(database_url)
+def test_storage_integrity(sqlite_url):
+ engine = sa.create_engine(sqlite_url)
prefix = "prefix_"
# Export/Import
@@ -184,8 +184,8 @@ def test_storage_integrity(database_url):
storage.delete_package(target.resource_names)
-def test_storage_constraints(database_url):
- engine = sa.create_engine(database_url)
+def test_storage_constraints(sqlite_url):
+ engine = sa.create_engine(sqlite_url)
prefix = "prefix_"
# Export/Import
@@ -235,8 +235,8 @@ def test_storage_constraints(database_url):
("maximum", 9),
],
)
-def test_storage_constraints_not_valid_error(database_url, field_name, cell):
- engine = sa.create_engine(database_url)
+def test_storage_constraints_not_valid_error(sqlite_url, field_name, cell):
+ engine = sa.create_engine(sqlite_url)
package = Package("data/storage/constraints.json")
resource = package.get_resource("constraints")
# We set an invalid cell to the data property
@@ -248,8 +248,8 @@ def test_storage_constraints_not_valid_error(database_url, field_name, cell):
resource.to_sql(engine=engine, force=True)
-def test_storage_read_resource_not_existent_error(database_url):
- engine = sa.create_engine(database_url)
+def test_storage_read_resource_not_existent_error(sqlite_url):
+ engine = sa.create_engine(sqlite_url)
storage = SqlStorage(engine=engine)
with pytest.raises(FrictionlessException) as excinfo:
storage.read_resource("bad")
@@ -258,8 +258,8 @@ def test_storage_read_resource_not_existent_error(database_url):
assert error.note.count("does not exist")
-def test_storage_write_resource_existent_error(database_url):
- engine = sa.create_engine(database_url)
+def test_storage_write_resource_existent_error(sqlite_url):
+ engine = sa.create_engine(sqlite_url)
resource = Resource(path="data/table.csv")
storage = resource.to_sql(engine=engine)
with pytest.raises(FrictionlessException) as excinfo:
@@ -271,8 +271,8 @@ def test_storage_write_resource_existent_error(database_url):
storage.delete_package(list(storage))
-def test_storage_delete_resource_not_existent_error(database_url):
- engine = sa.create_engine(database_url)
+def test_storage_delete_resource_not_existent_error(sqlite_url):
+ engine = sa.create_engine(sqlite_url)
storage = SqlStorage(engine=engine)
with pytest.raises(FrictionlessException) as excinfo:
storage.delete_resource("bad")
@@ -281,11 +281,13 @@ def test_storage_delete_resource_not_existent_error(database_url):
assert error.note.count("does not exist")
-def test_storage_views_support(database_url):
- engine = sa.create_engine(database_url)
- engine.execute("CREATE VIEW data_view AS SELECT * FROM data")
+def test_storage_views_support(sqlite_url):
+ engine = sa.create_engine(sqlite_url)
+ engine.execute("CREATE TABLE 'table' (id INTEGER PRIMARY KEY, name TEXT)")
+ engine.execute("INSERT INTO 'table' VALUES (1, 'english'), (2, '中国人')")
+ engine.execute("CREATE VIEW 'table_view' AS SELECT * FROM 'table'")
storage = SqlStorage(engine=engine)
- resource = storage.read_resource("data_view")
+ resource = storage.read_resource("table_view")
assert resource.schema == {
"fields": [
{"name": "id", "type": "integer"},
@@ -298,10 +300,10 @@ def test_storage_views_support(database_url):
]
-def test_storage_resource_url_argument(database_url):
+def test_storage_resource_url_argument(sqlite_url):
source = Resource(path="data/table.csv")
- source.to_sql(url=database_url)
- target = Resource.from_sql(name="table", url=database_url)
+ source.to_sql(url=sqlite_url)
+ target = Resource.from_sql(name="table", url=sqlite_url)
assert target.schema == {
"fields": [
{"name": "id", "type": "integer"},
@@ -314,10 +316,10 @@ def test_storage_resource_url_argument(database_url):
]
-def test_storage_package_url_argument(database_url):
+def test_storage_package_url_argument(sqlite_url):
source = Package(resources=[Resource(path="data/table.csv")])
- source.to_sql(url=database_url)
- target = Package.from_sql(url=database_url)
+ source.to_sql(url=sqlite_url)
+ target = Package.from_sql(url=sqlite_url)
assert target.get_resource("table").schema == {
"fields": [
{"name": "id", "type": "integer"},
@@ -453,7 +455,7 @@ def test_postgresql_storage_integrity():
# TODO: recover enum support
@pytest.mark.ci
@pytest.mark.skip
-def test_postgresql_storage_constraints(database_url):
+def test_postgresql_storage_constraints(sqlite_url):
engine = sa.create_engine(os.environ["POSTGRESQL_URL"])
prefix = "prefix_"
@@ -505,7 +507,7 @@ def test_postgresql_storage_constraints(database_url):
("maximum", 9),
],
)
-def test_postgresql_storage_constraints_not_valid_error(database_url, field_name, cell):
+def test_postgresql_storage_constraints_not_valid_error(sqlite_url, field_name, cell):
engine = sa.create_engine(os.environ["POSTGRESQL_URL"])
package = Package("data/storage/constraints.json")
resource = package.get_resource("constraints")
diff --git a/tests/test_package.py b/tests/test_package.py
index a5316887e3..ad844aea1c 100644
--- a/tests/test_package.py
+++ b/tests/test_package.py
@@ -486,16 +486,125 @@ def test_package_to_zip(tmpdir):
# Read
package = Package(target)
- assert package == {
- "name": "name",
- "resources": [{"name": "name", "path": "table.csv"}],
- }
+ assert package.name == "name"
+ assert package.get_resource("name").name == "name"
+ assert package.get_resource("name").path == "table.csv"
assert package.get_resource("name").read_rows() == [
{"id": 1, "name": "english"},
{"id": 2, "name": "中国人"},
]
+def test_package_to_zip_withdir_path(tmpdir):
+
+ # Write
+ target = os.path.join(tmpdir, "package.zip")
+ resource = Resource(path="data/table.csv")
+ package = Package(resources=[resource])
+ package.to_zip(target)
+
+ # Read
+ package = Package(target)
+ assert package.get_resource("table").path == "data/table.csv"
+ assert package.get_resource("table").read_rows() == [
+ {"id": 1, "name": "english"},
+ {"id": 2, "name": "中国人"},
+ ]
+
+
+def test_package_to_zip_absolute_path(tmpdir):
+
+ # Write
+ target = os.path.join(tmpdir, "package.zip")
+ resource = Resource(path=os.path.abspath("data/table.csv"), trusted=True)
+ package = Package(resources=[resource], trusted=True)
+ package.to_zip(target)
+
+ # Read
+ package = Package(target)
+ assert package.get_resource("table").path == "table.csv"
+ assert package.get_resource("table").read_rows() == [
+ {"id": 1, "name": "english"},
+ {"id": 2, "name": "中国人"},
+ ]
+
+
+def test_package_to_zip_resolve_inline(tmpdir):
+
+ # Write
+ target = os.path.join(tmpdir, "package.zip")
+ resource = Resource(name="table", data=[["id", "name"], [1, "english"], [2, "中国人"]])
+ package = Package(resources=[resource])
+ package.to_zip(target, resolve=["inline"])
+
+ # Read
+ package = Package(target)
+ assert package.get_resource("table").path == "table.csv"
+ assert package.get_resource("table").read_rows() == [
+ {"id": 1, "name": "english"},
+ {"id": 2, "name": "中国人"},
+ ]
+
+
+def test_package_to_zip_resolve_inline_sql(tmpdir, database_url):
+
+ # Write
+ target = os.path.join(tmpdir, "package.zip")
+ resource = Resource.from_sql(name="table", url=database_url)
+ package = Package(resources=[resource])
+ package.to_zip(target, resolve=["inline"])
+
+ # Read
+ package = Package(target)
+ assert package.get_resource("table").path == "table.csv"
+ assert package.get_resource("table").read_rows() == [
+ {"id": 1, "name": "english"},
+ {"id": 2, "name": "中国人"},
+ ]
+
+
+@pytest.mark.ci
+def test_package_to_zip_resolve_remote(tmpdir):
+
+ # Write
+ target = os.path.join(tmpdir, "package.zip")
+ resource = Resource(path=BASE_URL % "data/table.csv")
+ package = Package(resources=[resource])
+ package.to_zip(target, resolve=["remote"])
+
+ # Read
+ package = Package(target)
+ assert package.get_resource("table").path == "table.csv"
+ assert package.get_resource("table").read_rows() == [
+ {"id": 1, "name": "english"},
+ {"id": 2, "name": "中国人"},
+ ]
+
+
+@pytest.mark.ci
+def test_package_to_zip_resolve_inline_and_remote(tmpdir):
+
+ # Write
+ target = os.path.join(tmpdir, "package.zip")
+ resource1 = Resource(name="name1", data=[["id", "name"], [1, "english"], [2, "中国人"]])
+ resource2 = Resource(name="name2", path=BASE_URL % "data/table.csv")
+ package = Package(resources=[resource1, resource2])
+ package.to_zip(target, resolve=["inline", "remote"])
+
+ # Read
+ package = Package(target)
+ assert package.get_resource("name1").path == "name1.csv"
+ assert package.get_resource("name1").read_rows() == [
+ {"id": 1, "name": "english"},
+ {"id": 2, "name": "中国人"},
+ ]
+ assert package.get_resource("name2").path == "name2.csv"
+ assert package.get_resource("name2").read_rows() == [
+ {"id": 1, "name": "english"},
+ {"id": 2, "name": "中国人"},
+ ]
+
+
@pytest.mark.ci
def test_package_to_zip_source_remote(tmpdir):
diff --git a/tests/test_resource.py b/tests/test_resource.py
index 6cbed497b7..fcaf6e3c55 100644
--- a/tests/test_resource.py
+++ b/tests/test_resource.py
@@ -662,6 +662,90 @@ def test_resource_to_zip(tmpdir):
]
+def test_resource_to_zip_withdir_path(tmpdir):
+
+ # Write
+ target = os.path.join(tmpdir, "resource.zip")
+ resource = Resource(path="data/table.csv")
+ resource.to_zip(target)
+
+ # Read
+ resource = Resource(target)
+ assert resource == {"path": "data/table.csv"}
+ assert resource.read_rows() == [
+ {"id": 1, "name": "english"},
+ {"id": 2, "name": "中国人"},
+ ]
+
+
+def test_resource_to_zip_absolute_path(tmpdir):
+
+ # Write
+ target = os.path.join(tmpdir, "resource.zip")
+ resource = Resource(path=os.path.abspath("data/table.csv"), trusted=True)
+ resource.to_zip(target)
+
+ # Read
+ resource = Resource(target)
+ assert resource == {"path": "table.csv"}
+ assert resource.read_rows() == [
+ {"id": 1, "name": "english"},
+ {"id": 2, "name": "中国人"},
+ ]
+
+
+def test_resource_to_zip_resolve_inline(tmpdir):
+
+ # Write
+ target = os.path.join(tmpdir, "resource.zip")
+ resource = Resource(name="table", data=[["id", "name"], [1, "english"], [2, "中国人"]])
+ resource.to_zip(target, resolve=["inline"])
+
+ # Read
+ resource = Resource(target)
+ assert resource.name == "table"
+ assert resource.path == "table.csv"
+ assert resource.read_rows() == [
+ {"id": 1, "name": "english"},
+ {"id": 2, "name": "中国人"},
+ ]
+
+
+def test_resource_to_zip_resolve_inline_sql(tmpdir, database_url):
+
+ # Write
+ target = os.path.join(tmpdir, "resource.zip")
+ resource = Resource.from_sql(name="table", url=database_url)
+ resource.to_zip(target, resolve=["inline"])
+
+ # Read
+ resource = Resource(target)
+ assert resource.name == "table"
+ assert resource.path == "table.csv"
+ assert resource.read_rows() == [
+ {"id": 1, "name": "english"},
+ {"id": 2, "name": "中国人"},
+ ]
+
+
+@pytest.mark.ci
+def test_resource_to_zip_resolve_remote(tmpdir):
+
+ # Write
+ target = os.path.join(tmpdir, "resource.zip")
+ resource = Resource(path=BASE_URL % "data/table.csv")
+ resource.to_zip(target, resolve=["remote"])
+
+ # Read
+ resource = Resource(target)
+ assert resource.name == "table"
+ assert resource.path == "table.csv"
+ assert resource.read_rows() == [
+ {"id": 1, "name": "english"},
+ {"id": 2, "name": "中国人"},
+ ]
+
+
@pytest.mark.ci
def test_resource_to_zip_source_remote(tmpdir):