slidoapp · y0j0 · Dec 20, 2023 · Dec 20, 2023
diff --git a/README.md b/README.md
@@ -84,7 +84,7 @@ db.list_partitions(table="nyc.taxis")
 
 ```python
 query = "SELECT * FROM nyc.taxis WHERE trip_distance > 40 ORDER BY tolls_amount DESC"
-df = db.select(table="nyc.taxis", partition_filter="payment_type = 1", sql=query)
+df = db.select(table="nyc.taxis", partition_filter="payment_type = 1", sql=query).read_pandas()
 ```
 
 ## Playground
@@ -105,9 +105,75 @@ init the Iceberg data and catalog.
 Navigate to [localhost:8888](http://localhost:8888). Then select example Jupyter notebook you want to run and enjoy Duckberg!
 
 ## Development
+For development, Python 3.10 is recommended. If you manage your Python versions with
+[Pyenv](https://github.com/pyenv/pyenv), use
 
-TBD ...
+```bash
+pyenv install 3.10.13
+pyenv global 3.10.13
+```
+
+then create and activate a virtual environment
+```bash
+python -m venv venv
+source venv/bin/activate 
+```
+
+upgrade pip and install dependencies
+
+```bash
+pip install --upgrade pip
+pip install .
+```
+
+then start the Docker containers that provide the Iceberg catalog and the file storage holding the Iceberg files
+
+```shell
+cd playground
+docker-compose up -d
+```
 
+Initialize the data by running the [Init Jupyter notebook](http://localhost:8889/notebooks/000%20Init%20Iceberg%20data.ipynb), then
+run/test Duckberg using the file `tests/duckberg-sample.py`
+
+### Style & Formatting
+
+Use 
+
+```bash
+hatch run lint:fmt
+hatch run lint:style
+```
+
+## Building package
+
+The Duckberg project is managed by [Hatch](https://hatch.pypa.io/latest/). Follow the [Hatch docs](https://hatch.pypa.io/latest/install/) for installation instructions,
+or just install it with the command
+
+```shell
+brew install hatch
+```
+
+or 
+
+```shell
+pip install hatch
+```
+
+Bump the package version with
+```bash
+hatch version "x.x.x"
+```
+
+Build 
+```bash
+hatch build
+```
+
+and publish
+```bash
+hatch publish
+```
 ## License
 
 `duckberg` is distributed under the terms of the [Apache 2.0](https://www.apache.org/licenses/LICENSE-2.0.txt) license.
diff --git a/playground/init/000 Init Iceberg data.ipynb b/playground/init/000 Init Iceberg data.ipynb
@@ -90,7 +90,7 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "23/11/28 06:50:40 WARN SparkSession: Using an existing Spark session; only runtime SQL configurations will take effect.\n",
+      "23/12/19 22:05:28 WARN SparkSession: Using an existing Spark session; only runtime SQL configurations will take effect.\n",
       "                                                                                \r"
      ]
     }

diff --git a/playground/jupyter/notebooks/001 - Duckberg simple query - REST Iceberg catalog.ipynb b/playground/jupyter/notebooks/001 - Duckberg simple query - REST Iceberg catalog.ipynb
@@ -179,7 +179,7 @@
    "outputs": [],
    "source": [
     "query = \"SELECT * FROM 'nyc.taxis' WHERE trip_distance > 40 ORDER BY tolls_amount DESC\"\n",
-    "df = db.select(table=\"nyc.taxis\", partition_filter=\"payment_type = 1\", sql=query)\n",
+    "df = db.select(table=\"nyc.taxis\", partition_filter=\"payment_type = 1\", sql=query).read_pandas()\n",
     "df.head(10)"
    ]
   },
@@ -207,7 +207,7 @@
    "outputs": [],
    "source": [
     "query = \"SELECT passenger_count, AVG(tip_amount) as tip_amount_sum FROM 'nyc.taxis' GROUP BY passenger_count\"\n",
-    "df = db.select(table=\"nyc.taxis\", partition_filter=\"payment_type = 1\", sql=query)\n",
+    "df = db.select(table=\"nyc.taxis\", partition_filter=\"payment_type = 1\", sql=query).read_pandas()\n",
     "df.head(10)"
    ]
   }

diff --git a/playground/jupyter/requirements.txt b/playground/jupyter/requirements.txt
@@ -1,2 +1,2 @@
-duckberg==0.0.2
+duckberg==0.0.3
 pandas
diff --git a/pyproject.toml b/pyproject.toml
@@ -7,23 +7,15 @@ name = "duckberg"
 dynamic = ["version"]
 description = ''
 readme = "README.md"
-requires-python = ">=3.7"
-license = "Apache 2.0"
+requires-python = ">=3.10"
+license = "Apache-2.0"
 keywords = []
 authors = [
   { name = "gregor", email = "gkaretka@cisco.com" },
   { name = "jojo", email = "jcechovs@cisco.com" },
 ]
 classifiers = [
-  "Development Status :: 4 - Beta",
-  "Programming Language :: Python",
-  "Programming Language :: Python :: 3.7",
-  "Programming Language :: Python :: 3.8",
-  "Programming Language :: Python :: 3.9",
-  "Programming Language :: Python :: 3.10",
-  "Programming Language :: Python :: 3.11",
-  "Programming Language :: Python :: Implementation :: CPython",
-  "Programming Language :: Python :: Implementation :: PyPy",
+  "Programming Language :: Python :: 3.10"
 ]
 
 dependencies = [
@@ -32,12 +24,12 @@ dependencies = [
 ]
 
 [[tool.hatch.envs.test.matrix]]
-python = ["3.7", "3.8", "3.9", "3.10", "3.11"]
+python = ["3.10"]
 
 [project.urls]
-Documentation = "https://github.com/unknown/duckberg#readme"
-Issues = "https://github.com/unknown/duckberg/issues"
-Source = "https://github.com/unknown/duckberg"
+Documentation = "https://github.com/slidoapp/duckberg/blob/main/README.md"
+Issues = "https://github.com/slidoapp/duckberg/issues"
+Source = "https://github.com/slidoapp/duckberg"
 
 [tool.hatch.version]
 path = "src/duckberg/__about__.py"
@@ -60,7 +52,7 @@ cov = [
 ]
 
 [[tool.hatch.envs.all.matrix]]
-python = ["3.7", "3.8", "3.9", "3.10", "3.11"]
+python = ["3.10"]
 
 [tool.hatch.envs.lint]
 detached = true

diff --git a/src/duckberg/__about__.py b/src/duckberg/__about__.py
@@ -1,4 +1,4 @@
 # SPDX-FileCopyrightText: 2023-present jojo <jcechovs@cisco.com>
 #
 # SPDX-License-Identifier: MIT
-__version__ = "0.0.2"
+__version__ = "0.0.3"
diff --git a/tests/__init__.py → tests/duckberg-sample.py b/tests/__init__.py → tests/duckberg-sample.py
@@ -27,4 +27,6 @@
 
 query: str = "SELECT * FROM 'nyc.taxis' WHERE trip_distance > 40 ORDER BY tolls_amount DESC"
 
-df = db.select(table="nyc.taxis", partition_filter="payment_type = 1", sql=query)
+dd = db.select(table="nyc.taxis", partition_filter="payment_type = 1", sql=query)
+
+df = dd.read_pandas()