From 842dd9923c9d94aef76b4a1eb94665e51bb4915e Mon Sep 17 00:00:00 2001 From: Joel Natividad <1980690+jqnatividad@users.noreply.github.com> Date: Tue, 23 Jan 2024 12:37:09 -0500 Subject: [PATCH] sync the file read buffer with the copy_expert read size; since we don't want super big network packets, reduce the default buffer size to 64k --- datapusher/dot-env.template | 4 ++-- datapusher/jobs.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/datapusher/dot-env.template b/datapusher/dot-env.template index 5de2dbb..60bd359 100644 --- a/datapusher/dot-env.template +++ b/datapusher/dot-env.template @@ -14,8 +14,8 @@ WRITE_ENGINE_URL = 'postgresql://datapusher:YOURPASSWORD@localhost/datastore_def SQLALCHEMY_DATABASE_URI = 'postgresql://datapusher_jobs:YOURPASSWORD@localhost/datapusher_jobs' # READ BUFFER SIZE IN BYTES WHEN READING CSV FILE WHEN USING POSTGRES COPY -# default 1mb = 1048576 -COPY_READBUFFER_SIZE = 1048576 +# default 64k = 65536 +COPY_READBUFFER_SIZE = 65536 # =============== DOWNLOAD SETTINGS ============== # 25mb, this is ignored if either PREVIEW_ROWS > 0 diff --git a/datapusher/jobs.py b/datapusher/jobs.py index 38a11c0..624b35c 100644 --- a/datapusher/jobs.py +++ b/datapusher/jobs.py @@ -1445,7 +1445,7 @@ def _push_to_datastore(task_id, input, dry_run=False, temp_dir=None): # specify a 1MB buffer size for COPY read from disk with open(tmp, "rb", copy_readbuffer_size) as f: try: - cur.copy_expert(copy_sql, f) + cur.copy_expert(copy_sql, f, size=copy_readbuffer_size) except psycopg2.Error as e: raise util.JobError("Postgres COPY failed: {}".format(e)) else: