From 4017befa80fd862040af0ddf68c7ba31a78cb8ac Mon Sep 17 00:00:00 2001
From: Fikre Mengistu <fikremen@gmail.com>
Date: Fri, 24 Nov 2023 14:40:20 -0500
Subject: [PATCH 1/5] supporting spaces in column names for csv files

---
 evadb/parser/lark_visitor/_common_clauses_ids.py   | 5 +++++
 test/integration_tests/short/test_load_executor.py | 4 ++--
 test/util.py                                       | 2 +-
 3 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/evadb/parser/lark_visitor/_common_clauses_ids.py b/evadb/parser/lark_visitor/_common_clauses_ids.py
index 4dd3080dd..6f4b78c7c 100644
--- a/evadb/parser/lark_visitor/_common_clauses_ids.py
+++ b/evadb/parser/lark_visitor/_common_clauses_ids.py
@@ -43,6 +43,11 @@ def full_id(self, tree):
             return (self.visit(tree.children[0]), self.visit(tree.children[1]))
 
     def uid(self, tree):
+        if (hasattr(tree.children[0],"type") and tree.children[0].type == "REVERSE_QUOTE_ID"):
+            # tree.children[0].value = tree.children[0].value.replace("`","")
+            temp = str(tree.children[0]).replace("`","")
+            tree.children[0].type = "simple_id"
+            return temp
         return self.visit(tree.children[0])
 
     def full_column_name(self, tree):
diff --git a/test/integration_tests/short/test_load_executor.py b/test/integration_tests/short/test_load_executor.py
index afc45c592..317704300 100644
--- a/test/integration_tests/short/test_load_executor.py
+++ b/test/integration_tests/short/test_load_executor.py
@@ -84,7 +84,7 @@ def test_should_load_csv_in_table(self):
 
             CREATE TABLE IF NOT EXISTS MyVideoCSV (
                 id INTEGER UNIQUE,
-                frame_id INTEGER,
+                `frame_id` INTEGER,
                 video_id INTEGER,
                 dataset_name TEXT(30),
                 label TEXT(30),
@@ -100,7 +100,7 @@ def test_should_load_csv_in_table(self):
         execute_query_fetch_all(self.evadb, load_query)
 
         # execute a select query
-        select_query = """SELECT id, frame_id, video_id,
+        select_query = """SELECT id, `frame_id`, video_id,
                           dataset_name, label, bbox,
                           object_id
                           FROM MyVideoCSV;"""
diff --git a/test/util.py b/test/util.py
index 3a23a6ff5..7f9c3e4ea 100644
--- a/test/util.py
+++ b/test/util.py
@@ -304,7 +304,7 @@ def create_sample_csv(num_frames=NUM_FRAMES):
             random_coords = 200 + 300 * np.random.random(4)
             sample_meta[index] = {
                 "id": index,
-                "frame_id": frame_id,
+                "frame id": frame_id,
                 "video_id": video_id,
                 "dataset_name": "test_dataset",
                 "label": sample_labels[np.random.choice(len(sample_labels))],

From f9942e49dc9133e1bacc9651af7abd71bd84847a Mon Sep 17 00:00:00 2001
From: Fikre Mengistu <fikremen@gmail.com>
Date: Fri, 24 Nov 2023 15:33:24 -0500
Subject: [PATCH 2/5] refactoring and adding test case

---
 .../lark_visitor/_common_clauses_ids.py       |  5 +-
 .../short/test_load_executor.py               | 47 ++++++++++++++++++-
 test/util.py                                  | 32 ++++++++++++-
 3 files changed, 78 insertions(+), 6 deletions(-)

diff --git a/evadb/parser/lark_visitor/_common_clauses_ids.py b/evadb/parser/lark_visitor/_common_clauses_ids.py
index 6f4b78c7c..e1c59a428 100644
--- a/evadb/parser/lark_visitor/_common_clauses_ids.py
+++ b/evadb/parser/lark_visitor/_common_clauses_ids.py
@@ -44,10 +44,9 @@ def full_id(self, tree):
 
     def uid(self, tree):
         if (hasattr(tree.children[0],"type") and tree.children[0].type == "REVERSE_QUOTE_ID"):
-            # tree.children[0].value = tree.children[0].value.replace("`","")
-            temp = str(tree.children[0]).replace("`","")
             tree.children[0].type = "simple_id"
-            return temp
+            non_tick_string = str(tree.children[0]).replace("`","")
+            return non_tick_string
         return self.visit(tree.children[0])
 
     def full_column_name(self, tree):
diff --git a/test/integration_tests/short/test_load_executor.py b/test/integration_tests/short/test_load_executor.py
index 317704300..cd663491a 100644
--- a/test/integration_tests/short/test_load_executor.py
+++ b/test/integration_tests/short/test_load_executor.py
@@ -19,6 +19,7 @@
 from test.util import (
     create_dummy_csv_batches,
     create_sample_csv,
+    create_csv_with_comlumn_name_spaces,
     create_sample_video,
     file_remove,
     get_evadb_for_testing,
@@ -45,6 +46,7 @@ def setUp(self):
             f"{EvaDB_ROOT_DIR}/test/data/uadetrac/small-data/MVI_20011/*.jpg"
         )
         self.csv_file_path = create_sample_csv()
+        self.csv_file_with_spaces_path = create_csv_with_comlumn_name_spaces()
 
     def tearDown(self):
         shutdown_ray()
@@ -84,7 +86,7 @@ def test_should_load_csv_in_table(self):
 
             CREATE TABLE IF NOT EXISTS MyVideoCSV (
                 id INTEGER UNIQUE,
-                `frame_id` INTEGER,
+                frame_id INTEGER,
                 video_id INTEGER,
                 dataset_name TEXT(30),
                 label TEXT(30),
@@ -100,7 +102,7 @@ def test_should_load_csv_in_table(self):
         execute_query_fetch_all(self.evadb, load_query)
 
         # execute a select query
-        select_query = """SELECT id, `frame_id`, video_id,
+        select_query = """SELECT id, frame_id, video_id,
                           dataset_name, label, bbox,
                           object_id
                           FROM MyVideoCSV;"""
@@ -117,6 +119,47 @@ def test_should_load_csv_in_table(self):
         drop_query = "DROP TABLE IF EXISTS MyVideoCSV;"
         execute_query_fetch_all(self.evadb, drop_query)
 
+    ###################################
+    # integration tests for csv files with spaces in column names
+    def test_should_load_csv_in_table_with_spaces_in_column_name(self):
+        # loading a csv requires a table to be created first
+        create_table_query = """
+
+            CREATE TABLE IF NOT EXISTS MyVideoCSV (
+                id INTEGER UNIQUE, 
+                `frame id` INTEGER,
+                `video id` INTEGER,
+                `dataset name` TEXT(30),
+                label TEXT(30),
+                bbox NDARRAY FLOAT32(4),
+                `object id` INTEGER
+            );
+
+            """
+        execute_query_fetch_all(self.evadb, create_table_query)
+
+        # load the CSV
+        load_query = f"LOAD CSV '{self.csv_file_with_spaces_path}' INTO MyVideoCSV;"
+        execute_query_fetch_all(self.evadb, load_query)
+
+        # execute a select query
+        select_query = """SELECT id, `frame id`, `video id`,
+                          `dataset name`, label, bbox,
+                          `object id`
+                          FROM MyVideoCSV;"""
+
+        actual_batch = execute_query_fetch_all(self.evadb, select_query)
+        actual_batch.sort()
+
+        # assert the batches are equal
+        expected_batch = next(create_dummy_csv_batches())
+        expected_batch.modify_column_alias("myvideocsv")
+        self.assertEqual(actual_batch, expected_batch)
+
+        # clean up
+        drop_query = "DROP TABLE IF EXISTS MyVideoCSV;"
+        execute_query_fetch_all(self.evadb, drop_query)
+
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/test/util.py b/test/util.py
index 7f9c3e4ea..f21830385 100644
--- a/test/util.py
+++ b/test/util.py
@@ -304,7 +304,7 @@ def create_sample_csv(num_frames=NUM_FRAMES):
             random_coords = 200 + 300 * np.random.random(4)
             sample_meta[index] = {
                 "id": index,
-                "frame id": frame_id,
+                "frame_id": frame_id,
                 "video_id": video_id,
                 "dataset_name": "test_dataset",
                 "label": sample_labels[np.random.choice(len(sample_labels))],
@@ -318,6 +318,36 @@ def create_sample_csv(num_frames=NUM_FRAMES):
     df_sample_meta.to_csv(os.path.join(get_tmp_dir(), "dummy.csv"), index=False)
     return os.path.join(get_tmp_dir(), "dummy.csv")
 
+def create_csv_with_comlumn_name_spaces(num_frames=NUM_FRAMES):
+    try:
+        os.remove(os.path.join(get_tmp_dir(), "dummy.csv"))
+    except FileNotFoundError:
+        pass
+
+    sample_meta = {}
+
+    index = 0
+    sample_labels = ["car", "pedestrian", "bicycle"]
+    num_videos = 2
+    for video_id in range(num_videos):
+        for frame_id in range(num_frames):
+            random_coords = 200 + 300 * np.random.random(4)
+            sample_meta[index] = {
+                "id": index,
+                "frame id": frame_id, 
+                "video id": video_id,
+                "dataset name": "test_dataset",
+                "label": sample_labels[np.random.choice(len(sample_labels))],
+                "bbox": ",".join([str(coord) for coord in random_coords]),
+                "object id": np.random.choice(3),
+            }
+
+            index += 1
+
+    df_sample_meta = pd.DataFrame.from_dict(sample_meta, "index")
+    df_sample_meta.to_csv(os.path.join(get_tmp_dir(), "dummy.csv"), index=False)
+    return os.path.join(get_tmp_dir(), "dummy.csv")
+
 
 def create_dummy_csv_batches(target_columns=None):
     if target_columns:

From 292f9e6c3cf2900fad8e6f3bc9f27f4d8423d9f5 Mon Sep 17 00:00:00 2001
From: Fikre Mengistu <fikremen@gmail.com>
Date: Fri, 24 Nov 2023 18:49:01 -0500
Subject: [PATCH 3/5] moving csv creation to inside new test method

---
 test/integration_tests/short/test_load_executor.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/test/integration_tests/short/test_load_executor.py b/test/integration_tests/short/test_load_executor.py
index cd663491a..2494ca0ce 100644
--- a/test/integration_tests/short/test_load_executor.py
+++ b/test/integration_tests/short/test_load_executor.py
@@ -46,7 +46,6 @@ def setUp(self):
             f"{EvaDB_ROOT_DIR}/test/data/uadetrac/small-data/MVI_20011/*.jpg"
         )
         self.csv_file_path = create_sample_csv()
-        self.csv_file_with_spaces_path = create_csv_with_comlumn_name_spaces()
 
     def tearDown(self):
         shutdown_ray()
@@ -126,7 +125,7 @@ def test_should_load_csv_in_table_with_spaces_in_column_name(self):
         create_table_query = """
 
             CREATE TABLE IF NOT EXISTS MyVideoCSV (
-                id INTEGER UNIQUE, 
+                id INTEGER UNIQUE,
                 `frame id` INTEGER,
                 `video id` INTEGER,
                 `dataset name` TEXT(30),
@@ -139,7 +138,7 @@ def test_should_load_csv_in_table_with_spaces_in_column_name(self):
         execute_query_fetch_all(self.evadb, create_table_query)
 
         # load the CSV
-        load_query = f"LOAD CSV '{self.csv_file_with_spaces_path}' INTO MyVideoCSV;"
+        load_query = f"LOAD CSV '{create_csv_with_comlumn_name_spaces()}' INTO MyVideoCSV;"
         execute_query_fetch_all(self.evadb, load_query)
 
         # execute a select query

From 2f5a678a0c42e9605e0d19a317d39d7352110a5e Mon Sep 17 00:00:00 2001
From: americast <sayan.sinha@cc.gatech.edu>
Date: Fri, 24 Nov 2023 20:07:37 -0500
Subject: [PATCH 4/5] linted

---
 evadb/parser/lark_visitor/_common_clauses_ids.py   | 7 +++++--
 test/integration_tests/short/test_load_executor.py | 6 ++++--
 test/util.py                                       | 3 ++-
 3 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/evadb/parser/lark_visitor/_common_clauses_ids.py b/evadb/parser/lark_visitor/_common_clauses_ids.py
index e1c59a428..5267761cf 100644
--- a/evadb/parser/lark_visitor/_common_clauses_ids.py
+++ b/evadb/parser/lark_visitor/_common_clauses_ids.py
@@ -43,9 +43,12 @@ def full_id(self, tree):
             return (self.visit(tree.children[0]), self.visit(tree.children[1]))
 
     def uid(self, tree):
-        if (hasattr(tree.children[0],"type") and tree.children[0].type == "REVERSE_QUOTE_ID"):
+        if (
+            hasattr(tree.children[0], "type")
+            and tree.children[0].type == "REVERSE_QUOTE_ID"
+        ):
             tree.children[0].type = "simple_id"
-            non_tick_string = str(tree.children[0]).replace("`","")
+            non_tick_string = str(tree.children[0]).replace("`", "")
             return non_tick_string
         return self.visit(tree.children[0])
 
diff --git a/test/integration_tests/short/test_load_executor.py b/test/integration_tests/short/test_load_executor.py
index 2494ca0ce..542012211 100644
--- a/test/integration_tests/short/test_load_executor.py
+++ b/test/integration_tests/short/test_load_executor.py
@@ -17,9 +17,9 @@
 import unittest
 from pathlib import Path
 from test.util import (
+    create_csv_with_comlumn_name_spaces,
     create_dummy_csv_batches,
     create_sample_csv,
-    create_csv_with_comlumn_name_spaces,
     create_sample_video,
     file_remove,
     get_evadb_for_testing,
@@ -138,7 +138,9 @@ def test_should_load_csv_in_table_with_spaces_in_column_name(self):
         execute_query_fetch_all(self.evadb, create_table_query)
 
         # load the CSV
-        load_query = f"LOAD CSV '{create_csv_with_comlumn_name_spaces()}' INTO MyVideoCSV;"
+        load_query = (
+            f"LOAD CSV '{create_csv_with_comlumn_name_spaces()}' INTO MyVideoCSV;"
+        )
         execute_query_fetch_all(self.evadb, load_query)
 
         # execute a select query
diff --git a/test/util.py b/test/util.py
index f21830385..7df662b4c 100644
--- a/test/util.py
+++ b/test/util.py
@@ -318,6 +318,7 @@ def create_sample_csv(num_frames=NUM_FRAMES):
     df_sample_meta.to_csv(os.path.join(get_tmp_dir(), "dummy.csv"), index=False)
     return os.path.join(get_tmp_dir(), "dummy.csv")
 
+
 def create_csv_with_comlumn_name_spaces(num_frames=NUM_FRAMES):
     try:
         os.remove(os.path.join(get_tmp_dir(), "dummy.csv"))
@@ -334,7 +335,7 @@ def create_csv_with_comlumn_name_spaces(num_frames=NUM_FRAMES):
             random_coords = 200 + 300 * np.random.random(4)
             sample_meta[index] = {
                 "id": index,
-                "frame id": frame_id, 
+                "frame id": frame_id,
                 "video id": video_id,
                 "dataset name": "test_dataset",
                 "label": sample_labels[np.random.choice(len(sample_labels))],

From fed6659d0105fac94dc3113c87d478e359ba9b32 Mon Sep 17 00:00:00 2001
From: Andy Xu <xzdandy@gmail.com>
Date: Sun, 3 Dec 2023 04:06:43 -0500
Subject: [PATCH 5/5] Add column with space support in faq.

---
 docs/source/overview/faq.rst | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/docs/source/overview/faq.rst b/docs/source/overview/faq.rst
index f2d4b61ae..b0dacfa41 100644
--- a/docs/source/overview/faq.rst
+++ b/docs/source/overview/faq.rst
@@ -34,3 +34,16 @@ If a query runs a complex AI task (e.g., sentiment analysis) on a large table, t
     top
     pgrep evadb_server
 
+Can column names have spaces?
+-----------------------------
+
+For column names that contain spaces, enclose the name in backticks (reverse quotes). Below are example ``CREATE TABLE`` and ``SELECT`` queries:
+
+.. code-block:: sql
+
+   CREATE TABLE IF NOT EXISTS MyVideoCSV (
+        id INTEGER UNIQUE,
+        `frame id` INTEGER
+   );
+
+   SELECT id, `frame id` FROM MyVideoCSV;