Merge pull request #1044 from finos/py-segfaults
Remove duplicate `psp_okey` column from arrow updates
texodus authored May 13, 2020
2 parents 1cfe88f + 9dd545e commit cafaec6
Showing 3 changed files with 56 additions and 4 deletions.
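
The fix drops Perspective's internal primary-key column, `psp_okey`, from the gnode output schema before that schema is used to read an arrow update, so an indexed table no longer sees a duplicate key column; the branch name (py-segfaults) and the new tests suggest the duplicate could crash the Python binding under repeated indexed updates. Below is a minimal sketch of the update pattern the new tests exercise, building the arrow directly with pyarrow instead of the test suite's `util.make_arrow` fixture; the column names "a"/"uid" and the use of the arrow IPC stream format are illustrative assumptions, not part of this commit.

# Minimal sketch (assumed setup: perspective-python and pyarrow installed).
import pyarrow as pa
from perspective.table import Table

# Build a small arrow stream with a user key column "uid" (illustrative names).
batch = pa.RecordBatch.from_arrays(
    [pa.array(["x", "y", "z"]), pa.array([1, 2, 3])], names=["a", "uid"])
sink = pa.BufferOutputStream()
with pa.ipc.new_stream(sink, batch.schema) as writer:
    writer.write_batch(batch)
arrow_bytes = sink.getvalue().to_pybytes()

# Index on "uid"; an arrow update carrying the key column should upsert rows,
# not append them, and must not collide with the internal `psp_okey` column.
tbl = Table(arrow_bytes, index="uid")
tbl.update(arrow_bytes)
assert tbl.size() == 3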
5 changes: 3 additions & 2 deletions cpp/perspective/src/cpp/emscripten.cpp

@@ -1064,7 +1064,8 @@ namespace binding {
 
         // Always use the `Table` column names and data types on update
         if (table_initialized && is_update) {
-            auto schema = gnode->get_output_schema();
+            auto gnode_output_schema = gnode->get_output_schema();
+            auto schema = gnode_output_schema.drop({"psp_okey"});
             column_names = schema.columns();
             data_types = schema.types();
 
@@ -1099,7 +1100,7 @@ namespace binding {
             }
 
             // Updated data types need to reflect in new data table
-            auto new_schema = gnode->get_output_schema();
+            auto new_schema = gnode->get_output_schema().drop({"psp_okey"});
             data_types = new_schema.types();
         }
     } else {
5 changes: 3 additions & 2 deletions python/perspective/perspective/src/table.cpp

@@ -62,7 +62,8 @@ std::shared_ptr<Table> make_table_py(t_val table, t_data_accessor accessor,
 
     // Always use the `Table` column names and data types on update.
    if (table_initialized && is_update) {
-        auto schema = gnode->get_output_schema();
+        auto gnode_output_schema = gnode->get_output_schema();
+        auto schema = gnode_output_schema.drop({"psp_okey"});
         column_names = schema.columns();
         data_types = schema.types();
 
@@ -97,7 +98,7 @@ std::shared_ptr<Table> make_table_py(t_val table, t_data_accessor accessor,
             }
         }
         // Make sure promoted types are used to construct data table
-        auto new_schema = gnode->get_output_schema();
+        auto new_schema = gnode->get_output_schema().drop({"psp_okey"});
         data_types = new_schema.types();
     } else {
         column_names = arrow_loader.names();
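
Both bindings apply the same pattern: take the gnode's output schema, drop the internal `psp_okey` key column, and use only the remaining columns and types to read the incoming arrow. A simplified illustration of the idea, using a toy Python dict in place of Perspective's C++ t_schema (so `drop()` here is a hypothetical helper, not the library API):

# Toy stand-in for the gnode output schema; `psp_okey` is the engine's internal
# primary-key column and should not be treated as a user column during updates.
gnode_output_schema = {"a": "str", "uid": "int64", "psp_okey": "int64"}

def drop(schema, names):
    # Return a copy of the schema without the listed internal columns.
    return {col: dtype for col, dtype in schema.items() if col not in names}

schema = drop(gnode_output_schema, {"psp_okey"})
column_names = list(schema)            # ["a", "uid"] -- no duplicate key column
data_types = list(schema.values())     # ["str", "int64"]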
50 changes: 50 additions & 0 deletions python/perspective/perspective/tests/table/test_update_arrow.py

@@ -7,8 +7,11 @@
 #
 
 import os
+import random
+import uuid
 import pyarrow as pa
 from datetime import date, datetime
+from pytest import mark
 from perspective.table import Table
 
 SOURCE_STREAM_ARROW = os.path.join(os.path.dirname(__file__), "arrow", "int_float_str.arrow")
@@ -476,3 +479,50 @@ def test_update_arrow_column_order_int(self, util):
         assert tbl.view().to_dict() == {
             name: data[0] for name in names
         }
+
+    def test_update_arrow_thread_safe_int_index(self, util):
+        data = [["a", "b", "c"] for i in range(10)]
+        data += [[1, 2, 3]]
+        names = ["a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "uid"]
+        arrow = util.make_arrow(names, data)
+        tbl = Table(arrow, index="uid")
+
+        for i in range(100):
+            idx = (1, 2, 3)[random.randint(0, 2)]
+            update_data = [[str(uuid.uuid4()) + str(random.randint(100, 1000000000))], [idx]]
+            update_names = [names[random.randint(0, 9)], "uid"]
+            update_arrow = util.make_arrow(update_names, update_data)
+            tbl.update(update_arrow)
+
+        assert tbl.size() == 3
+
+    def test_update_arrow_thread_safe_datetime_index(self, util):
+        data = [["a", "b", "c"] for i in range(10)]
+        data += [[datetime(2020, 1, 15, 12, 17), datetime(2020, 1, 15, 12, 18), datetime(2020, 1, 15, 12, 19)]]
+        names = ["a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "uid"]
+        arrow = util.make_arrow(names, data)
+        tbl = Table(arrow, index="uid")
+
+        for i in range(100):
+            idx = (datetime(2020, 1, 15, 12, 17), datetime(2020, 1, 15, 12, 18), datetime(2020, 1, 15, 12, 19))[random.randint(0, 2)]
+            update_data = [[str(uuid.uuid4()) + str(random.randint(100, 1000000000))], [idx]]
+            update_names = [names[random.randint(0, 9)], "uid"]
+            update_arrow = util.make_arrow(update_names, update_data)
+            tbl.update(update_arrow)
+
+        assert tbl.size() == 3
+
+    def test_update_arrow_thread_safe_str_index(self, util):
+        data = [["a", "b", "c"] for i in range(11)]
+        names = ["a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "uid"]
+        arrow = util.make_arrow(names, data)
+        tbl = Table(arrow, index="uid")
+
+        for i in range(100):
+            idx = ("a", "b", "c")[random.randint(0, 2)]
+            update_data = [[str(uuid.uuid4()) + str(random.randint(100, 1000000000))], [idx]]
+            update_names = [names[random.randint(0, 9)], "uid"]
+            update_arrow = util.make_arrow(update_names, update_data)
+            tbl.update(update_arrow)
+
+        assert tbl.size() == 3
