From cc3ec6113c699897738eea770697c644f038ee83 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Tue, 12 May 2015 23:14:48 +0800 Subject: [PATCH] Also consider Java String in writePrimitive for StringType. --- python/pyspark/sql/tests.py | 8 ++++++++ .../apache/spark/sql/parquet/ParquetTableSupport.scala | 4 +++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/python/pyspark/sql/tests.py b/python/pyspark/sql/tests.py index 7e63f4d6461f6..3f48b9991bdc8 100644 --- a/python/pyspark/sql/tests.py +++ b/python/pyspark/sql/tests.py @@ -490,7 +490,15 @@ def test_save_and_load(self): self.assertTrue(sorted(df.collect()) == sorted(actual.collect())) self.sqlCtx.sql("SET spark.sql.sources.default=" + defaultDataSourceName) + tmpPath2 = tempfile.mkdtemp() + shutil.rmtree(tmpPath2) + rdd = self.sc.parallelize(['{"obj": {"a": "hello"}}', '{"obj": {"b": "world"}}']) + df = self.sqlCtx.jsonRDD(rdd, + StructType([StructField("obj", MapType(StringType(), StringType()), True)])) + df.save(tmpPath2, 'org.apache.spark.sql.parquet', mode='overwrite') + shutil.rmtree(tmpPath) + shutil.rmtree(tmpPath2) def test_help_command(self): # Regression test for SPARK-5464 diff --git a/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTableSupport.scala b/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTableSupport.scala index c45c431438efc..411335e458d59 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTableSupport.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTableSupport.scala @@ -197,7 +197,9 @@ private[parquet] class RowWriteSupport extends WriteSupport[Row] with Logging { private[parquet] def writePrimitive(schema: DataType, value: Any): Unit = { if (value != null) { schema match { - case StringType => writer.addBinary( + case StringType if value.isInstanceOf[String] => writer.addBinary( + Binary.fromByteArray(UTF8String(value.asInstanceOf[String]).getBytes)) + case StringType if 
value.isInstanceOf[UTF8String] => writer.addBinary( Binary.fromByteArray(value.asInstanceOf[UTF8String].getBytes)) case BinaryType => writer.addBinary( Binary.fromByteArray(value.asInstanceOf[Array[Byte]]))