From 718bbc939037929ef5b8f4b4fe10aadfbab4408e Mon Sep 17 00:00:00 2001 From: Bryan Cutler Date: Wed, 20 Sep 2017 10:51:00 +0900 Subject: [PATCH] [SPARK-22067][SQL] ArrowWriter should use position when setting UTF8String ByteBuffer ## What changes were proposed in this pull request? The ArrowWriter StringWriter was setting Arrow data using a position of 0 instead of the actual position in the ByteBuffer. This only worked because of a bug, ARROW-1443, which has been fixed as of Arrow 0.7.0. Testing with this version revealed the error in the ArrowConvertersSuite string conversion test. ## How was this patch tested? Existing tests, manually verified working with Arrow 0.7.0 Author: Bryan Cutler Closes #19284 from BryanCutler/arrow-ArrowWriter-StringWriter-position-SPARK-22067. --- .../org/apache/spark/sql/execution/arrow/ArrowWriter.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/arrow/ArrowWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/arrow/ArrowWriter.scala index 11ba04d2ce9a7..0b740735ffe19 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/arrow/ArrowWriter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/arrow/ArrowWriter.scala @@ -234,8 +234,9 @@ private[arrow] class StringWriter(val valueVector: NullableVarCharVector) extend override def setValue(input: SpecializedGetters, ordinal: Int): Unit = { val utf8 = input.getUTF8String(ordinal) + val utf8ByteBuffer = utf8.getByteBuffer // todo: for off-heap UTF8String, how to pass in to arrow without copy? - valueMutator.setSafe(count, utf8.getByteBuffer, 0, utf8.numBytes()) + valueMutator.setSafe(count, utf8ByteBuffer, utf8ByteBuffer.position(), utf8.numBytes()) } }