From ae40547464425dc1d278938a28d4d698c0b12c0b Mon Sep 17 00:00:00 2001 From: Markus Klein Date: Tue, 19 Dec 2023 17:04:11 +0100 Subject: [PATCH] do not map empty strings to null --- src/reader/text.rs | 6 ++++++ tests/integration.rs | 15 +++++++++++---- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/src/reader/text.rs b/src/reader/text.rs index c8e62e5..7fbdc54 100644 --- a/src/reader/text.rs +++ b/src/reader/text.rs @@ -1,6 +1,7 @@ use std::{char::decode_utf16, cmp::min, ffi::CStr, num::NonZeroUsize, sync::Arc}; use arrow::array::{ArrayRef, StringBuilder}; +use log::warn; use odbc_api::{ buffers::{AnySlice, BufferDesc}, DataType as OdbcDataType, @@ -59,6 +60,11 @@ fn narrow_text_strategy( assume_indicators_are_memory_garbage: bool, ) -> Box { if assume_indicators_are_memory_garbage { + warn!( + "Ignoring indicators, because we expect the ODBC driver of your database to return \ + garbage memory. We can not distiguish between empty strings and NULL. Everything is \ + empty." + ); Box::new(NarrowUseTerminatingZero::new(octet_len)) } else { Box::new(NarrowText::new(octet_len)) diff --git a/tests/integration.rs b/tests/integration.rs index 3c7b459..d07cab1 100644 --- a/tests/integration.rs +++ b/tests/integration.rs @@ -308,7 +308,11 @@ fn fetch_varchar() { #[test] fn fetch_varchar_using_terminating_zeroes_to_indicate_string_length() { let table_name = function_name!().rsplit_once(':').unwrap().1; - let cursor = cursor_over(table_name, "VARCHAR(50)", "('Hello'),('Bonjour'),(NULL)"); + let cursor = cursor_over( + table_name, + "VARCHAR(50)", + "('Hello'),('Bonjour'),(NULL)", + ); let mut quirks = Quirks::new(); quirks.indicators_returned_from_bulk_fetch_are_memory_garbage = true; @@ -331,9 +335,12 @@ fn fetch_varchar_using_terminating_zeroes_to_indicate_string_length() { assert_eq!("Bonjour", array_vals.value(1)); // This workaround is currently only active for UTF-8. 
Which in turn is only active on Linux - #[cfg(target_os="windows")] + #[cfg(target_os = "windows")] assert!(array_vals.is_null(2)); - #[cfg(not(target_os="windows"))] + // Due to the ambiguity between empty and NULL we map everything to empty. This cannot be + // mapped to NULL, because the column might be mandatory in the schema. The + // representation is ambiguous, because we need to ignore the indicator buffer. + #[cfg(not(target_os = "windows"))] assert_eq!("", array_vals.value(0)); } @@ -345,7 +352,7 @@ fn fetch_varchar_using_terminating_zeroes_to_indicate_string_length() { #[test] fn fetch_empty_string_from_non_null_varchar_using_terminating_zeroes_to_indicate_string_length() { let table_name = function_name!().rsplit_once(':').unwrap().1; - let cursor = cursor_over(table_name, "VARCHAR(50)", "('')"); + let cursor = cursor_over(table_name, "VARCHAR(50) NOT NULL", "('')"); let mut quirks = Quirks::new(); quirks.indicators_returned_from_bulk_fetch_are_memory_garbage = true;