Skip to content

Commit

Permalink
support for non-utf8 locales (#598)
Browse files Browse the repository at this point in the history
* support non-utf8 locales
  • Loading branch information
lilgreenbird authored Nov 21, 2017
1 parent 325e5d2 commit 54f1e5e
Show file tree
Hide file tree
Showing 13 changed files with 340 additions and 124 deletions.
22 changes: 13 additions & 9 deletions Dockerfile-msphpsql
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ RUN export DEBIAN_FRONTEND=noninteractive && apt-get update && \
apt-utils \
autoconf \
curl \
libcurl3 \
g++ \
gcc \
git \
Expand All @@ -28,17 +29,14 @@ ENV TEST_PHP_SQL_SERVER sql
ENV TEST_PHP_SQL_UID sa
ENV TEST_PHP_SQL_PWD Password123

# add locale iso-8859-1
RUN sed -i 's/# en_US ISO-8859-1/en_US ISO-8859-1/g' /etc/locale.gen
RUN locale-gen en_US

# set locale to utf-8
RUN locale-gen en_US.UTF-8
ENV LANG='en_US.UTF-8' LANGUAGE='en_US:en' LC_ALL='en_US.UTF-8'

#install ODBC driver
RUN curl https://packages.microsoft.com/keys/microsoft.asc | apt-key add -
RUN curl https://packages.microsoft.com/config/ubuntu/16.04/prod.list > /etc/apt/sources.list.d/mssql-release.list

RUN export DEBIAN_FRONTEND=noninteractive && apt-get update && ACCEPT_EULA=Y apt-get install -y msodbcsql mssql-tools
ENV PATH="/opt/mssql-tools/bin:${PATH}"

#install coveralls
RUN pip install --upgrade pip && pip install cpp-coveralls

Expand All @@ -47,8 +45,14 @@ RUN pip install --upgrade pip && pip install cpp-coveralls
#another option is to copy source to build directory on image
RUN mkdir -p $PHPSQLDIR
COPY . $PHPSQLDIR
WORKDIR $PHPSQLDIR/source/

#install ODBC 17 preview driver
WORKDIR $PHPSQLDIR
RUN export DEBIAN_FRONTEND=noninteractive && apt-get update && ACCEPT_EULA=Y dpkg -i "./ODBC 17 binaries preview/Ubuntu 16/msodbcsql_17.0.0.1-1_amd64.deb"
RUN export DEBIAN_FRONTEND=noninteractive && apt-get update && ACCEPT_EULA=Y dpkg -i "./ODBC 17 binaries preview/Ubuntu 16/mssql-tools_17.0.0.1-1_amd64.deb"
ENV PATH="/opt/mssql-tools/bin:${PATH}"

WORKDIR $PHPSQLDIR/source/
RUN chmod +x ./packagize.sh
RUN /bin/bash -c "./packagize.sh"

Expand Down Expand Up @@ -79,4 +83,4 @@ RUN chmod +x ./entrypoint.sh
CMD /bin/bash ./entrypoint.sh

ENV REPORT_EXIT_STATUS 1
ENV TEST_PHP_EXECUTABLE /usr/bin/php
ENV TEST_PHP_EXECUTABLE /usr/bin/php
6 changes: 6 additions & 0 deletions source/shared/core_init.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,12 @@ void core_sqlsrv_minit( _Outptr_ sqlsrv_context** henv_cp, _Inout_ sqlsrv_contex
SQLSRV_STATIC_ASSERT( sizeof( sqlsrv_sqltype ) == sizeof( zend_long ) );
SQLSRV_STATIC_ASSERT( sizeof( sqlsrv_phptype ) == sizeof( zend_long ));

#ifndef _WIN32
// set locale from environment
// this is necessary for ODBC and MUST be done before connection
setlocale(LC_ALL, "");
#endif

*henv_cp = *henv_ncp = SQL_NULL_HANDLE; // initialize return values to NULL

try {
Expand Down
18 changes: 15 additions & 3 deletions source/shared/localization.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,17 @@
#include <locale>

// Numeric code page identifiers shared by SystemLocale and the cp_iconv
// encoding table. The ISO 8859 family is numbered 28590 + part number
// (e.g. ISO 8859-15 -> 28605); only the parts listed here are recognized.
#define CP_UTF8 65001
#define CP_ISO8859_1 28591
#define CP_ISO8859_2 28592
#define CP_ISO8859_3 28593
#define CP_ISO8859_4 28594
#define CP_ISO8859_5 28595
#define CP_ISO8859_6 28596
#define CP_ISO8859_7 28597
#define CP_ISO8859_8 28598
#define CP_ISO8859_9 28599
#define CP_ISO8859_13 28603
#define CP_ISO8859_15 28605
#define CP_UTF16 1200
#define CP_ACP 0 // default to ANSI code page

Expand Down Expand Up @@ -178,14 +189,15 @@ class SystemLocale
SystemLocale & operator=( const SystemLocale & );

std::locale * m_pLocale;
UINT m_uAnsiCP;

explicit SystemLocale( const char * localeName );
~SystemLocale();

// Maps the special placeholder code page values (anything <= 3) to the code
// page detected for the current system locale; every other value is returned
// unchanged.
//
// The previous body returned CP_UTF8 for values < 2 before this line could
// run, which left the locale-aware result unreachable.
// NOTE(review): the <= 3 bound is meant to also cover SQLSRV_ENCODING_CHAR
// (presumably 3) in addition to CP_ACP (0) -- confirm against its definition.
static UINT ExpandSpecialCP( UINT codepage )
{
    return (codepage <= 3 ? Singleton().m_uAnsiCP : codepage);
}

// Returns the number of bytes this UTF8 code point expects
Expand Down Expand Up @@ -217,7 +229,7 @@ class SystemLocale

// Returns the ANSI code page chosen for this locale instance (m_uAnsiCP).
// A leftover hard-coded `return CP_UTF8;` used to precede this statement,
// so the stored value was never reported.
inline UINT SystemLocale::AnsiCP() const
{
    return m_uAnsiCP;
}

inline UINT SystemLocale::MaxCharCchSize( UINT codepage )
Expand Down
51 changes: 49 additions & 2 deletions source/shared/localizationimpl.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
//---------------------------------------------------------------------------------------------------------------------------------
// File: LocalizationImpl.hpp
// File: localizationimpl.cpp
//
// Contents: Contains non-inline code for the SystemLocale class
// Must be included in one c/cpp file per binary
Expand Down Expand Up @@ -71,6 +71,17 @@ const cp_iconv cp_iconv::g_cp_iconv[] = {
{ 1256, "CP1256//TRANSLIT" },
{ 1257, "CP1257//TRANSLIT" },
{ 1258, "CP1258//TRANSLIT" },
{ CP_ISO8859_1, "ISO8859-1//TRANSLIT" },
{ CP_ISO8859_2, "ISO8859-2//TRANSLIT" },
{ CP_ISO8859_3, "ISO8859-3//TRANSLIT" },
{ CP_ISO8859_4, "ISO8859-4//TRANSLIT" },
{ CP_ISO8859_5, "ISO8859-5//TRANSLIT" },
{ CP_ISO8859_6, "ISO8859-6//TRANSLIT" },
{ CP_ISO8859_7, "ISO8859-7//TRANSLIT" },
{ CP_ISO8859_8, "ISO8859-8//TRANSLIT" },
{ CP_ISO8859_9, "ISO8859-9//TRANSLIT" },
{ CP_ISO8859_13, "ISO8859-13//TRANSLIT" },
{ CP_ISO8859_15, "ISO8859-15//TRANSLIT" },
{ 12000, "UTF-32LE" }
};
const size_t cp_iconv::g_cp_iconv_count = ARRAYSIZE(cp_iconv::g_cp_iconv);
Expand Down Expand Up @@ -270,7 +281,42 @@ using namespace std;

// Constructs the locale wrapper for `localeName` and derives the ANSI code
// page (m_uAnsiCP) from the charset portion of the name, i.e. the text after
// the first '.' (or the whole name when there is no '.').
//
// The charset is matched case-insensitively against a table of known names.
// A table entry only matches when the charset does not continue with another
// letter or digit right after it, so "ISO-8859-15" is not mistaken for
// "ISO-8859-1" while suffixes such as "@euro" or ";LC_..." are still accepted.
// When nothing matches, m_uAnsiCP keeps its CP_UTF8 default.
//
// NOTE(review): std::locale(const char*) throws std::runtime_error for a null
// or unknown name, so the null check below is purely defensive -- confirm
// whether a fallback locale is wanted for that case.
SystemLocale::SystemLocale( const char * localeName )
    : m_pLocale( new std::locale(localeName) )
    , m_uAnsiCP(CP_UTF8)
{
    struct LocaleCP
    {
        const char* localeName;
        UINT codePage;
    };
#define CPxxx(cp) { "CP" #cp, cp }
#define ISO8859(n) { "ISO-8859-" #n, CP_ISO8859_ ## n }, \
    { "8859_" #n, CP_ISO8859_ ## n }, \
    { "ISO8859-" #n, CP_ISO8859_ ## n }, \
    { "ISO8859" #n, CP_ISO8859_ ## n }, \
    { "ISO_8859-" #n, CP_ISO8859_ ## n }, \
    { "ISO_8859_" #n, CP_ISO8859_ ## n }
    const LocaleCP lcpTable[] = {
        { "utf8", CP_UTF8 },
        { "UTF-8", CP_UTF8 },
        CPxxx(1252), CPxxx(850), CPxxx(437), CPxxx(874), CPxxx(932), CPxxx(936), CPxxx(949), CPxxx(950),
        CPxxx(1250), CPxxx(1251), CPxxx(1253), CPxxx(1254), CPxxx(1255), CPxxx(1256), CPxxx(1257), CPxxx(1258),
        ISO8859(1), ISO8859(2), ISO8859(3), ISO8859(4), ISO8859(5), ISO8859(6),
        ISO8859(7), ISO8859(8), ISO8859(9), ISO8859(13), ISO8859(15),
        { "UTF-32LE", 12000 }
    };

    if (!localeName)
    {
        return;
    }

    const char *dot = strchr(localeName, '.');
    const char *charsetName = dot ? dot + 1 : localeName;

    for (const LocaleCP& lcp : lcpTable)
    {
        const size_t nameLen = strlen(lcp.localeName);
        if (strncasecmp(lcp.localeName, charsetName, nameLen) != 0)
        {
            continue;
        }

        // The first nameLen characters matched, so charsetName[nameLen] is
        // still inside the string (at worst its terminator). Reject the
        // entry if the charset keeps going with a letter or digit: that
        // means a longer name (e.g. "...-15") merely shares this prefix.
        const char next = charsetName[nameLen];
        const bool nameContinues = (next >= '0' && next <= '9')
                                || (next >= 'A' && next <= 'Z')
                                || (next >= 'a' && next <= 'z');
        if (!nameContinues)
        {
            m_uAnsiCP = lcp.codePage;
            return;
        }
    }
}

SystemLocale::~SystemLocale()
Expand All @@ -285,7 +331,8 @@ const SystemLocale & SystemLocale::Singleton()
#if !defined(__GNUC__) || defined(NO_THREADSAFE_STATICS)
#error "Relying on GCC's threadsafe initialization of local statics."
#endif
static const SystemLocale s_Default( "en_US.utf-8" );
// get locale from environment and set as default
static const SystemLocale s_Default(setlocale(LC_ALL, NULL));
return s_Default;
}

Expand Down
13 changes: 12 additions & 1 deletion test/functional/sqlsrv/MsCommon.inc
Original file line number Diff line number Diff line change
Expand Up @@ -454,4 +454,15 @@ function handleErrors()
}
}

?>
// non-UTF8 locale support in ODBC 17 and above only
/**
 * Reports whether the installed ODBC driver supports non-UTF8 locales.
 *
 * Opens a connection, reads the driver version from sqlsrv_client_info()
 * and compares its major component against 17. The connection is closed
 * again before returning.
 *
 * @return bool true when the driver major version is 17 or higher,
 *              false otherwise
 */
function isLocaleSupported()
{
    $conn = AE\connect();

    $msodbcsql_ver = sqlsrv_client_info($conn)['DriverVer'];
    sqlsrv_close($conn);

    // Always return an explicit boolean: falling off the end would yield
    // NULL, which callers treat the same as "not supported".
    return (int) explode(".", $msodbcsql_ver)[0] >= 17;
}

?>
2 changes: 1 addition & 1 deletion test/functional/sqlsrv/MsData_UTF8.inc
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
*/


function getTestData($index)
function getTestData_UTF8($index)
{
$inputs = null;
switch ($index)
Expand Down
5 changes: 3 additions & 2 deletions test/functional/sqlsrv/MsHelper.inc
Original file line number Diff line number Diff line change
Expand Up @@ -732,12 +732,13 @@ function getInsertArray($index)
{
if (! UseUTF8data()) {
require_once('MsData.inc');
return getTestData($index);
} else {
require_once('MsData_UTF8.inc');
return getTestData_UTF8($index);
}

// get array of input values
return getTestData($index);
}

/**
Expand Down Expand Up @@ -1184,4 +1185,4 @@ function getSampleData($k)
}
}

?>
?>
47 changes: 36 additions & 11 deletions test/functional/sqlsrv/TC43_FetchData.phpt
Original file line number Diff line number Diff line change
@@ -1,23 +1,30 @@
--TEST--
Fetch Field Data Test verifies the data retrieved via "sqlsrv_get_field"
Fetch Field Data Test verifies the data retrieved via sqlsrv_get_field
--ENV--
PHPT_EXEC=true
--SKIPIF--
<?php require('skipif_versions_old.inc'); ?>
<?
// locale must be set before 1st connection
if ( !isWindows() ) {
setlocale(LC_ALL, "en_US.ISO-8859-1");
}
?>
--FILE--
<?php

require_once('MsCommon.inc');

function fetchFields()
{
$testName = "Fetch - Field Data";
startTest($testName);

setup();
$tableName = 'TC43test';

$conn1 = AE\connect();
if (useUTF8Data()) {
$conn1 = AE\connect(array('CharacterSet'=>'UTF-8'));
} else {
$conn1 = AE\connect();
}

AE\createTestTable($conn1, $tableName);

$startRow = 1;
Expand Down Expand Up @@ -62,8 +69,6 @@ function fetchFields()
dropTable($conn1, $tableName);

sqlsrv_close($conn1);

endTest($testName);
}

function checkData($col, $actual, $expected)
Expand Down Expand Up @@ -91,16 +96,36 @@ function checkData($col, $actual, $expected)
return ($success);
}

if (! isWindows()) {
setUTF8Data(true);
if (!isWindows()) {
setlocale(LC_ALL, "en_US.ISO-8859-1");
}

$testName = "Fetch - Field Data";

// test ansi only if windows or non-UTF8 locales are supported (ODBC 17 and above)
startTest($testName);
if (isWindows() || isLocaleSupported()) {

try {
setUTF8Data(false);
fetchFields();
} catch (Exception $e) {
echo $e->getMessage();
}
}
endTest($testName);

// test utf8
startTest($testName);
try {
setUTF8Data(true);
fetchFields();
} catch (Exception $e) {
echo $e->getMessage();
}
setUTF8Data(false);
endTest($testName);

?>
--EXPECT--
Test "Fetch - Field Data" completed successfully.
Test "Fetch - Field Data" completed successfully.
Loading

0 comments on commit 54f1e5e

Please sign in to comment.