Skip to content

Commit

Permalink
deps: update ICU to 64.2
Browse files Browse the repository at this point in the history
Update the version of the bundled ICU (deps/icu-small) to ICU version
64.2 (Unicode 12, CLDR 35)

Fixes: #26388

PR-URL: #27361
Reviewed-By: Richard Lau <riclau@uk.ibm.com>
Reviewed-By: Steven R Loomis <srloomis@us.ibm.com>
Reviewed-By: Michael Dawson <michael_dawson@ca.ibm.com>
  • Loading branch information
ryzokuken authored and BethGriggs committed May 10, 2019
1 parent dcf9519 commit ea6a0ad
Show file tree
Hide file tree
Showing 297 changed files with 20,512 additions and 13,162 deletions.
4 changes: 2 additions & 2 deletions deps/icu-small/LICENSE
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
COPYRIGHT AND PERMISSION NOTICE (ICU 58 and later)

Copyright © 1991-2018 Unicode, Inc. All rights reserved.
Distributed under the Terms of Use in http://www.unicode.org/copyright.html.
Copyright © 1991-2019 Unicode, Inc. All rights reserved.
Distributed under the Terms of Use in https://www.unicode.org/copyright.html.

Permission is hereby granted, free of charge, to any person obtaining
a copy of the Unicode data files and any associated documentation
Expand Down
4 changes: 2 additions & 2 deletions deps/icu-small/README-SMALL-ICU.txt
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
Small ICU sources - auto generated by shrink-icu-src.py

This directory contains the ICU subset used by --with-intl=small-icu (the default)
It is a strict subset of ICU 63 source files with the following exception(s):
* deps/icu-small/source/data/in/icudt63l.dat : Reduced-size data file
It is a strict subset of ICU 64 source files with the following exception(s):
* deps/icu-small/source/data/in/icudt64l.dat : Reduced-size data file


To rebuild this directory, see ../../tools/icu/README.md
3 changes: 1 addition & 2 deletions deps/icu-small/source/common/brkeng.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -124,13 +124,12 @@ static void U_CALLCONV _deleteEngine(void *obj) {
U_CDECL_END
U_NAMESPACE_BEGIN

static UMutex gBreakEngineMutex = U_MUTEX_INITIALIZER;

const LanguageBreakEngine *
ICULanguageBreakFactory::getEngineFor(UChar32 c) {
const LanguageBreakEngine *lbe = NULL;
UErrorCode status = U_ZERO_ERROR;

static UMutex gBreakEngineMutex = U_MUTEX_INITIALIZER;
Mutex m(&gBreakEngineMutex);

if (fEngines == NULL) {
Expand Down
97 changes: 97 additions & 0 deletions deps/icu-small/source/common/capi_helper.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
// © 2018 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html

#ifndef __CAPI_HELPER_H__
#define __CAPI_HELPER_H__

#include "unicode/utypes.h"

U_NAMESPACE_BEGIN

/**
* An internal helper class to help convert between C and C++ APIs.
*/
template<typename CType, typename CPPType, int32_t kMagic>
class IcuCApiHelper {
public:
/**
* Convert from the C type to the C++ type (const version).
*/
static const CPPType* validate(const CType* input, UErrorCode& status);

/**
* Convert from the C type to the C++ type (non-const version).
*/
static CPPType* validate(CType* input, UErrorCode& status);

/**
* Convert from the C++ type to the C type (const version).
*/
const CType* exportConstForC() const;

/**
* Convert from the C++ type to the C type (non-const version).
*/
CType* exportForC();

/**
* Invalidates the object.
*/
~IcuCApiHelper();

private:
/**
* While the object is valid, fMagic equals kMagic.
*/
int32_t fMagic = kMagic;
};


template<typename CType, typename CPPType, int32_t kMagic>
const CPPType*
IcuCApiHelper<CType, CPPType, kMagic>::validate(const CType* input, UErrorCode& status) {
if (U_FAILURE(status)) {
return nullptr;
}
if (input == nullptr) {
status = U_ILLEGAL_ARGUMENT_ERROR;
return nullptr;
}
auto* impl = reinterpret_cast<const CPPType*>(input);
if (static_cast<const IcuCApiHelper<CType, CPPType, kMagic>*>(impl)->fMagic != kMagic) {
status = U_INVALID_FORMAT_ERROR;
return nullptr;
}
return impl;
}

template<typename CType, typename CPPType, int32_t kMagic>
CPPType*
IcuCApiHelper<CType, CPPType, kMagic>::validate(CType* input, UErrorCode& status) {
auto* constInput = static_cast<const CType*>(input);
auto* validated = validate(constInput, status);
return const_cast<CPPType*>(validated);
}

template<typename CType, typename CPPType, int32_t kMagic>
const CType*
IcuCApiHelper<CType, CPPType, kMagic>::exportConstForC() const {
return reinterpret_cast<const CType*>(static_cast<const CPPType*>(this));
}

template<typename CType, typename CPPType, int32_t kMagic>
CType*
IcuCApiHelper<CType, CPPType, kMagic>::exportForC() {
return reinterpret_cast<CType*>(static_cast<CPPType*>(this));
}

template<typename CType, typename CPPType, int32_t kMagic>
IcuCApiHelper<CType, CPPType, kMagic>::~IcuCApiHelper() {
// head off application errors by preventing use of of deleted objects.
fMagic = 0;
}


U_NAMESPACE_END

#endif // __CAPI_HELPER_H__
110 changes: 80 additions & 30 deletions deps/icu-small/source/common/characterproperties.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,24 +23,34 @@
#include "umutex.h"
#include "uprops.h"

using icu::LocalPointer;
#if !UCONFIG_NO_NORMALIZATION
using icu::Normalizer2Factory;
using icu::Normalizer2Impl;
#endif
using icu::UInitOnce;
using icu::UnicodeSet;

namespace {

UBool U_CALLCONV characterproperties_cleanup();

constexpr int32_t NUM_INCLUSIONS = UPROPS_SRC_COUNT + UCHAR_INT_LIMIT - UCHAR_INT_START;

struct Inclusion {
UnicodeSet *fSet;
UInitOnce fInitOnce;
};
Inclusion gInclusions[UPROPS_SRC_COUNT]; // cached getInclusions()
Inclusion gInclusions[NUM_INCLUSIONS]; // cached getInclusions()

UnicodeSet *sets[UCHAR_BINARY_LIMIT] = {};

UCPMap *maps[UCHAR_INT_LIMIT - UCHAR_INT_START] = {};

UMutex cpMutex = U_MUTEX_INITIALIZER;
icu::UMutex *cpMutex() {
static icu::UMutex m = U_MUTEX_INITIALIZER;
return &m;
}

//----------------------------------------------------------------
// Inclusions list
Expand Down Expand Up @@ -80,43 +90,29 @@ UBool U_CALLCONV characterproperties_cleanup() {
return TRUE;
}

} // namespace

U_NAMESPACE_BEGIN

/*
Reduce excessive reallocation, and make it easier to detect initialization problems.
Usually you don't see smaller sets than this for Unicode 5.0.
*/
constexpr int32_t DEFAULT_INCLUSION_CAPACITY = 3072;

void U_CALLCONV CharacterProperties::initInclusion(UPropertySource src, UErrorCode &errorCode) {
void U_CALLCONV initInclusion(UPropertySource src, UErrorCode &errorCode) {
// This function is invoked only via umtx_initOnce().
// This function is a friend of class UnicodeSet.

U_ASSERT(0 <= src && src < UPROPS_SRC_COUNT);
if (src == UPROPS_SRC_NONE) {
errorCode = U_INTERNAL_PROGRAM_ERROR;
return;
}
UnicodeSet * &incl = gInclusions[src].fSet;
U_ASSERT(incl == nullptr);
U_ASSERT(gInclusions[src].fSet == nullptr);

incl = new UnicodeSet();
if (incl == nullptr) {
LocalPointer<UnicodeSet> incl(new UnicodeSet());
if (incl.isNull()) {
errorCode = U_MEMORY_ALLOCATION_ERROR;
return;
}
USetAdder sa = {
(USet *)incl,
(USet *)incl.getAlias(),
_set_add,
_set_addRange,
_set_addString,
nullptr, // don't need remove()
nullptr // don't need removeRange()
};

incl->ensureCapacity(DEFAULT_INCLUSION_CAPACITY, errorCode);
switch(src) {
case UPROPS_SRC_CHAR:
uchar_addPropertyStarts(&sa, &errorCode);
Expand Down Expand Up @@ -183,12 +179,15 @@ void U_CALLCONV CharacterProperties::initInclusion(UPropertySource src, UErrorCo
}

if (U_FAILURE(errorCode)) {
delete incl;
incl = nullptr;
return;
}
// Compact for caching
if (incl->isBogus()) {
errorCode = U_MEMORY_ALLOCATION_ERROR;
return;
}
// Compact for caching.
incl->compact();
gInclusions[src].fSet = incl.orphan();
ucln_common_registerCleanup(UCLN_COMMON_CHARACTERPROPERTIES, characterproperties_cleanup);
}

Expand All @@ -199,15 +198,66 @@ const UnicodeSet *getInclusionsForSource(UPropertySource src, UErrorCode &errorC
return nullptr;
}
Inclusion &i = gInclusions[src];
umtx_initOnce(i.fInitOnce, &CharacterProperties::initInclusion, src, errorCode);
umtx_initOnce(i.fInitOnce, &initInclusion, src, errorCode);
return i.fSet;
}

void U_CALLCONV initIntPropInclusion(UProperty prop, UErrorCode &errorCode) {
// This function is invoked only via umtx_initOnce().
U_ASSERT(UCHAR_INT_START <= prop && prop < UCHAR_INT_LIMIT);
int32_t inclIndex = UPROPS_SRC_COUNT + prop - UCHAR_INT_START;
U_ASSERT(gInclusions[inclIndex].fSet == nullptr);
UPropertySource src = uprops_getSource(prop);
const UnicodeSet *incl = getInclusionsForSource(src, errorCode);
if (U_FAILURE(errorCode)) {
return;
}

LocalPointer<UnicodeSet> intPropIncl(new UnicodeSet(0, 0));
if (intPropIncl.isNull()) {
errorCode = U_MEMORY_ALLOCATION_ERROR;
return;
}
int32_t numRanges = incl->getRangeCount();
int32_t prevValue = 0;
for (int32_t i = 0; i < numRanges; ++i) {
UChar32 rangeEnd = incl->getRangeEnd(i);
for (UChar32 c = incl->getRangeStart(i); c <= rangeEnd; ++c) {
// TODO: Get a UCharacterProperty.IntProperty to avoid the property dispatch.
int32_t value = u_getIntPropertyValue(c, prop);
if (value != prevValue) {
intPropIncl->add(c);
prevValue = value;
}
}
}

if (intPropIncl->isBogus()) {
errorCode = U_MEMORY_ALLOCATION_ERROR;
return;
}
// Compact for caching.
intPropIncl->compact();
gInclusions[inclIndex].fSet = intPropIncl.orphan();
ucln_common_registerCleanup(UCLN_COMMON_CHARACTERPROPERTIES, characterproperties_cleanup);
}

} // namespace

U_NAMESPACE_BEGIN

const UnicodeSet *CharacterProperties::getInclusionsForProperty(
UProperty prop, UErrorCode &errorCode) {
if (U_FAILURE(errorCode)) { return nullptr; }
UPropertySource src = uprops_getSource(prop);
return getInclusionsForSource(src, errorCode);
if (UCHAR_INT_START <= prop && prop < UCHAR_INT_LIMIT) {
int32_t inclIndex = UPROPS_SRC_COUNT + prop - UCHAR_INT_START;
Inclusion &i = gInclusions[inclIndex];
umtx_initOnce(i.fInitOnce, &initIntPropInclusion, prop, errorCode);
return i.fSet;
} else {
UPropertySource src = uprops_getSource(prop);
return getInclusionsForSource(src, errorCode);
}
}

U_NAMESPACE_END
Expand All @@ -216,7 +266,7 @@ namespace {

UnicodeSet *makeSet(UProperty property, UErrorCode &errorCode) {
if (U_FAILURE(errorCode)) { return nullptr; }
icu::LocalPointer<UnicodeSet> set(new UnicodeSet());
LocalPointer<UnicodeSet> set(new UnicodeSet());
if (set.isNull()) {
errorCode = U_MEMORY_ALLOCATION_ERROR;
return nullptr;
Expand Down Expand Up @@ -311,7 +361,7 @@ u_getBinaryPropertySet(UProperty property, UErrorCode *pErrorCode) {
*pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
return nullptr;
}
Mutex m(&cpMutex);
Mutex m(cpMutex());
UnicodeSet *set = sets[property];
if (set == nullptr) {
sets[property] = set = makeSet(property, *pErrorCode);
Expand All @@ -327,7 +377,7 @@ u_getIntPropertyMap(UProperty property, UErrorCode *pErrorCode) {
*pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
return nullptr;
}
Mutex m(&cpMutex);
Mutex m(cpMutex());
UCPMap *map = maps[property - UCHAR_INT_START];
if (map == nullptr) {
maps[property - UCHAR_INT_START] = map = makeMap(property, *pErrorCode);
Expand Down
Loading

0 comments on commit ea6a0ad

Please sign in to comment.