From e6bf7345ce1d72935342f6de7544975fc8016b58 Mon Sep 17 00:00:00 2001 From: Stan Hu Date: Mon, 8 Jul 2024 17:29:58 -0700 Subject: [PATCH 1/3] Auto-detect C++ extensions needed to build clang requires manually enabling C++ extensions via `-std=c++`, and ICU 75.1 needs C++17. Auto-detect the flags that are needed to build. Relates to #172 Closes #177 --- ext/charlock_holmes/extconf.rb | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/ext/charlock_holmes/extconf.rb b/ext/charlock_holmes/extconf.rb index ce41dfb..079224d 100644 --- a/ext/charlock_holmes/extconf.rb +++ b/ext/charlock_holmes/extconf.rb @@ -49,4 +49,30 @@ $CFLAGS << ' -Wall -funroll-loops' $CFLAGS << ' -Wextra -O0 -ggdb3' if ENV['DEBUG'] +minimal_program = <<~SRC + #include <unicode/translit.h> + int main() { return 0; } +SRC + +# Pass -x c++ to force gcc to compile the test program +# as C++ (as it will end in .c by default). +compile_options = +"-x c++" + +icu_requires_version_flag = checking_for("icu that requires explicit C++ version flag") do + !try_compile(minimal_program, compile_options) +end + +if icu_requires_version_flag + abort "Cannot compile icu with your compiler: recent versions require C++17 support." unless %w[c++20 c++17 c++11 c++0x].any? do |std| + checking_for("icu that compiles with #{std} standard") do + flags = compile_options + " -std=#{std}" + if try_compile(minimal_program, flags) + $CPPFLAGS << flags + + true + end + end + end +end + create_makefile 'charlock_holmes/charlock_holmes' From 7f026b45cfee8fb6b6d14a9c9f995a29907afe9e Mon Sep 17 00:00:00 2001 From: Stan Hu Date: Mon, 8 Jul 2024 17:54:54 -0700 Subject: [PATCH 2/3] Fix C pointer conversion errors C++ compilation failed without these explicit casts. 
--- ext/charlock_holmes/converter.c | 2 +- ext/charlock_holmes/encoding_detector.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ext/charlock_holmes/converter.c b/ext/charlock_holmes/converter.c index 828b996..0542ef9 100644 --- a/ext/charlock_holmes/converter.c +++ b/ext/charlock_holmes/converter.c @@ -29,7 +29,7 @@ static VALUE rb_converter_convert(VALUE self, VALUE rb_txt, VALUE rb_src_enc, VA if (status != U_BUFFER_OVERFLOW_ERROR) { rb_raise(rb_eArgError, "%s", u_errorName(status)); } - out_buf = malloc(out_len); + out_buf = (char *) malloc(out_len); // now do the actual conversion status = U_ZERO_ERROR; diff --git a/ext/charlock_holmes/encoding_detector.c b/ext/charlock_holmes/encoding_detector.c index d6a8e2d..5be2465 100644 --- a/ext/charlock_holmes/encoding_detector.c +++ b/ext/charlock_holmes/encoding_detector.c @@ -352,7 +352,7 @@ static VALUE rb_encdec__alloc(VALUE klass) UErrorCode status = U_ZERO_ERROR; VALUE obj; - detector = calloc(1, sizeof(charlock_detector_t)); + detector = (charlock_detector_t *) calloc(1, sizeof(charlock_detector_t)); obj = Data_Wrap_Struct(klass, NULL, rb_encdec__free, (void *)detector); detector->csd = ucsdet_open(&status); From 01df9a56e66bb07d0dd954b0c7a792716f2ed629 Mon Sep 17 00:00:00 2001 From: Stan Hu Date: Mon, 8 Jul 2024 21:51:46 -0700 Subject: [PATCH 3/3] Avoid C++ name mangling in C code Now that a C++ compiler is used to support later versions of ICU, we need to surround all C code with `extern "C"` to avoid name mangling that would cause issues with symbol resolution on macOS. 
--- ext/charlock_holmes/common.h | 15 +++++++++++++++ ext/charlock_holmes/ext.c | 6 +----- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/ext/charlock_holmes/common.h b/ext/charlock_holmes/common.h index 98a782d..ca091b1 100644 --- a/ext/charlock_holmes/common.h +++ b/ext/charlock_holmes/common.h @@ -38,4 +38,19 @@ static inline VALUE charlock_new_str2(const char *str) #endif } + +#ifdef __cplusplus +extern "C" +{ +#endif + +extern void Init_charlock_holmes(); +extern void _init_charlock_encoding_detector(); +extern void _init_charlock_converter(); +extern void _init_charlock_transliterator(); + +#ifdef __cplusplus +} +#endif + #endif diff --git a/ext/charlock_holmes/ext.c b/ext/charlock_holmes/ext.c index 5a7a81a..7c9539a 100644 --- a/ext/charlock_holmes/ext.c +++ b/ext/charlock_holmes/ext.c @@ -1,9 +1,5 @@ #include "common.h" -extern void _init_charlock_encoding_detector(); -extern void _init_charlock_converter(); -extern void _init_charlock_transliterator(); - VALUE rb_mCharlockHolmes; void Init_charlock_holmes() { @@ -12,4 +8,4 @@ void Init_charlock_holmes() { _init_charlock_encoding_detector(); _init_charlock_converter(); _init_charlock_transliterator(); -} \ No newline at end of file +}