From 1eff9d74519c7d21592dddc3c3665993bb0f117f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Harald=20Kj=C3=A6r=20Nielsen?=
Date: Fri, 19 Jan 2024 18:45:27 -0800
Subject: [PATCH] Proper fix utf8 command line arguments (#14253)

https://github.com/protocolbuffers/protobuf/pull/14197 Tried to fix utf-8 issue, but it didnt handle multibyte chars.
Only way I found that works constantly is using `CommandLineToArgvW`.
To not ripple out `wchar_t`, I convert to and from where needed

Closes #14253

COPYBARA_INTEGRATE_REVIEW=https://github.com/protocolbuffers/protobuf/pull/14253 from hknielsen:proper-fix-none-ascii-issue cad753e9e6f229d83657db6e3df7dfdefa39b7e5
PiperOrigin-RevId: 599990369
---
Review notes (not part of the commit message):
  * main() on MSVC now passes the converted UTF-8 arguments (argv_mbcs) to
    ProtobufMain; they were built but the unconverted argv was still passed.
  * main() now handles a NULL result from CommandLineToArgvW, releases the
    wide argument array with LocalFree, and null-terminates argv_mbcs.
  * The "index" lines below still carry the upstream blob ids.

 build_defs/cpp_opts.bzl                       |  1 +
 .../compiler/command_line_interface.cc        | 17 +++++++++++++++--
 .../compiler/command_line_interface.h         |  2 +-
 src/google/protobuf/compiler/main.cc          | 41 ++++++++++++++++++++++++++++++++++++++++-
 4 files changed, 57 insertions(+), 4 deletions(-)

diff --git a/build_defs/cpp_opts.bzl b/build_defs/cpp_opts.bzl
index 17f330bb4175f..09c8b79179ceb 100644
--- a/build_defs/cpp_opts.bzl
+++ b/build_defs/cpp_opts.bzl
@@ -36,6 +36,7 @@ LINK_OPTS = select({
     "//build_defs:config_msvc": [
         # Suppress linker warnings about files with no symbols defined.
         "-ignore:4221",
+        "Shell32.lib",
     ],
     "@platforms//os:macos": [
         "-lpthread",
diff --git a/src/google/protobuf/compiler/command_line_interface.cc b/src/google/protobuf/compiler/command_line_interface.cc
index d186492110b5d..d5a125fcc9668 100644
--- a/src/google/protobuf/compiler/command_line_interface.cc
+++ b/src/google/protobuf/compiler/command_line_interface.cc
@@ -1766,10 +1766,23 @@ bool CommandLineInterface::MakeInputsBeProtoPathRelative(
 
 
 bool CommandLineInterface::ExpandArgumentFile(
-    const std::string& file, std::vector<std::string>* arguments) {
+    const char* file, std::vector<std::string>* arguments) {
+// On Windows, to make ifstream handle UTF-8 file names properly, we need to
+// convert the name to a wide string. If we don't, the file can't be opened.
+#ifdef _MSC_VER
+  // Convert the file name to wide chars.
+  int size = MultiByteToWideChar(CP_UTF8, 0, file, strlen(file), NULL, 0);
+  std::wstring file_str;
+  file_str.resize(size);
+  MultiByteToWideChar(CP_UTF8, 0, file, strlen(file), &file_str[0],
+                      file_str.size());
+#else
+  std::string file_str(file);
+#endif
+
   // The argument file is searched in the working directory only. We don't
   // use the proto import path here.
-  std::ifstream file_stream(file.c_str());
+  std::ifstream file_stream(file_str.c_str());
   if (!file_stream.is_open()) {
     return false;
   }
diff --git a/src/google/protobuf/compiler/command_line_interface.h b/src/google/protobuf/compiler/command_line_interface.h
index 4bfee6acd54ea..0828497d6a71a 100644
--- a/src/google/protobuf/compiler/command_line_interface.h
+++ b/src/google/protobuf/compiler/command_line_interface.h
@@ -240,7 +240,7 @@ class PROTOC_EXPORT CommandLineInterface {
 
   // Read an argument file and append the file's content to the list of
   // arguments. Return false if the file cannot be read.
-  bool ExpandArgumentFile(const std::string& file,
+  bool ExpandArgumentFile(const char* file,
                           std::vector<std::string>* arguments);
 
   // Parses a command-line argument into a name/value pair. Returns
diff --git a/src/google/protobuf/compiler/main.cc b/src/google/protobuf/compiler/main.cc
index b43b2f15d4061..4a4d5c9a50397 100644
--- a/src/google/protobuf/compiler/main.cc
+++ b/src/google/protobuf/compiler/main.cc
@@ -21,6 +21,10 @@
 // Must be included last.
 #include "google/protobuf/port_def.inc"
 
+#ifdef _MSC_VER
+#include <windows.h>
+#endif
+
 namespace google {
 namespace protobuf {
 namespace compiler {
@@ -101,6 +105,41 @@ int ProtobufMain(int argc, char* argv[]) {
 }  // namespace protobuf
 }  // namespace google
 
+#ifdef _MSC_VER
+// Converts a NUL-terminated wide string to UTF-8. Returns an empty string
+// when WideCharToMultiByte reports a size of 0 (empty input or failure).
+std::string ToMultiByteUtf8String(const wchar_t* input) {
+  int size =
+      WideCharToMultiByte(CP_UTF8, 0, input, wcslen(input), 0, 0, NULL, NULL);
+  std::string result(size, 0);
+  if (size)
+    WideCharToMultiByte(CP_UTF8, 0, input, wcslen(input), &result[0], size,
+                        NULL, NULL);
+  return result;
+}
+
 int main(int argc, char* argv[]) {
-  return google::protobuf::compiler::ProtobufMain(argc, argv);
+  // Re-parse the process command line as wide characters, then hand
+  // ProtobufMain a UTF-8 copy of every argument.
+  int wide_argc = 0;
+  wchar_t** wargv = CommandLineToArgvW(GetCommandLineW(), &wide_argc);
+  if (wargv == nullptr) {
+    // The command line could not be parsed; use the arguments as given.
+    return google::protobuf::compiler::ProtobufMain(argc, argv);
+  }
+  // The converted strings are deliberately never freed: argv_mbcs points
+  // into them and must stay valid while ProtobufMain runs.
+  char** argv_mbcs = new char*[wide_argc + 1];
+  for (int i = 0; i < wide_argc; i++) {
+    std::string* multibyte_string = new auto(ToMultiByteUtf8String(wargv[i]));
+    argv_mbcs[i] = const_cast<char*>(multibyte_string->c_str());
+  }
+  argv_mbcs[wide_argc] = nullptr;
+  LocalFree(wargv);
+  return google::protobuf::compiler::ProtobufMain(wide_argc, argv_mbcs);
 }
+#else
+int main(int argc, char* argv[]) {
+  return google::protobuf::compiler::ProtobufMain(argc, argv);
+}
+#endif