diff --git a/src/host/directio.cpp b/src/host/directio.cpp index 4933aec9ab5..38be8e8838b 100644 --- a/src/host/directio.cpp +++ b/src/host/directio.cpp @@ -27,8 +27,6 @@ using Microsoft::Console::Interactivity::ServiceLocator; class CONSOLE_INFORMATION; -#define UNICODE_DBCS_PADDING 0xffff - // Routine Description: // - converts non-unicode InputEvents to unicode InputEvents // Arguments: @@ -531,61 +529,59 @@ void EventsToUnicode(_Inout_ std::deque>& inEvents, { try { - std::vector tempBuffer(buffer.begin(), buffer.end()); - const auto size = rectangle.Dimensions(); - auto tempIter = tempBuffer.cbegin(); auto outIter = buffer.begin(); - for (til::CoordType i = 0; i < size.Y; i++) + for (til::CoordType i = 0; i < size.Y; ++i) { - for (til::CoordType j = 0; j < size.X; j++) + for (til::CoordType j = 0; j < size.X; ++j, ++outIter) { + auto& in1 = *outIter; + + // If .AsciiChar and .UnicodeChar have the same offset (since they're a union), + // we can just write the latter with a byte-sized value to set the former + // _and_ simultaneously clear the upper byte of .UnicodeChar to 0. Nice! + static_assert(offsetof(CHAR_INFO, Char.AsciiChar) == offsetof(CHAR_INFO, Char.UnicodeChar)); + // Any time we see the lead flag, we presume there will be a trailing one following it. // Giving us two bytes of space (one per cell in the ascii part of the character union) // to fill with whatever this Unicode character converts into. - if (WI_IsFlagSet(tempIter->Attributes, COMMON_LVB_LEADING_BYTE)) + if (WI_IsFlagSet(in1.Attributes, COMMON_LVB_LEADING_BYTE)) { // As long as we're not looking at the exact last column of the buffer... if (j < size.X - 1) { // Walk forward one because we're about to consume two cells. - j++; + ++j; + ++outIter; + + auto& in2 = *outIter; // Try to convert the unicode character (2 bytes) in the leading cell to the codepage. 
- CHAR AsciiDbcs[2] = { 0 }; - auto NumBytes = gsl::narrow(sizeof(AsciiDbcs)); - NumBytes = ConvertToOem(codepage, &tempIter->Char.UnicodeChar, 1, &AsciiDbcs[0], NumBytes); + CHAR AsciiDbcs[2]{}; + ConvertToOem(codepage, &in1.Char.UnicodeChar, 1, &AsciiDbcs[0], 2); // Fill the 1 byte (AsciiChar) portion of the leading and trailing cells with each of the bytes returned. - outIter->Char.AsciiChar = AsciiDbcs[0]; - outIter->Attributes = tempIter->Attributes; - outIter++; - tempIter++; - outIter->Char.AsciiChar = AsciiDbcs[1]; - outIter->Attributes = tempIter->Attributes; - outIter++; - tempIter++; + // We have to be a bit careful here not to directly write the CHARs, because CHARs are signed whereas wchar_t isn't + // and we don't want any sign-extension. We want a 1:1 copy instead, so cast it to an unsigned char first. + in1.Char.UnicodeChar = til::bit_cast(AsciiDbcs[0]); + in2.Char.UnicodeChar = til::bit_cast(AsciiDbcs[1]); } else { // When we're in the last column with only a leading byte, we can't return that without a trailing. // Instead, replace the output data with just a space and clear all flags. - outIter->Char.AsciiChar = UNICODE_SPACE; - outIter->Attributes = tempIter->Attributes; - WI_ClearAllFlags(outIter->Attributes, COMMON_LVB_SBCSDBCS); - outIter++; - tempIter++; + in1.Char.UnicodeChar = UNICODE_SPACE; + WI_ClearAllFlags(in1.Attributes, COMMON_LVB_SBCSDBCS); } } - else if (WI_AreAllFlagsClear(tempIter->Attributes, COMMON_LVB_SBCSDBCS)) + else if (WI_AreAllFlagsClear(in1.Attributes, COMMON_LVB_SBCSDBCS)) { // If there are no leading/trailing pair flags, then we only have 1 ascii byte to try to fit the // 2 byte UTF-16 character into. Give it a go. 
- ConvertToOem(codepage, &tempIter->Char.UnicodeChar, 1, &outIter->Char.AsciiChar, 1); - outIter->Attributes = tempIter->Attributes; - outIter++; - tempIter++; + CHAR asciiChar{}; + ConvertToOem(codepage, &in1.Char.UnicodeChar, 1, &asciiChar, 1); + in1.Char.UnicodeChar = til::bit_cast(asciiChar); } } } @@ -615,58 +611,57 @@ void EventsToUnicode(_Inout_ std::deque>& inEvents, const auto size = rectangle.Dimensions(); auto outIter = buffer.begin(); - for (til::CoordType i = 0; i < size.Y; i++) + for (til::CoordType i = 0; i < size.Y; ++i) { - for (til::CoordType j = 0; j < size.X; j++) + for (til::CoordType j = 0; j < size.X; ++j, ++outIter) { // Clear lead/trailing flags. We'll determine it for ourselves versus the given codepage. - WI_ClearAllFlags(outIter->Attributes, COMMON_LVB_SBCSDBCS); + auto& in1 = *outIter; + WI_ClearAllFlags(in1.Attributes, COMMON_LVB_SBCSDBCS); // If the 1 byte given is a lead in this codepage, we likely need two cells for the width. - if (IsDBCSLeadByteConsole(outIter->Char.AsciiChar, &gci.OutputCPInfo)) + if (IsDBCSLeadByteConsole(in1.Char.AsciiChar, &gci.OutputCPInfo)) { // If we're not on the last column, we have two cells to use. if (j < size.X - 1) { // Mark we're consuming two cells. - j++; + ++outIter; + ++j; + + // Just as above - clear the flags, as we're setting them ourselves. + auto& in2 = *outIter; + WI_ClearAllFlags(in2.Attributes, COMMON_LVB_SBCSDBCS); // Grab the lead/trailing byte pair from this cell and the next one forward. CHAR AsciiDbcs[2]; - AsciiDbcs[0] = outIter->Char.AsciiChar; - AsciiDbcs[1] = (outIter + 1)->Char.AsciiChar; + AsciiDbcs[0] = in1.Char.AsciiChar; + AsciiDbcs[1] = in2.Char.AsciiChar; // Convert it to UTF-16. - WCHAR UnicodeDbcs[2]; - ConvertOutputToUnicode(codepage, &AsciiDbcs[0], 2, &UnicodeDbcs[0], 2); + wchar_t wch = UNICODE_SPACE; + ConvertOutputToUnicode(codepage, &AsciiDbcs[0], 2, &wch, 1); // Store the actual character in the first available position. 
- outIter->Char.UnicodeChar = UnicodeDbcs[0]; - WI_ClearAllFlags(outIter->Attributes, COMMON_LVB_SBCSDBCS); - WI_SetFlag(outIter->Attributes, COMMON_LVB_LEADING_BYTE); - outIter++; + in1.Char.UnicodeChar = wch; + WI_SetFlag(in1.Attributes, COMMON_LVB_LEADING_BYTE); // Put a padding character in the second position. - outIter->Char.UnicodeChar = UNICODE_DBCS_PADDING; - WI_ClearAllFlags(outIter->Attributes, COMMON_LVB_SBCSDBCS); - WI_SetFlag(outIter->Attributes, COMMON_LVB_TRAILING_BYTE); - outIter++; + in2.Char.UnicodeChar = wch; + WI_SetFlag(in2.Attributes, COMMON_LVB_TRAILING_BYTE); } else { // If we were on the last column, put in a space. - outIter->Char.UnicodeChar = UNICODE_SPACE; - WI_ClearAllFlags(outIter->Attributes, COMMON_LVB_SBCSDBCS); - outIter++; + in1.Char.UnicodeChar = UNICODE_SPACE; } } else { // If it's not detected as a lead byte of a pair, then just convert it in place and move on. - auto c = outIter->Char.AsciiChar; - - ConvertOutputToUnicode(codepage, &c, 1, &outIter->Char.UnicodeChar, 1); - outIter++; + wchar_t wch = UNICODE_SPACE; + ConvertOutputToUnicode(codepage, &in1.Char.AsciiChar, 1, &wch, 1); + in1.Char.UnicodeChar = wch; } } } @@ -679,7 +674,7 @@ void EventsToUnicode(_Inout_ std::deque>& inEvents, [[nodiscard]] static std::vector _ConvertCellsToMungedW(gsl::span buffer, const Viewport& rectangle) { std::vector result; - result.reserve(buffer.size() * 2); // we estimate we'll need up to double the cells if they all expand. + result.reserve(buffer.size()); const auto size = rectangle.Dimensions(); auto bufferIter = buffer.begin(); @@ -689,12 +684,11 @@ void EventsToUnicode(_Inout_ std::deque>& inEvents, for (til::CoordType j = 0; j < size.X; j++) { // Prepare a candidate charinfo on the output side copying the colors but not the lead/trail information. 
- CHAR_INFO candidate; - candidate.Attributes = bufferIter->Attributes; + auto candidate = *bufferIter; WI_ClearAllFlags(candidate.Attributes, COMMON_LVB_SBCSDBCS); // If the glyph we're given is full width, it needs to take two cells. - if (IsGlyphFullWidth(bufferIter->Char.UnicodeChar)) + if (IsGlyphFullWidth(candidate.Char.UnicodeChar)) { // If we're not on the final cell of the row... if (j < size.X - 1) @@ -703,14 +697,11 @@ void EventsToUnicode(_Inout_ std::deque>& inEvents, j++; // Fill one cell with a copy of the color and character marked leading - candidate.Char.UnicodeChar = bufferIter->Char.UnicodeChar; WI_SetFlag(candidate.Attributes, COMMON_LVB_LEADING_BYTE); result.push_back(candidate); // Fill a second cell with a copy of the color marked trailing and a padding character. - candidate.Char.UnicodeChar = UNICODE_DBCS_PADDING; - candidate.Attributes = bufferIter->Attributes; - WI_ClearAllFlags(candidate.Attributes, COMMON_LVB_SBCSDBCS); + WI_ClearFlag(candidate.Attributes, COMMON_LVB_LEADING_BYTE); WI_SetFlag(candidate.Attributes, COMMON_LVB_TRAILING_BYTE); } else @@ -719,17 +710,12 @@ void EventsToUnicode(_Inout_ std::deque>& inEvents, candidate.Char.UnicodeChar = UNICODE_SPACE; } } - else - { - // If we're not full-width, we're half-width. Just copy the character over. - candidate.Char.UnicodeChar = bufferIter->Char.UnicodeChar; - } // Push our candidate in. result.push_back(candidate); // Advance to read the next item. 
- bufferIter++; + ++bufferIter; } } return result; @@ -743,8 +729,8 @@ void EventsToUnicode(_Inout_ std::deque>& inEvents, try { const auto& gci = ServiceLocator::LocateGlobals().getConsoleInformation(); - const auto& storageBuffer = context.GetActiveBuffer(); - const auto storageSize = storageBuffer.GetBufferSize().Dimensions(); + const auto& storageBuffer = context.GetActiveBuffer().GetTextBuffer(); + const auto storageSize = storageBuffer.GetSize().Dimensions(); const auto targetSize = requestRectangle.Dimensions(); @@ -802,11 +788,11 @@ void EventsToUnicode(_Inout_ std::deque>& inEvents, // Copy the data into position... *targetIter = gci.AsCharInfo(*sourceIter); // ... and advance the read iterator. - sourceIter++; + ++sourceIter; } // Always advance the write iterator, we might have skipped it due to clipping. - targetIter++; + ++targetIter; // Increment the target targetPos.X++; diff --git a/src/host/ft_host/CJK_DbcsTests.cpp b/src/host/ft_host/CJK_DbcsTests.cpp index 4e2fd3a9d1e..e3091d17f28 100644 --- a/src/host/ft_host/CJK_DbcsTests.cpp +++ b/src/host/ft_host/CJK_DbcsTests.cpp @@ -515,6 +515,49 @@ namespace PrepPattern makeCharInfo(0x0020, white), }; + // Receive Output Table: + // attr | wchar (char) | symbol + // ------------------------------------ + // 0x029 | 0x0051 (0x51) | Q + // 0x029 | 0x0000 (0x00) | + // 0x029 | 0x0000 (0x00) | + // 0x029 | 0x0000 (0x00) | + // 0x029 | 0x005A (0x5A) | Z + // 0x029 | 0x0059 (0x59) | Y + // 0x029 | 0x0058 (0x58) | X + // 0x029 | 0x0057 (0x57) | W + // 0x029 | 0x0056 (0x56) | V + // 0x029 | 0x0055 (0x55) | U + // 0x029 | 0x0054 (0x54) | T + // 0x029 | 0x0000 (0x00) | + // 0x007 | 0x0020 (0x20) | + // 0x007 | 0x0020 (0x20) | + // 0x007 | 0x0020 (0x20) | + // 0x007 | 0x0020 (0x20) | + // ... + // "Space Padded" means any unused data in the buffer will be filled with spaces and the default attribute. + // "Dedupe" means that any full-width characters in the buffer will be returned as single copies. 
+ // But due to the target being a DBCS character set that can't represent these in a single char, it's null. + // "A" means that we intend in-codepage (char) data to be browsed in the resulting struct + static constexpr CharInfoPattern SpacePaddedDedupeInvalidA{ + makeCharInfo(0x0051, colored), + makeCharInfo(0x0000, colored), + makeCharInfo(0x0000, colored), + makeCharInfo(0x0000, colored), + makeCharInfo(0x005a, colored), + makeCharInfo(0x0059, colored), + makeCharInfo(0x0058, colored), + makeCharInfo(0x0057, colored), + makeCharInfo(0x0056, colored), + makeCharInfo(0x0055, colored), + makeCharInfo(0x0054, colored), + makeCharInfo(0x0000, colored), + makeCharInfo(0x0020, white), + makeCharInfo(0x0020, white), + makeCharInfo(0x0020, white), + makeCharInfo(0x0020, white), + }; + // Receive Output Table: // attr | wchar (char) | symbol // ------------------------------------ @@ -685,93 +728,6 @@ namespace PrepPattern makeCharInfo(0x306b, colored | trailing), }; - // Receive Output Table: - // attr | wchar (char) | symbol - // ------------------------------------ - // 0x029 | 0x0051 (0x51) | Q - // 0x129 | 0x3044 (0x44) | Hiragana I - // 0x229 | 0xFFFF (0xFF) | Invalid Unicode Character - // 0x129 | 0x304B (0x4B) | Hiragana KA - // 0x229 | 0xFFFF (0xFF) | Invalid Unicode Character - // 0x129 | 0x306A (0x6A) | Hiragana NA - // 0x229 | 0xFFFF (0xFF) | Invalid Unicode Character - // 0x029 | 0x005A (0x5A) | Z - // 0x029 | 0x0059 (0x59) | Y - // 0x029 | 0x0058 (0x58) | X - // 0x029 | 0x0057 (0x57) | W - // 0x029 | 0x0056 (0x56) | V - // 0x029 | 0x0055 (0x55) | U - // 0x029 | 0x0054 (0x54) | T - // 0x129 | 0x306B (0x6B) | Hiragana NI - // 0x229 | 0xFFFF (0xFF) | Invalid Unicode Character - // ... - // "Doubled" means that any full-width characters in the buffer are returned twice with a leading and trailing byte marker. - // "W" means that we intend Unicode data to be browsed in the resulting struct (even though wchar and char are unioned.) 
- // "NegativeOneTrailing" means that all trailing bytes have their character replaced with the value -1 or 0xFFFF - static constexpr CharInfoPattern DoubledWNegativeOneTrailing{ - makeCharInfo(0x0051, colored), - makeCharInfo(0x3044, colored | leading), - makeCharInfo(0xffff, colored | trailing), - makeCharInfo(0x304b, colored | leading), - makeCharInfo(0xffff, colored | trailing), - makeCharInfo(0x306a, colored | leading), - makeCharInfo(0xffff, colored | trailing), - makeCharInfo(0x005a, colored), - makeCharInfo(0x0059, colored), - makeCharInfo(0x0058, colored), - makeCharInfo(0x0057, colored), - makeCharInfo(0x0056, colored), - makeCharInfo(0x0055, colored), - makeCharInfo(0x0054, colored), - makeCharInfo(0x306b, colored | leading), - makeCharInfo(0xffff, colored | trailing), - }; - - // Receive Output Table: - // attr | wchar (char) | symbol - // ------------------------------------ - // 0x029 | 0x0051 (0x51) | Q - // 0x129 | 0x3082 (0x82) | Hiragana I Unicode 0x3044 with the lower byte covered by Shift-JIS Codepage 932 Lead Byte 0x82. - // 0x229 | 0xFFA2 (0xA2) | Invalid Unicode Character 0xFFFF with the lower byte covered by Shift-JIS Codepage 932 Trail Byte 0xA2 - // 0x129 | 0x3082 (0x82) | Hiragana KA Unicode 0x304B with the lower byte covered by Shift-JIS Codepage 932 Lead Byte 0x82. - // 0x229 | 0xFFA9 (0xA9) | Invalid Unicode Character 0xFFFF with the lower byte covered by Shift-JIS Codepage 932 Trail Byte 0xA9 - // 0x129 | 0x3082 (0x82) | Hiragana NA 0x306A with the lower byte covered by Shift-JIS Codepage 932 Lead Byte 0x82. - // 0x229 | 0xFFC8 (0xC8) | Invalid Unicode Character 0xFFFF with the lower byte covered by Shift-JIS Codepage 932 Trail Byte 0xC8 - // 0x029 | 0x005A (0x5A) | Z - // 0x029 | 0x0059 (0x59) | Y - // 0x029 | 0x0058 (0x58) | X - // 0x029 | 0x0057 (0x57) | W - // 0x029 | 0x0056 (0x56) | V - // 0x007 | 0x0020 (0x20) | - // 0x007 | 0x0020 (0x20) | - // 0x007 | 0x0020 (0x20) | - // 0x007 | 0x0020 (0x20) | - // ... 
- // "AStompsW" means that the Unicode characters were fit into the result buffer first, then the Multibyte conversion - // was written over the top of the lower byte. This makes an invalid Unicode character, but can be understood - // as in-codepage from the char portion of the union. - // "NegativeOnePattern" means that every trailing byte started as -1 or 0xFFFF - // "TruncateSpacePadded" means that we only allowed ourselves to return as many characters as is in the unicode length - // of the string and then filled the rest of the buffer after that with spaces. - static constexpr CharInfoPattern AStompsWNegativeOnePatternTruncateSpacePadded{ - makeCharInfo(0x0051, colored), - makeCharInfo(0x3082, colored | leading), - makeCharInfo(0xffa2, colored | trailing), - makeCharInfo(0x3082, colored | leading), - makeCharInfo(0xffa9, colored | trailing), - makeCharInfo(0x3082, colored | leading), - makeCharInfo(0xffc8, colored | trailing), - makeCharInfo(0x005a, colored), - makeCharInfo(0x0059, colored), - makeCharInfo(0x0058, colored), - makeCharInfo(0x0057, colored), - makeCharInfo(0x0056, colored), - makeCharInfo(0x0020, white), - makeCharInfo(0x0020, white), - makeCharInfo(0x0020, white), - makeCharInfo(0x0020, white), - }; - // Receive Output Table: // attr | wchar (char) | symbol // ------------------------------------ @@ -938,138 +894,6 @@ namespace PrepPattern makeCharInfo(0x0000, colored), }; - // Receive Output Table: - // attr | wchar (char) | symbol - // ------------------------------------ - // 0x029 | 0x0051 (0x51) | Q - // 0x129 | 0x3082 (0x82) | Hiragana I Unicode 0x3044 with the lower byte covered by Shift-JIS Codepage 932 Lead Byte 0x82. - // 0x229 | 0xFFA2 (0xA2) | Invalid Unicode Character 0xFFFF with the lower byte covered by Shift-JIS Codepage 932 Trail Byte 0xA2 - // 0x129 | 0x3082 (0x82) | Hiragana KA Unicode 0x304B with the lower byte covered by Shift-JIS Codepage 932 Lead Byte 0x82. 
- // 0x229 | 0xFFA9 (0xA9) | Invalid Unicode Character 0xFFFF with the lower byte covered by Shift-JIS Codepage 932 Trail Byte 0xA9 - // 0x129 | 0x3082 (0x82) | Hiragana NA 0x306A with the lower byte covered by Shift-JIS Codepage 932 Lead Byte 0x82. - // 0x229 | 0xFFC8 (0xC8) | Invalid Unicode Character 0xFFFF with the lower byte covered by Shift-JIS Codepage 932 Trail Byte 0xC8 - // 0x029 | 0x005A (0x5A) | Z - // 0x029 | 0x0059 (0x59) | Y - // 0x029 | 0x0058 (0x58) | X - // 0x029 | 0x0057 (0x57) | W - // 0x029 | 0x0056 (0x56) | V - // 0x029 | 0x0055 (0x55) | U - // 0x029 | 0x0054 (0x54) | T - // 0x129 | 0x3082 (0x30) | Hiragana NI 0x306B with the lower byte covered by Shift-JIS Codepage 932 Lead Byte 0x82. - // 0x229 | 0xFFC9 (0xC9) | Invalid Unicode Character 0xFFFF with the lower byte covered by Shift-JIS Codepage 932 Trail Byte 0xC9 - // ... - // "AOn" means that the Unicode characters were fit into the result buffer first, then the Multibyte conversion - // was written over the top of the lower byte. This makes an invalid Unicode character, but can be understood - // as in-codepage from the char portion of the union. 
- // "DoubledW" means that the full-width Unicode characters were inserted twice into the buffer (and marked lead/trailing) - // "NegativeOneTrailing" means that every trailing byte started as -1 or 0xFFFF - static constexpr CharInfoPattern AOnDoubledWNegativeOneTrailing{ - makeCharInfo(0x0051, colored), - makeCharInfo(0x3082, colored | leading), - makeCharInfo(0xffa2, colored | trailing), - makeCharInfo(0x3082, colored | leading), - makeCharInfo(0xffa9, colored | trailing), - makeCharInfo(0x3082, colored | leading), - makeCharInfo(0xffc8, colored | trailing), - makeCharInfo(0x005a, colored), - makeCharInfo(0x0059, colored), - makeCharInfo(0x0058, colored), - makeCharInfo(0x0057, colored), - makeCharInfo(0x0056, colored), - makeCharInfo(0x0055, colored), - makeCharInfo(0x0054, colored), - makeCharInfo(0x3082, colored | leading), - makeCharInfo(0xffc9, colored | trailing), - }; - - // Receive Output Table: - // attr | wchar (char) | symbol - // ------------------------------------ - // 0x029 | 0x0051 (0x51) | Q - // 0x129 | 0x3082 (0x82) | Hiragana I Unicode 0x3044 with the lower byte covered by Shift-JIS Codepage 932 Lead Byte 0x82. - // 0x229 | 0xFFA2 (0xA2) | Invalid Unicode Character 0xFFFF with the lower byte covered by Shift-JIS Codepage 932 Trail Byte 0xA2 - // 0x129 | 0x3082 (0x82) | Hiragana I Unicode 0x3044 with the lower byte covered by Shift-JIS Codepage 932 Lead Byte 0x82. - // 0x229 | 0xFFA2 (0xA2) | Invalid Unicode Character 0xFFFF with the lower byte covered by Shift-JIS Codepage 932 Trail Byte 0xA2 - // 0x129 | 0x3082 (0x82) | Hiragana KA Unicode 0x304B with the lower byte covered by Shift-JIS Codepage 932 Lead Byte 0x82. - // 0x229 | 0xFFA9 (0xA9) | Invalid Unicode Character 0xFFFF with the lower byte covered by Shift-JIS Codepage 932 Trail Byte 0xA9 - // 0x129 | 0x3082 (0x82) | Hiragana KA Unicode 0x304B with the lower byte covered by Shift-JIS Codepage 932 Lead Byte 0x82. 
- // 0x229 | 0xFFA9 (0xA9) | Invalid Unicode Character 0xFFFF with the lower byte covered by Shift-JIS Codepage 932 Trail Byte 0xA9 - // 0x129 | 0x3082 (0x82) | Hiragana NA 0x306A with the lower byte covered by Shift-JIS Codepage 932 Lead Byte 0x82. - // 0x229 | 0xFFC8 (0xC8) | Invalid Unicode Character 0xFFFF with the lower byte covered by Shift-JIS Codepage 932 Trail Byte 0xC8 - // 0x129 | 0x3082 (0x82) | Hiragana NA 0x306A with the lower byte covered by Shift-JIS Codepage 932 Lead Byte 0x82. - // 0x229 | 0xFFC8 (0xC8) | Invalid Unicode Character 0xFFFF with the lower byte covered by Shift-JIS Codepage 932 Trail Byte 0xC8 - // 0x029 | 0x005A (0x5A) | Z - // 0x029 | 0x0059 (0x59) | Y - // 0x029 | 0x0058 (0x58) | X - // ... - // "AOn" means that the Unicode characters were fit into the result buffer first, then the Multibyte conversion - // was written over the top of the lower byte. This makes an invalid Unicode character, but can be understood - // as in-codepage from the char portion of the union. 
- // "DoubledW" means that the full-width Unicode characters were inserted twice into the buffer (and marked lead/trailing) - // "NegativeOneTrailing" means that every trailing byte started as -1 or 0xFFFF - static constexpr CharInfoPattern AOnDoubleDoubledWNegativeOneTrailing{ - makeCharInfo(0x0051, colored), - makeCharInfo(0x3082, colored | leading), - makeCharInfo(0xffa2, colored | trailing), - makeCharInfo(0x3082, colored | leading), - makeCharInfo(0xffa2, colored | trailing), - makeCharInfo(0x3082, colored | leading), - makeCharInfo(0xffa9, colored | trailing), - makeCharInfo(0x3082, colored | leading), - makeCharInfo(0xffa9, colored | trailing), - makeCharInfo(0x3082, colored | leading), - makeCharInfo(0xffc8, colored | trailing), - makeCharInfo(0x3082, colored | leading), - makeCharInfo(0xffc8, colored | trailing), - makeCharInfo(0x005a, colored), - makeCharInfo(0x0059, colored), - makeCharInfo(0x0058, colored), - }; - - // Receive Output Table: - // attr | wchar (char) | symbol - // ------------------------------------ - // 0x029 | 0x0051 (0x51) | Q - // 0x129 | 0x3082 (0x82) | Hiragana I Unicode 0x3044 with the lower byte covered by Shift-JIS Codepage 932 Lead Byte 0x82. - // 0x229 | 0x30A2 (0xA2) | Hiragana I Unicode 0x3044 with the lower byte covered by Shift-JIS Codepage 932 Trail Byte 0xA2 - // 0x129 | 0x3082 (0x82) | Hiragana KA Unicode 0x304B with the lower byte covered by Shift-JIS Codepage 932 Lead Byte 0x82. - // 0x229 | 0x30A9 (0xA9) | Hiragana KA Unicode 0x304B with the lower byte covered by Shift-JIS Codepage 932 Trail Byte 0xA9 - // 0x129 | 0x3082 (0x82) | Hiragana NA 0x306A with the lower byte covered by Shift-JIS Codepage 932 Lead Byte 0x82. 
- // 0x229 | 0x39C8 (0xC8) | Hiragana NA 0x306A with the lower byte covered by Shift-JIS Codepage 932 Trail Byte 0xC8 - // 0x029 | 0x005A (0x5A) | Z - // 0x029 | 0x0059 (0x59) | Y - // 0x029 | 0x0058 (0x58) | X - // 0x029 | 0x0057 (0x57) | W - // 0x029 | 0x0056 (0x56) | V - // 0x029 | 0x0055 (0x55) | U - // 0x029 | 0x0054 (0x54) | T - // 0x129 | 0x3082 (0x30) | Hiragana NI 0x306B with the lower byte covered by Shift-JIS Codepage 932 Lead Byte 0x82. - // 0x229 | 0x30C9 (0xC9) | Hiragana NI 0x306B with the lower byte covered by Shift-JIS Codepage 932 Trail Byte 0xC9 - // ... - // "AOn" means that the Unicode characters were fit into the result buffer first, then the Multibyte conversion - // was written over the top of the lower byte. This makes an invalid Unicode character, but can be understood - // as in-codepage from the char portion of the union. - // "DoubledW" means that the full-width Unicode characters were inserted twice into the buffer (and marked lead/trailing) - // "NegativeOneTrailing" means that every trailing byte started as -1 or 0xFFFF - static constexpr CharInfoPattern AOnDoubledW{ - makeCharInfo(0x0051, colored), - makeCharInfo(0x3082, colored | leading), - makeCharInfo(0x30a2, colored | trailing), - makeCharInfo(0x3082, colored | leading), - makeCharInfo(0x30a9, colored | trailing), - makeCharInfo(0x3082, colored | leading), - makeCharInfo(0x30c8, colored | trailing), - makeCharInfo(0x005a, colored), - makeCharInfo(0x0059, colored), - makeCharInfo(0x0058, colored), - makeCharInfo(0x0057, colored), - makeCharInfo(0x0056, colored), - makeCharInfo(0x0055, colored), - makeCharInfo(0x0054, colored), - makeCharInfo(0x3082, colored | leading), - makeCharInfo(0x30c9, colored | trailing), - }; - // Receive Output Table: // attr | wchar (char) | symbol // ------------------------------------ @@ -1195,16 +1019,15 @@ const CharInfoPattern& DbcsWriteRead::PrepReadConsoleOutput( { if (fIsTrueTypeFont) { - // When written with WriteConsoleOutputW and read back 
with ReadConsoleOutputA under TT font, we will get a deduplicated - // set of Unicode characters (YES. Unicode characters despite calling the A API to read back) that is space padded out - // There will be no lead/trailing markings. - return PrepPattern::SpacePaddedDedupeW; + // Normally this would be SpacePaddedDedupeA (analogous to the SpacePaddedDedupeW above), but since the narrow + // unicode chars can't be represented as narrow DBCS (since those don't exist) we get SpacePaddedDedupeInvalidA. + return PrepPattern::SpacePaddedDedupeInvalidA; } else { // When written with WriteConsoleOutputW and read back with ReadConsoleOutputA under Raster font, we will get the // double-byte sequences stomped on top of a Unicode filled CHAR_INFO structure that used -1 for trailing bytes. - return PrepPattern::AStompsWNegativeOnePatternTruncateSpacePadded; + return PrepPattern::SpacePaddedDedupeA; } } break; @@ -1227,13 +1050,13 @@ const CharInfoPattern& DbcsWriteRead::PrepReadConsoleOutput( if (fIsTrueTypeFont) { // In a TrueType font, we will get back Unicode characters doubled up and marked with leading and trailing bytes. - return PrepPattern::AOnDoubledW; + return PrepPattern::A; } else { - // When written with WriteConsoleOutputW and read back with ReadConsoleOutputA under Raster font, we will get the - // double-byte sequences stomped on top of a Unicode filled CHAR_INFO structure that used -1 for trailing bytes. - return PrepPattern::AOnDoubleDoubledWNegativeOneTrailing; + // When written with WriteConsoleOutputW and read back with ReadConsoleOutputA under Raster font, + // we will get the double-byte sequences doubled up, because each narrow cell is written as a DBCS separately. 
+ return PrepPattern::DoubledA; } } break; @@ -1242,10 +1065,9 @@ const CharInfoPattern& DbcsWriteRead::PrepReadConsoleOutput( { if (fIsTrueTypeFont) { - // When written with WriteConsoleOutputA and read back with ReadConsoleOutputW when the font is TrueType, - // we will get back Unicode characters doubled up and marked with leading and trailing bytes... - // ... except all the trailing bytes character values will be -1. - return PrepPattern::DoubledWNegativeOneTrailing; + // When written with WriteConsoleOutputA and read back with ReadConsoleOutputW when the font is TrueType, + // we will get back Unicode characters doubled up and marked with leading and trailing bytes. + return PrepPattern::DoubledW; } else { @@ -1258,7 +1080,7 @@ const CharInfoPattern& DbcsWriteRead::PrepReadConsoleOutput( { // When written with WriteConsoleOutputA and read back with ReadConsoleOutputA, // we will get back the double-byte sequences appropriately labeled with leading/trailing bytes. - return PrepPattern::AOnDoubledWNegativeOneTrailing; + return PrepPattern::A; } break; } @@ -1285,7 +1107,7 @@ const CharInfoPattern& DbcsWriteRead::PrepReadConsoleOutput( { // If we wrote with the CRT and are reading with A functions, the font doesn't matter. // We will always get back the double-byte sequences appropriately labeled with leading/trailing bytes. - return PrepPattern::AOnDoubledW; + return PrepPattern::A; } break; default: @@ -1911,8 +1733,7 @@ void DbcsTests::TestDbcsBisectWriteCellsBeginA() const auto originalReadRegion = readRegion; CHAR_INFO readCell; - CHAR_INFO expectedCell; - expectedCell.Char.UnicodeChar = L'\xffff'; + CHAR_INFO expectedCell{}; expectedCell.Char.AsciiChar = originalCell.Char.AsciiChar; expectedCell.Attributes = originalCell.Attributes; WI_ClearAllFlags(expectedCell.Attributes, COMMON_LVB_LEADING_BYTE | COMMON_LVB_TRAILING_BYTE);