Revision: 7482 http://sourceforge.jp/projects/ttssh2/scm/svn/commits/7482 Author: zmatsuo Date: 2019-03-15 00:38:11 +0900 (Fri, 15 Mar 2019) Log Message: ----------- WideCharToMultiByte()のUsedDefaultCharを使用して変換失敗を検出するようにした WideCharToMB()で変換に失敗することがあったので修正 Modified Paths: -------------- trunk/teraterm/common/codeconv.cpp -------------- next part -------------- Modified: trunk/teraterm/common/codeconv.cpp =================================================================== --- trunk/teraterm/common/codeconv.cpp 2019-03-13 15:33:06 UTC (rev 7481) +++ trunk/teraterm/common/codeconv.cpp 2019-03-14 15:38:11 UTC (rev 7482) @@ -122,7 +122,7 @@ buf[1] = cp932 & 0xff; len = 2; } - ret = MultiByteToWideChar(932, MB_ERR_INVALID_CHARS, (char *)buf, len, &wchar, 1); + ret = ::MultiByteToWideChar(932, MB_ERR_INVALID_CHARS, (char *)buf, len, &wchar, 1); if (ret <= 0) { // MultiByteToWideChar()が変換失敗 #if !defined(PRIORITY_CP932_TABLE) @@ -164,6 +164,7 @@ * @param mb_code マルチバイトの文字コード(0x0000-0xffff) * @param code_page マルチバイトのコードページ * @retval unicode(UTF-32文字コード) + * 0=エラー(変換できなかった) */ unsigned int MBCP_UTF32(unsigned short mb_code, int code_page) { @@ -187,7 +188,7 @@ buf[1] = mb_code & 0xff; len = 2; } - ret = MultiByteToWideChar(code_page, MB_ERR_INVALID_CHARS, buf, len, &wchar, 1); + ret = ::MultiByteToWideChar(code_page, MB_ERR_INVALID_CHARS, buf, len, &wchar, 1); if (ret <= 0) { c = 0; } else { @@ -210,6 +211,7 @@ DWORD mblen; wchar_t u16_str[2]; size_t u16_len; + BOOL use_default_char; if (u32 < 0x80) { return (unsigned short)u32; @@ -230,22 +232,20 @@ if (u16_len == 0) { return 0; } - mblen = WideCharToMultiByte(932, 0, u16_str, (int)u16_len, mbstr, 2, NULL, NULL); + use_default_char = FALSE; + mblen = ::WideCharToMultiByte(932, 0, u16_str, (int)u16_len, mbstr, 2, NULL,
&use_default_char); + if (use_default_char) { + // 変換できず、既定の文字を使った + goto next_convert; + } switch (mblen) { case 0: + // 変換失敗 + goto next_convert; case 1: - default: - if (mblen == 0 || mbstr[0] == '?') { - goto next_convert; - } else { - mb = (unsigned char)mbstr[0]; - return mb; - } + mb = (unsigned char)mbstr[0]; + return mb; case 2: - if (mbstr[0] == '?' && mbstr[1] == '?') { - // 2byte出力 && "??" の場合は変換できなかった - goto next_convert; - } mb = (((unsigned char)mbstr[0]) << 8) | (unsigned char)mbstr[1]; return mb; } @@ -540,11 +540,16 @@ */ size_t UTF32ToCP932(uint32_t u32, char *mb_ptr, size_t mb_len) { + uint16_t cp932; size_t cp932_out; - const uint16_t cp932 = UTF32_CP932(u32); - if (cp932 == 0 && u32 != 0) { + if (u32 == 0) { return 0; } + cp932 = UTF32_CP932(u32); + if (cp932 == 0) { + // 変換できなかった + return 0; + } if (mb_ptr == NULL) { mb_len = 2; } @@ -577,7 +582,7 @@ * @param[in] code_page 変換先codepage * @param[in,out] mb_ptr 変換先文字列出力先(NULLのとき出力しない) * @param[in] mb_len CP932出力先文字数(文字数,sizeof(wchar_t)*wstr_len bytes) - * @retval 出力したCP932文字数(byte数) + * @retval 出力したmultibyte文字数(byte数) * 0=エラー(変換できなかった) */ size_t UTF32ToMBCP(unsigned int u32, int code_page, char *mb_ptr, size_t mb_len) @@ -588,6 +593,7 @@ if (code_page == 932) { return UTF32ToCP932(u32, mb_ptr, mb_len); } else { + BOOL use_default_char; wchar_t u16_str[2]; size_t u16_len; u16_len = UTF32ToUTF16(u32, u16_str, 2); @@ -594,10 +600,11 @@ if (u16_len == 0) { return 0; } - mb_len = WideCharToMultiByte(code_page, 0, u16_str, u16_len, mb_ptr, mb_len, NULL, NULL); - if
(mb_ptr != NULL && u32 != '?' && mb_len == 1 && mb_ptr[0] == '?') { - // 変換できなかったとき、戻り値=1, 文字[0]='?' を返してくる - mb_len = 0; + use_default_char = FALSE; + mb_len = ::WideCharToMultiByte(code_page, 0, u16_str, u16_len, mb_ptr, mb_len, NULL, &use_default_char); + if (use_default_char) { + // 変換できず、既定の文字を使った + return 0; } return mb_len; } @@ -605,6 +612,7 @@ /** * wchar_t(UTF-16)文字列をマルチバイト文字列に変換する + * 変換できない文字は '?' で出力する * * @param[in] *wstr_ptr wchar_t文字列 * @param[in,out] *wstr_len wchar_t文字列長 @@ -643,33 +651,41 @@ while(mb_len > 0 && wstr_len > 0) { const wchar_t u16 = *wstr_ptr++; - uint32_t u32 = u16; size_t mb_out; wstr_len--; wstr_in++; - // サロゲート high? - if (IsHighSurrogate(u16)) { - if (wstr_len >= 1) { - const wchar_t u16_lo = *wstr_ptr++; - wstr_len--; - wstr_in++; - // サロゲート low? - if (IsLowSurrogate(u16_lo)) { - // サロゲートペア デコード - u32 = 0x10000 + (u16 - 0xd800) * 0x400 + (u16_lo - 0xdc00); + if (u16 != 0) { + uint32_t u32 = u16; + // サロゲート high? + if (IsHighSurrogate(u16)) { + if (wstr_len >= 1) { + const wchar_t u16_lo = *wstr_ptr++; + wstr_len--; + wstr_in++; + // サロゲート low?
+ if (IsLowSurrogate(u16_lo)) { + // サロゲートペア デコード + u32 = 0x10000 + (u16 - 0xd800) * 0x400 + (u16_lo - 0xdc00); + } else { + goto unknown_code; + } } else { goto unknown_code; } - } else { - goto unknown_code; } - } - mb_out = UTF32ToMB(u32, mb_ptr, mb_len); - if (mb_out == 0) { - unknown_code: + mb_out = UTF32ToMB(u32, mb_ptr, mb_len); + if (mb_out == 0) { + unknown_code: + if (mb_ptr != NULL) { + // 変換できなかった場合 + *mb_ptr = '?'; + } + mb_out = 1; + } + } else { + // '\0' if (mb_ptr != NULL) { - // 変換できなかった場合 - *mb_ptr++ = '?'; + *mb_ptr = 0; } mb_out = 1; } @@ -739,10 +755,16 @@ while(wstr_len > 0 && u8_len > 0) { uint32_t u32; size_t u16_out; - size_t u8_in = UTF8ToUTF32(u8_ptr, u8_len, &u32); - if (u8_in == 0) { - u32 = '?'; + size_t u8_in; + if (*u8_ptr == 0) { + u32 = 0; u8_in = 1; + } else { + u8_in = UTF8ToUTF32(u8_ptr, u8_len, &u32); + if (u8_in == 0) { + u32 = '?'; + u8_in = 1; + } } u8_ptr += u8_in; u8_len -= u8_in; @@ -779,6 +801,8 @@ /** * wchar_t文字列をマルチバイト文字列へ変換 + * 変換できない文字は '?' で出力する + * * @param[in] *wstr_ptr wchar_t文字列 * @param[in] wstr_len wchar_t文字列長(0のとき自動、自動のときはL'\0'でターミネートすること) * @param[in] code_page 変換先コードページ