[Ttssh2-commit] [7482] WideCharToMultiByte()のUsedDefaultCharを使用して変換失敗を検出するようにした

Back to archive index
scmno****@osdn***** scmno****@osdn*****
2019年 3月 15日 (金) 00:38:12 JST


Revision: 7482
          http://sourceforge.jp/projects/ttssh2/scm/svn/commits/7482
Author:   zmatsuo
Date:     2019-03-15 00:38:11 +0900 (Fri, 15 Mar 2019)
Log Message:
-----------
WideCharToMultiByte()のUsedDefaultCharを使用して変換失敗を検出するようにした
WideCharToMB()で変換に失敗することがあったので修正

Modified Paths:
--------------
    trunk/teraterm/common/codeconv.cpp

-------------- next part --------------
Modified: trunk/teraterm/common/codeconv.cpp
===================================================================
--- trunk/teraterm/common/codeconv.cpp	2019-03-13 15:33:06 UTC (rev 7481)
+++ trunk/teraterm/common/codeconv.cpp	2019-03-14 15:38:11 UTC (rev 7482)
@@ -122,7 +122,7 @@
 		buf[1] = cp932 & 0xff;
 		len = 2;
 	}
-	ret = MultiByteToWideChar(932, MB_ERR_INVALID_CHARS, (char *)buf, len, &wchar, 1);
+	ret = ::MultiByteToWideChar(932, MB_ERR_INVALID_CHARS, (char *)buf, len, &wchar, 1);
 	if (ret <= 0) {
 		// MultiByteToWideChar()が変換失敗
 #if !defined(PRIORITY_CP932_TABLE)
@@ -164,6 +164,7 @@
  *	@param mb_code		マルチバイトの文字コード(0x0000-0xffff)
  *	@param code_page	マルチバイトのコードページ
  *	@retval				unicode(UTF-32文字コード)
+ *						0=エラー(変換できなかった)
  */
 unsigned int MBCP_UTF32(unsigned short mb_code, int code_page)
 {
@@ -187,7 +188,7 @@
 			buf[1] = mb_code & 0xff;
 			len = 2;
 		}
-		ret = MultiByteToWideChar(code_page, MB_ERR_INVALID_CHARS, buf, len, &wchar, 1);
+		ret = ::MultiByteToWideChar(code_page, MB_ERR_INVALID_CHARS, buf, len, &wchar, 1);
 		if (ret <= 0) {
 			c = 0;
 		} else {
@@ -210,6 +211,7 @@
 	DWORD mblen;
 	wchar_t u16_str[2];
 	size_t u16_len;
+	BOOL use_default_char;
 
 	if (u32 < 0x80) {
 		return (unsigned short)u32;
@@ -230,22 +232,20 @@
 	if (u16_len == 0) {
 		return 0;
 	}
-	mblen = WideCharToMultiByte(932, 0, u16_str, (int)u16_len, mbstr, 2, NULL, NULL);
+	use_default_char = FALSE;
+	mblen = ::WideCharToMultiByte(932, 0, u16_str, (int)u16_len, mbstr, 2, NULL, &use_default_char);
+	if (use_default_char) {
+		// 変換できず、既定の文字を使った
+		goto next_convert;
+	}
 	switch (mblen) {
 	case 0:
+		// 変換失敗
+		goto next_convert;
 	case 1:
-	default:
-		if (mblen == 0 || mbstr[0] == '?') {
-			goto next_convert;
-		} else {
-			mb = (unsigned char)mbstr[0];
-			return mb;
-		}
+		mb = (unsigned char)mbstr[0];
+		return mb;
 	case 2:
-		if (mbstr[0] == '?' && mbstr[1] == '?') {
-			// 2byte出力 && "??" の場合は変換できなかった
-			goto next_convert;
-		}
 		mb = (((unsigned char)mbstr[0]) << 8) | (unsigned char)mbstr[1];
 		return mb;
 	}
@@ -540,11 +540,16 @@
  */
 size_t UTF32ToCP932(uint32_t u32, char *mb_ptr, size_t mb_len)
 {
+	uint16_t cp932;
 	size_t cp932_out;
-	const uint16_t cp932 = UTF32_CP932(u32);
-	if (cp932 == 0 && u32 != 0) {
+	if (u32 == 0) {
 		return 0;
 	}
+	cp932 = UTF32_CP932(u32);
+	if (cp932 == 0) {
+		// 変換できなかった
+		return 0;
+	}
 	if (mb_ptr == NULL) {
 		mb_len = 2;
 	}
@@ -577,7 +582,7 @@
  * @param[in]		code_page	変換先codepage
  * @param[in,out]	mb_ptr		変換先文字列出力先(NULLのとき出力しない)
  * @param[in]		mb_len		CP932出力先文字数(文字数,sizeof(wchar_t)*wstr_len bytes)
- * @retval			出力したCP932文字数(byte数)
+ * @retval			出力したmultibyte文字数(byte数)
  *					0=エラー(変換できなかった)
  */
 size_t UTF32ToMBCP(unsigned int u32, int code_page, char *mb_ptr, size_t mb_len)
@@ -588,6 +593,7 @@
 	if (code_page == 932) {
 		return UTF32ToCP932(u32, mb_ptr, mb_len);
 	} else {
+		BOOL use_default_char;
 		wchar_t u16_str[2];
 		size_t u16_len;
 		u16_len = UTF32ToUTF16(u32, u16_str, 2);
@@ -594,10 +600,11 @@
 		if (u16_len == 0) {
 			return 0;
 		}
-		mb_len = WideCharToMultiByte(code_page, 0, u16_str, u16_len, mb_ptr, mb_len, NULL, NULL);
-		if (mb_ptr != NULL && u32 != '?' && mb_len == 1 && mb_ptr[0] == '?') {
-			// 変換できなかったとき、戻り値=1, 文字[0]='?' を返してくる
-			mb_len = 0;
+		use_default_char = FALSE;
+		mb_len = ::WideCharToMultiByte(code_page, 0, u16_str, u16_len, mb_ptr, mb_len, NULL, &use_default_char);
+		if (use_default_char) {
+			// 変換できず、既定の文字を使った
+			return 0;
 		}
 		return mb_len;
 	}
@@ -605,6 +612,7 @@
 
 /**
  *	wchar_t(UTF-16)文字列をマルチバイト文字列に変換する
+ *	変換できない文字は '?' で出力する
  *
  *	@param[in]		*wstr_ptr	wchar_t文字列
  *	@param[in,out]	*wstr_len	wchar_t文字列長
@@ -643,33 +651,41 @@
 
 	while(mb_len > 0 && wstr_len > 0) {
 		const wchar_t u16 = *wstr_ptr++;
-		uint32_t u32 = u16;
 		size_t mb_out;
 		wstr_len--;
 		wstr_in++;
-		// サロゲート high?
-		if (IsHighSurrogate(u16)) {
-			if (wstr_len >= 1) {
-				const wchar_t u16_lo = *wstr_ptr++;
-				wstr_len--;
-				wstr_in++;
-				// サロゲート low?
-				if (IsLowSurrogate(u16_lo)) {
-					// サロゲートペア デコード
-					u32 = 0x10000 + (u16 - 0xd800) * 0x400 + (u16_lo - 0xdc00);
+		if (u16 != 0) {
+			uint32_t u32 = u16;
+			// サロゲート high?
+			if (IsHighSurrogate(u16)) {
+				if (wstr_len >= 1) {
+					const wchar_t u16_lo = *wstr_ptr++;
+					wstr_len--;
+					wstr_in++;
+					// サロゲート low?
+					if (IsLowSurrogate(u16_lo)) {
+						// サロゲートペア デコード
+						u32 = 0x10000 + (u16 - 0xd800) * 0x400 + (u16_lo - 0xdc00);
+					} else {
+						goto unknown_code;
+					}
 				} else {
 					goto unknown_code;
 				}
-			} else {
-				goto unknown_code;
 			}
-		}
-		mb_out = UTF32ToMB(u32, mb_ptr, mb_len);
-		if (mb_out == 0) {
-		unknown_code:
+			mb_out = UTF32ToMB(u32, mb_ptr, mb_len);
+			if (mb_out == 0) {
+			unknown_code:
+				if (mb_ptr != NULL) {
+					// 変換できなかった場合
+					*mb_ptr = '?';
+				}
+				mb_out = 1;
+			}
+		} else {
+			// '\0'
 			if (mb_ptr != NULL) {
-				// 変換できなかった場合
-				*mb_ptr++ = '?';
+				*mb_ptr = 0;
 			}
 			mb_out = 1;
 		}
@@ -739,10 +755,16 @@
 	while(wstr_len > 0 && u8_len > 0) {
 		uint32_t u32;
 		size_t u16_out;
-		size_t u8_in = UTF8ToUTF32(u8_ptr, u8_len, &u32);
-		if (u8_in == 0) {
-			u32 = '?';
+		size_t u8_in;
+		if (*u8_ptr == 0) {
+			u32 = 0;
 			u8_in = 1;
+		} else {
+			u8_in = UTF8ToUTF32(u8_ptr, u8_len, &u32);
+			if (u8_in == 0) {
+				u32 = '?';
+				u8_in = 1;
+			}
 		}
 		u8_ptr += u8_in;
 		u8_len -= u8_in;
@@ -779,6 +801,8 @@
 
 /**
  *	wchar_t文字列をマルチバイト文字列へ変換
+ *	変換できない文字は '?' で出力する
+ *
  *	@param[in]	*wstr_ptr	wchar_t文字列
  *	@param[in]	wstr_len	wchar_t文字列長(0のとき自動、自動のときはL'\0'でターミネートすること)
  *	@param[in]	code_page	変換先コードページ


Ttssh2-commit メーリングリストの案内
Back to archive index