diff --git a/include/nana/charset.hpp b/include/nana/charset.hpp index 41fa4d72..09f79bdf 100644 --- a/include/nana/charset.hpp +++ b/include/nana/charset.hpp @@ -46,7 +46,31 @@ namespace nana class charset_encoding_interface; } - /// An intelligent charset class for character code conversion. + /*!\class charset + \brief An intelligent charset class for character code conversion. + Example: + 1. A UTF-8 string from the socket. + + int len = ::recv(sd, buf, buflen, 0); + textbox.caption(nana::charset(std::string(buf, len), nana::unicode::utf8)); + + 2. Send the string in text to the socket as UTF-8. + + std::string utf8str = nana::charset(textbox.caption()).to_bytes(nana::unicode::utf8); + ::send(sd, utf8str.c_str(), utf8str.size(), 0); + + 3. Convert a string to the specified multi-byte character code. + + // Convert to a multibytes string through default system language. + std::string mbstr = nana::charset(a_wstring); + + // If the default system language is English and convert + // a Chinese unicode string to multibytes string through GB2312 + std::setlocale(LC_CTYPE, "zh_CN.GB2312"); + //std::setlocale(LC_CTYPE, ".936"); call it in Windows + std::string mbstr = nana::charset(a_wstring_with_chinese); + + */ class charset { public: @@ -74,27 +98,3 @@ namespace nana }//end namespace nana #endif -/*!\class charset - -Example -1. A UTF-8 string from the socket. - - int len = ::recv(sd, buf, buflen, 0); - textbox.caption(nana::charset(std::string(buf, len), nana::unicode::utf8)); - -2. Send the string in text to the socket as UTF-8. - - std::string utf8str = nana::charset(textbox.caption()).to_bytes(nana::unicode::utf8); - ::send(sd, utf8str.c_str(), utf8str.size(), 0); - -3, Convert a string to the specified multi-byte character code. - - //Convert to a multibytes string through default system language. 
- std::string mbstr = nana::charset(a_wstring); - //If the default system language is English and convert - //a Chinese unicode string to multibytes string through GB2312 - std::setlocale(LC_CTYPE, "zh_CN.GB2312"); - //set::setlocale(LC_CTYPE, ".936"); call it in Windows - std::string mbstr = nana::charset(a_wstring_with_chinese); - -*/ \ No newline at end of file diff --git a/source/charset.cpp b/source/charset.cpp index 686f8182..95753662 100644 --- a/source/charset.cpp +++ b/source/charset.cpp @@ -578,37 +578,38 @@ namespace nana std::string data_for_move_; }; #else + /// return the first code point and move the pointer to the next character, jumping to the end on error unsigned long utf8char(const unsigned char*& p, const unsigned char* end) { if(p != end) { - if(*p < 0x80) + if(*p < 0x80) // ASCII char 0-127 or 0-0x7F { return *(p++); } unsigned ch = *p; unsigned long code; - if(ch < 0xC0) + if(ch < 0xC0) // error? - move to end. Possible ANSI or ISO code-page { p = end; return 0; } - else if(ch < 0xE0 && (p + 1 <= end)) + else if(ch < 0xE0 && (p + 1 <= end)) // two byte character { code = ((ch & 0x1F) << 6) | (p[1] & 0x3F); p += 2; } - else if(ch < 0xF0 && (p + 2 <= end)) + else if(ch < 0xF0 && (p + 2 <= end)) // 3 byte character { code = ((((ch & 0xF) << 6) | (p[1] & 0x3F)) << 6) | (p[2] & 0x3F); p += 3; } - else if(ch < 0x1F && (p + 3 <= end)) + else if(ch < 0x1F && (p + 3 <= end)) // 4 byte character - NOTE(review): ch >= 0xC0 here, so ch < 0x1F is never true; presumably 0xF8 was intended { code = ((((((ch & 0x7) << 6) | (p[1] & 0x3F)) << 6) | (p[2] & 0x3F)) << 6) | (p[3] & 0x3F); p += 4; } - else + else // error, go to end { p = end; return 0;