From 0a4f04267eae083c3be75947c35326966da08152 Mon Sep 17 00:00:00 2001
From: Jinhao
Date: Fri, 1 Jan 2016 12:30:18 +0800
Subject: [PATCH] add a namespace nana::utf for UTF-8 character processing

---
 include/nana/charset.hpp             |  18 +++++
 source/charset.cpp                   | 117 +++++++++++++++++++++++++++
 source/gui/programming_interface.cpp |   2 +-
 source/gui/widgets/button.cpp        |   4 +-
 4 files changed, 139 insertions(+), 2 deletions(-)

diff --git a/include/nana/charset.hpp b/include/nana/charset.hpp
index 6c36159b..725fdf96 100644
--- a/include/nana/charset.hpp
+++ b/include/nana/charset.hpp
@@ -18,6 +18,24 @@
 
 namespace nana
 {
+	namespace utf
+	{
+		/// Attempt to get a pointer to a character of UTF-8 string by a specified character index.
+		/// @param text_utf8 A string encoded as UTF-8.
+		/// @param pos The unicode character index.
+		/// @returns A pointer to the first byte of the unicode character, or to the end of the string if pos is equal to the number of characters. It returns a null if pos is out of range or the string is malformed.
+		const char* char_ptr(const char* text_utf8, unsigned pos);
+		const char* char_ptr(const ::std::string& text_utf8, unsigned pos);
+
+		/// Get the unicode character by a specified character index.
+		/// @param text_utf8 A string encoded as UTF-8.
+		/// @param pos The unicode character index.
+		/// @param len A pointer to an unsigned to receive the number of bytes the character takes in UTF-8, 0 if no character is decoded. If len is a nullptr, it is ignored.
+		/// @returns A unicode character. '\0' if pos is out of range or the string is malformed.
+		wchar_t char_at(const char* text_utf8, unsigned pos, unsigned * len);
+		wchar_t char_at(const ::std::string& text_utf8, unsigned pos, unsigned * len);
+	}
+
 	enum class unicode
 	{
 		utf8, utf16, utf32
diff --git a/source/charset.cpp b/source/charset.cpp
index 1b57ad29..388729a9 100644
--- a/source/charset.cpp
+++ b/source/charset.cpp
@@ -30,6 +30,123 @@
 
 namespace nana
 {
+	namespace utf
+	{
+		namespace
+		{
+			/// Get the length of a UTF-8 sequence from its lead byte.
+			/// @param lead The first byte of a sequence.
+			/// @returns The number of bytes of the sequence, 0 if lead is a continuation byte or an invalid lead byte.
+			unsigned sequence_length(unsigned char lead)
+			{
+				if (lead < 0x80)
+					return 1;	//0xxxxxxx
+				if (lead < 0xC0)
+					return 0;	//10xxxxxx, a continuation byte can not start a character
+				if (lead < 0xE0)
+					return 2;	//110xxxxx
+				if (lead < 0xF0)
+					return 3;	//1110xxxx
+				if (lead < 0xF8)
+					return 4;	//11110xxx
+
+				return 0;	//11111xxx is not used by UTF-8
+			}
+
+			/// Skip a number of characters without reading beyond the end of the text.
+			/// @param ustr The first byte of the text.
+			/// @param end The position one past the last byte of the text.
+			/// @param pos The number of characters to skip.
+			/// @returns The position reached, which is end if pos equals the number of characters. It returns a null if the text has fewer than pos characters or a sequence is malformed or truncated.
+			const unsigned char* advance(const unsigned char* ustr, const unsigned char* const end, unsigned pos)
+			{
+				for (unsigned i = 0; i != pos; ++i)
+				{
+					if (ustr >= end)
+						return nullptr;	//pos is out of range
+
+					const auto bytes = sequence_length(*ustr);
+					if ((0 == bytes) || (static_cast<unsigned>(end - ustr) < bytes))
+						return nullptr;	//malformed or truncated sequence
+
+					ustr += bytes;
+				}
+
+				return ustr;
+			}
+
+			/// Decode the character that starts at a specified position.
+			/// @param ustr The first byte of the character, it may be a null or the end of the text.
+			/// @param end The position one past the last byte of the text.
+			/// @param len Receives the number of bytes of the character, 0 if no character is decoded. If len is a nullptr, it is ignored.
+			/// @returns The decoded character, '\0' if no character is decoded.
+			wchar_t decode(const unsigned char* ustr, const unsigned char* const end, unsigned * len)
+			{
+				unsigned bytes = 0;
+				if (ustr && (ustr < end))
+				{
+					bytes = sequence_length(*ustr);
+					if (static_cast<unsigned>(end - ustr) < bytes)
+						bytes = 0;	//truncated sequence
+				}
+
+				unsigned long code = 0;
+				if (bytes)
+				{
+					//The lead byte of a sequence of 1, 2, 3 or 4 bytes carries 7, 5, 4 or 3 bits of the code point.
+					static const unsigned char lead_mask[] = { 0x7F, 0x1F, 0x0F, 0x07 };
+
+					code = (*ustr & lead_mask[bytes - 1]);
+					for (unsigned i = 1; i != bytes; ++i)
+						code = (code << 6) | (ustr[i] & 0x3F);	//every continuation byte carries 6 bits
+				}
+
+				if (len)
+					*len = bytes;
+
+				//A code point above 0xFFFF is truncated where wchar_t is 16 bits wide.
+				return static_cast<wchar_t>(code);
+			}
+		}
+
+		const char* char_ptr(const char* text, unsigned pos)
+		{
+			if (!text)
+				return nullptr;
+
+			auto const ustr = reinterpret_cast<const unsigned char*>(text);
+			return reinterpret_cast<const char*>(advance(ustr, ustr + std::strlen(text), pos));
+		}
+
+		const char* char_ptr(const std::string& text_utf8, unsigned pos)
+		{
+			auto const ustr = reinterpret_cast<const unsigned char*>(text_utf8.c_str());
+			return reinterpret_cast<const char*>(advance(ustr, ustr + text_utf8.size(), pos));
+		}
+
+		wchar_t char_at(const char* text_utf8, unsigned pos, unsigned * len)
+		{
+			if (!text_utf8)
+			{
+				if (len)
+					*len = 0;
+
+				return 0;
+			}
+
+			auto const ustr = reinterpret_cast<const unsigned char*>(text_utf8);
+			auto const end = ustr + std::strlen(text_utf8);
+			return decode(advance(ustr, end, pos), end, len);
+		}
+
+		wchar_t char_at(const ::std::string& text_utf8, unsigned pos, unsigned * len)
+		{
+			auto const ustr = reinterpret_cast<const unsigned char*>(text_utf8.c_str());
+			auto const end = ustr + text_utf8.size();
+			return decode(advance(ustr, end, pos), end, len);
+		}
+	}
+
 	namespace detail
 	{
 		class locale_initializer
diff --git a/source/gui/programming_interface.cpp b/source/gui/programming_interface.cpp
index ccf37675..a0a53cbc 100644
--- a/source/gui/programming_interface.cpp
+++ b/source/gui/programming_interface.cpp
@@ -348,7 +348,7 @@ namespace API
 			text.erase(pos, 1);
 			if(shortkey == 0 && pos < text.length())
 			{
-				shortkey = text.at(pos);
+				shortkey = utf::char_at(text.c_str() + pos, 0, nullptr);
 				if(shortkey == '&')	//This indicates the text contains "&&", it means the symbol have to be ignored.
 					shortkey = 0;
 				else if(skpos)
diff --git a/source/gui/widgets/button.cpp b/source/gui/widgets/button.cpp
index e091310a..8e3810a0 100644
--- a/source/gui/widgets/button.cpp
+++ b/source/gui/widgets/button.cpp
@@ -258,7 +258,9 @@ namespace nana{ namespace drawerbase
 				if(shortkey)
 				{
 					unsigned off_w = (shortkey_pos ? graph.text_extent_size(mbstr.c_str(), static_cast<unsigned>(shortkey_pos)).width : 0);
-					nana::size shortkey_size = graph.text_extent_size(to_wstring(mbstr.c_str() + shortkey_pos), 1);
+
+					wchar_t keystr[2] = {nana::utf::char_at(mbstr.c_str() + shortkey_pos, 0, 0), 0};
+					auto shortkey_size = graph.text_extent_size(keystr, 1);
 
 					unsigned ascent, descent, inleading;
 					graph.text_metrics(ascent, descent, inleading);