/// Checks whether a specified text is well-formed UTF-8.
///
/// Declared in include/nana/charset.hpp and defined in source/charset.cpp,
/// both inside namespace nana.
///
/// The whole range [str, str + len) is validated according to RFC 3629:
///  - every multi-byte sequence must be complete (not truncated by len),
///  - every trailing byte must be a continuation byte (10xxxxxx),
///  - overlong encodings, UTF-16 surrogates (U+D800..U+DFFF) and code points
///    above U+10FFFF are rejected.
///
/// @param str  Pointer to the first byte of the text. It does not need to be
///             null-terminated. May be nullptr only when len is 0.
/// @param len  Number of bytes to examine.
/// @return true if the bytes form a well-formed UTF-8 sequence (an empty
///         range is well-formed), false otherwise.
bool is_utf8(const char* str, unsigned len)
{
	// A null pointer can only describe an empty text.
	if (nullptr == str)
		return (0 == len);

	auto ustr = reinterpret_cast<const unsigned char*>(str);
	const auto end = ustr + len;

	while (ustr < end)
	{
		const unsigned lead = *ustr;

		// Single byte (ASCII).
		if (lead < 0x80)
		{
			++ustr;
			continue;
		}

		// Number of continuation bytes that must follow the lead byte, and
		// the allowed range of the first continuation byte. The range is
		// narrowed for some lead bytes to exclude overlong forms, surrogates
		// and values beyond U+10FFFF.
		int trailing = 0;
		unsigned first_lo = 0x80;
		unsigned first_hi = 0xBF;

		if (lead < 0xC2)
		{
			// 0x80..0xBF: a continuation byte can't start a sequence.
			// 0xC0, 0xC1: would only encode overlong forms of ASCII.
			return false;
		}
		else if (lead < 0xE0)
		{
			trailing = 1;
		}
		else if (lead < 0xF0)
		{
			trailing = 2;
			if (0xE0 == lead)
				first_lo = 0xA0;	// below this is an overlong 3-byte form
			else if (0xED == lead)
				first_hi = 0x9F;	// above this is a UTF-16 surrogate
		}
		else if (lead < 0xF5)
		{
			trailing = 3;
			if (0xF0 == lead)
				first_lo = 0x90;	// below this is an overlong 4-byte form
			else if (0xF4 == lead)
				first_hi = 0x8F;	// above this exceeds U+10FFFF
		}
		else
		{
			// 0xF5..0xFF never appear in UTF-8.
			return false;
		}

		// The sequence must fit entirely in the remaining bytes. Comparing
		// the distance avoids forming a pointer beyond the end of the buffer.
		if ((end - ustr) <= trailing)
			return false;

		if ((ustr[1] < first_lo) || (ustr[1] > first_hi))
			return false;

		for (int i = 2; i <= trailing; ++i)
		{
			if ((ustr[i] & 0xC0) != 0x80)
				return false;
		}

		ustr += trailing + 1;
	}

	return true;
}