comments and using system default encoding as utf error "police"

This commit is contained in:
qPCR4vir 2016-03-25 00:50:09 +01:00
parent a7c5870957
commit e8df4b4f69
2 changed files with 20 additions and 13 deletions

View File

@ -35,6 +35,7 @@ namespace nana
{ {
namespace utf namespace utf
{ {
/// Returns a pointer to the code unit of the character at index pos, or nullptr if an invalid lead byte is encountered
const char* char_ptr(const char* text, unsigned pos) const char* char_ptr(const char* text, unsigned pos)
{ {
auto ustr = reinterpret_cast<const unsigned char*>(text); auto ustr = reinterpret_cast<const unsigned char*>(text);
@ -49,10 +50,10 @@ namespace nana
continue; continue;
} }
if (uch < 0xC0) if (uch < 0xC0) // use police ?
return nullptr; return nullptr;
if ((uch < 0xE0) && (ustr + 1 < end)) if ((uch < 0xE0) && (ustr + 1 < end)) //? *(ustr + 1) < 0xE0
ustr += 2; ustr += 2;
else if (uch < 0xF0 && (ustr + 2 <= end)) else if (uch < 0xF0 && (ustr + 2 <= end))
ustr += 3; ustr += 3;
@ -65,6 +66,7 @@ namespace nana
return reinterpret_cast<const char*>(ustr); return reinterpret_cast<const char*>(ustr);
} }
/// Returns a pointer to the code unit of the character at index pos (TODO: reuse the const char* overload above?)
const char* char_ptr(const std::string& text_utf8, unsigned pos) const char* char_ptr(const std::string& text_utf8, unsigned pos)
{ {
auto ustr = reinterpret_cast<const unsigned char*>(text_utf8.c_str()); auto ustr = reinterpret_cast<const unsigned char*>(text_utf8.c_str());
@ -95,6 +97,7 @@ namespace nana
return reinterpret_cast<const char*>(ustr); return reinterpret_cast<const char*>(ustr);
} }
/// Returns the code point (max 16 bits?) of the character at index pos; if len is not null, the character's length in code units is stored in *len
wchar_t char_at(const char* text_utf8, unsigned pos, unsigned * len) wchar_t char_at(const char* text_utf8, unsigned pos, unsigned * len)
{ {
if (!text_utf8) if (!text_utf8)
@ -113,10 +116,10 @@ namespace nana
if (len) if (len)
*len = 1; *len = 1;
return *text_utf8; return *text_utf8; // uch ?
} }
if (uch < 0xC0) if (uch < 0xC0) // use police or ??
{ {
if (len) if (len)
*len = 0; *len = 0;
@ -152,6 +155,7 @@ namespace nana
return 0; return 0;
} }
/// Returns the code point (max 16 bits?) of the character at index pos and reports its length in code units through len
wchar_t char_at(const ::std::string& text_utf8, unsigned pos, unsigned * len) wchar_t char_at(const ::std::string& text_utf8, unsigned pos, unsigned * len)
{ {
const char* ptr; const char* ptr;
@ -401,6 +405,7 @@ namespace nana
} }
}; };
/// buggy?
struct utf8_error_police_system : public encoding_error_police struct utf8_error_police_system : public encoding_error_police
{ {
unsigned long next_code_point(const unsigned char*& current_code_unit, const unsigned char* end) override unsigned long next_code_point(const unsigned char*& current_code_unit, const unsigned char* end) override
@ -408,16 +413,18 @@ namespace nana
std::wstring wc; std::wstring wc;
mb2wc(wc, reinterpret_cast<const char*>(current_code_unit)); mb2wc(wc, reinterpret_cast<const char*>(current_code_unit));
current_code_unit++; current_code_unit++;
return wc[0]; // use utf16char //wchar_t *p = &wc[0];
return wc[0]; // use utf16char but what endian?
} }
}; };
auto def_encoding_error_police = std::make_unique<utf8_error_police>(); // the nana default // auto def_encoding_error_police = std::make_unique<utf8_error_police>(); // the nana default
// auto def_encoding_error_police = std::make_unique<utf8_error_police_latin>(); // auto def_encoding_error_police = std::make_unique<utf8_error_police_latin>();
// auto def_encoding_error_police = std::make_unique<utf8_error_police_throw>(); // auto def_encoding_error_police = std::make_unique<utf8_error_police_throw>();
// auto def_encoding_error_police = std::make_unique<utf8_error_police_def_char>('X'); // auto def_encoding_error_police = std::make_unique<utf8_error_police_def_char>('X');
// auto def_encoding_error_police = std::make_unique<utf8_error_police_system>(); auto def_encoding_error_police = std::make_unique<utf8_error_police_system>();

View File

@ -526,13 +526,13 @@ namespace nana
void throw_not_utf8(const std::string& text) void throw_not_utf8(const std::string& text)
{ {
if (!is_utf8(text.c_str(), text.length())) if (!is_utf8(text.c_str(), text.length()))
return utf8_Error(std::string("The text is not encoded in UTF8: ") + text).emit(); return utf8_Error(std::string("\nThe text is not encoded in UTF8: ") + text).emit();
} }
void throw_not_utf8(const char* text, unsigned len) void throw_not_utf8(const char* text, unsigned len)
{ {
if (!is_utf8(text, len)) if (!is_utf8(text, len))
return utf8_Error(std::string("The text is not encoded in UTF8: ") + std::string(text, len) ).emit(); return utf8_Error(std::string("\nThe text is not encoded in UTF8: ") + std::string(text, len) ).emit();
//throw std::invalid_argument("The text is not encoded in UTF8"); //throw std::invalid_argument("The text is not encoded in UTF8");
} }
@ -540,7 +540,7 @@ namespace nana
void throw_not_utf8(const char* text) void throw_not_utf8(const char* text)
{ {
if (!is_utf8(text, std::strlen(text))) if (!is_utf8(text, std::strlen(text)))
return utf8_Error(std::string("The text is not encoded in UTF8: ") + text).emit(); return utf8_Error(std::string("\nThe text is not encoded in UTF8: ") + text).emit();
//throw std::invalid_argument("The text is not encoded in UTF8"); //throw std::invalid_argument("The text is not encoded in UTF8");
@ -548,7 +548,7 @@ namespace nana
std::string recode_to_utf8(std::string no_utf8) std::string recode_to_utf8(std::string no_utf8)
{ {
return nana::charset(no_utf8).to_bytes(nana::unicode::utf8); return nana::charset(std::move(no_utf8)).to_bytes(nana::unicode::utf8);
} }
/// this text needed change, it needed review ?? /// this text needed change, it needed review ??
@ -556,7 +556,7 @@ namespace nana
{ {
if (!is_utf8(text.c_str(), text.length())) if (!is_utf8(text.c_str(), text.length()))
{ {
utf8_Error(std::string("The text is not encoded in UTF8: ") + text).emit(); utf8_Error(std::string("\nThe const text is not encoded in UTF8: ") + text).emit();
return true; /// it needed change, it needed review !! return true; /// it needed change, it needed review !!
} }
else else
@ -568,7 +568,7 @@ namespace nana
{ {
if (!is_utf8(text.c_str(), text.length())) if (!is_utf8(text.c_str(), text.length()))
{ {
utf8_Error(std::string("The text is not encoded in UTF8: ") + text).emit(); utf8_Error(std::string("\nThe text is not encoded in UTF8: ") + text).emit();
text=recode_to_utf8(text); text=recode_to_utf8(text);
return true; /// it needed change, it needed review !! return true; /// it needed change, it needed review !!
} }