comments and using system default encoding as utf error "police"
This commit is contained in:
parent
a7c5870957
commit
e8df4b4f69
@ -35,6 +35,7 @@ namespace nana
|
|||||||
{
|
{
|
||||||
namespace utf
|
namespace utf
|
||||||
{
|
{
|
||||||
|
/// return a pointer to the code unit of the character at pos
|
||||||
const char* char_ptr(const char* text, unsigned pos)
|
const char* char_ptr(const char* text, unsigned pos)
|
||||||
{
|
{
|
||||||
auto ustr = reinterpret_cast<const unsigned char*>(text);
|
auto ustr = reinterpret_cast<const unsigned char*>(text);
|
||||||
@ -49,10 +50,10 @@ namespace nana
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (uch < 0xC0)
|
if (uch < 0xC0) // use police ?
|
||||||
return nullptr;
|
return nullptr;
|
||||||
|
|
||||||
if ((uch < 0xE0) && (ustr + 1 < end))
|
if ((uch < 0xE0) && (ustr + 1 < end)) //? *(ustr + 1) < 0xE0
|
||||||
ustr += 2;
|
ustr += 2;
|
||||||
else if (uch < 0xF0 && (ustr + 2 <= end))
|
else if (uch < 0xF0 && (ustr + 2 <= end))
|
||||||
ustr += 3;
|
ustr += 3;
|
||||||
@ -65,6 +66,7 @@ namespace nana
|
|||||||
return reinterpret_cast<const char*>(ustr);
|
return reinterpret_cast<const char*>(ustr);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// return a pointer to the code unit of the character at pos - reuse ^ ?
|
||||||
const char* char_ptr(const std::string& text_utf8, unsigned pos)
|
const char* char_ptr(const std::string& text_utf8, unsigned pos)
|
||||||
{
|
{
|
||||||
auto ustr = reinterpret_cast<const unsigned char*>(text_utf8.c_str());
|
auto ustr = reinterpret_cast<const unsigned char*>(text_utf8.c_str());
|
||||||
@ -95,6 +97,7 @@ namespace nana
|
|||||||
return reinterpret_cast<const char*>(ustr);
|
return reinterpret_cast<const char*>(ustr);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// return a code point (max 16 bits?) and the len in code units of the character at pos
|
||||||
wchar_t char_at(const char* text_utf8, unsigned pos, unsigned * len)
|
wchar_t char_at(const char* text_utf8, unsigned pos, unsigned * len)
|
||||||
{
|
{
|
||||||
if (!text_utf8)
|
if (!text_utf8)
|
||||||
@ -113,10 +116,10 @@ namespace nana
|
|||||||
if (len)
|
if (len)
|
||||||
*len = 1;
|
*len = 1;
|
||||||
|
|
||||||
return *text_utf8;
|
return *text_utf8; // uch ?
|
||||||
}
|
}
|
||||||
|
|
||||||
if (uch < 0xC0)
|
if (uch < 0xC0) // use police or ??
|
||||||
{
|
{
|
||||||
if (len)
|
if (len)
|
||||||
*len = 0;
|
*len = 0;
|
||||||
@ -152,6 +155,7 @@ namespace nana
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// return a code point (max 16 bits?) and the len in code units of the character at pos
|
||||||
wchar_t char_at(const ::std::string& text_utf8, unsigned pos, unsigned * len)
|
wchar_t char_at(const ::std::string& text_utf8, unsigned pos, unsigned * len)
|
||||||
{
|
{
|
||||||
const char* ptr;
|
const char* ptr;
|
||||||
@ -401,6 +405,7 @@ namespace nana
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/// buggie?
|
||||||
struct utf8_error_police_system : public encoding_error_police
|
struct utf8_error_police_system : public encoding_error_police
|
||||||
{
|
{
|
||||||
unsigned long next_code_point(const unsigned char*& current_code_unit, const unsigned char* end) override
|
unsigned long next_code_point(const unsigned char*& current_code_unit, const unsigned char* end) override
|
||||||
@ -408,16 +413,18 @@ namespace nana
|
|||||||
std::wstring wc;
|
std::wstring wc;
|
||||||
mb2wc(wc, reinterpret_cast<const char*>(current_code_unit));
|
mb2wc(wc, reinterpret_cast<const char*>(current_code_unit));
|
||||||
current_code_unit++;
|
current_code_unit++;
|
||||||
return wc[0]; // use utf16char
|
//wchar_t *p = &wc[0];
|
||||||
|
|
||||||
|
return wc[0]; // use utf16char but what endian?
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
auto def_encoding_error_police = std::make_unique<utf8_error_police>(); // the nana default
|
// auto def_encoding_error_police = std::make_unique<utf8_error_police>(); // the nana default
|
||||||
// auto def_encoding_error_police = std::make_unique<utf8_error_police_latin>();
|
// auto def_encoding_error_police = std::make_unique<utf8_error_police_latin>();
|
||||||
// auto def_encoding_error_police = std::make_unique<utf8_error_police_throw>();
|
// auto def_encoding_error_police = std::make_unique<utf8_error_police_throw>();
|
||||||
// auto def_encoding_error_police = std::make_unique<utf8_error_police_def_char>('X');
|
// auto def_encoding_error_police = std::make_unique<utf8_error_police_def_char>('X');
|
||||||
// auto def_encoding_error_police = std::make_unique<utf8_error_police_system>();
|
auto def_encoding_error_police = std::make_unique<utf8_error_police_system>();
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -526,13 +526,13 @@ namespace nana
|
|||||||
void throw_not_utf8(const std::string& text)
|
void throw_not_utf8(const std::string& text)
|
||||||
{
|
{
|
||||||
if (!is_utf8(text.c_str(), text.length()))
|
if (!is_utf8(text.c_str(), text.length()))
|
||||||
return utf8_Error(std::string("The text is not encoded in UTF8: ") + text).emit();
|
return utf8_Error(std::string("\nThe text is not encoded in UTF8: ") + text).emit();
|
||||||
}
|
}
|
||||||
|
|
||||||
void throw_not_utf8(const char* text, unsigned len)
|
void throw_not_utf8(const char* text, unsigned len)
|
||||||
{
|
{
|
||||||
if (!is_utf8(text, len))
|
if (!is_utf8(text, len))
|
||||||
return utf8_Error(std::string("The text is not encoded in UTF8: ") + std::string(text, len) ).emit();
|
return utf8_Error(std::string("\nThe text is not encoded in UTF8: ") + std::string(text, len) ).emit();
|
||||||
|
|
||||||
//throw std::invalid_argument("The text is not encoded in UTF8");
|
//throw std::invalid_argument("The text is not encoded in UTF8");
|
||||||
}
|
}
|
||||||
@ -540,7 +540,7 @@ namespace nana
|
|||||||
void throw_not_utf8(const char* text)
|
void throw_not_utf8(const char* text)
|
||||||
{
|
{
|
||||||
if (!is_utf8(text, std::strlen(text)))
|
if (!is_utf8(text, std::strlen(text)))
|
||||||
return utf8_Error(std::string("The text is not encoded in UTF8: ") + text).emit();
|
return utf8_Error(std::string("\nThe text is not encoded in UTF8: ") + text).emit();
|
||||||
|
|
||||||
//throw std::invalid_argument("The text is not encoded in UTF8");
|
//throw std::invalid_argument("The text is not encoded in UTF8");
|
||||||
|
|
||||||
@ -548,7 +548,7 @@ namespace nana
|
|||||||
|
|
||||||
std::string recode_to_utf8(std::string no_utf8)
|
std::string recode_to_utf8(std::string no_utf8)
|
||||||
{
|
{
|
||||||
return nana::charset(no_utf8).to_bytes(nana::unicode::utf8);
|
return nana::charset(std::move(no_utf8)).to_bytes(nana::unicode::utf8);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// this text needed change, it needed review ??
|
/// this text needed change, it needed review ??
|
||||||
@ -556,7 +556,7 @@ namespace nana
|
|||||||
{
|
{
|
||||||
if (!is_utf8(text.c_str(), text.length()))
|
if (!is_utf8(text.c_str(), text.length()))
|
||||||
{
|
{
|
||||||
utf8_Error(std::string("The text is not encoded in UTF8: ") + text).emit();
|
utf8_Error(std::string("\nThe const text is not encoded in UTF8: ") + text).emit();
|
||||||
return true; /// it needed change, it needed review !!
|
return true; /// it needed change, it needed review !!
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
@ -568,7 +568,7 @@ namespace nana
|
|||||||
{
|
{
|
||||||
if (!is_utf8(text.c_str(), text.length()))
|
if (!is_utf8(text.c_str(), text.length()))
|
||||||
{
|
{
|
||||||
utf8_Error(std::string("The text is not encoded in UTF8: ") + text).emit();
|
utf8_Error(std::string("\nThe text is not encoded in UTF8: ") + text).emit();
|
||||||
text=recode_to_utf8(text);
|
text=recode_to_utf8(text);
|
||||||
return true; /// it needed change, it needed review !!
|
return true; /// it needed change, it needed review !!
|
||||||
}
|
}
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user