experimenting with def_encoding_error_police

In my case, all 5 variants work very well!
2016-03-24 03:30:14 +01:00
parent 6b2d5afa6b
commit b2b2bf2858
1 changed files with 101 additions and 15 deletions
--- a/source/charset.cpp
+++ b/source/charset.cpp
@@ -1,4 +1,4 @@
-/*
+/**
 *	A Character Encoding Set Implementation
 *	Nana C++ Library(http://www.nanapro.org)
 *	Copyright(C) 2003-2016 Jinhao(cnjinhao@hotmail.com)
@@ -7,9 +7,9 @@
 *	(See accompanying file LICENSE_1_0.txt or copy at
 *	http://www.boost.org/LICENSE_1_0.txt)
 *
- *	@file: nana/charset.cpp
+ *	@file nana/charset.cpp
- *	@brief: A conversion between unicode characters and multi bytes characters
+ *	@brief A conversion between unicode characters and multi bytes characters
- *	@contributions:
+ *	@contributions
 *		UTF16 4-byte decoding issue by Renke Yan.
 *		Pr0curo(pr#98)
 */
@@ -20,6 +20,7 @@
 #include <cwchar>
 #include <clocale>
 #include <cstring>	//Added by Pr0curo(pr#98)
 #include <memory>
 //GCC 4.7.0 does not implement the <codecvt> and codecvt_utfx classes
 #ifndef STD_CODECVT_NOT_SUPPORTED
@@ -211,21 +212,22 @@ namespace nana
 	namespace detail
 	{   
 		/// candidate to be more general??
 		/// Helper that sets the C library LC_CTYPE locale once, on first use.
 		class locale_initializer
 		{
 		public:
 			/// Calls std::setlocale(LC_CTYPE, "") the first time it is invoked; later calls do nothing.
 			/// The guard is a plain function-local static bool with no synchronization around it.
 			static void init()
 			{
 				static bool initialized = false;
-				if(false == initialized)
+				if (initialized) return;
-				{
+				
 				initialized = true;
 				//Only set the C library locale
 				std::setlocale(LC_CTYPE, "");
 			}
 			}
 		};
 		/// convert wchar C string from ? ANSI code page CP_ACP (windows) or LC_CTYPE c locale (-nix) into utf8 std::string
 		bool wc2mb(std::string& mbstr, const wchar_t * s)
 		{
 			if(nullptr == s || *s == 0)
@@ -259,6 +261,7 @@ namespace nana
 			return true;
 		}
 		/// convert a char C-string from The system default Windows ANSI code page CP_ACP or from LC_CTYPE c locale (-nix) into utf16 std::wstring
 		bool mb2wc(std::wstring& wcstr, const char* s)
 		{
 			if(nullptr == s || *s == 0)
@@ -291,6 +294,7 @@ namespace nana
 			return true;
 		}
 		/// convert a char C string from The system default Windows ANSI code page CP_ACP or LC_CTYPE c locale (-nix) into utf16 std::string
 		bool mb2wc(std::string& wcstr, const char* s)
 		{
 			if(nullptr == s || *s == 0)
@@ -304,6 +308,7 @@ namespace nana
 			{
 				wcstr.resize((chars - 1) * sizeof(wchar_t));
 				::MultiByteToWideChar(CP_ACP, 0, s, -1, reinterpret_cast<wchar_t*>(&wcstr[0]), chars - 1);
 				                                      // ^ the trick !
 			}
 #else
 			locale_initializer::init();
@@ -338,6 +343,84 @@ namespace nana
 			virtual std::wstring&& wstr_move() = 0;
 		};
 		/// Experimental abstract interface: a mechanism to plug in a user-selected policy
 		/// that decides what the decoder reports when it meets a code unit it cannot decode.
 		struct encoding_error_police
 		{
 			/// Virtual so that a policy can be destroyed through a pointer to this interface.
 			virtual ~encoding_error_police() = default;
 			/// Handles the undecodable code unit at current_code_unit.
 			/// @param current_code_unit  in/out cursor; the policy moves it to where decoding continues
 			/// @param end                one past the last code unit of the input
 			/// @return the code point to report for the undecodable position
 			virtual unsigned long next_code_point(const unsigned char*& current_code_unit, const unsigned char* end) = 0;
 		};
 		/// The current nana default policy: give up on the rest of the input.
 		/// It is the safe choice - the other policies are mainly meant for debugging, use them at your own risk.
 		struct utf8_error_police : public encoding_error_police
 		{
 			/// Moves the cursor to end and reports code point 0.
 			unsigned long next_code_point(const unsigned char*& current_code_unit, const unsigned char* end) override
 			{
 				const unsigned long no_code_point = 0;
 				current_code_unit = end;	// skip everything that is left
 				return no_code_point;
 			}
 		};
 		/// Policy that reports a marker character for every undecodable code unit
 		/// and lets decoding continue at the following code unit.
 		struct utf8_error_police_def_char : public encoding_error_police
 		{
 			static unsigned long def_error_mark ;	///< marker used by default-constructed instances
 			unsigned long error_mark{ def_error_mark };	///< marker reported by this instance
 			utf8_error_police_def_char() = default;
 			/// @param mark  code point to report instead of the undecodable code unit
 			utf8_error_police_def_char( unsigned long mark): error_mark{mark}{}
 			/// Skips one code unit (never moving past end) and returns error_mark.
 			unsigned long next_code_point(const unsigned char*& current_code_unit, const unsigned char* end) override
 			{
 				// Guard the increment so the cursor can never run past the end of the buffer,
 				// even if the policy is invoked with nothing left to skip.
 				if (current_code_unit != end)
 					++current_code_unit;
 				return error_mark;
 			}
 		};
 		unsigned long utf8_error_police_def_char::def_error_mark{ '*' };
 		/// Policy that reports the undecodable text through utf8_Error::emit() and then
 		/// gives up on the rest of the input.
 		struct utf8_error_police_throw : public encoding_error_police
 		{
 			/// Emits a utf8_Error quoting the text from the offending position; if emit()
 			/// returns, moves the cursor to end and reports code point 0.
 			unsigned long next_code_point(const unsigned char*& current_code_unit, const unsigned char* end) override
 			{
 				// Quote only bytes inside [current_code_unit, end): nothing guarantees a NUL
 				// terminator at or before end. An embedded NUL still ends the quote, as it
 				// did when the text was taken as a C string.
 				const unsigned char* quote_end = current_code_unit;
 				while (quote_end != end && *quote_end != 0)
 					++quote_end;
 				//utf8_Error::use_throw = true;
 				utf8_Error(std::string("The text is not encoded in UTF8: ") +
 					std::string(reinterpret_cast<const char*>(current_code_unit),
 								reinterpret_cast<const char*>(quote_end))).emit();
 				// NOTE(review): presumably emit() throws only when throwing is enabled - confirm.
 				current_code_unit = end;
 				return 0;
 			}
 		};
 		/// Policy that reports the undecodable byte itself as the code point (the value a
 		/// Latin-1 reading of that byte would give) and continues at the next byte.
 		struct utf8_error_police_latin : public encoding_error_police
 		{
 			/// Returns the byte at the cursor and advances by one; returns 0 when nothing is left.
 			unsigned long next_code_point(const unsigned char*& current_code_unit, const unsigned char* end) override
 			{
 				if (current_code_unit == end)
 					return 0;	// nothing left to read - never dereference end
 				return *(current_code_unit++) ;
 			}
 		};
 		/// Policy that asks mb2wc() to convert the text at the offending position and
 		/// reports the first resulting wide character; decoding continues at the next byte.
 		struct utf8_error_police_system : public encoding_error_police
 		{
 			/// Returns the first wide character mb2wc() produces for the remaining input
 			/// (0 when it produces none) and advances the cursor by one byte.
 			unsigned long next_code_point(const unsigned char*& current_code_unit, const unsigned char* end) override
 			{
 				if (current_code_unit == end)
 					return 0;	// nothing left to convert
 				// mb2wc() expects a NUL-terminated string, but [current_code_unit, end) need not
 				// be terminated: hand it a bounded copy so it cannot read past end.
 				const std::string remaining(reinterpret_cast<const char*>(current_code_unit),
 											reinterpret_cast<const char*>(end));
 				std::wstring wc;
 				mb2wc(wc, remaining.c_str());
 				++current_code_unit;
 				return wc.empty() ? 0 : static_cast<unsigned long>(wc[0]);      // use utf16char
 			}
 		};
 		auto def_encoding_error_police = std::make_unique<utf8_error_police>();  // the nana default
 //		auto def_encoding_error_police = std::make_unique<utf8_error_police_latin>();
 //		auto def_encoding_error_police = std::make_unique<utf8_error_police_throw>();
 //		auto def_encoding_error_police = std::make_unique<utf8_error_police_def_char>('X');
 //		auto def_encoding_error_police = std::make_unique<utf8_error_police_system>();
 #ifndef STD_CODECVT_NOT_SUPPORTED
 		class charset_string
 			: public charset_encoding_interface
@@ -578,6 +661,8 @@ namespace nana
 			std::string data_for_move_;
 		};
 #else
         /// Return the first code point and move the pointer to the next character, jumping to the end on errors
 		unsigned long utf8char(const unsigned char*& p, const unsigned char* end)
 		{
@@ -591,9 +676,10 @@ namespace nana
 				unsigned long code;
 				if(ch < 0xC0)       // error? - move to end. Posible ANSI or ISO code-page 
 				{
-					return *(p++); // temp: assume equal
+					//return *(p++); // temp: assume equal
-					p = end;
+					//p = end;
-					return 0;
+					//return 0;
 					return def_encoding_error_police->next_code_point(p, end);
 				}
 				else if(ch < 0xE0 && (p + 1 <= end))      // two byte chararcter
 				{