fix rendering issue of RTL languages

This commit is contained in:
Jinhao 2020-03-24 22:20:04 +08:00
parent 48d54faeed
commit b46966bf02

View File

@ -1,7 +1,7 @@
/* /*
* Text Token Stream * Text Token Stream
* Nana C++ Library(http://www.nanapro.org) * Nana C++ Library(http://www.nanapro.org)
* Copyright(C) 2003-2018 Jinhao(cnjinhao@hotmail.com) * Copyright(C) 2003-2020 Jinhao(cnjinhao@hotmail.com)
* *
* Distributed under the Boost Software License, Version 1.0. * Distributed under the Boost Software License, Version 1.0.
* (See accompanying file LICENSE_1_0.txt or copy at * (See accompanying file LICENSE_1_0.txt or copy at
@ -44,11 +44,22 @@ namespace nana{ namespace widgets{ namespace skeletons
class tokenizer class tokenizer
{ {
public: public:
tokenizer(const std::wstring& s, bool format_enabled) tokenizer(const std::wstring& s, bool format_enabled) :
: iptr_(s.data()), #if 0 //deprecated
iptr_(s.data()),
endptr_(s.data() + s.size()), endptr_(s.data() + s.size()),
#endif
format_enabled_(format_enabled) format_enabled_(format_enabled)
{ {
entities_ = unicode_bidi{}.reorder(s.c_str(), s.size());
for (auto & e : entities_)
{
ptr_ = e.begin;
if (e.begin < e.end)
break;
++idx_;
}
} }
void push(token tk) void push(token tk)
@ -66,8 +77,13 @@ namespace nana{ namespace widgets{ namespace skeletons
return tk; return tk;
} }
#if 0 //deprecated
if (iptr_ == endptr_) if (iptr_ == endptr_)
return token::eof; return token::eof;
#else
if (_m_eof())
return token::eof;
#endif
//Check whether it is a format token. //Check whether it is a format token.
if (format_enabled_ && format_state_) if (format_enabled_ && format_state_)
@ -106,8 +122,11 @@ namespace nana{ namespace widgets{ namespace skeletons
//Read the data token //Read the data token
token _m_token() token _m_token()
{ {
#if 0 //deprecated
wchar_t ch = *iptr_; wchar_t ch = *iptr_;
#else
auto ch = _m_get();
#endif
if (ch > 0xFF) if (ch > 0xFF)
{ {
//This is the Unicode. //This is the Unicode.
@ -115,6 +134,7 @@ namespace nana{ namespace widgets{ namespace skeletons
idstr_.clear(); idstr_.clear();
idstr_.append(1, ch); idstr_.append(1, ch);
#if 0 //deprecated
if (_m_unicode_word_breakable(iptr_)) if (_m_unicode_word_breakable(iptr_))
{ {
++iptr_; ++iptr_;
@ -128,27 +148,61 @@ namespace nana{ namespace widgets{ namespace skeletons
ch = *++iptr_; ch = *++iptr_;
} }
#else
if (_m_unicode_word_breakable(ptr_))
{
_m_read();
return token::data;
}
_m_read();
ch = _m_get();
while ((!_m_eof()) && (ch > 0xFF) && (false == _m_unicode_word_breakable(ptr_)))
{
idstr_.append(1, ch);
_m_read();
ch = _m_get();
}
//When the last _m_unicode_word_breakable returns true, it implies the ch(left character)
//is not the breakable character. So it belongs to the data.
idstr_.append(1, ch);
_m_read();
#endif
return token::data; return token::data;
} }
if ('\n' == ch) if ('\n' == ch)
{ {
#if 0 //deprecated
++iptr_; ++iptr_;
#else
_m_read();
#endif
return token::endl; return token::endl;
} }
if (('a' <= ch && ch <= 'z') || ('A' <= ch && ch <= 'Z')) if (('a' <= ch && ch <= 'z') || ('A' <= ch && ch <= 'Z'))
{ {
#if 0 //deprecated
auto idstr = iptr_; auto idstr = iptr_;
do do
{ {
ch = *(++iptr_); ch = *(++iptr_);
} } while (('a' <= ch && ch <= 'z') || ('A' <= ch && ch <= 'Z'));
while(('a' <= ch && ch <= 'z') || ('A' <= ch && ch <= 'Z'));
idstr_.assign(idstr, iptr_); idstr_.assign(idstr, iptr_);
#else
auto idstr = ptr_;
do
{
_m_read();
ch = _m_get();
} while (('a' <= ch && ch <= 'z') || ('A' <= ch && ch <= 'Z'));
idstr_.assign(idstr, ptr_);
#endif
return token::data; return token::data;
} }
@ -160,8 +214,10 @@ namespace nana{ namespace widgets{ namespace skeletons
if (('<' == ch) && format_enabled_) if (('<' == ch) && format_enabled_)
{ {
//pos keeps the current position, and it used for restring #if 0 //deprecated
//pos keeps the current position, and it used for restoring
//iptr_ when the search is failed. //iptr_ when the search is failed.
auto pos = ++iptr_; auto pos = ++iptr_;
_m_eat_whitespace(); _m_eat_whitespace();
if (*iptr_ == '/') if (*iptr_ == '/')
@ -177,6 +233,29 @@ namespace nana{ namespace widgets{ namespace skeletons
//Restore the iptr_; //Restore the iptr_;
iptr_ = pos; iptr_ = pos;
#else
//idx and ptr keep the position right after '<'; they are used for restoring
//the read cursor (idx_ and ptr_) when the search for "/>" fails.
_m_read();
auto idx = idx_;
auto ptr = ptr_;
_m_eat_whitespace();
if (_m_get() == '/')
{
_m_read();
_m_eat_whitespace();
if (_m_get() == '>')
{
_m_read();
return token::format_end;
}
}
//Restore the read cursor (idx_ and ptr_).
idx_ = idx;
ptr_ = ptr;
#endif
format_state_ = true; format_state_ = true;
return token::tag_begin; return token::tag_begin;
@ -184,6 +263,7 @@ namespace nana{ namespace widgets{ namespace skeletons
//Escape //Escape
#if 0 //deprecated
if (this->format_enabled_ && (ch == '\\')) if (this->format_enabled_ && (ch == '\\'))
{ {
if (iptr_ + 1 < endptr_) if (iptr_ + 1 < endptr_)
@ -209,6 +289,32 @@ namespace nana{ namespace widgets{ namespace skeletons
} }
else else
++iptr_; ++iptr_;
#else
if (this->format_enabled_ && (ch == '\\'))
{
if (!_m_eof(1))
{
_m_read();
ch = _m_get();
if ('<' == ch || '>' == ch) //two characters need to be escaped.
{
_m_read();
}
else
{
//ignore escape
ch = '\\';
}
}
else
{
_m_set_eof();
return token::eof;
}
}
else
_m_read();
#endif
idstr_.clear(); idstr_.clear();
idstr_.append(1, ch); idstr_.append(1, ch);
@ -220,8 +326,11 @@ namespace nana{ namespace widgets{ namespace skeletons
{ {
_m_eat_whitespace(); _m_eat_whitespace();
#if 0 //deprecated
auto ch = *iptr_++; auto ch = *iptr_++;
#else
auto ch = _m_read();
#endif
switch (ch) switch (ch)
{ {
case ',': return token::comma; case ',': return token::comma;
@ -232,6 +341,7 @@ namespace nana{ namespace widgets{ namespace skeletons
return token::tag_end; return token::tag_end;
case '"': case '"':
//Here is a string and all the meta characters will be ignored except " //Here is a string and all the meta characters will be ignored except "
#if 0 //deprecated
{ {
auto str = iptr_; auto str = iptr_;
@ -240,9 +350,16 @@ namespace nana{ namespace widgets{ namespace skeletons
idstr_.assign(str, iptr_++); idstr_.assign(str, iptr_++);
} }
#else
while (!(_m_eof() || ('"' == _m_get())))
{
idstr_ += _m_read();
}
#endif
return token::string; return token::string;
case '(': case '(':
_m_eat_whitespace(); _m_eat_whitespace();
#if 0 //deprecated
if ((iptr_ < endptr_) && _m_is_idstr_element(*iptr_)) if ((iptr_ < endptr_) && _m_is_idstr_element(*iptr_))
{ {
auto pbegin = iptr_; auto pbegin = iptr_;
@ -273,6 +390,32 @@ namespace nana{ namespace widgets{ namespace skeletons
} }
} }
} }
#else
if ((!_m_eof()) && _m_is_idstr_element(_m_get()))
{
while ((!_m_eof()) && _m_is_idstr_element(_m_get()))
binary_.first += _m_read();
_m_eat_whitespace();
if ((!_m_eof()) && (',' == _m_get()))
{
_m_read();
_m_eat_whitespace();
if ((!_m_eof()) && _m_is_idstr_element(_m_get()))
{
while ((!_m_eof()) && _m_is_idstr_element(_m_get()))
binary_.second += _m_read();
_m_eat_whitespace();
if ((!_m_eof()) && (')' == _m_get()))
{
_m_read();
return token::binary;
}
}
}
}
#endif
return token::eof; return token::eof;
} }
@ -280,7 +423,8 @@ namespace nana{ namespace widgets{ namespace skeletons
if (('a' <= ch && ch <= 'z') || ('A' <= ch && ch <= 'Z') || '_' == ch) if (('a' <= ch && ch <= 'z') || ('A' <= ch && ch <= 'Z') || '_' == ch)
{ {
--iptr_; _m_move_back();
//Here is a identifier //Here is a identifier
_m_read_idstr(); _m_read_idstr();
@ -330,7 +474,7 @@ namespace nana{ namespace widgets{ namespace skeletons
if ('0' <= ch && ch <= '9') if ('0' <= ch && ch <= '9')
{ {
--iptr_; _m_move_back();
_m_read_number(); _m_read_number();
return token::number; return token::number;
} }
@ -346,16 +490,28 @@ namespace nana{ namespace widgets{ namespace skeletons
//Read the identifier. //Read the identifier.
void _m_read_idstr() void _m_read_idstr()
{ {
#if 0 //deprecated
auto idstr = iptr_; auto idstr = iptr_;
wchar_t ch; wchar_t ch;
do do
{ {
ch = *(++iptr_); ch = *(++iptr_);
} } while (('a' <= ch && ch <= 'z') || ('A' <= ch && ch <= 'Z') || ('_' == ch) || ('0' <= ch && ch <= '9'));
while(('a' <= ch && ch <= 'z') || ('A' <= ch && ch <= 'Z') || ('_' == ch) || ('0' <= ch && ch <= '9'));
idstr_.assign(idstr, iptr_); idstr_.assign(idstr, iptr_);
#else
auto idstr = ptr_;
wchar_t ch;
do
{
_m_read();
ch = _m_get();
} while (('a' <= ch && ch <= 'z') || ('A' <= ch && ch <= 'Z') || ('_' == ch) || ('0' <= ch && ch <= '9'));
idstr_.assign(idstr, ptr_);
#endif
} }
//Read the number //Read the number
@ -363,6 +519,7 @@ namespace nana{ namespace widgets{ namespace skeletons
{ {
idstr_.clear(); idstr_.clear();
#if 0 //deprecated
wchar_t ch = *iptr_; wchar_t ch = *iptr_;
idstr_ += ch; idstr_ += ch;
@ -397,12 +554,54 @@ namespace nana{ namespace widgets{ namespace skeletons
idstr_ += ch; idstr_ += ch;
ch = *++iptr_; ch = *++iptr_;
} }
#else
auto ch = _m_get();
idstr_ += ch;
//First check the number whether will be a hex number.
if ('0' == ch)
{
_m_read();
ch = _m_get();
if ((!('0' <= ch && ch <= '9')) && (ch != 'x' && ch != 'X'))
return;
if (ch == 'x' || ch == 'X')
{
//Here is a hex number
idstr_ += 'x';
_m_read();
ch = _m_get();
while (('0' <= ch && ch <= '9') || ('a' <= ch && ch <= 'f') || ('A' <= ch && ch <= 'F'))
{
idstr_ += ch;
_m_read();
ch = _m_get();
}
return;
}
//Here is not a hex number
idstr_ += ch;
}
_m_read();
ch = _m_get();
while ('0' <= ch && ch <= '9')
{
idstr_ += ch;
_m_read();
ch = _m_get();
}
#endif
} }
void _m_eat_whitespace() void _m_eat_whitespace()
{ {
while (true) while (true)
{ {
#if 0 //deprecated
switch (*iptr_) switch (*iptr_)
{ {
case ' ': case ' ':
@ -412,11 +611,126 @@ namespace nana{ namespace widgets{ namespace skeletons
default: default:
return; return;
} }
#else
switch (_m_get())
{
case ' ':
case '\t':
_m_read();
break;
default:
return;
}
#endif
} }
} }
private: private:
//Peeks at the character under the read cursor without consuming it.
//@return the character ptr_ points to, or 0 when idx_ is past the last entity.
wchar_t _m_get() const noexcept
{
	return (idx_ < entities_.size()) ? *ptr_ : L'\0';
}
//Puts the read cursor into the end-of-stream state: idx_ is set past the last entity
//and ptr_ to the end of the last entity, or to nullptr when there are no entities.
void _m_set_eof()
{
	idx_ = entities_.size();
	ptr_ = (entities_.empty() ? nullptr : entities_.back().end);
}
bool _m_eof(std::size_t off)
{
if (0 == off)
return _m_eof();
bool eof = false;
auto idx = idx_;
auto ptr = ptr_;
while (off)
{
if (_m_eof())
{
eof = true;
break;
}
_m_read();
}
idx_ = idx;
ptr_ = ptr;
return eof;
}
//Tells whether the read cursor has no more characters to deliver.
//That is the case when idx_ is past the last entity, or when ptr_ sits at the end of
//the current entity and all the entities after it are empty.
bool _m_eof() noexcept
{
	const auto count = entities_.size();
	if (idx_ == count)
		return true;

	if (ptr_ != entities_[idx_].end)
		return false;

	//The current entity is used up; a later entity that is not empty means more text.
	for (auto next = idx_ + 1; next < count; ++next)
	{
		if (entities_[next].begin != entities_[next].end)
			return false;
	}
	return true;
}
//Consumes the character under the read cursor and moves the cursor to the next one.
//When the current entity is used up, the cursor jumps to the beginning of the next
//entity that is not empty. When there is no such entity, idx_ becomes
//entities_.size() and ptr_ is parked at the end of the entity just consumed.
//@return the consumed character, or 0 if the cursor was already past the last entity.
wchar_t _m_read() noexcept
{
	if (idx_ >= entities_.size())
		return 0;

	const auto stop = entities_[idx_].end;

	//Fast path: the next character belongs to the same entity.
	if ((ptr_ < stop) && (ptr_ + 1 < stop))
		return *(ptr_++);

	//ptr_ is on the last character of the current entity. If the entity is already
	//used up there is nothing to read, so ptr_ is not dereferenced.
	const wchar_t ch = (ptr_ < stop ? *ptr_ : L'\0');

	while ((++idx_) < entities_.size())
	{
		if (entities_[idx_].begin != entities_[idx_].end)
		{
			ptr_ = entities_[idx_].begin;
			return ch;
		}
	}

	//That was the last character of the text. It still has to be returned, and the
	//cursor has to move past it so that a range ending at ptr_ includes it.
	ptr_ = stop;
	return ch;
}
//Moves the read cursor one character back.
//Inside an entity ptr_ is simply decremented. At the beginning of an entity, or past
//the last entity, the cursor moves to the last character of the nearest preceding
//entity that is not empty. If there is no such entity, the cursor is left unchanged.
void _m_move_back() noexcept
{
	if ((idx_ < entities_.size()) && (entities_[idx_].begin != ptr_))
	{
		--ptr_;
		return;
	}

	//Empty entities are skipped, like _m_read() does in the forward direction.
	//Stepping into an empty entity would put ptr_ in front of its begin.
	for (auto prev = idx_; prev > 0; )
	{
		--prev;
		if (entities_[prev].begin != entities_[prev].end)
		{
			idx_ = prev;
			ptr_ = entities_[prev].end - 1;
			return;
		}
	}
}
private:
std::vector<unicode_bidi::entity> entities_;
std::size_t idx_{ 0 };
const wchar_t* ptr_{ nullptr };
#if 0 //deprecated
const wchar_t * iptr_; const wchar_t * iptr_;
const wchar_t * endptr_; const wchar_t * endptr_;
#endif
const bool format_enabled_; const bool format_enabled_;
bool format_state_{false}; bool format_state_{false};
@ -641,6 +955,8 @@ namespace nana{ namespace widgets{ namespace skeletons
while(true) while(true)
{ {
token tk = tknizer.read(); token tk = tknizer.read();
if (token::eof == tk)
break;
switch(tk) switch(tk)
{ {
@ -665,12 +981,38 @@ namespace nana{ namespace widgets{ namespace skeletons
if(fstack.size() > 1) if(fstack.size() > 1)
fstack.pop(); fstack.pop();
break; break;
case token::eof:
return;
default: default:
throw std::runtime_error("invalid token"); throw std::runtime_error("invalid token");
} }
} }
if (!format_enabled)
return;
//Reorder the sequence of line blocks for RTL languages.
for (auto & ln : lines_)
{
std::wstring str;
std::vector<std::size_t> position;
for (auto & b : ln)
{
position.push_back(str.size());
str += b.data_ptr->text();
}
std::remove_reference<decltype(ln)>::type dump;
dump.swap(ln);
auto entities = unicode_bidi{}.reorder(str.c_str(), str.size());
for (auto & e : entities)
{
auto pos = e.begin - str.c_str();
auto i = std::find(position.cbegin(), position.cend(), pos);
ln.push_back(dump[i - position.cbegin()]);
}
}
} }
iterator begin() iterator begin()