485 lines
		
	
	
		
			15 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			485 lines
		
	
	
		
			15 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
//
 | 
						|
//  Copyright (c) 2009-2011 Artyom Beilis (Tonkikh)
 | 
						|
//
 | 
						|
//  Distributed under the Boost Software License, Version 1.0. (See
 | 
						|
//  accompanying file LICENSE_1_0.txt or copy at
 | 
						|
//  http://www.boost.org/LICENSE_1_0.txt)
 | 
						|
//
 | 
						|
#ifndef BOOST_LOCALE_BOUNDARY_SEGMENT_HPP_INCLUDED
 | 
						|
#define BOOST_LOCALE_BOUNDARY_SEGMENT_HPP_INCLUDED
 | 
						|
#include <boost/locale/config.hpp>
 | 
						|
#ifdef BOOST_MSVC
 | 
						|
#  pragma warning(push)
 | 
						|
#  pragma warning(disable : 4275 4251 4231 4660)
 | 
						|
#endif
 | 
						|
#include <locale>
 | 
						|
#include <string>
 | 
						|
#include <iosfwd>
 | 
						|
#include <iterator>
 | 
						|
 | 
						|
 | 
						|
namespace boost {
 | 
						|
namespace locale {
 | 
						|
namespace boundary {
 | 
						|
    /// \cond INTERNAL
 | 
						|
    namespace details {
        ///
        /// Lexicographically compare the character ranges [l_begin,l_end) and [r_begin,r_end).
        ///
        /// Characters are compared with std::char_traits of the left range's character type;
        /// every character read from the right range is converted to that type first.
        ///
        /// \return 0 when both ranges hold the same text, -1 when the left range orders first
        /// (this includes the left range being a proper prefix of the right one), 1 otherwise.
        ///
        template<typename LeftIterator,typename RightIterator>
        int compare_text(LeftIterator l_begin,LeftIterator l_end,RightIterator r_begin,RightIterator r_end)
        {
            typedef typename std::iterator_traits<LeftIterator>::value_type char_type;
            typedef std::char_traits<char_type> traits;

            while(l_begin!=l_end && r_begin!=r_end) {
                char_type const left_char = *l_begin;
                ++l_begin;
                char_type const right_char = *r_begin;
                ++r_begin;
                // The first mismatching pair decides the ordering
                if(!traits::eq(left_char,right_char))
                    return traits::lt(left_char,right_char) ? -1 : 1;
            }
            // At least one range is exhausted here: the shorter text orders first
            if(l_begin==l_end)
                return (r_begin==r_end) ? 0 : -1;
            return 1;
        }

        ///
        /// Compare two ranges that expose begin() and end(); see the iterator overload
        /// for the meaning of the result.
        ///
        template<typename Left,typename Right>
        int compare_text(Left const &l,Right const &r)
        {
            return compare_text(l.begin(),l.end(),
                                r.begin(),r.end());
        }

        ///
        /// Find the terminating zero character of the C string \a str and return a pointer to it.
        ///
        template<typename Char>
        Char const *c_string_end(Char const *str)
        {
            while(*str!=0)
                ++str;
            return str;
        }

        ///
        /// Compare the range \a l with the zero terminated string starting at \a begin.
        ///
        template<typename Left,typename Char>
        int compare_string(Left const &l,Char const *begin)
        {
            return compare_text(l.begin(),l.end(),begin,c_string_end(begin));
        }

        ///
        /// Compare the zero terminated string starting at \a begin with the range \a r.
        ///
        template<typename Right,typename Char>
        int compare_string(Char const *begin,Right const &r)
        {
            return compare_text(begin,c_string_end(begin),r.begin(),r.end());
        }

    }
 | 
						|
    /// \endcond
 | 
						|
 | 
						|
    ///
 | 
						|
    /// \addtogroup boundary
 | 
						|
    /// @{
 | 
						|
 | 
						|
    ///
 | 
						|
    /// \brief a segment object that represents a pair of two iterators that define the range where
 | 
						|
    /// this segment exists and a rule that defines it.
 | 
						|
    ///
 | 
						|
    /// This type of object is dereferenced by the iterators of segment_index. Using a rule() member function
 | 
						|
    /// you can get a specific rule this segment was selected with. For example, when you use
 | 
						|
    /// word boundary analysis, you can check if the specific word contains Kana letters by checking (rule() & \ref word_kana)!=0
 | 
						|
    /// For a sentence analysis you can check if the sentence is selected because a sentence terminator is found (\ref sentence_term) or
 | 
						|
    /// there is a line break (\ref sentence_sep).
 | 
						|
    ///
 | 
						|
    /// This object can be automatically converted to std::basic_string with the same type of character. It is also
 | 
						|
    /// a valid range that has begin() and end() member functions returning iterators on the location of the segment.
 | 
						|
    ///
 | 
						|
    /// \see
 | 
						|
    ///
 | 
						|
    /// - \ref segment_index
 | 
						|
    /// - \ref boundary_point 
 | 
						|
    /// - \ref boundary_point_index 
 | 
						|
    ///
 | 
						|
    template<typename IteratorType>
 | 
						|
    class segment : public std::pair<IteratorType,IteratorType> {
 | 
						|
    public:
 | 
						|
        ///
 | 
						|
        /// The type of the underlying character 
 | 
						|
        ///
 | 
						|
        typedef typename std::iterator_traits<IteratorType>::value_type char_type;
 | 
						|
        ///
 | 
						|
        /// The type of the string it is converted to
 | 
						|
        ///
 | 
						|
        typedef std::basic_string<char_type> string_type;
 | 
						|
        ///
 | 
						|
        /// The value that iterators return  - the character itself
 | 
						|
        ///
 | 
						|
        typedef char_type value_type;
 | 
						|
        ///
 | 
						|
        /// The iterator that allows to iterate the range
 | 
						|
        ///
 | 
						|
        typedef IteratorType iterator;
 | 
						|
        ///
 | 
						|
        /// The iterator that allows to iterate the range
 | 
						|
        ///
 | 
						|
        typedef IteratorType const_iterator;
 | 
						|
        ///
 | 
						|
        /// The type that represent a difference between two iterators
 | 
						|
        ///
 | 
						|
        typedef typename std::iterator_traits<IteratorType>::difference_type difference_type;
 | 
						|
 | 
						|
        ///
 | 
						|
        /// Default constructor
 | 
						|
        ///
 | 
						|
        segment() {}
 | 
						|
        ///
 | 
						|
        /// Create a segment using two iterators and a rule that represents this point
 | 
						|
        ///
 | 
						|
        segment(iterator b,iterator e,rule_type r) :
 | 
						|
            std::pair<IteratorType,IteratorType>(b,e),
 | 
						|
            rule_(r)
 | 
						|
        {
 | 
						|
        }
 | 
						|
        ///
 | 
						|
        /// Set the start of the range
 | 
						|
        ///
 | 
						|
        void begin(iterator const &v)
 | 
						|
        {
 | 
						|
            this->first = v;
 | 
						|
        }
 | 
						|
        ///
 | 
						|
        /// Set the end of the range
 | 
						|
        ///
 | 
						|
         void end(iterator const &v)
 | 
						|
        {
 | 
						|
            this->second = v;
 | 
						|
        }
 | 
						|
 | 
						|
        ///
 | 
						|
        /// Get the start of the range
 | 
						|
        ///
 | 
						|
        IteratorType begin() const 
 | 
						|
        {
 | 
						|
            return this->first;
 | 
						|
        }
 | 
						|
        ///
 | 
						|
        /// Set the end of the range
 | 
						|
        ///
 | 
						|
        IteratorType end() const
 | 
						|
        {
 | 
						|
            return this->second;
 | 
						|
        }
 | 
						|
 | 
						|
        ///
 | 
						|
        /// Convert the range to a string automatically
 | 
						|
        ///
 | 
						|
        template <class T, class A>
 | 
						|
        operator std::basic_string<char_type, T, A> ()const
 | 
						|
        {
 | 
						|
            return std::basic_string<char_type, T, A>(this->first, this->second);
 | 
						|
        }
 | 
						|
        
 | 
						|
        ///
 | 
						|
        /// Create a string from the range explicitly
 | 
						|
        ///
 | 
						|
        string_type str() const
 | 
						|
        {
 | 
						|
            return string_type(begin(),end());
 | 
						|
        }
 | 
						|
 | 
						|
        ///
 | 
						|
        /// Get the length of the text chunk
 | 
						|
        ///
 | 
						|
 | 
						|
        size_t length() const
 | 
						|
        {
 | 
						|
            return std::distance(begin(),end());
 | 
						|
        }
 | 
						|
 | 
						|
        ///
 | 
						|
        /// Check if the segment is empty
 | 
						|
        ///
 | 
						|
        bool empty() const
 | 
						|
        {
 | 
						|
            return begin() == end();
 | 
						|
        }
 | 
						|
 | 
						|
        ///
 | 
						|
        /// Get the rule that is used for selection of this segment.
 | 
						|
        ///
 | 
						|
        rule_type rule() const
 | 
						|
        {
 | 
						|
            return rule_;
 | 
						|
        }
 | 
						|
        ///
 | 
						|
        /// Set a rule that is used for segment selection
 | 
						|
        ///
 | 
						|
        void rule(rule_type r)
 | 
						|
        {
 | 
						|
            rule_ = r;
 | 
						|
        }
 | 
						|
 | 
						|
        // make sure we override std::pair's operator==
 | 
						|
 | 
						|
        /// Compare two segments
 | 
						|
        bool operator==(segment const &other)
 | 
						|
        {
 | 
						|
            return details::compare_text(*this,other) == 0;
 | 
						|
        }
 | 
						|
 | 
						|
        /// Compare two segments
 | 
						|
        bool operator!=(segment const &other)
 | 
						|
        {
 | 
						|
            return details::compare_text(*this,other) != 0;
 | 
						|
        }
 | 
						|
 | 
						|
    private:
 | 
						|
        rule_type rule_;
 | 
						|
       
 | 
						|
    };
 | 
						|
 | 
						|
   
 | 
						|
    /// Compare two segments
 | 
						|
    template<typename IteratorL,typename IteratorR>
 | 
						|
    bool operator==(segment<IteratorL> const &l,segment<IteratorR> const &r)
 | 
						|
    {
 | 
						|
        return details::compare_text(l,r) == 0; 
 | 
						|
    }
 | 
						|
    /// Compare two segments
 | 
						|
    template<typename IteratorL,typename IteratorR>
 | 
						|
    bool operator!=(segment<IteratorL> const &l,segment<IteratorR> const &r)
 | 
						|
    {
 | 
						|
        return details::compare_text(l,r) != 0; 
 | 
						|
    }
 | 
						|
 | 
						|
    /// Compare two segments
 | 
						|
    template<typename IteratorL,typename IteratorR>
 | 
						|
    bool operator<(segment<IteratorL> const &l,segment<IteratorR> const &r)
 | 
						|
    {
 | 
						|
        return details::compare_text(l,r) < 0; 
 | 
						|
    }
 | 
						|
    /// Compare two segments
 | 
						|
    template<typename IteratorL,typename IteratorR>
 | 
						|
    bool operator<=(segment<IteratorL> const &l,segment<IteratorR> const &r)
 | 
						|
    {
 | 
						|
        return details::compare_text(l,r) <= 0; 
 | 
						|
    }
 | 
						|
    /// Compare two segments
 | 
						|
    template<typename IteratorL,typename IteratorR>
 | 
						|
    bool operator>(segment<IteratorL> const &l,segment<IteratorR> const &r)
 | 
						|
    {
 | 
						|
        return details::compare_text(l,r) > 0; 
 | 
						|
    }
 | 
						|
    /// Compare two segments
 | 
						|
    template<typename IteratorL,typename IteratorR>
 | 
						|
    bool operator>=(segment<IteratorL> const &l,segment<IteratorR> const &r)
 | 
						|
    {
 | 
						|
        return details::compare_text(l,r) >= 0; 
 | 
						|
    }
 | 
						|
 | 
						|
    /// Compare string and segment
 | 
						|
    template<typename CharType,typename Traits,typename Alloc,typename IteratorR>
 | 
						|
    bool operator==(std::basic_string<CharType,Traits,Alloc> const &l,segment<IteratorR> const &r)
 | 
						|
    {
 | 
						|
        return details::compare_text(l,r) == 0; 
 | 
						|
    }
 | 
						|
    /// Compare string and segment
 | 
						|
    template<typename CharType,typename Traits,typename Alloc,typename IteratorR>
 | 
						|
    bool operator!=(std::basic_string<CharType,Traits,Alloc> const &l,segment<IteratorR> const &r)
 | 
						|
    {
 | 
						|
        return details::compare_text(l,r) != 0; 
 | 
						|
    }
 | 
						|
 | 
						|
    /// Compare string and segment
 | 
						|
    template<typename CharType,typename Traits,typename Alloc,typename IteratorR>
 | 
						|
    bool operator<(std::basic_string<CharType,Traits,Alloc> const &l,segment<IteratorR> const &r)
 | 
						|
    {
 | 
						|
        return details::compare_text(l,r) < 0; 
 | 
						|
    }
 | 
						|
    /// Compare string and segment
 | 
						|
    template<typename CharType,typename Traits,typename Alloc,typename IteratorR>
 | 
						|
    bool operator<=(std::basic_string<CharType,Traits,Alloc> const &l,segment<IteratorR> const &r)
 | 
						|
    {
 | 
						|
        return details::compare_text(l,r) <= 0; 
 | 
						|
    }
 | 
						|
    /// Compare string and segment
 | 
						|
    template<typename CharType,typename Traits,typename Alloc,typename IteratorR>
 | 
						|
    bool operator>(std::basic_string<CharType,Traits,Alloc> const &l,segment<IteratorR> const &r)
 | 
						|
    {
 | 
						|
        return details::compare_text(l,r) > 0; 
 | 
						|
    }
 | 
						|
    /// Compare string and segment
 | 
						|
    template<typename CharType,typename Traits,typename Alloc,typename IteratorR>
 | 
						|
    bool operator>=(std::basic_string<CharType,Traits,Alloc> const &l,segment<IteratorR> const &r)
 | 
						|
    {
 | 
						|
        return details::compare_text(l,r) >= 0; 
 | 
						|
    }
 | 
						|
 | 
						|
    /// Compare string and segment
 | 
						|
    template<typename Iterator,typename CharType,typename Traits,typename Alloc>
 | 
						|
    bool operator==(segment<Iterator> const &l,std::basic_string<CharType,Traits,Alloc> const &r)
 | 
						|
    {
 | 
						|
        return details::compare_text(l,r) == 0; 
 | 
						|
    }
 | 
						|
    /// Compare string and segment
 | 
						|
    template<typename Iterator,typename CharType,typename Traits,typename Alloc>
 | 
						|
    bool operator!=(segment<Iterator> const &l,std::basic_string<CharType,Traits,Alloc> const &r)
 | 
						|
    {
 | 
						|
        return details::compare_text(l,r) != 0; 
 | 
						|
    }
 | 
						|
 | 
						|
    /// Compare string and segment
 | 
						|
    template<typename Iterator,typename CharType,typename Traits,typename Alloc>
 | 
						|
    bool operator<(segment<Iterator> const &l,std::basic_string<CharType,Traits,Alloc> const &r)
 | 
						|
    {
 | 
						|
        return details::compare_text(l,r) < 0; 
 | 
						|
    }
 | 
						|
    /// Compare string and segment
 | 
						|
    template<typename Iterator,typename CharType,typename Traits,typename Alloc>
 | 
						|
    bool operator<=(segment<Iterator> const &l,std::basic_string<CharType,Traits,Alloc> const &r)
 | 
						|
    {
 | 
						|
        return details::compare_text(l,r) <= 0; 
 | 
						|
    }
 | 
						|
    /// Compare string and segment
 | 
						|
    template<typename Iterator,typename CharType,typename Traits,typename Alloc>
 | 
						|
    bool operator>(segment<Iterator> const &l,std::basic_string<CharType,Traits,Alloc> const &r)
 | 
						|
    {
 | 
						|
        return details::compare_text(l,r) > 0; 
 | 
						|
    }
 | 
						|
    /// Compare string and segment
 | 
						|
    template<typename Iterator,typename CharType,typename Traits,typename Alloc>
 | 
						|
    bool operator>=(segment<Iterator> const &l,std::basic_string<CharType,Traits,Alloc> const &r)
 | 
						|
    {
 | 
						|
        return details::compare_text(l,r) >= 0; 
 | 
						|
    }
 | 
						|
 | 
						|
 | 
						|
    /// Compare C string and segment
 | 
						|
    template<typename CharType,typename IteratorR>
 | 
						|
    bool operator==(CharType const *l,segment<IteratorR> const &r)
 | 
						|
    {
 | 
						|
        return details::compare_string(l,r) == 0; 
 | 
						|
    }
 | 
						|
    /// Compare C string and segment
 | 
						|
    template<typename CharType,typename IteratorR>
 | 
						|
    bool operator!=(CharType const *l,segment<IteratorR> const &r)
 | 
						|
    {
 | 
						|
        return details::compare_string(l,r) != 0; 
 | 
						|
    }
 | 
						|
 | 
						|
    /// Compare C string and segment
 | 
						|
    template<typename CharType,typename IteratorR>
 | 
						|
    bool operator<(CharType const *l,segment<IteratorR> const &r)
 | 
						|
    {
 | 
						|
        return details::compare_string(l,r) < 0; 
 | 
						|
    }
 | 
						|
    /// Compare C string and segment
 | 
						|
    template<typename CharType,typename IteratorR>
 | 
						|
    bool operator<=(CharType const *l,segment<IteratorR> const &r)
 | 
						|
    {
 | 
						|
        return details::compare_string(l,r) <= 0; 
 | 
						|
    }
 | 
						|
    /// Compare C string and segment
 | 
						|
    template<typename CharType,typename IteratorR>
 | 
						|
    bool operator>(CharType const *l,segment<IteratorR> const &r)
 | 
						|
    {
 | 
						|
        return details::compare_string(l,r) > 0; 
 | 
						|
    }
 | 
						|
    /// Compare C string and segment
 | 
						|
    template<typename CharType,typename IteratorR>
 | 
						|
    bool operator>=(CharType const *l,segment<IteratorR> const &r)
 | 
						|
    {
 | 
						|
        return details::compare_string(l,r) >= 0; 
 | 
						|
    }
 | 
						|
 | 
						|
    /// Compare C string and segment
 | 
						|
    template<typename Iterator,typename CharType>
 | 
						|
    bool operator==(segment<Iterator> const &l,CharType const *r)
 | 
						|
    {
 | 
						|
        return details::compare_string(l,r) == 0; 
 | 
						|
    }
 | 
						|
    /// Compare C string and segment
 | 
						|
    template<typename Iterator,typename CharType>
 | 
						|
    bool operator!=(segment<Iterator> const &l,CharType const *r)
 | 
						|
    {
 | 
						|
        return details::compare_string(l,r) != 0; 
 | 
						|
    }
 | 
						|
 | 
						|
    /// Compare C string and segment
 | 
						|
    template<typename Iterator,typename CharType>
 | 
						|
    bool operator<(segment<Iterator> const &l,CharType const *r)
 | 
						|
    {
 | 
						|
        return details::compare_string(l,r) < 0; 
 | 
						|
    }
 | 
						|
    /// Compare C string and segment
 | 
						|
    template<typename Iterator,typename CharType>
 | 
						|
    bool operator<=(segment<Iterator> const &l,CharType const *r)
 | 
						|
    {
 | 
						|
        return details::compare_string(l,r) <= 0; 
 | 
						|
    }
 | 
						|
    /// Compare C string and segment
 | 
						|
    template<typename Iterator,typename CharType>
 | 
						|
    bool operator>(segment<Iterator> const &l,CharType const *r)
 | 
						|
    {
 | 
						|
        return details::compare_string(l,r) > 0; 
 | 
						|
    }
 | 
						|
    /// Compare C string and segment
 | 
						|
    template<typename Iterator,typename CharType>
 | 
						|
    bool operator>=(segment<Iterator> const &l,CharType const *r)
 | 
						|
    {
 | 
						|
        return details::compare_string(l,r) >= 0; 
 | 
						|
    }
 | 
						|
 | 
						|
 | 
						|
 | 
						|
 | 
						|
 | 
						|
 | 
						|
    // Convenience typedefs, grouped by character type: the "s" variants iterate over a
    // std::basic_string, the "c" variants over a plain character pointer.

    typedef segment<std::string::const_iterator> ssegment;      ///< segment over std::string iterators
    typedef segment<char const *> csegment;                     ///< segment over char pointers

    typedef segment<std::wstring::const_iterator> wssegment;    ///< segment over std::wstring iterators
    typedef segment<wchar_t const *> wcsegment;                 ///< segment over wchar_t pointers

    #ifdef BOOST_HAS_CHAR16_T
    typedef segment<std::u16string::const_iterator> u16ssegment;///< segment over std::u16string iterators
    typedef segment<char16_t const *> u16csegment;              ///< segment over char16_t pointers
    #endif

    #ifdef BOOST_HAS_CHAR32_T
    typedef segment<std::u32string::const_iterator> u32ssegment;///< segment over std::u32string iterators
    typedef segment<char32_t const *> u32csegment;              ///< segment over char32_t pointers
    #endif
 | 
						|
 | 
						|
 | 
						|
 | 
						|
 
 | 
						|
    
 | 
						|
    ///
 | 
						|
    /// Write the segment to the stream character by character
 | 
						|
    ///
 | 
						|
    template<typename CharType,typename TraitsType,typename Iterator>
 | 
						|
    std::basic_ostream<CharType,TraitsType> &operator<<(
 | 
						|
            std::basic_ostream<CharType,TraitsType> &out,
 | 
						|
            segment<Iterator> const &tok)
 | 
						|
    {
 | 
						|
        for(Iterator p=tok.begin(),e=tok.end();p!=e;++p)
 | 
						|
            out << *p;
 | 
						|
        return out;
 | 
						|
    }
 | 
						|
 | 
						|
    /// @}
 | 
						|
 | 
						|
} // boundary
 | 
						|
} // locale
 | 
						|
} // boost
 | 
						|
 | 
						|
#ifdef BOOST_MSVC
 | 
						|
#pragma warning(pop)
 | 
						|
#endif
 | 
						|
 | 
						|
#endif
 | 
						|
 | 
						|
// vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
 |