Added splitView() and splitLines() to string utility functions.

This commit is contained in:
Patrick Wuttke 2025-07-15 18:21:10 +02:00
parent 02e99bbc82
commit 51092bb4cb

View File

@ -33,6 +33,16 @@ namespace mijin
// public constants
//
namespace detail
{
// Backing storage for DEFAULT_TRIM_CHARS: space, tab, carriage return and line feed,
// each converted to the requested character type.
// `inline constexpr` gives one definition for the whole program and makes the data usable in
// constant expressions.
template<typename TChar>
inline constexpr std::array<TChar, 4> DEFAULT_TRIM_CHARS_DATA = {TChar(' '), TChar('\t'), TChar('\r'), TChar('\n')};
}

// View over the default trim characters (space, tab, CR, LF) for the character type TChar.
// This is what the single-argument trim functions and SplitStringTraitsCT::getTrimChars() use.
// Built from data()/size() rather than an iterator pair, so it does not depend on the C++20
// iterator-range constructor of std::basic_string_view.
template<typename TChar>
inline constexpr std::basic_string_view<TChar, std::char_traits<TChar>> DEFAULT_TRIM_CHARS{
    detail::DEFAULT_TRIM_CHARS_DATA<TChar>.data(), detail::DEFAULT_TRIM_CHARS_DATA<TChar>.size()};
//
// public traits
//
@ -98,6 +108,361 @@ struct SplitOptions
bool ignoreEmpty = true; bool ignoreEmpty = true;
}; };
// Result type of convertCharType(). Carries two counters; presumably the number of source
// characters consumed (numRead) and destination characters produced (numWritten) - confirm
// against convertCharType().
// The value converts to true as soon as either counter is non-zero.
struct [[nodiscard]] ConvertCharTypeResult
{
    unsigned numRead = 0;
    unsigned numWritten = 0;

    // True if anything was read or written.
    constexpr operator bool() const MIJIN_NOEXCEPT
    {
        const bool nothingDone = (numRead == 0) && (numWritten == 0);
        return !nothingDone;
    }

    // True if both counters are zero.
    constexpr bool operator !() const MIJIN_NOEXCEPT
    {
        const bool anythingDone = static_cast<bool>(*this);
        return !anythingDone;
    }
};
// Options for the compile-time configured split traits (SplitStringTraitsCT / SplitLineTraitsCT).
// Kept as a plain aggregate so it can be used as a non-type template argument.
struct SplitViewOptions
{
    // Skip empty parts (and, when trimming, parts made up of trim characters only).
    bool ignoreEmpty{true};
    // Trim every part with the default trim characters before it is returned.
    bool trim{false};
};
// Split traits whose configuration is fixed at compile time.
//
// TChar       - character type of the string being split
// splitAt     - separator character
// options     - see SplitViewOptions
// TCharTraits - character traits of the string views handed out
//
// The type is stateless; all accessors are static.
template<typename TChar, TChar splitAt, SplitViewOptions options = SplitViewOptions(), typename TCharTraits = std::char_traits<TChar>>
struct SplitStringTraitsCT
{
    using char_t = TChar;
    using string_view_t = std::basic_string_view<TChar, TCharTraits>;

    static constexpr char_t getSplitAt() MIJIN_NOEXCEPT { return splitAt; }
    static constexpr bool getIgnoreEmpty() MIJIN_NOEXCEPT { return options.ignoreEmpty; }
    static constexpr bool getTrim() MIJIN_NOEXCEPT { return options.trim; }

    // Always the default trim characters. The view is rebuilt as string_view_t so that the
    // result also has the right type when TCharTraits is not std::char_traits<TChar>;
    // otherwise the type could not satisfy SplitStringTraitsType for custom character traits.
    static constexpr string_view_t getTrimChars() MIJIN_NOEXCEPT
    {
        const auto& defaults = DEFAULT_TRIM_CHARS<char_t>;
        return string_view_t(defaults.data(), defaults.size());
    }
};
// Split traits whose configuration is chosen at run time.
// An aggregate: initialise it with designated initialisers (.splitAt, .ignoreEmpty, .trimChars).
// Trimming is considered enabled as soon as trimChars is not empty.
template<typename TChar, typename TCharTraits = std::char_traits<TChar>>
struct SplitStringTraitsRT
{
    using char_t = TChar;
    using string_view_t = std::basic_string_view<TChar, TCharTraits>;

    // Separator character.
    char_t splitAt;
    // Whether empty parts are skipped.
    bool ignoreEmpty;
    // Characters to strip from every part; empty disables trimming.
    string_view_t trimChars = {};

    constexpr char_t getSplitAt() const MIJIN_NOEXCEPT
    {
        return splitAt;
    }

    constexpr bool getIgnoreEmpty() const MIJIN_NOEXCEPT
    {
        return ignoreEmpty;
    }

    constexpr bool getTrim() const MIJIN_NOEXCEPT
    {
        return trimChars.size() != 0;
    }

    constexpr string_view_t getTrimChars() const MIJIN_NOEXCEPT
    {
        return trimChars;
    }
};
// Requirements on a traits type used to configure SplitStringIterator / SplitStringRange.
// T must be copy-constructible, declare the nested types char_t and string_view_t, and a const
// object of T must offer:
//   getSplitAt()     - separator, convertible to TChar
//   getIgnoreEmpty() - convertible to bool
//   getTrim()        - convertible to bool
//   getTrimChars()   - convertible to T::string_view_t
// Note that the concept does not check that T::char_t is the same type as TChar.
template<typename T, typename TChar>
concept SplitStringTraitsType = std::is_copy_constructible_v<T> && requires(const T& object)
{
typename T::char_t;
typename T::string_view_t;
{ object.getSplitAt() } -> std::convertible_to<TChar>;
{ object.getIgnoreEmpty() } -> std::convertible_to<bool>;
{ object.getTrim() } -> std::convertible_to<bool>;
{ object.getTrimChars() } -> std::convertible_to<typename T::string_view_t>;
};
// Both bundled traits types (compile-time and run-time configured) have to model the concept.
static_assert(SplitStringTraitsType<SplitStringTraitsCT<char, ' '>, char>);
static_assert(SplitStringTraitsType<SplitStringTraitsRT<char>, char>);
// Compile-time configured traits for splitting text into lines.
// Behaves like SplitStringTraitsCT with '\n' as the separator and additionally keeps a line
// counter. The counter starts at 1 and is advanced through onNext(), which SplitStringIterator
// calls when the traits type provides it.
template<typename TChar, typename TLine, SplitViewOptions options = SplitViewOptions(), typename TCharTraits = std::char_traits<TChar>>
struct SplitLineTraitsCT : SplitStringTraitsCT<TChar, '\n', options, TCharTraits>
{
    using base_t = SplitStringTraitsCT<TChar, '\n', options, TCharTraits>;
    using char_t = TChar;
    using line_t = TLine;

    // Re-export the static accessors of the base.
    using base_t::getSplitAt;
    using base_t::getIgnoreEmpty;
    using base_t::getTrim;
    using base_t::getTrimChars;

    // Current line number.
    line_t line = 1;

    constexpr line_t getLine() const MIJIN_NOEXCEPT
    {
        return line;
    }

    // Advances the line counter by one.
    constexpr void onNext() MIJIN_NOEXCEPT
    {
        ++line;
    }
};
// Run-time configured traits for splitting text into lines.
// An aggregate with the members line, ignoreEmpty and trimChars (in that order); the separator
// is always '\n'. Trimming is enabled whenever trimChars is not empty.
template<typename TChar, typename TLine, typename TCharTraits = std::char_traits<TChar>>
struct SplitLineTraitsRT
{
    using char_t = TChar;
    using line_t = TLine;
    using string_view_t = std::basic_string_view<TChar, TCharTraits>;

    // Current line number; advanced through onNext().
    line_t line = 1;
    // Whether empty lines are skipped.
    bool ignoreEmpty;
    // Characters to strip from every line; empty disables trimming.
    string_view_t trimChars = {};

    constexpr char_t getSplitAt() const MIJIN_NOEXCEPT
    {
        return '\n';
    }

    constexpr bool getIgnoreEmpty() const MIJIN_NOEXCEPT
    {
        return ignoreEmpty;
    }

    constexpr bool getTrim() const MIJIN_NOEXCEPT
    {
        return trimChars.size() != 0;
    }

    constexpr string_view_t getTrimChars() const MIJIN_NOEXCEPT
    {
        return trimChars;
    }

    constexpr line_t getLine() const MIJIN_NOEXCEPT
    {
        return line;
    }

    // Advances the line counter by one.
    constexpr void onNext() MIJIN_NOEXCEPT
    {
        ++line;
    }
};
// Requirements on a traits type used by LineIterator / LineRange: everything that
// SplitStringTraitsType asks for, plus getLine() on a const object, convertible to TLine.
// onNext() is not part of the concept; SplitStringIterator only calls it if it exists.
template<typename T, typename TChar, typename TLine>
concept SplitLineTraitsType = SplitStringTraitsType<T, TChar> && requires (const T& object)
{
{ object.getLine() } -> std::convertible_to<TLine>;
};
// Both bundled line traits types have to model the concept.
static_assert(SplitLineTraitsType<SplitLineTraitsCT<char, unsigned>, char, unsigned>);
static_assert(SplitLineTraitsType<SplitLineTraitsRT<char, unsigned>, char, unsigned>);
// Forward declaration of the two-argument trim(), so that SplitStringIterator::operator*()
// below can call it ahead of its definition.
template<typename TString, typename TChars>
[[nodiscard]]
auto trim(TString&& string, TChars&& chars);
template<typename TChar, SplitStringTraitsType<TChar> TTraits>
class SplitStringIterator
{
public:
using char_t = TChar;
using traits_t = TTraits;
using string_view_t = traits_t::string_view_t;
using base_t = string_view_t::iterator;
using value_type = string_view_t;
private:
[[no_unique_address]] traits_t traits_;
string_view_t full_;
string_view_t::iterator pos_;
string_view_t::iterator next_;
public:
constexpr SplitStringIterator(string_view_t full, base_t pos, traits_t traits = {}) MIJIN_NOEXCEPT_IF(std::is_nothrow_move_constructible_v<traits_t>)
: full_(full), pos_(pos), traits_(std::move(traits))
{
findNext();
}
constexpr explicit SplitStringIterator(traits_t traits = {}) MIJIN_NOEXCEPT_IF(std::is_nothrow_move_constructible_v<traits_t>)
: traits_(std::move(traits)) {}
constexpr SplitStringIterator(const SplitStringIterator&) noexcept(std::is_nothrow_copy_constructible_v<traits_t>) = default;
constexpr SplitStringIterator(SplitStringIterator&&) noexcept(std::is_nothrow_move_constructible_v<traits_t>) = default;
constexpr SplitStringIterator& operator=(const SplitStringIterator&) noexcept(std::is_nothrow_copy_assignable_v<traits_t>) = default;
constexpr SplitStringIterator& operator=(SplitStringIterator&&) noexcept(std::is_nothrow_move_assignable_v<traits_t>) = default;
constexpr bool operator==(const SplitStringIterator& other) const MIJIN_NOEXCEPT { MIJIN_ASSERT(full_ == other.full_, "Comparing unrelated iterators."); return pos_ == other.pos_; }
constexpr bool operator!=(const SplitStringIterator& other) const MIJIN_NOEXCEPT { MIJIN_ASSERT(full_ == other.full_, "Comparing unrelated iterators."); return pos_ != other.pos_; }
constexpr bool operator<(const SplitStringIterator& other) const MIJIN_NOEXCEPT { MIJIN_ASSERT(full_ == other.full_, "Comparing unrelated iterators."); return pos_ < other.pos_; }
constexpr bool operator<=(const SplitStringIterator& other) const MIJIN_NOEXCEPT { MIJIN_ASSERT(full_ == other.full_, "Comparing unrelated iterators."); return pos_ <= other.pos_; }
constexpr bool operator>(const SplitStringIterator& other) const MIJIN_NOEXCEPT { MIJIN_ASSERT(full_ == other.full_, "Comparing unrelated iterators."); return pos_ > other.pos_; }
constexpr bool operator>=(const SplitStringIterator& other) const MIJIN_NOEXCEPT { MIJIN_ASSERT(full_ == other.full_, "Comparing unrelated iterators."); return pos_ >= other.pos_; }
[[nodiscard]]
traits_t& getTraits() MIJIN_NOEXCEPT
{
return traits_;
}
[[nodiscard]]
const traits_t& getTraits() const MIJIN_NOEXCEPT
{
return traits_;
}
constexpr value_type operator*() const MIJIN_NOEXCEPT
{
MIJIN_ASSERT(pos_ != full_.end(), "Dereferencing an invalid iterator.");
string_view_t result{pos_, next_};
if (traits_.getTrim())
{
result = trim(result, traits_.getTrimChars());
}
return result;
}
constexpr SplitStringIterator& operator++() MIJIN_NOEXCEPT
{
MIJIN_ASSERT(pos_ != full_.end(), "Iterating past end.");
if (next_ == full_.end()) {
pos_ = full_.end();
}
else
{
pos_ = std::next(next_);
findNext();
}
return *this;
}
constexpr SplitStringIterator operator++(int) const MIJIN_NOEXCEPT
{
SplitStringIterator copy(*this);
++copy;
return copy;
}
// TODO
// SplitStringIterator& operator--() MIJIN_NOEXCEPT
// {
// MIJIN_ASSERT(pos_ != full_.begin(), "Iterating past begin.");
// next_ = std::prev(pos_);
// pos_ = std::find(std::reverse_iterator(next_), std::reverse_iterator(full_.begin()), separator).base();
// }
private:
constexpr void findNext()
{
while (true)
{
if constexpr (requires{{ traits_.onNext() };}) {
traits_.onNext();
}
next_ = std::find(pos_, full_.end(), traits_.getSplitAt());
if (!traits_.getIgnoreEmpty() || pos_ == full_.end()) {
break;
}
if (traits_.getTrim())
{
const string_view_t trimChars = traits_.getTrimChars();
typename string_view_t::iterator trimmedPos = std::find_if(pos_, next_, [&](char_t chr)
{
return !trimChars.contains(chr);
});
if (trimmedPos == next_)
{
pos_ = next_; // skip this part
}
}
if (pos_ != next_) {
break;
}
pos_ = std::next(pos_);
}
}
};
template<typename TChar, SplitStringTraitsType<TChar> TTraits>
class SplitStringRange
{
public:
using char_t = TChar;
using traits_t = TTraits;
using string_view_t = traits_t::string_view_t;
using iterator = SplitStringIterator<char_t, traits_t>;
private:
[[no_unique_address]] traits_t traits_;
string_view_t stringView_;
public:
constexpr explicit SplitStringRange(string_view_t stringView, traits_t traits = {}) MIJIN_NOEXCEPT_IF(std::is_nothrow_move_constructible_v<traits_t>)
: stringView_(stringView), traits_(std::move(traits)) {}
constexpr SplitStringRange(const SplitStringRange&) noexcept(std::is_nothrow_copy_constructible_v<traits_t>) = default;
constexpr SplitStringRange(SplitStringRange&&) noexcept(std::is_nothrow_move_constructible_v<traits_t>) = default;
constexpr SplitStringRange& operator=(const SplitStringRange&) noexcept(std::is_nothrow_copy_assignable_v<traits_t>) = default;
constexpr SplitStringRange& operator=(SplitStringRange&&) noexcept(std::is_nothrow_move_assignable_v<traits_t>) = default;
constexpr auto operator<=>(const SplitStringRange&) const noexcept = default;
constexpr iterator begin() const MIJIN_NOEXCEPT_IF(std::is_nothrow_copy_constructible_v<traits_t>)
{
return iterator(stringView_, stringView_.begin(), traits_);
}
constexpr iterator end() const MIJIN_NOEXCEPT_IF(std::is_nothrow_copy_constructible_v<traits_t>)
{
return iterator(stringView_, stringView_.end(), traits_);
}
};
template<typename TChar, typename TLine = unsigned, SplitLineTraitsType<TChar, TLine> TTraits = SplitLineTraitsCT<TChar, TLine>>
class LineIterator
{
public:
using char_t = TChar;
using line_t = TLine;
using traits_t = TTraits;
using base_t = SplitStringIterator<TChar, traits_t>;
using string_view_t = base_t::string_view_t;
using value_type = std::pair<string_view_t, line_t>;
private:
base_t base_ = {};
public:
constexpr LineIterator(string_view_t full, string_view_t::iterator pos, traits_t traits = {}) MIJIN_NOEXCEPT_IF(std::is_nothrow_move_constructible_v<traits_t>)
: base_(full, pos, std::move(traits)) {}
constexpr explicit LineIterator(traits_t traits = {}) MIJIN_NOEXCEPT_IF(std::is_nothrow_move_constructible_v<traits_t>)
: base_(std::move(traits)) {}
LineIterator(const LineIterator&) noexcept = default;
LineIterator(LineIterator&&) noexcept = default;
LineIterator& operator=(const LineIterator&) noexcept = default;
LineIterator& operator=(LineIterator&&) noexcept = default;
bool operator==(const LineIterator& other) const MIJIN_NOEXCEPT { return base_ == other.base_; }
bool operator!=(const LineIterator& other) const MIJIN_NOEXCEPT { return base_ != other.base_; }
bool operator<(const LineIterator& other) const MIJIN_NOEXCEPT { return base_ < other.base_; }
bool operator>(const LineIterator& other) const MIJIN_NOEXCEPT { return base_ > other.base_; }
bool operator<=(const LineIterator& other) const MIJIN_NOEXCEPT { return base_ <= other.base_; }
bool operator>=(const LineIterator& other) const MIJIN_NOEXCEPT { return base_ >= other.base_; }
constexpr value_type operator*() const MIJIN_NOEXCEPT
{
string_view_t stringView = *base_;
if (!base_.getTraits().getTrim())
{
// always split \r, even if not trimming other whitespace
if (stringView.ends_with('\r')) {
stringView = stringView.substr(0, stringView.size() - 1);
}
}
return {stringView, base_.getTraits().line};
}
constexpr LineIterator& operator++() MIJIN_NOEXCEPT
{
++base_;
return *this;
}
constexpr LineIterator operator++(int) const MIJIN_NOEXCEPT
{
SplitStringIterator copy(*this);
++copy;
return copy;
}
};
template<typename TChar, typename TLine = unsigned, SplitLineTraitsType<TChar, TLine> TTraits = SplitLineTraitsCT<TChar, TLine>>
class LineRange
{
public:
using char_t = TChar;
using line_t = TLine;
using traits_t = TTraits;
using iterator = LineIterator<char_t, line_t, traits_t>;
using string_view_t = iterator::string_view_t;
private:
[[no_unique_address]] traits_t traits_;
string_view_t stringView_;
public:
constexpr explicit LineRange(string_view_t stringView, traits_t traits = {}) MIJIN_NOEXCEPT_IF(std::is_nothrow_move_constructible_v<traits_t>)
: traits_(std::move(traits)), stringView_(stringView) {}
constexpr LineRange(const LineRange&) noexcept(std::is_nothrow_copy_constructible_v<traits_t>) = default;
constexpr LineRange(LineRange&&) noexcept(std::is_nothrow_move_constructible_v<traits_t>) = default;
constexpr LineRange& operator=(const LineRange&) noexcept(std::is_nothrow_copy_assignable_v<traits_t>) = default;
constexpr LineRange& operator=(LineRange&&) noexcept(std::is_nothrow_move_assignable_v<traits_t>) = default;
constexpr auto operator<=>(const LineRange&) const noexcept = default;
constexpr iterator begin() const MIJIN_NOEXCEPT
{
return iterator(stringView_, stringView_.begin(), traits_);
}
constexpr iterator end() const MIJIN_NOEXCEPT
{
return iterator(stringView_, stringView_.end(), traits_);
}
};
//
// public functions
//
@ -273,13 +638,6 @@ std::basic_string_view<TChar, TTraits> trimImpl(std::basic_string_view<TChar, TT
{ {
return trimPrefixImpl(trimSuffixImpl(stringView, charsToTrim), charsToTrim); return trimPrefixImpl(trimSuffixImpl(stringView, charsToTrim), charsToTrim);
} }
// Backing storage for DEFAULT_TRIM_CHARS: space, tab, carriage return and line feed,
// each converted to the requested character type.
template<typename TChar>
inline constexpr std::array<TChar, 4> DEFAULT_TRIM_CHARS_DATA = {TChar(' '), TChar('\t'), TChar('\r'), TChar('\n')};

// View over the default trim characters (space, tab, CR, LF) for the character type TChar.
// Built from data()/size() rather than an iterator pair, so it does not depend on the C++20
// iterator-range constructor of std::basic_string_view, and declared constexpr so that it can
// be used in constant expressions.
template<typename TChar>
inline constexpr std::basic_string_view<TChar, std::char_traits<TChar>> DEFAULT_TRIM_CHARS{
    DEFAULT_TRIM_CHARS_DATA<TChar>.data(), DEFAULT_TRIM_CHARS_DATA<TChar>.size()};
} }
template<typename TLeft, typename TRight> template<typename TLeft, typename TRight>
@ -296,6 +654,61 @@ template<std::size_t count, typename TLeft, typename TRight>
std::basic_string_view(std::forward<TRight>(separator)), options, outNumResults); std::basic_string_view(std::forward<TRight>(separator)), options, outNumResults);
} }
// Splits `stringView` as described by `traits` and returns a lazily evaluated range of parts.
// The range only refers to the characters of `stringView`; the caller has to keep them alive.
//
// TChar defaults to the traits' own character type. Without that default it could not be
// deduced from the arguments and every call would have had to spell it out.
template<typename TTraits, typename TChar = typename TTraits::char_t> requires (SplitStringTraitsType<TTraits, TChar>)
[[nodiscard]] SplitStringRange<TChar, TTraits> splitView(typename TTraits::string_view_t stringView, TTraits traits = {}) MIJIN_NOEXCEPT_IF(std::is_nothrow_move_constructible_v<TTraits>)
{
    return SplitStringRange<TChar, TTraits>(stringView, std::move(traits));
}
// Splits `stringView` at the compile-time separator `splitAt`, e.g. splitView<','>(text).
// `options` selects whether empty parts are skipped and whether parts are trimmed.
// The returned range only refers to the characters of `stringView`.
//
// The range is constructed directly. Forwarding to the traits-based splitView() overload
// without naming TChar could not compile, because that parameter is not deducible there.
template<auto splitAt, SplitViewOptions options = SplitViewOptions(), typename TStringView, typename TChar = decltype(splitAt), typename TCharTraits = std::char_traits<TChar>>
[[nodiscard]] SplitStringRange<TChar, SplitStringTraitsCT<TChar, splitAt, options, TCharTraits>> splitView(TStringView&& stringView) MIJIN_NOEXCEPT
{
    using traits_t = SplitStringTraitsCT<TChar, splitAt, options, TCharTraits>;
    return SplitStringRange<TChar, traits_t>(std::basic_string_view<TChar, TCharTraits>(std::forward<TStringView>(stringView)));
}
template<typename TStringView, typename TChar, typename TCharTraits = std::char_traits<TChar>, typename TTrimChars = std::basic_string_view<TChar, TCharTraits>>
[[nodiscard]] auto splitView(TStringView&& stringView, TChar splitAt, bool ignoreEmpty = true, TTrimChars trimChars = {}) MIJIN_NOEXCEPT
{
return splitView(std::basic_string_view<TChar, TCharTraits>(std::forward<TStringView>(stringView)), SplitStringTraitsRT<TChar, TCharTraits>{
.splitAt = splitAt,
.ignoreEmpty = ignoreEmpty,
.trimChars = std::basic_string_view<TChar, TCharTraits>(std::forward<TTrimChars>(trimChars))
});
}
// Splits `stringView` into lines as described by `traits` and returns a lazily evaluated range
// of (line, line number) pairs. The range only refers to the characters of `stringView`.
//
// TChar defaults to the traits' own character type. Without that default it could not be
// deduced from the arguments and every call would have had to spell it out.
template<typename TTraits, typename TChar = typename TTraits::char_t, typename TLine = unsigned> requires(SplitLineTraitsType<TTraits, TChar, TLine>)
[[nodiscard]] LineRange<TChar, TLine, TTraits> splitLines(typename TTraits::string_view_t stringView, TTraits traits = {}) MIJIN_NOEXCEPT_IF(std::is_nothrow_move_constructible_v<TTraits>)
{
    return LineRange<TChar, TLine, TTraits>(stringView, std::move(traits));
}
// Splits anything a std::basic_string_view can be deduced from into lines, with the options
// fixed at compile time. By default empty lines are kept (ignoreEmpty = false).
// Returns a lazily evaluated range of (line, line number) pairs that refers to the characters
// of `stringView`.
template<typename TLine = unsigned, SplitViewOptions options = SplitViewOptions{.ignoreEmpty=false},
         typename TParam,
         typename TStringView = decltype(std::basic_string_view(std::declval<TParam&&>())),
         typename TChar = typename TStringView::value_type,
         typename TCharTraits = typename TStringView::traits_type,
         typename TTraits = SplitLineTraitsCT<TChar, TLine, options, TCharTraits>>
[[nodiscard]]
auto splitLines(TParam&& stringView) MIJIN_NOEXCEPT -> LineRange<TChar, TLine, TTraits>
{
    using range_t = LineRange<TChar, TLine, TTraits>;
    const std::basic_string_view text(std::forward<TParam>(stringView));
    return range_t(text);
}
// Splits anything a std::basic_string_view can be deduced from into lines, with the options
// chosen at run time.
//
// ignoreEmpty - skip empty lines
// trimChars   - characters to strip from every line; empty (the default) disables trimming
//
// Returns a lazily evaluated range of (line, line number) pairs that refers to the characters
// of `stringView` and `trimChars`.
template<typename TParam, typename TLine = unsigned,
         typename TStringView = decltype(std::basic_string_view(std::declval<TParam&&>())),
         typename TChar = typename TStringView::value_type,
         typename TCharTraits = typename TStringView::traits_type,
         typename TTraits = SplitLineTraitsRT<TChar, TLine, TCharTraits>,
         typename TTrimChars = TStringView>
[[nodiscard]]
auto splitLines(TParam&& stringView, bool ignoreEmpty, TTrimChars&& trimChars = {}) MIJIN_NOEXCEPT -> LineRange<TChar, TLine, TTraits>
{
    using view_t = std::basic_string_view<TChar, TCharTraits>;
    using range_t = LineRange<TChar, TLine, TTraits>;

    const std::basic_string_view text(std::forward<TParam>(stringView));
    return range_t(text, TTraits{
        .ignoreEmpty = ignoreEmpty,
        .trimChars = view_t(std::forward<TTrimChars>(trimChars))
    });
}
template<typename TString, typename TChars> template<typename TString, typename TChars>
[[nodiscard]] [[nodiscard]]
auto trimPrefix(TString&& string, TChars&& chars) auto trimPrefix(TString&& string, TChars&& chars)
@ -307,7 +720,7 @@ template<typename TString>
[[nodiscard]] [[nodiscard]]
auto trimPrefix(TString&& string) auto trimPrefix(TString&& string)
{ {
return trimPrefix(string, detail::DEFAULT_TRIM_CHARS<str_char_type_t<TString>>); return trimPrefix(string, DEFAULT_TRIM_CHARS<str_char_type_t<TString>>);
} }
template<typename TString, typename TChars> template<typename TString, typename TChars>
@ -321,7 +734,7 @@ template<typename TString>
[[nodiscard]] [[nodiscard]]
auto trimSuffix(TString&& string) auto trimSuffix(TString&& string)
{ {
return trimSuffix(string, detail::DEFAULT_TRIM_CHARS<str_char_type_t<TString>>); return trimSuffix(string, DEFAULT_TRIM_CHARS<str_char_type_t<TString>>);
} }
template<typename TString, typename TChars> template<typename TString, typename TChars>
@ -335,7 +748,7 @@ template<typename TString>
[[nodiscard]] [[nodiscard]]
auto trim(TString&& string) auto trim(TString&& string)
{ {
return trim(string, detail::DEFAULT_TRIM_CHARS<str_char_type_t<TString>>); return trim(string, DEFAULT_TRIM_CHARS<str_char_type_t<TString>>);
} }
template<typename TLeft, typename TRight> template<typename TLeft, typename TRight>
@ -487,21 +900,6 @@ auto operator|(TIterable&& iterable, const Join& joiner)
} }
} // namespace pipe } // namespace pipe
// Result type of convertCharType(). Carries two counters; presumably the number of source
// characters consumed (numRead) and destination characters produced (numWritten) - confirm
// against convertCharType().
// The value converts to true as soon as either counter is non-zero.
struct [[nodiscard]] ConvertCharTypeResult
{
    unsigned numRead = 0;
    unsigned numWritten = 0;

    // True if anything was read or written.
    constexpr operator bool() const MIJIN_NOEXCEPT
    {
        const bool nothingDone = (numRead == 0) && (numWritten == 0);
        return !nothingDone;
    }

    // True if both counters are zero.
    constexpr bool operator !() const MIJIN_NOEXCEPT
    {
        const bool anythingDone = static_cast<bool>(*this);
        return !anythingDone;
    }
};
template<typename TFrom, typename TTo> template<typename TFrom, typename TTo>
ConvertCharTypeResult convertCharType(const TFrom* chrFrom, std::size_t numFrom, TTo* outTo, std::size_t numTo, std::mbstate_t& mbstate) MIJIN_NOEXCEPT ConvertCharTypeResult convertCharType(const TFrom* chrFrom, std::size_t numFrom, TTo* outTo, std::size_t numTo, std::mbstate_t& mbstate) MIJIN_NOEXCEPT
{ {