628 lines
18 KiB
C++
628 lines
18 KiB
C++
|
|
#pragma once
|
|
|
|
#if !defined(MIJIN_UTIL_STRING_HPP_INCLUDED)
|
|
#define MIJIN_UTIL_STRING_HPP_INCLUDED 1
|
|
|
|
#include <algorithm>
|
|
#include <array>
|
|
#include <charconv>
|
|
#include <climits>
|
|
#include <cstdlib>
|
|
#include <cstring>
|
|
#include <iterator>
|
|
#include <limits>
|
|
#include <locale>
|
|
#include <sstream>
|
|
#include <string>
|
|
#include <string_view>
|
|
#include <vector>
|
|
|
|
#include "./iterators.hpp"
|
|
#include "../internal/common.hpp"
|
|
#include "../util/traits.hpp"
|
|
|
|
namespace mijin
|
|
{
|
|
|
|
//
|
|
// public defines
|
|
//
|
|
|
|
//
|
|
// public constants
|
|
//
|
|
|
|
//
|
|
// public traits
|
|
//
|
|
|
|
template<typename T>
|
|
inline constexpr bool is_string_v = is_template_instance_v<std::basic_string, std::remove_cvref_t<T>>;
|
|
|
|
template<typename T>
|
|
concept std_string_type = is_string_v<T>;
|
|
|
|
template<typename T>
|
|
inline constexpr bool is_string_view_v = is_template_instance_v<std::basic_string_view, std::remove_cvref_t<T>>;
|
|
|
|
template<typename T>
|
|
concept std_string_view_type = is_string_view_v<T>;
|
|
|
|
template<typename T>
|
|
inline constexpr bool is_char_v = is_any_type_v<std::remove_cvref_t<T>, char, wchar_t, char8_t, char16_t, char32_t>;
|
|
|
|
template<typename T>
|
|
concept char_type = is_char_v<T>;
|
|
|
|
template<typename T>
|
|
inline constexpr bool is_cstring_v = std::is_pointer_v<T> && is_char_v<std::remove_pointer_t<T>>;
|
|
|
|
template<typename T>
|
|
concept cstring_type = is_cstring_v<T>;
|
|
|
|
template<typename T>
|
|
struct str_char_type
|
|
{
|
|
using type = void;
|
|
};
|
|
|
|
template<char_type T>
|
|
struct str_char_type<T*>
|
|
{
|
|
using type = std::remove_cvref_t<T>;
|
|
};
|
|
|
|
template<std_string_view_type T>
|
|
struct str_char_type<T>
|
|
{
|
|
using type = typename std::remove_cvref_t<T>::value_type;
|
|
};
|
|
|
|
template<std_string_type T>
|
|
struct str_char_type<T>
|
|
{
|
|
using type = typename std::remove_cvref_t<T>::value_type;
|
|
};
|
|
|
|
template<typename T>
|
|
using str_char_type_t = str_char_type<T>::type;
|
|
|
|
//
|
|
// public types
|
|
//
|
|
|
|
// Settings understood by split() and splitFixed().
struct SplitOptions
{
    // Maximum number of parts to produce. Once only one part is left to emit,
    // the unsplit remainder of the input becomes that last part.
    std::size_t limitParts{std::numeric_limits<std::size_t>::max()};

    // When true, empty parts between separators are not reported.
    bool ignoreEmpty{true};
};
|
|
|
|
//
|
|
// public functions
|
|
//
|
|
|
|
// Streams every element of `elements` into a string, placing `delimiter` between
// neighbouring elements (no leading or trailing delimiter).
//
// All elements except the last are written through a std::ostream_iterator<TValue>,
// the last one is streamed directly. The range has to support std::prev() on its end
// iterator. An empty range produces an empty string.
template <typename TRange, typename TValue = typename TRange::value_type>
[[nodiscard]] std::string join(const TRange& elements, const char* const delimiter)
{
    std::ostringstream stream;

    auto current = std::begin(elements);
    const auto rangeEnd = std::end(elements);
    if (current == rangeEnd)
    {
        return stream.str();
    }

    // everything before the final element is followed by the delimiter ...
    const auto finalElement = std::prev(rangeEnd);
    std::copy(current, finalElement, std::ostream_iterator<TValue>(stream, delimiter));

    // ... the final element itself is not
    stream << *finalElement;
    return stream.str();
}
|
|
|
|
namespace detail
|
|
{
|
|
template<typename TChar, typename TTraits, typename TOutIterator>
|
|
void splitImpl(std::basic_string_view<TChar, TTraits> stringView,
|
|
std::basic_string_view<TChar, TTraits> separator,
|
|
const SplitOptions& options,
|
|
TOutIterator outIterator,
|
|
std::size_t& numEmitted)
|
|
{
|
|
using sv_t = std::basic_string_view<TChar, TTraits>;
|
|
|
|
MIJIN_ASSERT(options.limitParts > 0, "Cannot split to zero parts.");
|
|
MIJIN_ASSERT(!separator.empty(), "Separator cannot be empty.");
|
|
|
|
if (separator.empty())
|
|
{
|
|
return;
|
|
}
|
|
|
|
auto emit = [&](std::string_view result)
|
|
{
|
|
*outIterator = result;
|
|
++outIterator;
|
|
++numEmitted;
|
|
};
|
|
|
|
if (options.limitParts <= 1)
|
|
{
|
|
emit(stringView);
|
|
return;
|
|
}
|
|
auto start = stringView.begin();
|
|
auto pos = start;
|
|
const auto end = stringView.end();
|
|
|
|
auto seperatorFound = [&]()
|
|
{
|
|
return pos <= (end - separator.size()) && sv_t(pos, pos + separator.size()) == separator;
|
|
};
|
|
|
|
while (pos != end)
|
|
{
|
|
if (seperatorFound())
|
|
{
|
|
if (!options.ignoreEmpty || pos != start)
|
|
{
|
|
emit(std::string_view(start, pos));
|
|
}
|
|
start = pos = (pos + separator.size());
|
|
if (numEmitted == options.limitParts - 1)
|
|
{
|
|
if (options.ignoreEmpty)
|
|
{
|
|
while (seperatorFound())
|
|
{
|
|
pos += separator.size();
|
|
}
|
|
}
|
|
emit(std::string_view(pos, end));
|
|
return;
|
|
}
|
|
if (!options.ignoreEmpty && pos == end)
|
|
{
|
|
// skipped a separator at the very end, add an empty entry
|
|
emit("");
|
|
return;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
++pos;
|
|
}
|
|
}
|
|
|
|
if (start != end)
|
|
{
|
|
emit(std::string_view(start, end));
|
|
}
|
|
}
|
|
|
|
template<typename TChar, typename TTraits>
|
|
std::vector<std::basic_string_view<TChar, TTraits>> splitImpl(std::basic_string_view<TChar, TTraits> stringView,
|
|
std::basic_string_view<TChar, TTraits> separator,
|
|
const SplitOptions& options)
|
|
{
|
|
std::vector<std::basic_string_view<TChar, TTraits>> result;
|
|
std::size_t numEmitted = 0;
|
|
splitImpl(stringView, separator, options, std::back_inserter(result), numEmitted);
|
|
return result;
|
|
}
|
|
|
|
|
|
template<std::size_t count, typename TChar, typename TTraits>
|
|
std::array<std::basic_string_view<TChar, TTraits>, count> splitFixedImpl(std::basic_string_view<TChar, TTraits> stringView,
|
|
std::basic_string_view<TChar, TTraits> separator,
|
|
SplitOptions options,
|
|
std::size_t* outNumResults)
|
|
{
|
|
options.limitParts = count;
|
|
|
|
std::array<std::basic_string_view<TChar, TTraits>, count> result;
|
|
std::size_t numEmitted = 0;
|
|
splitImpl(stringView, separator, options, result.begin(), numEmitted);
|
|
if (outNumResults != nullptr)
|
|
{
|
|
*outNumResults = numEmitted;
|
|
}
|
|
return result;
|
|
}
|
|
|
|
template<typename TChar, typename TTraitsA, typename TTraitsB>
|
|
bool equalsIgnoreCaseImpl(std::basic_string_view<TChar, TTraitsA> stringA, std::basic_string_view<TChar, TTraitsB> stringB) MIJIN_NOEXCEPT
|
|
{
|
|
if (stringA.size() != stringB.size())
|
|
{
|
|
return false;
|
|
}
|
|
|
|
for (const auto [charA, charB] : zip(stringA, stringB))
|
|
{
|
|
if (std::tolower(charA) != std::tolower(charB))
|
|
{
|
|
return false;
|
|
}
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
// Returns `stringView` without its leading run of characters contained in `charsToTrim`.
// If every character is in `charsToTrim` the result is an empty view positioned at the
// end of the input.
template<typename TChar, typename TTraits>
std::basic_string_view<TChar, TTraits> trimPrefixImpl(std::basic_string_view<TChar, TTraits> stringView,
                                                      std::basic_string_view<TChar, TTraits> charsToTrim)
{
    const auto firstKept = stringView.find_first_not_of(charsToTrim);
    if (firstKept == decltype(stringView)::npos)
    {
        // nothing survives the trim
        return stringView.substr(stringView.size());
    }
    return stringView.substr(firstKept);
}
|
|
|
|
// Returns `stringView` without its trailing run of characters contained in `charsToTrim`.
// If every character is in `charsToTrim` the result is an empty view positioned at the
// start of the input.
template<typename TChar, typename TTraits>
std::basic_string_view<TChar, TTraits> trimSuffixImpl(std::basic_string_view<TChar, TTraits> stringView,
                                                      std::basic_string_view<TChar, TTraits> charsToTrim)
{
    const auto lastKept = stringView.find_last_not_of(charsToTrim);
    if (lastKept == decltype(stringView)::npos)
    {
        // nothing survives the trim
        return stringView.substr(0, 0);
    }
    return stringView.substr(0, lastKept + 1);
}
|
|
|
|
// Removes the characters contained in `charsToTrim` from both ends of `stringView`:
// the suffix is trimmed first, then the prefix of what is left.
template<typename TChar, typename TTraits>
std::basic_string_view<TChar, TTraits> trimImpl(std::basic_string_view<TChar, TTraits> stringView,
                                                std::basic_string_view<TChar, TTraits> charsToTrim)
{
    const auto withoutSuffix = trimSuffixImpl(stringView, charsToTrim);
    return trimPrefixImpl(withoutSuffix, charsToTrim);
}
|
|
|
|
// Characters removed by the trim functions when the caller does not pass its own set:
// space, tab, carriage return and line feed, converted to the requested character type.
//
// Declared `inline constexpr` so that all translation units share one constant-initialized
// object per character type, instead of `static const` copies whose initialization is not
// guaranteed to happen at compile time.
template<typename TChar>
inline constexpr std::array<TChar, 4> DEFAULT_TRIM_CHARS_DATA = {TChar(' '), TChar('\t'), TChar('\r'), TChar('\n')};

// View over DEFAULT_TRIM_CHARS_DATA, in the form the trim functions take their character set.
template<typename TChar>
inline constexpr std::basic_string_view<TChar, std::char_traits<TChar>> DEFAULT_TRIM_CHARS{
    DEFAULT_TRIM_CHARS_DATA<TChar>.data(), DEFAULT_TRIM_CHARS_DATA<TChar>.size()};
|
|
}
|
|
|
|
template<typename TLeft, typename TRight>
|
|
[[nodiscard]] auto split(TLeft&& stringView, TRight&& separator, const SplitOptions& options = {})
|
|
{
|
|
return detail::splitImpl(std::basic_string_view(std::forward<TLeft>(stringView)),
|
|
std::basic_string_view(std::forward<TRight>(separator)), options);
|
|
}
|
|
|
|
template<std::size_t count, typename TLeft, typename TRight>
|
|
[[nodiscard]] auto splitFixed(TLeft&& stringView, TRight&& separator, SplitOptions options = {}, std::size_t* outNumResults = nullptr)
|
|
{
|
|
return detail::splitFixedImpl<count>(std::basic_string_view(std::forward<TLeft>(stringView)),
|
|
std::basic_string_view(std::forward<TRight>(separator)), options, outNumResults);
|
|
}
|
|
|
|
template<typename TString, typename TChars>
|
|
[[nodiscard]]
|
|
auto trimPrefix(TString&& string, TChars&& chars)
|
|
{
|
|
return detail::trimPrefixImpl(std::string_view(std::forward<TString>(string)), std::string_view(std::forward<TChars>(chars)));
|
|
}
|
|
|
|
template<typename TString>
|
|
[[nodiscard]]
|
|
auto trimPrefix(TString&& string)
|
|
{
|
|
return trimPrefix(string, detail::DEFAULT_TRIM_CHARS<str_char_type_t<TString>>);
|
|
}
|
|
|
|
template<typename TString, typename TChars>
|
|
[[nodiscard]]
|
|
auto trimSuffix(TString&& string, TChars&& chars)
|
|
{
|
|
return detail::trimSuffixImpl(std::string_view(std::forward<TString>(string)), std::string_view(std::forward<TChars>(chars)));
|
|
}
|
|
|
|
template<typename TString>
|
|
[[nodiscard]]
|
|
auto trimSuffix(TString&& string)
|
|
{
|
|
return trimSuffix(string, detail::DEFAULT_TRIM_CHARS<str_char_type_t<TString>>);
|
|
}
|
|
|
|
template<typename TString, typename TChars>
|
|
[[nodiscard]]
|
|
auto trim(TString&& string, TChars&& chars)
|
|
{
|
|
return detail::trimImpl(std::string_view(std::forward<TString>(string)), std::string_view(std::forward<TChars>(chars)));
|
|
}
|
|
|
|
template<typename TString>
|
|
[[nodiscard]]
|
|
auto trim(TString&& string)
|
|
{
|
|
return trim(string, detail::DEFAULT_TRIM_CHARS<str_char_type_t<TString>>);
|
|
}
|
|
|
|
template<typename TLeft, typename TRight>
|
|
[[nodiscard]] bool equalsIgnoreCase(TLeft&& left, TRight&& right) MIJIN_NOEXCEPT
|
|
{
|
|
return detail::equalsIgnoreCaseImpl(std::string_view(left), std::string_view(right));
|
|
}
|
|
|
|
// Converts `string` to lower case in place, using the std::ctype facet of a
// default-constructed std::locale (a copy of the current global C++ locale).
template<typename TChar, typename TTraits, typename TAllocator>
constexpr void makeLower(std::basic_string<TChar, TTraits, TAllocator>& string)
{
    if (string.empty())
    {
        return;
    }

    // convert the whole buffer with one facet call instead of one lookup per character
    TChar* const first = string.data();
    std::use_facet<std::ctype<TChar>>(std::locale()).tolower(first, first + string.size());
}
|
|
|
|
// Converts `string` to upper case in place, using the std::ctype facet of a
// default-constructed std::locale (a copy of the current global C++ locale).
template<typename TChar, typename TTraits, typename TAllocator>
constexpr void makeUpper(std::basic_string<TChar, TTraits, TAllocator>& string)
{
    if (string.empty())
    {
        return;
    }

    // convert the whole buffer with one facet call instead of one lookup per character
    TChar* const first = string.data();
    std::use_facet<std::ctype<TChar>>(std::locale()).toupper(first, first + string.size());
}
|
|
|
|
// Builds a std::basic_string from `args` (character type deduced from the arguments)
// and returns it converted to lower case by makeLower().
template<typename... TArgs>
[[nodiscard]]
constexpr auto toLower(TArgs&&... args)
{
    std::basic_string lowered(std::forward<TArgs>(args)...);
    makeLower(lowered);
    return lowered;
}
|
|
|
|
// Builds a std::basic_string from `args` (character type deduced from the arguments)
// and returns it converted to upper case by makeUpper().
template<typename... TArgs>
[[nodiscard]]
constexpr auto toUpper(TArgs&&... args)
{
    std::basic_string raised(std::forward<TArgs>(args)...);
    makeUpper(raised);
    return raised;
}
|
|
|
|
|
|
template<std::integral TNumber>
|
|
[[nodiscard]]
|
|
constexpr bool toNumber(std::string_view stringView, TNumber& outNumber, int base = 10) MIJIN_NOEXCEPT
|
|
{
|
|
const char* start = &*stringView.begin();
|
|
const char* end = start + stringView.size();
|
|
const std::from_chars_result res = std::from_chars(start, end, outNumber, base);
|
|
return res.ec == std::errc{} && res.ptr == end;
|
|
}
|
|
|
|
template<typename TChar, typename TTraits, std::integral TNumber>
|
|
[[nodiscard]]
|
|
constexpr bool toNumber(std::basic_string_view<TChar, TTraits> stringView, TNumber& outNumber, int base = 10) MIJIN_NOEXCEPT requires (!std::is_same_v<TChar, char>)
|
|
{
|
|
std::string asString;
|
|
asString.resize(stringView.size());
|
|
// should only contain number symbols, so just cast down to char
|
|
std::transform(stringView.begin(), stringView.end(), asString.begin(), [](TChar chr) { return static_cast<char>(chr); });
|
|
return toNumber(asString, outNumber, base);
|
|
}
|
|
|
|
template<std::floating_point TNumber>
|
|
[[nodiscard]]
|
|
constexpr bool toNumber(std::string_view stringView, TNumber& outNumber, std::chars_format fmt = std::chars_format::general) MIJIN_NOEXCEPT
|
|
{
|
|
const char* start = &*stringView.begin();
|
|
const char* end = start + stringView.size();
|
|
const std::from_chars_result res = std::from_chars(start, end, outNumber, fmt);
|
|
return res.ec == std::errc{} && res.ptr == end;
|
|
}
|
|
|
|
template<typename TChar, typename TTraits, std::floating_point TNumber>
|
|
[[nodiscard]]
|
|
constexpr bool toNumber(std::basic_string_view<TChar, TTraits> stringView, TNumber& outNumber, std::chars_format fmt = std::chars_format::general) MIJIN_NOEXCEPT requires (!std::is_same_v<TChar, char>)
|
|
{
|
|
std::string asString;
|
|
asString.resize(stringView.size());
|
|
// should only contain number symbols, so just cast down to char
|
|
std::transform(stringView.begin(), stringView.end(), asString.begin(), [](TChar chr) { return static_cast<char>(chr); });
|
|
return toNumber(asString, outNumber, fmt);
|
|
}
|
|
|
|
// Returns true if `chr` lies between the characters '0' and '9' (inclusive) of its
// character type.
template<typename TChar>
[[nodiscard]]
constexpr bool isDecimalChar(TChar chr) noexcept
{
    return TChar('0') <= chr && chr <= TChar('9');
}
|
|
|
|
// Returns true if `chr` is a decimal digit or lies in one of the ranges 'A'-'F' / 'a'-'f'
// of its character type.
template<typename TChar>
[[nodiscard]]
constexpr bool isHexadecimalChar(TChar chr) noexcept
{
    if (isDecimalChar(chr))
    {
        return true;
    }
    if (TChar('A') <= chr && chr <= TChar('F'))
    {
        return true;
    }
    return TChar('a') <= chr && chr <= TChar('f');
}
|
|
|
|
template<typename TChar>
|
|
bool compareIgnoreCase(TChar left, TChar right) MIJIN_NOEXCEPT
|
|
{
|
|
return std::tolower(left) == std::tolower(right);
|
|
}
|
|
|
|
[[nodiscard]]
|
|
inline auto findIgnoreCase(std::string_view haystack, std::string_view needle)
|
|
{
|
|
return std::ranges::search(haystack, needle, &compareIgnoreCase<char>);
|
|
}
|
|
|
|
[[nodiscard]]
|
|
inline bool startsWithIgnoreCase(std::string_view string, std::string_view part)
|
|
{
|
|
if (part.size() > string.size())
|
|
{
|
|
return false;
|
|
}
|
|
return std::ranges::equal(string.substr(0, part.size()), part, &compareIgnoreCase<char>);
|
|
}
|
|
|
|
[[nodiscard]]
|
|
inline bool endsWithIgnoreCase(std::string_view string, std::string_view part)
|
|
{
|
|
if (part.size() > string.size())
|
|
{
|
|
return false;
|
|
}
|
|
return std::ranges::equal(string.substr(string.size() - part.size()), part, &compareIgnoreCase<char>);
|
|
}
|
|
|
|
namespace pipe
|
|
{
|
|
struct Join
|
|
{
|
|
const char* delimiter;
|
|
|
|
explicit Join(const char* delimiter_) MIJIN_NOEXCEPT: delimiter(delimiter_) {}
|
|
};
|
|
|
|
template<typename TIterable>
|
|
auto operator|(TIterable&& iterable, const Join& joiner)
|
|
{
|
|
return join(std::forward<TIterable>(iterable), joiner.delimiter);
|
|
}
|
|
} // namespace pipe
|
|
|
|
struct [[nodiscard]] ConvertCharTypeResult
|
|
{
|
|
unsigned numRead = 0;
|
|
unsigned numWritten = 0;
|
|
|
|
constexpr operator bool() const MIJIN_NOEXCEPT
|
|
{
|
|
return numRead != 0 || numWritten != 0;
|
|
}
|
|
constexpr bool operator !() const MIJIN_NOEXCEPT
|
|
{
|
|
return !static_cast<bool>(*this);
|
|
}
|
|
};
|
|
|
|
template<typename TFrom, typename TTo>
|
|
ConvertCharTypeResult convertCharType(const TFrom* chrFrom, std::size_t numFrom, TTo* outTo, std::size_t numTo, std::mbstate_t& mbstate) MIJIN_NOEXCEPT
|
|
{
|
|
if constexpr (std::is_same_v<TFrom, char>)
|
|
{
|
|
if constexpr (std::is_same_v<TTo, wchar_t>)
|
|
{
|
|
const std::size_t result = std::mbrtowc(outTo, chrFrom, numFrom, &mbstate);
|
|
if (result == static_cast<std::size_t>(-1))
|
|
{
|
|
return {};
|
|
}
|
|
return {
|
|
.numRead = static_cast<unsigned>(result),
|
|
.numWritten = 1
|
|
};
|
|
}
|
|
if constexpr (std::is_same_v<TTo, char8_t>)
|
|
{
|
|
|
|
}
|
|
}
|
|
if constexpr (std::is_same_v<TFrom, wchar_t>)
|
|
{
|
|
if constexpr (std::is_same_v<TTo, char>)
|
|
{
|
|
if (numTo < MB_CUR_MAX)
|
|
{
|
|
char tmpBuf[MB_LEN_MAX];
|
|
const ConvertCharTypeResult result = convertCharType(chrFrom, numFrom, tmpBuf, MB_LEN_MAX, mbstate);
|
|
if (result && result.numWritten <= numTo)
|
|
{
|
|
std::memcpy(outTo, tmpBuf, result.numWritten);
|
|
}
|
|
return result;
|
|
}
|
|
const std::size_t result = std::wcrtomb(outTo, *chrFrom, &mbstate);
|
|
if (result == static_cast<std::size_t>(-1))
|
|
{
|
|
return {};
|
|
}
|
|
return {
|
|
.numRead = 1,
|
|
.numWritten = static_cast<unsigned>(result)
|
|
};
|
|
}
|
|
if constexpr (std::is_same_v<TTo, char8_t>)
|
|
{
|
|
|
|
}
|
|
}
|
|
if constexpr (std::is_same_v<TFrom, char8_t>)
|
|
{
|
|
|
|
}
|
|
}
|
|
template<typename TFrom, typename TTo>
|
|
ConvertCharTypeResult convertCharType(const TFrom* chrFrom, std::size_t numFrom, TTo* outTo, std::size_t numTo) MIJIN_NOEXCEPT
|
|
{
|
|
std::mbstate_t mbstate;
|
|
return convertCharType(chrFrom, numFrom, outTo, numTo, mbstate);
|
|
}
|
|
|
|
// Return value of the iterator-based convertStringType() overload.
template<typename TIterator>
struct [[nodiscard]] ConvertStringTypeResult
{
    // output iterator positioned after the last element that was written
    TIterator iterator;

    // false if a character could not be converted
    bool success;
};
|
|
|
|
template<typename TTo, typename TFrom, typename TFromTraits, std::output_iterator<TTo> TIterator>
|
|
ConvertStringTypeResult<TIterator> convertStringType(std::basic_string_view<TFrom, TFromTraits> strFrom, TIterator outIterator)
|
|
{
|
|
TTo outBuffer[MB_LEN_MAX];
|
|
|
|
std::mbstate_t mbstate = {};
|
|
for (auto it = strFrom.begin(); it != strFrom.end();)
|
|
{
|
|
const std::size_t remaining = std::distance(it, strFrom.end());
|
|
const ConvertCharTypeResult result = convertCharType(&*it, remaining, outBuffer, MB_LEN_MAX, mbstate);
|
|
if (!result)
|
|
{
|
|
return {
|
|
.iterator = outIterator,
|
|
.success = false
|
|
};
|
|
}
|
|
for (unsigned pos = 0; pos < result.numWritten; ++pos)
|
|
{
|
|
*outIterator = outBuffer[pos];
|
|
++outIterator;
|
|
}
|
|
it = std::next(it, result.numRead);
|
|
}
|
|
|
|
return {
|
|
.iterator = outIterator,
|
|
.success = true
|
|
};
|
|
}
|
|
|
|
// Appends `strFrom`, converted to the character type of `outString`, to `outString`.
//
// When source and target character type are the same the text is appended unchanged.
// Otherwise the iterator-based overload performs the conversion. Returns false if that
// conversion failed; characters converted before the failure stay in `outString`.
template<typename TFrom, typename TFromTraits, typename TTo, typename TToTraits, typename TToAllocator>
bool convertStringType(std::basic_string_view<TFrom, TFromTraits> strFrom, std::basic_string<TTo, TToTraits, TToAllocator>& outString)
{
    if constexpr (!std::is_same_v<TTo, TFrom>)
    {
        const auto converted = convertStringType<TTo>(strFrom, std::back_inserter(outString));
        return converted.success;
    }
    else
    {
        // same character type, nothing to convert
        outString.append(strFrom);
        return true;
    }
}
|
|
|
|
// Overload for null-terminated strings: wraps `strFrom` in a std::basic_string_view and
// forwards to the view-based overload. `strFrom` is passed to the view constructor as is.
template<typename TFrom, typename TTo, typename TToTraits, typename TToAllocator>
bool convertStringType(const TFrom* strFrom, std::basic_string<TTo, TToTraits, TToAllocator>& outString)
{
    const std::basic_string_view<TFrom> sourceView(strFrom);
    return convertStringType(sourceView, outString);
}
|
|
} // namespace mijin
|
|
|
|
#endif // !defined(MIJIN_UTIL_STRING_HPP_INCLUDED)
|