// mijin2/source/mijin/util/string.hpp
// (file-viewer metadata: 628 lines, 18 KiB, C++)

#pragma once
#if !defined(MIJIN_UTIL_STRING_HPP_INCLUDED)
#define MIJIN_UTIL_STRING_HPP_INCLUDED 1
#include <algorithm>
#include <array>
#include <cctype>
#include <charconv>
#include <climits>
#include <cstdlib>
#include <cstring>
#include <iterator>
#include <limits>
#include <locale>
#include <sstream>
#include <string>
#include <string_view>
#include <type_traits>
#include <vector>
#include "./iterators.hpp"
#include "../internal/common.hpp"
#include "../util/traits.hpp"
namespace mijin
{
//
// public defines
//
//
// public constants
//
//
// public traits
//
// Checks T (with cv-qualifiers and references removed) against the std::basic_string template.
// NOTE(review): is_template_instance_v is declared elsewhere; presumably it tests whether the
// type is an instantiation of the given template - confirm.
template<typename T>
inline constexpr bool is_string_v = is_template_instance_v<std::basic_string, std::remove_cvref_t<T>>;
// Concept form of is_string_v.
template<typename T>
concept std_string_type = is_string_v<T>;
// Checks T (with cv-qualifiers and references removed) against the std::basic_string_view template.
// NOTE(review): relies on is_template_instance_v, which is declared elsewhere - confirm its semantics.
template<typename T>
inline constexpr bool is_string_view_v = is_template_instance_v<std::basic_string_view, std::remove_cvref_t<T>>;
// Concept form of is_string_view_v.
template<typename T>
concept std_string_view_type = is_string_view_v<T>;
// Checks T (with cv-qualifiers and references removed) against the list
// char, wchar_t, char8_t, char16_t and char32_t.
// NOTE(review): is_any_type_v is declared elsewhere; presumably true when the first type equals
// any of the following ones - confirm.
template<typename T>
inline constexpr bool is_char_v = is_any_type_v<std::remove_cvref_t<T>, char, wchar_t, char8_t, char16_t, char32_t>;
// Concept form of is_char_v.
template<typename T>
concept char_type = is_char_v<T>;
// True when T is a pointer type whose pointee satisfies is_char_v (a C string pointer).
// Unlike the traits above, std::is_pointer_v is applied to T as given, without removing
// references, so a reference to a pointer (e.g. `const char*&`) does not qualify.
template<typename T>
inline constexpr bool is_cstring_v = std::is_pointer_v<T> && is_char_v<std::remove_pointer_t<T>>;
// Concept form of is_cstring_v.
template<typename T>
concept cstring_type = is_cstring_v<T>;
// Maps a string-like type to its character type.
// Primary template: used when no specialization below matches; yields void.
// Note that the pointer specialization only matches pointer types themselves, so references to
// pointers and character arrays (string literals) end up here.
template<typename T>
struct str_char_type
{
using type = void;
};
// Pointers to character types (C strings): the pointee type without cv-qualifiers.
template<char_type T>
struct str_char_type<T*>
{
using type = std::remove_cvref_t<T>;
};
// Types accepted by std_string_view_type (cv/ref-qualified or not): the view's value_type.
template<std_string_view_type T>
struct str_char_type<T>
{
using type = typename std::remove_cvref_t<T>::value_type;
};
// Types accepted by std_string_type (cv/ref-qualified or not): the string's value_type.
template<std_string_type T>
struct str_char_type<T>
{
using type = typename std::remove_cvref_t<T>::value_type;
};
// Shorthand for str_char_type<T>::type.
template<typename T>
using str_char_type_t = str_char_type<T>::type;
//
// public types
//
// Options controlling how split() and splitFixed() cut a string into parts.
struct SplitOptions
{
// Upper bound for the number of parts; once limitParts - 1 parts have been emitted, the
// remaining text becomes the final part. Defaults to "no limit".
std::size_t limitParts = std::numeric_limits<std::size_t>::max();
// When true, empty parts (e.g. between two adjacent separators) are skipped instead of emitted.
bool ignoreEmpty = true;
};
//
// public functions
//
// Concatenates the textual form (operator<<) of all elements, placing the delimiter between
// consecutive elements. An empty range produces an empty string.
//
// elements:  range to join; its iterators must support std::prev (bidirectional).
// delimiter: C string written between elements.
// TValue:    type the leading elements are streamed as (defaults to the range's value_type).
template <typename TRange, typename TValue = typename TRange::value_type>
[[nodiscard]] std::string join(const TRange& elements, const char* const delimiter)
{
    std::ostringstream stream;
    const auto endIt = std::end(elements);
    auto current = std::begin(elements);
    if (current == endIt)
    {
        return stream.str();
    }

    // every element except the final one is followed by the delimiter
    const auto finalIt = std::prev(endIt);
    std::ostream_iterator<TValue> writer(stream, delimiter);
    for (; current != finalIt; ++current)
    {
        *writer = *current;
        ++writer;
    }

    // the final element is written without a trailing delimiter
    stream << *finalIt;
    return stream.str();
}
namespace detail
{
// Splits stringView at every occurrence of separator and writes the parts to outIterator.
//
// stringView:  text to split; the emitted parts are views into it.
// separator:   delimiter text; must not be empty (asserted, and nothing is emitted if it is).
// options:     limitParts caps the number of parts (the last part receives the unsplit rest, even
//              if that rest is empty); ignoreEmpty suppresses empty parts found while scanning.
// outIterator: output iterator receiving std::basic_string_view<TChar, TTraits> values.
// numEmitted:  incremented once per emitted part; callers are expected to pass in 0.
template<typename TChar, typename TTraits, typename TOutIterator>
void splitImpl(std::basic_string_view<TChar, TTraits> stringView,
               std::basic_string_view<TChar, TTraits> separator,
               const SplitOptions& options,
               TOutIterator outIterator,
               std::size_t& numEmitted)
{
    using sv_t = std::basic_string_view<TChar, TTraits>;

    MIJIN_ASSERT(options.limitParts > 0, "Cannot split to zero parts.");
    MIJIN_ASSERT(!separator.empty(), "Separator cannot be empty.");
    if (separator.empty())
    {
        return;
    }

    // parts are emitted with the view type of the input, so every character/traits type works
    auto emit = [&](sv_t result)
    {
        *outIterator = result;
        ++outIterator;
        ++numEmitted;
    };

    if (options.limitParts <= 1)
    {
        emit(stringView);
        return;
    }

    auto start = stringView.begin();
    auto pos = start;
    const auto end = stringView.end();

    // compare the remaining length instead of computing `end - separator.size()`, which would
    // form an iterator before begin() whenever the input is shorter than the separator
    auto separatorFound = [&]()
    {
        return static_cast<std::size_t>(end - pos) >= separator.size()
            && sv_t(pos, pos + separator.size()) == separator;
    };

    while (pos != end)
    {
        if (separatorFound())
        {
            if (!options.ignoreEmpty || pos != start)
            {
                emit(sv_t(start, pos));
            }
            start = pos = (pos + separator.size());
            if (numEmitted == options.limitParts - 1)
            {
                // only one part left: it receives everything that remains
                if (options.ignoreEmpty)
                {
                    while (separatorFound())
                    {
                        pos += separator.size();
                    }
                }
                emit(sv_t(pos, end));
                return;
            }
            if (!options.ignoreEmpty && pos == end)
            {
                // skipped a separator at the very end, add an empty entry
                emit(sv_t(end, end));
                return;
            }
        }
        else
        {
            ++pos;
        }
    }

    if (start != end)
    {
        emit(sv_t(start, end));
    }
}
// Convenience form of splitImpl() that collects all parts into a vector.
// The returned views refer to the memory behind stringView.
template<typename TChar, typename TTraits>
std::vector<std::basic_string_view<TChar, TTraits>> splitImpl(std::basic_string_view<TChar, TTraits> stringView,
                                                              std::basic_string_view<TChar, TTraits> separator,
                                                              const SplitOptions& options)
{
    using part_t = std::basic_string_view<TChar, TTraits>;

    std::vector<part_t> parts;
    std::size_t partCount = 0; // required by the iterator-based overload, not reported to the caller
    splitImpl(stringView, separator, options, std::back_inserter(parts), partCount);
    return parts;
}
// Splits into a fixed-size array of `count` parts.
//
// options.limitParts is overwritten with `count` so that no more parts are written than the
// array can hold. Slots that receive no part stay default-constructed (empty views).
// outNumResults: optional; when not null it receives the number of parts actually written.
template<std::size_t count, typename TChar, typename TTraits>
std::array<std::basic_string_view<TChar, TTraits>, count> splitFixedImpl(std::basic_string_view<TChar, TTraits> stringView,
                                                                         std::basic_string_view<TChar, TTraits> separator,
                                                                         SplitOptions options,
                                                                         std::size_t* outNumResults)
{
    using part_t = std::basic_string_view<TChar, TTraits>;

    options.limitParts = count;

    std::array<part_t, count> parts;
    std::size_t written = 0;
    splitImpl(stringView, separator, options, parts.begin(), written);

    if (outNumResults)
    {
        *outNumResults = written;
    }
    return parts;
}
// Compares two views for equality, ignoring case as defined by std::tolower (C locale functions).
// Returns false immediately when the lengths differ.
template<typename TChar, typename TTraitsA, typename TTraitsB>
bool equalsIgnoreCaseImpl(std::basic_string_view<TChar, TTraitsA> stringA, std::basic_string_view<TChar, TTraitsB> stringB) MIJIN_NOEXCEPT
{
    // std::tolower(int) is only defined for values representable as unsigned char (or EOF).
    // Passing a negative char or a large wide character is undefined behaviour, so the code unit
    // is converted to its unsigned value first and only folded when it is in range.
    const auto foldCase = [](TChar chr) -> unsigned long
    {
        const auto code = static_cast<unsigned long>(static_cast<std::make_unsigned_t<TChar>>(chr));
        if (code > static_cast<unsigned long>(UCHAR_MAX))
        {
            return code;
        }
        return static_cast<unsigned long>(std::tolower(static_cast<int>(code)));
    };

    if (stringA.size() != stringB.size())
    {
        return false;
    }
    for (std::size_t idx = 0; idx < stringA.size(); ++idx)
    {
        if (foldCase(stringA[idx]) != foldCase(stringB[idx]))
        {
            return false;
        }
    }
    return true;
}
// Returns stringView without its leading run of characters contained in charsToTrim.
// If every character is to be trimmed, an empty view positioned at the end is returned.
template<typename TChar, typename TTraits>
std::basic_string_view<TChar, TTraits> trimPrefixImpl(std::basic_string_view<TChar, TTraits> stringView,
                                                      std::basic_string_view<TChar, TTraits> charsToTrim)
{
    using view_t = std::basic_string_view<TChar, TTraits>;

    const auto firstKept = stringView.find_first_not_of(charsToTrim);
    // npos means there is nothing to keep, so the whole view is dropped
    const auto numDropped = (firstKept == view_t::npos) ? stringView.size() : firstKept;
    return stringView.substr(numDropped);
}
// Returns stringView without its trailing run of characters contained in charsToTrim.
// If every character is to be trimmed, an empty view positioned at the start is returned.
template<typename TChar, typename TTraits>
std::basic_string_view<TChar, TTraits> trimSuffixImpl(std::basic_string_view<TChar, TTraits> stringView,
                                                      std::basic_string_view<TChar, TTraits> charsToTrim)
{
    using view_t = std::basic_string_view<TChar, TTraits>;

    const auto lastKept = stringView.find_last_not_of(charsToTrim);
    // npos means there is nothing to keep; otherwise keep everything up to and including lastKept
    const typename view_t::size_type keptLength = (lastKept == view_t::npos) ? 0 : lastKept + 1;
    return stringView.substr(0, keptLength);
}
// Removes the characters contained in charsToTrim from both ends of stringView
// (the suffix is trimmed first, then the prefix).
template<typename TChar, typename TTraits>
std::basic_string_view<TChar, TTraits> trimImpl(std::basic_string_view<TChar, TTraits> stringView,
                                                std::basic_string_view<TChar, TTraits> charsToTrim)
{
    const auto withoutSuffix = trimSuffixImpl(stringView, charsToTrim);
    return trimPrefixImpl(withoutSuffix, charsToTrim);
}
// Backing storage for DEFAULT_TRIM_CHARS: space, tab, carriage return and line feed.
// `inline constexpr` gives one definition shared by all translation units and guarantees
// constant initialization.
template<typename TChar>
inline constexpr std::array DEFAULT_TRIM_CHARS_DATA = {TChar(' '), TChar('\t'), TChar('\r'), TChar('\n')};
// Characters removed by the single-argument trim functions, as a view over the array above.
// Being constant-initialized, it cannot be observed before its backing array is ready.
template<typename TChar>
inline constexpr std::basic_string_view<TChar, std::char_traits<TChar>> DEFAULT_TRIM_CHARS
    {DEFAULT_TRIM_CHARS_DATA<TChar>.data(), DEFAULT_TRIM_CHARS_DATA<TChar>.size()};
}
// Splits a string at every occurrence of separator and returns the parts as a vector of views.
// Both arguments are converted to std::basic_string_view (character type deduced); the returned
// views refer to the memory of the passed string, which therefore has to outlive them.
template<typename TLeft, typename TRight>
[[nodiscard]] auto split(TLeft&& stringView, TRight&& separator, const SplitOptions& options = {})
{
    const std::basic_string_view textView(std::forward<TLeft>(stringView));
    const std::basic_string_view separatorView(std::forward<TRight>(separator));
    return detail::splitImpl(textView, separatorView, options);
}
// Splits a string into at most `count` parts and returns them as a std::array of views.
// outNumResults (optional) receives the number of parts that were actually produced.
// The returned views refer to the memory of the passed string.
template<std::size_t count, typename TLeft, typename TRight>
[[nodiscard]] auto splitFixed(TLeft&& stringView, TRight&& separator, SplitOptions options = {}, std::size_t* outNumResults = nullptr)
{
    const std::basic_string_view textView(std::forward<TLeft>(stringView));
    const std::basic_string_view separatorView(std::forward<TRight>(separator));
    return detail::splitFixedImpl<count>(textView, separatorView, options, outNumResults);
}
// Removes all leading characters contained in `chars` from `string`.
// Both arguments are converted to std::string_view; the result is a view into `string`.
template<typename TString, typename TChars>
[[nodiscard]]
auto trimPrefix(TString&& string, TChars&& chars)
{
    const std::string_view text(std::forward<TString>(string));
    const std::string_view trimSet(std::forward<TChars>(chars));
    return detail::trimPrefixImpl(text, trimSet);
}
template<typename TString>
[[nodiscard]]
auto trimPrefix(TString&& string)
{
return trimPrefix(string, detail::DEFAULT_TRIM_CHARS<str_char_type_t<TString>>);
}
// Removes all trailing characters contained in `chars` from `string`.
// Both arguments are converted to std::string_view; the result is a view into `string`.
template<typename TString, typename TChars>
[[nodiscard]]
auto trimSuffix(TString&& string, TChars&& chars)
{
    const std::string_view text(std::forward<TString>(string));
    const std::string_view trimSet(std::forward<TChars>(chars));
    return detail::trimSuffixImpl(text, trimSet);
}
template<typename TString>
[[nodiscard]]
auto trimSuffix(TString&& string)
{
return trimSuffix(string, detail::DEFAULT_TRIM_CHARS<str_char_type_t<TString>>);
}
// Removes all characters contained in `chars` from both ends of `string`.
// Both arguments are converted to std::string_view; the result is a view into `string`.
template<typename TString, typename TChars>
[[nodiscard]]
auto trim(TString&& string, TChars&& chars)
{
    const std::string_view text(std::forward<TString>(string));
    const std::string_view trimSet(std::forward<TChars>(chars));
    return detail::trimImpl(text, trimSet);
}
template<typename TString>
[[nodiscard]]
auto trim(TString&& string)
{
return trim(string, detail::DEFAULT_TRIM_CHARS<str_char_type_t<TString>>);
}
// Case-insensitive equality of two strings; both arguments are converted to std::string_view.
template<typename TLeft, typename TRight>
[[nodiscard]] bool equalsIgnoreCase(TLeft&& left, TRight&& right) MIJIN_NOEXCEPT
{
    const std::string_view lhs(left);
    const std::string_view rhs(right);
    return detail::equalsIgnoreCaseImpl(lhs, rhs);
}
// Converts every character of `string` to lower case in place, using a copy of the global
// locale (std::locale()) taken at the time of the call.
template<typename TChar, typename TTraits, typename TAllocator>
constexpr void makeLower(std::basic_string<TChar, TTraits, TAllocator>& string)
{
    const auto lowered = [locale = std::locale()](TChar chr)
    {
        return std::tolower<TChar>(chr, locale);
    };
    for (TChar& chr : string)
    {
        chr = lowered(chr);
    }
}
// Converts every character of `string` to upper case in place, using a copy of the global
// locale (std::locale()) taken at the time of the call.
template<typename TChar, typename TTraits, typename TAllocator>
constexpr void makeUpper(std::basic_string<TChar, TTraits, TAllocator>& string)
{
    const auto raised = [locale = std::locale()](TChar chr)
    {
        return std::toupper<TChar>(chr, locale);
    };
    for (TChar& chr : string)
    {
        chr = raised(chr);
    }
}
// Builds a std::basic_string from the given constructor arguments (character type deduced)
// and returns its lower-case version; the arguments themselves are not modified.
template<typename... TArgs>
[[nodiscard]]
constexpr auto toLower(TArgs&&... args)
{
    std::basic_string lowered(std::forward<TArgs>(args)...);
    makeLower(lowered);
    return lowered;
}
// Builds a std::basic_string from the given constructor arguments (character type deduced)
// and returns its upper-case version; the arguments themselves are not modified.
template<typename... TArgs>
[[nodiscard]]
constexpr auto toUpper(TArgs&&... args)
{
    std::basic_string raised(std::forward<TArgs>(args)...);
    makeUpper(raised);
    return raised;
}
// Parses the whole of stringView as an integer in the given base using std::from_chars.
//
// Returns true only if parsing succeeded AND consumed every character. On a partial parse
// (e.g. "12abc") false is returned, but outNumber may already hold the parsed prefix.
// An empty view yields false.
template<std::integral TNumber>
[[nodiscard]]
constexpr bool toNumber(std::string_view stringView, TNumber& outNumber, int base = 10) MIJIN_NOEXCEPT
{
    // data() is valid for empty views as well, whereas dereferencing begin() of an empty view
    // would dereference the end iterator
    const char* const start = stringView.data();
    const char* const end = start + stringView.size();
    const std::from_chars_result res = std::from_chars(start, end, outNumber, base);
    return res.ec == std::errc{} && res.ptr == end;
}
// Integer parsing for views over character types other than char.
// The text is narrowed to a temporary std::string and handed to the char overload.
template<typename TChar, typename TTraits, std::integral TNumber>
[[nodiscard]]
constexpr bool toNumber(std::basic_string_view<TChar, TTraits> stringView, TNumber& outNumber, int base = 10) MIJIN_NOEXCEPT requires (!std::is_same_v<TChar, char>)
{
    std::string asString;
    asString.resize(stringView.size());
    // Valid input only contains ASCII number symbols. Anything outside the ASCII range is mapped
    // to '\0' (which never parses) instead of being truncated, as truncation could turn e.g.
    // U+0131 into the digit '1' and make invalid input look valid.
    std::transform(stringView.begin(), stringView.end(), asString.begin(), [](TChar chr)
    {
        return static_cast<unsigned long>(chr) <= 0x7FUL ? static_cast<char>(chr) : '\0';
    });
    return toNumber(std::string_view(asString), outNumber, base);
}
// Parses the whole of stringView as a floating point number using std::from_chars.
//
// Returns true only if parsing succeeded AND consumed every character. On a partial parse
// false is returned, but outNumber may already hold the parsed prefix. An empty view yields false.
template<std::floating_point TNumber>
[[nodiscard]]
constexpr bool toNumber(std::string_view stringView, TNumber& outNumber, std::chars_format fmt = std::chars_format::general) MIJIN_NOEXCEPT
{
    // data() is valid for empty views as well, whereas dereferencing begin() of an empty view
    // would dereference the end iterator
    const char* const start = stringView.data();
    const char* const end = start + stringView.size();
    const std::from_chars_result res = std::from_chars(start, end, outNumber, fmt);
    return res.ec == std::errc{} && res.ptr == end;
}
// Floating point parsing for views over character types other than char.
// The text is narrowed to a temporary std::string and handed to the char overload.
template<typename TChar, typename TTraits, std::floating_point TNumber>
[[nodiscard]]
constexpr bool toNumber(std::basic_string_view<TChar, TTraits> stringView, TNumber& outNumber, std::chars_format fmt = std::chars_format::general) MIJIN_NOEXCEPT requires (!std::is_same_v<TChar, char>)
{
    std::string asString;
    asString.resize(stringView.size());
    // Valid input only contains ASCII number symbols. Anything outside the ASCII range is mapped
    // to '\0' (which never parses) instead of being truncated, as truncation could turn e.g.
    // U+0131 into the digit '1' and make invalid input look valid.
    std::transform(stringView.begin(), stringView.end(), asString.begin(), [](TChar chr)
    {
        return static_cast<unsigned long>(chr) <= 0x7FUL ? static_cast<char>(chr) : '\0';
    });
    return toNumber(std::string_view(asString), outNumber, fmt);
}
// True for the characters '0' through '9'.
template<typename TChar>
[[nodiscard]]
constexpr bool isDecimalChar(TChar chr) noexcept
{
    return !(chr < TChar('0') || chr > TChar('9'));
}
// True for decimal digits and for the letters 'A'-'F' / 'a'-'f'.
template<typename TChar>
[[nodiscard]]
constexpr bool isHexadecimalChar(TChar chr) noexcept
{
    if (isDecimalChar(chr))
    {
        return true;
    }
    if (chr >= TChar('A') && chr <= TChar('F'))
    {
        return true;
    }
    return chr >= TChar('a') && chr <= TChar('f');
}
// Compares two characters for equality, ignoring case as defined by std::tolower
// (C locale functions). Usable as a binary predicate for algorithms.
template<typename TChar>
bool compareIgnoreCase(TChar left, TChar right) MIJIN_NOEXCEPT
{
    // std::tolower(int) is only defined for values representable as unsigned char (or EOF).
    // Passing a negative char or a large wide character is undefined behaviour, so the code unit
    // is converted to its unsigned value first and only folded when it is in range.
    const auto foldCase = [](TChar chr) -> unsigned long
    {
        const auto code = static_cast<unsigned long>(static_cast<std::make_unsigned_t<TChar>>(chr));
        if (code > static_cast<unsigned long>(UCHAR_MAX))
        {
            return code;
        }
        return static_cast<unsigned long>(std::tolower(static_cast<int>(code)));
    };
    return foldCase(left) == foldCase(right);
}
// Searches haystack for the first case-insensitive occurrence of needle.
// Returns the std::ranges::search result: a subrange of iterators into haystack covering the
// match, or an empty subrange at the end if there is none.
[[nodiscard]]
inline auto findIgnoreCase(std::string_view haystack, std::string_view needle)
{
    const auto sameChar = &compareIgnoreCase<char>;
    return std::ranges::search(haystack.begin(), haystack.end(), needle.begin(), needle.end(), sameChar);
}
// True if `string` begins with `part`, compared case-insensitively.
[[nodiscard]]
inline bool startsWithIgnoreCase(std::string_view string, std::string_view part)
{
    // a string cannot start with something longer than itself
    if (string.size() < part.size())
    {
        return false;
    }
    const std::string_view head = string.substr(0, part.size());
    return std::ranges::equal(head, part, &compareIgnoreCase<char>);
}
// True if `string` ends with `part`, compared case-insensitively.
[[nodiscard]]
inline bool endsWithIgnoreCase(std::string_view string, std::string_view part)
{
    // a string cannot end with something longer than itself
    if (string.size() < part.size())
    {
        return false;
    }
    const std::string_view tail = string.substr(string.size() - part.size());
    return std::ranges::equal(tail, part, &compareIgnoreCase<char>);
}
namespace pipe
{
struct Join
{
const char* delimiter;
explicit Join(const char* delimiter_) MIJIN_NOEXCEPT: delimiter(delimiter_) {}
};
template<typename TIterable>
auto operator|(TIterable&& iterable, const Join& joiner)
{
return join(std::forward<TIterable>(iterable), joiner.delimiter);
}
} // namespace pipe
// Outcome of converting a single character with convertCharType().
// A result with both counters at zero represents failure and converts to false.
struct [[nodiscard]] ConvertCharTypeResult
{
    // number of source units consumed
    unsigned numRead = 0;
    // number of destination units produced
    unsigned numWritten = 0;

    // true if anything was read or written
    constexpr operator bool() const MIJIN_NOEXCEPT
    {
        return !(numRead == 0 && numWritten == 0);
    }

    // true if nothing was read and nothing was written
    constexpr bool operator !() const MIJIN_NOEXCEPT
    {
        return numRead == 0 && numWritten == 0;
    }
};
// Converts a single character from one character type to another.
//
// Supported conversions: char (multibyte) -> wchar_t via std::mbrtowc and wchar_t -> char
// (multibyte) via std::wcrtomb. Every other combination reports failure.
//
// chrFrom/numFrom: source units and how many of them are available.
// outTo/numTo:     destination buffer and its capacity in units.
// mbstate:         conversion state carried between consecutive calls.
// Returns the number of units read and written, or an empty (false) result on failure:
// invalid or incomplete input, no input, or a destination buffer that is too small.
template<typename TFrom, typename TTo>
ConvertCharTypeResult convertCharType(const TFrom* chrFrom, std::size_t numFrom, TTo* outTo, std::size_t numTo, std::mbstate_t& mbstate) MIJIN_NOEXCEPT
{
    if constexpr (std::is_same_v<TFrom, char> && std::is_same_v<TTo, wchar_t>)
    {
        if (numFrom == 0 || numTo == 0)
        {
            return {};
        }
        const std::size_t result = std::mbrtowc(outTo, chrFrom, numFrom, &mbstate);
        // -1: invalid sequence, -2: sequence is incomplete within numFrom bytes
        if (result == static_cast<std::size_t>(-1) || result == static_cast<std::size_t>(-2))
        {
            return {};
        }
        return {
            // mbrtowc returns 0 when it decoded the null character, which still consumed one
            // byte; reporting 0 would keep callers from advancing
            .numRead = (result == 0) ? 1u : static_cast<unsigned>(result),
            .numWritten = 1
        };
    }
    else if constexpr (std::is_same_v<TFrom, wchar_t> && std::is_same_v<TTo, char>)
    {
        if (numFrom == 0)
        {
            return {};
        }
        if (numTo < static_cast<std::size_t>(MB_CUR_MAX))
        {
            // The output might not fit: convert into a buffer that is always big enough and copy
            // the bytes over only if they fit into the caller's buffer.
            char tmpBuf[MB_LEN_MAX];
            const std::mbstate_t savedState = mbstate;
            const ConvertCharTypeResult result = convertCharType(chrFrom, numFrom, tmpBuf, MB_LEN_MAX, mbstate);
            if (!result)
            {
                return {};
            }
            if (result.numWritten > numTo)
            {
                // nothing was handed to the caller, so the state must not advance either
                mbstate = savedState;
                return {};
            }
            std::memcpy(outTo, tmpBuf, result.numWritten);
            return result;
        }
        const std::size_t result = std::wcrtomb(outTo, *chrFrom, &mbstate);
        if (result == static_cast<std::size_t>(-1))
        {
            return {};
        }
        return {
            .numRead = 1,
            .numWritten = static_cast<unsigned>(result)
        };
    }
    else
    {
        // conversion between these character types is not implemented, report failure
        static_cast<void>(chrFrom);
        static_cast<void>(numFrom);
        static_cast<void>(outTo);
        static_cast<void>(numTo);
        static_cast<void>(mbstate);
        return {};
    }
}
// Stateless convenience overload of convertCharType(): converts a single character starting
// from the initial conversion state.
template<typename TFrom, typename TTo>
ConvertCharTypeResult convertCharType(const TFrom* chrFrom, std::size_t numFrom, TTo* outTo, std::size_t numTo) MIJIN_NOEXCEPT
{
    // must be zero-initialized: a zeroed mbstate_t is the initial conversion state, an
    // uninitialized one has an indeterminate value
    std::mbstate_t mbstate = {};
    return convertCharType(chrFrom, numFrom, outTo, numTo, mbstate);
}
// Result of the iterator-based convertStringType() overload.
template<typename TIterator>
struct [[nodiscard]] ConvertStringTypeResult
{
// the output iterator as it was when the conversion finished (or stopped on failure)
TIterator iterator;
// false if a character could not be converted
bool success;
};
// Converts strFrom character by character to the character type TTo and writes the converted
// units to outIterator.
//
// Returns the advanced output iterator and whether the whole input could be converted. On
// failure, everything converted before the offending character has already been written.
template<typename TTo, typename TFrom, typename TFromTraits, std::output_iterator<TTo> TIterator>
ConvertStringTypeResult<TIterator> convertStringType(std::basic_string_view<TFrom, TFromTraits> strFrom, TIterator outIterator)
{
    using difference_t = typename std::basic_string_view<TFrom, TFromTraits>::difference_type;

    TTo outBuffer[MB_LEN_MAX];
    std::mbstate_t mbstate = {};
    for (auto it = strFrom.begin(); it != strFrom.end();)
    {
        const auto remaining = static_cast<std::size_t>(std::distance(it, strFrom.end()));
        const ConvertCharTypeResult result = convertCharType(&*it, remaining, outBuffer, MB_LEN_MAX, mbstate);
        // besides plain failure, reject counts that would read past the input or the buffer
        if (!result || result.numRead > remaining || result.numWritten > static_cast<unsigned>(MB_LEN_MAX))
        {
            return {
                .iterator = outIterator,
                .success = false
            };
        }
        for (unsigned pos = 0; pos < result.numWritten; ++pos)
        {
            *outIterator = outBuffer[pos];
            ++outIterator;
        }
        // A converted null character may be reported as "zero units read"; always advance by at
        // least one unit so the loop cannot spin forever on the same position.
        const std::size_t consumed = (result.numRead == 0) ? 1 : result.numRead;
        it = std::next(it, static_cast<difference_t>(consumed));
    }
    return {
        .iterator = outIterator,
        .success = true
    };
}
// Converts strFrom to the character type of outString and appends the result to outString.
// If both character types are the same, the text is appended unchanged.
// Returns false if a character could not be converted.
template<typename TFrom, typename TFromTraits, typename TTo, typename TToTraits, typename TToAllocator>
bool convertStringType(std::basic_string_view<TFrom, TFromTraits> strFrom, std::basic_string<TTo, TToTraits, TToAllocator>& outString)
{
    if constexpr (!std::is_same_v<TTo, TFrom>)
    {
        // different character types: convert character by character
        return convertStringType<TTo>(strFrom, std::back_inserter(outString)).success;
    }
    else
    {
        // same character type: nothing to convert
        outString.append(strFrom);
        return true;
    }
}
// Overload for null-terminated C strings: wraps strFrom in a string view and forwards to the
// view-based overload. strFrom must point to a valid null-terminated string.
template<typename TFrom, typename TTo, typename TToTraits, typename TToAllocator>
bool convertStringType(const TFrom* strFrom, std::basic_string<TTo, TToTraits, TToAllocator>& outString)
{
    const std::basic_string_view<TFrom> sourceView(strFrom);
    return convertStringType(sourceView, outString);
}
} // namespace mijin
#endif // !defined(MIJIN_UTIL_STRING_HPP_INCLUDED)