2023-07-21 15:46:12 +08:00
|
|
|
|
#include "StringUtility.h"
|
|
|
|
|
#include "BoostLog.h"
|
|
|
|
|
#include <cctype>
|
|
|
|
|
#include <codecvt>
|
2024-05-28 21:18:38 +08:00
|
|
|
|
#ifdef WIN32
|
|
|
|
|
#include <Windows.h>
|
|
|
|
|
#endif
|
2023-07-21 15:46:12 +08:00
|
|
|
|
|
|
|
|
|
namespace Amass {
|
|
|
|
|
|
|
|
|
|
namespace StringUtility {
|
|
|
|
|
|
|
|
|
|
/// Replaces every non-overlapping occurrence of `before` in `string` with `after`, in place.
///
/// The search resumes right after each inserted `after`, so text introduced by a
/// replacement is never re-scanned (e.g. replacing "a" with "aa" terminates).
///
/// @param string  The string to modify.
/// @param before  The substring to look for. An empty pattern is treated as
///                "nothing to replace" and leaves `string` untouched.
/// @param after   The text substituted for each occurrence of `before`.
/// @return A reference to the (modified) `string` argument.
std::string &replace(std::string &string, const std::string &before, const std::string &after) {
    // An empty pattern matches at every position; without this guard the loop
    // below would never terminate.
    if (before.empty()) {
        return string;
    }

    size_t pos = string.find(before);
    while (pos != std::string::npos) {
        string.replace(pos, before.length(), after);
        // Skip over the text we just inserted before searching again.
        pos = string.find(before, pos + after.length());
    }
    return string;
}
|
|
|
|
|
|
|
|
|
|
size_t utf8Length(const std::string &text) {
|
|
|
|
|
size_t ret = 0;
|
|
|
|
|
for (uint32_t i = 0; i < text.length();) {
|
|
|
|
|
uint32_t byte_length = utf8CharacterByteSize(text.c_str() + i);
|
|
|
|
|
ret++;
|
|
|
|
|
i += byte_length;
|
|
|
|
|
}
|
|
|
|
|
return ret;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
std::string_view utf8At(const std::string &text, size_t index) {
|
|
|
|
|
const char *ret = text.c_str();
|
|
|
|
|
for (uint32_t i = 0; i < index; i++) {
|
|
|
|
|
uint32_t byte_length = utf8CharacterByteSize(ret);
|
|
|
|
|
ret += byte_length;
|
|
|
|
|
}
|
|
|
|
|
return std::string_view(ret, utf8CharacterByteSize(ret));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Reports how many bytes the UTF-8 character starting at `character` claims to occupy.
///
/// The size is the number of consecutive 1 bits at the top of the first byte,
/// with a minimum of 1: a byte whose top bit is clear yields 1, 110xxxxx yields 2,
/// 1110xxxx yields 3, and so on. Only the first byte is inspected; nothing is
/// validated, so a byte of the form 10xxxxxx yields 1 and 0xFF yields 8.
///
/// @param character  Pointer to the first byte of a character; may be null.
/// @return 0 if `character` is null, otherwise the size derived from the first byte.
size_t utf8CharacterByteSize(const char *character) {
    if (!character) {
        return 0;
    }

    const unsigned lead = static_cast<unsigned char>(*character);

    // Slide a single-bit mask down from the most significant bit until a 0 bit
    // is met (or the mask falls off the low end after eight 1 bits).
    size_t leadingOnes = 0;
    for (unsigned mask = 0x80u; (lead & mask) != 0u; mask >>= 1) {
        ++leadingOnes;
    }

    if (leadingOnes == 0) {
        return 1;
    }
    return leadingOnes;
}
|
|
|
|
|
|
|
|
|
|
std::wstring stringToWString(const std::string &string) {
|
|
|
|
|
std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> converter;
|
|
|
|
|
return converter.from_bytes(string);
|
|
|
|
|
}
|
|
|
|
|
|
2024-05-28 21:18:38 +08:00
|
|
|
|
/// Converts a wide string to a UTF-8 encoded byte string.
///
/// On Windows the conversion is done with WideCharToMultiByte(CP_UTF8); if the
/// size query fails an empty string is returned. On other platforms the same
/// std::codecvt_utf8_utf16<wchar_t> facet used by stringToWString() is applied,
/// so the two functions round-trip each other's output.
/// NOTE(review): the non-Windows path reports unconvertible input by throwing
/// std::range_error (std::wstring_convert's default behaviour).
///
/// @param string  Wide-character input.
/// @return UTF-8 encoded bytes; empty if `string` is empty.
std::string wstringToString(const std::wstring &string) {
    // WideCharToMultiByte rejects a zero-length source, so handle it up front.
    if (string.empty()) {
        return std::string();
    }
#ifdef WIN32
    const int wideLength = static_cast<int>(string.size());
    // First call only asks how many bytes the UTF-8 form needs.
    const int sizeNeeded = WideCharToMultiByte(CP_UTF8, 0, string.data(), wideLength, nullptr, 0, nullptr, nullptr);
    if (sizeNeeded <= 0) {
        return std::string();
    }
    std::string ret(static_cast<size_t>(sizeNeeded), '\0');
    WideCharToMultiByte(CP_UTF8, 0, string.data(), wideLength, &ret[0], sizeNeeded, nullptr, nullptr);
    return ret;
#else
    std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> converter;
    return converter.to_bytes(string);
#endif
}
|
|
|
|
|
|
2023-07-21 15:46:12 +08:00
|
|
|
|
/// Compares two character sequences for equality, optionally ignoring case.
///
/// Sequences of different length are never equal. With `caseSensitivity` set,
/// the bytes must match exactly. Otherwise bytes that differ are compared again
/// after std::tolower(), which follows the current C locale and works on single
/// bytes only (multi-byte characters are not case-folded).
///
/// @param lhs              First sequence.
/// @param rhs              Second sequence.
/// @param caseSensitivity  true for an exact comparison, false to ignore case.
/// @return true if the sequences are considered equal.
bool equal(std::string_view lhs, std::string_view rhs, bool caseSensitivity) {
    const size_t length = lhs.size();
    if (rhs.size() != length) {
        return false;
    }
    if (caseSensitivity) {
        return lhs == rhs;
    }

    // Index-based loop: never touches anything beyond the views' own elements,
    // which need not be NUL-terminated.
    for (size_t i = 0; i < length; ++i) {
        // std::tolower requires a value representable as unsigned char (or EOF);
        // passing a negative char directly is undefined behaviour.
        const unsigned char a = static_cast<unsigned char>(lhs[i]);
        const unsigned char b = static_cast<unsigned char>(rhs[i]);
        if (a != b && std::tolower(a) != std::tolower(b)) {
            return false;
        }
    }
    return true;
}
|
|
|
|
|
|
2024-06-13 10:08:09 +08:00
|
|
|
|
/// Converts a UTF-8 string to the system ANSI code page (Windows only).
///
/// The text is first widened with MultiByteToWideChar(CP_UTF8) and then
/// narrowed with WideCharToMultiByte(CP_ACP). Explicit lengths are passed to
/// both calls so the result contains exactly the converted characters and no
/// terminating NUL is counted into the returned string's size.
///
/// NOTE(review): CP_ACP is the machine's active ANSI code page, which is GBK
/// (936) only on systems configured for Simplified Chinese — confirm whether
/// code page 936 should be requested explicitly.
///
/// On non-Windows builds the function is not implemented: it asserts and
/// returns an empty string.
///
/// @param utf8Str  UTF-8 encoded input.
/// @return The converted bytes, or an empty string if the input is empty or a
///         conversion step fails.
std::string UTF8ToGBK(const std::string &utf8Str) {
#ifdef WIN32
    // A zero-length source is rejected by the Win32 conversion functions.
    if (utf8Str.empty()) {
        return "";
    }

    const int utf8Len = static_cast<int>(utf8Str.size());

    // UTF-8 -> UTF-16: query the required size, then convert.
    const int wideStrLen = MultiByteToWideChar(CP_UTF8, 0, utf8Str.data(), utf8Len, nullptr, 0);
    if (wideStrLen <= 0) {
        return "";
    }
    std::wstring wideStr(static_cast<size_t>(wideStrLen), L'\0');
    if (MultiByteToWideChar(CP_UTF8, 0, utf8Str.data(), utf8Len, &wideStr[0], wideStrLen) <= 0) {
        return "";
    }

    // UTF-16 -> ANSI code page: query the required size, then convert.
    const int gbkStrLen = WideCharToMultiByte(CP_ACP, 0, wideStr.data(), wideStrLen, nullptr, 0, nullptr, nullptr);
    if (gbkStrLen <= 0) {
        return "";
    }
    std::string gbkStr(static_cast<size_t>(gbkStrLen), '\0');
    if (WideCharToMultiByte(CP_ACP, 0, wideStr.data(), wideStrLen, &gbkStr[0], gbkStrLen, nullptr, nullptr) <= 0) {
        return "";
    }
    return gbkStr;
#else
    assert(false && "not implement.");
    return "";
#endif
}
|
|
|
|
|
|
2023-07-21 15:46:12 +08:00
|
|
|
|
} // namespace StringUtility
|
|
|
|
|
} // namespace Amass
|