UsenetSearch/src/StringUtils.cpp

184 lines
5.2 KiB
C++

/*
Copyright© 2021 John Sennesael
UsenetSearch is Free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
UsenetSearch is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with UsenetSearch. If not, see <https://www.gnu.org/licenses/>.
*/
#include "usenetsearch/StringUtils.h"
#include "usenetsearch/Logger.h"
#include <openssl/md5.h>
#include <algorithm>
#include <array>
#include <codecvt>
#include <cstring>
#include <fstream>
#include <functional>
#include <iomanip>
#include <locale>
#include <sstream>
#include <string>
#include <unordered_set>
#include <vector>
namespace usenetsearch {
// File-local converter shared by StringFromWideString() and
// WideStringFromString() in this file; it translates between UTF-8 byte
// strings and UTF-16 wide strings.
// NOTE(review): this is a single mutable object shared by every caller;
// confirm those helpers are never invoked concurrently from several threads.
// NOTE(review): std::wstring_convert and <codecvt> are deprecated since C++17.
static std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> conv;
/**
 * Formats one byte as a two-digit, zero-padded, lowercase hexadecimal string.
 *
 * @param c The byte to format.
 * @return Exactly two hex digits, e.g. "0a" or "ff".
 */
std::string CharToHex(const char c)
{
    // Convert through unsigned char first. Where plain char is signed, a byte
    // >= 0x80 would otherwise sign-extend and print as eight digits
    // ("ffffffff") instead of two.
    const unsigned int byteValue = static_cast<unsigned char>(c);
    std::ostringstream result;
    result << std::hex << std::setfill('0') << std::setw(2) << byteValue;
    return result.str();
}
/**
 * Converts a wide string to a byte string using the file-level converter.
 *
 * @param input The wide string to convert.
 * @return The converted string, or an empty string when the converter
 *         reports a std::range_error.
 */
std::string StringFromWideString(const std::wstring& input)
{
    try
    {
        return conv.to_bytes(input);
    }
    catch (const std::range_error&)
    {
        // A failed conversion is reported as an empty result, not an exception.
        return std::string{};
    }
}
std::string StringHash(const std::string& input)
{
unsigned char result[MD5_DIGEST_LENGTH];
MD5(
reinterpret_cast<unsigned char*>(const_cast<char*>(input.c_str())),
input.size(),
result
);
std::ostringstream sout;
sout << std::hex << std::setfill('0');
for(long long c: result)
{
sout << std::setw(2) << reinterpret_cast<long long>(c);
}
return sout.str();
}
std::array<std::uint8_t, 16> StringHashBytes(const std::string& input)
{
unsigned char buff[MD5_DIGEST_LENGTH];
MD5(
reinterpret_cast<unsigned char*>(const_cast<char*>(input.c_str())),
input.size(),
buff
);
std::array<std::uint8_t, MD5_DIGEST_LENGTH> result;
std::copy(std::begin(buff), std::end(buff), result.begin());
return result;
}
/**
 * Renders a 16-byte hash as a lowercase hexadecimal string.
 *
 * @param input The hash bytes.
 * @return 32 characters: two zero-padded hex digits per byte, in order.
 */
std::string HashBytesToString(const std::array<std::uint8_t, 16>& input)
{
    std::ostringstream hexOut;
    hexOut << std::setfill('0') << std::hex;
    std::for_each(
        input.begin(),
        input.end(),
        [&hexOut](const std::uint8_t byte)
        {
            // Widen so the stream prints a number, not a character.
            hexOut << std::setw(2) << static_cast<unsigned int>(byte);
        }
    );
    return hexOut.str();
}
/**
 * Tests whether `haystack` begins with `needle`.
 *
 * The comparison is length-aware, so strings containing embedded NUL
 * characters are compared in full rather than being cut off at the first NUL.
 *
 * @param needle   The prefix to look for. An empty needle always matches.
 * @param haystack The string to inspect.
 * @return true when the first needle.size() characters of `haystack` equal
 *         `needle`; false otherwise, including when `haystack` is shorter.
 */
bool StringStartsWith(const std::string& needle, const std::string& haystack)
{
    return haystack.size() >= needle.size()
        && haystack.compare(0, needle.size(), needle) == 0;
}
/**
 * Tests whether the wide string `haystack` begins with `needle`.
 *
 * The comparison is length-aware, so strings containing embedded L'\0'
 * characters are compared in full rather than being cut off at the first one.
 *
 * @param needle   The prefix to look for. An empty needle always matches.
 * @param haystack The string to inspect.
 * @return true when the first needle.size() characters of `haystack` equal
 *         `needle`; false otherwise, including when `haystack` is shorter.
 */
bool StringStartsWith(const std::wstring& needle, const std::wstring& haystack)
{
    return haystack.size() >= needle.size()
        && haystack.compare(0, needle.size(), needle) == 0;
}
/**
 * Interprets a string as a boolean.
 *
 * The input is lower-cased and trimmed before matching. "true", "yes" and
 * "1" yield true; "false", "no" and "0" yield false. Any other value is
 * reported through Logger::Fatal<StringException>.
 *
 * @param str The text to interpret.
 * @return The parsed boolean value.
 */
bool StringToBoolean(const std::string& str)
{
    const std::string normalized = StringTrim(StringToLower(str));

    const bool isTruthy =
        (normalized == "true") || (normalized == "yes") || (normalized == "1");
    if (isTruthy)
    {
        return true;
    }

    const bool isFalsy =
        (normalized == "false") || (normalized == "no") || (normalized == "0");
    if (isFalsy)
    {
        return false;
    }

    // NOTE(review): Fatal<StringException> presumably throws; the return
    // below is only reached if it does not.
    Logger::Get().Fatal<StringException>(
        LOGID("StringUtils"),
        "The string \"" + str + "\" is not a valid boolean value."
    );
    return false;
}
/**
 * Interprets a wide string as a boolean.
 *
 * The input is lower-cased and trimmed before matching. L"true", L"yes" and
 * L"1" yield true; L"false", L"no" and L"0" yield false. Any other value is
 * reported through Logger::Fatal<StringException>.
 *
 * @param str The text to interpret.
 * @return The parsed boolean value.
 */
bool StringToBoolean(const std::wstring& str)
{
    const std::wstring lstr = StringTrim(StringToLower(str));
    if ((lstr == L"true") || (lstr == L"yes") || (lstr == L"1")) return true;
    if ((lstr == L"false") || (lstr == L"no") || (lstr == L"0")) return false;

    // Build a narrow copy of the offending value for the error message. The
    // converter is named so it does not shadow the file-level `conv`. A
    // conversion failure must not replace the intended StringException, so an
    // unconvertible value is reported as an empty string.
    std::string printable;
    try
    {
        std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> messageConv;
        printable = messageConv.to_bytes(str);
    }
    catch (const std::range_error&)
    {
        printable.clear();
    }

    // NOTE(review): Fatal<StringException> presumably throws; the return
    // below is only reached if it does not.
    Logger::Get().Fatal<StringException>(
        LOGID("StringUtils"),
        "The string \"" + printable
        + "\" is not a valid boolean value."
    );
    return false;
}
/**
 * Invokes a callback for every distinct run of consecutive tokens.
 *
 * `searchString` is split with StringSplit() using `splitBy`. For each
 * starting token, runs of 1 up to `maxDepth` consecutive tokens (or fewer
 * when the end of the token list is reached) are re-joined with StringJoin()
 * and passed to `Fn`. Each distinct joined sub-token is reported once, in
 * order of first occurrence.
 *
 * @param searchString The text to tokenise; also passed to `Fn` as its
 *                     second argument on every call.
 * @param splitBy      Delimiter handed to StringSplit() and StringJoin().
 * @param maxDepth     Maximum number of consecutive tokens per sub-token;
 *                     0 produces no callbacks.
 * @param Fn           Callback receiving (subToken, searchString).
 */
void StringTreeOperation(
    const std::string& searchString,
    const std::string& splitBy,
    size_t maxDepth,
    std::function<void(const std::string& subToken, const std::string& str)> Fn)
{
    const auto tokens = StringSplit(searchString, splitBy);
    // Sub-tokens already reported. A hash set keeps the duplicate check
    // constant-time instead of rescanning a growing list for every candidate.
    std::unordered_set<std::string> seenTokens;
    for (auto outerIt = tokens.begin(); outerIt != tokens.end(); ++outerIt)
    {
        // The break at the bottom keeps endIt from passing tokens.end(), so
        // depth never exceeds the number of remaining tokens.
        for (size_t depth = 1; depth <= maxDepth; ++depth)
        {
            const auto endIt = outerIt + depth;
            const auto subset = std::vector<std::string>(outerIt, endIt);
            const auto subToken = StringJoin(subset, splitBy);
            // For phrases like "we went here and then we went there" this
            // avoids reporting the tokens 'we', 'went' and 'we went' twice.
            if (seenTokens.emplace(subToken).second)
            {
                Fn(subToken, searchString);
            }
            if (endIt == tokens.end()) break;
        }
    }
}
/**
 * Converts a byte string to a wide string using the file-level converter.
 *
 * @param input The byte string to convert.
 * @return The converted wide string, or an empty wide string when the
 *         converter reports a std::range_error.
 */
std::wstring WideStringFromString(const std::string& input)
{
    try
    {
        return conv.from_bytes(input);
    }
    catch (const std::range_error&)
    {
        // A failed conversion is reported as an empty result, not an exception.
        return std::wstring{};
    }
}
} // namespace usenetsearch