Implemented GROUP, LISTGROUP commands - Implemented database saving and loading of article ids

This commit is contained in:
John Sennesael 2021-09-21 17:46:31 -05:00 committed by John Sennesael
parent 5645d31f59
commit 60271b8de3
21 changed files with 262 additions and 44 deletions

View File

@ -1,15 +1,17 @@
# UsenetSearch is free software: you can redistribute it and/or modify # Copyright© 2021 John Sennesael
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# #
# UsenetSearch is distributed in the hope that it will be useful, # UsenetSearch is free software: you can redistribute it and/or modify
# but WITHOUT ANY WARRANTY; without even the implied warranty of # it under the terms of the GNU General Public License as published by
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # the Free Software Foundation, either version 3 of the License, or
# GNU General Public License for more details. # (at your option) any later version.
# #
# You should have received a copy of the GNU General Public License # UsenetSearch is distributed in the hope that it will be useful,
# along with UsenetSearch. If not, see <https://www.gnu.org/licenses/>. # but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with UsenetSearch. If not, see <https://www.gnu.org/licenses/>.
cmake_minimum_required(VERSION 3.5) cmake_minimum_required(VERSION 3.5)

View File

@ -1,3 +1,5 @@
Copyright© 2021 John Sennesael
GNU GENERAL PUBLIC LICENSE GNU GENERAL PUBLIC LICENSE
Version 3, 29 June 2007 Version 3, 29 June 2007

View File

@ -1,4 +1,6 @@
/* /*
Copyright© 2021 John Sennesael
UsenetSearch is free software: you can redistribute it and/or modify UsenetSearch is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or the Free Software Foundation, either version 3 of the License, or

View File

@ -1,4 +1,6 @@
/* /*
Copyright© 2021 John Sennesael
UsenetSearch is free software: you can redistribute it and/or modify UsenetSearch is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or the Free Software Foundation, either version 3 of the License, or
@ -15,9 +17,11 @@
#pragma once #pragma once
#include <codecvt>
#include <cstdint> #include <cstdint>
#include <filesystem> #include <filesystem>
#include <fstream> #include <fstream>
#include <locale>
#include <memory> #include <memory>
#include <vector> #include <vector>
@ -35,20 +39,30 @@ struct DatabaseException: public UsenetSearchException
virtual ~DatabaseException() = default; virtual ~DatabaseException() = default;
}; };
class Database{ class Database
{
std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> m_conv;
std::filesystem::path m_databasePath; std::filesystem::path m_databasePath;
std::uint64_t m_databaseVersion{DatabaseVersion}; std::uint64_t m_databaseVersion{DatabaseVersion};
std::ifstream m_newsGroupFileInput; std::ifstream m_newsGroupFileInput;
std::ofstream m_newsGroupFileOutput; std::ofstream m_newsGroupFileOutput;
std::filesystem::path GetArticleFilePath(const std::wstring& newsgroup);
void OpenNewsGroupFile(); void OpenNewsGroupFile();
public: public:
~Database(); ~Database();
std::unique_ptr<std::vector<std::wstring>> LoadArticleList(
const std::wstring& newsgroup
);
std::unique_ptr<std::vector<NntpListEntry>> LoadNewsgroupList(); std::unique_ptr<std::vector<NntpListEntry>> LoadNewsgroupList();
void Open(std::filesystem::path dbPath); void Open(std::filesystem::path dbPath);
void UpdateArticleList(
const std::wstring& newsgroup,
const std::vector<std::wstring>& articleIds
);
void UpdateNewsgroupList(const std::vector<NntpListEntry>& list); void UpdateNewsgroupList(const std::vector<NntpListEntry>& list);
}; };

View File

@ -1,4 +1,6 @@
/* /*
Copyright© 2021 John Sennesael
UsenetSearch is free software: you can redistribute it and/or modify UsenetSearch is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or the Free Software Foundation, either version 3 of the License, or

View File

@ -1,4 +1,6 @@
/* /*
Copyright© 2021 John Sennesael
UsenetSearch is free software: you can redistribute it and/or modify UsenetSearch is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or the Free Software Foundation, either version 3 of the License, or
@ -34,4 +36,6 @@ public:
}; };
/// Project-specific error code reported when a requested item does not exist.
static constexpr int ENOTFOUND = 2001;
} // namespace usenetsearch } // namespace usenetsearch

View File

@ -1,4 +1,6 @@
/* /*
Copyright© 2021 John Sennesael
UsenetSearch is free software: you can redistribute it and/or modify UsenetSearch is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or the Free Software Foundation, either version 3 of the License, or

View File

@ -1,4 +1,6 @@
/* /*
Copyright© 2021 John Sennesael
UsenetSearch is free software: you can redistribute it and/or modify UsenetSearch is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or the Free Software Foundation, either version 3 of the License, or

View File

@ -15,6 +15,7 @@
#pragma once #pragma once
#include <algorithm>
#include <fstream> #include <fstream>
#include <string> #include <string>
#include <vector> #include <vector>
@ -36,6 +37,8 @@ struct StringException: public UsenetSearchException
virtual ~StringException() = default; virtual ~StringException() = default;
}; };
std::string StringHash(const std::string& input);
template<typename T> template<typename T>
std::vector<T> StringSplit( std::vector<T> StringSplit(
const T& str, const T& str,
@ -58,16 +61,43 @@ std::vector<T> StringSplit(
return result; return result;
} }
/**
 * Returns a copy of str with leading whitespace removed.
 *
 * Whitespace is the "C" locale set: space, \t, \n, \v, \f and \r. The check
 * is done by direct comparison rather than std::isspace(int), because the
 * latter is undefined for wchar_t / negative char values that do not fit in
 * an unsigned char, and std::ptr_fun is no longer part of C++17.
 *
 * @param str Any std::basic_string-like type (std::string, std::wstring).
 * @return The trimmed copy; str itself is not modified.
 */
std::string StringLeftTrim(const std::string& str); template<typename T>
T StringLeftTrim(const T& str)
{
    const auto isSpace = [](auto ch)
    {
        return ch == ' ' || ch == '\t' || ch == '\n'
            || ch == '\v' || ch == '\f' || ch == '\r';
    };
    T s = str;
    s.erase(s.begin(), std::find_if_not(s.begin(), s.end(), isSpace));
    return s;
}
/**
 * Returns a copy of str with trailing whitespace removed.
 *
 * Whitespace is the "C" locale set: space, \t, \n, \v, \f and \r. The check
 * is done by direct comparison rather than std::isspace(int), because the
 * latter is undefined for wchar_t / negative char values that do not fit in
 * an unsigned char, and std::ptr_fun is no longer part of C++17.
 *
 * @param str Any std::basic_string-like type (std::string, std::wstring).
 * @return The trimmed copy; str itself is not modified.
 */
std::string StringRightTrim(const std::string& str); template<typename T>
T StringRightTrim(const T& str)
{
    const auto isSpace = [](auto ch)
    {
        return ch == ' ' || ch == '\t' || ch == '\n'
            || ch == '\v' || ch == '\f' || ch == '\r';
    };
    T s = str;
    // Search from the back; base() converts the reverse iterator into the
    // forward position just past the last non-space character.
    s.erase(std::find_if_not(s.rbegin(), s.rend(), isSpace).base(), s.end());
    return s;
}
bool StringStartsWith(const std::string& needle, const std::string& haystack); bool StringStartsWith(const std::string& needle, const std::string& haystack);
bool StringStartsWith(const std::wstring& needle, const std::wstring& haystack);
/**
 * Returns a copy of str with whitespace removed from both ends.
 *
 * @param str The string to trim; it is not modified.
 * @return The result of right-trimming and then left-trimming str.
 */
std::string StringTrim(const std::string& str); template<typename T>
T StringTrim(const T& str)
{
    // Strip the tail first, then the head of what remains.
    const T withoutTrailing = StringRightTrim(str);
    return StringLeftTrim(withoutTrailing);
}
/**
 * Returns a copy of str with the ASCII letters 'A'-'Z' converted to lower
 * case; every other character is left untouched.
 *
 * The conversion is done arithmetically instead of through ::tolower(int):
 * that function is undefined for wchar_t / negative char values that do not
 * fit in an unsigned char, which this template can receive.
 *
 * @param str Any std::basic_string-like type (std::string, std::wstring).
 * @return The lower-cased copy; str itself is not modified.
 */
std::string StringToLower(const std::string& str); template<typename T>
T StringToLower(const T& str)
{
    T copy = str;
    std::transform(copy.begin(), copy.end(), copy.begin(),
        [](auto ch)
        {
            return (ch >= 'A' && ch <= 'Z')
                ? static_cast<decltype(ch)>(ch - 'A' + 'a')
                : ch;
        });
    return copy;
}
bool StringToBoolean(const std::string& str); bool StringToBoolean(const std::string& str);
bool StringToBoolean(const std::wstring& str);
} // namespace usenetsearch } // namespace usenetsearch

View File

@ -1,4 +1,6 @@
/* /*
Copyright© 2021 John Sennesael
UsenetSearch is free software: you can redistribute it and/or modify UsenetSearch is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or the Free Software Foundation, either version 3 of the License, or

View File

@ -1,4 +1,6 @@
/* /*
Copyright© 2021 John Sennesael
UsenetSearch is free software: you can redistribute it and/or modify UsenetSearch is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or the Free Software Foundation, either version 3 of the License, or
@ -36,6 +38,11 @@ struct UsenetClientException: public UsenetSearchException
virtual ~UsenetClientException() = default; virtual ~UsenetClientException() = default;
}; };
/// Placeholder type for NNTP article headers; it has no members yet.
struct NntpHeader {};
struct NntpMessage struct NntpMessage
{ {
std::uint16_t code; std::uint16_t code;
@ -73,7 +80,17 @@ public:
std::uint16_t port, std::uint16_t port,
bool useSSL = false bool useSSL = false
); );
std::vector<NntpListEntry> List(); void Group(const std::wstring& groupName);
std::unique_ptr<std::vector<NntpListEntry>> List();
/* Whilst message IDs are typically numbers, the RFC states they are unique
alphanumeric strings. */
std::unique_ptr<std::vector<std::wstring>> ListGroup(
const std::wstring& newsGroup
);
// Use the ListGroup <newsgroup> command to get a list of article IDs in a newsgroup,
// then use the HEAD <article id> command to get the headers for each article.
}; };

View File

@ -1,4 +1,6 @@
/* /*
Copyright© 2021 John Sennesael
UsenetSearch is free software: you can redistribute it and/or modify UsenetSearch is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or the Free Software Foundation, either version 3 of the License, or

View File

@ -1,4 +1,6 @@
/* /*
Copyright© 2021 John Sennesael
UsenetSearch is free software: you can redistribute it and/or modify UsenetSearch is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or the Free Software Foundation, either version 3 of the License, or
@ -36,6 +38,42 @@ Database::~Database()
m_newsGroupFileOutput.close(); m_newsGroupFileOutput.close();
} }
} }
std::filesystem::path Database::GetArticleFilePath(
const std::wstring& newsgroup)
{
const auto groupFile = StringHash(m_conv.to_bytes(newsgroup)) + ".db";
return m_databasePath / groupFile;
}
/**
 * Loads the stored list of article ids for a newsgroup.
 *
 * The file (see GetArticleFilePath) starts with a raw std::uint64_t entry
 * count, followed by that many ids which are read back with the wide-string
 * stream extraction operator.
 *
 * @param newsgroup Name of the newsgroup whose article list is wanted.
 * @return The article ids in the order they appear in the file.
 * @throws DatabaseException with code ENOTFOUND when the file does not exist,
 *         cannot be opened, or ends before all announced entries were read.
 */
std::unique_ptr<std::vector<std::wstring>> Database::LoadArticleList(
    const std::wstring& newsgroup)
{
    const auto articleFile = GetArticleFilePath(newsgroup);
    if (!std::filesystem::exists(articleFile))
    {
        throw DatabaseException(ENOTFOUND,
            "No article list found for newsgroup " + m_conv.to_bytes(newsgroup)
        );
    }
    std::ifstream io;
    io.open(articleFile, std::ios::binary);
    if (!io.is_open())
    {
        throw DatabaseException(ENOTFOUND,
            "Could not open article list for newsgroup "
            + m_conv.to_bytes(newsgroup)
        );
    }
    // Zero-initialised so a failed read can never leave a garbage loop bound.
    std::uint64_t articleCount{0};
    io.read(
        reinterpret_cast<char*>(&articleCount),
        sizeof(articleCount)
    );
    // NOTE(review): ENOTFOUND is the only database error code visible from
    // this function; an unusable file is reported with it so callers that
    // already handle "no list" also cope with a damaged one. Switch to a
    // dedicated code if the project has one.
    if (io.fail())
    {
        throw DatabaseException(ENOTFOUND,
            "Article list for newsgroup " + m_conv.to_bytes(newsgroup)
            + " has no readable entry count"
        );
    }
    auto result = std::make_unique<std::vector<std::wstring>>();
    for (std::uint64_t i = 0; i != articleCount; ++i)
    {
        std::wstring articleId;
        io >> articleId;
        if (io.fail())
        {
            // The header promised more entries than the file contains.
            throw DatabaseException(ENOTFOUND,
                "Article list for newsgroup " + m_conv.to_bytes(newsgroup)
                + " is truncated"
            );
        }
        result->emplace_back(articleId);
    }
    io.close();
    return result;
}
std::unique_ptr<std::vector<NntpListEntry>> Database::LoadNewsgroupList() std::unique_ptr<std::vector<NntpListEntry>> Database::LoadNewsgroupList()
{ {
OpenNewsGroupFile(); OpenNewsGroupFile();
@ -97,6 +135,25 @@ void Database::OpenNewsGroupFile()
} }
} }
/**
 * Writes the article id list of a newsgroup, replacing any previous file.
 *
 * The file (see GetArticleFilePath) starts with a raw std::uint64_t entry
 * count followed by every id, written with the wide-string stream insertion
 * operator.
 *
 * @param newsgroup  Name of the newsgroup the ids belong to.
 * @param articleIds The ids to store, written in the given order.
 * @throws DatabaseException when the file cannot be opened for writing or
 *         when writing to it fails.
 */
void Database::UpdateArticleList(
    const std::wstring& newsgroup,
    const std::vector<std::wstring>& articleIds)
{
    const auto articleFile = GetArticleFilePath(newsgroup);
    std::ofstream io;
    io.open(articleFile, std::ios::binary);
    // NOTE(review): ENOTFOUND is the only database error code visible from
    // this function; switch to a dedicated I/O error code if one exists.
    if (!io.is_open())
    {
        throw DatabaseException(ENOTFOUND,
            "Could not open article list for newsgroup "
            + m_conv.to_bytes(newsgroup) + " for writing"
        );
    }
    const std::uint64_t articleCount = articleIds.size();
    io.write(
        reinterpret_cast<const char*>(&articleCount),
        sizeof(articleCount)
    );
    for (const auto& id: articleIds)
    {
        io << id;
    }
    // close() flushes buffered data; a failed flush is reported through the
    // stream state, so check it after closing instead of losing data silently.
    io.close();
    if (io.fail())
    {
        throw DatabaseException(ENOTFOUND,
            "Failed to write article list for newsgroup "
            + m_conv.to_bytes(newsgroup)
        );
    }
}
void Database::UpdateNewsgroupList(const std::vector<NntpListEntry>& list) void Database::UpdateNewsgroupList(const std::vector<NntpListEntry>& list)
{ {
OpenNewsGroupFile(); OpenNewsGroupFile();

View File

@ -1,4 +1,6 @@
/* /*
Copyright© 2021 John Sennesael
UsenetSearch is free software: you can redistribute it and/or modify UsenetSearch is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or the Free Software Foundation, either version 3 of the License, or

View File

@ -1,4 +1,6 @@
/* /*
Copyright© 2021 John Sennesael
UsenetSearch is free software: you can redistribute it and/or modify UsenetSearch is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or the Free Software Foundation, either version 3 of the License, or

View File

@ -1,4 +1,6 @@
/* /*
Copyright© 2021 John Sennesael
UsenetSearch is free software: you can redistribute it and/or modify UsenetSearch is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or the Free Software Foundation, either version 3 of the License, or

View File

@ -1,4 +1,6 @@
/* /*
Copyright© 2021 John Sennesael
UsenetSearch is free software: you can redistribute it and/or modify UsenetSearch is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or the Free Software Foundation, either version 3 of the License, or

View File

@ -1,4 +1,6 @@
/* /*
Copyright© 2021 John Sennesael
UsenetSearch is free software: you can redistribute it and/or modify UsenetSearch is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or the Free Software Foundation, either version 3 of the License, or
@ -14,11 +16,16 @@
*/ */
#include <algorithm> #include <algorithm>
#include <codecvt>
#include <cstring> #include <cstring>
#include <fstream> #include <fstream>
#include <iomanip>
#include <locale>
#include <string> #include <string>
#include <vector> #include <vector>
#include <openssl/md5.h>
#include "usenetsearch/StringUtils.h" #include "usenetsearch/StringUtils.h"
namespace usenetsearch { namespace usenetsearch {
@ -64,21 +71,21 @@ std::ifstream& operator>>(std::ifstream& in, std::wstring& str)
return in; return in;
} }
std::string StringLeftTrim(const std::string& str) std::string StringHash(const std::string& input)
{ {
std::string s = str; unsigned char result[MD5_DIGEST_LENGTH];
s.erase(s.begin(), std::find_if(s.begin(), s.end(), MD5(
std::not1(std::ptr_fun<int, int>(std::isspace)))); reinterpret_cast<unsigned char*>(const_cast<char*>(input.c_str())),
return s; input.size(),
} result
);
std::string StringRightTrim(const std::string& str) std::ostringstream sout;
{ sout << std::hex << std::setfill('0');
std::string s = str; for(long long c: result)
s.erase(std::find_if(s.rbegin(), s.rend(), {
std::not1(std::ptr_fun<int, int>(std::isspace))).base(), sout << std::setw(2) << reinterpret_cast<long long>(c);
s.end()); }
return s; return sout.str();
} }
bool StringStartsWith(const std::string& needle, const std::string& haystack) bool StringStartsWith(const std::string& needle, const std::string& haystack)
@ -86,6 +93,11 @@ bool StringStartsWith(const std::string& needle, const std::string& haystack)
return (std::strncmp(haystack.c_str(),needle.c_str(),needle.size()) == 0); return (std::strncmp(haystack.c_str(),needle.c_str(),needle.size()) == 0);
} }
/**
 * Tests whether haystack begins with needle.
 *
 * The comparison covers exactly needle.size() characters, so embedded NUL
 * characters are compared like any other character instead of ending the
 * comparison early.
 *
 * @param needle   The prefix to look for; an empty needle always matches.
 * @param haystack The string that is inspected.
 * @return true when the first needle.size() characters of haystack equal
 *         needle, false otherwise (including when haystack is shorter).
 */
bool StringStartsWith(const std::wstring& needle, const std::wstring& haystack)
{
    return haystack.compare(0, needle.size(), needle) == 0;
}
bool StringToBoolean(const std::string& str) bool StringToBoolean(const std::string& str)
{ {
const std::string lstr = StringTrim(StringToLower(str)); const std::string lstr = StringTrim(StringToLower(str));
@ -96,16 +108,16 @@ bool StringToBoolean(const std::string& str)
); );
} }
std::string StringToLower(const std::string& str) bool StringToBoolean(const std::wstring& str)
{ {
std::string copy = str; const std::wstring lstr = StringTrim(StringToLower(str));
std::transform(copy.begin(),copy.end(),copy.begin(),::tolower); if ((lstr == L"true") || (lstr == L"yes") || (lstr == L"1")) return true;
return copy; if ((lstr == L"false") || (lstr == L"no") || (lstr == L"0")) return false;
} std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> conv;
throw StringException(EINVAL,
std::string StringTrim(const std::string& str) "The string \"" + conv.to_bytes(str)
{ + "\" is not a valid boolean value."
return StringLeftTrim(StringRightTrim(str)); );
} }
} // namespace usenetsearch } // namespace usenetsearch

View File

@ -1,4 +1,6 @@
/* /*
Copyright© 2021 John Sennesael
UsenetSearch is free software: you can redistribute it and/or modify UsenetSearch is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or the Free Software Foundation, either version 3 of the License, or

View File

@ -1,4 +1,6 @@
/* /*
Copyright© 2021 John Sennesael
UsenetSearch is free software: you can redistribute it and/or modify UsenetSearch is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or the Free Software Foundation, either version 3 of the License, or
@ -115,13 +117,28 @@ void UsenetClient::Connect(
} }
} }
/**
 * Sends the GROUP command for groupName and checks the server's reply.
 *
 * @param groupName Name of the newsgroup to pass to the GROUP command.
 * @throws UsenetClientException carrying the server's response code when
 *         the reply is an error according to IsError().
 */
void UsenetClient::Group(const std::wstring& groupName)
{
    // Issue the GROUP command and read the single status line that follows.
    Write(L"GROUP " + groupName + L"\r\n");
    const auto reply = ReadLine();
    if (!IsError(reply))
    {
        return;
    }
    const std::string groupBytes = m_conv.to_bytes(groupName);
    const std::string serverText = m_conv.to_bytes(reply.message);
    throw UsenetClientException(
        reply.code,
        "Error changing group to " + groupBytes + " : " + serverText
    );
}
bool UsenetClient::IsError(const NntpMessage& msg) const bool UsenetClient::IsError(const NntpMessage& msg) const
{ {
if (msg.code >= 400) return true; if (msg.code >= 400) return true;
return false; return false;
} }
std::vector<NntpListEntry> UsenetClient::List() std::unique_ptr<std::vector<NntpListEntry>> UsenetClient::List()
{ {
Write(L"LIST COUNTS\r\n"); Write(L"LIST COUNTS\r\n");
/* In response, we should get a 215 response followed by the list of news /* In response, we should get a 215 response followed by the list of news
@ -139,7 +156,7 @@ std::vector<NntpListEntry> UsenetClient::List()
const auto listStr = ReadUntil(L"\r\n.\r\n"); const auto listStr = ReadUntil(L"\r\n.\r\n");
// parse the list. // parse the list.
const auto lines = StringSplit(listStr, std::wstring{L"\r\n"}); const auto lines = StringSplit(listStr, std::wstring{L"\r\n"});
std::vector<NntpListEntry> result; auto result = std::make_unique<std::vector<NntpListEntry>>();
for (const auto& line: lines) for (const auto& line: lines)
{ {
NntpListEntry entry; NntpListEntry entry;
@ -151,12 +168,38 @@ std::vector<NntpListEntry> UsenetClient::List()
entry.low = std::stoul(fields[2]); entry.low = std::stoul(fields[2]);
entry.count = std::stoul(fields[3]); entry.count = std::stoul(fields[3]);
entry.status = fields[4]; entry.status = fields[4];
result.emplace_back(entry); result->emplace_back(entry);
} }
} }
return result; return result;
} }
/**
 * Sends the LISTGROUP command and returns the article ids the server lists
 * for the given newsgroup.
 *
 * @param newsGroup Name of the newsgroup to pass to the LISTGROUP command.
 * @return The trimmed, non-empty lines of the server's listing, in the
 *         order received.
 * @throws UsenetClientException carrying the server's response code when
 *         the status line is an error according to IsError().
 */
std::unique_ptr<std::vector<std::wstring>> UsenetClient::ListGroup(const std::wstring& newsGroup)
{
    Write(L"LISTGROUP " + newsGroup + L"\r\n");
    /* In response, we should get a 211 response followed by the list of
       article IDs, ending in a period on its own line. */
    const auto response = ReadLine();
    if (IsError(response))
    {
        throw UsenetClientException(
            response.code,
            "Failed to fetch article list for newsgroup "
            + m_conv.to_bytes(newsGroup)
            + std::string{", server responded with: "}
            + m_conv.to_bytes(response.message)
        );
    }
    const auto listStr = ReadUntil(L"\r\n.\r\n");
    // parse the list.
    const auto lines = StringSplit(listStr, std::wstring{L"\r\n"});
    auto result = std::make_unique<std::vector<std::wstring>>();
    for (const auto& line: lines)
    {
        const auto articleId = StringTrim(line);
        // Blank lines and the terminating "." are protocol framing, not
        // article ids. NOTE(review): whether ReadUntil() keeps the terminator
        // in its result is not visible here; filtering is safe either way.
        if (articleId.empty() || articleId == L".")
        {
            continue;
        }
        result->emplace_back(articleId);
    }
    return result;
}
NntpMessage UsenetClient::ReadLine() NntpMessage UsenetClient::ReadLine()
{ {
NntpMessage result{}; NntpMessage result{};

View File

@ -1,4 +1,6 @@
/* /*
Copyright© 2021 John Sennesael
UsenetSearch is free software: you can redistribute it and/or modify UsenetSearch is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or the Free Software Foundation, either version 3 of the License, or
@ -38,6 +40,9 @@ void Usage(const std::string& programName)
int main(int argc, char* argv[]) int main(int argc, char* argv[])
{ {
std::cout << "UsenetSearch - usenet search indexer" << std::endl;
std::cout << "Copyright© 2021 John Sennesael" << std::endl << std::endl;
std::string configFile{"config.json"}; std::string configFile{"config.json"};
// Parse args. // Parse args.
@ -86,7 +91,8 @@ int main(int argc, char* argv[])
conv.from_bytes(config.NNTPServerPassword()) conv.from_bytes(config.NNTPServerPassword())
); );
// Just testing the list command for now. // BEGIN TEMPORARY TEST CODE
/*
const auto list = client.List(); const auto list = client.List();
db.UpdateNewsgroupList(list); db.UpdateNewsgroupList(list);
std::cout << "Number of newsgroups in newsgroup list (saved): " std::cout << "Number of newsgroups in newsgroup list (saved): "
@ -95,7 +101,16 @@ int main(int argc, char* argv[])
const auto listLoaded = db.LoadNewsgroupList(); const auto listLoaded = db.LoadNewsgroupList();
std::cout << "Number of newsgroups in newsgroup list (loaded): " std::cout << "Number of newsgroups in newsgroup list (loaded): "
<< listLoaded->size() << std::endl; << listLoaded->size() << std::endl;
*/
const auto articles = client.ListGroup(L"comp.os.os2.comm");
std::cout << "Saving " << articles->size() << " articles." << std::endl;
db.UpdateArticleList(L"comp.os.os2.comm", *articles);
const auto loadedArticles = db.LoadArticleList(L"comp.os.os2.comm");
std::cout << "Loaded " << loadedArticles->size() << " artices." << std::endl;
// END TEMPORARY TEST CODE
} }
catch (const usenetsearch::UsenetSearchException& e) catch (const usenetsearch::UsenetSearchException& e)
{ {