From 60271b8de3ef9b9a686462c9afbcfff1a30cee93 Mon Sep 17 00:00:00 2001 From: John Sennesael Date: Tue, 21 Sep 2021 17:46:31 -0500 Subject: [PATCH] Implemented GROUP, LISTGROUP commands - Implemented database saving and loading of article ids --- CMakeLists.txt | 22 ++++++----- COPYING | 2 + include/usenetsearch/Configuration.h | 2 + include/usenetsearch/Database.h | 16 +++++++- include/usenetsearch/Dns.h | 2 + include/usenetsearch/Except.h | 4 ++ include/usenetsearch/IoSocket.h | 2 + include/usenetsearch/SSLConnection.h | 2 + include/usenetsearch/StringUtils.h | 38 ++++++++++++++++-- include/usenetsearch/TcpConnection.h | 2 + include/usenetsearch/UsenetClient.h | 19 ++++++++- src/Configuration.cpp | 2 + src/Database.cpp | 57 +++++++++++++++++++++++++++ src/Dns.cpp | 2 + src/Except.cpp | 2 + src/IoSocket.cpp | 2 + src/SSLConnection.cpp | 2 + src/StringUtils.cpp | 58 +++++++++++++++++----------- src/TcpConnection.cpp | 2 + src/UsenetClient.cpp | 49 +++++++++++++++++++++-- src/main.cpp | 19 ++++++++- 21 files changed, 262 insertions(+), 44 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 12387a2..c0881f8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,15 +1,17 @@ -# UsenetSearch is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. +# Copyright© 2021 John Sennesael # -# UsenetSearch is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. +# UsenetSearch is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. # -# You should have received a copy of the GNU General Public License -# along with UsenetSearch. If not, see . +# UsenetSearch is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with UsenetSearch. If not, see . cmake_minimum_required(VERSION 3.5) diff --git a/COPYING b/COPYING index 53d1f3d..8cbce33 100644 --- a/COPYING +++ b/COPYING @@ -1,3 +1,5 @@ + Copyright© 2021 John Sennesael + GNU GENERAL PUBLIC LICENSE Version 3, 29 June 2007 diff --git a/include/usenetsearch/Configuration.h b/include/usenetsearch/Configuration.h index eb030bb..0107e25 100644 --- a/include/usenetsearch/Configuration.h +++ b/include/usenetsearch/Configuration.h @@ -1,4 +1,6 @@ /* + Copyright© 2021 John Sennesael + UsenetSearch is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or diff --git a/include/usenetsearch/Database.h b/include/usenetsearch/Database.h index 9dd94af..4a422f0 100644 --- a/include/usenetsearch/Database.h +++ b/include/usenetsearch/Database.h @@ -1,4 +1,6 @@ /* + Copyright© 2021 John Sennesael + UsenetSearch is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or @@ -15,9 +17,11 @@ #pragma once +#include #include #include #include +#include #include #include @@ -35,20 +39,30 @@ struct DatabaseException: public UsenetSearchException virtual ~DatabaseException() = default; }; -class Database{ +class Database +{ + std::wstring_convert> m_conv; std::filesystem::path m_databasePath; std::uint64_t m_databaseVersion{DatabaseVersion}; std::ifstream m_newsGroupFileInput; std::ofstream m_newsGroupFileOutput; + std::filesystem::path GetArticleFilePath(const std::wstring& newsgroup); void OpenNewsGroupFile(); public: ~Database(); + std::unique_ptr> LoadArticleList( + const std::wstring& newsgroup + ); std::unique_ptr> LoadNewsgroupList(); void Open(std::filesystem::path dbPath); + void UpdateArticleList( + const std::wstring& newsgroup, + const std::vector& articleIds + ); void UpdateNewsgroupList(const std::vector& list); }; diff --git a/include/usenetsearch/Dns.h b/include/usenetsearch/Dns.h index 068a516..3b0c0b5 100644 --- a/include/usenetsearch/Dns.h +++ b/include/usenetsearch/Dns.h @@ -1,4 +1,6 @@ /* + Copyright© 2021 John Sennesael + UsenetSearch is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or diff --git a/include/usenetsearch/Except.h b/include/usenetsearch/Except.h index 0f806f5..37df23d 100644 --- a/include/usenetsearch/Except.h +++ b/include/usenetsearch/Except.h @@ -1,4 +1,6 @@ /* + Copyright© 2021 John Sennesael + UsenetSearch is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or @@ -34,4 +36,6 @@ public: }; +static constexpr const int ENOTFOUND{2001}; + } // namespace usenetsearch diff --git a/include/usenetsearch/IoSocket.h b/include/usenetsearch/IoSocket.h index e7a194d..bb8dc51 100644 --- a/include/usenetsearch/IoSocket.h +++ b/include/usenetsearch/IoSocket.h @@ -1,4 +1,6 @@ /* + Copyright© 2021 John Sennesael + UsenetSearch is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or diff --git a/include/usenetsearch/SSLConnection.h b/include/usenetsearch/SSLConnection.h index d1fb978..f141e0a 100644 --- a/include/usenetsearch/SSLConnection.h +++ b/include/usenetsearch/SSLConnection.h @@ -1,4 +1,6 @@ /* + Copyright© 2021 John Sennesael + UsenetSearch is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or diff --git a/include/usenetsearch/StringUtils.h b/include/usenetsearch/StringUtils.h index b10ec9f..245e2b8 100644 --- a/include/usenetsearch/StringUtils.h +++ b/include/usenetsearch/StringUtils.h @@ -15,6 +15,7 @@ #pragma once +#include #include #include #include @@ -36,6 +37,8 @@ struct StringException: public UsenetSearchException virtual ~StringException() = default; }; +std::string StringHash(const std::string& input); + template std::vector StringSplit( const T& str, @@ -58,16 +61,43 @@ std::vector StringSplit( return result; } -std::string StringLeftTrim(const std::string& str); +template +T StringLeftTrim(const T& str) +{ + T s = str; + s.erase(s.begin(), std::find_if(s.begin(), s.end(), + std::not1(std::ptr_fun(std::isspace)))); + return s; +} -std::string StringRightTrim(const std::string& str); +template +T StringRightTrim(const T& str) +{ + T s = str; + s.erase(std::find_if(s.rbegin(), s.rend(), + std::not1(std::ptr_fun(std::isspace))).base(), + s.end()); + return s; +} bool StringStartsWith(const std::string& needle, const std::string& haystack); +bool StringStartsWith(const std::wstring& needle, const std::wstring& haystack); -std::string StringTrim(const std::string& str); +template +T StringTrim(const T& str) +{ + return StringLeftTrim(StringRightTrim(str)); +} -std::string StringToLower(const std::string& str); +template +T StringToLower(const T& str) +{ + T copy = str; + std::transform(copy.begin(),copy.end(),copy.begin(),::tolower); + return copy; +} bool StringToBoolean(const std::string& str); +bool StringToBoolean(const std::wstring& str); } // namespace usenetsearch diff --git a/include/usenetsearch/TcpConnection.h b/include/usenetsearch/TcpConnection.h index 18d1f65..940f55c 100644 --- a/include/usenetsearch/TcpConnection.h +++ b/include/usenetsearch/TcpConnection.h @@ -1,4 +1,6 @@ /* + Copyright© 2021 John Sennesael + UsenetSearch is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or diff --git a/include/usenetsearch/UsenetClient.h b/include/usenetsearch/UsenetClient.h index 9804106..749af39 100644 --- a/include/usenetsearch/UsenetClient.h +++ b/include/usenetsearch/UsenetClient.h @@ -1,4 +1,6 @@ /* + Copyright© 2021 John Sennesael + UsenetSearch is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or @@ -36,6 +38,11 @@ struct UsenetClientException: public UsenetSearchException virtual ~UsenetClientException() = default; }; +struct NntpHeader +{ + +}; + struct NntpMessage { std::uint16_t code; @@ -73,7 +80,17 @@ public: std::uint16_t port, bool useSSL = false ); - std::vector List(); + void Group(const std::wstring& groupName); + std::unique_ptr> List(); + + /* whilst message id's are typically numbers, the rfc states they are unique + alphanumeric strings. */ + std::unique_ptr> ListGroup( + const std::wstring& newsGroup + ); + + // use the ListGroup command to get a list of article id's in a newsgroup + // then use the HEAD
command to get the headers for each article }; diff --git a/src/Configuration.cpp b/src/Configuration.cpp index 8bc407a..8e44ca0 100644 --- a/src/Configuration.cpp +++ b/src/Configuration.cpp @@ -1,4 +1,6 @@ /* + Copyright© 2021 John Sennesael + UsenetSearch is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or diff --git a/src/Database.cpp b/src/Database.cpp index 449b366..df2fe68 100644 --- a/src/Database.cpp +++ b/src/Database.cpp @@ -1,4 +1,6 @@ /* + Copyright© 2021 John Sennesael + UsenetSearch is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or @@ -36,6 +38,42 @@ Database::~Database() m_newsGroupFileOutput.close(); } } + +std::filesystem::path Database::GetArticleFilePath( + const std::wstring& newsgroup) +{ + const auto groupFile = StringHash(m_conv.to_bytes(newsgroup)) + ".db"; + return m_databasePath / groupFile; +} + +std::unique_ptr> Database::LoadArticleList( + const std::wstring& newsgroup) +{ + const auto articleFile = GetArticleFilePath(newsgroup); + if (!std::filesystem::exists(articleFile)) + { + throw DatabaseException(ENOTFOUND, + "No article list found for newsgroup " + m_conv.to_bytes(newsgroup) + ); + } + std::ifstream io; + io.open(articleFile, std::ios::binary); + std::uint64_t articleCount; + io.read( + reinterpret_cast(&articleCount), + sizeof(articleCount) + ); + auto result = std::make_unique>(); + for (std::uint64_t i = 0; i != articleCount; ++i) + { + std::wstring articleId; + io >> articleId; + result->emplace_back(articleId); + } + io.close(); + return result; +} + std::unique_ptr> Database::LoadNewsgroupList() { OpenNewsGroupFile(); @@ -97,6 +135,25 @@ void Database::OpenNewsGroupFile() } } +void Database::UpdateArticleList( + const std::wstring& newsgroup, + const std::vector& articleIds) +{ + const auto articleFile = GetArticleFilePath(newsgroup); + std::ofstream io; + io.open(articleFile, std::ios::binary); + const std::uint64_t articleCount = articleIds.size(); + io.write( + reinterpret_cast(&articleCount), + sizeof(articleCount) + ); + for (const auto& id: articleIds) + { + io << id; + } + io.close(); +} + void Database::UpdateNewsgroupList(const std::vector& list) { OpenNewsGroupFile(); diff --git a/src/Dns.cpp b/src/Dns.cpp index 7b66f96..a98b58c 100644 --- a/src/Dns.cpp +++ b/src/Dns.cpp @@ -1,4 +1,6 @@ /* + Copyright© 2021 John Sennesael + UsenetSearch is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or diff --git a/src/Except.cpp b/src/Except.cpp index 61cb58f..5a1e457 100644 --- a/src/Except.cpp +++ b/src/Except.cpp @@ -1,4 +1,6 @@ /* + Copyright© 2021 John Sennesael + UsenetSearch is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or diff --git a/src/IoSocket.cpp b/src/IoSocket.cpp index af53247..9b4616c 100644 --- a/src/IoSocket.cpp +++ b/src/IoSocket.cpp @@ -1,4 +1,6 @@ /* + Copyright© 2021 John Sennesael + UsenetSearch is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or diff --git a/src/SSLConnection.cpp b/src/SSLConnection.cpp index 868dea4..dcaf01c 100644 --- a/src/SSLConnection.cpp +++ b/src/SSLConnection.cpp @@ -1,4 +1,6 @@ /* + Copyright© 2021 John Sennesael + UsenetSearch is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or diff --git a/src/StringUtils.cpp b/src/StringUtils.cpp index d535107..dbd396b 100644 --- a/src/StringUtils.cpp +++ b/src/StringUtils.cpp @@ -1,4 +1,6 @@ /* + Copyright© 2021 John Sennesael + UsenetSearch is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or @@ -14,11 +16,16 @@ */ #include +#include #include #include +#include +#include #include #include +#include + #include "usenetsearch/StringUtils.h" namespace usenetsearch { @@ -64,21 +71,21 @@ std::ifstream& operator>>(std::ifstream& in, std::wstring& str) return in; } -std::string StringLeftTrim(const std::string& str) +std::string StringHash(const std::string& input) { - std::string s = str; - s.erase(s.begin(), std::find_if(s.begin(), s.end(), - std::not1(std::ptr_fun(std::isspace)))); - return s; -} - -std::string StringRightTrim(const std::string& str) -{ - std::string s = str; - s.erase(std::find_if(s.rbegin(), s.rend(), - std::not1(std::ptr_fun(std::isspace))).base(), - s.end()); - return s; + unsigned char result[MD5_DIGEST_LENGTH]; + MD5( + reinterpret_cast(const_cast(input.c_str())), + input.size(), + result + ); + std::ostringstream sout; + sout << std::hex << std::setfill('0'); + for(long long c: result) + { + sout << std::setw(2) << reinterpret_cast(c); + } + return sout.str(); } bool StringStartsWith(const std::string& needle, const std::string& haystack) @@ -86,6 +93,11 @@ bool StringStartsWith(const std::string& needle, const std::string& haystack) return (std::strncmp(haystack.c_str(),needle.c_str(),needle.size()) == 0); } +bool StringStartsWith(const std::wstring& needle, const std::wstring& haystack) +{ + return (std::wcsncmp(haystack.c_str(),needle.c_str(),needle.size()) == 0); +} + bool StringToBoolean(const std::string& str) { const std::string lstr = StringTrim(StringToLower(str)); @@ -96,16 +108,16 @@ bool StringToBoolean(const std::string& str) ); } -std::string StringToLower(const std::string& str) +bool StringToBoolean(const std::wstring& str) { - std::string copy = str; - std::transform(copy.begin(),copy.end(),copy.begin(),::tolower); - return copy; -} - -std::string StringTrim(const std::string& str) -{ - return StringLeftTrim(StringRightTrim(str)); + const std::wstring lstr = StringTrim(StringToLower(str)); + if ((lstr == L"true") || (lstr == L"yes") || (lstr == L"1")) return true; + if ((lstr == L"false") || (lstr == L"no") || (lstr == L"0")) return false; + std::wstring_convert> conv; + throw StringException(EINVAL, + "The string \"" + conv.to_bytes(str) + + "\" is not a valid boolean value." + ); } } // namespace usenetsearch diff --git a/src/TcpConnection.cpp b/src/TcpConnection.cpp index 43f0d72..af06018 100644 --- a/src/TcpConnection.cpp +++ b/src/TcpConnection.cpp @@ -1,4 +1,6 @@ /* + Copyright© 2021 John Sennesael + UsenetSearch is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or diff --git a/src/UsenetClient.cpp b/src/UsenetClient.cpp index 1cae17b..6d232bd 100644 --- a/src/UsenetClient.cpp +++ b/src/UsenetClient.cpp @@ -1,4 +1,6 @@ /* + Copyright© 2021 John Sennesael + UsenetSearch is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or @@ -115,13 +117,28 @@ void UsenetClient::Connect( } } +void UsenetClient::Group(const std::wstring& groupName) +{ + // Send user name + Write(L"GROUP " + groupName + L"\r\n"); + auto response = ReadLine(); + if (IsError(response)) + { + throw UsenetClientException( + response.code, + "Error changing group to " + m_conv.to_bytes(groupName) + " : " + + m_conv.to_bytes(response.message) + ); + } +} + bool UsenetClient::IsError(const NntpMessage& msg) const { if (msg.code >= 400) return true; return false; } -std::vector UsenetClient::List() +std::unique_ptr> UsenetClient::List() { Write(L"LIST COUNTS\r\n"); /* In response, we should get a 215 response followed by the list of news @@ -139,7 +156,7 @@ std::vector UsenetClient::List() const auto listStr = ReadUntil(L"\r\n.\r\n"); // parse the list. const auto lines = StringSplit(listStr, std::wstring{L"\r\n"}); - std::vector result; + auto result = std::make_unique>(); for (const auto& line: lines) { NntpListEntry entry; @@ -151,12 +168,38 @@ std::vector UsenetClient::List() entry.low = std::stoul(fields[2]); entry.count = std::stoul(fields[3]); entry.status = fields[4]; - result.emplace_back(entry); + result->emplace_back(entry); } } return result; } +std::unique_ptr> UsenetClient::ListGroup(const std::wstring& newsGroup) +{ + Write(L"LISTGROUP " + newsGroup + L"\r\n"); + /* In response, we should get a 211 response followed by the list of + article ID's ending in a period on it's own line. */ + const auto response = ReadLine(); + if (IsError(response)) + { + throw UsenetClientException( + response.code, + "Failed to fetch newsgroup list from server, " + + std::string{"server responded with: "} + + m_conv.to_bytes(response.message) + ); + } + const auto listStr = ReadUntil(L"\r\n.\r\n"); + // parse the list. + const auto lines = StringSplit(listStr, std::wstring{L"\r\n"}); + auto result = std::make_unique>(); + for (const auto& line: lines) + { + result->emplace_back(StringTrim(line)); + } + return result; +} + NntpMessage UsenetClient::ReadLine() { NntpMessage result{}; diff --git a/src/main.cpp b/src/main.cpp index dcc60a7..6e47377 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -1,4 +1,6 @@ /* + Copyright© 2021 John Sennesael + UsenetSearch is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or @@ -38,6 +40,9 @@ void Usage(const std::string& programName) int main(int argc, char* argv[]) { + std::cout << "UsenetSearch - usenet search indexer" << std::endl; + std::cout << "Copyright© 2021 John Sennesael" << std::endl << std::endl; + std::string configFile{"config.json"}; // Parse args. @@ -86,7 +91,8 @@ int main(int argc, char* argv[]) conv.from_bytes(config.NNTPServerPassword()) ); - // Just testing the list command for now. + // BEGIN TEMPORARY TEST CODE +/* const auto list = client.List(); db.UpdateNewsgroupList(list); std::cout << "Number of newsgroups in newsgroup list (saved): " @@ -95,7 +101,16 @@ int main(int argc, char* argv[]) const auto listLoaded = db.LoadNewsgroupList(); std::cout << "Number of newsgroups in newsgroup list (loaded): " << listLoaded->size() << std::endl; - +*/ + const auto articles = client.ListGroup(L"comp.os.os2.comm"); + std::cout << "Saving " << articles->size() << " articles." << std::endl; + db.UpdateArticleList(L"comp.os.os2.comm", *articles); + + const auto loadedArticles = db.LoadArticleList(L"comp.os.os2.comm"); + std::cout << "Loaded " << loadedArticles->size() << " artices." << std::endl; + + // END TEMPORARY TEST CODE + } catch (const usenetsearch::UsenetSearchException& e) {