Implemented GROUP, LISTGROUP commands - Implemented database saving and loading of article ids

This commit is contained in:
John Sennesael 2021-09-21 17:46:31 -05:00 committed by John Sennesael
parent 5645d31f59
commit 60271b8de3
21 changed files with 262 additions and 44 deletions

View File

@ -1,15 +1,17 @@
# UsenetSearch is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# Copyright© 2021 John Sennesael
#
# UsenetSearch is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# UsenetSearch is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# You should have received a copy of the GNU General Public License
# along with UsenetSearch. If not, see <https://www.gnu.org/licenses/>.
# UsenetSearch is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with UsenetSearch. If not, see <https://www.gnu.org/licenses/>.
cmake_minimum_required(VERSION 3.5)

View File

@ -1,3 +1,5 @@
Copyright© 2021 John Sennesael
GNU GENERAL PUBLIC LICENSE
Version 3, 29 June 2007

View File

@ -1,4 +1,6 @@
/*
Copyright© 2021 John Sennesael
UsenetSearch is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or

View File

@ -1,4 +1,6 @@
/*
Copyright© 2021 John Sennesael
UsenetSearch is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
@ -15,9 +17,11 @@
#pragma once
#include <codecvt>
#include <cstdint>
#include <filesystem>
#include <fstream>
#include <locale>
#include <memory>
#include <vector>
@ -35,20 +39,30 @@ struct DatabaseException: public UsenetSearchException
virtual ~DatabaseException() = default;
};
class Database{
/* File based store for the data gathered from the usenet server. Per-newsgroup
   article ID lists are kept in one file per newsgroup inside the database
   directory (see GetArticleFilePath). */
class Database
{
// Converts wide strings to byte strings (used for file names and error
// messages).
std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> m_conv;
// Directory in which the per-newsgroup article list files are placed.
std::filesystem::path m_databasePath;
// Starts out as DatabaseVersion.
// NOTE(review): presumably the on-disk format version - confirm.
std::uint64_t m_databaseVersion{DatabaseVersion};
// NOTE(review): presumably the streams of the newsgroup list file handled by
// OpenNewsGroupFile() - confirm. The destructor closes the output stream.
std::ifstream m_newsGroupFileInput;
std::ofstream m_newsGroupFileOutput;
// Returns the path of the article list file for the given newsgroup: the hash
// of the group name plus ".db", inside m_databasePath.
std::filesystem::path GetArticleFilePath(const std::wstring& newsgroup);
void OpenNewsGroupFile();
public:
~Database();
// Reads the stored article IDs of a newsgroup. Throws DatabaseException with
// code ENOTFOUND when no article list file exists for that newsgroup.
std::unique_ptr<std::vector<std::wstring>> LoadArticleList(
const std::wstring& newsgroup
);
// Calls OpenNewsGroupFile() and returns the stored newsgroup list.
std::unique_ptr<std::vector<NntpListEntry>> LoadNewsgroupList();
// NOTE(review): presumably selects dbPath as the database directory - the
// implementation is not visible here, confirm.
void Open(std::filesystem::path dbPath);
// Replaces the stored article list of a newsgroup: writes the number of IDs
// followed by every ID to the newsgroup's article list file.
void UpdateArticleList(
const std::wstring& newsgroup,
const std::vector<std::wstring>& articleIds
);
// Calls OpenNewsGroupFile() and stores the given newsgroup list.
void UpdateNewsgroupList(const std::vector<NntpListEntry>& list);
};

View File

@ -1,4 +1,6 @@
/*
Copyright© 2021 John Sennesael
UsenetSearch is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or

View File

@ -1,4 +1,6 @@
/*
Copyright© 2021 John Sennesael
UsenetSearch is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
@ -34,4 +36,6 @@ public:
};
// Project specific error code meaning "the requested item does not exist",
// e.g. reported when no article list is stored for a newsgroup.
static constexpr int ENOTFOUND = 2001;
} // namespace usenetsearch

View File

@ -1,4 +1,6 @@
/*
Copyright© 2021 John Sennesael
UsenetSearch is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or

View File

@ -1,4 +1,6 @@
/*
Copyright© 2021 John Sennesael
UsenetSearch is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or

View File

@ -15,6 +15,7 @@
#pragma once
#include <algorithm>
#include <fstream>
#include <string>
#include <vector>
@ -36,6 +37,8 @@ struct StringException: public UsenetSearchException
virtual ~StringException() = default;
};
std::string StringHash(const std::string& input);
template<typename T>
std::vector<T> StringSplit(
const T& str,
@ -58,16 +61,43 @@ std::vector<T> StringSplit(
return result;
}
std::string StringLeftTrim(const std::string& str);
/* Returns a copy of str with all leading whitespace removed.
 *
 * Whitespace is the fixed set ' ', '\t', '\n', '\v', '\f' and '\r', which is
 * what isspace() reports in the "C" locale.
 *
 * T is a string type such as std::string or std::wstring.
 */
template<typename T>
T StringLeftTrim(const T& str)
{
    // Compare against character constants instead of calling
    // std::isspace(int): passing wchar_t or negative char values to it is
    // undefined behaviour, and std::ptr_fun / std::not1 no longer exist in
    // current C++ standards.
    const auto isNotSpace = [](const typename T::value_type ch)
    {
        return !(ch == ' ' || ch == '\t' || ch == '\n'
            || ch == '\v' || ch == '\f' || ch == '\r');
    };
    T s = str;
    s.erase(s.begin(), std::find_if(s.begin(), s.end(), isNotSpace));
    return s;
}
std::string StringRightTrim(const std::string& str);
/* Returns a copy of str with all trailing whitespace removed.
 *
 * Whitespace is the fixed set ' ', '\t', '\n', '\v', '\f' and '\r', which is
 * what isspace() reports in the "C" locale.
 *
 * T is a string type such as std::string or std::wstring.
 */
template<typename T>
T StringRightTrim(const T& str)
{
    // Compare against character constants instead of calling
    // std::isspace(int): passing wchar_t or negative char values to it is
    // undefined behaviour, and std::ptr_fun / std::not1 no longer exist in
    // current C++ standards.
    const auto isNotSpace = [](const typename T::value_type ch)
    {
        return !(ch == ' ' || ch == '\t' || ch == '\n'
            || ch == '\v' || ch == '\f' || ch == '\r');
    };
    T s = str;
    // Search backwards for the last non-space character; base() converts the
    // reverse iterator into the position just past that character.
    s.erase(std::find_if(s.rbegin(), s.rend(), isNotSpace).base(), s.end());
    return s;
}
bool StringStartsWith(const std::string& needle, const std::string& haystack);
bool StringStartsWith(const std::wstring& needle, const std::wstring& haystack);
std::string StringTrim(const std::string& str);
/* Returns a copy of str with whitespace removed from both ends. The trailing
   whitespace is stripped first, then the leading whitespace. */
template<typename T>
T StringTrim(const T& str)
{
    const T withoutTrailing = StringRightTrim(str);
    return StringLeftTrim(withoutTrailing);
}
std::string StringToLower(const std::string& str);
/* Returns a copy of str in which the ASCII letters 'A'..'Z' are replaced by
 * their lowercase counterparts; every other character is left untouched.
 *
 * This matches tolower() in the "C" locale, but is well defined for every
 * character type: handing wchar_t or negative char values to ::tolower(int)
 * is undefined behaviour.
 *
 * T is a string type such as std::string or std::wstring.
 */
template<typename T>
T StringToLower(const T& str)
{
    using Char = typename T::value_type;
    T copy = str;
    std::transform(copy.begin(), copy.end(), copy.begin(),
        [](const Char ch) -> Char
        {
            const bool isUpper = (ch >= 'A') && (ch <= 'Z');
            return isUpper ? static_cast<Char>(ch - 'A' + 'a') : ch;
        });
    return copy;
}
bool StringToBoolean(const std::string& str);
bool StringToBoolean(const std::wstring& str);
} // namespace usenetsearch

View File

@ -1,4 +1,6 @@
/*
Copyright© 2021 John Sennesael
UsenetSearch is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or

View File

@ -1,4 +1,6 @@
/*
Copyright© 2021 John Sennesael
UsenetSearch is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
@ -36,6 +38,11 @@ struct UsenetClientException: public UsenetSearchException
virtual ~UsenetClientException() = default;
};
/* Placeholder type, it has no members yet.
   NOTE(review): presumably meant to hold the parsed headers of an article
   once the HEAD command is implemented - confirm. */
struct NntpHeader
{
};
struct NntpMessage
{
std::uint16_t code;
@ -73,7 +80,17 @@ public:
std::uint16_t port,
bool useSSL = false
);
std::vector<NntpListEntry> List();
void Group(const std::wstring& groupName);
std::unique_ptr<std::vector<NntpListEntry>> List();
/* Whilst message IDs are typically numbers, the RFC states they are unique
alphanumeric strings. */
std::unique_ptr<std::vector<std::wstring>> ListGroup(
const std::wstring& newsGroup
);
// Use the ListGroup <newsgroup> command to get a list of article IDs in a newsgroup,
// then use the HEAD <article ID> command to get the headers for each article.
};

View File

@ -1,4 +1,6 @@
/*
Copyright© 2021 John Sennesael
UsenetSearch is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or

View File

@ -1,4 +1,6 @@
/*
Copyright© 2021 John Sennesael
UsenetSearch is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
@ -36,6 +38,42 @@ Database::~Database()
m_newsGroupFileOutput.close();
}
}
/* Returns the location of the article list file of a newsgroup. The file name
   is the hash of the group name (converted to a byte string) followed by
   ".db", and the file lives directly inside the database directory. */
std::filesystem::path Database::GetArticleFilePath(
    const std::wstring& newsgroup)
{
    const std::string groupNameBytes = m_conv.to_bytes(newsgroup);
    std::string fileName = StringHash(groupNameBytes);
    fileName += ".db";
    return m_databasePath / fileName;
}
/* Loads the article IDs stored for a newsgroup.
 *
 * The article list file starts with the number of entries as a raw 64 bit
 * unsigned integer, followed by that many IDs which are read with the stream
 * extraction operator for std::wstring.
 *
 * newsgroup: name of the newsgroup whose article list is wanted.
 *
 * Returns the list of article IDs in the order in which they were stored.
 *
 * Throws DatabaseException when no article list file exists for the
 * newsgroup, when the file cannot be opened or read, or when it holds fewer
 * entries than its header announces.
 */
std::unique_ptr<std::vector<std::wstring>> Database::LoadArticleList(
    const std::wstring& newsgroup)
{
    const auto articleFile = GetArticleFilePath(newsgroup);
    if (!std::filesystem::exists(articleFile))
    {
        throw DatabaseException(ENOTFOUND,
            "No article list found for newsgroup " + m_conv.to_bytes(newsgroup)
        );
    }
    // NOTE(review): ENOTFOUND is the only database error code visible from
    // here, so it is also used for the read failures below; switch to a more
    // specific code if the project has one.
    std::ifstream io(articleFile, std::ios::binary);
    // Value initialised so that a failed read can never leave it holding an
    // indeterminate value.
    std::uint64_t articleCount{0};
    if (!io || !io.read(
            reinterpret_cast<char*>(&articleCount),
            sizeof(articleCount)))
    {
        throw DatabaseException(ENOTFOUND,
            "Could not read article list for newsgroup "
            + m_conv.to_bytes(newsgroup)
        );
    }
    // Deliberately no reserve(articleCount): the count comes from the file
    // and may be bogus when the file is damaged.
    auto result = std::make_unique<std::vector<std::wstring>>();
    for (std::uint64_t i = 0; i != articleCount; ++i)
    {
        // Read straight into the new element to avoid an extra copy.
        result->emplace_back();
        io >> result->back();
        if (io.fail())
        {
            // Without this check a truncated file would make the loop append
            // empty entries until articleCount is reached.
            throw DatabaseException(ENOTFOUND,
                "Article list for newsgroup " + m_conv.to_bytes(newsgroup)
                + " is truncated or corrupt"
            );
        }
    }
    return result;
}
std::unique_ptr<std::vector<NntpListEntry>> Database::LoadNewsgroupList()
{
OpenNewsGroupFile();
@ -97,6 +135,25 @@ void Database::OpenNewsGroupFile()
}
}
/* Stores the article IDs of a newsgroup, replacing any list stored earlier.
 *
 * The article list file receives the number of IDs as a raw 64 bit unsigned
 * integer, followed by every ID written with the stream insertion operator
 * for std::wstring.
 *
 * newsgroup:  name of the newsgroup the IDs belong to.
 * articleIds: the IDs to store, written in the given order.
 *
 * Throws DatabaseException when the file cannot be opened for writing or when
 * writing it fails.
 */
void Database::UpdateArticleList(
    const std::wstring& newsgroup,
    const std::vector<std::wstring>& articleIds)
{
    const auto articleFile = GetArticleFilePath(newsgroup);
    // NOTE(review): ENOTFOUND is the only database error code visible from
    // here; switch to a more specific code if the project has one.
    std::ofstream io(articleFile, std::ios::binary);
    if (!io)
    {
        throw DatabaseException(ENOTFOUND,
            "Could not open article list file for newsgroup "
            + m_conv.to_bytes(newsgroup) + " for writing"
        );
    }
    const std::uint64_t articleCount = articleIds.size();
    io.write(
        reinterpret_cast<const char*>(&articleCount),
        sizeof(articleCount)
    );
    for (const auto& id: articleIds)
    {
        io << id;
    }
    // Close explicitly so that a failure to flush the buffered data is
    // reflected in the stream state that is checked below.
    io.close();
    if (!io)
    {
        throw DatabaseException(ENOTFOUND,
            "Failed to write article list for newsgroup "
            + m_conv.to_bytes(newsgroup)
        );
    }
}
void Database::UpdateNewsgroupList(const std::vector<NntpListEntry>& list)
{
OpenNewsGroupFile();

View File

@ -1,4 +1,6 @@
/*
Copyright© 2021 John Sennesael
UsenetSearch is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or

View File

@ -1,4 +1,6 @@
/*
Copyright© 2021 John Sennesael
UsenetSearch is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or

View File

@ -1,4 +1,6 @@
/*
Copyright© 2021 John Sennesael
UsenetSearch is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or

View File

@ -1,4 +1,6 @@
/*
Copyright© 2021 John Sennesael
UsenetSearch is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or

View File

@ -1,4 +1,6 @@
/*
Copyright© 2021 John Sennesael
UsenetSearch is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
@ -14,11 +16,16 @@
*/
#include <algorithm>
#include <codecvt>
#include <cstring>
#include <fstream>
#include <iomanip>
#include <locale>
#include <string>
#include <vector>
#include <openssl/md5.h>
#include "usenetsearch/StringUtils.h"
namespace usenetsearch {
@ -64,21 +71,21 @@ std::ifstream& operator>>(std::ifstream& in, std::wstring& str)
return in;
}
std::string StringLeftTrim(const std::string& str)
std::string StringHash(const std::string& input)
{
std::string s = str;
s.erase(s.begin(), std::find_if(s.begin(), s.end(),
std::not1(std::ptr_fun<int, int>(std::isspace))));
return s;
}
std::string StringRightTrim(const std::string& str)
{
std::string s = str;
s.erase(std::find_if(s.rbegin(), s.rend(),
std::not1(std::ptr_fun<int, int>(std::isspace))).base(),
s.end());
return s;
unsigned char result[MD5_DIGEST_LENGTH];
MD5(
reinterpret_cast<unsigned char*>(const_cast<char*>(input.c_str())),
input.size(),
result
);
std::ostringstream sout;
sout << std::hex << std::setfill('0');
for(long long c: result)
{
sout << std::setw(2) << reinterpret_cast<long long>(c);
}
return sout.str();
}
bool StringStartsWith(const std::string& needle, const std::string& haystack)
@ -86,6 +93,11 @@ bool StringStartsWith(const std::string& needle, const std::string& haystack)
return (std::strncmp(haystack.c_str(),needle.c_str(),needle.size()) == 0);
}
/* Tells whether haystack begins with needle.
 *
 * needle:   the prefix to look for; an empty needle always matches.
 * haystack: the string that is inspected.
 *
 * The comparison covers the complete needle, including any embedded NUL
 * characters (a C style wcsncmp() would stop at the first NUL and report a
 * match too early).
 */
bool StringStartsWith(const std::wstring& needle, const std::wstring& haystack)
{
    // compare() clamps the length to the size of haystack, so a haystack that
    // is shorter than needle simply compares unequal.
    return haystack.compare(0, needle.size(), needle) == 0;
}
bool StringToBoolean(const std::string& str)
{
const std::string lstr = StringTrim(StringToLower(str));
@ -96,16 +108,16 @@ bool StringToBoolean(const std::string& str)
);
}
std::string StringToLower(const std::string& str)
bool StringToBoolean(const std::wstring& str)
{
std::string copy = str;
std::transform(copy.begin(),copy.end(),copy.begin(),::tolower);
return copy;
}
std::string StringTrim(const std::string& str)
{
return StringLeftTrim(StringRightTrim(str));
const std::wstring lstr = StringTrim(StringToLower(str));
if ((lstr == L"true") || (lstr == L"yes") || (lstr == L"1")) return true;
if ((lstr == L"false") || (lstr == L"no") || (lstr == L"0")) return false;
std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> conv;
throw StringException(EINVAL,
"The string \"" + conv.to_bytes(str)
+ "\" is not a valid boolean value."
);
}
} // namespace usenetsearch

View File

@ -1,4 +1,6 @@
/*
Copyright© 2021 John Sennesael
UsenetSearch is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or

View File

@ -1,4 +1,6 @@
/*
Copyright© 2021 John Sennesael
UsenetSearch is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
@ -115,13 +117,28 @@ void UsenetClient::Connect(
}
}
/* Sends the GROUP command for groupName to the server and checks the reply.
 *
 * Throws UsenetClientException, carrying the server's response code and
 * message, when the server answers with an error.
 */
void UsenetClient::Group(const std::wstring& groupName)
{
    Write(L"GROUP " + groupName + L"\r\n");
    const auto reply = ReadLine();
    if (!IsError(reply))
    {
        return;
    }
    throw UsenetClientException(
        reply.code,
        "Error changing group to " + m_conv.to_bytes(groupName) + " : "
        + m_conv.to_bytes(reply.message)
    );
}
/* Tells whether a server response signals a failure, which is the case for
   every response code of 400 and above. */
bool UsenetClient::IsError(const NntpMessage& msg) const
{
    return msg.code >= 400;
}
std::vector<NntpListEntry> UsenetClient::List()
std::unique_ptr<std::vector<NntpListEntry>> UsenetClient::List()
{
Write(L"LIST COUNTS\r\n");
/* In response, we should get a 215 response followed by the list of news
@ -139,7 +156,7 @@ std::vector<NntpListEntry> UsenetClient::List()
const auto listStr = ReadUntil(L"\r\n.\r\n");
// parse the list.
const auto lines = StringSplit(listStr, std::wstring{L"\r\n"});
std::vector<NntpListEntry> result;
auto result = std::make_unique<std::vector<NntpListEntry>>();
for (const auto& line: lines)
{
NntpListEntry entry;
@ -151,12 +168,38 @@ std::vector<NntpListEntry> UsenetClient::List()
entry.low = std::stoul(fields[2]);
entry.count = std::stoul(fields[3]);
entry.status = fields[4];
result.emplace_back(entry);
result->emplace_back(entry);
}
}
return result;
}
/* Requests the article IDs of a newsgroup with the LISTGROUP command.
 *
 * newsGroup: name of the newsgroup to list.
 *
 * Returns the whitespace trimmed article IDs in the order in which the server
 * sent them. Empty lines and the "." line that terminates the listing are
 * not part of the result.
 *
 * Throws UsenetClientException, carrying the server's response code and
 * message, when the server answers with an error.
 */
std::unique_ptr<std::vector<std::wstring>> UsenetClient::ListGroup(const std::wstring& newsGroup)
{
    Write(L"LISTGROUP " + newsGroup + L"\r\n");
    /* In response, we should get a 211 response followed by the list of
       article IDs ending in a period on its own line. */
    const auto response = ReadLine();
    if (IsError(response))
    {
        throw UsenetClientException(
            response.code,
            "Failed to fetch article list for newsgroup "
            + m_conv.to_bytes(newsGroup)
            + " from server, server responded with: "
            + m_conv.to_bytes(response.message)
        );
    }
    const auto listStr = ReadUntil(L"\r\n.\r\n");
    // parse the list.
    const auto lines = StringSplit(listStr, std::wstring{L"\r\n"});
    auto result = std::make_unique<std::vector<std::wstring>>();
    for (const auto& line: lines)
    {
        auto articleId = StringTrim(line);
        // Depending on whether the terminator is still part of listStr, the
        // split may yield an empty piece or the lone "."; neither of them is
        // an article ID.
        if (articleId.empty() || articleId == L".")
        {
            continue;
        }
        result->emplace_back(std::move(articleId));
    }
    return result;
}
NntpMessage UsenetClient::ReadLine()
{
NntpMessage result{};

View File

@ -1,4 +1,6 @@
/*
Copyright© 2021 John Sennesael
UsenetSearch is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
@ -38,6 +40,9 @@ void Usage(const std::string& programName)
int main(int argc, char* argv[])
{
std::cout << "UsenetSearch - usenet search indexer" << std::endl;
std::cout << "Copyright© 2021 John Sennesael" << std::endl << std::endl;
std::string configFile{"config.json"};
// Parse args.
@ -86,7 +91,8 @@ int main(int argc, char* argv[])
conv.from_bytes(config.NNTPServerPassword())
);
// Just testing the list command for now.
// BEGIN TEMPORARY TEST CODE
/*
const auto list = client.List();
db.UpdateNewsgroupList(list);
std::cout << "Number of newsgroups in newsgroup list (saved): "
@ -95,7 +101,16 @@ int main(int argc, char* argv[])
const auto listLoaded = db.LoadNewsgroupList();
std::cout << "Number of newsgroups in newsgroup list (loaded): "
<< listLoaded->size() << std::endl;
*/
const auto articles = client.ListGroup(L"comp.os.os2.comm");
std::cout << "Saving " << articles->size() << " articles." << std::endl;
db.UpdateArticleList(L"comp.os.os2.comm", *articles);
const auto loadedArticles = db.LoadArticleList(L"comp.os.os2.comm");
std::cout << "Loaded " << loadedArticles->size() << " artices." << std::endl;
// END TEMPORARY TEST CODE
}
catch (const usenetsearch::UsenetSearchException& e)
{