diff --git a/CMakeLists.txt b/CMakeLists.txt
index 12387a2..c0881f8 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,15 +1,17 @@
-# UsenetSearch is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
+# Copyright© 2021 John Sennesael
#
-# UsenetSearch is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
+# UsenetSearch is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
#
-# You should have received a copy of the GNU General Public License
-# along with UsenetSearch. If not, see <https://www.gnu.org/licenses/>.
+# UsenetSearch is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with UsenetSearch. If not, see <https://www.gnu.org/licenses/>.
cmake_minimum_required(VERSION 3.5)
diff --git a/COPYING b/COPYING
index 53d1f3d..8cbce33 100644
--- a/COPYING
+++ b/COPYING
@@ -1,3 +1,5 @@
+ Copyright© 2021 John Sennesael
+
GNU GENERAL PUBLIC LICENSE
Version 3, 29 June 2007
diff --git a/include/usenetsearch/Configuration.h b/include/usenetsearch/Configuration.h
index eb030bb..0107e25 100644
--- a/include/usenetsearch/Configuration.h
+++ b/include/usenetsearch/Configuration.h
@@ -1,4 +1,6 @@
/*
+ Copyright© 2021 John Sennesael
+
UsenetSearch is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
diff --git a/include/usenetsearch/Database.h b/include/usenetsearch/Database.h
index 9dd94af..4a422f0 100644
--- a/include/usenetsearch/Database.h
+++ b/include/usenetsearch/Database.h
@@ -1,4 +1,6 @@
/*
+ Copyright© 2021 John Sennesael
+
UsenetSearch is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
@@ -15,9 +17,11 @@
#pragma once
+#include
#include
#include
#include
+#include
#include
#include
@@ -35,20 +39,30 @@ struct DatabaseException: public UsenetSearchException
virtual ~DatabaseException() = default;
};
-class Database{
+// File-backed store rooted at m_databasePath. Besides the newsgroup list it
+// keeps one article-ID list file per newsgroup (see GetArticleFilePath).
+class Database
+{
+ // Converts wide strings to byte strings; used to hash group names and to
+ // build exception messages.
+ std::wstring_convert> m_conv;
std::filesystem::path m_databasePath;
std::uint64_t m_databaseVersion{DatabaseVersion};
std::ifstream m_newsGroupFileInput;
std::ofstream m_newsGroupFileOutput;
+ // Returns m_databasePath / (StringHash of the newsgroup name + ".db").
+ std::filesystem::path GetArticleFilePath(const std::wstring& newsgroup);
void OpenNewsGroupFile();
public:
~Database();
+ // Reads the article IDs stored for newsgroup. Throws DatabaseException
+ // with code ENOTFOUND when no article file exists for that group.
+ std::unique_ptr> LoadArticleList(
+ const std::wstring& newsgroup
+ );
std::unique_ptr> LoadNewsgroupList();
void Open(std::filesystem::path dbPath);
+ // Writes the number of IDs followed by each ID to the article file of
+ // newsgroup.
+ void UpdateArticleList(
+ const std::wstring& newsgroup,
+ const std::vector& articleIds
+ );
void UpdateNewsgroupList(const std::vector& list);
};
diff --git a/include/usenetsearch/Dns.h b/include/usenetsearch/Dns.h
index 068a516..3b0c0b5 100644
--- a/include/usenetsearch/Dns.h
+++ b/include/usenetsearch/Dns.h
@@ -1,4 +1,6 @@
/*
+ Copyright© 2021 John Sennesael
+
UsenetSearch is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
diff --git a/include/usenetsearch/Except.h b/include/usenetsearch/Except.h
index 0f806f5..37df23d 100644
--- a/include/usenetsearch/Except.h
+++ b/include/usenetsearch/Except.h
@@ -1,4 +1,6 @@
/*
+ Copyright© 2021 John Sennesael
+
UsenetSearch is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
@@ -34,4 +36,6 @@ public:
};
+// Error code passed to DatabaseException when no article list file exists
+// for the requested newsgroup.
+static constexpr const int ENOTFOUND{2001};
+
} // namespace usenetsearch
diff --git a/include/usenetsearch/IoSocket.h b/include/usenetsearch/IoSocket.h
index e7a194d..bb8dc51 100644
--- a/include/usenetsearch/IoSocket.h
+++ b/include/usenetsearch/IoSocket.h
@@ -1,4 +1,6 @@
/*
+ Copyright© 2021 John Sennesael
+
UsenetSearch is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
diff --git a/include/usenetsearch/SSLConnection.h b/include/usenetsearch/SSLConnection.h
index d1fb978..f141e0a 100644
--- a/include/usenetsearch/SSLConnection.h
+++ b/include/usenetsearch/SSLConnection.h
@@ -1,4 +1,6 @@
/*
+ Copyright© 2021 John Sennesael
+
UsenetSearch is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
diff --git a/include/usenetsearch/StringUtils.h b/include/usenetsearch/StringUtils.h
index b10ec9f..245e2b8 100644
--- a/include/usenetsearch/StringUtils.h
+++ b/include/usenetsearch/StringUtils.h
@@ -15,6 +15,7 @@
#pragma once
+#include
#include
#include
#include
@@ -36,6 +37,8 @@ struct StringException: public UsenetSearchException
virtual ~StringException() = default;
};
+// Returns the MD5 digest of input formatted as hexadecimal text, two
+// zero-padded hex digits per digest byte.
+std::string StringHash(const std::string& input);
+
template
std::vector StringSplit(
const T& str,
@@ -58,16 +61,43 @@ std::vector StringSplit(
return result;
}
-std::string StringLeftTrim(const std::string& str);
+// Returns a copy of str with leading whitespace removed.
+//
+// T is a std::basic_string specialisation (std::string or std::wstring).
+// Only characters in the ASCII range are handed to std::isspace: calling it
+// with a value that is not representable as unsigned char (most wchar_t
+// values, or a negative char) is undefined behaviour, so such characters are
+// treated as non-whitespace. A lambda replaces std::ptr_fun (removed in
+// C++17) and std::not1 (removed in C++20).
+template <typename T>
+T StringLeftTrim(const T& str)
+{
+    const auto isNotSpace = [](typename T::value_type c)
+    {
+        const auto code = static_cast<long>(c);
+        return code < 0 || code > 0x7F
+            || !std::isspace(static_cast<int>(code));
+    };
+    // Keep everything from the first non-whitespace character onwards.
+    return T(std::find_if(str.begin(), str.end(), isNotSpace), str.end());
+}
-std::string StringRightTrim(const std::string& str);
+// Returns a copy of str with trailing whitespace removed.
+//
+// T is a std::basic_string specialisation (std::string or std::wstring).
+// Only characters in the ASCII range are handed to std::isspace: calling it
+// with a value that is not representable as unsigned char (most wchar_t
+// values, or a negative char) is undefined behaviour, so such characters are
+// treated as non-whitespace. A lambda replaces std::ptr_fun (removed in
+// C++17) and std::not1 (removed in C++20).
+template <typename T>
+T StringRightTrim(const T& str)
+{
+    const auto isNotSpace = [](typename T::value_type c)
+    {
+        const auto code = static_cast<long>(c);
+        return code < 0 || code > 0x7F
+            || !std::isspace(static_cast<int>(code));
+    };
+    // Searching in reverse finds the last kept character; base() turns that
+    // reverse iterator into the forward iterator one past it.
+    const auto keepEnd =
+        std::find_if(str.rbegin(), str.rend(), isNotSpace).base();
+    return T(str.begin(), keepEnd);
+}
+// Both overloads return true when haystack begins with needle. Note the
+// argument order: the prefix (needle) comes first.
bool StringStartsWith(const std::string& needle, const std::string& haystack);
+bool StringStartsWith(const std::wstring& needle, const std::wstring& haystack);
-std::string StringTrim(const std::string& str);
+// Returns a copy of str with whitespace removed from both ends, by applying
+// StringRightTrim first and StringLeftTrim to its result.
+template
+T StringTrim(const T& str)
+{
+ return StringLeftTrim(StringRightTrim(str));
+}
-std::string StringToLower(const std::string& str);
+// Returns a copy of str with its ASCII-range characters passed through
+// ::tolower; all other characters are copied unchanged.
+//
+// T is a std::basic_string specialisation (std::string or std::wstring).
+// ::tolower has undefined behaviour for values not representable as
+// unsigned char, and its int result would be narrowed back into the
+// character type, so characters outside the ASCII range never reach it.
+template <typename T>
+T StringToLower(const T& str)
+{
+    using Char = typename T::value_type;
+    T copy = str;
+    std::transform(copy.begin(), copy.end(), copy.begin(),
+        [](Char c) -> Char
+        {
+            const auto code = static_cast<long>(c);
+            if (code < 0 || code > 0x7F) return c;
+            return static_cast<Char>(::tolower(static_cast<int>(code)));
+        });
+    return copy;
+}
+// Parses a textual boolean. The wide-string overload trims and lower-cases
+// the input, maps "true"/"yes"/"1" to true and "false"/"no"/"0" to false,
+// and throws StringException (EINVAL) for anything else.
+// NOTE(review): the narrow overload presumably follows the same rules -
+// confirm against its definition.
bool StringToBoolean(const std::string& str);
+bool StringToBoolean(const std::wstring& str);
} // namespace usenetsearch
diff --git a/include/usenetsearch/TcpConnection.h b/include/usenetsearch/TcpConnection.h
index 18d1f65..940f55c 100644
--- a/include/usenetsearch/TcpConnection.h
+++ b/include/usenetsearch/TcpConnection.h
@@ -1,4 +1,6 @@
/*
+ Copyright© 2021 John Sennesael
+
UsenetSearch is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
diff --git a/include/usenetsearch/UsenetClient.h b/include/usenetsearch/UsenetClient.h
index 9804106..749af39 100644
--- a/include/usenetsearch/UsenetClient.h
+++ b/include/usenetsearch/UsenetClient.h
@@ -1,4 +1,6 @@
/*
+ Copyright© 2021 John Sennesael
+
UsenetSearch is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
@@ -36,6 +38,11 @@ struct UsenetClientException: public UsenetSearchException
virtual ~UsenetClientException() = default;
};
+// Currently an empty struct: no members are defined.
+struct NntpHeader
+{
+
+};
+
struct NntpMessage
{
std::uint16_t code;
@@ -73,7 +80,17 @@ public:
std::uint16_t port,
bool useSSL = false
);
- std::vector List();
+ // Sends the GROUP command for groupName; throws UsenetClientException
+ // when the server answers with an error code.
+ void Group(const std::wstring& groupName);
+ std::unique_ptr> List();
+
+ /* Sends LISTGROUP for newsGroup and returns the article IDs from the
+ reply. Whilst message IDs are typically numbers, the RFC states they are
+ unique alphanumeric strings, so they are handled as strings. */
+ std::unique_ptr> ListGroup(
+ const std::wstring& newsGroup
+ );
+
+ // Intended flow: use the ListGroup command to get the list of article IDs
+ // in a newsgroup, then use the HEAD command to get the headers for each
+ // article.
};
diff --git a/src/Configuration.cpp b/src/Configuration.cpp
index 8bc407a..8e44ca0 100644
--- a/src/Configuration.cpp
+++ b/src/Configuration.cpp
@@ -1,4 +1,6 @@
/*
+ Copyright© 2021 John Sennesael
+
UsenetSearch is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
diff --git a/src/Database.cpp b/src/Database.cpp
index 449b366..df2fe68 100644
--- a/src/Database.cpp
+++ b/src/Database.cpp
@@ -1,4 +1,6 @@
/*
+ Copyright© 2021 John Sennesael
+
UsenetSearch is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
@@ -36,6 +38,42 @@ Database::~Database()
m_newsGroupFileOutput.close();
}
}
+
+// Builds the path of the file holding the article list of newsgroup: the
+// StringHash of the group name (converted to bytes with m_conv) plus ".db",
+// located directly inside m_databasePath.
+std::filesystem::path Database::GetArticleFilePath(
+ const std::wstring& newsgroup)
+{
+ const auto groupFile = StringHash(m_conv.to_bytes(newsgroup)) + ".db";
+ return m_databasePath / groupFile;
+}
+
+// Loads the article ID list previously saved for newsgroup.
+//
+// The file is read as a 64-bit article count (raw in-memory bytes) followed
+// by that many IDs, each extracted with operator>>.
+//
+// Returns the IDs that could be read. Throws DatabaseException with code
+// ENOTFOUND when no article file exists for the group.
+std::unique_ptr<std::vector<std::wstring>> Database::LoadArticleList(
+    const std::wstring& newsgroup)
+{
+    const auto articleFile = GetArticleFilePath(newsgroup);
+    if (!std::filesystem::exists(articleFile))
+    {
+        throw DatabaseException(ENOTFOUND,
+            "No article list found for newsgroup " + m_conv.to_bytes(newsgroup)
+        );
+    }
+    std::ifstream io;
+    io.open(articleFile, std::ios::binary);
+    // Zero-initialised: read() leaves the buffer untouched on failure, so an
+    // unreadable or too-short file must not leave an indeterminate count.
+    std::uint64_t articleCount{0};
+    io.read(
+        reinterpret_cast<char*>(&articleCount),
+        sizeof(articleCount)
+    );
+    auto result = std::make_unique<std::vector<std::wstring>>();
+    for (std::uint64_t i = 0; i != articleCount; ++i)
+    {
+        std::wstring articleId;
+        // The stored count cannot be trusted on a truncated file: stop at the
+        // first failed extraction instead of appending empty IDs.
+        if (!(io >> articleId)) break;
+        result->emplace_back(articleId);
+    }
+    io.close();
+    return result;
+}
+
std::unique_ptr> Database::LoadNewsgroupList()
{
OpenNewsGroupFile();
@@ -97,6 +135,25 @@ void Database::OpenNewsGroupFile()
}
}
+// Writes articleIds to the article file of newsgroup: first the number of
+// IDs as a 64-bit value (raw in-memory bytes), then every ID via operator<<.
+// NOTE(review): the stream state is never checked, so a failed open or
+// write goes unnoticed by the caller.
+void Database::UpdateArticleList(
+ const std::wstring& newsgroup,
+ const std::vector& articleIds)
+{
+ const auto articleFile = GetArticleFilePath(newsgroup);
+ std::ofstream io;
+ io.open(articleFile, std::ios::binary);
+ const std::uint64_t articleCount = articleIds.size();
+ io.write(
+ reinterpret_cast(&articleCount),
+ sizeof(articleCount)
+ );
+ for (const auto& id: articleIds)
+ {
+ io << id;
+ }
+ io.close();
+}
+
void Database::UpdateNewsgroupList(const std::vector& list)
{
OpenNewsGroupFile();
diff --git a/src/Dns.cpp b/src/Dns.cpp
index 7b66f96..a98b58c 100644
--- a/src/Dns.cpp
+++ b/src/Dns.cpp
@@ -1,4 +1,6 @@
/*
+ Copyright© 2021 John Sennesael
+
UsenetSearch is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
diff --git a/src/Except.cpp b/src/Except.cpp
index 61cb58f..5a1e457 100644
--- a/src/Except.cpp
+++ b/src/Except.cpp
@@ -1,4 +1,6 @@
/*
+ Copyright© 2021 John Sennesael
+
UsenetSearch is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
diff --git a/src/IoSocket.cpp b/src/IoSocket.cpp
index af53247..9b4616c 100644
--- a/src/IoSocket.cpp
+++ b/src/IoSocket.cpp
@@ -1,4 +1,6 @@
/*
+ Copyright© 2021 John Sennesael
+
UsenetSearch is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
diff --git a/src/SSLConnection.cpp b/src/SSLConnection.cpp
index 868dea4..dcaf01c 100644
--- a/src/SSLConnection.cpp
+++ b/src/SSLConnection.cpp
@@ -1,4 +1,6 @@
/*
+ Copyright© 2021 John Sennesael
+
UsenetSearch is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
diff --git a/src/StringUtils.cpp b/src/StringUtils.cpp
index d535107..dbd396b 100644
--- a/src/StringUtils.cpp
+++ b/src/StringUtils.cpp
@@ -1,4 +1,6 @@
/*
+ Copyright© 2021 John Sennesael
+
UsenetSearch is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
@@ -14,11 +16,16 @@
*/
#include
+#include
#include
#include
+#include
+#include
#include
#include
+#include
+
#include "usenetsearch/StringUtils.h"
namespace usenetsearch {
@@ -64,21 +71,21 @@ std::ifstream& operator>>(std::ifstream& in, std::wstring& str)
return in;
}
-std::string StringLeftTrim(const std::string& str)
+// Computes the MD5 digest of input and returns it as hexadecimal text, two
+// zero-padded hex digits per digest byte.
+std::string StringHash(const std::string& input)
{
- std::string s = str;
- s.erase(s.begin(), std::find_if(s.begin(), s.end(),
- std::not1(std::ptr_fun(std::isspace))));
- return s;
-}
-
-std::string StringRightTrim(const std::string& str)
-{
- std::string s = str;
- s.erase(std::find_if(s.rbegin(), s.rend(),
- std::not1(std::ptr_fun(std::isspace))).base(),
- s.end());
- return s;
+ unsigned char result[MD5_DIGEST_LENGTH];
+ MD5(
+ reinterpret_cast(const_cast(input.c_str())),
+ input.size(),
+ result
+ );
+ std::ostringstream sout;
+ sout << std::hex << std::setfill('0');
+ // Each digest byte is widened to an integer so the stream prints it as a
+ // number rather than as a character.
+ for(long long c: result)
+ {
+ sout << std::setw(2) << reinterpret_cast(c);
+ }
+ return sout.str();
}
bool StringStartsWith(const std::string& needle, const std::string& haystack)
@@ -86,6 +93,11 @@ bool StringStartsWith(const std::string& needle, const std::string& haystack)
return (std::strncmp(haystack.c_str(),needle.c_str(),needle.size()) == 0);
}
+// Wide-string overload: reports whether haystack begins with needle by
+// comparing the first needle.size() characters of both strings.
+bool StringStartsWith(const std::wstring& needle, const std::wstring& haystack)
+{
+    const auto prefixLength = needle.size();
+    const int order =
+        std::wcsncmp(haystack.c_str(), needle.c_str(), prefixLength);
+    return order == 0;
+}
+
bool StringToBoolean(const std::string& str)
{
const std::string lstr = StringTrim(StringToLower(str));
@@ -96,16 +108,16 @@ bool StringToBoolean(const std::string& str)
);
}
-std::string StringToLower(const std::string& str)
+// Wide-string overload: trims and lower-cases str, then maps
+// "true"/"yes"/"1" to true and "false"/"no"/"0" to false. Any other value
+// raises StringException with code EINVAL.
+bool StringToBoolean(const std::wstring& str)
{
- std::string copy = str;
- std::transform(copy.begin(),copy.end(),copy.begin(),::tolower);
- return copy;
-}
-
-std::string StringTrim(const std::string& str)
-{
- return StringLeftTrim(StringRightTrim(str));
+ const std::wstring lstr = StringTrim(StringToLower(str));
+ if ((lstr == L"true") || (lstr == L"yes") || (lstr == L"1")) return true;
+ if ((lstr == L"false") || (lstr == L"no") || (lstr == L"0")) return false;
+ // The exception message is a narrow string, so the offending input is
+ // converted to bytes first.
+ std::wstring_convert> conv;
+ throw StringException(EINVAL,
+ "The string \"" + conv.to_bytes(str)
+ + "\" is not a valid boolean value."
+ );
}
} // namespace usenetsearch
diff --git a/src/TcpConnection.cpp b/src/TcpConnection.cpp
index 43f0d72..af06018 100644
--- a/src/TcpConnection.cpp
+++ b/src/TcpConnection.cpp
@@ -1,4 +1,6 @@
/*
+ Copyright© 2021 John Sennesael
+
UsenetSearch is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
diff --git a/src/UsenetClient.cpp b/src/UsenetClient.cpp
index 1cae17b..6d232bd 100644
--- a/src/UsenetClient.cpp
+++ b/src/UsenetClient.cpp
@@ -1,4 +1,6 @@
/*
+ Copyright© 2021 John Sennesael
+
UsenetSearch is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
@@ -115,13 +117,28 @@ void UsenetClient::Connect(
}
}
+// Sends the NNTP GROUP command for groupName and reads the one-line reply.
+// Throws UsenetClientException carrying the response code when the server
+// reports an error.
+void UsenetClient::Group(const std::wstring& groupName)
+{
+ // Ask the server to switch to the requested group.
+ Write(L"GROUP " + groupName + L"\r\n");
+ auto response = ReadLine();
+ if (IsError(response))
+ {
+ throw UsenetClientException(
+ response.code,
+ "Error changing group to " + m_conv.to_bytes(groupName) + " : "
+ + m_conv.to_bytes(response.message)
+ );
+ }
+}
+
bool UsenetClient::IsError(const NntpMessage& msg) const
{
if (msg.code >= 400) return true;
return false;
}
-std::vector UsenetClient::List()
+std::unique_ptr> UsenetClient::List()
{
Write(L"LIST COUNTS\r\n");
/* In response, we should get a 215 response followed by the list of news
@@ -139,7 +156,7 @@ std::vector UsenetClient::List()
const auto listStr = ReadUntil(L"\r\n.\r\n");
// parse the list.
const auto lines = StringSplit(listStr, std::wstring{L"\r\n"});
- std::vector result;
+ auto result = std::make_unique>();
for (const auto& line: lines)
{
NntpListEntry entry;
@@ -151,12 +168,38 @@ std::vector UsenetClient::List()
entry.low = std::stoul(fields[2]);
entry.count = std::stoul(fields[3]);
entry.status = fields[4];
- result.emplace_back(entry);
+ result->emplace_back(entry);
}
}
return result;
}
+// Sends LISTGROUP for newsGroup and returns the article IDs listed in the
+// server's multi-line reply, one entry per non-empty line.
+//
+// Throws UsenetClientException carrying the response code when the status
+// line reports an error.
+std::unique_ptr<std::vector<std::wstring>> UsenetClient::ListGroup(
+    const std::wstring& newsGroup)
+{
+    Write(L"LISTGROUP " + newsGroup + L"\r\n");
+    /* In response, we should get a 211 response followed by the list of
+       article IDs, ending in a period on its own line. */
+    const auto response = ReadLine();
+    if (IsError(response))
+    {
+        throw UsenetClientException(
+            response.code,
+            "Failed to fetch article list for newsgroup "
+            + m_conv.to_bytes(newsGroup)
+            + ", server responded with: "
+            + m_conv.to_bytes(response.message)
+        );
+    }
+    const auto listStr = ReadUntil(L"\r\n.\r\n");
+    // parse the list.
+    const auto lines = StringSplit(listStr, std::wstring{L"\r\n"});
+    auto result = std::make_unique<std::vector<std::wstring>>();
+    for (const auto& line: lines)
+    {
+        const auto articleId = StringTrim(line);
+        // Blank lines and the lone "." terminator are not article IDs; skip
+        // them in case the text being split still contains them.
+        if (articleId.empty() || articleId == L".") continue;
+        result->emplace_back(articleId);
+    }
+    return result;
+}
+
NntpMessage UsenetClient::ReadLine()
{
NntpMessage result{};
diff --git a/src/main.cpp b/src/main.cpp
index dcc60a7..6e47377 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -1,4 +1,6 @@
/*
+ Copyright© 2021 John Sennesael
+
UsenetSearch is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
@@ -38,6 +40,9 @@ void Usage(const std::string& programName)
int main(int argc, char* argv[])
{
+ std::cout << "UsenetSearch - usenet search indexer" << std::endl;
+ std::cout << "Copyright© 2021 John Sennesael" << std::endl << std::endl;
+
std::string configFile{"config.json"};
// Parse args.
@@ -86,7 +91,8 @@ int main(int argc, char* argv[])
conv.from_bytes(config.NNTPServerPassword())
);
- // Just testing the list command for now.
+ // BEGIN TEMPORARY TEST CODE
+/*
const auto list = client.List();
db.UpdateNewsgroupList(list);
std::cout << "Number of newsgroups in newsgroup list (saved): "
@@ -95,7 +101,16 @@ int main(int argc, char* argv[])
const auto listLoaded = db.LoadNewsgroupList();
std::cout << "Number of newsgroups in newsgroup list (loaded): "
<< listLoaded->size() << std::endl;
-
+*/
+        // Round-trip check: fetch the article list of one group, save it,
+        // then load it back and report both counts.
+        const std::wstring testGroup{L"comp.os.os2.comm"};
+        const auto articles = client.ListGroup(testGroup);
+        std::cout << "Saving " << articles->size() << " articles." << std::endl;
+        db.UpdateArticleList(testGroup, *articles);
+
+        const auto loadedArticles = db.LoadArticleList(testGroup);
+        std::cout << "Loaded " << loadedArticles->size() << " articles." << std::endl;
+
+ // END TEMPORARY TEST CODE
+
}
catch (const usenetsearch::UsenetSearchException& e)
{