Corrected functionality, identified and fixed search bug

This commit is contained in:
asdf 2021-07-21 19:31:43 +10:00
parent 4760777e07
commit 29e7e3addd
2 changed files with 84 additions and 37 deletions

62
data.py
View File

@ -75,7 +75,8 @@ def get_parent_record(parent_id: str, link_data: list) -> list:
if parent_id == "":
raise ValueError("parent_id cannot be empty")
for record in link_data:
if record[0] == parent_id.partition("+")[2]:
timestamp = record[2]
if timestamp == parent_id.partition("+")[2]:
return record
raise KeyError("there's no parent record for the specified parent_id")
@ -167,47 +168,44 @@ class LinkData:
new_post_id = 1
record = record._replace(ID_if_parent=new_post_id)
self.link_data.insert(0, list(record))
self.generate_category_data()
else:
self.link_data.insert(0, list(record))
self.generate_category_data()
return new_post_id
def generate_category_data(self):
"""generate categories list and category count from sorted link data"""
self.categories.clear()
i = (record for record in self.link_data if record[4] != "")
for record in i:
for record in self.link_data:
name = record[4]
timestamp = record[2]
if name not in [cat_record["name"] for cat_record in self.categories]:
self.categories.append(
{"name": name, "count": 1, "last_updated": timestamp}
)
if name != "":
if name not in [cat_record["name"] for cat_record in self.categories]:
self.categories.append(
{"name": name, "count": 1, "last_updated": timestamp}
)
else:
for cat_record in self.categories:
if cat_record["name"] == name:
cat_record["count"] += 1
if cat_record["last_updated"] < timestamp:
cat_record["last_updated"] = timestamp
else:
for cat_record in self.categories:
if cat_record["name"] == name:
cat_record["count"] += 1
if cat_record["last_updated"] < timestamp:
cat_record["last_updated"] = timestamp
for record in self.link_data:
if record[4] == "":
parent_id = self.get_parent_record(record[3])
timestamp = record[2]
if parent_id != -1:
for line in self.link_data:
if line[0] == parent_id:
name = record[4]
for cat_record in self.categories:
if cat_record["name"] == name:
if cat_record["last_updated"] < timestamp:
cat_record["last_updated"] = timestamp
parent_id = record[3]
try:
parent_record = get_parent_record(parent_id, self.link_data)
except KeyError:
continue
parent_cat_name = parent_record[4]
if parent_cat_name not in [cat_record["name"] for cat_record in self.categories]:
self.categories.append(
{"name": parent_cat_name, "count": 1, "last_updated": timestamp}
)
else:
for cat_record in self.categories:
if cat_record["name"] == parent_cat_name:
if cat_record["last_updated"] < timestamp:
cat_record["last_updated"] = timestamp
def search(self, keyword: str) -> list:
"""returns a unique list of link_data records for posts that contain

View File

@ -108,10 +108,48 @@ class TestDataHelperFunctions(unittest.TestCase):
)
def test_get_parent_record(self):
# test that an empty parent_id throws a value error
test_link_data = []
parent_id = ""
with self.assertRaises(ValueError):
data.get_parent_record("", test_link_data)
data.get_parent_record(parent_id, test_link_data)
# confirm the function returns a valid parent record
test_parent_record = [
1,
"testuser",
"1000",
"",
"test_category",
"test_url",
"test_title",
]
test_link_data = (
[
[
1,
"testuser",
"1000",
"",
"test_category",
"test_url",
"test_title",
]
],
)
parent_id = 1000
self.assertListEqual(
test_parent_record, data.get_parent_record(parent_id, test_link_data)
)
# confirm key error raised if no results found
parent_id = 69
with self.assertRaises(KeyError):
data.get_parent_record(parent_id, test_link_data)
class TestLinkDataSearch(unittest.TestCase):
@ -177,12 +215,22 @@ class TestLinkDataSearch(unittest.TestCase):
"",
"this is an orphaned reply but it contains the keyword",
],
[66, "keyword", "1576461366.5580268", "", "c", "c", "c"],
[66, "keyword", "1576461366.5580261", "", "c", "c", "c"],
[65, "poster6", "1576461367.5580268", "", "keyword", "c", "c"],
[64, "poster7", "1576461368.5580268", "", "c", "keyword", "c"],
[63, "poster8", "1576461369.5580268", "", "c", "c", "keyword"],
[62, "poster9", "1576461370.5580268", "", "c", "c", "ssskeywordsubstring"],
[61, "poster0", "1576461370.5580268", "", "c", "c", "KEYWORD capital"],
[61, "poster0", "1576461371.5580268", "", "c", "c", "KEYWORD capital"],
[60, "poste99", "1576461372.5580268", "", "c", "c", "c"],
[
"",
"user99",
"1576461372.6680268",
"poste99+1576461372.5580268",
"",
"",
"the post doesn't contain the keyword but this reply does",
],
]
test_results = [
@ -204,12 +252,13 @@ class TestLinkDataSearch(unittest.TestCase):
"gemini://keyword",
"keyword site with no replies",
),
(66, "keyword", "1576461366.5580268", "", "c", "c", "c"),
(66, "keyword", "1576461366.5580261", "", "c", "c", "c"),
(65, "poster6", "1576461367.5580268", "", "keyword", "c", "c"),
(64, "poster7", "1576461368.5580268", "", "c", "keyword", "c"),
(63, "poster8", "1576461369.5580268", "", "c", "c", "keyword"),
(62, "poster9", "1576461370.5580268", "", "c", "c", "ssskeywordsubstring"),
(61, "poster0", "1576461370.5580268", "", "c", "c", "KEYWORD capital"),
(61, "poster0", "1576461371.5580268", "", "c", "c", "KEYWORD capital"),
(60, "poste99", "1576461372.5580268", "", "c", "c", "c"),
]
self.assertEqual(link_data.search("keyword"), test_results)