From 29e7e3addd370938fa49daa4fc881a2ab4a1f8eb Mon Sep 17 00:00:00 2001 From: asdf Date: Wed, 21 Jul 2021 19:31:43 +1000 Subject: [PATCH] Corrected functionality, identified and fixed search bug --- data.py | 62 ++++++++++++++++++++++------------------------ tests/data_test.py | 59 +++++++++++++++++++++++++++++++++++++++---- 2 files changed, 84 insertions(+), 37 deletions(-) diff --git a/data.py b/data.py index acab7ce..192e9f1 100644 --- a/data.py +++ b/data.py @@ -75,7 +75,8 @@ def get_parent_record(parent_id: str, link_data: list) -> list: if parent_id == "": raise ValueError("parent_id cannot be empty") for record in link_data: - if record[0] == parent_id.partition("+")[2]: + timestamp = record[2] + if timestamp == parent_id.partition("+")[2]: return record raise KeyError("there's no parent record for the specified parent_id") @@ -167,47 +168,44 @@ class LinkData: new_post_id = 1 record = record._replace(ID_if_parent=new_post_id) self.link_data.insert(0, list(record)) - self.generate_category_data() else: self.link_data.insert(0, list(record)) + self.generate_category_data() return new_post_id def generate_category_data(self): """generate categories list and category count from sorted link data""" self.categories.clear() - i = (record for record in self.link_data if record[4] != "") - for record in i: + for record in self.link_data: name = record[4] timestamp = record[2] - if name not in [cat_record["name"] for cat_record in self.categories]: - self.categories.append( - {"name": name, "count": 1, "last_updated": timestamp} - ) + if name != "": + if name not in [cat_record["name"] for cat_record in self.categories]: + self.categories.append( + {"name": name, "count": 1, "last_updated": timestamp} + ) + else: + for cat_record in self.categories: + if cat_record["name"] == name: + cat_record["count"] += 1 + if cat_record["last_updated"] < timestamp: + cat_record["last_updated"] = timestamp else: - for cat_record in self.categories: - if cat_record["name"] == name: - cat_record["count"] += 1 - if cat_record["last_updated"] < timestamp: - cat_record["last_updated"] = timestamp - - for record in self.link_data: - if record[4] == "": - parent_id = self.get_parent_record(record[3]) - timestamp = record[2] - if parent_id != -1: - for line in self.link_data: - if line[0] == parent_id: - name = record[4] - for cat_record in self.categories: - if cat_record["name"] == name: - if cat_record["last_updated"] < timestamp: - cat_record["last_updated"] = timestamp - - - - - - + parent_id = record[3] + try: + parent_record = get_parent_record(parent_id, self.link_data) + except KeyError: + continue + parent_cat_name = parent_record[4] + if parent_cat_name not in [cat_record["name"] for cat_record in self.categories]: + self.categories.append( + {"name": parent_cat_name, "count": 1, "last_updated": timestamp} + ) + else: + for cat_record in self.categories: + if cat_record["name"] == parent_cat_name: + if cat_record["last_updated"] < timestamp: + cat_record["last_updated"] = timestamp def search(self, keyword: str) -> list: """returns a unique list of link_data records for posts that contain diff --git a/tests/data_test.py b/tests/data_test.py index 1cb586f..033bff3 100644 --- a/tests/data_test.py +++ b/tests/data_test.py @@ -108,10 +108,48 @@ class TestDataHelperFunctions(unittest.TestCase): ) def test_get_parent_record(self): + # test that an empty parent_id throws a value error test_link_data = [] + parent_id = "" with self.assertRaises(ValueError): - data.get_parent_record("", test_link_data) + data.get_parent_record(parent_id, test_link_data) + + # confirm the function returns a valid parent record + test_parent_record = [ + 1, + "testuser", + "1000", + "", + "test_category", + "test_url", + "test_title", + ] + test_link_data = ( + [ + [ + 1, + "testuser", + "1000", + "", + "test_category", + "test_url", + "test_title", + ] + ], + ) + parent_id = 1000 + + self.assertListEqual( + test_parent_record, data.get_parent_record(parent_id, test_link_data) + ) + + # confirm key error raised if no results found + + parent_id = 69 + + with self.assertRaises(KeyError): + data.get_parent_record(parent_id, test_link_data) class TestLinkDataSearch(unittest.TestCase): @@ -177,12 +215,22 @@ class TestLinkDataSearch(unittest.TestCase): "", "this is an orphaned reply but it contains the keyword", ], - [66, "keyword", "1576461366.5580268", "", "c", "c", "c"], + [66, "keyword", "1576461366.5580261", "", "c", "c", "c"], [65, "poster6", "1576461367.5580268", "", "keyword", "c", "c"], [64, "poster7", "1576461368.5580268", "", "c", "keyword", "c"], [63, "poster8", "1576461369.5580268", "", "c", "c", "keyword"], [62, "poster9", "1576461370.5580268", "", "c", "c", "ssskeywordsubstring"], - [61, "poster0", "1576461370.5580268", "", "c", "c", "KEYWORD capital"], + [61, "poster0", "1576461371.5580268", "", "c", "c", "KEYWORD capital"], + [60, "poste99", "1576461372.5580268", "", "c", "c", "c"], + [ + "", + "user99", + "1576461372.6680268", + "poste99+1576461372.5580268", + "", + "", + "the post doesn't contain the keyword but this reply does", + ], ] test_results = [ @@ -204,12 +252,13 @@ class TestLinkDataSearch(unittest.TestCase): "gemini://keyword", "keyword site with no replies", ), - (66, "keyword", "1576461366.5580268", "", "c", "c", "c"), + (66, "keyword", "1576461366.5580261", "", "c", "c", "c"), (65, "poster6", "1576461367.5580268", "", "keyword", "c", "c"), (64, "poster7", "1576461368.5580268", "", "c", "keyword", "c"), (63, "poster8", "1576461369.5580268", "", "c", "c", "keyword"), (62, "poster9", "1576461370.5580268", "", "c", "c", "ssskeywordsubstring"), - (61, "poster0", "1576461370.5580268", "", "c", "c", "KEYWORD capital"), + (61, "poster0", "1576461371.5580268", "", "c", "c", "KEYWORD capital"), + (60, "poste99", "1576461372.5580268", "", "c", "c", "c"), ] self.assertEqual(link_data.search("keyword"), test_results)