# forked from cmccabe/linkulator2
# 86 lines, 3.0 KiB, Python
|
"""This module takes input and returns link_data, the data structure linkulator works from"""
|
||
|
import time
|
||
|
from pathlib import PurePath
|
||
|
from glob import glob
|
||
|
|
||
|
def is_valid(line: str) -> bool:
    """Report whether *line* has the shape of a linkulator data record.

    A line is considered valid when it contains exactly four pipe (``|``)
    characters.  The contents of the individual fields are not inspected.

    Args:
        line: one raw line of text, with or without its trailing newline.

    Returns:
        ``True`` if the line contains exactly four pipes, ``False`` otherwise.
    """
    # Return the comparison itself instead of branching to literal booleans.
    return line.count("|") == 4
|
||
|
|
||
|
|
||
|
def process(line: str):
    """Split one data line into its pipe-separated fields.

    Any trailing newline characters are removed first; what remains is split
    on every ``|`` and returned as a list of strings.
    """
    return line.rstrip("\n").split("|")
|
||
|
|
||
|
|
||
|
def get(config, ignore_names):
    """Read every user's linkulator data file and build the link data set.

    Data files are found by globbing the path formed from
    ``config.PATHS.all_homedir_pattern``, ``config.PATHS.datadir`` and
    ``config.PATHS.datafile``.  The owner of a file is the name of the
    directory two levels above it.  Every call re-reads all files from disk,
    which is probably the heaviest part of the program, so do not call this
    more often than necessary.

    A line is used only when it contains exactly four pipe characters, its
    first field parses as a number, and that timestamp is earlier than the
    current time.  Lines failing any of these checks are skipped.

    Args:
        config: object exposing ``PATHS.all_homedir_pattern``,
            ``PATHS.datadir`` and ``PATHS.datafile``.
        ignore_names: collection of file-owner names whose data files are
            skipped entirely.  ``None`` or an empty collection ignores nobody.

    Returns:
        A tuple ``(link_data, categories, category_counts)`` where

        * ``link_data`` is a list of records
          ``[post_id, owner, timestamp, parent_id, category, link_url,
          link_title]`` sorted by ascending numeric timestamp.  ``post_id``
          is a sequential integer (starting at 1, assigned in read order)
          for records whose ``parent_id`` is empty, and ``""`` otherwise.
          All other fields are strings exactly as read from the file.
        * ``categories`` lists each non-empty category once, in order of
          first appearance in the sorted ``link_data``.
        * ``category_counts`` maps each of those categories to the number of
          records carrying it.

    Raises:
        SystemExit: after printing a hint, when no usable record was found.
    """
    pipe_count = 4  # a properly formatted data line has exactly four pipes

    files_pattern = str(
        PurePath(config.PATHS.all_homedir_pattern).joinpath(
            config.PATHS.datadir, config.PATHS.datafile
        )
    )

    # Honour the caller's ignore list (it used to be overwritten with []).
    ignored = frozenset(ignore_names or ())
    now = time.time()  # one reference time for the whole refresh

    records = []
    for filename in glob(files_pattern):
        # The file owner's username is derived from the path.
        file_owner = PurePath(filename).parent.parent.name
        if file_owner in ignored:
            # Skip ignored users before touching their file at all.
            continue
        with open(filename) as data_file:
            for raw_line in data_file:
                if raw_line.count("|") != pipe_count:
                    # Ignore lines that fail validation.
                    continue
                fields = raw_line.rstrip("\n").split("|")
                try:
                    timestamp = float(fields[0])
                except ValueError:
                    # Empty or non-numeric timestamp: treat as invalid
                    # rather than aborting the whole refresh.
                    continue
                if timestamp < now:  # only use links dated earlier than now
                    records.append([file_owner, *fields])

    if not records:
        print("It looks link there are no links yet. Run 'linkulator -p' to add one.")
        # Equivalent to sys.exit(); the bare exit() builtin is provided by
        # the site module for interactive use and may be absent.
        raise SystemExit

    # Prepend the post id.  At this point index 2 is the parent-id field, so
    # an empty value marks a top-level post, which receives the next id.
    next_id = 1
    for record in records:
        if record[2] == "":
            record.insert(0, next_id)
            next_id += 1
        else:
            record.insert(0, "")

    # After the insert, index 2 is the timestamp.  Compare numerically: a
    # plain string comparison would place "5.0" after "1000.0".
    records.sort(key=lambda record: float(record[2]))

    categories = []
    category_counts = {}
    for record in records:
        category = record[4]
        if not category:
            # Records with an empty category are not counted.
            continue
        if category in category_counts:
            category_counts[category] += 1
        else:
            categories.append(category)
            category_counts[category] = 1

    return records, categories, category_counts
|