gmi-feed-aggregator/Makefile

78 lines
2.0 KiB
Makefile
Raw Normal View History

2020-11-20 01:00:17 +00:00
# Tunable knobs; override on the command line, e.g. `make NUM_LINKS_TO_SHOW=100`.

# Upper bound on the number of link lines written to sorted.gmi
# (it is handed to `head` in the sorted.gmi rule).
NUM_LINKS_TO_SHOW := 50

# How many feeds are fetched at once (it is handed to `xargs -P` in the
# downloaded.gmi rule).
PARALLEL_DOWNLOADS := 8
# Entry points.
#
# default: re-download every feed, then rebuild the page.
#   The two steps are run as sequential sub-makes instead of being listed as
#   prerequisites: prerequisites of one target may be built concurrently under
#   `make -j`, and `build` must not read downloaded.gmi / sorted.gmi while
#   `get-feeds` is still deleting and regenerating them.
#   $(MAKE) (rather than a literal `make`) keeps -n, -j and command-line
#   variable overrides working in the sub-makes.
#
# build: regenerate index.gmi from whatever has already been downloaded.
.PHONY: default build

default:
	$(MAKE) get-feeds
	$(MAKE) build

build: index.gmi
# get-feeds: force a fresh download of all feeds.
#   downloaded.gmi is removed so that the sub-make has to rebuild it (and
#   therefore sorted.gmi) even when feeds.txt has not changed.
#   $(RM) expands to `rm -f`, so the very first run -- when downloaded.gmi does
#   not exist yet -- no longer aborts with "No such file or directory".
#   $(MAKE) replaces the literal `make` so that flags such as -n and -j and
#   the jobserver are passed on to the sub-make.
.PHONY: get-feeds
get-feeds:
	$(RM) downloaded.gmi
	@$(MAKE) sorted.gmi
# index.gmi: the generated page = header.gmi + date-grouped links + footer.gmi.
#
# The awk step walks sorted.gmi, whose third whitespace-separated field is the
# entry's date/timestamp:
#   * whenever the first 10 characters of that field (the YYYY-MM-DD part)
#     differ from the previous line's, a "## YYYY-MM-DD" heading is printed;
#   * the date field is then blanked and the rest of the line is printed.
# `tee -a` appends the result to the target and also echoes it to stdout.
#
# The day is extracted with substr(..., 1, 10). awk strings are 1-indexed; a
# start index of 0 is handled differently between awk implementations and on
# some of them yields only 9 characters, which would merge e.g. the 20th-29th
# of a month under a single "## 2020-11-2" heading.
index.gmi: header.gmi sorted.gmi footer.gmi
	cat header.gmi > "$@"
	awk '\
		{ \
			day = substr($$3, 1, 10); \
			if (day != last_day) { \
				last_day = day; \
				printf "## %s\n", day; \
			} \
			$$3 = ""; \
			print; \
		}' sorted.gmi \
		| tee -a "$@"
	cat footer.gmi >> "$@"
2020-11-17 22:07:06 +00:00
# sorted.gmi: the link lines of downloaded.gmi ordered newest-first.
#   Lines are sorted in reverse on everything from the third field onwards
#   (the date comes first in that key), exact duplicate lines are collapsed,
#   and only the first $(NUM_LINKS_TO_SHOW) lines are kept.
sorted.gmi: downloaded.gmi
	sort -r -k 3 "$<" | uniq | head -n $(NUM_LINKS_TO_SHOW) > "$@"
2020-11-17 22:07:06 +00:00
# downloaded.gmi: unsorted "=> URL DATE TITLE" link lines from every feed
# listed (one URL per line) in feeds.txt.
#
# For each feed URL, up to $(PARALLEL_DOWNLOADS) at a time, a bash script:
#   1. fetches the page with ./gcat into a temporary file;
#   2. takes the first "# " heading of the page as the feed title;
#   3. keeps only link lines that carry a date (YYYY-MM-DD, optionally followed
#      by THH:MM[:SS] and a "Z" or +/-HH:MM offset) and some link text;
#   4. turns the link target into an absolute URL:
#        "full urls"     - gemini:// links are kept as they are,
#        "slash urls"    - /path links are resolved against scheme://host,
#        "no-slash urls" - other relative links are resolved against the
#                          directory of the feed URL (everything up to its
#                          last "/");
#      links with any other URL scheme are skipped;
#   5. prefixes the link text (field 4) with "<feed title> - " when the page
#      has a title, and appends the line to the target (tee also echoes it).
# A failing download is not fatal: that feed simply contributes no lines.
#
# Implementation notes:
#   * The feed URL reaches the script as its first positional argument instead
#     of being substituted into the script text, and the page title reaches
#     awk through the environment instead of being pasted into the awk
#     program. Neither a line of feeds.txt nor a title served by a remote
#     host can therefore be interpreted as shell or awk code.
#   * The relative-link filter excludes whitespace as well as "/" after "=>",
#     otherwise "=> /path" lines would also match it (by letting the optional
#     whitespace match nothing) and be emitted a second time with a "//" URL.
#   * The script is a single logical line (every line ends in a backslash),
#     so it cannot contain shell comments and each statement needs its ";".
downloaded.gmi: feeds.txt
	> "$@"
	xargs -a "$<" -P $(PARALLEL_DOWNLOADS) -I {} bash -c '\
		URL="$$1"; \
		TMP_FILE=$$(mktemp); \
		BASE_URL=$$(printf "%s\n" "$$URL" | grep -oE ".+/"); \
		ROOT_URL=$$(printf "%s\n" "$$URL" | grep -oE "^[^:/]+://[^/]+"); \
		LINK_RE="^=>\s*\S+\s+[0-9]{4}-[0-9]{2}-[0-9]{2}(T[0-9]{2}:[0-9]{2}(:[0-9]{2})?(Z|[+-][0-9]{1,2}:[0-9]{2}))?\s+\S.*\$$"; \
		ADD_TITLE="{ if (ENVIRON[\"TITLE\"] != \"\") { \$$4 = ENVIRON[\"TITLE\"] \" - \" \$$4 } print }"; \
		echo "BASE_URL: $$BASE_URL"; \
		echo "DOWNLOADING: $$URL into $$TMP_FILE"; \
		./gcat "$$URL" > "$$TMP_FILE"; \
		TITLE=$$(grep -s -m 1 -E "^# " "$$TMP_FILE" | cut -c 3-); \
		export TITLE; \
		echo "TITLE: $$TITLE"; \
		echo "full urls:"; \
		grep -hsE "$$LINK_RE" "$$TMP_FILE" \
			| grep -sE "^=>\s*gemini://" \
			| awk "$$ADD_TITLE" \
			| tee -a "$@"; \
		echo "slash urls:"; \
		grep -hsE "$$LINK_RE" "$$TMP_FILE" \
			| grep -sE "^=>\s*/" \
			| sed -E -e "s#^=>[[:space:]]*/#=> $${ROOT_URL}/#" \
			| awk "$$ADD_TITLE" \
			| tee -a "$@"; \
		echo "no-slash urls:"; \
		grep -hsE "$$LINK_RE" "$$TMP_FILE" \
			| grep -svE "^=>\s*[A-Za-z][A-Za-z0-9+.-]*:" \
			| grep -sE "^=>\s*[^/[:space:]]" \
			| sed -E -e "s#^=>[[:space:]]*#=> $${BASE_URL}#" \
			| awk "$$ADD_TITLE" \
			| tee -a "$@"; \
		rm -f "$$TMP_FILE"; \
	' _ {}