# gmi-feed-aggregator/Makefile

# Tunables.
# Upper bound on the number of link lines written to sorted.gmi.
NUM_LINKS_TO_SHOW := 100
# Number of feed downloads xargs runs at the same time.
PARALLEL_DOWNLOADS := 8
# Default goal: refresh every feed, then rebuild the page.
# The two steps run as sequential sub-makes because `build` must not start
# until `get-feeds` has finished regenerating sorted.gmi; listed as plain
# prerequisites they could run concurrently under `make -j`.
.PHONY: default
default:
	@$(MAKE) get-feeds
	@$(MAKE) build
# Convenience alias: bring index.gmi up to date.
.PHONY: build
build: index.gmi
# Upload index.gmi with sftp in batch mode; the batch commands (change into
# the "emilis" directory, then put the file) are fed to sftp on stdin.
.PHONY: deploy
deploy:
	printf 'cd emilis\nput index.gmi\n' | sftp -b - gemini.circumlunar.space
# Force a fresh download: discard the cached downloaded.gmi and rebuild
# sorted.gmi from scratch.
# $(RM) is `rm -f`, so a first run (no downloaded.gmi yet) does not fail.
# $(MAKE) instead of a literal `make` keeps flags and the jobserver intact.
.PHONY: get-feeds
get-feeds:
	$(RM) downloaded.gmi
	@$(MAKE) sorted.gmi
# Run the whole pipeline against test-feed.txt instead of the real feed list.
# feeds.txt is copied to backup-feeds.txt, replaced by test-feed.txt for the
# duration of the run, and copied back afterwards.
# The restore also happens when the nested build fails (the failure status is
# still propagated), so a broken test feed cannot leave feeds.txt overwritten.
.PHONY: test-feed
test-feed:
	@echo 'Current feeds.txt:'
	cat feeds.txt
	cp -i feeds.txt backup-feeds.txt
	cp -i test-feed.txt feeds.txt
	@$(MAKE) default || { status=$$?; cp backup-feeds.txt feeds.txt; exit $$status; }
	cp backup-feeds.txt feeds.txt
# Assemble the published page:
#   1. header.gmi
#   2. every line of sorted.gmi, with a "### YYYY-MM-DD" heading emitted each
#      time the first 10 characters of field 3 change; field 3 itself is
#      blanked before the line is printed
#   3. footer.gmi
#   4. an "Updated: <timestamp>" line (the %:z format is a GNU date extension)
# The generated link section is also echoed to stdout via tee.
# substr() starts at index 1: awk strings are 1-based, and a start of 0 yields
# only 9 characters in POSIX awk/gawk/mawk, which truncated the day and merged
# e.g. the 10th-19th under a single heading.
# No comments inside the awk program: its lines are joined by backslashes.
index.gmi: \
		header.gmi \
		sorted.gmi \
		footer.gmi
	cat header.gmi > "$@"
	awk '{ \
			day = substr($$3, 1, 10); \
			if (day != last_day) { \
				last_day = day; \
				printf "\n### %s\n", day; \
			} \
			$$3 = ""; \
			print; \
		}' sorted.gmi \
	| tee -a "$@"
	cat footer.gmi >> "$@"
	date +'Updated: %FT%T%:z' >> "$@"
# Order the collected link lines in reverse on field 3 onward, drop exact
# duplicate lines, and keep only the first NUM_LINKS_TO_SHOW of them.
sorted.gmi: downloaded.gmi
	sort -k 3 -r "$<" | uniq | head -n $(NUM_LINKS_TO_SHOW) > "$@"
# Extended regex for a dated gemtext link line:
#   "=> URL YYYY-MM-DD[Thh:mm[:ss](Z|+h:mm)] text"
dated_link_re := ^=>\s*\S+\s+[0-9]{4}-[0-9]{2}-[0-9]{2}(T[0-9]{2}:[0-9]{2}(:[0-9]{2})?(Z|\+[0-9]{1,2}:[0-9]{2}))?\s+\S.*$$

# Fetch every feed listed in feeds.txt (one URL per line, PARALLEL_DOWNLOADS at
# a time) with ./gcat and append its dated link lines to downloaded.gmi.
#
# Per feed:
#   * the page is saved to a temp file (and echoed to stdout for logging);
#   * TITLE is the first "# " heading of the page, carriage returns removed;
#   * links are handled in three groups, each appended to the target:
#       - "gemini://..." links are kept as they are;
#       - "/path" links are resolved against ROOT_URL (scheme://host), as an
#         absolute path must be, not against the feed's own directory;
#       - all remaining links are prefixed with BASE_URL (the feed URL up to
#         and including its last "/");
#   * when TITLE is non-empty, "TITLE - " is prepended to field 4 (the first
#     word of the link text).
#
# The URL is handed to bash as $1 and TITLE reaches awk through ENVIRON, so
# neither feeds.txt entries nor remote page content are ever parsed as shell or
# awk source code.
# No comments inside the script: its lines are joined by backslashes into a
# single shell line, so a "#" would swallow everything after it.
# NOTE(review): links with a scheme other than gemini:// (e.g. https://) still
# fall into the last group and get BASE_URL prefixed -- confirm whether they
# should be dropped or passed through instead.
downloaded.gmi: feeds.txt
	> "$@"
	xargs -a "$<" -P $(PARALLEL_DOWNLOADS) -I {} bash -c '\
		URL="$$1"; \
		TMP_FILE=$$(mktemp); \
		BASE_URL=$$( echo "$$URL" | grep -oE ".+/" ); \
		ROOT_URL=$$( echo "$$URL" | grep -oE "^[^:/]+://[^/]+" ); \
		[ -n "$$ROOT_URL" ] || ROOT_URL="$${BASE_URL%/}"; \
		echo "BASE_URL: $$BASE_URL"; \
		echo "DOWNLOADING: $$URL into $$TMP_FILE"; \
		./gcat "$$URL" \
		| tee "$$TMP_FILE"; \
		TITLE=$$(grep -s -m 1 -E "^# " "$$TMP_FILE" | cut -c 3- | sed "s/\r//g" ); \
		export TITLE; \
		ADD_TITLE="{ if (ENVIRON[\"TITLE\"] != \"\") { \$$4 = ENVIRON[\"TITLE\"] \" - \" \$$4 } print }"; \
		echo "TITLE: $$TITLE"; \
		echo "full urls:"; \
		grep -hsE "$(dated_link_re)" "$$TMP_FILE" \
		| grep -sE "^=>\s*gemini://" \
		| sed -E -e "s#^=>[[:space:]]*#=> #" \
		| awk "$$ADD_TITLE" \
		| tee -a "$@"; \
		echo "slash urls:"; \
		grep -hsE "$(dated_link_re)" "$$TMP_FILE" \
		| grep -hsE "^=>\s*/" \
		| sed -E -e "s#^=>[[:space:]]*#=> $${ROOT_URL}#" \
		| awk "$$ADD_TITLE" \
		| tee -a "$@"; \
		echo "no-slash urls:"; \
		grep -hsE "$(dated_link_re)" "$$TMP_FILE" \
		| grep -hsvE "^=>\s*gemini://" \
		| grep -hsvE "^=>\s*/" \
		| sed -E -e "s#^=>[[:space:]]*#=> $${BASE_URL}#" \
		| awk "$$ADD_TITLE" \
		| tee -a "$@"; \
		rm "$$TMP_FILE"; \
	' fetch-feed {}