2020-11-20 01:00:17 +00:00
|
|
|
# Maximum number of feed entries kept in sorted.gmi (passed to head).
NUM_LINKS_TO_SHOW := 50
|
2020-11-17 22:07:06 +00:00
|
|
|
# Number of feeds fetched concurrently (passed to xargs -P).
PARALLEL_DOWNLOADS := 8
|
|
|
|
|
|
|
|
|
|
|
|
# Default goal: refresh the downloaded feeds, then rebuild the page.
default: get-feeds build
.PHONY: default
|
|
|
|
|
|
|
|
|
|
|
|
# Build the aggregated page from whatever feed data is already on disk.
build: index.gmi
.PHONY: build
|
|
|
|
|
|
|
|
|
|
|
|
# Force a fresh download: drop the cached downloaded.gmi and rebuild
# sorted.gmi (which depends on it) in a sub-make.
#  - rm -f: a missing downloaded.gmi (e.g. first run) must not abort the target.
#  - $(MAKE) instead of a literal "make": keeps command-line flags, -n/-k
#    handling and the jobserver working in the sub-make.
.PHONY: get-feeds
get-feeds:
	rm -f downloaded.gmi
	@$(MAKE) sorted.gmi
|
|
|
|
|
|
|
|
|
|
|
|
# Assemble the final page: header.gmi, then the entries of sorted.gmi grouped
# under one "## YYYY-MM-DD" heading per day, then footer.gmi.
#
# Field 3 of every sorted.gmi line is read as the entry date; its first 10
# characters are the day. substr() positions are 1-based: a start of 0 makes
# awks that count POSIX-style return only 9 characters, which would collapse
# e.g. 2020-11-20 and 2020-11-21 into the same "## 2020-11-2" heading.
# After the heading logic the date field is blanked so that it is not repeated
# on the link line itself. The generated body is also echoed to stdout by tee.
index.gmi: header.gmi sorted.gmi footer.gmi
	cat header.gmi > "$@"
	awk '\
		{ \
			day = substr($$3, 1, 10); \
			if (day != last_day) { \
				last_day = day; \
				printf "## %s\n", day; \
			} \
			$$3 = ""; \
			print \
		}' sorted.gmi \
		| tee -a "$@"
	cat footer.gmi >> "$@"
|
2020-11-17 22:07:06 +00:00
|
|
|
|
|
|
|
|
|
|
|
# Newest-first list of entries: reverse-sort on everything from field 3
# onwards, drop adjacent duplicate lines, keep the first NUM_LINKS_TO_SHOW.
sorted.gmi: downloaded.gmi
	sort -k 3 -r "$<" | uniq | head -n $(NUM_LINKS_TO_SHOW) > "$@"
|
2020-11-17 22:07:06 +00:00
|
|
|
|
|
|
|
|
2020-11-19 16:56:46 +00:00
|
|
|
# Fetch every feed URL listed in feeds.txt (one per line, PARALLEL_DOWNLOADS at
# a time) with ./gcat and append its dated link lines to downloaded.gmi.
# NOTE(review): ./gcat is a local helper not shown here; it is assumed to write
# the fetched page to stdout.
#
# For each feed the worker:
#   1. downloads the page into a temp file and takes the first "# " heading as
#      the feed title;
#   2. keeps only link lines of the form "=> URL YYYY-MM-DD[Thh:mm[:ss]TZ] text"
#      (ENTRY_RE; TZ is Z or a +/- offset);
#   3. emits them in three passes: absolute gemini:// links unchanged,
#      root-relative links ("/path") resolved against the scheme://host/ of the
#      feed URL, and all other links resolved against the feed directory;
#   4. prefixes field 4 (first word of the link text) with "TITLE - " when a
#      title was found.
#
# Implementation notes:
#   - The feed URL reaches the worker as positional parameter $$1 instead of
#     being spliced into the script text, so characters in feeds.txt cannot be
#     interpreted as shell code. "-n 1" was dropped because xargs treats it as
#     mutually exclusive with -I.
#   - The title reaches awk through the environment (ENVIRON) rather than being
#     pasted into the awk program, so quotes/backslashes in a title are safe.
#   - No comments may appear inside the quoted script: its lines are joined by
#     backslash-newline, so a "#" would comment out the rest of the script.
#   - Progress messages and the collected lines are echoed to stdout (tee).
downloaded.gmi: feeds.txt
	: > "$@"
	xargs -a "$<" -P $(PARALLEL_DOWNLOADS) -I {} bash -c '\
		URL="$$1"; \
		TMP_FILE=$$(mktemp) || exit 1; \
		ENTRY_RE="^=>\s*\S+\s+[0-9]{4}-[0-9]{2}-[0-9]{2}(T[0-9]{2}:[0-9]{2}(:[0-9]{2})?(Z|[+-][0-9]{1,2}:[0-9]{2}))?\s+\S.*$$"; \
		ADD_TITLE="{ if (ENVIRON[\"TITLE\"] != \"\") { \$$4 = ENVIRON[\"TITLE\"] \" - \" \$$4 } print }"; \
		BASE_URL=$$(printf "%s\n" "$$URL" | grep -oE ".+/"); \
		ROOT_URL=$$(printf "%s\n" "$$URL" | grep -oE "^[A-Za-z][A-Za-z0-9+.-]*://[^/]+"); \
		if [ -n "$$ROOT_URL" ]; then ROOT_URL="$$ROOT_URL/"; else ROOT_URL="$$BASE_URL"; fi; \
		echo "BASE_URL: $$BASE_URL"; \
		echo "DOWNLOADING: $$URL into $$TMP_FILE"; \
		./gcat "$$URL" > "$$TMP_FILE"; \
		TITLE=$$(grep -s -m 1 -E "^# " "$$TMP_FILE" | cut -c 3-); \
		export TITLE; \
		echo "TITLE: $$TITLE"; \
		echo "full urls:"; \
		grep -hsE "$$ENTRY_RE" "$$TMP_FILE" \
			| grep -sE "^=>\s*gemini://" \
			| awk "$$ADD_TITLE" \
			| tee -a "$@"; \
		echo "slash urls:"; \
		grep -hsE "$$ENTRY_RE" "$$TMP_FILE" \
			| grep -hsE "^=>\s*/" \
			| sed -E -e "s#^=>[ ]*/#=> $${ROOT_URL}#g" \
			| awk "$$ADD_TITLE" \
			| tee -a "$@"; \
		echo "no-slash urls:"; \
		grep -hsE "$$ENTRY_RE" "$$TMP_FILE" \
			| grep -hsvE "^=>\s*gemini://" \
			| grep -hsE "^=>\s*[^/]" \
			| sed -E -e "s#^=>[ ]*#=> $${BASE_URL}#g" \
			| awk "$$ADD_TITLE" \
			| tee -a "$@"; \
		rm -f "$$TMP_FILE"; \
	' _ {}
|