Implemented support for relative links.

This commit is contained in:
Emilis Dambauskas 2020-11-20 02:41:12 +02:00
parent db1895122f
commit b4322c725e
1 changed file with 30 additions and 4 deletions

View File

@ -24,11 +24,37 @@ index.gmi: \
# sorted.gmi: the first 100 unique lines of downloaded.gmi after a reverse
# sort on the key that starts at whitespace-separated field 3.
# For link lines shaped "=> URL DATE TITLE" field 3 is the date, so this keeps
# the newest entries (assumes every line of the input has that shape).
# Only the "-k 3" command is kept: the earlier "-k 2" variant wrote to the
# same target and was overwritten immediately afterwards.
sorted.gmi: downloaded.gmi
	sort -k 3 -r "$<" | uniq | head -100 > "$@"
# downloaded.gmi: every dated link found in the feeds listed in feeds.txt.
#
# The target is truncated first; then xargs starts one bash script per line of
# feeds.txt (up to PARALLEL_DOWNLOADS at a time -- that variable is expected
# to be set outside this rule). Each script:
#   1. downloads the feed with ./gcat into a temp file (removed on exit);
#   2. takes the first "# " heading of the feed as TITLE;
#   3. keeps only link lines shaped
#        => TARGET YYYY-MM-DD[Thh:mm[:ss](Z|+hh:mm)] TEXT
#   4. rewrites TARGET to an absolute URL and appends the line to the target:
#        gemini://...  -> kept as is
#        /path         -> resolved against scheme://host of the feed URL
#        path          -> resolved against the directory of the feed URL
#      Links that carry any other scheme (https:, gopher:, ...) are dropped;
#   5. prefixes the link text (field 4 onwards) with "TITLE - " when a title
#      was found.
#
# Notes on the implementation:
#   * The feed URL reaches the script as $1 and the title reaches awk through
#     ENVIRON, so neither is ever pasted into shell or awk program text.
#   * The script is a single shell line (every line ends in "\"), so it cannot
#     contain comments and every statement must end with ";".
#   * The script text must not contain the literal "{}" -- xargs -I would
#     replace it.
#   * Progress messages go to stdout, as do the appended lines (via tee).
downloaded.gmi: feeds.txt
	> "$@"
	xargs -a feeds.txt -P ${PARALLEL_DOWNLOADS} -I {} bash -c '\
		URL="$$1"; \
		TMP_FILE=$$(mktemp) || exit 1; \
		trap "rm -f \"$$TMP_FILE\"" EXIT; \
		ROOT_URL=$$(printf "%s\n" "$$URL" | grep -oE "^[A-Za-z][A-Za-z0-9+.-]*://[^/]+"); \
		FEED_PATH=$${URL#"$$ROOT_URL"}; \
		BASE_URL="$$ROOT_URL$${FEED_PATH%/*}/"; \
		echo "BASE_URL: $$BASE_URL"; \
		echo "DOWNLOADING: $$URL into $$TMP_FILE"; \
		./gcat "$$URL" > "$$TMP_FILE"; \
		TITLE=$$(grep -s -m 1 -E "^# " "$$TMP_FILE" | cut -c 3-); \
		export TITLE; \
		echo "TITLE: $$TITLE"; \
		LINK_RE="^=>\s*\S+\s+[0-9]{4}-[0-9]{2}-[0-9]{2}(T[0-9]{2}:[0-9]{2}(:[0-9]{2})?(Z|\+[0-9]{1,2}:[0-9]{2}))?\s+\S.*$$"; \
		ADD_TITLE="{ if (ENVIRON[\"TITLE\"] != \"\") { \$$4 = ENVIRON[\"TITLE\"] \" - \" \$$4 } print }"; \
		echo "full urls:"; \
		grep -hsE "$$LINK_RE" "$$TMP_FILE" \
			| grep -sE "^=>\s*gemini://" \
			| sed -E -e "s#^=>[[:space:]]*#=> #" \
			| awk "$$ADD_TITLE" \
			| tee -a "$@"; \
		echo "slash urls:"; \
		grep -hsE "$$LINK_RE" "$$TMP_FILE" \
			| grep -sE "^=>\s*/" \
			| sed -E -e "s#^=>[[:space:]]*/#=> $${ROOT_URL}/#" \
			| awk "$$ADD_TITLE" \
			| tee -a "$@"; \
		echo "no-slash urls:"; \
		grep -hsE "$$LINK_RE" "$$TMP_FILE" \
			| grep -svE "^=>\s*(/|[A-Za-z][A-Za-z0-9+.-]*:)" \
			| sed -E -e "s#^=>[[:space:]]*#=> $${BASE_URL}#" \
			| awk "$$ADD_TITLE" \
			| tee -a "$@"; \
	' gemfeed {}