Implemented support for relative links.
This commit is contained in:
parent
db1895122f
commit
b4322c725e
34
Makefile
34
Makefile
|
@ -24,11 +24,37 @@ index.gmi: \
|
|||
|
||||
|
||||
# sorted.gmi: built from downloaded.gmi. The recipe reverse-sorts the lines on
# a key that starts at a whitespace-separated field, drops adjacent duplicate
# lines with uniq, and keeps the first 100 lines of the result.
# NOTE(review): this is a rendered diff. The two `cat` lines below look like
# the removed (-k 2) and added (-k 3) versions of one recipe line - confirm
# against the real Makefile before relying on either.
sorted.gmi: downloaded.gmi
|
||||
cat "$<" | sort -k 2 -r | uniq | head -100 > "$@"
|
||||
cat "$<" | sort -k 3 -r | uniq | head -100 > "$@"
|
||||
|
||||
|
||||
# downloaded.gmi: rebuilt whenever feeds.txt changes. The first recipe line
# truncates the target; the pipeline after it runs ./gcat once per entry of
# feeds.txt (PARALLEL_DOWNLOADS processes at a time), keeps only lines shaped
# like "=> gemini://URL DATE TITLE" (date optionally followed by a time and
# zone), and appends them to the target.
# NOTE(review): this is a rendered diff. The xargs/grep/append lines below
# look like the removed side of the change - confirm against the real Makefile.
downloaded.gmi: feeds.txt
|
||||
> "$@"
|
||||
xargs -a feeds.txt -P ${PARALLEL_DOWNLOADS} -n 1 ./gcat \
|
||||
| grep -E "^=>\s*gemini://\S+\s+[0-9]{4}-[0-9]{2}-[0-9]{2}(T[0-9]{2}:[0-9]{2}(:[0-9]{2})?(Z|\+[0-9]{1,2}:[0-9]{2}))?\s+\S.*$$" \
|
||||
>> "$@"
|
||||
# Download every feed listed in feeds.txt (one URL per line, PARALLEL_DOWNLOADS
# at a time) and append its dated link lines to the target as
#     => ABSOLUTE-URL DATE [FEED TITLE - ]ENTRY TITLE
#
# Per feed, the embedded bash script:
#   * takes the feed URL as a positional argument instead of having xargs
#     paste it into the script text, so URLs with quotes or shell
#     metacharacters cannot alter the script;
#   * derives BASE_URL (feed URL up to its last slash) and ROOT_URL
#     (scheme://authority) for resolving relative links;
#   * fetches the feed with ./gcat into a temp file and reads the feed title
#     from the first "# " heading;
#   * selects link lines that carry a date (optionally time and zone) and a
#     title, then handles three disjoint groups:
#       - full urls:    gemini:// links, kept as they are;
#       - slash urls:   absolute-path links, resolved against ROOT_URL;
#       - no-slash urls: relative-path links, resolved against BASE_URL.
#         Links with any other scheme (https:, gopher:, ...) are skipped
#         rather than being mistaken for relative paths.
#   * normalises every emitted line to start with "=> " so that the entry
#     title always begins at field 4, which is where the feed title is
#     prepended.
#
# The script is a single-quoted string joined by backslash-newlines, so it
# must not contain single quotes or shell comments. -n 1 is omitted because
# -I already makes xargs run one command per input line.
	xargs -a feeds.txt -P ${PARALLEL_DOWNLOADS} -I {} bash -c '\
		URL="$$1"; \
		DATED="^=>\s*\S+\s+[0-9]{4}-[0-9]{2}-[0-9]{2}(T[0-9]{2}:[0-9]{2}(:[0-9]{2})?(Z|\+[0-9]{1,2}:[0-9]{2}))?\s+\S.*$$"; \
		BASE_URL=$$(printf "%s\n" "$$URL" | grep -oE ".+/"); \
		ROOT_URL=$$(printf "%s\n" "$$URL" | grep -oE "^[A-Za-z][A-Za-z0-9+.-]*://[^/]+"); \
		TMP_FILE=$$(mktemp) || exit 1; \
		echo "BASE_URL: $$BASE_URL"; \
		echo "DOWNLOADING: $$URL into $$TMP_FILE"; \
		./gcat "$$URL" > "$$TMP_FILE"; \
		TITLE=$$(grep -s -m 1 -E "^# " "$$TMP_FILE" | cut -c 3-); \
		echo "TITLE: $$TITLE"; \
		ENTRIES=$$(grep -hsE "$$DATED" "$$TMP_FILE"); \
		add_title() { awk -v title="$$TITLE" "{ if (title != \"\") { \$$4 = title \" - \" \$$4 } print }"; }; \
		echo "full urls:"; \
		printf "%s\n" "$$ENTRIES" \
			| grep -sE "^=>\s*gemini://" \
			| sed -E -e "s#^=>[[:space:]]*#=> #" \
			| add_title \
			| tee -a "$@"; \
		echo "slash urls:"; \
		printf "%s\n" "$$ENTRIES" \
			| grep -sE "^=>\s*/" \
			| sed -E -e "s#^=>[[:space:]]*/#=> $${ROOT_URL}/#" \
			| add_title \
			| tee -a "$@"; \
		echo "no-slash urls:"; \
		printf "%s\n" "$$ENTRIES" \
			| grep -svE "^=>\s*[A-Za-z][A-Za-z0-9+.-]*:" \
			| grep -sE "^=>\s*[^/[:space:]]" \
			| sed -E -e "s#^=>[[:space:]]*#=> $${BASE_URL}#" \
			| add_title \
			| tee -a "$@"; \
		rm -f "$$TMP_FILE"; \
	' _ {}
|
||||
|
|
Loading…
Reference in New Issue