trust-store-generators/get-hosts.sh

120 lines
3.1 KiB
Bash
Executable File

#!/bin/sh
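# Download and merge lists of Gemini hosts from:
# gemini://geminispace.info/known-hosts
# gemini://gemini.bortzmeyer.org/software/lupa/lupa-capsules.txt
# NOTE(review): gemini://auragem.space/search/capsules is also listed as a
# source, but nothing below downloads it - confirm whether it should be added.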
# Strict mode: stop at the first command that fails (errexit, -e) and treat
# any reference to an unset variable as an error (nounset, -u).
set -eu
# set -x  # uncomment to trace every command while debugging
# Remember when the run started (seconds since the epoch) so the total
# duration can be reported at the end.
timestamp_start=$(date +%s)
# Work from the directory that contains this script, so relative paths such
# as "hosts" and "excluded-hosts" resolve next to it.
cd "$(dirname "$0")" || exit
# Source 1: the geminispace.info known-hosts page. Only lines containing
# "gemini://" are kept and the third space-separated field of each is used as
# the host entry. agunua's stderr is discarded; because the pipeline ends in
# cut, a failed download simply yields empty output, which is reported below.
hosts1=$(agunua --binary --maximum-time 20 \
  gemini://geminispace.info/known-hosts 2>/dev/null \
  | grep "gemini://" | cut -d ' ' -f 3)
if [ -z "$hosts1" ]; then
  echo "geminispace.info/known-hosts download failed." >&2
  exit 1
fi

# Source 2: the Lupa capsule list, used exactly as downloaded.
# This is a bare command substitution, so under errexit a non-zero exit from
# agunua would abort the script right here, before the error message below
# could be printed. Map any failure to an empty result instead so that it is
# reported the same way as for the first source.
hosts2=$(agunua --binary --maximum-time 20 \
  gemini://gemini.bortzmeyer.org/software/lupa/lupa-capsules.txt 2>/dev/null) \
  || hosts2=""
if [ -z "$hosts2" ]; then
  echo "lupa-capsules.txt download failed." >&2
  exit 1
fi

# Concatenate both lists, one entry per line.
hosts="$hosts1
$hosts2"
# Cleanup handler: delete the scratch file. Registered below so that it runs
# whenever the script exits; "rm -f" keeps it quiet if the file is already gone.
finish() {
  rm -f "$tempfile"
}
# Scratch file that later steps use to hold intermediate host lists.
tempfile=$(mktemp)
trap finish EXIT
# Normalize a newline-separated host list read from stdin and write the result
# to stdout. For every line, in this order:
#   1. trim leading and trailing blanks (spaces and tabs),
#   2. remove a redundant ":1965" port suffix,
#   3. remove one redundant trailing ".",
#   4. convert the hostname to lowercase.
# Empty lines are passed through unchanged.
normalize_hosts() {
  sed -e 's/^[[:blank:]]*//' \
    -e 's/[[:blank:]]*$//' \
    -e 's/:1965$//' \
    -e 's/\.$//' \
    | tr '[:upper:]' '[:lower:]'
}
# Clean up the whole list in a single pass. Streaming through one sed and one
# tr avoids starting a new process for every host, and because the result is
# captured directly there is no need to pass it through the temporary file to
# get it out of a pipeline subshell.
hosts=$(printf '%s\n' "$hosts" | normalize_hosts)
# Remove empty lines; convert punycode to unicode; sort entries; remove duplicates.
hosts=$(printf '%s\n' "$hosts" \
  | awk NF \
  | idn --allow-unassigned --idna-to-unicode \
  | sort -fu)
# The two grep filters below exit with status 1 when they select no lines at
# all. Under errexit that would abort the script silently, so status 1 is
# tolerated here (the empty list is reported further down), while a real grep
# error (status > 1, e.g. unreadable "excluded-hosts") still stops the script.
#
# Remove hosts which contain neither "." nor ":", such as "localhost".
# A bracket expression is used because "\|" is not part of POSIX basic
# regular expressions.
hosts=$(printf '%s\n' "$hosts" | grep '[.:]') || {
  status=$?
  [ "$status" -eq 1 ] || exit "$status"
}
# Remove explicitly excluded hosts: every line of "excluded-hosts" is an
# extended regular expression that must match a whole entry.
hosts=$(printf '%s\n' "$hosts" | grep -vxEf excluded-hosts) || {
  status=$?
  [ "$status" -eq 1 ] || exit "$status"
}
# Check for an empty list first; otherwise the onion check below would fail
# first and report a misleading message.
if [ -z "$hosts" ]; then
  echo "hosts file downloads failed." >&2
  exit 1
fi
# Sanity check: at least one entry must end in ".onion", optionally followed
# by a ":port" suffix.
if ! printf '%s\n' "$hosts" | grep -qE '\.onion(:.*)?$'; then
  echo "The .onions are missing!" >&2
  exit 1
fi
# Save the filtered list to the temporary file so that it can be compared
# with the previous "hosts" file.
printf '%s\n' "$hosts" > "$tempfile"
# Probe a Gemini host by requesting its root page.
# $1: "tor" to connect through the SOCKS proxy at 127.0.0.1:9050;
#     anything else (normally "") for a direct connection
# $2: host[:port]
# Exit status: that of agunua. All of its output is discarded.
# Note: "host" and "tor" are ordinary global variables.
test_host() {
  host="$2"
  case "$1" in
    tor) tor="--socks 127.0.0.1:9050" ;;
    *) tor="" ;;
  esac
  # $tor is deliberately unquoted: it must split into two arguments when set
  # and disappear completely when empty.
  agunua $tor \
    --no-tofu --accept-expired-certificate --ignore-missing-tls-close \
    --maximum-time 20 "gemini://${host}/" > /dev/null 2>&1
}
# Format a duration as HH:MM:SS using shell arithmetic only, so it does not
# depend on GNU date's "-d @N" and the hour count does not wrap after 24 hours.
# $1: duration in whole seconds (non-negative)
format_duration() {
  printf '%02d:%02d:%02d' "$(($1 / 3600))" "$(($1 % 3600 / 60))" "$(($1 % 60))"
}
# Test if removed hosts are still online.
if [ -f hosts ]; then
  echo "Testing removed hosts..."
  # Removed hosts are the entries of the previous "hosts" file that do not
  # appear in the fresh list. An exact-line set difference is used instead of
  # diff, so a different sort order between the two files cannot make a host
  # that is still present look removed. Hosts matching "excluded-hosts" are
  # skipped: they were dropped on purpose and must not be added back just
  # because they are online.
  # A non-zero status only means that nothing was selected (or that an input
  # was unreadable); either way there is then nothing to re-test.
  removed_hosts=$(grep -vxFf "$tempfile" hosts | grep -vxEf excluded-hosts) || true
  # Read the list line by line, so entries are neither word-split nor
  # glob-expanded, on file descriptor 3, so commands run inside the loop
  # cannot consume it through their stdin. The loop is not part of a
  # pipeline, which keeps the changes to $hosts visible after it.
  while IFS= read -r removed_host <&3; do
    [ -n "$removed_host" ] || continue
    printf "%s" "$removed_host"
    # If direct connection fails, try to connect through Tor.
    if test_host "" "$removed_host" || test_host "tor" "$removed_host"; then
      echo " - ONLINE"
      # Add removed host back.
      hosts="$hosts
$removed_host"
    else
      echo " - offline"
    fi
  done 3<<EOF
$removed_hosts
EOF
fi
# Sort entries again, dropping any duplicate that re-adding hosts may have
# introduced.
hosts=$(printf '%s\n' "$hosts" | sort -u)
# Save to file.
printf '%s\n' "$hosts" > hosts
timestamp_end=$(date +%s)
exec_time=$((timestamp_end - timestamp_start))
exec_time_formatted=$(format_duration "$exec_time")
echo "OK (duration: $exec_time_formatted)"