Compare commits

...

5 Commits

Author SHA1 Message Date
nervuri 418a53cd4f
output certs to stderr on Tor verification failure 2021-06-04 14:28:16 +03:00
nervuri ccfc936c4a
fix IDN conversion failure for some emoji domains
For example: 🦈🖥.ws
2021-06-04 14:25:24 +03:00
nervuri 77b61c8fc0
increased timeouts 2021-06-04 14:23:18 +03:00
nervuri bdd4db2df9
reduced number of retries by 1 2021-06-04 14:22:35 +03:00
nervuri feb651d698
add hosts from Lupa 2021-06-04 14:20:22 +03:00
3 changed files with 34 additions and 33 deletions

View File

@ -4,7 +4,7 @@ Geminispace is (currently) small enough that we can afford to download all known
This repo contains scripts for:
1. downloading a list of hosts from gemini://geminispace.info/known-hosts
1. downloading a list of hosts from [geminispace.info](gemini://geminispace.info/known-hosts) and [Lupa](gemini://gemini.bortzmeyer.org/software/lupa/lupa-capsules.txt)
2. downloading the TLS certificates of those hosts
3. generating a table containing details about each certificate
4. generating trust stores for various Gemini clients, currently:

View File

@ -33,18 +33,11 @@ fetch_cert() (
response=$($timeout $torsocks openssl s_client -connect "$hp" </dev/null 2>/dev/null \
| sed -ne '/-BEGIN CERTIFICATE-/,/-END CERTIFICATE-/p')
# If no response, try again.
if [ -z "$response" ]; then
# If no response and Tor was not used, try again.
if [ -z "$response" ] && [ -z "$torsocks" ]; then
sleep 5
response=$($timeout $torsocks openssl s_client -connect "$hp" </dev/null 2>/dev/null \
response=$($timeout openssl s_client -connect "$hp" </dev/null 2>/dev/null \
| sed -ne '/-BEGIN CERTIFICATE-/,/-END CERTIFICATE-/p')
# If still no response and Tor was not used, try one last time.
if [ -z "$response" ] && [ -z "$torsocks" ]; then
sleep 10
response=$($timeout openssl s_client -connect "$hp" </dev/null 2>/dev/null \
| sed -ne '/-BEGIN CERTIFICATE-/,/-END CERTIFICATE-/p')
fi
fi
echo "$response"
@ -74,7 +67,7 @@ while read -r host; do
fi
# Hostname to punycode.
host=$(echo "$host" | idn)
host=$(echo "$host" | idn --allow-unassigned)
# Hostname to lowercase.
host=$(echo "$host" | tr '[:upper:]' '[:lower:]')
@ -88,7 +81,7 @@ while read -r host; do
fi
# Get cert.
cert=$(fetch_cert "$host_and_port" 'timeout 5')
cert=$(fetch_cert "$host_and_port" 'timeout 10')
if [ -z "$cert" ]; then
>&2 echo "$host_and_port - connection failed"
@ -104,13 +97,18 @@ while read -r host; do
exit 1
fi
cert_via_tor=$(fetch_cert "$host_and_port" 'timeout 15' 'torsocks')
cert_via_tor=$(fetch_cert "$host_and_port" 'timeout 25' 'torsocks')
if [ -z "$cert_via_tor" ]; then
>&2 echo "$host_and_port - Tor connection failed"
elif [ -n "$cert" ] && [ "$cert" != "$cert_via_tor" ]; then
>&2 echo "$host_and_port - Tor VERIFICATION FAILED (certs don't match)!!!"
# In this case, don't save the certificate.
# In this case, don't save any certificate to file.
# Output both certificates to stderr instead.
>&2 echo "CERT:"
>&2 echo "$cert"
>&2 echo "CERT VIA TOR:"
>&2 echo "$cert_via_tor"
continue
else
# If direct connection failed and Tor connection succeeded,
@ -124,7 +122,7 @@ while read -r host; do
# If we got a cert back, then the host and port were valid,
# so they are safe to include in a file name.
# Convert from punycode to unicode, if needed.
host_and_port=$(echo "$host_and_port" | idn --idna-to-unicode)
host_and_port=$(echo "$host_and_port" | idn --allow-unassigned --idna-to-unicode)
echo "$cert" > "certs/${host_and_port}.pem"
printf ' - OK'
else

View File

@ -1,6 +1,8 @@
#!/bin/sh
# Download a list of Gemini hosts from gemini://geminispace.info/known-hosts
# Download and merge lists of Gemini hosts from:
# gemini://geminispace.info/known-hosts
# gemini://gemini.bortzmeyer.org/software/lupa/lupa-capsules.txt
set -o errexit # (-e) exit immediately if any command has a non-zero exit status
set -o nounset # (-u) don't accept undefined variables
@ -12,33 +14,34 @@ cd "$(dirname "$0")" || exit
# If Agunua is installed, use it.
if command -v agunua >/dev/null; then
# Using Agunua is more secure, because it does certificate pinning.
hosts=$(agunua --insecure --binary gemini://geminispace.info/known-hosts \
# The --insecure option just makes it accept certificates that are
# not signed by a (known) CA.
hosts1=$(agunua --insecure --binary gemini://geminispace.info/known-hosts 2>/dev/null \
| grep "gemini://" | cut -d ' ' -f 3)
hosts2=$(agunua --insecure --binary gemini://gemini.bortzmeyer.org/software/lupa/lupa-capsules.txt 2>/dev/null)
else
# If Agunua is not installed, pipe the request into OpenSSL s_client.
hosts=$(printf "gemini://geminispace.info/known-hosts\r\n" \
hosts1=$(printf "gemini://geminispace.info/known-hosts\r\n" \
| timeout 5 openssl s_client -quiet -connect "geminispace.info:1965" 2>/dev/null \
| grep "gemini://" | cut -d ' ' -f 3)
hosts2=$(printf "gemini://gemini.bortzmeyer.org/software/lupa/lupa-capsules.txt\r\n" \
| timeout 5 openssl s_client -quiet -connect "gemini.bortzmeyer.org:1965" 2>/dev/null \
| tail -n +2)
fi
# Concatenate the two files.
hosts="$hosts1
$hosts2"
# Convert punycode to unicode; sort entries; remove duplicates; remove empty lines.
hosts=$(echo "$hosts" | idn --allow-unassigned --idna-to-unicode | sort -fu | awk NF)
if [ -z "$hosts" ]; then
>&2 echo "hosts file download failed."
>&2 echo "hosts file downloads failed."
exit 1
fi
# Add a few hosts that are missing from geminispace.info
hosts="$hosts
campaignwiki.org
feeds.drewdevault.com
gem.adele.work
makeworld.gq
qwertqwefsday.eu:80
simplynews.metalune.xyz"
# Remove a host which changes its cert every few minutes.
hosts=$(echo "$hosts" | grep -v 'tofu-tester.random-projects.net:1966')
# Save to file.
echo "$hosts" | sort | uniq > hosts
echo "$hosts" > hosts
echo OK