webify-gemini.sh/webify-gemini.sh

#!/usr/bin/env bash

# Argument handling: show the usage text and stop unless help was requested or
# exactly four arguments were supplied. The text keeps its "\n" escapes
# because it is always printed with `echo -e`.
usageText="Usage:\n    webify-gemini.sh <Gemini source directory> <HTML source directory> <Capsule address> <Website address>\n\nExample:\n    webify-gemini.sh /var/gemini/users/$USER/ /home/$USER/public_html/ gemini://gemini.ctrl-c.club/~$USER/ http://ctrl-c.club/~$USER/"
if [ $# -eq 0 ]; then
    echo -e "No arguments supplied. \n\nCorrect $usageText"
    exit 1
elif [ $# -eq 1 ] && { [ "$1" == "-h" ] || [ "$1" == "--help" ]; }; then
    echo -e "$usageText"
    exit 0
elif [ $# -ne 4 ]; then
    # This arm is also reached for a single argument that is not a help flag.
    # That case used to fall through silently and run the conversion with an
    # empty HTML directory and empty addresses.
    echo -e "Incorrect number of arguments.\n\nCorrect $usageText"
    exit 1
fi
# Print $1 with one trailing slash removed, if it has one. Parameter expansion
# is used instead of a `rev | cut | rev` pipeline: it needs no extra processes
# and does not mangle values that contain a newline.
function trimTrailingSlash() {
    printf '%s' "${1%/}"
}

# Variables, with the ending slash trimmed so that paths and addresses can be
# joined to other parts with a single "/"
geminiDirectory=$(trimTrailingSlash "$1")
htmlDirectory=$(trimTrailingSlash "$2")
geminiAddress=$(trimTrailingSlash "$3")
webAddress=$(trimTrailingSlash "$4")

# Find all .gmi and .gemini files in the user's gemini directory, then print
# their paths relative to that directory, one per line, with the file
# extension removed. All *.gmi matches are printed before any *.gemini match.
# Globals: geminiDirectory (read)
function findFilesWithoutExtension() {
    local pattern file
    for pattern in '*.gmi' '*.gemini'
    do
        # Directories are skipped: a directory that merely happens to be named
        # like a gemtext file is not a document that can be converted
        find "$geminiDirectory/" ! -type d -name "$pattern" -printf "%P\n" |
            while IFS= read -r file
            do
                # IFS= keeps leading and trailing blanks in the name, and
                # ${file%.*} drops the last extension without extra processes
                printf '%s\n' "${file%.*}"
            done
    done
}

# Print every directory below the user's gemini directory, one per line, as a
# path relative to that directory. The gemini directory itself is listed too,
# as an empty line.
# Globals: geminiDirectory (read)
function findDirectories() {
    local root="${geminiDirectory}/"
    find "$root" -type d -printf '%P\n'
}

# Make sure all the subdirectories exist. The list is read line by line rather
# than word-split, so directory names containing spaces or glob characters are
# recreated under their real name. The first line of the list is empty (the
# gemini directory itself), which makes sure the HTML directory itself exists.
while IFS= read -r directory
do
    mkdir -p -- "$htmlDirectory/$directory"
done < <(findDirectories)

# Copy all non-gemtext files into the user's public_html folder. find does the
# filtering on the .gmi/.gemini suffix and the names are passed NUL-delimited,
# so names with leading or trailing blanks, backslashes or newlines reach cp
# unchanged.
find "$geminiDirectory/" -type f ! -name '*.gmi' ! -name '*.gemini' -printf '%P\0' |
    while IFS= read -r -d '' file
    do
        cp -- "$geminiDirectory/$file" "$htmlDirectory/$file"
    done

# Write a starter stylesheet to $htmlDirectory/style.css unless that file is
# already there. The quoted here-document delimiter keeps the text literal.
if [[ ! -f "$htmlDirectory/style.css" ]]; then
    cat > "$htmlDirectory/style.css" <<'EOF'
/* This file controls the styling of your website. For an example style.css
file, see http://ctrl-c.club/~lovetocode999/style.css */

/* Normal text */
p {}

/* Links */
a {}

/* Headers */
h1 {}

/* Sub-headers */
h2 {}

/* Sub-sub-headers */
h3 {}

/* Bulleted lists */
ul {}

/* List items */
li {}

/* Blockquotes */
blockquote {}

/* Preformatted text */
pre {}

/* Applies to all separate elements */
* {}

/* Applies to the entire page as one element */
body {}
EOF
fi

# Make sure $htmlDirectory/footer.html and $htmlDirectory/header.html exist,
# creating an empty file for whichever one is missing
for fragment in footer.html header.html
do
    if [[ ! -f "$htmlDirectory/$fragment" ]]; then
        touch "$htmlDirectory/$fragment"
    fi
done

# Convert all the gemtext files into html files, and write them into the user's
# public_html directory

# Print the HTML-escaped text of the first "# " heading in gemtext file $1, for
# use as the page title. Prints nothing when the file has no such heading.
function gemtextTitle() {
    awk '
    $1 == "#" {
        # Remove the marker and the surrounding blanks from the line itself.
        # Blanking the fields instead left a space and a backspace character
        # at the start of the title.
        sub(/^[ \t]*#[ \t]*/, "")
        sub(/[ \t]+$/, "")
        gsub(/&/, "\\&amp;")
        gsub(/</, "\\&lt;")
        gsub(/>/, "\\&gt;")
        print
        exit
    }
    ' "$1"
}

# Print gemtext file $1 converted to the HTML that goes between the header
# and footer fragments.
# Globals: geminiAddress (read), webAddress (read)
function gemtextToHtmlBody() {
    # The gemini address and web address are passed to awk as variables
    awk -v geminiAddress="$geminiAddress" -v webAddress="$webAddress" '
    # Escape the characters that are special in HTML text. The ampersand goes
    # first so the entities produced for < and > are not escaped again.
    function escapeHtml(text) {
        gsub(/&/, "\\&amp;", text)
        gsub(/</, "\\&lt;", text)
        gsub(/>/, "\\&gt;", text)
        return text
    }
    # Escape a value for use inside a double-quoted HTML attribute
    function escapeAttribute(text) {
        text = escapeHtml(text)
        gsub("\"", "\\&quot;", text)
        return text
    }
    # Print the closing </ul> tag if the previous line was part of a bulleted
    # list. Every line type except a list item calls this first.
    function closeList() {
        if (bullist) {
            print "    </ul>"
            bullist = 0
        }
    }
    # Return the current line without its first skip fields, with the blanks
    # around the remaining text removed, HTML-escaped
    function lineText(skip,    text) {
        text = $0
        while (skip-- > 0) {
            sub(/^[ \t]*[^ \t]+/, "", text)
        }
        sub(/^[ \t]+/, "", text)
        sub(/[ \t]+$/, "", text)
        return escapeHtml(text)
    }
    # True if url is prefix itself, or continues it at a path, query or
    # fragment boundary. The comparison is literal, not a regular expression.
    function startsWith(url, prefix,    following) {
        if (prefix == "" || substr(url, 1, length(prefix)) != prefix) {
            return 0
        }
        following = substr(url, length(prefix) + 1, 1)
        return following == "" || following == "/" || following == "?" || following == "#"
    }
    BEGIN {
        # These variables will be used later on
        headnum = 0 # Number of headers
        subheadnum = 0 # Number of sub-headers
        subsubheadnum = 0 # Number of sub-sub-headers
        pre = 0 # Whether or not the current text is preformatted
        bullist = 0 # Whether or not a bulleted list was just formed
    }
    {
        # The line type is decided on the raw line, and the text is escaped
        # only when it is printed. Escaping first turned the blockquote marker
        # into &gt; so blockquotes were never recognised.
        # A line starting with ``` toggles preformatted text. The fence may
        # carry alt text, as in ```python
        if ($1 ~ /^```/) {
            closeList()
            pre = !pre
            print (pre ? "    <pre>" : "    </pre>")
        }
        # If the text is preformatted, just print it without checking for any
        # formatting
        else if (pre) {
            print escapeHtml($0)
        }
        # Bulleted list: open a <ul> on the first item of a run
        else if ($1 == "*") {
            if (!bullist) {
                bullist = 1
                print "    <ul>"
            }
            print "        <li>" lineText(1) "</li>"
        }
        # Blockquotes
        else if ($1 == ">") {
            closeList()
            print "    <blockquote>" lineText(1) "</blockquote>"
        }
        # Headers
        else if ($1 == "#") {
            headnum++
            closeList()
            print "    <h1 id=\"" headnum "\">" lineText(1) "</h1>"
        }
        # Sub-headers
        else if ($1 == "##") {
            subheadnum++
            closeList()
            print "    <h2 id=\"" headnum "." subheadnum "\">" lineText(1) "</h2>"
        }
        # Sub-sub-headers
        else if ($1 == "###") {
            subsubheadnum++
            closeList()
            print "    <h3 id=\"" headnum "." subheadnum "." subsubheadnum "\">" lineText(1) "</h3>"
        }
        # Links
        else if ($1 == "=>") {
            closeList()
            url = $2
            # Work out whether the link points into this capsule or site.
            # Capsule links are moved to the web address. Links without a
            # scheme are relative, so they point into the site as well.
            ownPage = 0
            if (startsWith(url, geminiAddress)) {
                url = webAddress substr(url, length(geminiAddress) + 1)
                ownPage = 1
            }
            else if (startsWith(url, webAddress)) {
                ownPage = 1
            }
            else if (url !~ /^[A-Za-z][A-Za-z0-9+.-]*:/ && url !~ /^\/\//) {
                ownPage = 1
            }
            # Gemtext pages of this site only exist as .html files, so swap
            # the extension. Only the extension at the end of the path is
            # touched, in front of an optional query or fragment.
            if (ownPage && match(url, /\.(gmi|gemini)([?#].*)?$/)) {
                tail = substr(url, RSTART)
                sub(/^\.(gmi|gemini)/, ".html", tail)
                url = substr(url, 1, RSTART - 1) tail
            }
            # A link line without link text shows the url itself
            label = lineText(2)
            if (label == "") {
                label = escapeHtml(url)
            }
            # Print a <br /> tag after the link so that multiple consecutive
            # urls do not end up on the same line
            print "    <a href=\"" escapeAttribute(url) "\">" label "</a><br />"
        }
        # If none of these previous formatting statements run, just print the
        # text in a <p> tag
        else {
            closeList()
            print "    <p>" escapeHtml($0) "</p>"
        }
    }
    END {
        closeList()
        # Close a preformatted block that the document left open
        if (pre) {
            print "    </pre>"
        }
    }
    ' "$1"
}

# The list is read line by line rather than word-split, so file names that
# contain spaces or glob characters are converted under their real name
while IFS= read -r file
do
    # A top-level file named exactly ".gmi" or ".gemini" has no base name
    if [ -z "$file" ]; then
        continue
    fi
    # Check whether file is a .gmi or .gemini file
    if test -f "$geminiDirectory/$file.gmi"; then
        filename="$geminiDirectory/$file.gmi"
    else
        filename="$geminiDirectory/$file.gemini"
    fi
    # Set the title to the content of the first header
    title=$(gemtextTitle "$filename")

    {
        # Print the start of the html file, up to the <body> tag
        printf '<!DOCTYPE html>\n<html lang="en">\n<head>\n    <meta charset="UTF-8">\n    <title>%s</title>\n    <link rel="stylesheet" href="%s/style.css">\n</head>\n<body>\n' "$title" "$webAddress"
        # header.html, indented to sit inside <body>
        awk '{ print "    " $0 }' "$htmlDirectory/header.html"
        gemtextToHtmlBody "$filename"
        # footer.html, indented to sit inside <body>
        awk '{ print "    " $0 }' "$htmlDirectory/footer.html"
        # Finally, print a closing </body> and </html>
        printf '</body>\n</html>\n'
    } > "$htmlDirectory/$file.html"
    printf '%s\n' "Converted ${filename} to ${htmlDirectory}/${file}.html"
done < <(findFilesWithoutExtension)