From 56f0295be203771f59e94162b62fa17d4fea54a3 Mon Sep 17 00:00:00 2001 From: Martin Herkt Date: Tue, 1 Nov 2016 05:17:54 +0100 Subject: [PATCH] init --- .gitignore | 2 + LICENSE | 13 + README.rst | 54 ++++ cleanup.py | 23 ++ fhost.py | 402 +++++++++++++++++++++++++++ migrations/README | 1 + migrations/alembic.ini | 45 +++ migrations/env.py | 87 ++++++ migrations/script.py.mako | 22 ++ migrations/versions/0cd36ecdd937_.py | 42 +++ requirements.txt | 20 ++ 11 files changed, 711 insertions(+) create mode 100644 .gitignore create mode 100644 LICENSE create mode 100644 README.rst create mode 100755 cleanup.py create mode 100755 fhost.py create mode 100755 migrations/README create mode 100644 migrations/alembic.ini create mode 100755 migrations/env.py create mode 100755 migrations/script.py.mako create mode 100644 migrations/versions/0cd36ecdd937_.py create mode 100644 requirements.txt diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..64799da --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +__pycache__/ +.py[cod] diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..7f1b81d --- /dev/null +++ b/LICENSE @@ -0,0 +1,13 @@ +Copyright © 2016, Martin Herkt + +Permission to use, copy, modify, and/or distribute this software for any +purpose with or without fee is hereby granted, provided that the above +copyright notice and this permission notice appear in all copies. + +THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
diff --git a/README.rst b/README.rst new file mode 100644 index 0000000..ca5ba46 --- /dev/null +++ b/README.rst @@ -0,0 +1,54 @@ +The Null Pointer +================ + +This is a no-bullshit file hosting and URL shortening service that also runs +`0x0.st <https://0x0.st>`_. Use with uWSGI. + +If you are running nginx, you should use the ``X-Accel-Redirect`` header. +To make it work, include this in your nginx config’s ``server`` block:: + + location /up { + internal; + } + +where ``/up`` is whatever you’ve configured as ``FHOST_STORAGE_PATH`` +in ``fhost.py``. + +For all other servers, set ``FHOST_USE_X_ACCEL_REDIRECT`` to ``False`` and +``USE_X_SENDFILE`` to ``True``, assuming your server supports this. +Otherwise, Flask will serve the file with chunked encoding, which sucks and +should be avoided at all costs. + +To make files expire, simply create a cronjob that runs ``cleanup.py`` every +now and then. + +Before running the service for the first time, run ``./fhost.py db upgrade``. + + +FAQ +--- + +Q: + Will you ever add a web interface with HTML forms? +A: + No. This would without a doubt make it very popular and quickly exceed + my hosting budget unless I started crippling it. + +Q: + What about file management? Will I be able to register an account at some + point? +A: + No. + +Q: + Why are you storing IP addresses with each uploaded file? +A: + This is done to make dealing with legal claims and accidental uploads + easier, e.g. when a user requests removal of all text files uploaded from + a certain address within a given time frame (it happens). + +Q: + Do you accept donations? +A: + Only if you insist. I’ve spent very little time and effort on this service + and I don’t feel like I should be taking money for it. 
diff --git a/cleanup.py b/cleanup.py
new file mode 100755
index 0000000..401140a
--- /dev/null
+++ b/cleanup.py
@@ -0,0 +1,58 @@
+#!/usr/bin/env python3
+"""Delete stored uploads that have outlived their retention period.
+
+Meant to be run periodically, e.g. from cron.  Retention shrinks with file
+size: tiny files are kept for up to MAX_AGE_DAYS, files as large as the
+configured MAX_CONTENT_LENGTH for MIN_AGE_DAYS.
+"""
+
+import os
+import time
+
+# fhost creates its storage directory relative to the working directory when
+# it is imported, so switch to this script's directory first.  abspath() keeps
+# this working when the script is started without a directory component
+# (e.g. "python3 cleanup.py"), where dirname() alone would be empty.
+os.chdir(os.path.dirname(os.path.abspath(__file__)))
+
+from fhost import app
+
+MIN_AGE_DAYS = 30
+MAX_AGE_DAYS = 365
+SECONDS_PER_DAY = 24 * 60 * 60
+
+
+def max_age_days(size, max_size, min_age=MIN_AGE_DAYS, max_age=MAX_AGE_DAYS):
+    """Return how many days a file of ``size`` bytes may be kept.
+
+    This is the cubic curve documented on the index page; it runs from
+    ``max_age`` for an empty file down to ``min_age`` at ``max_size``.
+    """
+    return min_age + (-max_age + min_age) * (size / max_size - 1) ** 3
+
+
+def cleanup(storage_path, max_size):
+    """Delete every regular file in ``storage_path`` past its retention period.
+
+    A file's age is the number of whole days since its modification time.
+    """
+    now = time.time()
+
+    for name in os.listdir(storage_path):
+        path = os.path.join(storage_path, name)
+
+        # os.remove() cannot delete directories, so skip anything that is
+        # not a regular file instead of aborting the whole run.
+        if not os.path.isfile(path):
+            continue
+
+        stat = os.stat(path)
+        age = (now - stat.st_mtime) // SECONDS_PER_DAY
+
+        if age >= max_age_days(stat.st_size, max_size):
+            os.remove(path)
+
+
+if __name__ == "__main__":
+    cleanup(app.config["FHOST_STORAGE_PATH"],
+            app.config["MAX_CONTENT_LENGTH"])
diff --git a/fhost.py b/fhost.py
new file mode 100755
index 0000000..1b9a4c7
--- /dev/null
+++ b/fhost.py
@@ -0,0 +1,479 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""The Null Pointer: a minimal file hosting and URL shortening service."""
+
+import os
+import sys
+from hashlib import sha256
+from mimetypes import guess_extension
+from types import SimpleNamespace
+
+import requests
+from flask import Flask, abort, escape, make_response, redirect, request, send_from_directory, url_for
+from flask_migrate import Migrate, MigrateCommand
+from flask_script import Manager
+from flask_sqlalchemy import SQLAlchemy
+from humanize import naturalsize
+from magic import Magic
+from short_url import UrlEncoder
+from validators import url as url_valid
+
+app = Flask(__name__)
+app.config["SQLALCHEMY_TRACK_MODIFICATIONS"] = False
+
+app.config["SQLALCHEMY_DATABASE_URI"] = "sqlite:///db.sqlite" # "postgresql://0x0@/0x0"
+app.config["PREFERRED_URL_SCHEME"] = "https" # nginx users: make sure to have 'uwsgi_param UWSGI_SCHEME $scheme;' in your config
+app.config["MAX_CONTENT_LENGTH"] = 256 * 1024 * 1024
+app.config["MAX_URL_LENGTH"] = 4096
+app.config["FHOST_STORAGE_PATH"] = "up"
+app.config["FHOST_USE_X_ACCEL_REDIRECT"] = True # expect nginx by default
+app.config["USE_X_SENDFILE"] = False
+app.config["FHOST_EXT_OVERRIDE"] = {
+    "image/gif" : ".gif",
+    "image/jpeg" : ".jpg",
+    "image/png" : ".png",
+    "image/svg+xml" : ".svg",
+    "video/webm" : ".webm",
+    "video/x-matroska" : ".mkv",
+    "application/octet-stream" : ".bin",
+    "text/plain" : ".txt"
+}
+
+# default blacklist to avoid AV mafia extortion
+app.config["FHOST_MIME_BLACKLIST"] = [
+    "application/x-dosexec",
+    "application/java-archive",
+    "application/java-vm"
+]
+
+try:
+    mimedetect = Magic(mime=True, mime_encoding=False)
+except Exception:
+    # Catch regular errors only, so that KeyboardInterrupt and SystemExit are
+    # not reported as a wrong 'magic' module.
+    print("""Error: You have installed the wrong version of the 'magic' module.
+Please install python-magic.""", file=sys.stderr)
+    sys.exit(1)
+
+# exist_ok avoids a crash when the directory appears between check and mkdir.
+os.makedirs(app.config["FHOST_STORAGE_PATH"], exist_ok=True)
+
+db = SQLAlchemy(app)
+migrate = Migrate(app, db)
+
+manager = Manager(app)
+manager.add_command("db", MigrateCommand)
+
+# Converts database ids to and from the short names used in URLs.
+su = UrlEncoder(alphabet='DEQhd2uFteibPwq0SWBInTpA_jcZL5GKz3YCR14Ulk87Jors9vNHgfaOmMXy6Vx-', block_size=16)
+
+
+class URL(db.Model):
+    """A shortened URL."""
+
+    id = db.Column(db.Integer, primary_key = True)
+    url = db.Column(db.UnicodeText, unique = True)
+
+    def __init__(self, url):
+        self.url = url
+
+    def getname(self):
+        """Return the short name this URL is served under."""
+        return su.enbase(self.id, 1)
+
+
+class File(db.Model):
+    """An uploaded file; its content is stored on disk under its SHA-256."""
+
+    id = db.Column(db.Integer, primary_key = True)
+    sha256 = db.Column(db.String, unique = True)
+    ext = db.Column(db.UnicodeText)
+    mime = db.Column(db.UnicodeText)
+    addr = db.Column(db.UnicodeText)
+    removed = db.Column(db.Boolean, default=False)
+
+    def __init__(self, sha256, ext, mime, addr):
+        self.sha256 = sha256
+        self.ext = ext
+        self.mime = mime
+        self.addr = addr
+
+    def getname(self):
+        """Return the short name plus extension this file is served under."""
+        return u"{0}{1}".format(su.enbase(self.id, 1), self.ext)
+
+
+def getpath(fn):
+    """Return the path of ``fn`` inside the storage directory."""
+    return os.path.join(app.config["FHOST_STORAGE_PATH"], fn)
+
+
+def geturl(p):
+    """Return the external, newline-terminated URL for short name ``p``."""
+    return url_for("get", path=p, _external=True) + "\n"
+
+
+def shorten(url):
+    """Return the short URL for ``url``, creating it if necessary.
+
+    Aborts with 414 if ``url`` exceeds MAX_URL_LENGTH and with 400 if it is
+    not a valid URL.
+    """
+    if len(url) > app.config["MAX_URL_LENGTH"]:
+        abort(414)
+
+    if not url_valid(url):
+        abort(400)
+
+    existing = URL.query.filter_by(url=url).first()
+
+    if existing:
+        return geturl(existing.getname())
+
+    u = URL(url)
+    db.session.add(u)
+    db.session.commit()
+
+    return geturl(u.getname())
+
+
+def store_file(f, addr):
+    """Store an upload and return its URL.
+
+    ``f`` must provide ``stream``, ``content_type`` and ``filename``; ``addr``
+    is the uploader's address.  Content is deduplicated by SHA-256, so
+    uploading known content returns the existing URL.  Removed content yields
+    the 451 response; blacklisted MIME types abort with 415.
+    """
+    data = f.stream.read()
+    digest = sha256(data).hexdigest()
+    existing = File.query.filter_by(sha256=digest).first()
+
+    if existing:
+        if existing.removed:
+            return legal()
+
+        epath = getpath(existing.sha256)
+
+        # The stored copy may be gone (e.g. expired) while the row remains.
+        if not os.path.exists(epath):
+            with open(epath, "wb") as of:
+                of.write(data)
+
+        # Refresh the modification time, which cleanup.py uses as file age.
+        os.utime(epath, None)
+        existing.addr = addr
+        db.session.commit()
+
+        return geturl(existing.getname())
+
+    guessmime = mimedetect.from_buffer(data)
+
+    if not f.content_type or "/" not in f.content_type or f.content_type == "application/octet-stream":
+        mime = guessmime
+    else:
+        mime = f.content_type
+
+    if mime in app.config["FHOST_MIME_BLACKLIST"] or guessmime in app.config["FHOST_MIME_BLACKLIST"]:
+        abort(415)
+
+    # Test the chosen type itself: the objects built by store_url() have no
+    # "mime" attribute.
+    if mime.startswith("text/") and "charset" not in mime:
+        mime += "; charset=utf-8"
+
+    # The filename is empty for remote files; treat a missing one the same.
+    ext = os.path.splitext(f.filename or "")[1]
+
+    if ext:
+        ext = ext[:8]
+    else:
+        gmime = mime.split(";")[0]
+
+        if gmime in app.config["FHOST_EXT_OVERRIDE"]:
+            ext = app.config["FHOST_EXT_OVERRIDE"][gmime]
+        else:
+            ext = guess_extension(gmime)
+
+    # guess_extension() returns None for types it does not know.
+    if not ext:
+        ext = ".bin"
+
+    with open(getpath(digest), "wb") as of:
+        of.write(data)
+
+    sf = File(digest, ext, mime, addr)
+    db.session.add(sf)
+    db.session.commit()
+
+    return geturl(sf.getname())
+
+
+def store_url(url, addr):
+    """Fetch ``url`` and store the response body like an uploaded file.
+
+    URLs pointing back at this service are refused with 508.  The remote
+    server must announce a Content-Length below MAX_CONTENT_LENGTH; otherwise
+    411 (no length) or 413 (too large) is returned.
+    """
+    fhost_url = url_for(".fhost", _external=True).rstrip("/")
+    fhost_url_https = url_for(".fhost", _external=True, _scheme="https").rstrip("/")
+
+    if url.startswith((fhost_url, fhost_url_https)):
+        # segfault() is an error handler and reads the status from an
+        # exception object, so build the 508 response directly.
+        return "Segmentation fault\n", 508
+
+    # NOTE(review): verify=False disables TLS certificate verification for
+    # the remote fetch - confirm that this is intended.
+    r = requests.get(url, stream=True, verify=False)
+
+    try:
+        r.raise_for_status()
+    except requests.exceptions.HTTPError as e:
+        return str(e) + "\n"
+
+    if "content-length" not in r.headers:
+        return "Could not determine remote file size (no Content-Length in response header; shoot admin).\n", 411
+
+    l = int(r.headers["content-length"])
+
+    if l >= app.config["MAX_CONTENT_LENGTH"]:
+        hl = naturalsize(l, binary = True)
+        hml = naturalsize(app.config["MAX_CONTENT_LENGTH"], binary=True)
+
+        return "Remote file too large ({0} > {1}).\n".format(hl, hml), 413
+
+    # Minimal stand-in for an uploaded file; a missing Content-Type is passed
+    # on as None and makes store_file() detect the type itself.
+    f = SimpleNamespace(stream=r.raw, content_type=r.headers.get("content-type"), filename="")
+
+    return store_file(f, addr)
+
+
+@app.route("/<path:path>")
+def get(path):
+    """Serve the file or follow the shortened URL named by ``path``.
+
+    Names with an extension refer to files, names without one to shortened
+    URLs.  Anything unknown results in a 404.
+    """
+    p = os.path.splitext(path)
+
+    try:
+        obj_id = su.debase(p[0])
+    except ValueError:
+        # The name contains characters outside the short name alphabet.
+        abort(404)
+
+    if p[1]:
+        f = File.query.get(obj_id)
+
+        if f and f.ext == p[1]:
+            if f.removed:
+                return legal()
+
+            fpath = getpath(f.sha256)
+
+            if not os.path.exists(fpath):
+                abort(404)
+
+            fsize = os.path.getsize(fpath)
+
+            if app.config["FHOST_USE_X_ACCEL_REDIRECT"]:
+                # Hand the transfer over to nginx (see README).
+                response = make_response()
+                response.headers["Content-Type"] = f.mime
+                response.headers["Content-Length"] = fsize
+                response.headers["X-Accel-Redirect"] = "/" + fpath
+                return response
+            else:
+                return send_from_directory(app.config["FHOST_STORAGE_PATH"], f.sha256, mimetype = f.mime)
+    else:
+        u = URL.query.get(obj_id)
+
+        if u:
+            return redirect(u.url)
+
+    abort(404)
+
+
+@app.route("/", methods=["GET", "POST"])
+def fhost():
+    """Handle uploads and shortening (POST) or show the usage page (GET)."""
+    if request.method == "POST":
+        if "file" in request.files:
+            return store_file(request.files["file"], request.remote_addr)
+        elif "url" in request.form:
+            return store_url(request.form["url"], request.remote_addr)
+        elif "shorten" in request.form:
+            return shorten(request.form["shorten"])
+
+        abort(400)
+
+    maxsize = naturalsize(app.config["MAX_CONTENT_LENGTH"], binary=True)
+    maxsizenum, maxsizeunit = maxsize.split(" ")
+    maxsizenum = float(maxsizenum)
+    maxsizehalf = maxsizenum / 2
+
+    # Label the size axis without a trailing ".0" where possible.
+    if maxsizenum.is_integer():
+        maxsizenum = int(maxsizenum)
+    if maxsizehalf.is_integer():
+        maxsizehalf = int(maxsizehalf)
+
+    # The page is served as HTML by default; <pre> keeps its layout intact.
+    return """<pre>
+THE NULL POINTER
+================
+
+HTTP POST files here:
+    curl -F'file=@yourfile.png' {0}
+You can also POST remote URLs:
+    curl -F'url=http://example.com/image.jpg' {0}
+Or you can shorten URLs:
+    curl -F'shorten=http://example.com/some/long/url' {0}
+
+File URLs are valid for at least 30 days and up to a year (see below).
+Shortened URLs do not expire.
+
+Maximum file size: {1}
+Not allowed: {5}
+
+
+FILE RETENTION PERIOD
+---------------------
+
+retention = min_age + (-max_age + min_age) * pow((file_size / max_size - 1), 3)
+
+   days
+    365 |  \\
+        |   \\
+        |    \\
+        |     \\
+        |      \\
+        |       \\
+        |        ..
+        |          \\
+  197.5 | ----------..-------------------------------------------
+        |             ..
+        |               \\
+        |                ..
+        |                  ...
+        |                     ..
+        |                       ...
+        |                          ....
+        |                              ......
+     30 |                                    ....................
+          0{2}{3}
+           {4}
+
+
+ABUSE
+-----
+
+If you would like to request permanent deletion, please contact lachs0r via
+IRC on Freenode, or send an email to lachs0r@(this domain).
+
+Please allow up to 24 hours for a response.
+</pre>""".format(url_for(".fhost", _external=True).rstrip("/"),
+        maxsize, str(maxsizehalf).rjust(27), str(maxsizenum).rjust(27),
+        maxsizeunit.rjust(54),
+        ", ".join(app.config["FHOST_MIME_BLACKLIST"]))
+
+
+@app.route("/robots.txt")
+def robots():
+    """Tell crawlers to stay away from everything."""
+    return """User-agent: *
+Disallow: /
+"""
+
+
+def legal():
+    """Return the response for content that has been removed."""
+    return "451 Unavailable For Legal Reasons\n", 451
+
+
+# 404 is handled by notfound() below.
+@app.errorhandler(400)
+@app.errorhandler(414)
+@app.errorhandler(415)
+def segfault(e):
+    """Render a terse error page for the HTTP exception ``e``."""
+    return "Segmentation fault\n", e.code
+
+
+@app.errorhandler(404)
+def notfound(e):
+    """Render the mock debugger session shown for unknown paths."""
+    return u"""<pre>Process {0} stopped
+* thread #1: tid = {0}, {1:#018x}, name = '{2}'
+    frame #0:
+Process {0} stopped
+* thread #8: tid = {0}, {3:#018x} fhost`get(path='{4}') + 27 at fhost.c:139, name = 'fhost/responder', stop reason = invalid address (fault address: 0x30)
+    frame #0: {3:#018x} fhost`get(path='{4}') + 27 at fhost.c:139
+   136   get(SrvContext *ctx, const char *path)
+   137   {{
+   138       StoredObj *obj = ctx->store->query(shurl_debase(path));
+-> 139       switch (obj->type) {{
+   140           case ObjTypeFile:
+   141               ctx->serve_file_id(obj->id);
+   142               break;
+(lldb) q</pre>
+""".format(os.getpid(), id(app), "fhost", id(get), escape(request.path)), e.code
+
+
+@manager.command
+def debug():
+    """Run the development server without X-Accel-Redirect."""
+    app.config["FHOST_USE_X_ACCEL_REDIRECT"] = False
+    # NOTE(review): debug mode combined with host 0.0.0.0 exposes the
+    # interactive debugger to the network - only run this on a trusted one.
+    app.run(debug=True, port=4562,host="0.0.0.0")
+
+
+@manager.command
+def permadelete(name):
+    """Delete the file named ``name`` from disk and mark it as removed."""
+    f = File.query.get(su.debase(name))
+
+    if f:
+        if os.path.exists(getpath(f.sha256)):
+            os.remove(getpath(f.sha256))
+        f.removed = True
+        db.session.commit()
+
+
+@manager.command
+def query(name):
+    """Print the database record of the file named ``name``."""
+    f = File.query.get(su.debase(name))
+
+    if f:
+        print("url: {}".format(f.getname()))
+        vals = vars(f)
+
+        for v in vals:
+            if not v.startswith("_sa"):
+                print("{}: {}".format(v, vals[v]))
+
+
+@manager.command
+def queryhash(h):
+    """Print the record of the file with SHA-256 digest ``h``."""
+    f = File.query.filter_by(sha256=h).first()
+
+    if f:
+        query(su.enbase(f.id, 1))
+
+
+@manager.command
+def queryaddr(a):
+    """Print the records of all files uploaded from address ``a``."""
+    for f in File.query.filter_by(addr=a):
+        query(su.enbase(f.id, 1))
+
+
+if __name__ == "__main__":
+    manager.run()
diff --git a/migrations/README b/migrations/README
new file mode 100755
index 0000000..98e4f9c
--- /dev/null
+++ b/migrations/README
@@ -0,0 +1 @@
+Generic single-database configuration.
\ No newline at end of file
diff --git a/migrations/alembic.ini b/migrations/alembic.ini
new file mode 100644
index 0000000..f8ed480
--- /dev/null
+++ b/migrations/alembic.ini
@@ -0,0 +1,45 @@
+# A generic, single database configuration.
+
+[alembic]
+# template used to generate migration files
+# file_template = %%(rev)s_%%(slug)s
+
+# set to 'true' to run the environment during
+# the 'revision' command, regardless of autogenerate
+# revision_environment = false
+
+
+# Logging configuration
+[loggers]
+keys = root,sqlalchemy,alembic
+
+[handlers]
+keys = console
+
+[formatters]
+keys = generic
+
+[logger_root]
+level = WARN
+handlers = console
+qualname =
+
+[logger_sqlalchemy]
+level = WARN
+handlers =
+qualname = sqlalchemy.engine
+
+[logger_alembic]
+level = INFO
+handlers =
+qualname = alembic
+
+[handler_console]
+class = StreamHandler
+args = (sys.stderr,)
+level = NOTSET
+formatter = generic
+
+[formatter_generic]
+format = %(levelname)-5.5s [%(name)s] %(message)s
+datefmt = %H:%M:%S
diff --git a/migrations/env.py b/migrations/env.py
new file mode 100755
index 0000000..4593816
--- /dev/null
+++ b/migrations/env.py
@@ -0,0 +1,87 @@
+from __future__ import with_statement
+from alembic import context
+from sqlalchemy import engine_from_config, pool
+from logging.config import fileConfig
+import logging
+
+# this is the Alembic Config object, which provides
+# access to the values within the .ini file in use.
+config = context.config
+
+# Interpret the config file for Python logging.
+# This sets up the loggers declared in that file.
+fileConfig(config.config_file_name)
+logger = logging.getLogger('alembic.env')
+
+# The database URL and the model metadata come from the running Flask
+# application: the URL from its SQLALCHEMY_DATABASE_URI setting, the
+# metadata from the db object registered by Flask-Migrate.  The metadata
+# is what 'autogenerate' support works from.
+from flask import current_app
+config.set_main_option('sqlalchemy.url',
+                       current_app.config.get('SQLALCHEMY_DATABASE_URI'))
+target_metadata = current_app.extensions['migrate'].db.metadata
+
+# other values from the config, defined by the needs of env.py,
+# can be acquired:
+# my_important_option = config.get_main_option("my_important_option")
+# ... etc.
+
+
+def run_migrations_offline():
+    """Run migrations in 'offline' mode.
+
+    This configures the context with just a URL
+    and not an Engine, though an Engine is acceptable
+    here as well.  By skipping the Engine creation
+    we don't even need a DBAPI to be available.
+
+    Calls to context.execute() here emit the given string to the
+    script output.
+
+    """
+    url = config.get_main_option("sqlalchemy.url")
+    context.configure(url=url)
+
+    with context.begin_transaction():
+        context.run_migrations()
+
+
+def run_migrations_online():
+    """Run migrations in 'online' mode.
+
+    In this scenario we need to create an Engine
+    and associate a connection with the context.
+
+    """
+
+    # this callback is used to prevent an auto-migration from being generated
+    # when there are no changes to the schema
+    # reference: http://alembic.readthedocs.org/en/latest/cookbook.html
+    def process_revision_directives(context, revision, directives):
+        if getattr(config.cmd_opts, 'autogenerate', False):
+            script = directives[0]
+            if script.upgrade_ops.is_empty():
+                directives[:] = []
+                logger.info('No changes in schema detected.')
+
+    engine = engine_from_config(config.get_section(config.config_ini_section),
+                                prefix='sqlalchemy.',
+                                poolclass=pool.NullPool)
+
+    connection = engine.connect()
+    context.configure(connection=connection,
+                      target_metadata=target_metadata,
+                      process_revision_directives=process_revision_directives,
+                      **current_app.extensions['migrate'].configure_args)
+
+    try:
+        with context.begin_transaction():
+            context.run_migrations()
+    finally:
+        connection.close()
+
+if context.is_offline_mode():
+    run_migrations_offline()
+else:
+    run_migrations_online()
diff --git a/migrations/script.py.mako b/migrations/script.py.mako
new file mode 100755
index 0000000..9570201
--- /dev/null
+++ b/migrations/script.py.mako
@@ -0,0 +1,22 @@
+"""${message}
+
+Revision ID: ${up_revision}
+Revises: ${down_revision}
+Create Date: ${create_date}
+
+"""
+
+# revision identifiers, used by Alembic.
+revision = ${repr(up_revision)}
+down_revision = ${repr(down_revision)}
+
+from alembic import op
+import sqlalchemy as sa
+${imports if imports else ""}
+
+def upgrade():
+    ${upgrades if upgrades else "pass"}
+
+
+def downgrade():
+    ${downgrades if downgrades else "pass"}
diff --git a/migrations/versions/0cd36ecdd937_.py b/migrations/versions/0cd36ecdd937_.py
new file mode 100644
index 0000000..349cab9
--- /dev/null
+++ b/migrations/versions/0cd36ecdd937_.py
@@ -0,0 +1,42 @@
+"""Initial schema: create the 'URL' and 'file' tables.
+
+Revision ID: 0cd36ecdd937
+Revises: None
+Create Date: 2016-11-01 05:25:42.691768
+
+"""
+
+# revision identifiers, used by Alembic.
+revision = '0cd36ecdd937'
+down_revision = None
+
+from alembic import op
+import sqlalchemy as sa
+
+
+def upgrade():
+    ### commands auto generated by Alembic - please adjust! ###
+    op.create_table('URL',
+    sa.Column('id', sa.Integer(), nullable=False),
+    sa.Column('url', sa.UnicodeText(), nullable=True),
+    sa.PrimaryKeyConstraint('id'),
+    sa.UniqueConstraint('url')
+    )
+    op.create_table('file',
+    sa.Column('id', sa.Integer(), nullable=False),
+    sa.Column('sha256', sa.String(), nullable=True),
+    sa.Column('ext', sa.UnicodeText(), nullable=True),
+    sa.Column('mime', sa.UnicodeText(), nullable=True),
+    sa.Column('addr', sa.UnicodeText(), nullable=True),
+    sa.Column('removed', sa.Boolean(), nullable=True),
+    sa.PrimaryKeyConstraint('id'),
+    sa.UniqueConstraint('sha256')
+    )
+    ### end Alembic commands ###
+
+
+def downgrade():
+    ### commands auto generated by Alembic - please adjust! ###
+    op.drop_table('file')
+    op.drop_table('URL')
+    ### end Alembic commands ###
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..3004091
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,20 @@
+alembic==0.8.8
+click==6.6
+decorator==4.0.10
+Flask==0.11.1
+Flask-Migrate==2.0.0
+Flask-Script==2.0.5
+Flask-SQLAlchemy==2.1
+humanize==0.5.1
+itsdangerous==0.24
+Jinja2==2.8
+Mako==1.0.4
+MarkupSafe==0.23
+python-editor==1.0.1
+python-magic==0.4.12
+requests==2.11.1
+short-url==1.2.2
+six==1.10.0
+SQLAlchemy==1.1.3
+validators==0.11.0
+Werkzeug==0.11.11