From ae7530d85c545083855c5bd905dcf2422b31c871 Mon Sep 17 00:00:00 2001 From: kayvr <98552926+kayvr@users.noreply.github.com> Date: Sun, 20 Mar 2022 13:54:47 -0700 Subject: [PATCH] Add canonicalize URL API. Introduces the ability for protocols to normalize an input URL before further processing. For example, the file:// API requires that directories are terminated with a forward slash to ensure proper caching. --- rover.py | 36 +++++++++++++++++++++++++----------- 1 file changed, 25 insertions(+), 11 deletions(-) diff --git a/rover.py b/rover.py index 0d58780..fa9ca3f 100644 --- a/rover.py +++ b/rover.py @@ -91,6 +91,17 @@ for entry in Path(rover_script_path).parent.iterdir(): # Implementation of the 'file://' protocol. class FileProtocol: + @staticmethod + def api_canonicalize_url(url: str): + # Directories may or may not have a '/' suffix. We expect directories + # to have the '/' suffix. + parsed_url = parse_url(url) + source_path = Path(parsed_url.path) + if source_path.is_dir(): + if not url.endswith("/"): + url = url + "/" + return url + @staticmethod def api_use_abspath(): return False @@ -137,6 +148,10 @@ def get_protocol(url: str): loaded_protocols.add(protocol.name) return protocol.module +def api_canonicalize_url(url: str): + protocol = get_protocol(url) + return protocol.api_canonicalize_url(url) + def api_use_abspath(url: str): protocol = get_protocol(url) return protocol.api_use_abspath() @@ -206,6 +221,9 @@ class RoverDirModInfo(NamedTuple): def eprint(*args, **kwargs): print("ERROR:", *args, file=sys.stderr, **kwargs) +def wprint(*args, **kwargs): + print("warning:", *args, file=sys.stderr, **kwargs) + def read_integer(line): index = 0 integer_str = "" @@ -324,7 +342,7 @@ def generate_rover_file_from_path(source_path): file_contents = open(item, 'r', encoding="utf-8").read() sha = hashlib.sha256(file_contents.encode('utf-8')).hexdigest() except UnicodeDecodeError as e: - print(f"Ignoring invalid utf-8 file. 
{item} {e}") + print(f"Ignoring invalid utf-8 file. {item}") continue stat = os.stat(item) mtime = int(os.path.getmtime(item)) @@ -516,10 +534,6 @@ def protocol_perform_land( for dir_entry in src_rover_file.directories: filename = getattr(dir_entry, 'filename') rec_url = getattr(src_rover_file, 'url') - # Directories must support a forward slash after the name. - if not rec_url.endswith("/"): - print("Have url, that is a directory, that does not contain a terminating slash.") - rec_url = rec_url + "/" rec_url_joined = urllib.parse.urljoin(rec_url, filename) protocol_on_land(rec_url_joined, target_path / filename, is_recursive, leave_for_tour, leave_unread) @@ -633,8 +647,8 @@ def handle_fetch(args): def handle_land(args): url_or_path = args.url_or_path[0] url = path_to_url(url_or_path) + url = api_canonicalize_url(url) - parsed_url = urllib.parse.urlparse(url) target_path = None if url_or_path == ".": target_path = Path.cwd() @@ -655,6 +669,7 @@ def handle_land(args): eprint(f"--retour and --unread are mutually exclusive.") sys.exit(1) + parsed_url = urllib.parse.urlparse(url) if not parsed_url.scheme in supported_protocols.keys(): eprint(f"Unsupported protocol: {parsed_url.scheme}") sys.exit(1) @@ -977,14 +992,13 @@ def land_file(local_path): try: api_land_file(file_url, local_path) except RuntimeError as e: - eprint(f"Failed to land url '{file_url}'.") - eprint(f"Exception: {e}") + wprint(f"Failed to land url '{file_url}'.") + wprint(f"Exception: {e}") failed_land = True error_occurred = True except socket.gaierror as e: - eprint(f"Failed to land url. '{file_url}'.") - eprint(f"Socket Exception: {e}") - eprint("Ignoring file.") + wprint(f"Failed to land url. '{file_url}'.") + wprint(f"Socket Exception: {e}") failed_land = True error_occurred = True