improve performance when listing multiple archives with shared backends

This commit is contained in:
Conor Hughes 2015-08-04 19:34:50 -07:00
parent 3b1e5a60a5
commit 85ed230db2
3 changed files with 54 additions and 14 deletions

View File

@@ -105,9 +105,16 @@ class Application(object):
self.logger.info("Successfully completed {}/{} backups.".format(len(backup_successes), len(backups)))
def list_archives(self):
backend_to_primed_list_token_map = {}
for backup in self.get_all_backups():
for backend in backup.backends:
if backend not in backend_to_primed_list_token_map:
token = backend.get_primed_list_token()
backend_to_primed_list_token_map[backend] = token
for backup in self.get_all_backups():
sys.stdout.write("{}:\n".format(backup.name))
for backend, archives in backup.get_all_archives():
for backend, archives in backup.get_all_archives(backend_to_primed_list_token_map=backend_to_primed_list_token_map):
sorted_archives = sorted(archives, cmp=lambda x,y: cmp(x.datetime, y.datetime))
enumerated_archives = ((i, archive) for i, archive in enumerate(sorted_archives) if self.within_timespec(archive))
sys.stdout.write("\t{}:\n".format(backend.name))

View File

@@ -80,7 +80,7 @@ class Backup(object):
success = success and backend.perform(self.paths, self.name, now)
return success
def get_all_archives(self, backends=None):
def get_all_archives(self, backends=None, backend_to_primed_list_token_map=None):
if backends is None:
backends = self.backends
@ -91,8 +91,12 @@ class Backup(object):
pairs = []
for backend in backends:
token = None
if backend_to_primed_list_token_map is not None:
assert backend in backend_to_primed_list_token_map, "Backend {} not in primed token map?".format(backend)
token = backend_to_primed_list_token_map[backend]
pairs.append(
[backend, backend.existing_archives_for_name(self.name)])
[backend, backend.existing_archives_for_name(self.name, primed_list_token=token)])
return pairs

View File

@@ -10,6 +10,7 @@ import datetime
import dateutil.tz
import time
import re
import StringIO
from .. import backend_types
from .. import package_logger
@@ -44,6 +45,14 @@ class TarsnapArchive(backend_types.Archive):
return True
class _TarsnapPrimedListToken(object):
def __init__(self, tarsnap_output):
self.tarsnap_output = tarsnap_output
def iterlines(self):
return StringIO.StringIO(self.tarsnap_output)
class TarsnapBackend(backend_types.BackupBackend):
NAMES = {"tarsnap"}
@@ -105,24 +114,44 @@ class TarsnapBackend(backend_types.BackupBackend):
pass
os.rmdir(tmpdir)
def existing_archives_for_name(self, backup_name):
def existing_archives_for_name(self, backup_name, primed_list_token=None):
argv = [TARSNAP_PATH, "--list-archives"]
if self.keyfile is not None:
argv += ["--keyfile", self.keyfile]
proc = None
f = None
if primed_list_token is not None:
f = primed_list_token.iterlines()
else:
proc = subprocess.Popen(argv, stdout=subprocess.PIPE)
f = proc.stdout
identifier = self.create_backup_identifier(backup_name)
regex = backup_instance_regex(identifier, backup_name)
results = []
for line in f:
m = regex.match(line)
if m:
ts = float(m.groupdict()["timestamp"])
results.append(TarsnapArchive(self, ts, m.group()))
if proc is not None and proc.wait() != 0:
self.logger.error("Tarsnap invocation failed with exit code {}".format(proc.returncode))
return results
def get_primed_list_token(self):
argv = [TARSNAP_PATH, "--list-archives"]
if self.keyfile is not None:
argv += ["--keyfile", self.keyfile]
proc = subprocess.Popen(argv, stdout=subprocess.PIPE)
identifier = self.create_backup_identifier(backup_name)
regex = backup_instance_regex(identifier, backup_name)
results = []
for line in proc.stdout:
m = regex.match(line)
if m:
ts = float(m.groupdict()["timestamp"])
results.append(TarsnapArchive(self, ts, m.group()))
token = _TarsnapPrimedListToken(proc.stdout.read())
if proc.wait() != 0:
self.logger.error("Tarsnap invocation failed with exit code {}".format(proc.returncode))
return results
return token