move archive deletion out of the main eviction script

This commit is contained in:
ansuz 2021-02-17 14:17:41 +05:30
parent 3e4895220c
commit 9e1d82f373
2 changed files with 213 additions and 65 deletions

View File

@ -30,8 +30,16 @@ Env = {
*/
module.exports = function (Env, cb) {
var complete = Util.once(Util.mkAsync(cb));
// the number of ms artificially introduced between CPU-intensive operations
var THROTTLE_FACTOR = 10;
var evictArchived = function (Env, cb) {
var Log;
var store;
var pinStore;
var blobs;
var retentionTime = +new Date() - (Env.archiveRetentionTime * 24 * 3600 * 1000);
var report = {
// archivedChannelsRemoved,
// archivedAccountsRemoved,
@ -53,67 +61,7 @@ module.exports = function (Env, cb) {
// runningTime,
};
// the administrator should have set an 'inactiveTime' in their config
// if they didn't, just exit.
if (!Env.inactiveTime || typeof(Env.inactiveTime) !== "number") {
return void complete("NO_INACTIVE_TIME");
}
// get a list of premium accounts on this instance
// pre-converted to the 'safeKey' format so we can easily compare
// them against ids we see on the filesystem
var premiumSafeKeys = Object.keys(Env.limits || {})
.map(function (id) {
return Keys.canonicalize(id);
})
.filter(Boolean)
.map(Util.escapeKeyCharacters);
// files which have not been changed since before this date can be considered inactive
var inactiveTime = +new Date() - (Env.inactiveTime * 24 * 3600 * 1000);
// files which were archived before this date can be considered safe to remove
var retentionTime = +new Date() - (Env.archiveRetentionTime * 24 * 3600 * 1000);
var store;
var pinStore;
var Log;
var blobs;
/* It's fairly easy to know if a channel or blob is active
but knowing whether it is pinned requires that we
keep the set of pinned documents in memory.
Some users will share the same set of documents in their pin lists,
so the representation of pinned documents should scale sub-linearly
with the number of users and pinned documents.
That said, sub-linear isn't great...
A Bloom filter is "a space-efficient probabilistic data structure"
which lets us check whether an item is _probably_ or _definitely not_
in a set. This is good enough for our purposes since we just want to
know whether something can safely be removed and false negatives
(not safe to remove when it actually is) are acceptable.
We set our capacity to some large number, and the error rate to whatever
we think is acceptable.
TODO make this configurable ?
*/
var BLOOM_CAPACITY = (1 << 20) - 1; // over a million items
var BLOOM_ERROR = 1 / 10000; // an error rate of one in ten thousand
// the number of ms artificially introduced between CPU-intensive operations
var THROTTLE_FACTOR = 10;
// we'll use one filter for the set of active documents
var activeDocs = Bloom.optimalFilter(BLOOM_CAPACITY, BLOOM_ERROR);
// and another one for the set of pinned documents
var pinnedDocs = Bloom. optimalFilter(BLOOM_CAPACITY, BLOOM_ERROR);
var startTime = +new Date();
var msSinceStart = function () {
return (+new Date()) - startTime;
};
var loadStorage = function () {
store = Env.store;
@ -237,6 +185,105 @@ module.exports = function (Env, cb) {
}));
};
nThen(loadStorage)
.nThen(removeArchivedChannels)
.nThen(removeArchivedBlobProofs)
.nThen(removeArchivedBlobs)
.nThen(function () {
cb();
});
};
module.exports = function (Env, cb) {
var complete = Util.once(Util.mkAsync(cb));
var report = {
// archivedChannelsRemoved,
// archivedAccountsRemoved,
// archivedBlobProofsRemoved,
// archivedBlobsRemoved,
// totalChannels,
// activeChannels,
// totalBlobs,
// activeBlobs,
// totalAccounts,
// activeAccounts,
// channelsArchived,
launchTime: +new Date(),
// runningTime,
};
// the administrator should have set an 'inactiveTime' in their config
// if they didn't, just exit.
if (!Env.inactiveTime || typeof(Env.inactiveTime) !== "number") {
return void complete("NO_INACTIVE_TIME");
}
// get a list of premium accounts on this instance
// pre-converted to the 'safeKey' format so we can easily compare
// them against ids we see on the filesystem
var premiumSafeKeys = Object.keys(Env.limits || {})
.map(function (id) {
return Keys.canonicalize(id);
})
.filter(Boolean)
.map(Util.escapeKeyCharacters);
// files which have not been changed since before this date can be considered inactive
var inactiveTime = +new Date() - (Env.inactiveTime * 24 * 3600 * 1000);
// files which were archived before this date can be considered safe to remove
var retentionTime = +new Date() - (Env.archiveRetentionTime * 24 * 3600 * 1000);
var store;
var pinStore;
var Log;
var blobs;
/* It's fairly easy to know if a channel or blob is active
but knowing whether it is pinned requires that we
keep the set of pinned documents in memory.
Some users will share the same set of documents in their pin lists,
so the representation of pinned documents should scale sub-linearly
with the number of users and pinned documents.
That said, sub-linear isn't great...
A Bloom filter is "a space-efficient probabilistic data structure"
which lets us check whether an item is _probably_ or _definitely not_
in a set. This is good enough for our purposes since we just want to
know whether something can safely be removed and false negatives
(not safe to remove when it actually is) are acceptable.
We set our capacity to some large number, and the error rate to whatever
we think is acceptable.
TODO make this configurable ?
*/
var BLOOM_CAPACITY = (1 << 20) - 1; // over a million items
var BLOOM_ERROR = 1 / 10000; // an error rate of one in ten thousand
// we'll use one filter for the set of active documents
var activeDocs = Bloom.optimalFilter(BLOOM_CAPACITY, BLOOM_ERROR);
// and another one for the set of pinned documents
var pinnedDocs = Bloom. optimalFilter(BLOOM_CAPACITY, BLOOM_ERROR);
var startTime = +new Date();
// returns the number of milliseconds that have elapsed since `startTime`
// (the timestamp captured just above) was recorded
var msSinceStart = function () {
return (+new Date()) - startTime;
};
// copy the storage adaptors and the logger that the caller attached to Env
// into the closure variables declared above (store, pinStore, Log, blobs).
// This takes no arguments and is used as the first step of the nThen chain.
var loadStorage = function () {
store = Env.store;
pinStore = Env.pinStore;
Log = Env.Log;
blobs = Env.blobStore;
};
var categorizeChannelsByActivity = function (w) {
var channels = 0;
var active = 0;
@ -566,9 +613,6 @@ module.exports = function (Env, cb) {
};
nThen(loadStorage)
.nThen(removeArchivedChannels)
.nThen(removeArchivedBlobProofs)
.nThen(removeArchivedBlobs)
// iterate over all documents and add them to a bloom filter if they have been active
.nThen(categorizeChannelsByActivity)
@ -590,3 +634,5 @@ module.exports = function (Env, cb) {
complete(void 0, report);
});
};
module.exports.archived = evictArchived;

102
scripts/evict-archived.js Normal file
View File

@ -0,0 +1,102 @@
var Eviction = require("../lib/eviction");
var nThen = require("nthen");
var Store = require("../lib/storage/file");
var BlobStore = require("../lib/storage/blob");
var Quota = require("../lib/commands/quota");
var Environment = require("../lib/env");
var Decrees = require("../lib/decrees");
var config = require("../lib/load-config");
// the shared environment object, built from the loaded configuration.
// prepareEnv (below) attaches the storage adaptors and the logger to it
// before it is handed to the eviction routine.
var Env = Environment.create(config);
/*  Refresh the cached account limits and then load the decrees,
    one after the other, before invoking `cb` with no arguments.

    Failures in either step are reported through Env.Log.error and
    do not prevent the following step (or the callback) from running.
*/
var loadPremiumAccounts = function (Env, cb) {
    // report a failure to refresh the cached limits
    var onLimitsLoaded = function (err) {
        if (!err) { return; }
        Env.Log.error('EVICT_LOAD_PREMIUM_ACCOUNTS', {
            error: err,
        });
    };

    // report a failure to load decrees, preferring the error code if there is one
    var onDecreesLoaded = function (err) {
        if (!err) { return; }
        Env.Log.error('EVICT_LOAD_DECREES', {
            error: err.code || err,
            message: err.message,
        });
    };

    var fetchLimits = function (waitFor) {
        Quota.updateCachedLimits(Env, waitFor(onLimitsLoaded));
    };

    var applyDecrees = function (waitFor) {
        Decrees.load(Env, waitFor(onDecreesLoaded));
    };

    var finish = function () {
        cb();
    };

    nThen(fetchLimits)
        .nThen(applyDecrees)
        .nThen(finish);
};
var prepareEnv = function (Env, cb) {
//Quota.applyCustomLimits(Env);
nThen(function (w) {
/* Database adaptors
*/
// load the store which will be used for iterating over channels
// and performing operations like archival and deletion
Store.create(config, w(function (err, _) {
if (err) {
w.abort();
throw err;
}
Env.store = _;
}));
Store.create({
filePath: config.pinPath,
}, w(function (err, _) {
if (err) {
w.abort();
throw err;
}
Env.pinStore = _;
}));
// load the logging module so that you have a record of which
// files were archived or deleted at what time
var Logger = require("../lib/log");
Logger.create(config, w(function (_) {
Env.Log = _;
}));
config.getSession = function () {};
BlobStore.create(config, w(function (err, _) {
if (err) {
w.abort();
return console.error(err);
}
Env.blobStore = _;
}));
}).nThen(function (w) {
loadPremiumAccounts(Env, w(function (/* err */) {
//if (err) { }
}));
}).nThen(function () {
cb();
});
};
/*  Script entry point:
    first prepare the environment, then remove archived data.
*/

// load database adaptors and configuration values into the environment
var setupEnvironment = function (waitFor) {
    prepareEnv(Env, waitFor(function () {}));
};

// run only the archive-deletion part of the eviction module
var purgeArchives = function (waitFor) {
    Eviction.archived(Env, waitFor(function () {}));
};

nThen(setupEnvironment)
    .nThen(purgeArchives);