move archive deletion out of the main eviction script
This commit is contained in:
parent 3e4895220c
commit 9e1d82f373

176 lib/eviction.js
@@ -30,8 +30,16 @@ Env = {
 */
-module.exports = function (Env, cb) {
-    var complete = Util.once(Util.mkAsync(cb));
+// the number of ms artificially introduced between CPU-intensive operations
+var THROTTLE_FACTOR = 10;
+
+var evictArchived = function (Env, cb) {
+    var Log;
+    var store;
+    var pinStore;
+    var blobs;
+    var retentionTime = +new Date() - (Env.archiveRetentionTime * 24 * 3600 * 1000);
+
     var report = {
         // archivedChannelsRemoved,
         // archivedAccountsRemoved,
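The retention cutoff introduced above is plain date arithmetic: a number of days from the config is converted to milliseconds and subtracted from the current timestamp. A minimal sketch, assuming a hypothetical 15-day retention setting:

var DAY = 24 * 3600 * 1000;        // one day in milliseconds
var archiveRetentionTime = 15;     // hypothetical config value, in days
var retentionTime = +new Date() - (archiveRetentionTime * DAY);
// anything archived before this timestamp is old enough to delete
console.log(new Date(retentionTime).toISOString());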
@@ -53,67 +61,7 @@ module.exports = function (Env, cb) {
         // runningTime,
     };
-
-    // the administrator should have set an 'inactiveTime' in their config
-    // if they didn't, just exit.
-    if (!Env.inactiveTime || typeof(Env.inactiveTime) !== "number") {
-        return void complete("NO_INACTIVE_TIME");
-    }
-
-    // get a list of premium accounts on this instance
-    // pre-converted to the 'safeKey' format so we can easily compare
-    // them against ids we see on the filesystem
-    var premiumSafeKeys = Object.keys(Env.limits || {})
-        .map(function (id) {
-            return Keys.canonicalize(id);
-        })
-        .filter(Boolean)
-        .map(Util.escapeKeyCharacters);
-
-    // files which have not been changed since before this date can be considered inactive
-    var inactiveTime = +new Date() - (Env.inactiveTime * 24 * 3600 * 1000);
-
-    // files which were archived before this date can be considered safe to remove
-    var retentionTime = +new Date() - (Env.archiveRetentionTime * 24 * 3600 * 1000);
-
-    var store;
-    var pinStore;
-    var Log;
-    var blobs;
-
-    /*  It's fairly easy to know if a channel or blob is active
-        but knowing whether it is pinned requires that we
-        keep the set of pinned documents in memory.
-
-        Some users will share the same set of documents in their pin lists,
-        so the representation of pinned documents should scale sub-linearly
-        with the number of users and pinned documents.
-
-        That said, sub-linear isn't great...
-        A Bloom filter is "a space-efficient probabilistic data structure"
-        which lets us check whether an item is _probably_ or _definitely not_
-        in a set. This is good enough for our purposes since we just want to
-        know whether something can safely be removed and false negatives
-        (not safe to remove when it actually is) are acceptable.
-
-        We set our capacity to some large number, and the error rate to whatever
-        we think is acceptable.
-
-        TODO make this configurable ?
-    */
-    var BLOOM_CAPACITY = (1 << 20) - 1; // over a million items
-    var BLOOM_ERROR = 1 / 10000; // an error rate of one in ten thousand
-    // the number of ms artificially introduced between CPU-intensive operations
-    var THROTTLE_FACTOR = 10;
-
-    // we'll use one filter for the set of active documents
-    var activeDocs = Bloom.optimalFilter(BLOOM_CAPACITY, BLOOM_ERROR);
-    // and another one for the set of pinned documents
-    var pinnedDocs = Bloom.optimalFilter(BLOOM_CAPACITY, BLOOM_ERROR);
-
-    var startTime = +new Date();
-    var msSinceStart = function () {
-        return (+new Date()) - startTime;
-    };
-
     var loadStorage = function () {
         store = Env.store;
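The comment block in the removed code above (re-added further down) explains why a Bloom filter fits: an item it reports as present might be a false alarm, but an item it reports as absent is guaranteed absent, so nothing pinned can slip through to deletion. The toy filter below is not the Bloom library this code calls, just a self-contained sketch of those semantics:

var makeToyBloom = function (bits) {
    var filter = new Uint8Array(bits);
    // two cheap string hashes stand in for a proper optimal hash family
    var hash = function (s, seed) {
        var h = seed;
        for (var i = 0; i < s.length; i++) { h = ((h * 31) + s.charCodeAt(i)) >>> 0; }
        return h % bits;
    };
    return {
        add: function (s) { filter[hash(s, 17)] = 1; filter[hash(s, 101)] = 1; },
        test: function (s) { return Boolean(filter[hash(s, 17)] && filter[hash(s, 101)]); }
    };
};

var pinnedDocs = makeToyBloom(1 << 16);
pinnedDocs.add("channelA");
pinnedDocs.test("channelA"); // true: "probably pinned", never a false "not pinned"
pinnedDocs.test("channelB"); // almost certainly false: "definitely not pinned"

At the capacity and error rate chosen here, about 2^20 entries at 1/10000, an optimally sized filter needs roughly 19 bits per entry, in the neighbourhood of 2.5 MB per filter.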
@@ -237,6 +185,105 @@ module.exports = function (Env, cb) {
         }));
     };
 
+    nThen(loadStorage)
+    .nThen(removeArchivedChannels)
+    .nThen(removeArchivedBlobProofs)
+    .nThen(removeArchivedBlobs)
+    .nThen(function () {
+        cb();
+    });
+};
+
+module.exports = function (Env, cb) {
+    var complete = Util.once(Util.mkAsync(cb));
+    var report = {
+        // archivedChannelsRemoved,
+        // archivedAccountsRemoved,
+        // archivedBlobProofsRemoved,
+        // archivedBlobsRemoved,
+
+        // totalChannels,
+        // activeChannels,
+
+        // totalBlobs,
+        // activeBlobs,
+
+        // totalAccounts,
+        // activeAccounts,
+
+        // channelsArchived,
+
+        launchTime: +new Date(),
+        // runningTime,
+    };
+
+    // the administrator should have set an 'inactiveTime' in their config
+    // if they didn't, just exit.
+    if (!Env.inactiveTime || typeof(Env.inactiveTime) !== "number") {
+        return void complete("NO_INACTIVE_TIME");
+    }
+
+    // get a list of premium accounts on this instance
+    // pre-converted to the 'safeKey' format so we can easily compare
+    // them against ids we see on the filesystem
+    var premiumSafeKeys = Object.keys(Env.limits || {})
+        .map(function (id) {
+            return Keys.canonicalize(id);
+        })
+        .filter(Boolean)
+        .map(Util.escapeKeyCharacters);
+
+    // files which have not been changed since before this date can be considered inactive
+    var inactiveTime = +new Date() - (Env.inactiveTime * 24 * 3600 * 1000);
+
+    // files which were archived before this date can be considered safe to remove
+    var retentionTime = +new Date() - (Env.archiveRetentionTime * 24 * 3600 * 1000);
+
+    var store;
+    var pinStore;
+    var Log;
+    var blobs;
+
+    /*  It's fairly easy to know if a channel or blob is active
+        but knowing whether it is pinned requires that we
+        keep the set of pinned documents in memory.
+
+        Some users will share the same set of documents in their pin lists,
+        so the representation of pinned documents should scale sub-linearly
+        with the number of users and pinned documents.
+
+        That said, sub-linear isn't great...
+        A Bloom filter is "a space-efficient probabilistic data structure"
+        which lets us check whether an item is _probably_ or _definitely not_
+        in a set. This is good enough for our purposes since we just want to
+        know whether something can safely be removed and false negatives
+        (not safe to remove when it actually is) are acceptable.
+
+        We set our capacity to some large number, and the error rate to whatever
+        we think is acceptable.
+
+        TODO make this configurable ?
+    */
+    var BLOOM_CAPACITY = (1 << 20) - 1; // over a million items
+    var BLOOM_ERROR = 1 / 10000; // an error rate of one in ten thousand
+
+    // we'll use one filter for the set of active documents
+    var activeDocs = Bloom.optimalFilter(BLOOM_CAPACITY, BLOOM_ERROR);
+    // and another one for the set of pinned documents
+    var pinnedDocs = Bloom.optimalFilter(BLOOM_CAPACITY, BLOOM_ERROR);
+
+    var startTime = +new Date();
+    var msSinceStart = function () {
+        return (+new Date()) - startTime;
+    };
+
+    var loadStorage = function () {
+        store = Env.store;
+        pinStore = Env.pinStore;
+        Log = Env.Log;
+        blobs = Env.blobStore;
+    };
+
     var categorizeChannelsByActivity = function (w) {
         var channels = 0;
         var active = 0;
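The premiumSafeKeys pipeline in the block above maps each configured account id to a canonical public key, drops whatever fails to parse, and escapes the survivors into the on-disk 'safeKey' form. A sketch of the shape, with stand-in helpers that are illustrative only and not CryptPad's real Keys.canonicalize or Util.escapeKeyCharacters:

// stand-ins for illustration only
var canonicalize = function (id) {
    // return a normalized key, or undefined for unparseable input
    return /^[\w+/=-]+$/.test(id) ? id : undefined;
};
var escapeKeyCharacters = function (key) {
    return key.replace(/\//g, '-'); // hypothetical escaping rule
};

var limits = { 'alice/key=': {}, '!! not a key !!': {} };
var premiumSafeKeys = Object.keys(limits)
    .map(canonicalize)
    .filter(Boolean)            // drops the undefined produced by the bad id
    .map(escapeKeyCharacters);  // [ 'alice-key=' ]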
@@ -566,9 +613,6 @@ module.exports = function (Env, cb) {
     };
 
     nThen(loadStorage)
-    .nThen(removeArchivedChannels)
-    .nThen(removeArchivedBlobProofs)
-    .nThen(removeArchivedBlobs)
 
     // iterate over all documents and add them to a bloom filter if they have been active
     .nThen(categorizeChannelsByActivity)
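The shortened chain above relies on nThen's waterfall semantics, used throughout this file: every async callback wrapped with w(...) inside one block must fire before the next .nThen block runs, and w.abort() cancels the rest of the chain. A small self-contained sketch of the pattern:

var nThen = require("nthen");

nThen(function (w) {
    // both timers are registered on the same `w`, so they run in
    // parallel and the next block waits for whichever finishes last
    setTimeout(w(function () { console.log("step 1a"); }), 50);
    setTimeout(w(function () { console.log("step 1b"); }), 10);
}).nThen(function (w) {
    console.log("step 2: runs only after 1a and 1b");
    // calling w.abort() here would prevent step 3 from ever running
}).nThen(function () {
    console.log("step 3");
});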
@@ -590,3 +634,5 @@ module.exports = function (Env, cb) {
         complete(void 0, report);
     });
 };
+
+module.exports.archived = evictArchived;
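Exposing the extracted function as module.exports.archived is what lets the new standalone script below trigger archive deletion on its own schedule. A minimal caller, assuming an Env already populated the way the script's prepareEnv populates it:

var Eviction = require("../lib/eviction");

// Env is assumed to already carry store, pinStore, blobStore, Log,
// and an archiveRetentionTime expressed in days
Eviction.archived(Env, function () {
    console.log("archives older than the retention period were removed");
});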
@@ -0,0 +1,102 @@
+var Eviction = require("../lib/eviction");
+var nThen = require("nthen");
+var Store = require("../lib/storage/file");
+var BlobStore = require("../lib/storage/blob");
+
+var Quota = require("../lib/commands/quota");
+var Environment = require("../lib/env");
+var Decrees = require("../lib/decrees");
+
+var config = require("../lib/load-config");
+
+var Env = Environment.create(config);
+
+var loadPremiumAccounts = function (Env, cb) {
+    nThen(function (w) {
+        // load premium accounts
+        Quota.updateCachedLimits(Env, w(function (err) {
+            if (err) {
+                Env.Log.error('EVICT_LOAD_PREMIUM_ACCOUNTS', {
+                    error: err,
+                });
+            }
+        }));
+    }).nThen(function (w) {
+        // load and apply decrees
+        Decrees.load(Env, w(function (err) {
+            if (err) {
+                Env.Log.error('EVICT_LOAD_DECREES', {
+                    error: err.code || err,
+                    message: err.message,
+                });
+            }
+        }));
+    }).nThen(function () {
+        //console.log(Env.limits);
+        cb();
+    });
+};
+
+var prepareEnv = function (Env, cb) {
+    //Quota.applyCustomLimits(Env);
+
+    nThen(function (w) {
+        /*  Database adaptors
+        */
+
+        // load the store which will be used for iterating over channels
+        // and performing operations like archival and deletion
+        Store.create(config, w(function (err, _) {
+            if (err) {
+                w.abort();
+                throw err;
+            }
+            Env.store = _;
+        }));
+
+        Store.create({
+            filePath: config.pinPath,
+        }, w(function (err, _) {
+            if (err) {
+                w.abort();
+                throw err;
+            }
+            Env.pinStore = _;
+        }));
+
+        // load the logging module so that you have a record of which
+        // files were archived or deleted at what time
+        var Logger = require("../lib/log");
+        Logger.create(config, w(function (_) {
+            Env.Log = _;
+        }));
+
+        config.getSession = function () {};
+        BlobStore.create(config, w(function (err, _) {
+            if (err) {
+                w.abort();
+                return console.error(err);
+            }
+            Env.blobStore = _;
+        }));
+    }).nThen(function (w) {
+        loadPremiumAccounts(Env, w(function (/* err */) {
+            //if (err) { }
+        }));
+    }).nThen(function () {
+        cb();
+    });
+};
+
+//console.log("starting");
+nThen(function (w) {
+    // load database adaptors and configuration values into the environment
+    prepareEnv(Env, w(function () {
+        //console.log("env prepared");
+
+    }));
+}).nThen(function (w) {
+    Eviction.archived(Env, w(function () {
+
+    }));
+});
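Because the main export keeps its (Env, cb) signature, the same bootstrap could drive the full eviction pass instead; only the final block would need to change. A sketch of that variant, with the error and report values following the complete(...) calls in lib/eviction.js above:

var Eviction = require("../lib/eviction");

// assumes Env was prepared exactly as prepareEnv does above
Eviction(Env, function (err, report) {
    if (err) { return void console.error(err); } // e.g. "NO_INACTIVE_TIME"
    console.log(report.launchTime); // plus whichever counters get filled in
});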