Fixes, added a fail count to the db, WIP output
You might need to add the new column to your existing db manually: `alter table fetched add fails integer default 0;`
This commit is contained in:
parent
72d1829842
commit
f6f50ed035
|
@ -120,7 +120,7 @@ const getNextPage = async (instance, path, params, client, maxId, count, eventEm
|
|||
}
|
||||
url = url.replace('&', '?');
|
||||
|
||||
logger.info(`${count} posts remaining on ${instance}${path}`);
|
||||
logger.info(`${count} posts remaining on ${instance}${path}`, {url});
|
||||
|
||||
let response;
|
||||
|
||||
|
|
2
index.js
2
index.js
|
@ -37,7 +37,7 @@ db.open()
|
|||
|
||||
|
||||
|
||||
db.run('create table if not exists fetched (object text, status text, instance text, type text, runTimestamp integer, constraint pk_obj_inst primary key (object, instance))')
|
||||
db.run('create table if not exists fetched (object text, status text, instance text, type text, runTimestamp integer, fails integer default 0, constraint pk_obj_inst primary key (object, instance))')
|
||||
|
||||
const pino = require('pino');
|
||||
|
||||
|
|
|
@ -0,0 +1,74 @@
|
|||
/*
|
||||
masto-backfill - A tool to backfill Mastodon instances with posts from other instances
|
||||
Copyright (C) 2023 Adam K
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
|
||||
const OutputInterface = require('./outputClass.js');
|
||||
|
||||
// This output just saves posts and users as `pending` in the database for later fetching
|
||||
|
||||
const TemplateOutput = new OutputInterface(
|
||||
'template',
|
||||
function (name, logger, options, globalOptions) {
|
||||
// will be called once when the output is initialized, should return a new instance of the output (`this`)
|
||||
|
||||
this.name = name;
|
||||
this.dbName = options.dbName || name;
|
||||
this.logger = logger.child({output: 'template', name: name});
|
||||
if(options?.logLevel) this.logger.level = options.logLevel;
|
||||
|
||||
// do initialization here
|
||||
|
||||
this.fetched = new Set();
|
||||
this.errors = new Set();
|
||||
|
||||
|
||||
return this;
|
||||
},
|
||||
async function (query, db, options) {
|
||||
// will be called for each post/user, should return true/false for whether the write was successful (only use false for retryable/unexpected errors, like network errors)
|
||||
|
||||
let dbResponse = await db.all("SELECT * FROM fetched WHERE object = ? and instance = ? and status in ('success', 'pending')", [query, `${this.dbName}`]);
|
||||
|
||||
if(dbResponse.length > 0) {
|
||||
// already fetched
|
||||
return true;
|
||||
}
|
||||
|
||||
// do the actual fetching here
|
||||
|
||||
await db.all("INSERT INTO fetched (object, status, instance, type, runTimestamp) VALUES (?, 'pending', ?, ?, ?) ON CONFLICT(object,instance) DO nothing",
|
||||
[query, `${this.dbName}`, this.outputName, global.runTimestamp]); // if successful
|
||||
|
||||
// await db.all("INSERT INTO fetched (object, status, instance, type, runTimestamp) VALUES (?, 'failed', ?, ?, ?)",
|
||||
// [query, `${this.dbName}`, this.outputName, global.runTimestamp]); // if unsuccessful
|
||||
|
||||
|
||||
},
|
||||
function () {
|
||||
// will be called once when the program is exiting, should do cleanup and return true/false for whether the cleanup was successful
|
||||
// nothing to clean up here, the main db SHOULD NOT be closed here
|
||||
return true;
|
||||
},
|
||||
function () { // needs to be synchronous
|
||||
// will be called when the program is exiting, should retry any failed fetches, return value doesn't matter
|
||||
return 0;
|
||||
}
|
||||
|
||||
);
|
||||
|
||||
module.exports = TemplateOutput;
|
|
@ -76,6 +76,11 @@ const FakeRelayOutput = new OutputInterface(
|
|||
},
|
||||
async function (query, db, options) {
|
||||
|
||||
if(!query) {
|
||||
this.logger.warn(`No query provided for ${this.name}`)
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
if(query.startsWith('@')) {
|
||||
this.logger.debug(`User fetching not supported on ${this.name}`);
|
||||
|
@ -110,7 +115,7 @@ const FakeRelayOutput = new OutputInterface(
|
|||
return true;
|
||||
} catch (e) {
|
||||
this.logger.warn(`Error fetching ${query} on ${this.name}; error: ${e}`);
|
||||
await db.all("INSERT INTO fetched (object, status, instance, type, runTimestamp) VALUES (?, 'failed', ?, ?, ?) ON CONFLICT(object,instance) DO UPDATE SET status = 'failed'",
|
||||
await db.all("INSERT INTO fetched (object, status, instance, type, runTimestamp) VALUES (?, 'failed', ?, ?, ?) ON CONFLICT(object,instance) DO UPDATE SET status = 'failed', fails = fails + 1",
|
||||
[query, `${this.dbName}`, this.outputName, global.runTimestamp]); // if unsuccessful
|
||||
this.errorsCount++;
|
||||
return false;
|
||||
|
|
|
@ -113,7 +113,7 @@ const MastoOutput = new OutputInterface(
|
|||
} catch (e) {
|
||||
this.logger.warn(`Error fetching ${query} on ${this.name}; error: ${e}`);
|
||||
|
||||
await db.all("INSERT INTO fetched (object, status, instance, type, runTimestamp) VALUES (?, 'failed', ?, ?, ?) ON CONFLICT(object,instance) DO UPDATE SET status = 'failed'",
|
||||
await db.all("INSERT INTO fetched (object, status, instance, type, runTimestamp) VALUES (?, 'failed', ?, ?, ?) ON CONFLICT(object,instance) DO UPDATE SET status = 'failed', fails = fails + 1",
|
||||
[query, `${this.dbName}`, this.outputName, global.runTimestamp]); // if unsuccessful
|
||||
|
||||
this.errorsCount++;
|
||||
|
|
|
@ -27,6 +27,7 @@ const TemplateOutput = new OutputInterface(
|
|||
// will be called once when the output is initialized, should return a new instance of the output (`this`)
|
||||
|
||||
this.name = name;
|
||||
this.dbName = options.dbName || name;
|
||||
this.logger = logger.child({output: 'template', name: name});
|
||||
if(options?.logLevel) this.logger.level = options.logLevel;
|
||||
|
||||
|
@ -50,20 +51,20 @@ const TemplateOutput = new OutputInterface(
|
|||
|
||||
// do the actual fetching here
|
||||
|
||||
await db.all("INSERT INTO fetched (object, status, instance, type, runTimestamp) VALUES (?, 'success', ?, ?, ?) ON CONFLICT(pk_obj_inst) DO UPDATE SET status = 'success'",
|
||||
await db.all("INSERT INTO fetched (object, status, instance, type, runTimestamp) VALUES (?, 'success', ?, ?, ?) ON CONFLICT(object,instance) DO UPDATE SET status = 'success'",
|
||||
[query, `${this.dbName}`, this.outputName, global.runTimestamp]); // if successful
|
||||
|
||||
// await db.all("INSERT INTO fetched (object, status, instance, type, runTimestamp) VALUES (?, 'failed', ?, ?, ?)",
|
||||
// await db.all("INSERT INTO fetched (object, status, instance, type, runTimestamp) VALUES (?, 'failed', ?, ?, ?) ON CONFLICT(object,instance) DO UPDATE SET status = 'failed', fails = fails + 1",
|
||||
// [query, `${this.dbName}`, this.outputName, global.runTimestamp]); // if unsuccessful
|
||||
|
||||
|
||||
},
|
||||
function () {
|
||||
// will be called once when the program is exiting, should return true/false for whether the close was successful
|
||||
// will be called once when the program is exiting, should do cleanup and return true/false for whether the cleanup was successful
|
||||
return true;
|
||||
},
|
||||
function () { // needs to be synchronous
|
||||
// will be called if error count > 0, should retry any failed fetches, return value doesn't matter
|
||||
// will be called when the program is exiting, should asynchronously retry any failed fetches, should return the amount of failed fetches
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue