fixes, added a fail count to the db, wip output

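If your database was created before this commit, you need to add the new `fails` column manually (`create table if not exists` does not alter an existing table):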
`alter table fetched add fails integer default 0;`
This commit is contained in:
AdamK2003 2023-07-05 01:59:42 +02:00
parent 72d1829842
commit f6f50ed035
No known key found for this signature in database
GPG Key ID: 656E9DB475B09927
6 changed files with 88 additions and 8 deletions

View File

@ -120,7 +120,7 @@ const getNextPage = async (instance, path, params, client, maxId, count, eventEm
}
url = url.replace('&', '?');
logger.info(`${count} posts remaining on ${instance}${path}`);
logger.info(`${count} posts remaining on ${instance}${path}`, {url});
let response;

View File

@ -37,7 +37,7 @@ db.open()
db.run('create table if not exists fetched (object text, status text, instance text, type text, runTimestamp integer, constraint pk_obj_inst primary key (object, instance))')
db.run('create table if not exists fetched (object text, status text, instance text, type text, runTimestamp integer, fails integer default 0, constraint pk_obj_inst primary key (object, instance))')
const pino = require('pino');

74
outputs/dbPending.js Normal file
View File

@ -0,0 +1,74 @@
/*
masto-backfill - A tool to backfill Mastodon instances with posts from other instances
Copyright (C) 2023 Adam K
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
const OutputInterface = require('./outputClass.js');
// This output just saves posts and users as `pending` in the database for later fetching
// Output that only records posts/users as `pending` in the `fetched` table so
// they can be fetched later; it performs no network requests itself.
// NOTE(review): this output is still registered and logged under the name
// 'template' (copied from the template output) — confirm whether it should be
// registered under its own name (e.g. 'dbPending') before relying on the
// `type` column or log fields to tell the two apart.
const TemplateOutput = new OutputInterface(
  'template',

  /**
   * Called once when the output is initialized.
   *
   * @param {string} name - Name of this output instance.
   * @param {object} logger - Parent logger; a child logger is derived from it.
   * @param {object} options - Per-output options (`dbName` and `logLevel` are read here).
   * @param {object} globalOptions - Global options (unused by this output).
   * @returns {object} The initialized output (`this`).
   */
  function (name, logger, options, globalOptions) {
    this.name = name;
    // Rows are keyed by dbName in the `instance` column; defaults to the output name.
    this.dbName = options.dbName || name;
    this.logger = logger.child({output: 'template', name: name});
    if (options?.logLevel) this.logger.level = options.logLevel;

    this.fetched = new Set();
    this.errors = new Set();

    return this;
  },

  /**
   * Called for each post/user. Marks the object as `pending` unless a row with
   * status `success` or `pending` already exists for this output's dbName.
   *
   * @param {string} query - The post/user to enqueue.
   * @param {object} db - Database handle exposing an async `all(sql, params)`.
   * @param {object} options - Per-call options (unused by this output).
   * @returns {Promise<boolean>} `true` when the object is queued, already
   *   known, or there was nothing to queue. Database errors are not caught
   *   here and reject the returned promise.
   */
  async function (query, db, options) {
    // Same guard as the FakeRelay output: with no query there is nothing to
    // enqueue, and binding a missing value would store a junk NULL-keyed row.
    if (!query) {
      this.logger.warn(`No query provided for ${this.name}`);
      return true;
    }

    const existing = await db.all("SELECT * FROM fetched WHERE object = ? and instance = ? and status in ('success', 'pending')", [query, `${this.dbName}`]);
    if (existing.length > 0) {
      // already fetched or already queued
      return true;
    }

    // `this.outputName` is not assigned in this block — presumably provided by
    // OutputInterface; verify against outputClass.js.
    await db.all("INSERT INTO fetched (object, status, instance, type, runTimestamp) VALUES (?, 'pending', ?, ?, ?) ON CONFLICT(object,instance) DO nothing",
      [query, `${this.dbName}`, this.outputName, global.runTimestamp]);

    // Report success explicitly; falling off the end would return `undefined`.
    return true;
  },

  /**
   * Called once when the program is exiting; should do cleanup and report
   * whether it succeeded. There is nothing to clean up here — the main db
   * SHOULD NOT be closed by this output.
   *
   * @returns {boolean} Always `true`.
   */
  function () {
    return true;
  },

  /**
   * Called when the program is exiting to retry failed fetches; needs to be
   * synchronous. This output never records failures, so there is nothing to retry.
   *
   * @returns {number} Always `0`.
   */
  function () {
    return 0;
  }
);
module.exports = TemplateOutput;

View File

@ -76,6 +76,11 @@ const FakeRelayOutput = new OutputInterface(
},
async function (query, db, options) {
if(!query) {
this.logger.warn(`No query provided for ${this.name}`)
return true;
}
if(query.startsWith('@')) {
this.logger.debug(`User fetching not supported on ${this.name}`);
@ -110,7 +115,7 @@ const FakeRelayOutput = new OutputInterface(
return true;
} catch (e) {
this.logger.warn(`Error fetching ${query} on ${this.name}; error: ${e}`);
await db.all("INSERT INTO fetched (object, status, instance, type, runTimestamp) VALUES (?, 'failed', ?, ?, ?) ON CONFLICT(object,instance) DO UPDATE SET status = 'failed'",
await db.all("INSERT INTO fetched (object, status, instance, type, runTimestamp) VALUES (?, 'failed', ?, ?, ?) ON CONFLICT(object,instance) DO UPDATE SET status = 'failed', fails = fails + 1",
[query, `${this.dbName}`, this.outputName, global.runTimestamp]); // if unsuccessful
this.errorsCount++;
return false;

View File

@ -113,7 +113,7 @@ const MastoOutput = new OutputInterface(
} catch (e) {
this.logger.warn(`Error fetching ${query} on ${this.name}; error: ${e}`);
await db.all("INSERT INTO fetched (object, status, instance, type, runTimestamp) VALUES (?, 'failed', ?, ?, ?) ON CONFLICT(object,instance) DO UPDATE SET status = 'failed'",
await db.all("INSERT INTO fetched (object, status, instance, type, runTimestamp) VALUES (?, 'failed', ?, ?, ?) ON CONFLICT(object,instance) DO UPDATE SET status = 'failed', fails = fails + 1",
[query, `${this.dbName}`, this.outputName, global.runTimestamp]); // if unsuccessful
this.errorsCount++;

View File

@ -27,6 +27,7 @@ const TemplateOutput = new OutputInterface(
// will be called once when the output is initialized, should return a new instance of the output (`this`)
this.name = name;
this.dbName = options.dbName || name;
this.logger = logger.child({output: 'template', name: name});
if(options?.logLevel) this.logger.level = options.logLevel;
@ -50,20 +51,20 @@ const TemplateOutput = new OutputInterface(
// do the actual fetching here
await db.all("INSERT INTO fetched (object, status, instance, type, runTimestamp) VALUES (?, 'success', ?, ?, ?) ON CONFLICT(pk_obj_inst) DO UPDATE SET status = 'success'",
await db.all("INSERT INTO fetched (object, status, instance, type, runTimestamp) VALUES (?, 'success', ?, ?, ?) ON CONFLICT(object,instance) DO UPDATE SET status = 'success'",
[query, `${this.dbName}`, this.outputName, global.runTimestamp]); // if successful
// await db.all("INSERT INTO fetched (object, status, instance, type, runTimestamp) VALUES (?, 'failed', ?, ?, ?)",
// await db.all("INSERT INTO fetched (object, status, instance, type, runTimestamp) VALUES (?, 'failed', ?, ?, ?) ON CONFLICT(object,instance) DO UPDATE SET status = 'failed', fails = fails + 1",
// [query, `${this.dbName}`, this.outputName, global.runTimestamp]); // if unsuccessful
},
function () {
// will be called once when the program is exiting, should return true/false for whether the close was successful
// will be called once when the program is exiting, should do cleanup and return true/false for whether the cleanup was successful
return true;
},
function () { // needs to be synchronous
// will be called if error count > 0, should retry any failed fetches, return value doesn't matter
// will be called when the program is exiting, should asynchronously retry any failed fetches, should return the amount of failed fetches
return 0;
}