Added initial version
This commit is contained in:
commit
c9b99a5c89
|
@ -0,0 +1 @@
|
|||
/node_modules
|
|
@ -0,0 +1,47 @@
|
|||
# Twtxt Registry Server
|
||||
|
||||
A small registry server for twtxt that lets you query for mentions and hashtags.
|
||||
|
||||
## Prerequisites
|
||||
|
||||
* [elasticsearch](https://www.elastic.co/downloads/elasticsearch) server
|
||||
* [npm](https://nodejs.org) installed
|
||||
|
||||
## Installation
|
||||
|
||||
``` console
|
||||
$ npm install
|
||||
$ export PORT=8080
|
||||
$ export ELASTICSEARCH_HOST=localhost
|
||||
$ export ELASTICSEARCH_PORT=9200
|
||||
$ node src/server.js
|
||||
```
|
||||
|
||||
## Example API calls
|
||||
|
||||
Add a new Twtxt User to the Registry:
|
||||
|
||||
``` console
|
||||
$ curl -X POST http://localhost:8080/api/plain/users?url=https://dracoblue.net/twtxt.txt
|
||||
OK
|
||||
```
|
||||
|
||||
Retrieve a list of all mentions of a specific twtxt User (e.g. `https://buckket.org/twtxt.txt`):
|
||||
|
||||
``` console
|
||||
$ curl http://localhost:8080/api/plain/mentions?url=https://buckket.org/twtxt.txt
|
||||
http://workspaces.local/twtxt.txt 2016-02-09T12:57:59.000Z @<buckket https://buckket.org/twtxt.txt> something like https://gitter.im/ or a freenode channel?
|
||||
http://workspaces.local/twtxt.txt 2016-02-08T22:51:47.000Z @<buckket https://buckket.org/twtxt.txt> looks nice ;)
|
||||
```
|
||||
|
||||
Retrieve a list of all tweets with a specific tag (e.g. `#twtxt`):
|
||||
|
||||
``` console
|
||||
$ curl http://localhost:8080/api/plain/tag/twtxt
|
||||
http://workspaces.local/twtxt.txt 2016-02-06T21:32:02.000Z @erlehmann is messing with timestamps in @buckket #twtxt :)
|
||||
http://workspaces.local/twtxt.txt 2016-02-06T12:14:18.000Z Simple nodejs script to convert your twitter timeline to twtxt: https://t.co/txnWsC5jvA ( find my #twtxt at https://t.co/uN1KDXwJ8B )
|
||||
```
|
||||
|
||||
## License
|
||||
|
||||
This work is copyrighted by DracoBlue (http://dracoblue.net) and licensed under the terms of the MIT License.
|
|
@ -0,0 +1,25 @@
|
|||
{
|
||||
"name": "twtxt-registry",
|
||||
"version": "1.0.0",
|
||||
"description": "",
|
||||
"main": "server.js",
|
||||
"scripts": {
|
||||
"test": "echo \"Error: no test specified\" && exit 1"
|
||||
},
|
||||
"repository": {
|
||||
"type": "git",
|
||||
"url": "git+https://github.com/DracoBlue/twtxt-registry.git"
|
||||
},
|
||||
"dependencies": {
|
||||
"elasticsearch": "^10.1.3",
|
||||
"express": "~4.9.7",
|
||||
"md5": "^2.0.0",
|
||||
"moment": "^2.11.2"
|
||||
},
|
||||
"author": "DracoBlue <JanS@DracoBlue.de>",
|
||||
"license": "MIT",
|
||||
"bugs": {
|
||||
"url": "https://github.com/DracoBlue/twtxt-registry/issues"
|
||||
},
|
||||
"homepage": "https://github.com/DracoBlue/twtxt-registry#readme"
|
||||
}
|
|
@ -0,0 +1,146 @@
|
|||
var md5 = require('md5');
|
||||
var TwtxtTxt = require('./twtxt-utils/TwtxtTxt');
|
||||
var urlUtils = require('url');
|
||||
var http = require('http');
|
||||
var https = require('https');
|
||||
|
||||
/**
 * Thin persistence layer on top of a search client.
 *
 * @param {Object} client - client object the prototype methods call
 *   (`create`, `update`, `search`, `scroll`); kept on `this.client`.
 */
function Storage(client) {
    this.client = client;
}
|
||||
|
||||
/**
 * Registers a twtxt feed url as a `users` document in the `index` index.
 * The document id is the md5 of the url.
 *
 * @param {string} url - feed url to register
 * @param {Function} cb - handed straight to `client.create` as its callback
 */
Storage.prototype.addUrl = function(url, cb) {
    var userDocument = {
        index: 'index',
        type: 'users',
        id: md5(url),
        body: {
            url: url
        }
    };

    this.client.create(userDocument, cb);
};
|
||||
|
||||
/**
 * Stores a tweet as a `tweets` document in the `index` index, keyed by
 * `tweet.id`.
 *
 * A single `index` request is used so the document is created when it is
 * missing and replaced when it already exists. The previous implementation
 * sent an `update` whose outcome was ignored and then always sent a `create`,
 * so every already-stored tweet produced an error on each refresh.
 *
 * @param {Object} tweet - tweet document; must carry an `id` property
 * @param {Function} cb - handed to `client.index` as its callback
 */
Storage.prototype.storeTweet = function(tweet, cb) {
    this.client.index({
        index: 'index',
        type: 'tweets',
        id: tweet.id,
        body: tweet
    }, cb);
};
|
||||
|
||||
/**
 * Walks all `users` documents with a scrolling search and calls `cb` with the
 * `url` of every hit. Each `cb` call is deferred with `process.nextTick`.
 *
 * Failed requests are logged and end the walk instead of throwing from inside
 * the client callback; `cb` has no error argument, so nothing is reported to
 * the caller.
 *
 * @param {Function} cb - called as `cb(url)` once per stored user document
 */
Storage.prototype.forEachUrl = function(cb) {
    var that = this;

    var reportFailure = function(error) {
        console.error('Storage.forEachUrl: could not read the registered urls', error);
    };

    var doScroll = function(scrollId) {
        // NOTE(review): `limit` and `type` are kept from the original request;
        // confirm the client actually honours them for scroll calls.
        that.client.scroll({
            scrollId: scrollId,
            limit: 1,
            type: 'users',
            scroll: '30s'
        }, function(error, response) {
            if (error || !response || !response.hits || !response.hits.hits) {
                reportFailure(error);
                return;
            }

            var hits = response.hits.hits;

            hits.forEach(function (hit) {
                process.nextTick(function() {
                    cb(hit._source.url);
                });
            });

            if (hits.length) {
                // Continue with the id returned by this page; fall back to the
                // previous one only when the response carries none.
                doScroll(response._scroll_id || scrollId);
            }
        });
    };

    that.client.search({
        index: 'index',
        scroll: '30s',
        limit: 1,
        type: 'users',
        search_type: 'scan'
    }, function(error, response) {
        if (error || !response) {
            reportFailure(error);
            return;
        }

        doScroll(response._scroll_id);
    });
};
|
||||
|
||||
/**
 * Looks up stored tweets whose `hashTags` field matches the given tag.
 *
 * @param {string} hashTag - tag including the leading `#`
 * @param {Function} cb - called as `cb(tweets)` with the `_source` of every
 *   hit; receives an empty array when the search fails
 */
Storage.prototype.getTweetsByHashTag = function(hashTag, cb) {
    // The tag comes from the request path: escape the characters that could
    // close the quoted phrase and inject extra query syntax.
    var phrase = String(hashTag).replace(/(["\\])/g, '\\$1');

    // NOTE(review): `limit` is kept from the original request; the client's
    // page-size option appears to be `size` - confirm against the client docs.
    this.client.search({
        index: 'index',
        type: 'tweets',
        q: "hashTags:\"" + phrase + "\"",
        limit: 20
    }, function(error, response) {
        if (error || !response || !response.hits || !response.hits.hits) {
            console.error('Storage.getTweetsByHashTag: search failed', error);
            cb([]);
            return;
        }

        cb(response.hits.hits.map(function(hit) {
            return hit._source;
        }));
    });
};
|
||||
|
||||
/**
 * Looks up stored tweets whose `mentions` field matches the given feed url.
 *
 * @param {string} twtxtUrl - url of the mentioned twtxt feed
 * @param {Function} cb - called as `cb(tweets)` with the `_source` of every
 *   hit; receives an empty array when the search fails
 */
Storage.prototype.getTweetsByMentions = function(twtxtUrl, cb) {
    // The url comes from the query string: escape the characters that could
    // close the quoted phrase and inject extra query syntax.
    var phrase = String(twtxtUrl).replace(/(["\\])/g, '\\$1');

    // NOTE(review): `limit` is kept from the original request; the client's
    // page-size option appears to be `size` - confirm against the client docs.
    this.client.search({
        index: 'index',
        type: 'tweets',
        q: "mentions:\"" + phrase + "\"",
        limit: 20
    }, function(error, response) {
        if (error || !response || !response.hits || !response.hits.hits) {
            console.error('Storage.getTweetsByMentions: search failed', error);
            cb([]);
            return;
        }

        cb(response.hits.hits.map(function(hit) {
            return hit._source;
        }));
    });
};
|
||||
|
||||
/**
 * Starts (or restarts) the periodic refresh: every 60 seconds, and once
 * immediately, each registered url is downloaded, parsed with `TwtxtTxt`, and
 * every parsed tweet is passed to `storeTweet`.
 *
 * Registered urls are arbitrary strings, so a url that is not http(s) or that
 * the http client rejects is logged and skipped rather than allowed to throw
 * out of the refresh and take the process down.
 */
Storage.prototype.startUpdating = function() {
    var that = this;

    clearInterval(this.updatingInterval);

    var logFailure = function(url, error) {
        console.error('Storage.startUpdating: could not update ' + url + ': ' + (error && error.message ? error.message : error));
    };

    var updateUrl = function(url) {
        var protocol = urlUtils.parse(url)['protocol'];
        var client;

        if (protocol === "https:") {
            client = https;
        } else if (protocol === "http:") {
            client = http;
        } else {
            logFailure(url, 'unsupported protocol ' + protocol);
            return;
        }

        client.get(url, function(res) {
            var chunks = [];

            res.on('data', function(chunk) {
                chunks.push(chunk);
            }).on('end', function() {
                try {
                    var txt = new TwtxtTxt(url, Buffer.concat(chunks).toString());

                    txt.getTweets().forEach(function(tweet) {
                        that.storeTweet(tweet, function() {
                        });
                    });
                } catch (e) {
                    logFailure(url, e);
                }
            });
        }).on('error', function (e) {
            logFailure(url, e);
        });
    };

    var updateAllUrls = function() {
        that.forEachUrl(function(url) {
            // `get` can throw synchronously (for example on a malformed url),
            // so each url is isolated from the others.
            try {
                updateUrl(url);
            } catch (e) {
                logFailure(url, e);
            }
        });
    };

    this.updatingInterval = setInterval(function() {
        updateAllUrls();
    }, 60000);

    updateAllUrls();
};
|
||||
|
||||
module.exports = Storage;
|
|
@ -0,0 +1,59 @@
|
|||
var plainApi = function(storage) {
|
||||
var express = require('express');
|
||||
var api = express.Router();
|
||||
|
||||
api.use(function (req, res, next) {
|
||||
res.set('Content-Type', 'text/plain');
|
||||
next();
|
||||
});
|
||||
|
||||
api.get('/tag/:tag', function (req, res) {
|
||||
if (!req.params.tag) {
|
||||
res.sendStatus(400);
|
||||
res.end();
|
||||
return;
|
||||
}
|
||||
|
||||
storage.getTweetsByHashTag("#" + req.params.tag, function (tweets) {
|
||||
var response = [];
|
||||
|
||||
tweets.forEach(function (tweet) {
|
||||
response.push(tweet.author_url + "\t" + tweet.timestamp + "\t" + tweet.text);
|
||||
});
|
||||
res.send(response.join("\n"));
|
||||
})
|
||||
});
|
||||
|
||||
api.get('/mentions', function (req, res) {
|
||||
if (!req.query.url) {
|
||||
res.sendStatus(400);
|
||||
res.end();
|
||||
return;
|
||||
}
|
||||
|
||||
storage.getTweetsByMentions(req.query.url, function (tweets) {
|
||||
var response = [];
|
||||
|
||||
tweets.forEach(function (tweet) {
|
||||
response.push(tweet.author_url + "\t" + tweet.timestamp + "\t" + tweet.text);
|
||||
});
|
||||
res.send(response.join("\n"));
|
||||
})
|
||||
});
|
||||
|
||||
api.post('/users', function (req, res) {
|
||||
if (!req.query.url) {
|
||||
res.sendStatus(400);
|
||||
res.end();
|
||||
return;
|
||||
}
|
||||
|
||||
storage.addUrl(req.query.url, function () {
|
||||
res.send("OK");
|
||||
});
|
||||
});
|
||||
|
||||
return api;
|
||||
};
|
||||
|
||||
module.exports = plainApi;
|
|
@ -0,0 +1,36 @@
|
|||
// Require our dependencies
|
||||
var express = require('express');
|
||||
var elasticsearch = require('elasticsearch');
|
||||
var Storage = require('./Storage');
|
||||
var plainApi = require('./plainApi');
|
||||
var http = require('http');
|
||||
|
||||
// The listening port comes from PORT and defaults to 8080
var port = process.env.PORT || 8080;
var app = express();

// Disable etag headers on responses
app.disable('etag');

// The elasticsearch node is taken from ELASTICSEARCH_HOST / ELASTICSEARCH_PORT
// and defaults to localhost:9200
var elasticsearchClient = new elasticsearch.Client({
    host: (process.env.ELASTICSEARCH_HOST || "localhost") + ":" + (process.env.ELASTICSEARCH_PORT || "9200"),
    log: 'warning'
});
var storage = new Storage(elasticsearchClient);

// Mount the plain-text API
app.use('/api/plain/', plainApi(storage));

// Start listening; the periodic feed refresh begins once the server is up
var server = http.createServer(app).listen(port, function onListening() {
    console.log('twtxt registry listening on port ' + port);
    storage.startUpdating();
});

// Register the initial set of feeds
[
    "https://buckket.org/twtxt.txt",
    "https://buckket.org/twtxt_news.txt",
    "https://dracoblue.net/twtxt.txt"
].forEach(function(seedUrl) {
    storage.addUrl(seedUrl, function() {
    });
});
|
|
@ -0,0 +1,117 @@
|
|||
var moment = require('moment');
|
||||
var urlUtils = require('url');
|
||||
var md5 = require('md5');
|
||||
|
||||
/**
 * In-memory representation of one twtxt feed.
 *
 * @param {string} url - url the feed was loaded from
 * @param {string} body - raw content of the feed; parsed immediately through
 *   `setTweetsByBody`
 */
function TwtxtTxt(url, body) {
    this.url = url;
    this.body = body;

    this.setTweetsByBody(body);
}

/**
 * @returns {Array} the tweets collected by the last `setTweetsByBody` call
 */
TwtxtTxt.prototype.getTweets = function() {
    var parsedTweets = this.tweets;

    return parsedTweets;
};
|
||||
|
||||
/**
 * Parses the raw feed content into `this.tweets`.
 *
 * Every non-empty line of the form `<timestamp>\t<text>` whose timestamp
 * `moment` accepts becomes one tweet with these fields:
 *   id         - md5 of the feed url, a tab and the trimmed line
 *   timestamp  - the timestamp as an ISO string
 *   hashTags   - result of `extractHashTagsByRow(text)`
 *   mentions   - result of `extractMentionsByRow(text)`
 *   author_url - the feed url
 *   body       - the text with mentions turned into `<a class="username">` links
 *   text       - the trimmed text as written in the feed
 * All other lines are skipped.
 *
 * @param {string} body - raw content of the feed
 */
TwtxtTxt.prototype.setTweetsByBody = function(body) {
    var that = this;
    this.tweets = [];

    body.split("\n").forEach(function(row) {
        row = (row || "").trim();

        if (!row) {
            return;
        }

        var match = row.match(/^([^\t]+)\t(.+)/);

        if (!match || !moment(match[1]).isValid()) {
            return;
        }

        var text = match[2].trim();
        var hashTags = that.extractHashTagsByRow(text);
        var mentions = that.extractMentionsByRow(text);

        // Replacer functions are used on purpose: with a replacement *string*,
        // sequences such as `$&` inside a feed-supplied name or url would be
        // expanded by String.replace, and re-inserting the matched mention made
        // the previous replace-until-no-match loop run forever.

        // `@<name url>` mentions
        var html = text.replace(/@<([^ ]+) ([^> ]+)>/g, function(mention, name, url) {
            return '<a href="' + that.encodeXml(url) + '" class="username">@' + that.encodeXml(name) + '</a>';
        });

        // `@<url>` mentions: labelled with the url's hostname when it has one
        html = html.replace(/@<([^> ]+)>/g, function(mention, url) {
            return '<a href="' + that.encodeXml(url) + '" class="username">@' + that.encodeXml(urlUtils.parse(url)['hostname'] || url) + '</a>';
        });

        that.tweets.push({
            id: md5(that.url + "\t" + row),
            timestamp: moment(match[1]).toISOString(),
            hashTags: hashTags,
            mentions: mentions,
            author_url: that.url,
            body: html,
            text: text
        });
    });
};
|
||||
|
||||
|
||||
// Maps each XML special character to the entity that escapes it.
// The values must be the entities themselves: with identity values the
// lookup in encodeXml escapes nothing.
var xml_special_to_escaped_one_map = {
    '&': '&amp;',
    '"': '&quot;',
    '<': '&lt;',
    '>': '&gt;'
};
|
||||
|
||||
// Maps each XML entity back to the character it stands for.
// The keys must be the entities matched by the decodeXml pattern below.
var escaped_one_to_xml_special_map = {
    '&amp;': '&',
    '&quot;': '"',
    '&lt;': '<',
    '&gt;': '>'
};

/**
 * Replaces every `&`, `"`, `<` and `>` in the string with the value stored
 * for that character in `xml_special_to_escaped_one_map`.
 *
 * @param {string} string - text to escape
 * @returns {string} the escaped text
 */
TwtxtTxt.prototype.encodeXml = function(string) {
    return string.replace(/([\&"<>])/g, function(str, item) {
        return xml_special_to_escaped_one_map[item];
    });
};

/**
 * Replaces the entities `&quot;`, `&lt;`, `&gt;` and `&amp;` with the
 * characters they stand for. The string is scanned once, so the output of a
 * replacement is never decoded a second time (`&amp;lt;` becomes `&lt;`).
 *
 * @param {string} string - text containing XML entities
 * @returns {string} the decoded text
 */
TwtxtTxt.prototype.decodeXml = function(string) {
    return string.replace(/(&quot;|&lt;|&gt;|&amp;)/g,
        function(str, item) {
            return escaped_one_to_xml_special_map[item];
        });
};
|
||||
|
||||
/**
 * Collects the hash tags of a tweet text: every `#` followed by one or more
 * characters that are not whitespace, `#`, `<`, `>`, `'` or `"`.
 *
 * @param {string} string - tweet text
 * @returns {Array} the tags including their leading `#`, in order of
 *   appearance; empty when the text has none
 */
TwtxtTxt.prototype.extractHashTagsByRow = function(string) {
    var foundTags = string.match(/(#[^\s#<>'"]+)/g);

    if (foundTags === null) {
        return [];
    }

    return foundTags;
};
|
||||
|
||||
|
||||
/**
 * Collects the urls mentioned in a tweet text.
 *
 * The urls of all `@<name url>` mentions come first, followed by the urls of
 * all `@<url>` mentions; within each group the order of appearance is kept.
 *
 * @param {string} body - tweet text
 * @returns {Array} the mentioned urls; empty when the text has no mentions
 */
TwtxtTxt.prototype.extractMentionsByRow = function(body) {
    var namedMentions = body.match(/@<([^ ]+ [^> ]+)>/g) || [];
    var urlsOfNamedMentions = namedMentions.map(function(mention) {
        return mention.match(/@<([^ ]+) ([^> ]+)>/)[2];
    });

    var plainMentions = body.match(/@<([^> ]+)>/g) || [];
    var urlsOfPlainMentions = plainMentions.map(function(mention) {
        return mention.match(/@<([^> ]+)>/)[1];
    });

    return urlsOfNamedMentions.concat(urlsOfPlainMentions);
};
|
||||
|
||||
module.exports = TwtxtTxt;
|
Reference in New Issue