Added initial version

This commit is contained in:
DracoBlue 2016-02-11 15:52:15 +01:00
commit c9b99a5c89
7 changed files with 431 additions and 0 deletions

1
.gitignore vendored Normal file
View File

@ -0,0 +1 @@
/node_modules

47
README.md Normal file
View File

@ -0,0 +1,47 @@
# Twtxt Registry Server
A small registry server for twtxt that lets you query for mentions and hashtags.
## Prerequisites
* [elasticsearch](https://www.elastic.co/downloads/elasticsearch) server
* [npm](https://nodejs.org) installed
## Installation
``` console
$ npm install
$ export PORT=8080
$ export ELASTICSEARCH_HOST=localhost
$ export ELASTICSEARCH_PORT=9200
$ node src/server.js
```
## Example API calls
Add a new Twtxt User to the Registry:
``` console
$ curl -X POST http://localhost:8080/api/plain/users?url=https://dracoblue.net/twtxt.txt
OK
```
Retrieve a list of all mentions of a specific twtxt User (e.g. `https://buckket.org/twtxt.txt`):
``` console
$ curl http://localhost:8080/api/plain/mentions?url=https://buckket.org/twtxt.txt
http://workspaces.local/twtxt.txt 2016-02-09T12:57:59.000Z @<buckket https://buckket.org/twtxt.txt> something like https://gitter.im/ or a freenode channel?
http://workspaces.local/twtxt.txt 2016-02-08T22:51:47.000Z @<buckket https://buckket.org/twtxt.txt> looks nice ;)
```
Retrieve a list of all tweets with a specific tag (e.g. `#twtxt`):
``` console
$ curl http://localhost:8080/api/plain/tag/twtxt
http://workspaces.local/twtxt.txt 2016-02-06T21:32:02.000Z @erlehmann is messing with timestamps in @buckket #twtxt :)
http://workspaces.local/twtxt.txt 2016-02-06T12:14:18.000Z Simple nodejs script to convert your twitter timeline to twtxt: https://t.co/txnWsC5jvA ( find my #twtxt at https://t.co/uN1KDXwJ8B )
```
## License
This work is copyrighted by DracoBlue (http://dracoblue.net) and licensed under the terms of the MIT License.

25
package.json Normal file
View File

@ -0,0 +1,25 @@
{
"name": "twtxt-registry",
"version": "1.0.0",
"description": "",
"main": "server.js",
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1"
},
"repository": {
"type": "git",
"url": "git+https://github.com/DracoBlue/twtxt-registry.git"
},
"dependencies": {
"elasticsearch": "^10.1.3",
"express": "~4.9.7",
"md5": "^2.0.0",
"moment": "^2.11.2"
},
"author": "DracoBlue <JanS@DracoBlue.de>",
"license": "MIT",
"bugs": {
"url": "https://github.com/DracoBlue/twtxt-registry/issues"
},
"homepage": "https://github.com/DracoBlue/twtxt-registry#readme"
}

146
src/Storage.js Normal file
View File

@ -0,0 +1,146 @@
var md5 = require('md5');
var TwtxtTxt = require('./twtxt-utils/TwtxtTxt');
var urlUtils = require('url');
var http = require('http');
var https = require('https');
/**
 * Persistence layer of the registry, backed by an elasticsearch client.
 *
 * Registered feed urls are kept as documents of type `users`, parsed tweets
 * as documents of type `tweets`, both inside the index named `index`.
 *
 * @param {Object} client Elasticsearch client; the methods below call its
 *                        `create`, `update`, `search` and `scroll` functions.
 */
var Storage = function(client) {
    this.client = client;
};

/**
 * Wraps a value as a quoted phrase for a query-string (`q`) search.
 * Backslashes and double quotes are escaped so the value cannot terminate the
 * phrase early and inject additional query syntax.
 */
var quoteQueryPhrase = function(value) {
    return '"' + String(value).replace(/(["\\])/g, '\\$1') + '"';
};

/**
 * Runs a phrase search on one field of the stored tweets and hands the
 * matching tweet documents to `cb`.
 */
var searchTweetsByField = function(client, field, value, cb) {
    client.search({
        index: 'index',
        type: 'tweets',
        q: field + ':' + quoteQueryPhrase(value),
        // `size` is the search parameter that controls the number of hits.
        size: 20
    }, function(error, response) {
        if (error || !response || !response.hits) {
            // Keep the first argument an array so callers that only look at
            // the tweets keep working; the failure travels as second argument.
            cb([], error || new Error('Unexpected search response'));
            return;
        }
        cb(response.hits.hits.map(function(hit) {
            return hit._source;
        }));
    });
};

/**
 * Registers a feed url. The document id is the md5 of the url, so the same
 * url always maps to the same document.
 *
 * @param {string}   url Url of the twtxt feed.
 * @param {Function} cb  Passed straight to `client.create`.
 */
Storage.prototype.addUrl = function(url, cb) {
    var that = this;
    that.client.create({
        index: 'index',
        type: 'users',
        id: md5(url),
        body: {
            url: url
        }
    }, cb);
};

/**
 * Stores a tweet, creating the document when it does not exist yet and
 * merging into it otherwise.
 *
 * A single upserting update replaces the former "update, ignore the outcome,
 * then create" sequence, which issued two requests per tweet.
 *
 * @param {Object}   tweet Tweet document; `tweet.id` is used as document id.
 * @param {Function} cb    Passed straight to `client.update`.
 */
Storage.prototype.storeTweet = function(tweet, cb) {
    var that = this;
    that.client.update({
        index: 'index',
        type: 'tweets',
        id: tweet.id,
        body: {
            doc: tweet,
            doc_as_upsert: true
        }
    }, cb);
};

/**
 * Iterates over all registered feed urls using a scan/scroll search.
 *
 * `cb` is invoked once per url, each call deferred with `process.nextTick`.
 * When a request fails the iteration stops and the error is logged; `cb` is
 * not called for the failure.
 *
 * @param {Function} cb Called as `cb(url)`.
 */
Storage.prototype.forEachUrl = function(cb) {
    var that = this;

    var doScroll = function(scrollId) {
        // NOTE(review): `limit` and `type` are kept exactly as before; they
        // look like leftovers rather than scroll parameters - confirm.
        that.client.scroll({
            scrollId: scrollId,
            limit: 1,
            type: 'users',
            scroll: '30s'
        }, function(error, response) {
            if (error || !response || !response.hits) {
                console.error('Scrolling registered urls failed: ' + (error && error.message));
                return;
            }
            response.hits.hits.forEach(function (hit) {
                process.nextTick(function() {
                    cb(hit._source.url);
                });
            });
            if (response.hits.hits.length) {
                // Continue with the id of the latest response; fall back to
                // the previous one when the response does not carry an id.
                doScroll(response._scroll_id || scrollId);
            }
        });
    };

    that.client.search({
        index: 'index',
        scroll: '30s',
        limit: 1,
        type: 'users',
        search_type: 'scan'
    }, function(error, response) {
        if (error || !response) {
            console.error('Listing registered urls failed: ' + (error && error.message));
            return;
        }
        doScroll(response._scroll_id);
    });
};

/**
 * Finds tweets carrying the given hash tag.
 *
 * @param {string}   hashTag Hash tag to search for in the `hashTags` field.
 * @param {Function} cb      Called as `cb(tweets)`; on failure as
 *                           `cb([], error)`.
 */
Storage.prototype.getTweetsByHashTag = function(hashTag, cb) {
    searchTweetsByField(this.client, 'hashTags', hashTag, cb);
};

/**
 * Finds tweets mentioning the given feed url.
 *
 * @param {string}   twtxtUrl Url to search for in the `mentions` field.
 * @param {Function} cb       Called as `cb(tweets)`; on failure as
 *                            `cb([], error)`.
 */
Storage.prototype.getTweetsByMentions = function(twtxtUrl, cb) {
    searchTweetsByField(this.client, 'mentions', twtxtUrl, cb);
};

/**
 * Fetches every registered feed right away and then again every 60 seconds,
 * storing all tweets found. Calling it again replaces the running interval.
 *
 * Fetching is best effort: a feed that cannot be fetched is logged and
 * skipped, it never aborts the update of the other feeds.
 */
Storage.prototype.startUpdating = function() {
    var that = this;
    clearInterval(this.updatingInterval);

    // Parses a fetched feed and stores each of its tweets.
    var storeFeed = function(url, content) {
        var txt = new TwtxtTxt(url, content);
        txt.getTweets().forEach(function(tweet) {
            that.storeTweet(tweet, function(error) {
                if (error) {
                    console.error('Storing tweet ' + tweet.id + ' failed: ' + error.message);
                }
            });
        });
    };

    // Downloads a single feed; only http and https urls are attempted.
    var fetchFeed = function(url) {
        var protocol = typeof url === 'string' ? urlUtils.parse(url)['protocol'] : null;
        var transport = null;
        if (protocol === "https:") {
            transport = https;
        } else if (protocol === "http:") {
            transport = http;
        }
        if (!transport) {
            // Handing any other protocol to http.get would throw synchronously.
            console.error('Skipping feed with unsupported url: ' + url);
            return;
        }
        try {
            transport.get(url, function(res) {
                if (res.statusCode !== 200) {
                    // Drain the response so the socket is released, but do
                    // not parse error pages or redirects as a feed.
                    res.resume();
                    return;
                }
                var chunks = [];
                res.on('data', function(chunk) {
                    chunks.push(chunk);
                }).on('end', function() {
                    storeFeed(url, Buffer.concat(chunks).toString());
                });
            }).on('error', function (e) {
                console.error('Fetching ' + url + ' failed: ' + e.message);
            });
        } catch (e) {
            console.error('Fetching ' + url + ' failed: ' + e.message);
        }
    };

    var updateAllUrls = function() {
        that.forEachUrl(fetchFeed);
    };

    this.updatingInterval = setInterval(function() {
        updateAllUrls();
    }, 60000);
    updateAllUrls();
};
module.exports = Storage;

59
src/plainApi.js Normal file
View File

@ -0,0 +1,59 @@
var plainApi = function(storage) {
var express = require('express');
var api = express.Router();
api.use(function (req, res, next) {
res.set('Content-Type', 'text/plain');
next();
});
api.get('/tag/:tag', function (req, res) {
if (!req.params.tag) {
res.sendStatus(400);
res.end();
return;
}
storage.getTweetsByHashTag("#" + req.params.tag, function (tweets) {
var response = [];
tweets.forEach(function (tweet) {
response.push(tweet.author_url + "\t" + tweet.timestamp + "\t" + tweet.text);
});
res.send(response.join("\n"));
})
});
api.get('/mentions', function (req, res) {
if (!req.query.url) {
res.sendStatus(400);
res.end();
return;
}
storage.getTweetsByMentions(req.query.url, function (tweets) {
var response = [];
tweets.forEach(function (tweet) {
response.push(tweet.author_url + "\t" + tweet.timestamp + "\t" + tweet.text);
});
res.send(response.join("\n"));
})
});
api.post('/users', function (req, res) {
if (!req.query.url) {
res.sendStatus(400);
res.end();
return;
}
storage.addUrl(req.query.url, function () {
res.send("OK");
});
});
return api;
};
module.exports = plainApi;

36
src/server.js Normal file
View File

@ -0,0 +1,36 @@
// Require our dependencies
var express = require('express');
var elasticsearch = require('elasticsearch');
var Storage = require('./Storage');
var plainApi = require('./plainApi');
var http = require('http');
// The listening port comes from the environment and falls back to 8080.
var port = process.env.PORT || 8080;
var app = express();

// Responses are served without etag headers.
app.disable('etag');

// Address of the elasticsearch node as "host:port"; both parts can be
// overridden through the environment.
var elasticsearchHost = process.env.ELASTICSEARCH_HOST || "localhost";
var elasticsearchPort = process.env.ELASTICSEARCH_PORT || "9200";
var elasticsearchClient = new elasticsearch.Client({
    host: elasticsearchHost + ":" + elasticsearchPort,
    log: 'warning'
});
var storage = new Storage(elasticsearchClient);

// All plain text API routes live below /api/plain/.
app.use('/api/plain/', plainApi(storage));

// Feed updating starts only after the server is accepting connections.
var onListening = function() {
    console.log('twtxt registry listening on port ' + port);
    storage.startUpdating();
};
var server = http.createServer(app).listen(port, onListening);

// Feeds registered on every start; the outcome of each registration is
// deliberately not inspected.
[
    "https://buckket.org/twtxt.txt",
    "https://buckket.org/twtxt_news.txt",
    "https://dracoblue.net/twtxt.txt"
].forEach(function(seedUrl) {
    storage.addUrl(seedUrl, function() {
    });
});

117
src/twtxt-utils/TwtxtTxt.js Normal file
View File

@ -0,0 +1,117 @@
var moment = require('moment');
var urlUtils = require('url');
var md5 = require('md5');
/**
 * Parsed representation of one twtxt feed.
 *
 * @param {string} url  Url the feed belongs to; stored as `author_url` of
 *                      every tweet and used to derive the tweet ids.
 * @param {string} body Raw feed content, one tweet per line in the form
 *                      "<timestamp>\t<text>".
 */
var TwtxtTxt = function(url, body) {
    this.body = body;
    this.url = url;
    this.setTweetsByBody(this.body);
};

/**
 * @returns {Array<Object>} Tweets produced by the last `setTweetsByBody` call.
 */
TwtxtTxt.prototype.getTweets = function() {
    return this.tweets;
};

/**
 * Renders the text of a tweet as an HTML fragment.
 *
 * Mentions become links: "@<name url>" is labelled with the name, "@<url>"
 * with the hostname of the url (or the url itself when it has none). All
 * other text is entity-escaped because it originates from remote feeds.
 *
 * The text is scanned once and the output assembled piece by piece. Nothing
 * is fed through a replacement string, so sequences like "$&" inside a feed
 * cannot re-insert the match and keep the rendering from terminating.
 *
 * NOTE(review): the scheme of the url placed into `href` is not restricted
 * here; consumers embedding this fragment may want to allow http(s) only.
 */
var renderTextAsHtml = function(txt, text) {
    // Group 1 + 2: long form (name, url). Group 3: short form (url only).
    var mentionPattern = /@<([^ ]+) ([^> ]+)>|@<([^> ]+)>/g;
    var html = '';
    var offset = 0;
    var match = mentionPattern.exec(text);
    while (match) {
        var url = match[2] || match[3];
        var label = match[1] || urlUtils.parse(url)['hostname'] || url;
        html += txt.encodeXml(text.slice(offset, match.index));
        html += '<a href="' + txt.encodeXml(url) + '" class="username">@' + txt.encodeXml(label) + '</a>';
        offset = mentionPattern.lastIndex;
        match = mentionPattern.exec(text);
    }
    return html + txt.encodeXml(text.slice(offset));
};

/**
 * Parses raw feed content and replaces `this.tweets` with the result.
 *
 * Lines that are empty, have no tab separator or whose first column is not a
 * timestamp accepted by moment are skipped. Each tweet consists of:
 *   id         md5 of "<feed url>\t<trimmed line>"
 *   timestamp  ISO 8601 string
 *   hashTags   result of extractHashTagsByRow
 *   mentions   result of extractMentionsByRow
 *   author_url url of this feed
 *   body       HTML rendering of the text
 *   text       original text
 *
 * @param {string} body Raw feed content; a missing value yields no tweets.
 */
TwtxtTxt.prototype.setTweetsByBody = function(body) {
    var that = this;
    this.tweets = [];
    String(body || "").split("\n").forEach(function(row) {
        row = row.trim();
        if (!row) {
            return;
        }
        var match = row.match(/^([^\t]+)\t(.+)/);
        if (!match) {
            return;
        }
        var timestamp = moment(match[1]);
        if (!timestamp.isValid()) {
            return;
        }
        var text = match[2].trim();
        that.tweets.push({
            id: md5(that.url + "\t" + row),
            timestamp: timestamp.toISOString(),
            hashTags: that.extractHashTagsByRow(text),
            mentions: that.extractMentionsByRow(text),
            author_url: that.url,
            body: renderTextAsHtml(that, text),
            text: text
        });
    });
};

// Characters with a special meaning in markup and their entities.
var xml_special_to_escaped_one_map = {
    '&': '&amp;',
    '"': '&quot;',
    '<': '&lt;',
    '>': '&gt;'
};

// Inverse of the map above.
var escaped_one_to_xml_special_map = {
    '&amp;': '&',
    '&quot;': '"',
    '&lt;': '<',
    '&gt;': '>'
};

/**
 * Escapes `&`, `"`, `<` and `>` so the string can be placed into element
 * content or a double quoted attribute.
 *
 * @param {string} string Text to escape.
 * @returns {string} Escaped text.
 */
TwtxtTxt.prototype.encodeXml = function(string) {
    return string.replace(/[&"<>]/g, function(character) {
        return xml_special_to_escaped_one_map[character];
    });
};

/**
 * Reverses `encodeXml`: turns the four entities back into their characters.
 * Everything else, including bare special characters, is left untouched.
 *
 * @param {string} string Text containing entities.
 * @returns {string} Decoded text.
 */
TwtxtTxt.prototype.decodeXml = function(string) {
    return string.replace(/&(?:amp|quot|lt|gt);/g, function(entity) {
        return escaped_one_to_xml_special_map[entity];
    });
};

/**
 * Collects the hash tags of a tweet text, including the leading `#`.
 *
 * @param {string} string Tweet text.
 * @returns {Array<string>} Hash tags in order of appearance.
 */
TwtxtTxt.prototype.extractHashTagsByRow = function(string) {
    return string.match(/(#[^\s#<>'"]+)/g) || [];
};

/**
 * Collects the urls mentioned in a tweet text.
 *
 * @param {string} body Tweet text.
 * @returns {Array<string>} Urls of all "@<name url>" mentions followed by the
 *                          urls of all "@<url>" mentions.
 */
TwtxtTxt.prototype.extractMentionsByRow = function(body) {
    var mentions = [];
    // Appends the first capture group of every match of a global pattern.
    var collectUrls = function(pattern) {
        var match = pattern.exec(body);
        while (match) {
            mentions.push(match[1]);
            match = pattern.exec(body);
        }
    };
    collectUrls(/@<[^ ]+ ([^> ]+)>/g);
    collectUrls(/@<([^> ]+)>/g);
    return mentions;
};
module.exports = TwtxtTxt;