README: newsboat sqlite3 export script: improvements
- Export read/unread state to a separate plain-text "urls" file, line by line.
- Handle whitespace control characters better. From the sfeed(1) man page: "The fields: title, id, author are not allowed to have newlines and TABs, all whitespace characters are replaced by a single space character. Control characters are removed." So do the reverse for newsboat as well: change whitespace characters which are also control characters (such as TABs and newlines) to a single space character.
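For illustration only (not part of the diff below): the normalization described above amounts to the gsub() sequence used by the new field() helper added further down; the sample input here is made up.

	printf 'Some\ttitle with a\nnewline and a TAB\036' |
	awk 'BEGIN { RS = "\x1e" }
	{
		gsub("^[[:space:]]*", "");  # trim leading whitespace
		gsub("[[:space:]]*$", "");  # trim trailing whitespace
		gsub("[[:space:]]", " ");   # each TAB/newline becomes a single space
		gsub("[[:cntrl:]]", "");    # drop any remaining control characters
		print;
	}'
	# prints: Some title with a newline and a TAB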
parent f18f4818ed
commit 7feab0fd88

README | 31
@@ -628,10 +628,12 @@ sfeedrc file and change the curl options "-L --max-redirs 0".
 Shellscript to export existing newsboat cached items from sqlite3 to the sfeed
 TSV format.
 
 #!/bin/sh
 # Export newsbeuter/newsboat cached items from sqlite3 to the sfeed TSV format.
 # The data is split per file per feed with the name of the newsboat title/url.
+# It writes the urls of the read items line by line to a "urls" file.
+#
 # Dependencies: sqlite3, awk.
 #
 # Usage: create some directory to store the feeds, run this script.
 
@@ -653,8 +655,8 @@ TSV format.
 SELECT
 i.pubDate, i.title, i.url, i.content, i.guid, i.author,
 i.enclosure_url,
-f.rssurl AS rssurl, f.title AS feedtitle --,
+f.rssurl AS rssurl, f.title AS feedtitle, i.unread --,
--- i.id, i.unread, i.enclosure_type, i.enqueued, i.flags, i.deleted,
+-- i.id, i.enclosure_type, i.enqueued, i.flags, i.deleted,
 -- i.base
 FROM rss_feed f
 INNER JOIN rss_item i ON i.feedurl = f.rssurl
@@ -668,13 +670,19 @@ TSV format.
 FS = "\x1f";
 RS = "\x1e";
 }
-# strip all control-chars for normal fields.
-function strip(s) {
+# normal non-content fields.
+function field(s) {
+gsub("^[[:space:]]*", "", s);
+gsub("[[:space:]]*$", "", s);
+gsub("[[:space:]]", " ", s);
 gsub("[[:cntrl:]]", "", s);
 return s;
 }
-# escape chars in content field.
-function escape(s) {
+# content field.
+function content(s) {
+gsub("^[[:space:]]*", "", s);
+gsub("[[:space:]]*$", "", s);
+# escape chars in content field.
 gsub("\\\\", "\\\\", s);
 gsub("\n", "\\n", s);
 gsub("\t", "\\t", s);
@@ -690,9 +698,14 @@ TSV format.
 print "Writing file: \"" fname "\" (title: " $9 ", url: " $8 ")" > "/dev/stderr";
 }
 
-print $1 "\t" strip($2) "\t" strip($3) "\t" escape($4) "\t" \
-"html" "\t" strip($5) "\t" strip($6) "\t" strip($7) \
+print $1 "\t" field($2) "\t" field($3) "\t" content($4) "\t" \
+"html" "\t" field($5) "\t" field($6) "\t" field($7) \
 > fname;
 
+# write urls of the read items to a file line by line.
+if ($10 == "0") {
+print $3 > "urls";
+}
+
 }'
 
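As a quick cross-check of the new "urls" output, the same read-item urls can be queried straight from the newsboat cache (a sketch, not part of the commit; ~/.newsboat/cache.db is the usual default cache path and may differ per setup):

	sqlite3 ~/.newsboat/cache.db 'SELECT url FROM rss_item WHERE unread = 0;'

Each output line is a plain url, which matches what the awk block above writes to "urls" when field $10 (i.unread) is "0".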