masto2gemlog/masto2gemlog.pl

77 lines
1.6 KiB
Perl
Executable File

#! /usr/bin/perl
# masto2gemlog: download the public RSS feed of a mastodon.social user and
# write its posts as gemtext, one file per day, to gemlog/<date>.gmi.
# Media attached to a post is downloaded to gemlog/img/ and linked.
#
# Usage: masto2gemlog.pl USERNAME
#
# Requires the external commands wget and xmllint. Feed data is passed
# through as raw bytes; no character-set conversion is done here.
use strict;
use warnings;

my $username = shift;
# The name is used in a URL and as a local file name, so reject anything
# that could change either (path separators, whitespace, a leading dash).
# NOTE(review): assumes account names only contain letters, digits, '_',
# '.' and '-' -- widen the pattern if that turns out to be too strict.
die "usage: $0 USERNAME\n"
    unless defined($username)
        && $username =~ /\A[A-Za-z0-9_][A-Za-z0-9_.-]*\z/;

my $rssurl = "https://mastodon.social/users/$username.rss";
my $fname  = "$username.rss";
my $dir    = "gemlog";

# -O makes wget overwrite $fname. Without it a second run stores the feed
# as "$fname.1" and the stale first download would be parsed again.
# List form: no shell is involved, so nothing in the arguments is expanded.
system('wget', '-O', $fname, $rssurl) == 0
    or die "wget failed for $rssurl\n";

my $count = xpath_value($fname, 'count(/rss/channel/item)');
die "could not count the items in $fname (got '$count')\n"
    unless $count =~ /\A\d+\z/;

my %post;    # file date => gemtext accumulated for that day

for my $i (1 .. $count) {
    my $date  = xpath_value($fname, "string(/rss/channel/item[$i]/pubDate)");
    my $text  = xpath_value($fname, "string(/rss/channel/item[$i]/description)");
    my $media = xpath_value($fname,
        "string(/rss/channel/item[$i]/*[name()='media:content']/\@url)");

    # figure out the date; without one there is no file to put the post in
    my $filedate = file_date($date);
    if (!defined $filedate) {
        warn "item $i: cannot parse date '$date', skipping\n";
        next;
    }

    $text = html_to_gemtext($text);

    if ($media ne '') {
        # The URL comes from the feed, so it must never reach a shell.
        # Best effort: the post is written even if the download fails.
        system('wget', '-P', "$dir/img", '-nc', $media);
        $media =~ s{.*/}{};          # keep only the file name
        $media = "=> img/$media\n";
    }

    $post{$filedate} = "# $filedate\n" unless exists $post{$filedate};
    $post{$filedate} .= "## $date\n\n$text\n\n$media\n";
}

# Make sure the output directory exists before writing into it.
unless (-d $dir) {
    mkdir $dir or die "cannot create directory $dir: $!\n";
}

for my $d (sort keys %post) {
    print "$d\n";
    my $path = "$dir/$d.gmi";
    open(my $out, '>', $path) or die "cannot write $path: $!\n";
    print {$out} $post{$d};
    close $out or die "cannot close $path: $!\n";
}

# Run "xmllint --xpath EXPR FILE" and return what it prints, without the
# trailing newline. xmllint is started without a shell, so EXPR needs no
# shell quoting. Dies only if xmllint cannot be started; callers validate
# the returned value themselves.
sub xpath_value {
    my ($file, $expr) = @_;
    open(my $pipe, '-|', 'xmllint', '--xpath', $expr, $file)
        or die "cannot run xmllint: $!\n";
    my $out = do { local $/; <$pipe> };
    close $pipe;
    $out = '' unless defined $out;
    $out =~ s/\n\z//;
    return $out;
}

# Build the date part of the output file name from a pubDate string:
# " 01 Jan 2024 " inside the value gives "2024-Jan-01".
# Returns undef when the value does not contain such a date.
sub file_date {
    my ($pubdate) = @_;
    return unless $pubdate =~ / (\d+) (\S+) (\d+) /;
    return "$3-$2-$1";
}

# Convert the HTML of one post description to gemtext and return it.
sub html_to_gemtext {
    my ($text) = @_;

    # paragraphs and line breaks
    $text =~ s{</?p>}{}g;
    $text =~ s{<br />}{\n}g;

    # Hashtag links are reduced to plain " #tag" text, e.g.
    # <a href="https://mastodon.social/tags/framagit" class="mention hashtag" rel="tag">#<span>framagit</span></a>
    # [^<]* instead of .* keeps several hashtags on one line separate.
    $text =~ s{<a href="[^"]+" class="mention hashtag" rel="tag">#<span>([^<]*)</span></a>}{ #$1}g;

    # Every remaining link with an href becomes a gemtext link line of its
    # own. The non-greedy match stops at the first </a>, so two links on
    # one line are converted separately instead of being merged.
    $text =~ s{<a\s[^>]*?href="([^"]+)"[^>]*>.*?</a>}{\n=> $1\n}gs;

    # convert quoted chars
    $text =~ s{&#39;}{'}g;
    $text =~ s{&quot;}{"}g;
    $text =~ s{&lt;}{<}g;
    $text =~ s{&gt;}{>}g;
    $text =~ s{&amp;}{&}g;    # last, so "&amp;lt;" ends up as "&lt;", not "<"

    return $text;
}