handle links better, variables for username and target dir
This commit is contained in:
parent
8eff3e496d
commit
978a4d17cf
|
@ -2,14 +2,24 @@
|
||||||
|
|
||||||
use strict;
|
use strict;
|
||||||
|
|
||||||
my $count=`xmllint --xpath 'count(/rss/channel/item)' alexlehm.rss`;
|
my $username=shift;
|
||||||
|
|
||||||
|
my $rssurl="https://mastodon.social/users/$username.rss";
|
||||||
|
|
||||||
|
system("wget $rssurl");
|
||||||
|
|
||||||
|
my $fname="$username.rss";
|
||||||
|
|
||||||
|
my $dir="gemlog";
|
||||||
|
|
||||||
|
my $count=`xmllint --xpath 'count(/rss/channel/item)' $fname`;
|
||||||
|
|
||||||
my %post;
|
my %post;
|
||||||
|
|
||||||
for(my $i=1;$i<=$count;$i++) {
|
for(my $i=1;$i<=$count;$i++) {
|
||||||
my $date=`xmllint --xpath "string(/rss/channel/item[$i]/pubDate)" alexlehm.rss`;
|
my $date=`xmllint --xpath "string(/rss/channel/item[$i]/pubDate)" $fname`;
|
||||||
my $text=`xmllint --xpath "string(/rss/channel/item[$i]/description)" alexlehm.rss`;
|
my $text=`xmllint --xpath "string(/rss/channel/item[$i]/description)" $fname`;
|
||||||
my $media=`xmllint --xpath "string(/rss/channel/item[$i]/*[name()='media:content']/\@url)" alexlehm.rss`;
|
my $media=`xmllint --xpath "string(/rss/channel/item[$i]/*[name()='media:content']/\@url)" $fname`;
|
||||||
|
|
||||||
chop $media;
|
chop $media;
|
||||||
|
|
||||||
|
@ -19,8 +29,6 @@ for(my $i=1;$i<=$count;$i++) {
|
||||||
|
|
||||||
my $filedate="$3-$2-$1";
|
my $filedate="$3-$2-$1";
|
||||||
|
|
||||||
print "$date $filedate\n";
|
|
||||||
|
|
||||||
# html to gemtext conversion
|
# html to gemtext conversion
|
||||||
|
|
||||||
$text=~s@<p>@@g;
|
$text=~s@<p>@@g;
|
||||||
|
@ -33,11 +41,20 @@ for(my $i=1;$i<=$count;$i++) {
|
||||||
|
|
||||||
$text=~s@<a href=\"[^\"]+\" class=\"mention hashtag\" rel=\"tag\">#<span>(.*)</span></a>@#$1@g;
|
$text=~s@<a href=\"[^\"]+\" class=\"mention hashtag\" rel=\"tag\">#<span>(.*)</span></a>@#$1@g;
|
||||||
|
|
||||||
|
if ($text=~m@(<a .*</a>)@) {
|
||||||
|
my $a_tag=$1;
|
||||||
|
if($a_tag =~ m@href=\"([^\"]+)\"@) {
|
||||||
|
my $url=$1;
|
||||||
|
substr($text, index($text, $a_tag), length($a_tag))="\n => $url\n";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# convert quoted chars; there are probably more to do
|
||||||
$text=~s@'@'@g;
|
$text=~s@'@'@g;
|
||||||
$text=~s@"@"@g;
|
$text=~s@"@"@g;
|
||||||
|
|
||||||
if($media ne "") {
|
if($media ne "") {
|
||||||
system("wget -nc $media");
|
system("wget -P $dir -nc $media");
|
||||||
$media =~ s@.*/@@;
|
$media =~ s@.*/@@;
|
||||||
$media="=> $media\n";
|
$media="=> $media\n";
|
||||||
}
|
}
|
||||||
|
@ -50,9 +67,8 @@ $media
|
||||||
|
|
||||||
foreach my $d (keys(%post)) {
|
foreach my $d (keys(%post)) {
|
||||||
print "$d\n";
|
print "$d\n";
|
||||||
open(FILE, ">$d.gmi");
|
open(FILE, ">$dir/$d.gmi");
|
||||||
print FILE $post{$d};
|
print FILE $post{$d};
|
||||||
close FILE;
|
close FILE;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user