handle links better, variables for username and target dir

This commit is contained in:
Alexander 2023-02-18 10:47:53 +00:00
parent 8eff3e496d
commit 978a4d17cf
1 changed file with 25 additions and 9 deletions

View File

@ -2,14 +2,24 @@
use strict;
my $count=`xmllint --xpath 'count(/rss/channel/item)' alexlehm.rss`;
my $username=shift;
my $rssurl="https://mastodon.social/users/$username.rss";
system("wget $rssurl");
my $fname="$username.rss";
my $dir="gemlog";
my $count=`xmllint --xpath 'count(/rss/channel/item)' $fname`;
my %post;
for(my $i=1;$i<=$count;$i++) {
my $date=`xmllint --xpath "string(/rss/channel/item[$i]/pubDate)" alexlehm.rss`;
my $text=`xmllint --xpath "string(/rss/channel/item[$i]/description)" alexlehm.rss`;
my $media=`xmllint --xpath "string(/rss/channel/item[$i]/*[name()='media:content']/\@url)" alexlehm.rss`;
my $date=`xmllint --xpath "string(/rss/channel/item[$i]/pubDate)" $fname`;
my $text=`xmllint --xpath "string(/rss/channel/item[$i]/description)" $fname`;
my $media=`xmllint --xpath "string(/rss/channel/item[$i]/*[name()='media:content']/\@url)" $fname`;
chop $media;
@ -19,8 +29,6 @@ for(my $i=1;$i<=$count;$i++) {
my $filedate="$3-$2-$1";
print "$date $filedate\n";
# html to gemtext conversion
$text=~s@<p>@@g;
@ -33,11 +41,20 @@ for(my $i=1;$i<=$count;$i++) {
$text=~s@<a href=\"[^\"]+\" class=\"mention hashtag\" rel=\"tag\">#<span>(.*)</span></a>@#$1@g;
if ($text=~m@(<a .*</a>)@) {
my $a_tag=$1;
if($a_tag =~ m@href=\"([^\"]+)\"@) {
my $url=$1;
substr($text, index($text, $a_tag), length($a_tag))="\n => $url\n";
}
}
# convert quoted chars, there are probably more to do
$text=~s@&#39;@'@g;
$text=~s@&quot;@"@g;
if($media ne "") {
system("wget -nc $media");
system("wget -P $dir -nc $media");
$media =~ s@.*/@@;
$media="=> $media\n";
}
@ -50,9 +67,8 @@ $media
foreach my $d (keys(%post)) {
print "$d\n";
open(FILE, ">$d.gmi");
open(FILE, ">$dir/$d.gmi");
print FILE $post{$d};
close FILE;
}