102 lines
2.7 KiB
Bash
Executable File
102 lines
2.7 KiB
Bash
Executable File
#!/usr/bin/env bash
#
# FII scraper, first stage: build the list of tickers (fiis.txt) from the
# yearly overview page (anual.html).
#
# Run with bash rather than whatever /usr/bin/sh points to, so the behaviour
# of shell builtins (echo options and the like) does not depend on the system.

# anual.html is expected in the current directory; the two commands below
# fetch it and are kept commented out so the page is not downloaded each run.
#echo "Downloading fiis.com.br/anual.html"
#wget -q fiis.com.br/anual -O anual.html

# extract_tickers FILE
# Print, one per line, every ticker found in FILE. A ticker is the run of
# upper-case letters and digits that directly follows a
# <span class="ticker"> tag.
extract_tickers() {
  grep -Eo '<span class="ticker">[A-Z0-9]+' -- "$1" \
    | sed 's/<span class="ticker">//g'
}

echo "Extracting tickers"
extract_tickers anual.html > fiis.txt

# download_tickers LIST
# For every ticker listed (one per line) in LIST, fetch fiis.com.br/<ticker>
# into <ticker>.fii.html in the current directory. Progress is shown on stdout
# as "\r<i>/<total> downloading <ticker>", rewriting a single terminal line.
# A failed download is reported on stderr and its (empty or partial) output
# file is removed; the remaining tickers are still fetched.
# Returns non-zero when LIST cannot be read.
download_tickers() {
  local list="$1" total ticker i=1

  if [ ! -r "$list" ]; then
    printf 'download_tickers: cannot read %s\n' "$list" >&2
    return 1
  fi

  # Feed wc through a redirect so it prints only the number; with a file
  # argument it prints "N <file>", which ended up in the progress line.
  total=$(($(wc -l < "$list")))

  # Read line by line instead of word-splitting $(cat ...).
  while IFS= read -r ticker || [ -n "$ticker" ]; do
    [ -n "$ticker" ] || continue
    # printf instead of `echo -en`, whose options are not portable.
    printf '\r%s/%s downloading %s' "$i" "$total" "$ticker"
    if ! wget -q -O "$ticker.fii.html" "fiis.com.br/$ticker"; then
      printf '\nwarning: download of %s failed\n' "$ticker" >&2
      # wget -O creates the output file even when the transfer fails.
      rm -f -- "$ticker.fii.html"
    fi
    i=$((i + 1))
  done < "$list"
}

echo "Downloading tickers"
# Trial run on the first four tickers only (original debugging aid):
#l=$(head -n 4 fiis.txt | wc -l)
#for f in $(head -n 4 fiis.txt); do
download_tickers fiis.txt

# extract_table FILE
# Print the <tbody>...</tbody> part of FILE as plain text: every <tr> starts a
# new output line, every </td> becomes a space, "R$ " is replaced by a space
# and the remaining table tags are dropped.
# `grep -z` terminates its match with a NUL byte, which is passed through.
extract_table() {
  # NOTE(review): the final `s/ / /g` replaces a space with a space, i.e. it
  # is a no-op as written. It presumably squeezed runs of spaces originally;
  # confirm against the upstream script before changing it.
  grep --color=never -Eoz '<tbody>.*</tbody>' -- "$1" \
    | tr '\n' ' ' \
    | sed 's/<\/td>/ /g;s/<td>//g;s/<tr>/\n/g;s/R$ / /g;s/<tbody>//g;s/<\/tr>//g;s/<\/tbody>//g;s/ / /g'
}

# extract_all_tables
# For every <name>.fii.html in the current directory write <name>.fii.txt:
# an empty line, the extracted table, and a closing newline.
extract_all_tables() {
  local page
  # Match only the downloaded pages. The former glob `*.fii.*` also picked up
  # the .fii.txt files of an earlier run and turned them into .fii.txt.txt.
  for page in *.fii.html; do
    # An unmatched glob stays literal; skip it instead of handing it to grep.
    [ -e "$page" ] || continue
    {
      echo
      extract_table "$page"
      echo
    } > "${page%.html}.txt"
    #rm "$page"
  done
}

# Bare echo: emits a newline before the next status message.
echo
echo "Extracting data"
extract_all_tables

# Fix the parens.

# fix_parens FILE
# Rewrite FILE in place (GNU sed, whole file as one record because of -z):
#   1. every "\n \n " sequence collapses to a single space;
#   2. every newline becomes ")\n(", i.e. close the current list and open
#      the next one;
#   3. "( " is marked via "(-" and then "[", every other "(" is deleted, and
#      the marks are turned back into "( ". The net effect is that an opening
#      paren survives only where it was followed by a space.
fix_parens() {
  sed -zi 's/\n \n / /g;s/\n/)\n(/g;s/( /(-/g;s/(-/[/g;s/(//g;s/\[/( /g' -- "$1"
}

for f in *fii.txt; do
  # An unmatched glob stays literal; do not hand "*fii.txt" to sed -i.
  [ -e "$f" ] || continue
  fix_parens "$f"
done

# Assemble fiis.ss: a single Scheme form, (define fiis (quote (...))), that
# holds one entry per *fii.txt file in the current directory.

# inline_field FILE MARKER
# Print what sits between `MARKER">` and `</span>` on a line of FILE.
# The match is greedy, so it extends to the last </span> on that line.
inline_field() {
  grep -Eo "$2\">.*</span>" -- "$1" \
    | sed "s/$2\">//g;s/<\/span>//g"
}

# labeled_value FILE LABEL CHARS
# Print the text of the <span class="value"> that follows `LABEL</span> `
# in FILE. Newlines are turned into spaces first so the label and its value
# may sit on different lines. CHARS is the ERE (bracket expression plus
# repetition) the value has to match. `grep -z` ends its output with a NUL
# byte; it is removed here so it never reaches a command substitution.
labeled_value() {
  tr '\n' ' ' < "$1" \
    | grep -Eoz "$2</span> <span class=\"value\">$3</span>" \
    | sed "s/$2<\/span> <span class=\"value\">//g;s/<\/span>//g" \
    | tr -d '\0'
}

# emit_entry TXT
# Print the entry for TXT (named <ticker>.fii.txt): "((", the ticker, the
# fields scraped from <ticker>.fii.html in the current directory (strings in
# double quotes, the two counts bare with their thousands dots removed),
# ")((", the contents of TXT, and "))".
emit_entry() {
  local txt="$1" ticker html
  local fundname admname cnpj tel numdecotas numdecotistas
  local nomenopregao tipodofii

  ticker=${txt##*/}
  ticker=${ticker%.fii.txt}
  html="$ticker.fii.html"

  fundname=$(inline_field "$html" 'fund-name')
  admname=$(inline_field "$html" 'administrator-name')
  cnpj=$(inline_field "$html" 'administrator-doc')
  tel=$(labeled_value "$html" 'Telefone' '[0-9)( -]*')
  numdecotas=$(labeled_value "$html" 'Número de Cotas' '[0-9.]+' \
    | sed 's/\.//g')
  numdecotistas=$(labeled_value "$html" 'Número de Cotistas' '[0-9.]+' \
    | sed 's/\.//g')
  # NOTE(review): [A-z] also covers the punctuation between 'Z' and 'a'
  # ([ \ ] ^ _ `) in the C locale. Kept as found; confirm whether [A-Za-z]
  # was meant.
  nomenopregao=$(labeled_value "$html" 'Nome no Pregão' '[A-z]+')
  tipodofii=$(labeled_value "$html" 'Tipo do FII' '[A-z:]+')

  printf '%s\n' '((' "$ticker"
  printf '"%s"\n' "$fundname" "$admname" "$cnpj" "$tel"
  printf '%s\n' "$numdecotas" "$numdecotistas"
  printf '"%s"\n' "$nomenopregao" "$tipodofii"
  printf '%s\n' ')(('
  cat -- "$txt"
  printf '%s\n' '))'
}

# build_fiis_ss
# (Re)create fiis.ss in the current directory from every *fii.txt file.
build_fiis_ss() {
  local f
  # One truncating redirect for the whole form: appending, as before, left
  # the previous run's definition in front of the new one.
  {
    echo "(define fiis (quote ("
    for f in *fii.txt; do
      # An unmatched glob stays literal; skip it.
      [ -e "$f" ] || continue
      emit_entry "$f"
    done
    echo ")))"
  } > fiis.ss
}

build_fiis_ss

# Get rid of those pesky \x00 characters.
# (`grep -z`, used earlier in this script, terminates its output with a NUL
# byte, and those bytes end up in the generated file.)

# strip_nuls FILE
# Delete every NUL byte from FILE, in place (GNU sed).
strip_nuls() {
  sed -i 's/\x00//g' -- "$1"
}

# Nothing to clean when the file was never generated.
if [ -f fiis.ss ]; then
  strip_nuls fiis.ss
fi