huntnw/clean

18 lines
313 B
Bash
Executable File

#!/bin/bash
#
# clean -- tidy up crawl output in the current working directory.
#
#   1. De-duplicate urls.txt: the file is sorted and lines whose first 20
#      characters are identical are collapsed into a single line.
#   2. Delete files under ./content/ that are smaller than 100 bytes or
#      larger than 15 KiB.
#   3. Delete files under ./content/ that contain "404 not found" or
#      "403 forbidden" (case-insensitive; binary files are not matched).
#
# Relies on GNU extensions (uniq -w, find -delete, grep -Z, xargs -r).
#
# The steps are independent and best-effort: a failing step is reported on
# stderr and the remaining steps still run, so `set -e` is deliberately not
# enabled. The script exits non-zero if any step failed.

# Let `sort | uniq` report a failing sort, so that a truncated result is never
# installed over urls.txt.
set -o pipefail

#######################################
# Rewrite urls.txt sorted and de-duplicated on the first 20 characters.
# The result is built in a scratch file and moved into place only on success,
# so urls.txt is left untouched when it is missing or when sort/uniq fails.
# Returns: 0 on success, 1 otherwise.
#######################################
dedupe_urls() {
  local scratch="urls.txt.tmp.$$"

  if [[ ! -f urls.txt ]]; then
    printf 'clean: urls.txt not found, skipping de-duplication\n' >&2
    return 1
  fi

  if sort urls.txt | uniq -w 20 > "$scratch"; then
    mv -- "$scratch" urls.txt
  else
    rm -f -- "$scratch"
    printf 'clean: de-duplication failed, urls.txt left unchanged\n' >&2
    return 1
  fi
}

#######################################
# Delete unwanted files under ./content/: files outside the accepted size
# range, and files whose text contains an HTTP 404/403 error marker.
# Returns: 0 on success, 1 if ./content/ is missing or a deletion failed.
#######################################
prune_content() {
  local rc=0
  local -a stage_status

  if [[ ! -d ./content/ ]]; then
    printf 'clean: ./content/ not found, skipping content pruning\n' >&2
    return 1
  fi

  # Single traversal for both size limits; the parentheses keep -delete bound
  # to the whole "too small OR too large" test.
  find ./content/ -type f \( -size -100c -o -size +15k \) -delete || rc=1

  # NUL-delimited hand-off keeps odd file names intact. `xargs -r` skips rm
  # entirely when grep lists nothing; without it rm would run with no operand
  # and fail with "missing operand".
  grep -lrIiZ -e '404 not found' -e '403 forbidden' ./content/ \
    | xargs -0 -r rm --
  stage_status=("${PIPESTATUS[@]}")

  # grep exits 1 when nothing matched, which is not an error here; only
  # statuses above 1 indicate a real grep failure.
  if (( stage_status[0] > 1 || stage_status[1] != 0 )); then
    rc=1
  fi

  return "$rc"
}

#######################################
# Run every cleanup step, continuing past failures.
# Returns: 0 if all steps succeeded, 1 otherwise.
#######################################
main() {
  local rc=0
  dedupe_urls || rc=1
  prune_content || rc=1
  return "$rc"
}

main "$@"