From d94901f6f2247d1227ec3052e0e9098811154166 Mon Sep 17 00:00:00 2001 From: lickthecheese Date: Thu, 19 Mar 2020 10:35:34 -0400 Subject: [PATCH] fix slep --- crawly | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/crawly b/crawly index 06580db..3d0ca6e 100755 --- a/crawly +++ b/crawly @@ -16,12 +16,12 @@ if [[ $string =~ $regex && $string != *"<"* && $string != *">"* ]] then echo visiting $1 - + sleep 1 # get URLs CRAWLED="./content/"`echo $1 | shasum | head -c 5` echo $CRAWLED - curl $1 -m 5 -L > $TEMPIDC + curl $1 -m 1 -L > $TEMPIDC cat $TEMPIDC | grep href=\" | grep "https://" | grep -o "https:\/\/[^\"]*" | awk '/html$/ || /php$/ || /txt$/ || /\/$/ { print $0 }' >> $2 cat $TEMPIDC | grep href=\" | grep "https://" | grep -o "https:\/\/[^\"]*" | awk '!/html$/ && !/php$/ && !/txt$/ && !/\/$/ { print $0"/" }' >> $2 @@ -45,7 +45,6 @@ rm urls.txt for fn in $URLLIST; do echo "found URL $fn" -sleep 1 visit $fn urls.txt done