diff --git a/clean b/clean index 0eac0fc..70bbda6 100755 --- a/clean +++ b/clean @@ -1,7 +1,7 @@ #!/bin/bash mv urls.txt urls.txt.bak -cat urls.txt.bak | sort | uniq > urls.txt +cat urls.txt.bak | sort | uniq -w 20 > urls.txt rm urls.txt.bak diff --git a/crawly b/crawly index 2977dac..a93b436 100755 --- a/crawly +++ b/crawly @@ -52,7 +52,7 @@ visit $fn urls.txt done mv urls.txt urls.txt.bak -cat urls.txt.bak | sort | uniq > urls.txt +cat urls.txt.bak | sort | uniq -w 20 > urls.txt rm urls.txt.bak find ./content/ -type 'f' -size -100c -delete