bragi/stdtxt.sh

14 lines
762 B
Bash
Executable File

#!/bin/bash
# Originally this pipeline removed all punctuation from a text file and then
# removed blank lines.
# It no longer removes all punctuation; it now cleans up blank lines and strips
# numbers at the beginning of a line that are followed by punctuation.
# TODO: figure out how to also run iconv (to us-ascii? to utf-8?)
# usage: 'stdtxt.sh filename'
#tr -d [:punct:] < $1 | sed '/^\s*$/d' >> "$1.nopunct"
#sed 's/^[0-9]*[[:punct:]]//' $1 | sed 's/^ *[0-9]*[[:punct:]]//' | sed '/^ *$/d' >> "$1.std"
# does this work for brackets and underscores?
# sed 's/[][}{)(_]//g'
# Added another sed expression to strip numbers that come after leading whitespace and before punctuation.
#######################################
# Print a one-line usage message to stderr.
#######################################
usage() {
  printf 'usage: %s filename\n' "${0##*/}" >&2
}

#######################################
# Standardize a text file and append the result to "<filename>.std".
# Each input line is processed in this order:
#   1. drop a leading run of digits (possibly empty) together with the
#      single punctuation character that follows it;
#   2. do the same once more, this time also allowing leading whitespace;
#   3. delete every bracket, brace, parenthesis and underscore;
#   4. discard the line if it is empty or contains only spaces.
# Arguments: $1 - path of the text file to clean
# Outputs:   appends the cleaned text to "$1.std"; diagnostics go to stderr
# Returns:   0 on success, 1 if the input cannot be read, 2 on bad usage,
#            otherwise the exit status of sed
#######################################
stdtxt() {
  if (( $# != 1 )); then
    usage
    return 2
  fi

  local infile=$1

  # Check the input before the redirection below runs, so a bad path does
  # not create or extend a stray ".std" file.
  if [[ ! -r "$infile" || -d "$infile" ]]; then
    printf '%s: cannot read file: %s\n' "${0##*/}" "$infile" >&2
    return 1
  fi

  # One sed process applies all four expressions to each line in order,
  # which matches chaining four separate seds but lets a sed failure reach
  # the caller. "--" and the quotes keep odd filenames (spaces, leading
  # dash, glob characters) intact.
  sed \
    -e 's/^[0-9]*[[:punct:]]//' \
    -e 's/^[[:space:]]*[0-9]*[[:punct:]]//' \
    -e 's/[][}{)(_]//g' \
    -e '/^ *$/d' \
    -- "$infile" >> "${infile}.std"
}

# No "exit" here: the script's status is simply that of stdtxt.
stdtxt "$@"