14 lines
762 B
Bash
Executable File
14 lines
762 B
Bash
Executable File
#!/bin/bash
|
|
#This pipeline should remove the punctuation from a text file and then
|
|
# remove blank lines
|
|
# No longer removes all punctuation; it now deletes blank lines and strips numbers at the beginning of a line together with the punctuation character that follows them.
|
|
# TODO: still need to work out how to also run iconv (convert to US-ASCII or to UTF-8?)
|
|
# usage: 'stdtxt.sh filename'
|
|
#tr -d [:punct:] < $1 | sed '/^\s*$/d' >> "$1.nopunct"
|
|
#sed 's/^[0-9]*[[:punct:]]//' $1 | sed 's/^ *[0-9]*[[:punct:]]//' | sed '/^ *$/d' >> "$1.std"
|
|
# does this work for brackets and underscores?
|
|
# sed 's/[][}{)(_]//g'
|
|
# Added a second substitution that strips leading whitespace plus digits when they are followed by a punctuation character.
|
|
#######################################
# Apply the text clean-up rules to stdin and write the result to stdout.
# All rules run in one sed process, in this order, on every line:
#   1. strip zero or more leading digits followed by one punctuation char
#   2. strip leading whitespace + zero or more digits followed by one
#      punctuation char
#   3. delete every square bracket, brace, parenthesis and underscore
#   4. drop lines that are empty or contain only spaces
# Arguments: none
# Outputs:   cleaned text on stdout
#######################################
std_filter() {
  sed \
    -e 's/^[0-9]*[[:punct:]]//' \
    -e 's/^[[:space:]]*[0-9]*[[:punct:]]//' \
    -e 's/[][}{)(_]//g' \
    -e '/^ *$/d'
}

#######################################
# Clean one text file and append the result to "<filename>.std".
# Arguments: $1 - path of the text file to clean (further args are ignored)
# Outputs:   appends to "$1.std"; diagnostics go to stderr
# Returns:   0 on success, 2 when no filename is given, 1 when the file
#            cannot be read, otherwise the status of sed
#######################################
main() {
  if (( $# < 1 )); then
    # Without this guard sed would silently read stdin and write to ".std".
    printf 'usage: %s filename\n' "${0##*/}" >&2
    return 2
  fi

  local input=$1
  if [[ ! -r "$input" ]]; then
    printf '%s: cannot read %s\n' "${0##*/}" "$input" >&2
    return 1
  fi

  # Feed the file via redirection so names containing spaces, glob
  # characters or a leading '-' are never parsed as sed arguments.
  # '>>' is kept on purpose: repeated runs append, as they always did.
  std_filter < "$input" >> "${input}.std"
}

main "$@"
|
|
|