playground/awk/one.awk

76 lines
1.3 KiB
Awk

# Requires gawk: the rules below use the three-argument form of match().
# Convert a timestamp of the form "HH:MM:SS,mmm" into "H:MM:SS.cc".
#
#   line  timestamp text; split on ":" and then on "," for the fraction.
#
# Returns the reformatted string. The millisecond part is divided by 10
# and truncated by %d, so the result carries hundredths of a second.
#
# The names after the extra spaces in the parameter list are awk-style
# locals; callers pass only `line`. Declaring them keeps the scratch
# variables from leaking into (or clobbering) the script's globals.
function timeliner(line,    parts, frac, hours, mins, secs, centis) {
    split(line, parts, ":")
    hours = parts[1]
    mins = parts[2]
    # The third component is "SS,mmm": seconds, comma, milliseconds.
    split(parts[3], frac, ",")
    secs = frac[1]
    centis = frac[2] / 10
    return sprintf("%d:%02d:%02d.%02d", hours, mins, secs, centis)
}
# Print one cue as a single "Dialogue:" line on standard output.
#
#   IDX    cue index (accepted for the caller's convenience; not printed)
#   start  start timestamp, "HH:MM:SS,mmm"
#   end    end timestamp, "HH:MM:SS,mmm"
#   TEXT   cue text
#
# The names after the extra spaces are awk-style locals; callers pass
# only the first four arguments.
function flush(IDX, start, end, TEXT,    head, fields, t_start, t_end) {
    head = "Dialogue: Marked=0"
    # The backslash must be doubled: "\p" is not a defined awk escape, so
    # gawk drops the backslash (with a warning) and would emit "{pos(...)}".
    fields = "DefaultVCD, NTP,0000,0000,0000,,{\\pos(400,570)}"
    t_start = timeliner(start)
    t_end = timeliner(end)
    # NOTE(review): the format string also places a comma between the
    # {\pos(...)} tag and TEXT; confirm whether that comma is intended.
    printf("%s,%s,%s,%s,%s\n", head, t_start, t_end, fields, TEXT)
}
BEGIN {
    # Paragraph-mode parsing (blank-line record separator, newline field
    # separator) is left disabled; input is processed one line at a time.
    #RS = ""
    #FS = "\n"

    # Parser state shared by the rules below:
    #   TEXT  text accumulated for the current cue
    #   TIME  non-zero once the current cue's timing line has been read
    #   IDX   index of the current cue, 0 while none is open
    TEXT = ""
    TIME = 0
    IDX = 0
}
# Remove a trailing carriage return from every record, so input with
# CRLF line endings is handled the same as LF-only input.
{
    sub(/\r$/, "")
}
# Cue index: a line consisting only of digits, seen while no cue is open.
# The three-argument match() (capture groups into `a`) is a gawk extension.
match($0, /^([0-9]+$)/, a) && IDX == 0 && TIME == 0 {
    IDX = a[1]
    next
}
# Timing line "HH:MM:SS,mmm --> HH:MM:SS,mmm": accepted only after an
# index has been read and before a timing line for the same cue.
match($0, /([0-9]{2}:[0-9]{2}:[0-9]{2},[0-9]{3}) --> ([0-9]{2}:[0-9]{2}:[0-9]{2},[0-9]{3})/, a) && IDX != 0 && TIME == 0 {
    start = a[1]
    end = a[2]
    # Start collecting text for this cue from scratch.
    TEXT = ""
    # Mark the timing line as seen.
    TIME = 1
    next
}
# Any line that is not blank (and was not consumed by an earlier rule via
# `next`) is cue text. Consecutive text lines are joined with "\N", the
# SSA hard line break, instead of being run together with no separator.
!/^\s*$/ {
    TEXT = (TEXT == "") ? $0 : (TEXT "\\N" $0)
}
# A blank line ends the current cue. Whitespace-only lines count as blank
# here as well, matching the text rule's definition of "blank"; otherwise
# such a separator would neither add text nor close the cue, and the next
# cue's index and timing lines would be appended to TEXT.
/^[[:space:]]*$/ {
    if (IDX != 0 && TIME != 0) {
        flush(IDX, start, end, TEXT)
        # Reset state so the next index line opens a new cue.
        TIME = 0
        IDX = 0
    }
}
END {
    # Emit the final cue only if one is still open (input did not end with
    # a blank line). Without this guard an already-flushed cue would be
    # printed a second time, and empty input would produce a bogus line.
    if (IDX != 0 && TIME != 0) {
        flush(IDX, start, end, TEXT)
    }
}