#!/bin/sh
#
# extrablatt - EXTRAct karolenderBLATT.
#
# Searches XXXXXXXXXXXX (or any file(s) given as arguments) for
# karolenderblatt entries and prints the corresponding date stamp,
# the karolenderblatt entry and 9 additional lines of trailing
# context.
# TODO:
#
# * parse date stamps (for year, month, day).
#
# * instead of printing the date stamp line, prefix the following output
# with the (shortened) date stamp.
#
# * adjust date stamp by one or two days, respectively, when the
# karolenderblatt entry starts with 'gestern vor' or 'vorgestern vor',
# then replace those terms by 'heute vor' (further processing mentioned
# below).
#
# * unHTMLIze the entries (e.g. &quot; -> ", strip <br> tags etc.).
#
# * replace the relative terms ('heute vor N Jahren') by absolute
# dates ('am dd.mm.yyyy'). Note: N may be a number or a noun
# (like 'sechs', 'neun', including typos like 'neuen').
# Fall back to the default input file when no file arguments were given.
if [ "$#" -eq 0 ]; then
  set -- XXXXXXXXXXXX
fi
# Could almost be done with grep -A9, except for the date stamp.
# So we use some awk, carefully obfuscated with love.
#
# How the awk program works (every rule is a bare pattern expression, so
# the only action is the implicit print when the pattern is true):
#
#   ref(d, e)  e is a local scratch variable.  It receives a copy of d
#              with the leading and trailing runs of non-digits removed;
#              ds[] counts how often each stamp has been passed in.
#              NOTE(review): as written, e is never used afterwards and
#              both branches of the final ?: yield the same text -
#              presumably markup inside the string literals got lost;
#              confirm against the upstream script.
#   rule 1     A line starting with "----" is stored in d as the current
#              date stamp and n is reset to 0; nothing is printed.
#   rule 2     A matching karolenderblatt line gets ref(d) and a newline
#              prepended to $0 and sets n to 10; the outer ! makes the
#              pattern false so the line is not printed by this rule.
#   rule 3     While n is non-zero, print the current line and decrement
#              n: the entry itself plus up to 9 lines of trailing context
#              (a new "----" line ends the context early via rule 1).
#
# The sed stage post-processes the awk output.
# NOTE(review): its first and last expressions contain raw newlines, the
# second and third have empty patterns and replacements, and the GID
# expression substitutes the match with itself; this looks like stripped
# markup as well - verify against the upstream script before relying on it.
awk '
function ref(d, e) {
e = d
d = "" d ""
sub(/^[^0-9]*/, "", e)
sub(/[^0-9]*$/, "", e)
return ds[d]++ ? d : "" d
}
/^----/ && n = !(d = $0)
/^(kili|quabla)<\/B> \([^)]*\): (K|Reservek)arolenderblatt/ \
&& !(n = 9 + !!($0 = ref(d) "\n" $0))
n && n--
' "$@" |
sed -e 's@[[:space:]]*
@
@g' \
-e 's@@@g' \
-e 's@@@g' \
-e 's@\([^[:alnum:]]\)\(GID=[0-9][0-9]*\)@\1\2@' \
-e 's@#[[:space:]]*\([0-9][0-9]*\)\(
\)$@#\1\2@'