2008年8月1日金曜日

(Code) parsedat

sedで力ずく!
#!/bin/sh

## program name
# Used as the prefix of every diagnostic message.  Parameter expansion
# replaces `basename $0`: no extra process, and it is safe when the script
# path contains spaces.
PROG=${0##*/}

## environment
# Run the external tools in a Japanese UTF-8 locale.  Exported explicitly so
# the value reaches child processes even when LANG was not already part of
# the inherited environment.
LANG=ja_JP.UTF-8
export LANG

## check command
# Abort early when a required external tool is missing.
# `command -v` is the POSIX way to look a command up; `which` is not
# standardized and its output/exit status differ between systems.
LIST="mktemp iconv w3m"
for CMD in $LIST; do
  if ! command -v "$CMD" >/dev/null 2>&1; then
    printf 'Error: %s: we need command "%s"\n' "$PROG" "$CMD" >&2
    exit 1
  fi
done

## init
### check arg
# The first argument (the dat file) is mandatory: without it, print the
# usage line on stderr and stop.
[ -n "$1" ] || {
  echo "usage: $PROG <dat file>" >&2
  exit 1
}

# The argument must name an existing regular file.
FILE=$1
[ -f "$FILE" ] || {
  echo "Error: $PROG: not exist \"$FILE\"" >&2
  exit 1
}

### numbering offset (optional 2nd argument)
# N is added to the running record counter when each post is numbered.
# ${2:-0} supplies the default of 0; the original set N=0 and then
# unconditionally overwrote it with N=$2, so the default never took effect.
N=${2:-0}

# Accept only a plain non-negative decimal integer.
case $N in
  *[!0-9]*)
    echo "Error: $PROG: not num ($N)" >&2
    exit 1
    ;;
esac

# Drop leading zeros: shell arithmetic reads a leading 0 as octal, so an
# offset such as 010 would count as 8 and 08 would be a syntax error.
while [ "${#N}" -gt 1 ] && [ "${N#0}" != "$N" ]; do
  N=${N#0}
done

### prepare utf8 dat file
# Work in a private scratch directory; stop if it cannot be created.
DIR=$(mktemp -d) || {
  echo "Error: $PROG: cannot create temporary directory" >&2
  exit 1
}
# Remove the scratch directory on every exit path (the original left one
# directory behind per run).  The signal trap turns HUP/INT/TERM into a
# normal exit so the EXIT handler also runs in shells that do not fire it
# on signals.
trap 'rm -rf -- "${DIR:?}"' EXIT
trap 'exit 1' HUP INT TERM

FILE_UTF8=$DIR/dat_utf.txt
# Convert the dat file from SJIS-WIN to UTF-8; -c drops characters that
# cannot be converted.  Output goes through a redirection instead of a
# trailing -o so the command does not rely on options after the operand,
# and the path is quoted so names with spaces work.
# NOTE(review): the exit status is deliberately not checked, as in the
# original -- some iconv builds return non-zero when -c had to discard
# input even though the output is usable; confirm before tightening.
iconv -c -f SJIS-WIN -- "$FILE" >"$FILE_UTF8"

## body

# parse_dat_line RECORD
# Split one dat record into the global variables NAME, MAIL, ETC, TIME, ID
# and BODY.  The sed patterns expect four "<>" separators with the fourth
# field surrounded by single spaces:
#   name<>mail<>etc<> body <>rest
# TIME (NN:NN:NN plus any trailing non-space characters) and ID ("ID:" plus
# nine characters of [0-9a-zA-Z+/]) are then picked out of ETC.
# When a pattern does not match, sed prints its input unchanged, so the
# corresponding variable receives the whole input text.
# printf is used instead of echo so records containing backslashes or
# starting with "-" are passed through untouched.
parse_dat_line() {
  NAME=$(printf '%s\n' "$1" | sed 's/\(.*\)<>.*<>.*<> .* <>.*/\1/')
  MAIL=$(printf '%s\n' "$1" | sed 's/.*<>\(.*\)<>.*<> .* <>.*/\1/')
  ETC=$(printf '%s\n' "$1" | sed 's/.*<>.*<>\(.*\)<> .* <>.*/\1/')
  TIME=$(printf '%s\n' "$ETC" |
    sed 's/.* \([0-9]\{2\}:[0-9]\{2\}:[0-9]\{2\}[^ ]*\) .*/\1/')
  # "|" as delimiter: the bracket expression contains a literal "/".
  ID=$(printf '%s\n' "$ETC" | sed 's|.* \(ID:[0-9a-zA-Z+/]\{9\}\).*|\1|')
  BODY=$(printf '%s\n' "$1" | sed 's/.*<>.*<>.*<>\( .* \)<>.*/\1/')
}

n=1
# IFS= and -r keep leading blanks and backslashes of each record intact;
# the extra [ -n ] test also processes a last line that lacks a newline.
while IFS= read -r LINE || [ -n "$LINE" ]; do
  # POSIX arithmetic instead of the deprecated bash-only $[ ] form; an
  # unset/empty offset counts as 0.
  NUM=$(( ${N:-0} + n ))
  parse_dat_line "$LINE"

  # Render the record as an HTML definition-list entry and let w3m dump it
  # as plain text, 100 columns wide.
  w3m -dump -T text/html -cols 100 <<EOF
<html>
<dt>$NUM: $NAME [$MAIL] $TIME $ID<dd>$BODY
</html>
EOF
  echo

  n=$(( n + 1 ))
done <"$FILE_UTF8"