#!/bin/bash

# divide a large text file (分韻撮要) into several html pages
# Edward Wong <edward@mail.sdu.edu.cn>
# 2010-03

# Row labels for the rhyme tables: each key is the romanised initial used in
# the data, each value is the head character string printed in the first
# column of that initial's table row.
declare -A inittab=(
  [p]='並丙'
  [ph]='磐篇'
  [m]='明乜'
  [f]='扶非'
  [t]='大丁'
  [th]='同太'
  [n]='尼拈'
  [l]='來拉'
  [ts]='在則'
  [tsh]='七前'
  [s]='士心'
  [tsj]='仗之'
  [tshj]='長丑'
  [sj]='時審'
  [nj]='言'
  [k]='共見開'
  [kh]='其啟開'
  [kw]='共見合'
  [khw]='其啟合'
  [ng]='岸'
  [7]='喻烏開'
  [h]='下可'
  [j]='由英'
  [w]='喻烏合'
)

# Three parallel arrays, one entry per rhyme, indexed together:
#   rhyme - romanised rhyme name, used as the base name of the generated
#           .tmp/.html files
#   no    - ordinal of the rhyme as written in the source text
#   title - title characters of the rhyme
# The table below keeps each rhyme's three values on one row; columns are
# separated by blanks.  The ordinal 第七 appears twice because that rhyme is
# listed as two entries, marked (甲) and (乙).
declare -a rhyme=() no=() title=()
while read -r rh ord ttl; do
  rhyme+=("$rh")
  no+=("$ord")
  title+=("$ttl")
done <<'EOF'
in    第一    先蘚線屑
ai    第二    威偉畏
i     第三    幾紀記
y     第四    諸主著
au    第五    修叟秀
ong   第六    東董凍篤
ing   第七    英影應益(甲)
eng   第七    英影應益(乙)
an    第八    賓禀嬪𤲃
oeng  第九    張掌帳着
oong  第十    剛講降角
iu    十一    朝沼照
u     十二    孤古故
yn    十三    鴛婉怨乙
aai   十四    皆解介
ang   十五    登等凳德
z     十六    師史四
am    十七    金錦禁急
aau   十八    交絞教
ooi   十九    栽宰載
im    二十    兼檢劍劫
on    二十一  津贐進卒
oi    二十二  雖髓歲
oo    二十三  科火貨
aam   二十四  緘减鑒甲
aan   二十五  翻反泛發
aa    二十六  家賈嫁
un    二十七  官管貫括
ui    二十八  魁賄誨
e     二十九  遮者蔗
oon   三十    干趕幹割
oom   三十一  甘敢紺蛤
aang  三十二  彭棒硬額
ng    三十三  吾五悟
EOF
unset rh ord ttl

# Step 1: split the input text ($1) into one temporary file per rhyme.
#
# A sed script is generated on the fly that replaces every occurrence of an
# "ordinal + title" heading (no[k] followed by title[k]) with the romanised
# name rhyme[k].  awk then treats each line as tab-separated and appends
# fields 2, 4 and 5 to the file "<field 1>.tmp" in the current directory.
# The page-generation step later reads "<rhyme>.tmp", so field 1 is expected
# to hold the heading.
if [[ $# -lt 1 || ! -r $1 ]]; then
  # Without a readable input file sed would fall back to the (already
  # consumed) standard input and the script would emit empty pages.
  printf 'usage: %s FILE\n' "${0##*/}" >&2
  exit 1
fi

# awk appends to the .tmp files, so clear leftovers from an interrupted run.
# "${rhyme[@]/%/.tmp}" expands to every rhyme name with ".tmp" appended.
rm -f -- "${rhyme[@]/%/.tmp}"

printf 'dividing %s into rhymes...' "$1"
# The loop is the first stage of a pipeline and therefore runs in a subshell;
# its counter does not leak into the rest of the script.
for idx in "${!rhyme[@]}"; do
  printf 's/%s%s/%s/g\n' "${no[idx]}" "${title[idx]}" "${rhyme[idx]}"
done \
  | sed -f - -- "$1" \
  | awk 'BEGIN { FS = OFS = "\t" } { print $2, $4, $5 >> ($1 ".tmp") }'
echo 'done.'

exec 3>index.html
cat >&3 <<EOF
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
EOF
echo '<!-- This document is automatically generated by a Bash script on '`date +%Y-%m-%d`'.' >&3
cat >&3 <<EOF
     Copyright (c) 黄艺华 (Edward Wong)
     Rights are granted to copy, modify and redistribute this document in non-commercial purposes,
     on condition that each copy, modified or unmodified, contains this complete announcement of license. -->
<html xmlns="http://www.w3.org/1999/xhtml">
  <head>
    <meta http-equiv="content-type" content="text/html; charset=utf-8" />
    <link rel="stylesheet" type="text/css" href="style.css" />
  </head>
  <body>
    <h1>分韻撮要聲韻表</h1>

    <div class="perface">
      <p>《分韻撮要》是近代粤語音繫的韻書，原書與《江湖尺牘》合印，稱《江湖尺牘分韻撮要合集》，出版於1838年。該書記錄了清初的粵語音繫，是研究近代粵語不可多得的原始資料。承同好將原書製作成電子表格，發佈於水木社區語言學版。得之欣喜之餘，尚嫌查閱不便，遂編寫一個Bash腳本將之轉化為HTML格式，庶幾有助於來者。由於原書不標聲母，乃酌取《初學粵音切要》聲母字頭補入，又第七英影應益實分兩韻，就以甲乙區分。由於自動生成，錯漏在所難免，幸不吝指正。原電子表格可從<a href="http://att.newsmth.net/att.php?p.203.64708.355.zip">此處</a>獲得。值此對錄入者的繁複工作謹表謝忱！</p>
      <p class="sign"><a href="mailto:edward@mail.sdu.edu.cn">黄藝華</a></p>
      <p class="date">2010年3月於濟南</p>
    </div>

    <div class="toc">
      <h2>目錄</h2>
      <ul>
EOF

# Step 2: generate one XHTML page per rhyme and add a matching entry to the
# table of contents being written to index.html on file descriptor 3 (which
# must already be open).
#
# For rhyme number i the page "${rhyme[i]}.html" is built from
# "${rhyme[i]}.tmp", whose tab-separated fields are read as: initial key,
# tone number (1-8), cell text.  Only the first line found for each
# (initial, tone) pair is used.  The .tmp file is removed afterwards.

# Row order of every table; each key also indexes inittab for the row label.
initials=(p ph m f t th n l ts tsh s tsj tshj sj nj k kh kw khw ng 7 h j w)

# Tab-joined copies of the keys and their labels, handed to awk below.  Tabs
# (rather than blanks) keep an empty label from shifting the ones after it.
row_keys=
row_labels=
for init in "${initials[@]}"; do
  row_keys+="${init}"$'\t'
  row_labels+="${inittab[$init]}"$'\t'
done
row_keys=${row_keys%$'\t'}
row_labels=${row_labels%$'\t'}

# The date stamp is the same for every page, so compute it once.
today=$(date +%Y-%m-%d)

# Start the counter at 0 explicitly: nothing earlier in the main shell assigns
# i, so a bare "for ((i; ..." only works while i happens to be unset.
for ((i = 0; i < ${#rhyme[@]}; i++)); do
  name=${rhyme[i]}
  heading=${no[i]}${title[i]}

  # Table-of-contents entry in index.html.
  printf '        <li><a href="%s.html">%s</a></li>\n' "$name" "$heading" >&3

  printf 'generating %s.html...' "$name"

  # A rhyme without data still gets a page (with empty cells) instead of a
  # burst of awk/rm errors.
  data=$name.tmp
  if [[ ! -r $data ]]; then
    printf 'warning: %s not found, table will be empty\n' "$data" >&2
    data=/dev/null
  fi

  # Everything inside the braces goes to the page file, which is closed again
  # as soon as the group ends.
  {
    cat <<EOF
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<!-- This document is automatically generated by a Bash script on ${today}.
     Users are free to read, change and redistribute this document in non-business purposes,
     in condition that all changes and redistributions contain this entire announcement of license.
     Copyright (c) Edward Wong <edward@mail.sdu.edu.cn> -->
<html xmlns="http://www.w3.org/1999/xhtml">
  <head>
    <meta http-equiv="content-type" content="text/html; charset=utf-8" />
    <link rel="stylesheet" type="text/css" href="style.css" />
  </head>
  <body>
    <table class="rhyme">
      <caption>${heading}</caption>
      <tr>
        <th class="init"></th>
        <th>陰平</th>
        <th>陽平</th>
        <th>陰上</th>
        <th>陽上</th>
        <th>陰去</th>
        <th>陽去</th>
        <th>陰入</th>
        <th>陽入</th>
      </tr>
EOF

    # One pass over the data builds all 24 x 8 cells (instead of starting a
    # separate awk process for every cell).  Array subscripts are compared
    # as strings, matching the exact-string tests on initial and tone.
    awk -F'\t' -v keys="$row_keys" -v labels="$row_labels" '
      BEGIN {
        rows = split(keys, key, "\t")
        split(labels, label, "\t")
      }
      # Remember only the first text seen for each (initial, tone) pair.
      !(($1, $2) in cell) { cell[$1, $2] = $3 }
      END {
        for (r = 1; r <= rows; r++) {
          printf "      <tr>\n        <td class=\"init\">%s</td>\n", label[r]
          for (tone = 1; tone <= 8; tone++)
            printf "        <td>%s</td>\n", cell[key[r], tone]
          printf "      </tr>\n"
        }
      }
    ' "$data"

    cat <<EOF
    </table>
    <p class="link">
EOF

    # Navigation: previous rhyme (if any), index, next rhyme (if any).
    if ((i > 0)); then
      printf '      <a href="%s.html">&lt;&lt;%s</a>\n' "${rhyme[i - 1]}" "${title[i - 1]}"
    fi
    printf '      <a href="index.html">目錄</a>\n'
    if ((i + 1 < ${#rhyme[@]})); then
      printf '      <a href="%s.html">%s&gt;&gt;</a>\n' "${rhyme[i + 1]}" "${title[i + 1]}"
    fi

    cat <<EOF
    </p>
  </body>
</html>
EOF
  } >"$name.html"

  echo 'done.'

  rm -f -- "$name.tmp"
done
# Close the table-of-contents list and the document in index.html (file
# descriptor 3), then print the closing messages on standard output.
printf '%s\n' \
  '      </ul>' \
  '    </div>' \
  '  </body>' \
  '</html>' >&3

printf '%s\n' 'finished. refer to index.html for index.'
printf '%s\n' 'all bebug reported to <edward@mail.sdu.edu.cn>'
