Scripts and data used for generating ZhConversion.php
authorZheng Zhu <zhengzhu@users.mediawiki.org>
Fri, 1 Oct 2004 03:05:18 +0000 (03:05 +0000)
committerZheng Zhu <zhengzhu@users.mediawiki.org>
Fri, 1 Oct 2004 03:05:18 +0000 (03:05 +0000)
includes/zhtable/Makefile [new file with mode: 0644]
includes/zhtable/README [new file with mode: 0644]
includes/zhtable/printutf8.c [new file with mode: 0644]
includes/zhtable/simp2trad.manual [new file with mode: 0644]
includes/zhtable/simp2tradPhrases.manual [new file with mode: 0644]
includes/zhtable/trad2simp.manual [new file with mode: 0644]
includes/zhtable/trad2simpPhrases.manual [new file with mode: 0644]
includes/zhtable/tradphrases.manual [new file with mode: 0644]

diff --git a/includes/zhtable/Makefile b/includes/zhtable/Makefile
new file mode 100644 (file)
index 0000000..304f286
--- /dev/null
@@ -0,0 +1,180 @@
+#
+# Creates the file ZhConversion.php used for Simplified/Traditional
+# Chinese conversion. It gets the basic conversion table from the Unihan
+# database, and constructs the phrase tables using phrase libraries in
+# the SCIM packages. There are also special tables used for adjustment.
+# Some data in the file simp2trad.manual was taken from the paper
+# cited in the README file in this directory.
+# Requirement: you need to set your locale to zh_CN.UTF-8 (or any
+# other UTF-8 locale, I suppose)
+#
+#
+# Default goal. `all` names no real file, so it is declared phony; without
+# this a stray file called "all" in this directory would stop make from
+# building anything.
+.PHONY: all
+all: ZhConversion.php
+
+# Download and unpack the Unihan database archive.
+Unihan.txt:
+       wget ftp://ftp.unicode.org/Public/UNIDATA/Unihan.zip
+       unzip Unihan.zip
+
+# Download the scim-tables 0.4.3 sources, keep only zh/EZ.txt.in and
+# delete the tarball and the unpacked tree again.
+EZ.txt.in:
+       wget http://freedesktop.org/~suzhe/sources/scim-tables-0.4.3.tar.gz
+       tar zxvf scim-tables-0.4.3.tar.gz > /dev/null
+       cp scim-tables-0.4.3/zh/EZ.txt.in .
+       rm -rf scim-tables-0.4.3* 
+
+# Download the scim-chinese 0.4.2 sources, keep only data/phrase_lib.txt
+# and delete the tarball and the unpacked tree again.
+phrase_lib.txt:
+       wget http://freedesktop.org/~suzhe/scim-chinese/scim-chinese-0.4.2.tar.gz 
+       tar zxvf scim-chinese-0.4.2.tar.gz > /dev/null
+       cp scim-chinese-0.4.2/data/phrase_lib.txt .
+       rm -rf scim-chinese-0.4.2*
+
+# Helper filter that rewrites every U+xxxx code point read on stdin as
+# "U+xxxxx<character>|" (see printutf8.c).
+printutf8: printutf8.c
+       gcc -o printutf8 printutf8.c
+
+# trad2simp.t: traditional -> simplified character table, one line per
+# source character, in the printutf8 output format
+#     U+xxxxxA|U+yyyyyB|...        (code point followed by the character)
+# Steps:
+#  1. take the kSimplifiedVariant lines of Unihan.txt, drop lines that
+#     contain '#', strip the field name and let printutf8 expand every
+#     U+ code point (result in tmp1);
+#  2. for each key taken from trad2simp.manual (colrm 11 keeps only the
+#     first 10 columns of each manual line), delete the generated lines
+#     that start with that key, so manual entries override Unihan;
+#  3. write the manual entries first, followed by what is left of tmp1.
+trad2simp.t: Unihan.txt trad2simp.manual printutf8
+       grep kSimplifiedVariant Unihan.txt | sed  '/#/d' | sed  's/kSimplifiedVariant//' | ./printutf8 > tmp1
+       for I in `colrm 11 < trad2simp.manual` ; do sed "/^$$I/d" tmp1 > tmp2; mv tmp2 tmp1; done
+       cat trad2simp.manual tmp1 > trad2simp.t
+
+# simp2trad.t: the same construction in the other direction, built from
+# the kTraditionalVariant lines of Unihan.txt and simp2trad.manual.
+simp2trad.t: Unihan.txt simp2trad.manual printutf8
+       grep kTraditionalVariant Unihan.txt | sed  '/#/d' | sed  's/kTraditionalVariant//' | ./printutf8 > tmp1
+       for I in `colrm 11 < simp2trad.manual` ; do sed "/^$$I/d" tmp1 > tmp2; mv tmp2 tmp1; done
+       cat simp2trad.manual tmp1 > simp2trad.t
+
+# t2s_1tomany.t: traditional characters that have more than one simplified
+# form. Lines of trad2simp.t that are at least 19 characters long are
+# rewritten from  U+xxxxxA|U+yyyyyB|U+zzzzzC|  to  "A"=>"BC",
+# (in a UTF-8 locale a line with a single target is 18 characters long,
+# so the length test selects the lines with two or more targets).
+t2s_1tomany.t: trad2simp.t
+       grep -s ".\{19,\}" trad2simp.t | sed  's/U+...../"/' | sed  's/|U+...../"=>"/' | sed  's/|U+.....//g' | sed  's/|/",/'  > t2s_1tomany.t
+
+# t2s_1to1.t: one-to-one traditional -> simplified pairs, written "A"=>"B",
+#  - lines of trad2simp.t with fewer than three '|' separators are
+#    converted directly;
+#  - entries of s2t_1tomany.t are reversed: for "S"=>"T1T2..", with two to
+#    four targets, each of T2, T3 and T4 yields "Tn"=>"S", (the first
+#    target T1 is not reversed here).
+# The list is then sorted and de-duplicated through the scratch file `t`.
+# Redirecting the pipeline straight back into t2s_1to1.t, as was done
+# before, lets the shell truncate the file before sort has read it.
+t2s_1to1.t: trad2simp.t s2t_1tomany.t
+       sed  "/.*|.*|.*|.*/d" trad2simp.t | sed  's/U+[0-9a-z][0-9a-z]*/"/' | sed  's/|U+[0-9a-z][0-9a-z]*/"=>"/' | sed  's/|/",/' > t2s_1to1.t
+       grep  '"."=>"..",' s2t_1tomany.t | sed 's/\("."\)=>".\(.\)",/"\2"=>\1,/' >> t2s_1to1.t
+       grep  '"."=>"...",' s2t_1tomany.t | sed 's/\("."\)=>".\(.\).",/"\2"=>\1,/' >> t2s_1to1.t
+       grep  '"."=>"...",' s2t_1tomany.t | sed 's/\("."\)=>"..\(.\)",/"\2"=>\1,/' >> t2s_1to1.t
+       grep  '"."=>"....",' s2t_1tomany.t | sed 's/\("."\)=>".\(.\)..",/"\2"=>\1,/' >> t2s_1to1.t
+       grep  '"."=>"....",' s2t_1tomany.t | sed 's/\("."\)=>"..\(.\).",/"\2"=>\1,/' >> t2s_1to1.t
+       grep  '"."=>"....",' s2t_1tomany.t | sed 's/\("."\)=>"...\(.\)",/"\2"=>\1,/' >> t2s_1to1.t
+       sort t2s_1to1.t | uniq > t
+       mv t t2s_1to1.t
+
+
+# s2t_1tomany.t: simplified characters that have more than one traditional
+# form. Lines of simp2trad.t that are at least 19 characters long are
+# rewritten from  U+xxxxxA|U+yyyyyB|U+zzzzzC|  to  "A"=>"BC",
+# (in a UTF-8 locale a line with a single target is 18 characters long,
+# so the length test selects the lines with two or more targets).
+s2t_1tomany.t: simp2trad.t
+       grep -s ".\{19,\}" simp2trad.t | sed  's/U+...../"/' | sed  's/|U+...../"=>"/' | sed  's/|U+.....//g' | sed  's/|/",/' > s2t_1tomany.t
+
+# s2t_1to1.t: one-to-one simplified -> traditional pairs, written "A"=>"B",
+#  - lines of simp2trad.t with fewer than three '|' separators are
+#    converted directly;
+#  - entries of t2s_1tomany.t are reversed: for "T"=>"S1S2..", with two to
+#    four targets, each of S2, S3 and S4 yields "Sn"=>"T", (the first
+#    target S1 is not reversed here).
+# The list is then sorted and de-duplicated through the scratch file `t`.
+# Redirecting the pipeline straight back into s2t_1to1.t, as was done
+# before, lets the shell truncate the file before sort has read it.
+s2t_1to1.t: simp2trad.t t2s_1tomany.t
+       sed  "/.*|.*|.*|.*/d" simp2trad.t | sed  's/U+[0-9a-z][0-9a-z]*/"/' | sed  's/|U+[0-9a-z][0-9a-z]*/"=>"/' | sed  's/|/",/' > s2t_1to1.t
+       grep  '"."=>"..",' t2s_1tomany.t | sed 's/\("."\)=>".\(.\)",/"\2"=>\1,/' >> s2t_1to1.t
+       grep  '"."=>"...",' t2s_1tomany.t | sed 's/\("."\)=>".\(.\).",/"\2"=>\1,/' >> s2t_1to1.t
+       grep  '"."=>"...",' t2s_1tomany.t | sed 's/\("."\)=>"..\(.\)",/"\2"=>\1,/' >> s2t_1to1.t
+       grep  '"."=>"....",' t2s_1tomany.t | sed 's/\("."\)=>".\(.\)..",/"\2"=>\1,/' >> s2t_1to1.t
+       grep  '"."=>"....",' t2s_1tomany.t | sed 's/\("."\)=>"..\(.\).",/"\2"=>\1,/' >> s2t_1to1.t
+       grep  '"."=>"....",' t2s_1tomany.t | sed 's/\("."\)=>"...\(.\)",/"\2"=>\1,/' >> s2t_1to1.t
+       sort s2t_1to1.t | uniq > t
+       mv t s2t_1to1.t
+
+# ez.t: candidate phrases extracted from EZ.txt.in. Columns 1-8 of every
+# line are cut away, tabs are removed, only lines that begin with two to
+# four characters followed by a digit are kept, and all digits are then
+# stripped. (Presumably the cut columns hold the input key and the digits a
+# frequency; the layout of EZ.txt.in is not visible here, so confirm.)
+ez.t: EZ.txt.in
+       colrm 1 8 < EZ.txt.in | sed 's/\t//g' | grep "^.\{2,4\}[0-9]" | sed 's/[0-9]//g' > ez.t
+
+# alltradphrases.t: every ez.t phrase that contains one of the second or
+# later targets listed in s2t_1tomany.t. The first seven characters of each
+# "S"=>"T1T2..", entry (up to and including T1) are dropped and the
+# remaining targets are split one per line (this assumes a UTF-8 locale so
+# that `.` matches a whole character). `|| true` keeps the rule from
+# failing when the last grep finds no match.
+alltradphrases.t: ez.t s2t_1tomany.t
+       for i in `cat s2t_1tomany.t | sed 's/.......//' | sed 's/",/\n/' | sed 's/\(.\)/\1\n/g' |sort | uniq`; do grep -s $$i ez.t ; done  > alltradphrases.t || true
+
+# tradphrases_2.t: the two-character phrases of alltradphrases.t, sorted
+# and de-duplicated.
+tradphrases_2.t: alltradphrases.t
+       cat alltradphrases.t | grep  "^..$$" | sort | uniq > tradphrases_2.t
+
+# tradphrases_3.t: the three-character phrases of alltradphrases.t, minus
+# those that contain one of the two-character phrases (t3 collects the
+# phrases to drop; the '>' lines of the diff are the ones only present in
+# tradphrases_3.t).
+# tradphrases_2.t is read by the recipe, so it is listed as a prerequisite.
+# The filtered list goes through the scratch file `t`: redirecting it
+# straight into tradphrases_3.t would truncate the file diff is reading.
+tradphrases_3.t: alltradphrases.t tradphrases_2.t
+       cat alltradphrases.t | grep  "^...$$" | sort | uniq > tradphrases_3.t
+       for i in `cat tradphrases_2.t`; do grep $$i tradphrases_3.t ; done | sort | uniq > t3 || true
+       diff t3 tradphrases_3.t | grep ">" | sed 's/> //' > t
+       mv t tradphrases_3.t
+
+
+# tradphrases_4.t: the four-character phrases of alltradphrases.t, minus
+# those that contain a two-character phrase and then minus those that
+# contain a three-character phrase. Both shorter lists are read by the
+# recipe, so they are listed as prerequisites.
+tradphrases_4.t: alltradphrases.t tradphrases_2.t tradphrases_3.t
+       cat alltradphrases.t | grep  "^....$$" | sort | uniq > tradphrases_4.t
+       for i in `cat tradphrases_2.t`; do grep $$i tradphrases_4.t ; done | sort | uniq > t3 || true
+       diff t3 tradphrases_4.t | grep ">" | sed 's/> //' > t
+       mv t tradphrases_4.t
+       for i in `cat tradphrases_3.t`; do grep $$i tradphrases_4.t ; done | sort | uniq > t3 || true
+       diff t3 tradphrases_4.t | grep ">" | sed 's/> //' > t
+       mv t tradphrases_4.t
+
+# tradphrases.t: final list of traditional phrases.
+#  1. merge tradphrases.manual with the 2-, 3- and 4-character lists,
+#     sorted and de-duplicated;
+#  2. for every source character of t2s_1tomany.t (the character right
+#     after the opening quote of each entry), grep the phrases containing
+#     it, diff that output against the full list and keep only the lines
+#     diff marks with '<', i.e. reports as present only in tradphrases.t.
+# NOTE(review): the grep output is grouped per character rather than
+# sorted, so the diff is not a strict set difference - confirm the result
+# is what is intended.
+tradphrases.t: tradphrases.manual tradphrases_2.t tradphrases_3.t tradphrases_4.t t2s_1tomany.t
+       cat tradphrases.manual tradphrases_2.t tradphrases_3.t tradphrases_4.t |sort | uniq > tradphrases.t
+       for i in `sed 's/"\(.\).*/\1/' t2s_1tomany.t ` ; do grep $$i tradphrases.t ; done | diff tradphrases.t - | grep '<' | sed 's/< //' > t
+       mv t tradphrases.t
+
+# ph.t: candidate phrases from phrase_lib.txt. Every character in the
+# bracket set (tab escape, digits, ASCII letters) is removed and only the
+# lines that are then two to four characters long are kept.
+ph.t: phrase_lib.txt
+       sed 's/[\t0-9a-zA-Z]//g' phrase_lib.txt | grep "^.\{2,4\}$$" > ph.t
+
+# allsimpphrases.t: every ph.t phrase that contains one of the second or
+# later targets listed in t2s_1tomany.t (the first seven characters of each
+# "T"=>"S1S2..", entry are dropped and the remaining targets are split one
+# per line; this assumes a UTF-8 locale so that `.` matches a character).
+# t2s_1tomany.t is read by the recipe, so it is listed as a prerequisite.
+# `|| true` keeps the rule from failing when the last grep finds no match,
+# as the alltradphrases.t rule already does.
+allsimpphrases.t: ph.t t2s_1tomany.t
+       rm -f allsimpphrases.t
+       for i in `cat t2s_1tomany.t | sed 's/.......//' | sed 's/",/\n/' | sed 's/\(.\)/\1\n/g' | sort | uniq `; do grep $$i ph.t >> allsimpphrases.t; done || true
+
+# simpphrases_2.t: the two-character phrases of allsimpphrases.t, sorted
+# and de-duplicated.
+simpphrases_2.t: allsimpphrases.t
+       cat allsimpphrases.t | grep "^..$$" | sort | uniq > simpphrases_2.t
+
+# simpphrases_3.t: the three-character phrases of allsimpphrases.t, minus
+# those that contain one of the two-character phrases (t3 collects the
+# phrases to drop; the '>' lines of the diff are the ones only present in
+# simpphrases_3.t). simpphrases_2.t is read by the recipe, so it is listed
+# as a prerequisite.
+simpphrases_3.t: allsimpphrases.t simpphrases_2.t
+       cat allsimpphrases.t | grep "^...$$" | sort | uniq > simpphrases_3.t
+       for i in `cat simpphrases_2.t`; do grep $$i simpphrases_3.t ; done | sort | uniq > t3 || true
+       diff t3 simpphrases_3.t | grep ">" | sed 's/> //' > t
+       mv t simpphrases_3.t
+
+# simpphrases_4.t: the four-character phrases of allsimpphrases.t, minus
+# those that contain a two-character phrase and then minus those that
+# contain a three-character phrase. Both shorter lists are read by the
+# recipe, so they are listed as prerequisites.
+simpphrases_4.t: allsimpphrases.t simpphrases_2.t simpphrases_3.t
+       cat allsimpphrases.t | grep "^....$$" | sort | uniq > simpphrases_4.t
+       rm -f t
+       for i in `cat simpphrases_2.t`; do grep $$i simpphrases_4.t >> t; done || true
+       sort t | uniq > t3
+       diff t3 simpphrases_4.t | grep ">" | sed 's/> //' > t
+       mv t simpphrases_4.t
+       for i in `cat simpphrases_3.t`; do grep $$i simpphrases_4.t; done | sort  | uniq > t3 || true
+       diff t3 simpphrases_4.t | grep ">" | sed 's/> //' > t
+       mv t simpphrases_4.t
+
+# simpphrases.t: final list of simplified phrases.
+#  1. concatenate the 2-, 3- and 4-character lists;
+#  2. for every source character of t2s_1tomany.t (the character right
+#     after the opening quote of each entry), grep the phrases containing
+#     it, diff that output against the full list and keep only the lines
+#     diff marks with '<', i.e. reports as present only in simpphrases.t.
+# NOTE(review): the tradphrases.t rule filters traditional phrases with the
+# source characters of t2s_1tomany.t; the mirror image for simplified
+# phrases would be s2t_1tomany.t. Confirm that t2s_1tomany.t is intended.
+simpphrases.t:simpphrases_2.t simpphrases_3.t simpphrases_4.t t2s_1tomany.t
+       cat simpphrases_2.t simpphrases_3.t simpphrases_4.t > simpphrases.t
+       for i in `sed 's/"\(.\).*/\1/' t2s_1tomany.t ` ; do grep $$i simpphrases.t ; done | diff simpphrases.t - | grep '<' | sed 's/< //' > t
+       mv t simpphrases.t
+
+
+# trad2simp1to1.t: complete per-character traditional -> simplified table.
+# Each one-to-many entry "A"=>"BC..", is cut down to its first seven
+# characters plus the closing quote and comma, i.e. "A"=>"B", (the first
+# listed target becomes the default); the one-to-one pairs of t2s_1to1.t
+# are appended after them.
+trad2simp1to1.t: t2s_1tomany.t t2s_1to1.t
+       sed  's/\(.......\).*/\1",/' t2s_1tomany.t > trad2simp1to1.t
+       cat t2s_1to1.t >> trad2simp1to1.t
+
+# simp2trad1to1.t: the same construction for simplified -> traditional,
+# from s2t_1tomany.t and s2t_1to1.t.
+simp2trad1to1.t: s2t_1tomany.t s2t_1to1.t
+       sed  's/\(.......\).*/\1",/' s2t_1tomany.t > simp2trad1to1.t
+       cat s2t_1to1.t >> simp2trad1to1.t
+
+# trad2simp.php: throw-away PHP script used to convert the phrase list.
+# It defines an array from the pairs in trad2simp1to1.t, embeds the whole
+# of tradphrases.t in one double-quoted string and echoes the result of
+# strtr() on that string, i.e. the phrases with the character table
+# applied. ($$ is make's escape for a literal dollar sign.)
+trad2simp.php: trad2simp1to1.t tradphrases.t
+       printf '<?php\n$$trad2simp=array(' > trad2simp.php
+       cat trad2simp1to1.t >> trad2simp.php
+       printf ');\n$$str=\n"' >> trad2simp.php
+       cat tradphrases.t >> trad2simp.php
+       printf '";\n$$t=strtr($$str, $$trad2simp);\necho $$t;\n?>' >> trad2simp.php
+
+# simp2trad.php: the same script for the other direction, built from
+# simp2trad1to1.t and simpphrases.t.
+simp2trad.php: simp2trad1to1.t simpphrases.t
+       printf '<?php\n$$simp2trad=array(' > simp2trad.php
+       cat simp2trad1to1.t >> simp2trad.php
+       printf ');\n$$str=\n"' >> simp2trad.php
+       cat simpphrases.t >> simp2trad.php
+       printf '";\n$$t=strtr($$str, $$simp2trad);\necho $$t;\n?>' >> simp2trad.php
+
+# simp2trad.phrases.t: phrase-level simplified -> traditional entries.
+#  tmp1: output of trad2simp.php (the converted form of each phrase of
+#        tradphrases.t), each line wrapped as  "..." =>
+#  tmp2: the original phrases of tradphrases.t, each wrapped as  "...",
+# paste joins the two line by line, giving  "converted" => <tab>"original",
+# Finally the tab-separated pairs of simp2tradPhrases.manual are appended
+# as  "left"=>"right",
+simp2trad.phrases.t: trad2simp.php tradphrases.t simp2tradPhrases.manual
+       php -f trad2simp.php | sed  's/\(.*\)/"\1" => /' > tmp1
+       cat tradphrases.t | sed  's/\(.*\)/"\1",/' > tmp2
+       paste tmp1 tmp2 > simp2trad.phrases.t
+       sed 's/\(.*\)\t\(.*\)/"\1"=>"\2",/' simp2tradPhrases.manual >> simp2trad.phrases.t
+
+# trad2simp.phrases.t: the same construction for traditional -> simplified,
+# from simp2trad.php, simpphrases.t and trad2simpPhrases.manual.
+trad2simp.phrases.t: simp2trad.php simpphrases.t trad2simpPhrases.manual
+       php -f simp2trad.php | sed  's/\(.*\)/"\1" => /' > tmp1
+       cat simpphrases.t | sed  's/\(.*\)/"\1",/' > tmp2
+       paste tmp1 tmp2 > trad2simp.phrases.t
+       sed 's/\(.*\)\t\(.*\)/"\1"=>"\2",/' trad2simpPhrases.manual >> trad2simp.phrases.t
+
+# ZhConversion.php: the final product. Two PHP arrays are written:
+#  - zhSimp2Trad: the pairs of simp2trad1to1.t, an empty line, then the
+#    phrase entries of simp2trad.phrases.t;
+#  - zhTrad2Simp: the pairs of trad2simp1to1.t, an empty line, then the
+#    phrase entries of trad2simp.phrases.t.
+# ($$ is make's escape for a literal dollar sign.)
+ZhConversion.php: simp2trad1to1.t simp2trad.phrases.t trad2simp1to1.t trad2simp.phrases.t
+       printf '<?php\n$$zhSimp2Trad=array(\n' > ZhConversion.php
+       cat simp2trad1to1.t >> ZhConversion.php
+       echo >> ZhConversion.php
+       cat simp2trad.phrases.t >> ZhConversion.php
+       echo ');' >> ZhConversion.php
+       echo >> ZhConversion.php
+       printf '$$zhTrad2Simp=array(\n' >> ZhConversion.php
+       cat trad2simp1to1.t >> ZhConversion.php
+       echo >> ZhConversion.php
+       cat trad2simp.phrases.t >> ZhConversion.php
+       printf ');\n?>' >> ZhConversion.php
+
+# Remove the generated table, the helper PHP scripts and all intermediate
+# files, including the scratch file `t` that several rules write before
+# renaming it. Declared phony so that a file named "clean" cannot
+# disable the rule.
+.PHONY: clean
+clean:
+       rm -f ZhConversion.php tmp1 tmp2 tmp3 t t3 *.t trad2simp.php simp2trad.php
\ No newline at end of file
diff --git a/includes/zhtable/README b/includes/zhtable/README
new file mode 100644 (file)
index 0000000..4b7428b
--- /dev/null
@@ -0,0 +1,9 @@
+The file simp2trad.manual contains special mappings not included in the
+Unihan database. The first 116 entries are taken from
+
+   冯寿忠,“非对称繁简字”对照表, 《语文建设通讯》1997-9第53期.
+   http://www.yywzw.com/jt/feng/fengb01.htm
+
+The rest are added by me after examining the conversion results.
+
+zhengzhu at gmail.com
\ No newline at end of file
diff --git a/includes/zhtable/printutf8.c b/includes/zhtable/printutf8.c
new file mode 100644 (file)
index 0000000..b6ccf17
--- /dev/null
@@ -0,0 +1,99 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+/* 
+ Unicode                   UTF8
+0x00000000 - 0x0000007F: 0xxxxxxx
+0x00000080 - 0x000007FF: 110xxxxx 10xxxxxx
+0x00000800 - 0x0000FFFF: 1110xxxx 10xxxxxx 10xxxxxx
+0x00010000 - 0x001FFFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+0x00200000 - 0x03FFFFFF: 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
+0x04000000 - 0x7FFFFFFF: 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
+
+0000 0      1001 9
+0001 1      1010 A
+0010 2      1011 B
+0011 3      1100 C
+0100 4      1101 D 
+0101 5      1110 E
+0110 6      1111 F
+0111 7
+1000 8
+*/
+/*
+ * Write the UTF-8 encoding of code point u to stdout, using the original
+ * scheme of up to six bytes per character (see the table above).
+ *
+ * Each branch emits the lead byte (length marker plus the topmost payload
+ * bits) followed by continuation bytes that carry six payload bits each,
+ * most significant group first.
+ *
+ * Values below 0x80, including negative ones, are written as the single
+ * byte (unsigned char)u.
+ */
+void printUTF8(long long u) {
+  long long m;
+  if(u<0x80) {
+    printf("%c", (unsigned char)u);
+  }
+  else if(u<0x800) {
+    /* 2 bytes: 5 + 6 payload bits */
+    m = ((u&0x7c0)>>6) | 0xc0;
+    printf("%c", (unsigned char)m);
+    m = (u&0x3f) | 0x80;
+    printf("%c", (unsigned char)m);
+  }
+  else if(u<0x10000) {
+    /* 3 bytes: 4 + 6 + 6 payload bits */
+    m = ((u&0xf000)>>12) | 0xe0;
+    printf("%c",(unsigned char)m);
+    m = ((u&0xfc0)>>6) | 0x80;
+    printf("%c",(unsigned char)m);
+    m = (u & 0x3f) | 0x80;
+    printf("%c",(unsigned char)m);
+  }
+  else if(u<0x200000) {
+    /* 4 bytes: 3 + 6 + 6 + 6 payload bits */
+    m = ((u&0x1c0000)>>18) | 0xf0;
+    printf("%c", (unsigned char)m);
+    m = ((u& 0x3f000)>>12) | 0x80;
+    printf("%c", (unsigned char)m);
+    m = ((u& 0xfc0)>>6) | 0x80;
+    printf("%c", (unsigned char)m);
+    m = (u&0x3f) | 0x80;
+    printf("%c", (unsigned char)m);
+  }
+  else if(u<0x4000000){
+    /* 5 bytes: 2 + 6 + 6 + 6 + 6 payload bits */
+    m = ((u&0x3000000)>>24) | 0xf8;
+    printf("%c", (unsigned char)m);
+    m = ((u&0xfc0000)>>18) | 0x80;
+    printf("%c", (unsigned char)m);
+    m = ((u&0x3f000)>>12) | 0x80;
+    printf("%c", (unsigned char)m);
+    /* bits 6..11: the mask must be 0xfc0; the former 0xfc00 selected
+       bits 10..15 and produced a wrong fourth byte */
+    m = ((u&0xfc0)>>6) | 0x80;
+    printf("%c", (unsigned char)m);
+    m = (u&0x3f) | 0x80;
+    printf("%c", (unsigned char)m);
+  }
+  else {
+    /* 6 bytes: 1 + 6 + 6 + 6 + 6 + 6 payload bits */
+    m = ((u&0x40000000)>>30) | 0xfc;
+    printf("%c", (unsigned char)m);
+    m = ((u&0x3f000000)>>24) | 0x80;
+    printf("%c", (unsigned char)m);
+    m = ((u&0xfc0000)>>18) | 0x80;
+    printf("%c", (unsigned char)m);
+    m = ((u&0x3f000)>>12) | 0x80;
+    printf("%c", (unsigned char)m);
+    m = ((u&0xfc0)>>6) | 0x80;
+    printf("%c", (unsigned char)m);
+    m = (u&0x3f)| 0x80;
+    printf("%c", (unsigned char)m);
+  }
+}
+
+/*
+ * Filter from stdin to stdout. For every chunk returned by fgets (an input
+ * line of up to 1023 bytes), each "U+" marker is located, the hexadecimal
+ * number after it is parsed, and "U+xxxxx<char>|" is printed: the code
+ * point zero-padded to at least five hex digits, its UTF-8 encoding, and
+ * a '|' separator. All other input text is dropped. A newline is written
+ * after each chunk.
+ */
+int main() {
+  size_t i, len;
+  long long n1;
+  /* plain char: this is the buffer type fgets and strlen expect */
+  char buf[1024];
+  while(fgets(buf, sizeof buf, stdin)) {
+    /* measure once instead of calling strlen on every iteration */
+    len = strlen(buf);
+    for(i=0;i<len; i++) {
+      /* buf[i+1] is at worst the terminating NUL, so the look-ahead is safe */
+      if(buf[i]=='U' && buf[i+1]=='+') {
+        n1 = strtoll(buf+i+2,0,16);
+        /* n1 is a long long: the conversion needs the ll length modifier
+           (plain %x expects an unsigned int) */
+        printf("U+%05llx", (unsigned long long)n1);
+        printUTF8(n1);printf("|");
+      }
+    }
+    printf("\n");
+  }
+  return 0;
+}
+
diff --git a/includes/zhtable/simp2trad.manual b/includes/zhtable/simp2trad.manual
new file mode 100644 (file)
index 0000000..731a41e
--- /dev/null
@@ -0,0 +1,177 @@
+U+0677f板|U+0677f板|U+095c6闆|
+U+08868表|U+08868表|U+09336錶|
+U+0624d才|U+0624d才|U+07e94纔|
+U+04e11丑|U+0919c醜|U+04e11丑|
+U+051fa出|U+051fa出|U+09f63齣|
+U+06dc0淀|U+06fb1澱|U+06dc0淀|
+U+051ac冬|U+051ac冬|U+09f15鼕|
+U+08303范|U+07bc4範|U+08303范|
+U+04e30丰|U+08c50豐|U+04e30丰|
+U+0522e刮|U+0522e刮|U+098b3颳|
+U+0540e后|U+05f8c後|U+0540e后|
+U+080e1胡|U+080e1胡|U+09b0d鬍|U+0885a衚|
+U+056de回|U+056de回|U+08ff4迴|
+U+04f19伙|U+05925夥|U+04f19伙|
+U+059dc姜|U+08591薑|U+059dc姜|
+U+0501f借|U+0501f借|U+085c9藉|
+U+0514b克|U+0514b克|U+0524b剋|
+U+056f0困|U+056f0困|U+0774f睏|
+U+06f13漓|U+06f13漓|U+07055灕|
+U+091cc里|U+091cc里|U+088e1裡|U+088cf裏|
+U+05e18帘|U+07c3e簾|U+05e18帘|
+U+09709霉|U+09709霉|U+09ef4黴|
+U+09762面|U+09762面|U+09eb5麵|
+U+08511蔑|U+08511蔑|U+0884a衊|
+U+05343千|U+05343千|U+097c6韆|
+U+079cb秋|U+079cb秋|U+097a6鞦|
+U+0677e松|U+0677e松|U+09b06鬆|
+U+054b8咸|U+054b8咸|U+09e79鹹|
+U+05411向|U+05411向|U+056ae嚮|U+066cf曏|
+U+04f59余|U+09918餘|U+04f59余|
+U+090c1郁|U+09b31鬱|U+090c1郁|
+U+05fa1御|U+05fa1御|U+079a6禦|
+U+0613f愿|U+09858願|U+0613f愿|
+U+04e91云|U+096f2雲|U+04e91云|
+U+082b8芸|U+082b8芸|U+08553蕓|
+U+06c84沄|U+06c84沄|U+06f90澐|
+U+081f4致|U+081f4致|U+07dfb緻|
+U+05236制|U+05236制|U+088fd製|
+U+06731朱|U+06731朱|U+07843硃|
+U+07b51筑|U+07bc9築|U+07b51筑|
+U+051c6准|U+06e96準|U+051c6准|
+U+05382厂|U+05ee0廠|U+05382厂|
+U+05e7f广|U+05ee3廣|U+05e7f广|
+U+08f9f辟|U+095e2闢|U+08f9f辟|
+U+0522b别|U+05225別|U+05f46彆|
+U+0535c卜|U+0535c卜|U+08514蔔|
+U+06c88沈|U+06c88沈|U+0700b瀋|
+U+051b2冲|U+06c96沖|U+0885d衝|
+U+079cd种|U+07a2e種|U+079cd种|
+U+0866b虫|U+087f2蟲|U+0866b虫|
+U+062c5担|U+064d4擔|U+062c5担|
+U+0515a党|U+09ee8黨|U+0515a党|
+U+06597斗|U+09b25鬥|U+06597斗|
+U+0513f儿|U+05152兒|U+0513f儿|
+U+05e72干|U+04e7e乾|U+05e79幹|U+05e72干|
+U+08c37谷|U+08c37谷|U+07a40穀|
+U+067dc柜|U+06ac3櫃|U+067dc柜|
+U+05408合|U+05408合|U+095a4閤|
+U+05212划|U+0756b畫|U+05283劃|U+05212划|
+U+0574f坏|U+058de壞|U+0574f坏|
+U+051e0几|U+05e7e幾|U+051e0几|
+U+07cfb系|U+07cfb系|U+07e6b繫|U+04fc2係|
+U+05bb6家|U+05bb6家|U+050a2傢|
+U+04ef7价|U+050f9價|U+04ef7价|
+U+0636e据|U+064da據|U+0636e据|
+U+05377卷|U+06372捲|U+05377卷|
+U+09002适|U+09069適|U+09002适|
+U+08721蜡|U+0881f蠟|U+08721蜡|
+U+0814a腊|U+081d8臘|U+0814a腊|
+U+04e86了|U+04e86了|U+077ad瞭|
+U+07d2f累|U+07d2f累|U+07e8d纍|
+U+04e48么|U+09ebd麽|U+04e48么|U+05e7a幺|U+09ebc麼|
+U+08499蒙|U+08499蒙|U+077c7矇|U+06fdb濛|U+061de懞|
+U+04e07万|U+0842c萬|U+04e07万|
+U+05b81宁|U+05be7寧|U+05b81宁|
+U+06734朴|U+06a38樸|U+06734朴|
+U+082f9苹|U+0860b蘋|U+082f9苹|
+U+04ec6仆|U+050d5僕|U+04ec6仆|
+U+066f2曲|U+066f2曲|U+09eaf麯|
+U+0786e确|U+078ba確|U+0786e确|
+U+0820d舍|U+0820d舍|U+06368捨|
+U+080dc胜|U+052dd勝|U+080dc胜|
+U+0672f术|U+08853術|U+0672f术|
+U+053f0台|U+053f0台|U+081fa臺|U+06aaf檯|U+098b1颱|
+U+04f53体|U+09ad4體|U+04f53体|
+U+06d82涂|U+05857塗|U+06d82涂|
+U+053f6叶|U+08449葉|U+053f6叶|
+U+05401吁|U+05401吁|U+07c72籲|
+U+065cb旋|U+065cb旋|U+0955f镟|
+U+04f63佣|U+050ad傭|U+04f63佣|
+U+04e0e与|U+08207與|U+04e0e与|
+U+06298折|U+06298折|U+0647a摺|
+U+05f81征|U+05fb5徵|U+05f81征|
+U+075c7症|U+075c7症|U+07665癥|
+U+06076恶|U+060e1惡|U+05641噁|
+U+053d1发|U+0767c發|U+09aee髮|
+U+0590d复|U+05fa9復|U+08907複|U+08986覆|
+U+06c47汇|U+0532f匯|U+05f59彙|
+U+083b7获|U+07372獲|U+07a6b穫|
+U+09965饥|U+098e2飢|U+09951饑|
+U+05c3d尽|U+076e1盡|U+05118儘|
+U+05386历|U+06b77歷|U+066c6曆|
+U+05364卤|U+06ef7滷|U+09e75鹵|
+U+05f25弥|U+05f4c彌|U+07030瀰|
+U+07b7e签|U+07c3d簽|U+07c56籖|
+U+07ea4纤|U+07e96纖|U+07e34縴|
+U+082cf苏|U+08607蘇|U+056cc囌|
+U+0575b坛|U+058c7壇|U+07f48罈|
+U+056e2团|U+05718團|U+07cf0糰|
+U+0987b须|U+09808須|U+09b1a鬚|
+U+0810f脏|U+081df臟|U+09ad2髒|
+U+053ea只|U+096bb隻|U+08879衹|
+U+0949f钟|U+09418鐘|U+0937e鍾|
+U+0836f药|U+085e5藥|U+0846f葯|
+U+0540c同|U+0540c同|U+08855衕|
+U+05fd7志|U+05fd7志|U+08a8c誌|
+U+0676f杯|U+0676f杯|U+076c3盃|
+U+05cb3岳|U+05cb3岳|U+05dbd嶽|
+U+05e03布|U+05e03布|U+04f48佈|
+U+05f53当|U+07576當|U+05679噹|
+U+0540a吊|U+05f14弔|U+0540a吊|
+U+04ec7仇|U+04ec7仇|U+08b8e讎|
+U+08574蕴|U+0860a蘊|U+085f4藴|
+U+07ebf线|U+07dda線|U+07dab綫|
+U+04e3a为|U+070ba為|U+07232爲|
+U+04ea7产|U+07522產|U+07523産|
+U+04f17众|U+0773e眾|U+08846衆|
+U+04f2a伪|U+0507d偽|U+050de僞|
+U+051eb凫|U+09ce7鳧|U+09cec鳬|
+U+05395厕|U+05ec1廁|U+053a0厠|
+U+0542f启|U+0555f啟|U+05553啓|
+U+05899墙|U+07246牆|U+058bb墻|
+U+058f3壳|U+06bbc殼|U+06bbb殻|
+U+05956奖|U+0734e獎|U+0596c奬|
+U+059ab妫|U+05aaf媯|U+05b00嬀|
+U+05e76并|U+04e26並|U+04f75併|
+U+05f55录|U+09304錄|U+09332録|
+U+060ab悫|U+06128愨|U+06164慤|
+U+06781极|U+06975極|U+06781极|
+U+06ca9沩|U+06e88溈|U+06f59潙|
+U+07618瘘|U+0763a瘺|U+0763b瘻|
+U+07877硷|U+09e7c鹼|U+07906礆|
+U+07ad6竖|U+08c4e豎|U+07aea竪|
+U+07edd绝|U+07d55絕|U+07d76絶|
+U+07ee3绣|U+07e61繡|U+07d89綉|
+U+07ee6绦|U+07d5b絛|U+07e27縧|
+U+07ef1绱|U+07dd4緔|U+0979d鞝|
+U+07ef7绷|U+07db3綳|U+07e43繃|
+U+07eff绿|U+07da0綠|U+07dd1緑|
+U+07f30缰|U+097c1韁|U+07e6e繮|
+U+082e7苧|U+082ce苎|U+085b4薴|
+U+083bc莼|U+08493蒓|U+084f4蓴|
+U+08bf4说|U+08aaa說|U+08aac説|
+U+08c23谣|U+08b20謠|U+08b21謡|
+U+08c2b谫|U+08b7e譾|U+08b2d謭|
+U+08d43赃|U+08d13贓|U+08d1c贜|
+U+08d4d赍|U+09f4e齎|U+08ceb賫|
+U+08d5d赝|U+08d17贗|U+08d0b贋|
+U+0915d酝|U+0919e醞|U+09196醖|
+U+091c7采|U+063a1採|U+091c7采|U+057f0埰|
+U+094a9钩|U+09264鉤|U+0920e鈎|
+U+094b5钵|U+07f3d缽|U+09262鉢|
+U+09508锈|U+092b9銹|U+093fd鏽|
+U+09510锐|U+092b3銳|U+092ed鋭|
+U+09528锨|U+06774杴|U+09341鍁|
+U+0954c镌|U+0942b鐫|U+093b8鎸|
+U+09562镢|U+09481钁|U+0941d鐝|
+U+09605阅|U+095b1閱|U+095b2閲|
+U+09893颓|U+09839頹|U+0983d頽|
+U+0989c颜|U+0984f顏|U+09854顔|
+U+09980馀|U+09918餘|U+04f59余|
+U+09a82骂|U+07f75罵|U+099e1駡|
+U+09c87鲇|U+09bf0鯰|U+09b8e鮎|
+U+09c9e鲞|U+09bd7鯗|U+09b9d鮝|
+U+09cc4鳄|U+09c77鱷|U+09c10鰐|
+U+09e21鸡|U+096de雞|U+09dc4鷄|
+U+09e5a鹚|U+09dbf鶿|U+09dc0鷀|
diff --git a/includes/zhtable/simp2tradPhrases.manual b/includes/zhtable/simp2tradPhrases.manual
new file mode 100644 (file)
index 0000000..3a83aea
--- /dev/null
@@ -0,0 +1,100 @@
+串行 串列
+以太 乙太
+位图 點陣圖
+例程 常式
+信道 通道
+像素 圖元
+光标 游標
+光盘 光碟
+光驱 光碟機
+全角 全形
+公元 西元
+公历 西曆
+共享 共用
+兼容 相容
+内存 記憶體
+前缀 首碼
+办厂 開工廠
+加载 載入
+半角 半形
+变量 變數
+后缀 尾碼
+噪声 雜訊
+因子 因數
+在线 線上
+域名 功能變數名稱
+声卡 音效卡
+奔驰 賓士
+奶酪 乳酪
+字号 字型大小
+字库 字形檔
+字段 欄位
+字符 字元
+字节 位元組
+存盘 存檔
+寻址 定址
+尾注 章節附註
+巨商 鉅賈
+布什 布希
+异步 非同步
+总线 匯流排
+手电 手電筒
+打印 列印
+括号 括弧
+接口 介面
+控件 控制項
+权限 許可權
+毫安 毫安培
+水印 浮水印
+汉卡 中文卡
+海碗 大碗公
+盘片 碟片
+硬件 硬體
+硅油 矽油
+硅片 矽片
+硅石 矽石
+硅砖 矽磚
+硅肺 矽肺
+硅胶 矽膠
+硅藻 矽藻
+硅谷 矽谷
+硅酸 矽酸
+硅钢 矽鋼
+硬盘 硬碟
+磁盘 磁片
+磁道 磁軌
+程序 程式
+程控 程式控制
+端口 埠
+算子 運算元
+算法 演算法
+芯片 晶片
+词组 片語
+译码 解碼
+软驱 軟盤機
+闪存 快閃記憶體
+鼠标 滑鼠
+二极管      二極體
+三极管      三極體
+进制 進位
+交互式      互動式
+仿真 模擬
+优先级      優先順序
+传感 感測
+便携式      攜帶型
+信息论      資訊理論
+循环 迴圈
+写保护      防寫
+分布式      分散式
+分辨率      解析度
+丹顿 丹唐
+也门 葉門
+习用 慣用
+元音 母音
+数码 數位
+程序 程式
+软件 軟體
+网络 網路
+计算机      電腦
+保存 存檔
+服务器      伺服器
\ No newline at end of file
diff --git a/includes/zhtable/trad2simp.manual b/includes/zhtable/trad2simp.manual
new file mode 100644 (file)
index 0000000..da06931
--- /dev/null
@@ -0,0 +1,15 @@
+U+056a5嚥|U+054bd咽|
+U+0585a塚|U+051a2冢|
+U+05dbd嶽|U+05cb3岳|
+U+04e99亙|U+04e98亘|
+U+081e5臥|U+05367卧|
+U+04f48佈|U+05e03布|
+U+06dd2淒|U+051c4凄|
+U+06de8淨|U+051c0净|
+U+05147兇|U+051f6凶|
+U+04f48佈|U+05e03布|
+U+06c59汙|U+06c61污|
+U+056ae嚮|U+05411向|
+U+09031週|U+05468周|
+U+0904a遊|U+06e38游|
+U+06de9淩|U+051cc凌|
diff --git a/includes/zhtable/trad2simpPhrases.manual b/includes/zhtable/trad2simpPhrases.manual
new file mode 100644 (file)
index 0000000..076c111
--- /dev/null
@@ -0,0 +1,99 @@
+串列 串行
+乙太 以太
+點陣圖      位图
+常式 例程
+通道 信道
+圖元 像素
+游標 光标
+光碟 光盘
+光碟機      光驱
+全形 全角
+西元 公元
+西曆 公历
+共用 共享
+相容 兼容
+記憶體      内存
+首碼 前缀
+開工廠      办厂
+載入 加载
+半形 半角
+變數 变量
+尾碼 后缀
+雜訊 噪声
+因數 因子
+線上 在线
+功能變數名稱     域名
+音效卡      声卡
+賓士 奔驰
+乳酪 奶酪
+字型大小   字号
+字形檔      字库
+欄位 字段
+字元 字符
+位元組      字节
+存檔 存盘
+定址 寻址
+章節附註   尾注
+鉅賈 巨商
+布希 布什
+非同步      异步
+匯流排      总线
+手電筒      手电
+列印 打印
+括弧 括号
+介面 接口
+控制項      控件
+許可權      权限
+毫安培      毫安
+浮水印      水印
+中文卡      汉卡
+大碗公      海碗
+碟片 盘片
+硬體 硬件
+矽油 硅油
+矽片 硅片
+矽石 硅石
+矽磚 硅砖
+矽肺 硅肺
+矽膠 硅胶
+矽藻 硅藻
+矽谷 硅谷
+矽酸 硅酸
+矽鋼 硅钢
+硬碟 硬盘
+磁片 磁盘
+磁軌 磁道
+程式 程序
+程式控制   程控
+運算元      算子
+演算法      算法
+晶片 芯片
+片語 词组
+解碼 译码
+軟盤機      软驱
+快閃記憶體        闪存
+滑鼠 鼠标
+二極體      二极管
+三極體      三极管
+進位 进制
+互動式      交互式
+模擬 仿真
+優先順序   优先级
+感測 传感
+攜帶型      便携式
+資訊理論   信息论
+迴圈 循环
+防寫 写保护
+分散式      分布式
+解析度      分辨率
+丹唐 丹顿
+葉門 也门
+慣用 习用
+母音 元音
+數位 数码
+程式 程序
+軟體 软件
+網路 网络
+電腦 计算机
+存檔 保存
+伺服器      服务器
\ No newline at end of file
diff --git a/includes/zhtable/tradphrases.manual b/includes/zhtable/tradphrases.manual
new file mode 100644 (file)
index 0000000..447358f
--- /dev/null
@@ -0,0 +1,13 @@
+一隻
+三隻
+四隻
+五隻
+六隻
+七隻
+八隻
+九隻
+十隻
+百隻
+千隻
+萬隻
+億隻