return mlist
def charManualTable( path ):
    """Yield ``( key, values )`` pairs parsed from a manual table file.

    Each line of the UTF-8 file at *path* is cut at the first ``#`` (the
    remainder of the line is ignored), split on ``|`` and the pieces are
    passed to ``unichr3``.  Lines for which ``unichr3`` returns fewer than
    two elements are skipped.

    This is a generator: the result can be fed straight to ``dict.update``
    or iterated as pairs, but it can only be consumed once.

    path: path of the manual table file to read.
    Yields: ``( elems[0], elems[1:] )`` -- the first converted element and
        the remaining converted elements.
    """
    # The with-block closes the file as soon as the generator is exhausted
    # or closed, instead of leaving the handle to the garbage collector.
    with open( path, 'r', encoding = 'U8' ) as fp:
        for line in fp:
            elems = line.split( '#' )[0].split( '|' )
            # NOTE(review): unichr3 is defined elsewhere in this file; it
            # presumably converts code-point fields to characters -- confirm.
            elems = unichr3( *elems )
            if len( elems ) > 1:
                yield elems[0], elems[1:]
def toManyRules( src_table ):
tomany = set()
fp = open( path, 'r', encoding = 'U8' )
ret = dict()
for line in fp:
- elems = line.split( '#' )[0].split()
+ line = line.rstrip( '\r\n' )
+ if '#' in line:
+ line = line.split( '#' )[0].rstrip()
+ elems = line.split( '\t' )
if len( elems ) > 1:
ret[elems[0]] = elems[1]
return ret
def dictToSortedList( src_table, pos ):
    """Return the items of *src_table* as a deterministically sorted list.

    src_table: mapping whose ``( key, value )`` items are sorted.
    pos: index of the primary sort component within each item -- 0 sorts
        by key first, 1 sorts by value first.
    Returns: a list of ``( key, value )`` tuples.

    The other component of each item is used as a secondary sort key, so
    items that tie on the primary component come out in the same order
    regardless of the mapping's iteration order.
    """
    return sorted( src_table.items(), key = lambda m: ( m[pos], m[1 - pos] ) )
def translate( text, conv_table ):
i = 0
reconv_table = {}
wordlist = [line.split( '#' )[0].strip() for line in fp]
wordlist = list( set( wordlist ) )
- wordlist.sort( key = len, reverse = True )
+ wordlist.sort( key = lambda w: ( len(w), w ), reverse = True )
while wordlist:
word = wordlist.pop()
new_word = translate( word, conv_table )
def defaultWordsTable( src_wordlist, src_tomany, char_conv_table, char_reconv_table ):
wordlist = list( src_wordlist )
- wordlist.sort( key = len, reverse = True )
+ wordlist.sort( key = lambda w: ( len(w), w ), reverse = True )
word_conv_table = {}
word_reconv_table = {}
conv_table = char_conv_table.copy()
def main():
#Get Unihan.zip:
url = 'http://www.unicode.org/Public/%s/ucd/Unihan.zip' % UNIHAN_VER
- han_dest = 'Unihan.zip'
+ han_dest = 'Unihan-%s.zip' % UNIHAN_VER
download( url, han_dest )
# Get scim-tables-$(SCIM_TABLES_VER).tar.gz:
# Unihan.txt
( t2s_1tomany, s2t_1tomany ) = unihanParser( han_dest )
+ t2s_1tomany.update( charManualTable( 'symme_supp.manual' ) )
t2s_1tomany.update( charManualTable( 'trad2simp.manual' ) )
+ s2t_1tomany.update( ( t[0], [f] ) for ( f, t ) in charManualTable( 'symme_supp.manual' ) )
s2t_1tomany.update( charManualTable( 'simp2trad.manual' ) )
if pyversion[:1] in ['2']:
toCN = dictToSortedList( customRules( 'toCN.manual' ), 1 )
# sorted list toHK
toHK = dictToSortedList( customRules( 'toHK.manual' ), 1 )
- # sorted list toSG
- toSG = dictToSortedList( customRules( 'toSG.manual' ), 1 )
# sorted list toTW
toTW = dictToSortedList( customRules( 'toTW.manual' ), 1 )
+ PHPArray( toHK ) \
+ '\n);\n\n$zh2CN = array(\n' \
+ PHPArray( toCN ) \
- + '\n);\n\n$zh2SG = array(\n' \
- + PHPArray( toSG ) \
+ '\n);\n'
if pyversion[:1] in ['2']: