Skip to content

Commit 2d5a74c

Browse files
committed
Merge branch 'develop'
2 parents d4955ea + cbf49b1 commit 2d5a74c

45 files changed

Lines changed: 74730 additions & 74856 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ FlexTools/Modules-*
33
FlexTools/Collections-*
44

55
flextools.ini
6+
FlexTools/Collections/*
67

78
# Temp files
89
*.tmp
Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,6 @@
11
[Chinese.Update Tone Number Fields]
2-
_order = 1
32

43
[Chinese.Update Pinyin Fields]
5-
_order = 2
64

75
[Chinese.Update Reversal Index Sort Field]
8-
_order = 3
96

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,8 @@
11
[Duplicates.Find Duplicate Definitions]
2-
_order = 1
32

43
[Duplicates.Find Duplicate Entries]
5-
_order = 2
64

75
[Duplicates.Merge Entries]
8-
_order = 3
96

107
[Duplicates.Merge Senses]
11-
_order = 4
128

FlexTools/Collections/Examples.ini

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,14 @@
11
[Reports.Project Information]
2-
_order = 1
32

43
[Reports.Text Statistics]
5-
_order = 2
64

75
[Reports.Lexicon Statistics]
8-
_order = 3
96

107
[Utilities.Approve Spelling of Numbers]
11-
_order = 4
128

139
[Reports.Lexeme Usage in Corpus]
14-
_order = 5
1510

1611
[Examples.Example - Check Punctuation]
17-
_order = 6
1812

1913
[Export.Dump All Headwords To File]
20-
_order = 7
14+
Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,8 @@
1-
[Export.Dump All Headwords To File]
2-
_order = 1
1+
[Export.Export All Headwords To File]
32

4-
[Export.Dump Published Headwords To File]
5-
_order = 2
3+
[Export.Export Published Headwords To File]
64

7-
[Export.Dump Semantic Domain List To File]
8-
_order = 3
5+
[Export.Export Semantic Domain List To File]
96

10-
[Export.Dump Texts To File]
11-
_order = 4
7+
[Export.Export Texts To File]
128

FlexTools/Collections/Reports.ini

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,8 @@
11
[Reports.Project Information]
2-
_order = 1
32

43
[Reports.Lexeme Usage in Corpus]
5-
_order = 2
64

75
[Reports.Text Statistics]
8-
_order = 3
96

107
[Reports.Incomplete Analyses]
11-
_order = 4
8+

FlexTools/Modules/Chinese/Lib/ChineseUtilities.py

Lines changed: 3 additions & 84 deletions
Original file line numberDiff line numberDiff line change
@@ -9,12 +9,6 @@
99
#
1010
#
1111

12-
from __future__ import unicode_literals
13-
from __future__ import print_function
14-
from __future__ import absolute_import
15-
16-
from builtins import str
17-
1812
import codecs
1913
import re
2014

@@ -232,17 +226,10 @@ def __loadPunctuation(self):
232226
self[c] = {l: l} # Sort string is itself for punctuation
233227

234228
def __load(self):
235-
# This stack-frame code is all that I've found to work for
236-
# getting the current path from Idle, command-line AND
237-
# FlexTools (where it is used by a custom-imported module.)
238-
# (Use of __file__ failed for Idle)
239-
mypath = os.path.dirname(sys._getframe().f_code.co_filename)
240-
fname = os.path.join(mypath, self.FileName)
241-
242-
if fname[-4:] == ".pkl":
243-
self.__loadFromPickle(fname)
229+
if self.FileName.endswith(".pkl"):
230+
self.__loadFromPickle(self.FileName)
244231
else:
245-
self.__loadFromTextFile(fname)
232+
self.__loadFromTextFile(self.FileName)
246233
self.__loadPunctuation()
247234

248235
def Lookup(self, hz, py):
@@ -307,71 +294,3 @@ def CalculateSortString(self, hanzi, tonenum, sortString):
307294

308295
return (newSortString, msg)
309296

310-
311-
# --- Testing ---
312-
313-
if __name__ == "__main__":
314-
if sys.stdout.encoding == None:
315-
sys.stdout = codecs.getwriter("utf-8")(sys.stdout)
316-
317-
testSet = [
318-
("路", "lu4"),
319-
("你好", "ni3 hao3"),
320-
("中国", "zhong1 guo4"), # Purposeful pinyin error
321-
("中国话", "Zhong1guo2hua4"),
322-
("去人民公园",""),
323-
('枣红色', "zao3hong2 se4"), # Ambiguous parse
324-
("录音", "lu4yin1"),
325-
("录音机", "lu4yin1"), # Purposeful pinyin error
326-
("绿", "lu:4"), # Multiple pronunciations
327-
("乱", "luan4"),
328-
("耳朵", "er3.duo5"),
329-
("孩子", "hai2.zi5"),
330-
("撒谎", "sa1//huang3"),
331-
("老老实实地","lao3lao5shi2shi2 .de5"),
332-
333-
# er hua is handled
334-
('\u5ea7\u513f', 'zuor4'),
335-
('\u53ed\u513f\u72d7', 'bar1gou3'),
336-
('\u767d\u773c\u513f\u72fc', 'bai2yanr3lang2'),
337-
# Latin letters
338-
('\u5361\u62c9OK', 'ka3la1ou1kei4'),
339-
340-
# Fails PY check because the combination of ambiguous pinyin
341-
# is checked by check_pinyin.check_pinyin() and that
342-
# function doesn't handle punctuation
343-
("你好吗\N{FULLWIDTH QUESTION MARK}", "ni3 hao3 .ma5?"),
344-
('是(1单)', "shi4 ( 1 dan)"),
345-
346-
# This is okay
347-
("你在\N{FULLWIDTH QUESTION MARK}", "ni3 zai4?"),
348-
# Other punctuation is supported
349-
('老(人)', "lao3 (ren2)"),
350-
('他,她,它', 'ta1, ta1, ta1'),
351-
('他/她/它', 'ta1/ta1/ta1'),
352-
('1单数', "1 dan1shu4"),
353-
('左…右…', 'zuo3…you4…'),
354-
('\N{FULLWIDTH SEMICOLON}', ';'),
355-
('连\N{HORIZONTAL ELLIPSIS}也', 'lian2…ye3'),
356-
357-
# Passes PY check, but fails Sort String due to angle brackets
358-
# not being included in chin_utils.tonenum_syl_pat
359-
('\N{LEFT DOUBLE ANGLE BRACKET}\N{RIGHT DOUBLE ANGLE BRACKET}', "<<zuo4>>"),
360-
361-
# Ambiguities
362-
('红', "gong1|hong2"),
363-
]
364-
365-
366-
print("--- Testing Chinese Parser and Sort String Generator ---")
367-
Parser = ChineseParser()
368-
Sorter = SortStringDB()
369-
for chns, tonenum in testSet:
370-
print("%s [%s] %s" % (chns, repr(chns), tonenum))
371-
#print "\tParse:\t", Parser.Tonenum(chns, tonenum)
372-
#if tonenum:
373-
print("\tCheck:\t", tonenum)
374-
result = Parser.Tonenum(chns, tonenum)
375-
print("\t\t", result if result else "OK")
376-
ss = Sorter.SortString(chns, tonenum)
377-
print("\tSort:\t", ss)

0 commit comments

Comments
 (0)