|
9 | 9 | # |
10 | 10 | # |
11 | 11 |
|
12 | | -from __future__ import unicode_literals |
13 | | -from __future__ import print_function |
14 | | -from __future__ import absolute_import |
15 | | - |
16 | | -from builtins import str |
17 | | - |
18 | 12 | import codecs |
19 | 13 | import re |
20 | 14 |
|
@@ -232,17 +226,10 @@ def __loadPunctuation(self): |
232 | 226 | self[c] = {l: l} # Sort string is itself for punctuation |
233 | 227 |
|
234 | 228 | def __load(self): |
235 | | - # This stack-frame code is all that I've found to work for |
236 | | - # getting the current path from Idle, command-line AND |
237 | | - # FlexTools (where it is used by a custom-imported module.) |
238 | | - # (Use of __file__ failed for Idle) |
239 | | - mypath = os.path.dirname(sys._getframe().f_code.co_filename) |
240 | | - fname = os.path.join(mypath, self.FileName) |
241 | | - |
242 | | - if fname[-4:] == ".pkl": |
243 | | - self.__loadFromPickle(fname) |
| 229 | + if self.FileName.endswith(".pkl"): |
| 230 | + self.__loadFromPickle(self.FileName) |
244 | 231 | else: |
245 | | - self.__loadFromTextFile(fname) |
| 232 | + self.__loadFromTextFile(self.FileName) |
246 | 233 | self.__loadPunctuation() |
247 | 234 |
|
248 | 235 | def Lookup(self, hz, py): |
@@ -307,71 +294,3 @@ def CalculateSortString(self, hanzi, tonenum, sortString): |
307 | 294 |
|
308 | 295 | return (newSortString, msg) |
309 | 296 |
|
310 | | - |
311 | | -# --- Testing --- |
312 | | - |
313 | | -if __name__ == "__main__": |
314 | | - if sys.stdout.encoding == None: |
315 | | - sys.stdout = codecs.getwriter("utf-8")(sys.stdout) |
316 | | - |
317 | | - testSet = [ |
318 | | - ("路", "lu4"), |
319 | | - ("你好", "ni3 hao3"), |
320 | | - ("中国", "zhong1 guo4"), # Purposeful pinyin error |
321 | | - ("中国话", "Zhong1guo2hua4"), |
322 | | - ("去人民公园",""), |
323 | | - ('枣红色', "zao3hong2 se4"), # Ambiguous parse |
324 | | - ("录音", "lu4yin1"), |
325 | | - ("录音机", "lu4yin1"), # Purposeful pinyin error |
326 | | - ("绿", "lu:4"), # Multiple pronunciations |
327 | | - ("乱", "luan4"), |
328 | | - ("耳朵", "er3.duo5"), |
329 | | - ("孩子", "hai2.zi5"), |
330 | | - ("撒谎", "sa1//huang3"), |
331 | | - ("老老实实地","lao3lao5shi2shi2 .de5"), |
332 | | - |
333 | | - # er hua is handled |
334 | | - ('\u5ea7\u513f', 'zuor4'), |
335 | | - ('\u53ed\u513f\u72d7', 'bar1gou3'), |
336 | | - ('\u767d\u773c\u513f\u72fc', 'bai2yanr3lang2'), |
337 | | - # Latin letters |
338 | | - ('\u5361\u62c9OK', 'ka3la1ou1kei4'), |
339 | | - |
340 | | - # Fails PY check because the combination of ambiguous pinyin |
341 | | - # is checked by check_pinyin.check_pinyin() and that |
342 | | - # function doesn't handle punctuation |
343 | | - ("你好吗\N{FULLWIDTH QUESTION MARK}", "ni3 hao3 .ma5?"), |
344 | | - ('是(1单)', "shi4 ( 1 dan)"), |
345 | | - |
346 | | - # This is okay |
347 | | - ("你在\N{FULLWIDTH QUESTION MARK}", "ni3 zai4?"), |
348 | | - # Other punctuation is supported |
349 | | - ('老(人)', "lao3 (ren2)"), |
350 | | - ('他,她,它', 'ta1, ta1, ta1'), |
351 | | - ('他/她/它', 'ta1/ta1/ta1'), |
352 | | - ('1单数', "1 dan1shu4"), |
353 | | - ('左…右…', 'zuo3…you4…'), |
354 | | - ('\N{FULLWIDTH SEMICOLON}', ';'), |
355 | | - ('连\N{HORIZONTAL ELLIPSIS}也', 'lian2…ye3'), |
356 | | - |
357 | | - # Passes PY check, but fails Sort String due to angle brackets |
358 | | - # not being included in chin_utils.tonenum_syl_pat |
359 | | - ('\N{LEFT DOUBLE ANGLE BRACKET}做\N{RIGHT DOUBLE ANGLE BRACKET}', "<<zuo4>>"), |
360 | | - |
361 | | - # Ambiguities |
362 | | - ('红', "gong1|hong2"), |
363 | | - ] |
364 | | - |
365 | | - |
366 | | - print("--- Testing Chinese Parser and Sort String Generator ---") |
367 | | - Parser = ChineseParser() |
368 | | - Sorter = SortStringDB() |
369 | | - for chns, tonenum in testSet: |
370 | | - print("%s [%s] %s" % (chns, repr(chns), tonenum)) |
371 | | - #print "\tParse:\t", Parser.Tonenum(chns, tonenum) |
372 | | - #if tonenum: |
373 | | - print("\tCheck:\t", tonenum) |
374 | | - result = Parser.Tonenum(chns, tonenum) |
375 | | - print("\t\t", result if result else "OK") |
376 | | - ss = Sorter.SortString(chns, tonenum) |
377 | | - print("\tSort:\t", ss) |
0 commit comments