* src/DateParser.py: Switch from utf8 strings to unicode.
* src/DateDisplay.py: Switch from utf8 strings to unicode.
* src/dates/Date_ru.py: Switch from utf8 strings to unicode.
* src/dates/Date_fr.py: Switch from utf8 strings to unicode.

svn: r3733
This commit is contained in:
		| @@ -4,6 +4,11 @@ | ||||
| 2004-11-16 Alex Roitman  <shura@alex.neuro.umn.edu> | ||||
| 	* src/DateParser.py: Typo. | ||||
|  | ||||
| 	* src/DateParser.py: Switch from utf8 strings to unicode. | ||||
| 	* src/DateDisplay.py: Switch from utf8 strings to unicode. | ||||
| 	* src/dates/Date_ru.py: Switch from utf8 strings to unicode. | ||||
| 	* src/dates/Date_fr.py: Switch from utf8 strings to unicode. | ||||
|  | ||||
| 2004-11-15 Alex Roitman  <shura@alex.neuro.umn.edu> | ||||
| 	* src/DateDisplay.py: Remove localized displayers. | ||||
| 	* src/DateParser.py: Remove localized parsers. | ||||
|   | ||||
| @@ -98,9 +98,9 @@ class DateDisplay: | ||||
|         ) | ||||
|      | ||||
|     _french = ( | ||||
|         '',           'Vend\xc3\xa9miaire', 'Brumaire', | ||||
|         'Frimaire',   'Niv\xc3\xb4se',      'Pluvi\xc3\xb4se', | ||||
|         'Vent\xc3\xb4se', 'Germinal',       'Flor\xc3\xa9al', | ||||
|         '',           u'Vend\xc3\xa9miaire', 'Brumaire', | ||||
|         'Frimaire',   u'Niv\xc3\xb4se',      u'Pluvi\xc3\xb4se', | ||||
|         u'Vent\xc3\xb4se', 'Germinal',       u'Flor\xc3\xa9al', | ||||
|         'Prairial',   'Messidor',       'Thermidor', | ||||
|         'Fructidor',  'Extra' | ||||
|         ) | ||||
|   | ||||
| @@ -137,10 +137,10 @@ class DateParser: | ||||
|         } | ||||
|  | ||||
|     french_to_int = { | ||||
|         'vend\xc3\xa9miaire'   : 1,    'brumaire'   : 2, | ||||
|         'frimaire'             : 3,    'niv\xc3\xb4se  ': 4, | ||||
|         'pluvi\xc3\xb4se'      : 5,    'vent\xc3\xb4se' : 6, | ||||
|         'germinal'             : 7,    'flor\xc3\xa9al' : 8, | ||||
|         u'vend\xc3\xa9miaire'   : 1,    'brumaire'   : 2, | ||||
|         'frimaire'             : 3,    u'niv\xc3\xb4se  ': 4, | ||||
|         u'pluvi\xc3\xb4se'      : 5,    u'vent\xc3\xb4se' : 6, | ||||
|         'germinal'             : 7,    u'flor\xc3\xa9al' : 8, | ||||
|         'prairial'             : 9,    'messidor'   : 10, | ||||
|         'thermidor'            : 11,   'fructidor'  : 12, | ||||
|         'extra'                : 13 | ||||
| @@ -239,7 +239,12 @@ class DateParser: | ||||
|         self._mod_str  = '(' + '|'.join( | ||||
|             [ key.replace('.','\.') for key in self.modifier_to_int.keys() ] | ||||
|             ) + ')' | ||||
|         self._mon_str  = '(' + '|'.join(self.month_to_int.keys()) + ')' | ||||
|         # Need to reverse-sort the keys, so that April matches before Apr does. | ||||
|         # Otherwise, 'april 2000' would be matched as 'apr' + garbage ('il 2000') | ||||
|         _month_keys = self.month_to_int.keys() | ||||
|         _month_keys.sort() | ||||
|         _month_keys.reverse() | ||||
|         self._mon_str  = '(' + '|'.join(_month_keys) + ')' | ||||
|         self._jmon_str = '(' + '|'.join(self.hebrew_to_int.keys()) + ')' | ||||
|         self._fmon_str = '(' + '|'.join(self.french_to_int.keys()) + ')' | ||||
|         self._pmon_str = '(' + '|'.join(self.persian_to_int.keys()) + ')' | ||||
| @@ -316,7 +321,7 @@ class DateParser: | ||||
|                                     self.month_to_int,gregorian_valid) | ||||
|                               | ||||
|     def _parse_calendar(self,text,regex1,regex2,mmap,check=None): | ||||
|         match = regex1.match(text) | ||||
|         match = regex1.match(text.lower()) | ||||
|         if match: | ||||
|             groups = match.groups() | ||||
|             if groups[0] == None: | ||||
| @@ -337,9 +342,10 @@ class DateParser: | ||||
|                 value = Date.EMPTY | ||||
|             return value | ||||
|  | ||||
|         match = regex2.match(text) | ||||
|         match = regex2.match(text.lower()) | ||||
|         if match: | ||||
|             groups = match.groups() | ||||
|             print groups #[ g.encode('utf8') for g in groups ] | ||||
|             if groups[1] == None: | ||||
|                 m = 0 | ||||
|             else: | ||||
| @@ -421,8 +427,6 @@ class DateParser: | ||||
|         qual = Date.QUAL_NONE | ||||
|         cal  = Date.CAL_GREGORIAN | ||||
|          | ||||
|         text = text.encode('utf8') | ||||
|          | ||||
|         match = self._cal.match(text) | ||||
|         if match: | ||||
|             grps = match.groups() | ||||
|   | ||||
| @@ -50,48 +50,54 @@ from DateDisplay import DateDisplay | ||||
| class DateParserFR(DateParser): | ||||
|  | ||||
|     modifier_to_int = { | ||||
|         'avant'    : Date.MOD_BEFORE,  | ||||
|         'av.'      : Date.MOD_BEFORE,  | ||||
|         'av'       : Date.MOD_BEFORE,  | ||||
|         'après' : Date.MOD_AFTER, | ||||
|         'ap.'    : Date.MOD_AFTER, | ||||
|         'ap'     : Date.MOD_AFTER, | ||||
|         'env.'   : Date.MOD_ABOUT, | ||||
|         'env'    : Date.MOD_ABOUT, | ||||
|         'circa'  : Date.MOD_ABOUT, | ||||
|         'c.'     : Date.MOD_ABOUT, | ||||
|         'vers'   : Date.MOD_ABOUT, | ||||
|         u'avant'    : Date.MOD_BEFORE,  | ||||
|         u'av.'      : Date.MOD_BEFORE,  | ||||
|         u'av'       : Date.MOD_BEFORE,  | ||||
|         u'après' : Date.MOD_AFTER, | ||||
|         u'ap.'    : Date.MOD_AFTER, | ||||
|         u'ap'     : Date.MOD_AFTER, | ||||
|         u'env.'   : Date.MOD_ABOUT, | ||||
|         u'env'    : Date.MOD_ABOUT, | ||||
|         u'circa'  : Date.MOD_ABOUT, | ||||
|         u'c.'     : Date.MOD_ABOUT, | ||||
|         u'vers'   : Date.MOD_ABOUT, | ||||
|         } | ||||
|  | ||||
|     calendar_to_int = { | ||||
|         'grégorien'      : Date.CAL_GREGORIAN, | ||||
|         'g'                     : Date.CAL_GREGORIAN, | ||||
|         'julien'                : Date.CAL_JULIAN, | ||||
|         'j'                     : Date.CAL_JULIAN, | ||||
|         'hébreu'         : Date.CAL_HEBREW, | ||||
|         'h'                     : Date.CAL_HEBREW, | ||||
|         'islamique'             : Date.CAL_ISLAMIC, | ||||
|         'i'                     : Date.CAL_ISLAMIC, | ||||
|         'révolutionnaire': Date.CAL_FRENCH, | ||||
|         'r'                     : Date.CAL_FRENCH, | ||||
|         'perse'                 : Date.CAL_PERSIAN, | ||||
|         'p'                     : Date.CAL_PERSIAN, | ||||
|         u'grégorien'      : Date.CAL_GREGORIAN, | ||||
|         u'g'                     : Date.CAL_GREGORIAN, | ||||
|         u'julien'                : Date.CAL_JULIAN, | ||||
|         u'j'                     : Date.CAL_JULIAN, | ||||
|         u'hébreu'         : Date.CAL_HEBREW, | ||||
|         u'h'                     : Date.CAL_HEBREW, | ||||
|         u'islamique'             : Date.CAL_ISLAMIC, | ||||
|         u'i'                     : Date.CAL_ISLAMIC, | ||||
|         u'révolutionnaire': Date.CAL_FRENCH, | ||||
|         u'r'                     : Date.CAL_FRENCH, | ||||
|         u'perse'                 : Date.CAL_PERSIAN, | ||||
|         u'p'                     : Date.CAL_PERSIAN, | ||||
|         } | ||||
|  | ||||
|     quality_to_int = { | ||||
|         'estimated'  : Date.QUAL_ESTIMATED, | ||||
|         'est.'       : Date.QUAL_ESTIMATED, | ||||
|         'est'        : Date.QUAL_ESTIMATED, | ||||
|         'calc.'      : Date.QUAL_CALCULATED, | ||||
|         'calc'       : Date.QUAL_CALCULATED, | ||||
|         'calculated' : Date.QUAL_CALCULATED, | ||||
|         u'estimated'  : Date.QUAL_ESTIMATED, | ||||
|         u'est.'       : Date.QUAL_ESTIMATED, | ||||
|         u'est'        : Date.QUAL_ESTIMATED, | ||||
|         u'calc.'      : Date.QUAL_CALCULATED, | ||||
|         u'calc'       : Date.QUAL_CALCULATED, | ||||
|         u'calculated' : Date.QUAL_CALCULATED, | ||||
|         } | ||||
|  | ||||
|     def init_strings(self): | ||||
|         DateParser.init_strings(self) | ||||
|         self._span     = re.compile("(de)\s+(.+)\s+(à)\s+(.+)", | ||||
|         _span_1 = [u'de'] | ||||
|         _span_2 = [u'à'] | ||||
|         _range_1 = [u'ent.',u'ent',u'entre'] | ||||
|         _range_2 = [u'et'] | ||||
|         self._span     = re.compile("(%s)\s+(.+)\s+(%s)\s+(.+)" %  | ||||
|                                    ('|'.join(_span_1),'|'.join(_span_2)), | ||||
|                            re.IGNORECASE) | ||||
|         self._range    = re.compile("(ent.|ent|entre)\s+(.+)\s+(et)\s+(.+)", | ||||
|         self._range    = re.compile("(%s)\s+(.+)\s+(%s)\s+(.+)" % | ||||
|                                    ('|'.join(_range_1),'|'.join(_range_2)), | ||||
|                            re.IGNORECASE) | ||||
|  | ||||
| #------------------------------------------------------------------------- | ||||
| @@ -102,11 +108,11 @@ class DateParserFR(DateParser): | ||||
| class DateDisplayFR(DateDisplay): | ||||
|  | ||||
|     calendar = ( | ||||
|         "", " (Julien)", " (Hébreu)",  | ||||
|         " (Révolutionnaire)", " (Perse)", " (Islamique)" | ||||
|         "", u" (Julien)", u" (Hébreu)",  | ||||
|         u" (Révolutionnaire)", u" (Perse)", u" (Islamique)" | ||||
|         ) | ||||
|  | ||||
|     _mod_str = ("","avant ","après ","vers ","","","") | ||||
|     _mod_str = ("",u"avant ",u"après ",u"vers ","","","") | ||||
|      | ||||
|     def display(self,date): | ||||
|         """ | ||||
| @@ -126,11 +132,11 @@ class DateDisplayFR(DateDisplay): | ||||
|         elif mod == Date.MOD_SPAN: | ||||
|             d1 = self.display_cal[cal](start) | ||||
|             d2 = self.display_cal[cal](date.get_stop_date()) | ||||
|             return "%sde %s à %s%s" % (qual_str,d1,d2,self.calendar[cal]) | ||||
|             return "%s%s %s %s %s%s" % (qual_str,u'de',d1,u'à',d2,self.calendar[cal]) | ||||
|         elif mod == Date.MOD_RANGE: | ||||
|             d1 = self.display_cal[cal](start) | ||||
|             d2 = self.display_cal[cal](date.get_stop_date()) | ||||
|             return "%sentre %s et %s%s" % (qual_str,d1,d2,self.calendar[cal]) | ||||
|             return "%s%s %s %s %s%s" % (qual_str,u'entre',d1,u'et',d2,self.calendar[cal]) | ||||
|         else: | ||||
|             text = self.display_cal[date.get_calendar()](start) | ||||
|             return "%s%s%s%s" % (qual_str,self._mod_str[mod],text,self.calendar[cal]) | ||||
|   | ||||
| @@ -50,58 +50,64 @@ from DateDisplay import DateDisplay | ||||
| class DateParserRU(DateParser): | ||||
|  | ||||
|     modifier_to_int = { | ||||
|         'до'    : Date.MOD_BEFORE,  | ||||
|         'по'    : Date.MOD_BEFORE, | ||||
|         'после' : Date.MOD_AFTER, | ||||
|         'п.'    : Date.MOD_AFTER, | ||||
|         'п'    : Date.MOD_AFTER, | ||||
|         'с'     : Date.MOD_AFTER, | ||||
|         'ок' : Date.MOD_ABOUT, | ||||
|         'ок.'   : Date.MOD_ABOUT, | ||||
|         'около'    : Date.MOD_ABOUT, | ||||
|         'примерно'  : Date.MOD_ABOUT, | ||||
|         'прим'     : Date.MOD_ABOUT, | ||||
|         'прим.'     : Date.MOD_ABOUT, | ||||
|         'приблизительно'  : Date.MOD_ABOUT, | ||||
|         'приб.'  : Date.MOD_ABOUT, | ||||
|         'прибл.'  : Date.MOD_ABOUT, | ||||
|         'приб'  : Date.MOD_ABOUT, | ||||
|         'прибл'  : Date.MOD_ABOUT, | ||||
|         u'до'    : Date.MOD_BEFORE,  | ||||
|         u'по'    : Date.MOD_BEFORE, | ||||
|         u'после' : Date.MOD_AFTER, | ||||
|         u'п.'    : Date.MOD_AFTER, | ||||
|         u'п'    : Date.MOD_AFTER, | ||||
|         u'с'     : Date.MOD_AFTER, | ||||
|         u'ок' : Date.MOD_ABOUT, | ||||
|         u'ок.'   : Date.MOD_ABOUT, | ||||
|         u'около'    : Date.MOD_ABOUT, | ||||
|         u'примерно'  : Date.MOD_ABOUT, | ||||
|         u'прим'     : Date.MOD_ABOUT, | ||||
|         u'прим.'     : Date.MOD_ABOUT, | ||||
|         u'приблизительно'  : Date.MOD_ABOUT, | ||||
|         u'приб.'  : Date.MOD_ABOUT, | ||||
|         u'прибл.'  : Date.MOD_ABOUT, | ||||
|         u'приб'  : Date.MOD_ABOUT, | ||||
|         u'прибл'  : Date.MOD_ABOUT, | ||||
|         } | ||||
|  | ||||
|     calendar_to_int = { | ||||
|         'григорианский'   : Date.CAL_GREGORIAN, | ||||
|         'г'                 : Date.CAL_GREGORIAN, | ||||
|         'юлианский'            : Date.CAL_JULIAN, | ||||
|         'ю'                 : Date.CAL_JULIAN, | ||||
|         'еврейский'         : Date.CAL_HEBREW, | ||||
|         'е'         : Date.CAL_HEBREW, | ||||
|         'исламский'         : Date.CAL_ISLAMIC, | ||||
|         'и'                 : Date.CAL_ISLAMIC, | ||||
|         'республиканский': Date.CAL_FRENCH, | ||||
|         'р'                 : Date.CAL_FRENCH, | ||||
|         'персидский'             : Date.CAL_PERSIAN, | ||||
|         'п'             : Date.CAL_PERSIAN, | ||||
|         u'григорианский'   : Date.CAL_GREGORIAN, | ||||
|         u'г'                 : Date.CAL_GREGORIAN, | ||||
|         u'юлианский'            : Date.CAL_JULIAN, | ||||
|         u'ю'                 : Date.CAL_JULIAN, | ||||
|         u'еврейский'         : Date.CAL_HEBREW, | ||||
|         u'е'         : Date.CAL_HEBREW, | ||||
|         u'исламский'         : Date.CAL_ISLAMIC, | ||||
|         u'и'                 : Date.CAL_ISLAMIC, | ||||
|         u'республиканский': Date.CAL_FRENCH, | ||||
|         u'р'                 : Date.CAL_FRENCH, | ||||
|         u'персидский'             : Date.CAL_PERSIAN, | ||||
|         u'п'             : Date.CAL_PERSIAN, | ||||
|         } | ||||
|  | ||||
|     quality_to_int = { | ||||
|         'оценено'  : Date.QUAL_ESTIMATED, | ||||
|         'оцен.'       : Date.QUAL_ESTIMATED, | ||||
|         'оц.'        : Date.QUAL_ESTIMATED, | ||||
|         'оцен'       : Date.QUAL_ESTIMATED, | ||||
|         'оц'        : Date.QUAL_ESTIMATED, | ||||
|         'вычислено'      : Date.QUAL_CALCULATED, | ||||
|         'вычисл.'       : Date.QUAL_CALCULATED, | ||||
|         'выч.' : Date.QUAL_CALCULATED, | ||||
|         'вычисл'       : Date.QUAL_CALCULATED, | ||||
|         'выч' : Date.QUAL_CALCULATED, | ||||
|         u'оценено'  : Date.QUAL_ESTIMATED, | ||||
|         u'оцен.'       : Date.QUAL_ESTIMATED, | ||||
|         u'оц.'        : Date.QUAL_ESTIMATED, | ||||
|         u'оцен'       : Date.QUAL_ESTIMATED, | ||||
|         u'оц'        : Date.QUAL_ESTIMATED, | ||||
|         u'вычислено'      : Date.QUAL_CALCULATED, | ||||
|         u'вычисл.'       : Date.QUAL_CALCULATED, | ||||
|         u'выч.' : Date.QUAL_CALCULATED, | ||||
|         u'вычисл'       : Date.QUAL_CALCULATED, | ||||
|         u'выч' : Date.QUAL_CALCULATED, | ||||
|         } | ||||
|  | ||||
|     def init_strings(self): | ||||
|         DateParser.init_strings(self) | ||||
|         self._span     = re.compile("(с|от)\s+(.+)\s+(по|до)\s+(.+)", | ||||
|         _span_1 = [u'с',u'от'] | ||||
|         _span_2 = [u'по',u'до'] | ||||
|         _range_1 = [u'между',u'меж',u'меж.'] | ||||
|         _range_2 = [u'и'] | ||||
|         self._span     = re.compile("(%s)\s+(.+)\s+(%s)\s+(.+)" %  | ||||
|                                    ('|'.join(_span_1),'|'.join(_span_2)), | ||||
|                            re.IGNORECASE) | ||||
|         self._range    = re.compile("(между|меж|меж.)\s+(.+)\s+(и)\s+(.+)", | ||||
|         self._range    = re.compile("(%s)\s+(.+)\s+(%s)\s+(.+)" % | ||||
|                                    ('|'.join(_range_1),'|'.join(_range_2)), | ||||
|                            re.IGNORECASE) | ||||
|  | ||||
| #------------------------------------------------------------------------- | ||||
| @@ -112,16 +118,16 @@ class DateParserRU(DateParser): | ||||
| class DateDisplayRU(DateDisplay): | ||||
|  | ||||
|     calendar = ( | ||||
|         "", " (юлианский)",  | ||||
|         " (еврейский)",  | ||||
|         " (республиканский)",  | ||||
|         " (персидский)",  | ||||
|         " (исламский)" | ||||
|         "", u" (юлианский)",  | ||||
|         u" (еврейский)",  | ||||
|         u" (республиканский)",  | ||||
|         u" (персидский)",  | ||||
|         u" (исламский)" | ||||
|         ) | ||||
|  | ||||
|     _mod_str = ("","до ", | ||||
|         "после ", | ||||
|         "около ","","","") | ||||
|     _mod_str = ("",u"до ", | ||||
|         u"после ", | ||||
|         u"около ","","","") | ||||
|      | ||||
|     def display(self,date): | ||||
|         """ | ||||
| @@ -141,11 +147,11 @@ class DateDisplayRU(DateDisplay): | ||||
|         elif mod == Date.MOD_SPAN: | ||||
|             d1 = self.display_cal[cal](start) | ||||
|             d2 = self.display_cal[cal](date.get_stop_date()) | ||||
|             return "%sс %s по %s%s" % (qual_str,d1,d2,self.calendar[cal]) | ||||
|             return "%sс %s %s %s%s" % (qual_str,d1,u'по',d2,self.calendar[cal]) | ||||
|         elif mod == Date.MOD_RANGE: | ||||
|             d1 = self.display_cal[cal](start) | ||||
|             d2 = self.display_cal[cal](date.get_stop_date()) | ||||
|             return "%sмежду %s и %s%s" % (qual_str,d1,d2,self.calendar[cal]) | ||||
|             return "%s%s %s %s %s%s" % (qual_str,u'между',d1,u'и',d2,self.calendar[cal]) | ||||
|         else: | ||||
|             text = self.display_cal[date.get_calendar()](start) | ||||
|             return "%s%s%s%s" % (qual_str,self._mod_str[mod],text,self.calendar[cal]) | ||||
|   | ||||
		Reference in New Issue
	
	Block a user