medkit.text.preprocessing.char_rules

Attributes

Module Contents

medkit.text.preprocessing.char_rules.LIGATURE_RULES = [('Æ', 'AE'), ('æ', 'ae'), ('Œ', 'OE'), ('œ', 'oe')]
medkit.text.preprocessing.char_rules.FRACTION_RULES = [('¼', '1/4'), ('½', '1/2'), ('¾', '3/4'), ('⅐', '1/7'), ('⅑', '1/9'), ('⅒', '1/10'), ('⅓',...
medkit.text.preprocessing.char_rules.SPACE_RULES = [('\xa0', ' '), ('\u1680', ' '), ('\u2002', ' '), ('\u2003', ' '), ('\u2004', ' '), ('\u2005', '...
medkit.text.preprocessing.char_rules.SIGN_RULES = [('©', ''), ('®', ''), ('™', '')]
medkit.text.preprocessing.char_rules.DOT_RULES = [('…', '...'), ('⋯', '...')]
medkit.text.preprocessing.char_rules.QUOTATION_RULES = [('»', '"'), ('«', '"'), ('“', '"'), ('”', '"'), ('„', '"'), ('‟', '"'), ('‹', '"'), ('›', '"'),...
medkit.text.preprocessing.char_rules.ALL_CHAR_RULES