medkit.text.segmentation.syntagma_tokenizer#

Classes#

SyntagmaTokenizer

Syntagma segmentation annotator based on provided separators.

Module Contents#

class medkit.text.segmentation.syntagma_tokenizer.SyntagmaTokenizer(separators: tuple[str, Ellipsis] | None = None, output_label: str = _DEFAULT_LABEL, strip_chars: str = _DEFAULT_STRIP_CHARS, attrs_to_copy: list[str] | None = None, uid: str | None = None)#

Bases: medkit.core.text.SegmentationOperation

Syntagma segmentation annotator based on provided separators.

_DEFAULT_LABEL = 'syntagma'#
_DEFAULT_STRIP_CHARS = Multiline-String#
Show Value
""".;,?!

       """
init_args#
output_label#
strip_chars#
separators#
attrs_to_copy#
run(segments: list[medkit.core.text.Segment]) -> list[medkit.core.text.Segment]#

Return syntagmas detected in segments.

Parameters:
segments : list of Segment

List of segments in which to look for syntagmas

Returns:
list of Segment

Syntagma segments found in the input segments

_find_syntagmas_in_segment(segment: medkit.core.text.Segment) -> Iterator[medkit.core.text.Segment]#
classmethod get_example()#
static load_syntagma_definition(filepath: pathlib.Path, encoding: str | None = None) -> tuple[str, Ellipsis]#

Load the syntagma definition stored in a YAML file.

Parameters:
filepath : Path

Path to a yml file containing the syntagma separators

encoding : str, optional

Encoding of the file to open

Returns:
tuple of str

Tuple containing the separators

static save_syntagma_definition(syntagma_seps: tuple[str, Ellipsis], filepath: pathlib.Path, encoding: str | None = None)#

Save the syntagma definition to a YAML file.

Parameters:
syntagma_seps : tuple of str

The tuple of regular expressions corresponding to separators

filepath : Path

The path of the file to save

encoding : str, optional

The encoding of the file. Default: None