ConceptPortal-public/rsconcept/backend/cctext/conceptapi.py

'''
Concept API Python functions.

::guarantee:: doesnt raise exceptions and returns workable outputs in situations where empty string would be returned
'''
from cctext.rumodel import Morphology
from .syntax import RuSyntax
from .ruparser import RuParser

parser = RuParser()


def split_tags(tags: str) -> frozenset[str]:
    ''' Split grammemes string into set of items. '''
    return frozenset([tag.strip() for tag in filter(None, tags.split(','))])


def parse(text: str, require_tags: str = '') -> str:
    ''' Determine morpho tags for input text.
    ::returns:: string of comma separated grammar tags or empty string '''
    model = parser.parse(text, require_tags=split_tags(require_tags))
    if model is None:
        return ''
    result = model.get_morpho().as_str()
    return result if result != 'UNKN' else ''


def get_all_forms(text_normal: str) -> list[tuple[str, str]]:
    ''' Get all infeclted forms. '''
    model = parser.parse(text_normal)
    if not model:
        return []
    result = []
    for form in model.get_form().lexeme:
        result.append((form.word, Morphology(form.tag).as_str()))
    return result


def normalize(text: str) -> str:
    ''' Generate normal form.
    ::returns:: normal form of input text or text itself if no parse is available '''
    model = parser.parse(text)
    if model is None:
        return text
    return model.normal_form()


def inflect(text: str, target_tags: str) -> str:
    ''' Inflect text to match required tags.
    ::returns:: infected text or initial text if infection failed '''
    target_set = split_tags(target_tags)
    model = parser.parse(text)
    if model is None:
        return text
    return model.inflect(target_set)


def inflect_context(target: str, cntxt_before: str = '', cntxt_after: str = '') -> str:
    ''' Inflect text in accordance to context before and after. '''
    return parser.inflect_context(target, cntxt_before, cntxt_after)


def inflect_substitute(substitute_normal: str, original: str) -> str:
    ''' Inflect substitute to match original form. '''
    return parser.inflect_substitute(substitute_normal, original)


def inflect_dependant(dependant_normal: str, master: str) -> str:
    ''' Inflect dependant to coordinate with master text. '''
    return parser.inflect_dependant(dependant_normal, master)


def match_all_morpho(text: str, filter_tags: str) -> list[list[int]]:
    ''' Search for all words corresponding to tags. '''
    target_set = split_tags(filter_tags)
    if len(target_set) == 0:
        return []

    result = []
    for elem in RuSyntax.tokenize(text):
        model = parser.parse(elem.text, require_tags=target_set)
        if model:
            result.append([elem.start, elem.stop])
    return result


def find_substr(text: str, sub: str) -> tuple[int, int]:
    ''' Search for substring position in text regardless of morphology. '''
    return parser.find_substr(text, sub)
Add cctext python module 2023-08-16 21:26:59 +03:00			`'''`
			`Concept API Python functions.`

			`::guarantee:: doesnt raise exceptions and returns workable outputs in situations where empty string would be returned`
			`'''`
			`from cctext.rumodel import Morphology`
			`from .syntax import RuSyntax`
			`from .ruparser import RuParser`

			`parser = RuParser()`


			`def split_tags(tags: str) -> frozenset[str]:`
			`''' Split grammemes string into set of items. '''`
			`return frozenset([tag.strip() for tag in filter(None, tags.split(','))])`


			`def parse(text: str, require_tags: str = '') -> str:`
			`''' Determine morpho tags for input text.`
			`::returns:: string of comma separated grammar tags or empty string '''`
			`model = parser.parse(text, require_tags=split_tags(require_tags))`
			`if model is None:`
			`return ''`
			`result = model.get_morpho().as_str()`
			`return result if result != 'UNKN' else ''`


			`def get_all_forms(text_normal: str) -> list[tuple[str, str]]:`
			`''' Get all infeclted forms. '''`
			`model = parser.parse(text_normal)`
			`if not model:`
			`return []`
			`result = []`
			`for form in model.get_form().lexeme:`
			`result.append((form.word, Morphology(form.tag).as_str()))`
			`return result`


			`def normalize(text: str) -> str:`
			`''' Generate normal form.`
			`::returns:: normal form of input text or text itself if no parse is available '''`
			`model = parser.parse(text)`
			`if model is None:`
			`return text`
			`return model.normal_form()`


			`def inflect(text: str, target_tags: str) -> str:`
			`''' Inflect text to match required tags.`
			`::returns:: infected text or initial text if infection failed '''`
			`target_set = split_tags(target_tags)`
			`model = parser.parse(text)`
			`if model is None:`
			`return text`
			`return model.inflect(target_set)`


			`def inflect_context(target: str, cntxt_before: str = '', cntxt_after: str = '') -> str:`
			`''' Inflect text in accordance to context before and after. '''`
			`return parser.inflect_context(target, cntxt_before, cntxt_after)`


			`def inflect_substitute(substitute_normal: str, original: str) -> str:`
			`''' Inflect substitute to match original form. '''`
			`return parser.inflect_substitute(substitute_normal, original)`


			`def inflect_dependant(dependant_normal: str, master: str) -> str:`
			`''' Inflect dependant to coordinate with master text. '''`
			`return parser.inflect_dependant(dependant_normal, master)`


			`def match_all_morpho(text: str, filter_tags: str) -> list[list[int]]:`
			`''' Search for all words corresponding to tags. '''`
			`target_set = split_tags(filter_tags)`
			`if len(target_set) == 0:`
			`return []`

			`result = []`
			`for elem in RuSyntax.tokenize(text):`
			`model = parser.parse(elem.text, require_tags=target_set)`
			`if model:`
			`result.append([elem.start, elem.stop])`
			`return result`


			`def find_substr(text: str, sub: str) -> tuple[int, int]:`
			`''' Search for substring position in text regardless of morphology. '''`
			`return parser.find_substr(text, sub)`