From 7dbbbab15ab705450ba8ea8f3ccb2c477733a6e8 Mon Sep 17 00:00:00 2001 From: IRBorisov <8611739+IRBorisov@users.noreply.github.com> Date: Sun, 20 Aug 2023 13:59:46 +0300 Subject: [PATCH] Implement text reference resolution for backend --- .dockerignore | 1 + .gitignore | 1 + .vscode/settings.json | 17 ++- README.md | 3 + .../backend/apps/rsform/tests/__init__.py | 1 - rsconcept/backend/apps/rsform/utils.py | 12 +- rsconcept/backend/cctext/__init__.py | 8 +- rsconcept/backend/cctext/conceptapi.py | 12 +- rsconcept/backend/cctext/context.py | 62 ++++++++++ rsconcept/backend/cctext/reference.py | 60 +++++++++ rsconcept/backend/cctext/resolver.py | 114 ++++++++++++++++++ rsconcept/backend/cctext/rumodel.py | 37 +++--- rsconcept/backend/cctext/ruparser.py | 2 +- rsconcept/backend/cctext/tests/__init__.py | 7 ++ .../{testConceptAPI.py => t_conceptapi.py} | 8 +- rsconcept/backend/cctext/tests/t_context.py | 32 +++++ rsconcept/backend/cctext/tests/t_reference.py | 43 +++++++ rsconcept/backend/cctext/tests/t_resolver.py | 76 ++++++++++++ rsconcept/backend/cctext/tests/t_rumodel.py | 18 +++ .../tests/{testRuParser.py => t_ruparser.py} | 4 +- .../tests/{testSyntax.py => t_syntax.py} | 4 +- 21 files changed, 479 insertions(+), 43 deletions(-) create mode 100644 rsconcept/backend/cctext/context.py create mode 100644 rsconcept/backend/cctext/reference.py create mode 100644 rsconcept/backend/cctext/resolver.py rename rsconcept/backend/cctext/tests/{testConceptAPI.py => t_conceptapi.py} (95%) create mode 100644 rsconcept/backend/cctext/tests/t_context.py create mode 100644 rsconcept/backend/cctext/tests/t_reference.py create mode 100644 rsconcept/backend/cctext/tests/t_resolver.py create mode 100644 rsconcept/backend/cctext/tests/t_rumodel.py rename rsconcept/backend/cctext/tests/{testRuParser.py => t_ruparser.py} (99%) rename rsconcept/backend/cctext/tests/{testSyntax.py => t_syntax.py} (97%) diff --git a/.dockerignore b/.dockerignore index 24175c1d..3754bfc1 100644 --- a/.dockerignore +++ b/.dockerignore @@ -40,6 +40,7 @@ coverage.xml *.py,cover .hypothesis/ .pytest_cache/ +.mypy_cache/ cover/ diff --git a/.gitignore b/.gitignore index f3a3f0e2..2558558d 100644 --- a/.gitignore +++ b/.gitignore @@ -35,6 +35,7 @@ coverage.xml .hypothesis/ .pytest_cache/ cover/ +.mypy_cache/ # Django diff --git a/.vscode/settings.json b/.vscode/settings.json index 8a37642b..6f99defc 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,4 +1,8 @@ { + "search.exclude": { + ".mypy_cache/": true, + ".pytest_cache/": true + }, "python.testing.unittestArgs": [ "-v", "-s", @@ -14,5 +18,16 @@ } ], "python.linting.enabled": true, - "python.linting.mypyEnabled": true + "python.linting.mypyEnabled": true, + "python.analysis.typeCheckingMode": "off", + "python.analysis.diagnosticSeverityOverrides": { + // "reportOptionalMemberAccess": "none" + }, + "python.analysis.ignore": ["**/tests/**", "**/node_modules/**", "**/venv/**"], + "python.analysis.packageIndexDepths": [ + { + "name": "django", + "depth": 5 + } + ] } \ No newline at end of file diff --git a/README.md b/README.md index 5d24f39f..5dee022a 100644 --- a/README.md +++ b/README.md @@ -70,6 +70,8 @@ This readme file is used mostly to document project dependencies - gunicorn - coreapi - psycopg2-binary + - pymorphy2 + - razdel
@@ -87,6 +89,7 @@ This readme file is used mostly to document project dependencies
   - Pylance
   - Pylint
+  - Django
   
diff --git a/rsconcept/backend/apps/rsform/tests/__init__.py b/rsconcept/backend/apps/rsform/tests/__init__.py index c14522a1..9f51ce17 100644 --- a/rsconcept/backend/apps/rsform/tests/__init__.py +++ b/rsconcept/backend/apps/rsform/tests/__init__.py @@ -1,5 +1,4 @@ ''' Tests. ''' -# flake8: noqa from .t_imports import * from .t_views import * from .t_models import * diff --git a/rsconcept/backend/apps/rsform/utils.py b/rsconcept/backend/apps/rsform/utils.py index 8e372cdc..3729821b 100644 --- a/rsconcept/backend/apps/rsform/utils.py +++ b/rsconcept/backend/apps/rsform/utils.py @@ -8,13 +8,21 @@ from rest_framework.permissions import BasePermission class ObjectOwnerOrAdmin(BasePermission): ''' Permission for object ownership restriction ''' def has_object_permission(self, request, view, obj): - return request.user == obj.owner or request.user.is_staff + if request.user == obj.owner: + return True + if not hasattr(request.user, 'is_staff'): + return False + return request.user.is_staff # type: ignore class SchemaOwnerOrAdmin(BasePermission): ''' Permission for object ownership restriction ''' def has_object_permission(self, request, view, obj): - return request.user == obj.schema.owner or request.user.is_staff + if request.user == obj.schema.owner: + return True + if not hasattr(request.user, 'is_staff'): + return False + return request.user.is_staff # type: ignore def read_trs(file) -> dict: diff --git a/rsconcept/backend/cctext/__init__.py b/rsconcept/backend/cctext/__init__.py index 7c60c16a..545d4b2e 100644 --- a/rsconcept/backend/cctext/__init__.py +++ b/rsconcept/backend/cctext/__init__.py @@ -1,14 +1,16 @@ ''' Concept core text processing library. ''' # pylint: skip-file from .syntax import RuSyntax, Capitalization -from .rumodel import Morphology, SemanticRole, WordTag, morpho +from .rumodel import Morphology, SemanticRole, WordTag, morpho, split_tags, combine_tags from .ruparser import PhraseParser, WordToken, Collation +from .reference import EntityReference, ReferenceType, SyntacticReference, parse_reference +from .context import TermForm, Entity, TermContext +from .resolver import Position, Resolver, ResolvedReference, resolve_entity, resolve_syntactic from .conceptapi import ( parse, normalize, get_all_forms, inflect, inflect_context, inflect_substitute, inflect_dependant, - match_all_morpho, find_substr, - split_tags + match_all_morpho, find_substr ) # TODO: implement Part of speech transition for VERB <-> NOUN diff --git a/rsconcept/backend/cctext/conceptapi.py b/rsconcept/backend/cctext/conceptapi.py index e28271a9..90caf793 100644 --- a/rsconcept/backend/cctext/conceptapi.py +++ b/rsconcept/backend/cctext/conceptapi.py @@ -1,27 +1,23 @@ ''' Concept API Python functions. -::guarantee:: doesnt raise exceptions and returns workable outputs in situations where empty string would be returned +::guarantee:: doesnt raise exceptions and returns workable outputs ''' from cctext.rumodel import Morphology from .syntax import RuSyntax from .ruparser import PhraseParser +from .rumodel import split_tags parser = PhraseParser() -def split_tags(tags: str) -> frozenset[str]: - ''' Split grammemes string into set of items. ''' - return frozenset([tag.strip() for tag in filter(None, tags.split(','))]) - - def parse(text: str, require_tags: str = '') -> str: ''' Determine morpho tags for input text. ::returns:: string of comma separated grammar tags or empty string ''' model = parser.parse(text, require_tags=split_tags(require_tags)) if model is None: return '' - result = model.get_morpho().as_str() + result = model.get_morpho().to_text() return result if result != 'UNKN' else '' @@ -32,7 +28,7 @@ def get_all_forms(text_normal: str) -> list[tuple[str, str]]: return [] result = [] for form in model.get_form().lexeme: - result.append((form.word, Morphology(form.tag).as_str())) + result.append((form.word, Morphology(form.tag).to_text())) return result diff --git a/rsconcept/backend/cctext/context.py b/rsconcept/backend/cctext/context.py new file mode 100644 index 00000000..fd658f71 --- /dev/null +++ b/rsconcept/backend/cctext/context.py @@ -0,0 +1,62 @@ +''' Term context for reference resolution. ''' +from typing import Iterable, Dict, Optional +from dataclasses import dataclass + +from .conceptapi import inflect + + +@dataclass +class TermForm: + ''' Term in a specific form. ''' + text: str + form: str + + +def _search_form(query: str, data: Iterable[TermForm]) -> Optional[str]: + for tf in data: + if tf.form == query: + return tf.text + return None + + +class Entity: + ''' Text entity. ''' + def __init__(self, alias: str, nominal: str, manual_forms: Optional[Iterable[TermForm]]=None): + if manual_forms is None: + self.manual = [] + else: + self.manual = list(manual_forms) + self.alias = alias + self._nominal = nominal + self._cached: list[TermForm] = [] + + def get_nominal(self) -> str: + ''' Getter for _nominal. ''' + return self._nominal + + def set_nominal(self, new_text: str): + ''' Setter for _nominal. + Note: clears manual and cached forms. ''' + if self._nominal == new_text: + return + self._nominal = new_text + self.manual = [] + self._cached = [] + + def get_form(self, form: str) -> str: + ''' Get specific term form. ''' + if form == '': + return self._nominal + text = _search_form(form, self.manual) + if text is None: + text = _search_form(form, self._cached) + if text is None: + try: + text = inflect(self._nominal, form) + except ValueError as error: + text = f'!{error}!'.replace('Unknown grammeme', 'Неизвестная граммема') + self._cached.append(TermForm(text=text, form=form)) + return text + +# Term context for resolving entity references. +TermContext = Dict[str, Entity] diff --git a/rsconcept/backend/cctext/reference.py b/rsconcept/backend/cctext/reference.py new file mode 100644 index 00000000..763a6799 --- /dev/null +++ b/rsconcept/backend/cctext/reference.py @@ -0,0 +1,60 @@ +''' Text reference API. ''' +from enum import Enum, unique +from typing import Optional, Union + + +@unique +class ReferenceType(Enum): + ''' Text reference types. ''' + entity = 'entity' + syntactic = 'syntax' + + +class EntityReference: + ''' Reference to entity. ''' + + def __init__(self, identifier: str, form: str): + self.entity = identifier + self.form = form + + def get_type(self) -> ReferenceType: + return ReferenceType.entity + + def to_text(self) -> str: + return f'@{{{self.entity}|{self.form}}}' + + +class SyntacticReference: + ''' Reference to syntactic dependcy on EntityReference. ''' + + def __init__(self, referal_offset: int, text: str): + self.nominal = text + self.offset = referal_offset + + def get_type(self) -> ReferenceType: + return ReferenceType.syntactic + + def to_text(self) -> str: + return f'@{{{self.offset}|{self.nominal}}}' + + +Reference = Union[EntityReference, SyntacticReference] + + +def parse_reference(text: str) -> Optional[Reference]: + if len(text) < 4 or text[-1] != '}' or text[0:2] != '@{': + return None + blocks: list[str] = [block.strip() for block in text[2:-1].split('|')] + if len(blocks) != 2 or blocks[0] == '' or blocks[0][0] in '0': + return None + if blocks[0][0] in '-123456789': + if blocks[1] == '': + return None + try: + offset = int(blocks[0]) + return SyntacticReference(offset, blocks[1]) + except ValueError: + return None + else: + form = blocks[1].replace(' ', '') + return EntityReference(blocks[0], form) diff --git a/rsconcept/backend/cctext/resolver.py b/rsconcept/backend/cctext/resolver.py new file mode 100644 index 00000000..62d3a534 --- /dev/null +++ b/rsconcept/backend/cctext/resolver.py @@ -0,0 +1,114 @@ +''' Reference resolution API. ''' +import re +from typing import cast, Optional +from dataclasses import dataclass + +from .conceptapi import inflect_dependant +from .context import TermContext +from .reference import EntityReference, SyntacticReference, parse_reference, Reference + + +def resolve_entity(ref: EntityReference, context: TermContext) -> str: + ''' Resolve entity reference. ''' + alias = ref.entity + if alias not in context: + return f'!Неизвестная сущность: {alias}!' + return context[alias].get_form(ref.form) + + +def resolve_syntactic(ref: SyntacticReference, index: int, allrefs: list['ResolvedReference']) -> str: + ''' Resolve syntactic reference. ''' + offset = ref.offset + mainref: Optional['ResolvedReference'] = None + if offset > 0: + index += 1 + while index < len(allrefs): + if isinstance(allrefs[index].ref, EntityReference): + if offset == 1: + mainref = allrefs[index] + else: + offset -= 1 + index += 1 + else: + index -= 1 + while index >= 0: + if isinstance(allrefs[index].ref, EntityReference): + if offset == -1: + mainref = allrefs[index] + else: + offset += 1 + index -= 1 + if mainref is None: + return f'!Некорректное смещение: {ref.offset}!' + return inflect_dependant(ref.nominal, mainref.resolved) + + +@dataclass +class Position: + ''' 0-indexed contiguous segment position in text. ''' + start: int = 0 + finish: int = 0 + + +@dataclass +class ResolvedReference: + ''' Resolved reference data ''' + ref: Reference + resolved: str = '' + pos_input: Position = Position() + pos_output: Position = Position() + + +class Resolver: + ''' Text reference resolver ''' + REFERENCE_PATTERN = re.compile(r'@{.*?}') + + def __init__(self, context: TermContext): + self.context = context + self.refs = cast(list[ResolvedReference], []) + self.input = '' + self.output = '' + + def resolve(self, text: str) -> str: + ''' Resolve references in input text. + Note: data on references positions is accessed through class attributes ''' + self._reset(text) + self._parse_refs() + if len(self.refs) == 0: + self.output = self.input + return self.output + else: + self._resolve_refs() + self._combine_output() + return self.output + + def _reset(self, input_text: str): + self.refs = cast(list[ResolvedReference], []) + self.input = input_text + self.output = '' + + def _parse_refs(self): + for segment in re.finditer(Resolver.REFERENCE_PATTERN, self.input): + parse = parse_reference(segment[0]) + if parse is not None: + ref_info = ResolvedReference(ref=parse, + resolved='', + pos_input=Position(segment.start(0), segment.end(0))) + self.refs.append(ref_info) + + def _resolve_refs(self): + for ref in self.refs: + if isinstance(ref.ref, EntityReference): + ref.resolved = resolve_entity(ref.ref, self.context) + for (index, ref) in enumerate(self.refs): + if isinstance(ref.ref, SyntacticReference): + ref.resolved = resolve_syntactic(ref.ref, index, self.refs) + + def _combine_output(self): + pos_in = 0 + for ref in self.refs: + self.output += self.input[pos_in : ref.pos_input.start] + self.output += ref.resolved + ref.pos_output = Position(len(self.output) - len(ref.resolved), len(self.output)) + pos_in = ref.pos_input.finish + self.output += self.input[pos_in : len(self.input)] diff --git a/rsconcept/backend/cctext/rumodel.py b/rsconcept/backend/cctext/rumodel.py index aad798db..b9deacd2 100644 --- a/rsconcept/backend/cctext/rumodel.py +++ b/rsconcept/backend/cctext/rumodel.py @@ -1,7 +1,7 @@ ''' Russian language models. ''' from __future__ import annotations from enum import Enum, unique -from typing import Iterable +from typing import Iterable, Optional from pymorphy2 import MorphAnalyzer from pymorphy2.tagset import OpencorporaTag as WordTag @@ -10,6 +10,16 @@ from pymorphy2.tagset import OpencorporaTag as WordTag morpho = MorphAnalyzer() +def split_tags(text: str) -> list[str]: + ''' Split grammemes string into set of items. ''' + return [tag.strip() for tag in filter(None, text.split(','))] + + +def combine_tags(tags: Iterable[str]) -> str: + ''' Combine grammemes into string. ''' + return ','.join(tags) + + @unique class SemanticRole(Enum): ''' Enumerating semantic types for different parse patterns. ''' @@ -19,8 +29,8 @@ class SemanticRole(Enum): definition = 3 @staticmethod - def from_pos(pos: str) -> SemanticRole: - ''' Fabric method to produce types from part of speech. ''' + def from_POS(pos: Optional[str]) -> SemanticRole: + ''' Production method: types from part of speech. ''' if pos in ['NOUN', 'NPRO']: return SemanticRole.term elif pos in ['VERB', 'INFN', 'PRTF', 'PRTS']: @@ -36,10 +46,7 @@ class Morphology: ''' def __init__(self, tag: WordTag, semantic=SemanticRole.unknwn): self.tag = tag - self.semantic = semantic if semantic != SemanticRole.unknwn else SemanticRole.from_pos(tag.POS) - - def __del__(self): - pass + self.semantic = semantic if semantic != SemanticRole.unknwn else SemanticRole.from_POS(tag.POS) _TAGS_IMMUTABLE = frozenset(['INFN', 'ADVB', 'COMP', 'PNCT', 'PREP', 'CONJ', 'PRCL', 'INTJ']) @@ -60,9 +67,9 @@ class Morphology: return pos in ['ADJF', 'ADJS', 'PRTF', 'PRTS'] @property - def effective_POS(self) -> str: + def effective_POS(self) -> Optional[str]: ''' Access part of speech. Pronouns are considered as nouns ''' - pos: str = self.tag.POS + pos: Optional[str] = self.tag.POS if pos and self.tag.POS == 'NPRO': return 'NOUN' return pos @@ -105,14 +112,6 @@ class Morphology: result.add(self.tag.gender) return result - def as_str(self) -> str: + def to_text(self) -> str: ''' Produce string of all grammemes. ''' - grammemes = self.tag.grammemes - count = len(grammemes) - if count == 0: - return '' - elif count == 1: - result: str = next(iter(grammemes)) - return result - else: - return ','.join(grammemes) + return combine_tags(self.tag.grammemes) diff --git a/rsconcept/backend/cctext/ruparser.py b/rsconcept/backend/cctext/ruparser.py index 7a8acd00..9bac7b25 100644 --- a/rsconcept/backend/cctext/ruparser.py +++ b/rsconcept/backend/cctext/ruparser.py @@ -381,7 +381,7 @@ class PhraseParser: case = form.tag.case if pos not in ['ADJF', 'ADJS', 'PRTF', 'PRTS']: continue - if SemanticRole.from_pos(pos) == SemanticRole.term and case == 'gent': + if SemanticRole.from_POS(pos) == SemanticRole.term and case == 'gent': if before_main: continue else: diff --git a/rsconcept/backend/cctext/tests/__init__.py b/rsconcept/backend/cctext/tests/__init__.py index beaff6d5..b554047a 100644 --- a/rsconcept/backend/cctext/tests/__init__.py +++ b/rsconcept/backend/cctext/tests/__init__.py @@ -1 +1,8 @@ ''' Tests. ''' +from .t_reference import * +from .t_ruparser import * +from .t_syntax import * +from .t_conceptapi import * +from .t_rumodel import * +from .t_context import * +from .t_resolver import * diff --git a/rsconcept/backend/cctext/tests/testConceptAPI.py b/rsconcept/backend/cctext/tests/t_conceptapi.py similarity index 95% rename from rsconcept/backend/cctext/tests/testConceptAPI.py rename to rsconcept/backend/cctext/tests/t_conceptapi.py index 39c823ce..76405b3e 100644 --- a/rsconcept/backend/cctext/tests/testConceptAPI.py +++ b/rsconcept/backend/cctext/tests/t_conceptapi.py @@ -1,4 +1,4 @@ -'''Test Concept Text API''' +''' Unit tests: conceptapi. ''' import unittest import cctext as cc @@ -7,12 +7,12 @@ import cctext as cc class TestConceptAPI(unittest.TestCase): '''Test class for Concept API.''' def _assert_tags(self, actual: str, expected: str): - self.assertEqual(cc.split_tags(actual), cc.split_tags(expected)) - + self.assertEqual(set(cc.split_tags(actual)), set(cc.split_tags(expected))) + def test_parse(self): ''' Test parsing. ''' self._assert_tags(cc.parse(''), '') - self._assert_tags(cc.parse('1'), 'intg,NUMB') + self._assert_tags(cc.parse('1'), 'NUMB,intg') self._assert_tags(cc.parse('слон', require_tags='masc'), 'NOUN,anim,masc,sing,nomn') def test_normalize_word(self): diff --git a/rsconcept/backend/cctext/tests/t_context.py b/rsconcept/backend/cctext/tests/t_context.py new file mode 100644 index 00000000..0a2c68e4 --- /dev/null +++ b/rsconcept/backend/cctext/tests/t_context.py @@ -0,0 +1,32 @@ +''' Unit tests: context. ''' +import unittest + +from cctext.context import TermForm, Entity, TermContext + +class TestEntity(unittest.TestCase): + '''Test Entity termform access.''' + def setUp(self): + self.alias = 'X1' + self.nominal = 'человек' + self.text1 = 'test1' + self.form1 = 'sing,datv' + self.entity = Entity(self.alias, self.nominal, [TermForm(self.text1, self.form1)]) + + def test_attributes(self): + self.assertEqual(self.entity.alias, self.alias) + self.assertEqual(self.entity.get_nominal(), self.nominal) + self.assertEqual(self.entity.manual, [TermForm(self.text1, self.form1)]) + + def test_get_form(self): + self.assertEqual(self.entity.get_form(''), self.nominal) + self.assertEqual(self.entity.get_form(self.form1), self.text1) + self.assertEqual(self.entity.get_form('invalid tags'), '!Неизвестная граммема: invalid tags!') + self.assertEqual(self.entity.get_form('plur'), 'люди') + + def test_set_nominal(self): + new_nomial = 'TEST' + self.assertEqual(self.entity.get_form('plur'), 'люди') + self.entity.set_nominal(new_nomial) + self.assertEqual(self.entity.get_nominal(), new_nomial) + self.assertEqual(self.entity.get_form('plur'), new_nomial) + self.assertEqual(self.entity.manual, []) diff --git a/rsconcept/backend/cctext/tests/t_reference.py b/rsconcept/backend/cctext/tests/t_reference.py new file mode 100644 index 00000000..148500e7 --- /dev/null +++ b/rsconcept/backend/cctext/tests/t_reference.py @@ -0,0 +1,43 @@ +''' Unit tests: reference. ''' +import unittest + +from cctext import EntityReference, ReferenceType, SyntacticReference, parse_reference + +class TestReferences(unittest.TestCase): + ''' Test class for references. ''' + + def test_EntityReference(self): + ''' Testing EntityRefence basics. ''' + ref = EntityReference('X1', 'sing,nomn') + self.assertEqual(ref.get_type(), ReferenceType.entity) + self.assertEqual(ref.to_text(), '@{X1|sing,nomn}') + + def test_SyntacticReference(self): + ''' Testing SyntacticReference basics. ''' + ref = SyntacticReference(-1, 'черный') + self.assertEqual(ref.get_type(), ReferenceType.syntactic) + self.assertEqual(ref.to_text(), '@{-1|черный}') + + def test_parse_reference_invalid(self): + ''' Testing parsing reference invalid input. ''' + self.assertIsNone(parse_reference('')) + self.assertIsNone(parse_reference('X1')) + self.assertIsNone(parse_reference('invalid')) + self.assertIsNone(parse_reference(' ')) + self.assertIsNone(parse_reference('@{|}')) + self.assertIsNone(parse_reference('@{ | }')) + self.assertIsNone(parse_reference('@{-1| }')) + self.assertIsNone(parse_reference('@{1| }')) + self.assertIsNone(parse_reference('@{0|черный}')) + + def test_parse_reference(self): + ''' Testing parsing reference text. ''' + ref = parse_reference('@{1| черный }') + self.assertIsNotNone(ref) + self.assertEqual(ref.to_text(), '@{1|черный}') + self.assertEqual(ref.get_type(), ReferenceType.syntactic) + + ref = parse_reference('@{X1 | VERB, past, sing}') + self.assertIsNotNone(ref) + self.assertEqual(ref.to_text(), '@{X1|VERB,past,sing}') + self.assertEqual(ref.get_type(), ReferenceType.entity) diff --git a/rsconcept/backend/cctext/tests/t_resolver.py b/rsconcept/backend/cctext/tests/t_resolver.py new file mode 100644 index 00000000..0dafe80e --- /dev/null +++ b/rsconcept/backend/cctext/tests/t_resolver.py @@ -0,0 +1,76 @@ +''' Unit tests: resolver. ''' +import unittest +from typing import cast + +from cctext import ( + EntityReference, TermContext, Entity, SyntacticReference, + Resolver, ResolvedReference, Position, + resolve_entity, resolve_syntactic +) + +class TestResolver(unittest.TestCase): + '''Test reference Resolver.''' + def setUp(self): + self.context = cast(TermContext, {}) + self.context['X1'] = Entity('X1', 'человек') + self.resolver = Resolver(self.context) + + def test_resolve_entity(self): + self.assertEqual(resolve_entity(EntityReference('X1', ''), self.context), 'человек') + self.assertEqual(resolve_entity(EntityReference('X1', 'plur'), self.context), 'люди') + self.assertEqual(resolve_entity(EntityReference('X1', 'invalid'), self.context), '!Неизвестная граммема: invalid!') + self.assertEqual(resolve_entity(EntityReference('X123', 'plur'), self.context), '!Неизвестная сущность: X123!') + + def test_resolve_syntactic(self): + ref = ResolvedReference(ref=EntityReference('X1', 'sing,datv'), resolved='человеку') + allrefs = [ref, ref, ref, ref] + self.assertEqual(resolve_syntactic(SyntacticReference(text='умный', referal_offset=-1), 0, allrefs), '!Некорректное смещение: -1!') + self.assertEqual(resolve_syntactic(SyntacticReference(text='умный', referal_offset=1), 3, allrefs), '!Некорректное смещение: 1!') + self.assertEqual(resolve_syntactic(SyntacticReference(text='умный', referal_offset=1), 0, allrefs), 'умному') + self.assertEqual(resolve_syntactic(SyntacticReference(text='умный', referal_offset=2), 0, allrefs), 'умному') + self.assertEqual(resolve_syntactic(SyntacticReference(text='умный', referal_offset=3), 0, allrefs), 'умному') + self.assertEqual(resolve_syntactic(SyntacticReference(text='умный', referal_offset=-1), 3, allrefs), 'умному') + self.assertEqual(resolve_syntactic(SyntacticReference(text='умный', referal_offset=-2), 3, allrefs), 'умному') + self.assertEqual(resolve_syntactic(SyntacticReference(text='умный', referal_offset=-3), 3, allrefs), 'умному') + + def test_resolve_invalid(self): + self.assertEqual(self.resolver.resolve(''), '') + self.assertEqual(len(self.resolver.refs), 0) + + self.assertEqual(self.resolver.resolve('simple text'), 'simple text') + self.assertEqual(len(self.resolver.refs), 0) + + self.assertEqual(self.resolver.resolve('simple @{unparsable ref} text'), 'simple @{unparsable ref} text') + self.assertEqual(len(self.resolver.refs), 0) + + def test_resolve_single(self): + self.assertEqual(self.resolver.resolve('просто @{-1|умный} текст'), 'просто !Некорректное смещение: -1! текст') + self.assertEqual(len(self.resolver.refs), 1) + self.assertEqual(self.resolver.refs[0].pos_input, Position(7, 18)) + self.assertEqual(self.resolver.refs[0].pos_output, Position(7, 34)) + + self.assertEqual(self.resolver.resolve('просто @{X123|sing,nomn} текст'), 'просто !Неизвестная сущность: X123! текст') + self.assertEqual(len(self.resolver.refs), 1) + self.assertEqual(self.resolver.refs[0].pos_input, Position(7, 24)) + self.assertEqual(self.resolver.refs[0].pos_output, Position(7, 35)) + + self.assertEqual(self.resolver.resolve('@{X1|sing,nomn}'), 'человек') + self.assertEqual(len(self.resolver.refs), 1) + self.assertEqual(self.resolver.refs[0].pos_input, Position(0, 15)) + self.assertEqual(self.resolver.refs[0].pos_output, Position(0, 7)) + + self.assertEqual(self.resolver.resolve('просто @{X1|sing,nomn} текст'), 'просто человек текст') + self.assertEqual(len(self.resolver.refs), 1) + self.assertEqual(self.resolver.refs[0].pos_input, Position(7, 22)) + self.assertEqual(self.resolver.refs[0].pos_output, Position(7, 14)) + + def test_resolve_multiple(self): + input = '@{X1|sing,datv} @{-1|умный} @{X1|plur} завидуют' + self.assertEqual(self.resolver.resolve(input), 'человеку умному люди завидуют') + self.assertEqual(len(self.resolver.refs), 3) + self.assertEqual(self.resolver.refs[0].pos_input, Position(0, 15)) + self.assertEqual(self.resolver.refs[0].pos_output, Position(0, 8)) + self.assertEqual(self.resolver.refs[1].pos_input, Position(16, 27)) + self.assertEqual(self.resolver.refs[1].pos_output, Position(9, 15)) + self.assertEqual(self.resolver.refs[2].pos_input, Position(28, 38)) + self.assertEqual(self.resolver.refs[2].pos_output, Position(16, 20)) diff --git a/rsconcept/backend/cctext/tests/t_rumodel.py b/rsconcept/backend/cctext/tests/t_rumodel.py new file mode 100644 index 00000000..c5e80ac7 --- /dev/null +++ b/rsconcept/backend/cctext/tests/t_rumodel.py @@ -0,0 +1,18 @@ +''' Unit tests: rumodel. ''' +import unittest + +from cctext import split_tags, combine_tags + + +class TestTags(unittest.TestCase): + '''Test tags manipulation.''' + + def test_split_tags(self): + self.assertEqual(split_tags(''), []) + self.assertEqual(split_tags('NOUN'), ['NOUN']) + self.assertEqual(split_tags('NOUN,plur,sing'), ['NOUN','plur','sing']) + + def test_combine_tags(self): + self.assertEqual(combine_tags([]), '') + self.assertEqual(combine_tags(['NOUN']), 'NOUN') + self.assertEqual(combine_tags(['NOUN','plur','sing']), 'NOUN,plur,sing') diff --git a/rsconcept/backend/cctext/tests/testRuParser.py b/rsconcept/backend/cctext/tests/t_ruparser.py similarity index 99% rename from rsconcept/backend/cctext/tests/testRuParser.py rename to rsconcept/backend/cctext/tests/t_ruparser.py index 0a98013b..e3a352cb 100644 --- a/rsconcept/backend/cctext/tests/testRuParser.py +++ b/rsconcept/backend/cctext/tests/t_ruparser.py @@ -1,4 +1,4 @@ -''' Test russian language parsing. ''' +''' Unit tests: ruparser. ''' import unittest from typing import Iterable, Optional @@ -10,7 +10,7 @@ parser = PhraseParser() class TestRuParser(unittest.TestCase): ''' Test class for russian parsing. ''' - def _assert_parse(self, text: str, expected: list[str], + def _assert_parse(self, text: str, expected: Iterable[str], require_index: int = -1, require_tags: Optional[Iterable[str]] = None): phrase = parser.parse(text, require_index, require_tags) diff --git a/rsconcept/backend/cctext/tests/testSyntax.py b/rsconcept/backend/cctext/tests/t_syntax.py similarity index 97% rename from rsconcept/backend/cctext/tests/testSyntax.py rename to rsconcept/backend/cctext/tests/t_syntax.py index 2a960515..f5d75ec2 100644 --- a/rsconcept/backend/cctext/tests/testSyntax.py +++ b/rsconcept/backend/cctext/tests/t_syntax.py @@ -1,10 +1,10 @@ -'''Test module for Russian syntax''' +''' Unit tests: syntax. ''' import unittest from cctext import RuSyntax, Capitalization -class TestRusParser(unittest.TestCase): +class TestRusSyntax(unittest.TestCase): ''' Test class for russian syntax. ''' def test_capitalization(self):