Implement text reference resolution for backend

This commit is contained in:
IRBorisov 2023-08-20 13:59:46 +03:00
parent 7cd76f6004
commit 7dbbbab15a
21 changed files with 479 additions and 43 deletions

View File

@ -40,6 +40,7 @@ coverage.xml
*.py,cover *.py,cover
.hypothesis/ .hypothesis/
.pytest_cache/ .pytest_cache/
.mypy_cache/
cover/ cover/

1
.gitignore vendored
View File

@ -35,6 +35,7 @@ coverage.xml
.hypothesis/ .hypothesis/
.pytest_cache/ .pytest_cache/
cover/ cover/
.mypy_cache/
# Django # Django

17
.vscode/settings.json vendored
View File

@ -1,4 +1,8 @@
{ {
"search.exclude": {
".mypy_cache/": true,
".pytest_cache/": true
},
"python.testing.unittestArgs": [ "python.testing.unittestArgs": [
"-v", "-v",
"-s", "-s",
@ -14,5 +18,16 @@
} }
], ],
"python.linting.enabled": true, "python.linting.enabled": true,
"python.linting.mypyEnabled": true "python.linting.mypyEnabled": true,
"python.analysis.typeCheckingMode": "off",
"python.analysis.diagnosticSeverityOverrides": {
// "reportOptionalMemberAccess": "none"
},
"python.analysis.ignore": ["**/tests/**", "**/node_modules/**", "**/venv/**"],
"python.analysis.packageIndexDepths": [
{
"name": "django",
"depth": 5
}
]
} }

View File

@ -70,6 +70,8 @@ This readme file is used mostly to document project dependencies
- gunicorn - gunicorn
- coreapi - coreapi
- psycopg2-binary - psycopg2-binary
- pymorphy2
- razdel
</pre> </pre>
</details> </details>
<details> <details>
@ -87,6 +89,7 @@ This readme file is used mostly to document project dependencies
<pre> <pre>
- Pylance - Pylance
- Pylint - Pylint
- Django
</pre> </pre>
</details> </details>

View File

@ -1,5 +1,4 @@
''' Tests. ''' ''' Tests. '''
# flake8: noqa
from .t_imports import * from .t_imports import *
from .t_views import * from .t_views import *
from .t_models import * from .t_models import *

View File

@ -8,13 +8,21 @@ from rest_framework.permissions import BasePermission
class ObjectOwnerOrAdmin(BasePermission): class ObjectOwnerOrAdmin(BasePermission):
''' Permission for object ownership restriction ''' ''' Permission for object ownership restriction '''
def has_object_permission(self, request, view, obj): def has_object_permission(self, request, view, obj):
return request.user == obj.owner or request.user.is_staff if request.user == obj.owner:
return True
if not hasattr(request.user, 'is_staff'):
return False
return request.user.is_staff # type: ignore
class SchemaOwnerOrAdmin(BasePermission): class SchemaOwnerOrAdmin(BasePermission):
''' Permission for object ownership restriction ''' ''' Permission for object ownership restriction '''
def has_object_permission(self, request, view, obj): def has_object_permission(self, request, view, obj):
return request.user == obj.schema.owner or request.user.is_staff if request.user == obj.schema.owner:
return True
if not hasattr(request.user, 'is_staff'):
return False
return request.user.is_staff # type: ignore
def read_trs(file) -> dict: def read_trs(file) -> dict:

View File

@ -1,14 +1,16 @@
''' Concept core text processing library. ''' ''' Concept core text processing library. '''
# pylint: skip-file # pylint: skip-file
from .syntax import RuSyntax, Capitalization from .syntax import RuSyntax, Capitalization
from .rumodel import Morphology, SemanticRole, WordTag, morpho from .rumodel import Morphology, SemanticRole, WordTag, morpho, split_tags, combine_tags
from .ruparser import PhraseParser, WordToken, Collation from .ruparser import PhraseParser, WordToken, Collation
from .reference import EntityReference, ReferenceType, SyntacticReference, parse_reference
from .context import TermForm, Entity, TermContext
from .resolver import Position, Resolver, ResolvedReference, resolve_entity, resolve_syntactic
from .conceptapi import ( from .conceptapi import (
parse, normalize, parse, normalize,
get_all_forms, inflect, inflect_context, inflect_substitute, inflect_dependant, get_all_forms, inflect, inflect_context, inflect_substitute, inflect_dependant,
match_all_morpho, find_substr, match_all_morpho, find_substr
split_tags
) )
# TODO: implement Part of speech transition for VERB <-> NOUN # TODO: implement Part of speech transition for VERB <-> NOUN

View File

@ -1,27 +1,23 @@
''' '''
Concept API Python functions. Concept API Python functions.
::guarantee:: doesnt raise exceptions and returns workable outputs in situations where empty string would be returned ::guarantee:: doesnt raise exceptions and returns workable outputs
''' '''
from cctext.rumodel import Morphology from cctext.rumodel import Morphology
from .syntax import RuSyntax from .syntax import RuSyntax
from .ruparser import PhraseParser from .ruparser import PhraseParser
from .rumodel import split_tags
parser = PhraseParser() parser = PhraseParser()
def split_tags(tags: str) -> frozenset[str]:
''' Split grammemes string into set of items. '''
return frozenset([tag.strip() for tag in filter(None, tags.split(','))])
def parse(text: str, require_tags: str = '') -> str: def parse(text: str, require_tags: str = '') -> str:
''' Determine morpho tags for input text. ''' Determine morpho tags for input text.
::returns:: string of comma separated grammar tags or empty string ''' ::returns:: string of comma separated grammar tags or empty string '''
model = parser.parse(text, require_tags=split_tags(require_tags)) model = parser.parse(text, require_tags=split_tags(require_tags))
if model is None: if model is None:
return '' return ''
result = model.get_morpho().as_str() result = model.get_morpho().to_text()
return result if result != 'UNKN' else '' return result if result != 'UNKN' else ''
@ -32,7 +28,7 @@ def get_all_forms(text_normal: str) -> list[tuple[str, str]]:
return [] return []
result = [] result = []
for form in model.get_form().lexeme: for form in model.get_form().lexeme:
result.append((form.word, Morphology(form.tag).as_str())) result.append((form.word, Morphology(form.tag).to_text()))
return result return result

View File

@ -0,0 +1,62 @@
''' Term context for reference resolution. '''
from typing import Iterable, Dict, Optional
from dataclasses import dataclass
from .conceptapi import inflect
@dataclass
class TermForm:
    ''' Term in a specific grammatical form.

    text -- the term rendered in this form.
    form -- comma-separated grammeme tags identifying the form. '''
    text: str
    form: str
def _search_form(query: str, data: Iterable[TermForm]) -> Optional[str]:
    ''' Return the text of the first term form whose grammemes match query, or None. '''
    return next((item.text for item in data if item.form == query), None)
class Entity:
    ''' Named text entity with inflectable term forms.

    Keeps the nominal (dictionary) form, a list of manually supplied
    forms and a cache of forms produced by the inflection engine. '''
    def __init__(self, alias: str, nominal: str, manual_forms: Optional[Iterable[TermForm]]=None):
        self.alias = alias
        self._nominal = nominal
        # Manually supplied forms take precedence over generated ones.
        self.manual = [] if manual_forms is None else list(manual_forms)
        self._cached: list[TermForm] = []

    def get_nominal(self) -> str:
        ''' Getter for _nominal. '''
        return self._nominal

    def set_nominal(self, new_text: str):
        ''' Setter for _nominal.
        Note: clears manual and cached forms. '''
        if new_text == self._nominal:
            return
        self._nominal = new_text
        self.manual = []
        self._cached = []

    def get_form(self, form: str) -> str:
        ''' Get specific term form.
        Resolution order: nominal (empty form) -> manual forms ->
        cached generated forms -> inflection engine. '''
        if form == '':
            return self._nominal
        found = _search_form(form, self.manual)
        if found is not None:
            return found
        found = _search_form(form, self._cached)
        if found is not None:
            return found
        try:
            generated = inflect(self._nominal, form)
        except ValueError as error:
            # Localize the engine error message; the error marker is cached like any other form.
            generated = f'!{error}!'.replace('Unknown grammeme', 'Неизвестная граммема')
        self._cached.append(TermForm(text=generated, form=form))
        return generated
# Term context for resolving entity references: maps entity alias -> Entity.
TermContext = Dict[str, Entity]

View File

@ -0,0 +1,60 @@
''' Text reference API. '''
from enum import Enum, unique
from typing import Optional, Union
@unique
class ReferenceType(Enum):
    ''' Text reference types.
    Values are the serialized type identifiers. '''
    entity = 'entity'
    syntactic = 'syntax'
class EntityReference:
    ''' Reference to a text entity in a specific grammatical form. '''
    def __init__(self, identifier: str, form: str):
        self.entity = identifier
        self.form = form

    def get_type(self) -> ReferenceType:
        ''' Type discriminator for this reference. '''
        return ReferenceType.entity

    def to_text(self) -> str:
        ''' Serialize back to @{entity|form} notation. '''
        return '@{' + self.entity + '|' + self.form + '}'
class SyntacticReference:
    ''' Reference to a syntactic dependency on an EntityReference at a given offset. '''
    def __init__(self, referal_offset: int, text: str):
        self.nominal = text
        self.offset = referal_offset

    def get_type(self) -> ReferenceType:
        ''' Type discriminator for this reference. '''
        return ReferenceType.syntactic

    def to_text(self) -> str:
        ''' Serialize back to @{offset|nominal} notation. '''
        return '@{' + str(self.offset) + '|' + self.nominal + '}'
# Any reference kind that can appear in text.
Reference = Union[EntityReference, SyntacticReference]


def parse_reference(text: str) -> Optional[Reference]:
    ''' Parse @{...|...} notation into a reference object.
    Returns None for malformed input. '''
    if len(text) < 4 or not text.startswith('@{') or not text.endswith('}'):
        return None
    parts = [part.strip() for part in text[2:-1].split('|')]
    if len(parts) != 2:
        return None
    head, tail = parts
    # Empty head is invalid; zero offset is invalid (offsets are 1-based in both directions).
    if head == '' or head[0] == '0':
        return None
    if head[0] not in '-123456789':
        # Entity reference: head is an alias, tail is a grammeme list (spaces dropped).
        return EntityReference(head, tail.replace(' ', ''))
    # Syntactic reference: head must be a non-zero signed integer offset.
    if tail == '':
        return None
    try:
        return SyntacticReference(int(head), tail)
    except ValueError:
        return None

View File

@ -0,0 +1,114 @@
''' Reference resolution API. '''
import re
from dataclasses import dataclass, field
from typing import cast, Optional

from .conceptapi import inflect_dependant
from .context import TermContext
from .reference import EntityReference, SyntacticReference, parse_reference, Reference
def resolve_entity(ref: EntityReference, context: TermContext) -> str:
    ''' Resolve entity reference.
    Unknown aliases produce an inline error marker instead of raising. '''
    entity = context.get(ref.entity)
    if entity is None:
        return f'!Неизвестная сущность: {ref.entity}!'
    return entity.get_form(ref.form)
def resolve_syntactic(ref: SyntacticReference, index: int, allrefs: list['ResolvedReference']) -> str:
    ''' Resolve syntactic reference.

    Locates the EntityReference at signed offset ``ref.offset`` relative to
    position ``index`` in ``allrefs`` (counting only EntityReference items,
    forward for positive offsets, backward for negative) and inflects
    ``ref.nominal`` to agree with its resolved text.
    Returns an inline error marker when the offset points outside the list. '''
    offset = ref.offset
    mainref: Optional['ResolvedReference'] = None
    if offset > 0:
        index += 1
        while index < len(allrefs):
            if isinstance(allrefs[index].ref, EntityReference):
                if offset == 1:
                    mainref = allrefs[index]
                    # Stop at the N-th entity reference; without the break every
                    # later entity reference would overwrite mainref.
                    break
                offset -= 1
            index += 1
    else:
        index -= 1
        while index >= 0:
            if isinstance(allrefs[index].ref, EntityReference):
                if offset == -1:
                    mainref = allrefs[index]
                    # Stop at the N-th preceding entity reference (see above).
                    break
                offset += 1
            index -= 1
    if mainref is None:
        return f'!Некорректное смещение: {ref.offset}!'
    return inflect_dependant(ref.nominal, mainref.resolved)
@dataclass
class Position:
    ''' 0-indexed contiguous segment position in text.

    start -- index of the first character of the segment.
    finish -- index one past the last character (exclusive, like slice bounds). '''
    start: int = 0
    finish: int = 0
@dataclass
class ResolvedReference:
    ''' Resolved reference data.

    ref -- the parsed reference.
    resolved -- substitution text produced by resolution.
    pos_input / pos_output -- segment positions in the source and output text. '''
    ref: 'Reference'
    resolved: str = ''
    # A shared `Position()` default instance would leak position data between
    # references, and dataclasses reject unhashable defaults on Python 3.11+;
    # default_factory creates a fresh Position per instance.
    pos_input: Position = field(default_factory=Position)
    pos_output: Position = field(default_factory=Position)
class Resolver:
    ''' Text reference resolver.
    Substitutes every @{...} reference in the input text and records, for
    each reference, its position in both the source and the output text. '''
    REFERENCE_PATTERN = re.compile(r'@{.*?}')

    def __init__(self, context: TermContext):
        self.context = context
        self.refs: list[ResolvedReference] = []
        self.input = ''
        self.output = ''

    def resolve(self, text: str) -> str:
        ''' Resolve references in input text.
        Note: data on references positions is accessed through class attributes '''
        self._reset(text)
        self._parse_refs()
        if len(self.refs) == 0:
            self.output = self.input
        else:
            self._resolve_refs()
            self._combine_output()
        return self.output

    def _reset(self, input_text: str):
        # Discard state left over from a previous resolution run.
        self.refs = []
        self.input = input_text
        self.output = ''

    def _parse_refs(self):
        # Collect every parsable @{...} segment together with its source position.
        for match in Resolver.REFERENCE_PATTERN.finditer(self.input):
            parsed = parse_reference(match[0])
            if parsed is None:
                continue
            self.refs.append(ResolvedReference(
                ref=parsed,
                resolved='',
                pos_input=Position(match.start(0), match.end(0))
            ))

    def _resolve_refs(self):
        # Entity references first: syntactic references depend on their resolved text.
        for item in self.refs:
            if isinstance(item.ref, EntityReference):
                item.resolved = resolve_entity(item.ref, self.context)
        for position, item in enumerate(self.refs):
            if isinstance(item.ref, SyntacticReference):
                item.resolved = resolve_syntactic(item.ref, position, self.refs)

    def _combine_output(self):
        # Stitch plain text and substitutions, recording output positions as we go.
        consumed = 0
        for item in self.refs:
            self.output += self.input[consumed : item.pos_input.start]
            self.output += item.resolved
            item.pos_output = Position(len(self.output) - len(item.resolved), len(self.output))
            consumed = item.pos_input.finish
        self.output += self.input[consumed:]

View File

@ -1,7 +1,7 @@
''' Russian language models. ''' ''' Russian language models. '''
from __future__ import annotations from __future__ import annotations
from enum import Enum, unique from enum import Enum, unique
from typing import Iterable from typing import Iterable, Optional
from pymorphy2 import MorphAnalyzer from pymorphy2 import MorphAnalyzer
from pymorphy2.tagset import OpencorporaTag as WordTag from pymorphy2.tagset import OpencorporaTag as WordTag
@ -10,6 +10,16 @@ from pymorphy2.tagset import OpencorporaTag as WordTag
morpho = MorphAnalyzer() morpho = MorphAnalyzer()
def split_tags(text: str) -> list[str]:
''' Split grammemes string into set of items. '''
return [tag.strip() for tag in filter(None, text.split(','))]
def combine_tags(tags: Iterable[str]) -> str:
''' Combine grammemes into string. '''
return ','.join(tags)
@unique @unique
class SemanticRole(Enum): class SemanticRole(Enum):
''' Enumerating semantic types for different parse patterns. ''' ''' Enumerating semantic types for different parse patterns. '''
@ -19,8 +29,8 @@ class SemanticRole(Enum):
definition = 3 definition = 3
@staticmethod @staticmethod
def from_pos(pos: str) -> SemanticRole: def from_POS(pos: Optional[str]) -> SemanticRole:
''' Fabric method to produce types from part of speech. ''' ''' Production method: types from part of speech. '''
if pos in ['NOUN', 'NPRO']: if pos in ['NOUN', 'NPRO']:
return SemanticRole.term return SemanticRole.term
elif pos in ['VERB', 'INFN', 'PRTF', 'PRTS']: elif pos in ['VERB', 'INFN', 'PRTF', 'PRTS']:
@ -36,10 +46,7 @@ class Morphology:
''' '''
def __init__(self, tag: WordTag, semantic=SemanticRole.unknwn): def __init__(self, tag: WordTag, semantic=SemanticRole.unknwn):
self.tag = tag self.tag = tag
self.semantic = semantic if semantic != SemanticRole.unknwn else SemanticRole.from_pos(tag.POS) self.semantic = semantic if semantic != SemanticRole.unknwn else SemanticRole.from_POS(tag.POS)
def __del__(self):
pass
_TAGS_IMMUTABLE = frozenset(['INFN', 'ADVB', 'COMP', 'PNCT', 'PREP', 'CONJ', 'PRCL', 'INTJ']) _TAGS_IMMUTABLE = frozenset(['INFN', 'ADVB', 'COMP', 'PNCT', 'PREP', 'CONJ', 'PRCL', 'INTJ'])
@ -60,9 +67,9 @@ class Morphology:
return pos in ['ADJF', 'ADJS', 'PRTF', 'PRTS'] return pos in ['ADJF', 'ADJS', 'PRTF', 'PRTS']
@property @property
def effective_POS(self) -> str: def effective_POS(self) -> Optional[str]:
''' Access part of speech. Pronouns are considered as nouns ''' ''' Access part of speech. Pronouns are considered as nouns '''
pos: str = self.tag.POS pos: Optional[str] = self.tag.POS
if pos and self.tag.POS == 'NPRO': if pos and self.tag.POS == 'NPRO':
return 'NOUN' return 'NOUN'
return pos return pos
@ -105,14 +112,6 @@ class Morphology:
result.add(self.tag.gender) result.add(self.tag.gender)
return result return result
def as_str(self) -> str: def to_text(self) -> str:
''' Produce string of all grammemes. ''' ''' Produce string of all grammemes. '''
grammemes = self.tag.grammemes return combine_tags(self.tag.grammemes)
count = len(grammemes)
if count == 0:
return ''
elif count == 1:
result: str = next(iter(grammemes))
return result
else:
return ','.join(grammemes)

View File

@ -381,7 +381,7 @@ class PhraseParser:
case = form.tag.case case = form.tag.case
if pos not in ['ADJF', 'ADJS', 'PRTF', 'PRTS']: if pos not in ['ADJF', 'ADJS', 'PRTF', 'PRTS']:
continue continue
if SemanticRole.from_pos(pos) == SemanticRole.term and case == 'gent': if SemanticRole.from_POS(pos) == SemanticRole.term and case == 'gent':
if before_main: if before_main:
continue continue
else: else:

View File

@ -1 +1,8 @@
''' Tests. ''' ''' Tests. '''
from .t_reference import *
from .t_ruparser import *
from .t_syntax import *
from .t_conceptapi import *
from .t_rumodel import *
from .t_context import *
from .t_resolver import *

View File

@ -1,4 +1,4 @@
'''Test Concept Text API''' ''' Unit tests: conceptapi. '''
import unittest import unittest
import cctext as cc import cctext as cc
@ -7,12 +7,12 @@ import cctext as cc
class TestConceptAPI(unittest.TestCase): class TestConceptAPI(unittest.TestCase):
'''Test class for Concept API.''' '''Test class for Concept API.'''
def _assert_tags(self, actual: str, expected: str): def _assert_tags(self, actual: str, expected: str):
self.assertEqual(cc.split_tags(actual), cc.split_tags(expected)) self.assertEqual(set(cc.split_tags(actual)), set(cc.split_tags(expected)))
def test_parse(self): def test_parse(self):
''' Test parsing. ''' ''' Test parsing. '''
self._assert_tags(cc.parse(''), '') self._assert_tags(cc.parse(''), '')
self._assert_tags(cc.parse('1'), 'intg,NUMB') self._assert_tags(cc.parse('1'), 'NUMB,intg')
self._assert_tags(cc.parse('слон', require_tags='masc'), 'NOUN,anim,masc,sing,nomn') self._assert_tags(cc.parse('слон', require_tags='masc'), 'NOUN,anim,masc,sing,nomn')
def test_normalize_word(self): def test_normalize_word(self):

View File

@ -0,0 +1,32 @@
''' Unit tests: context. '''
import unittest
from cctext.context import TermForm, Entity, TermContext
class TestEntity(unittest.TestCase):
    '''Test Entity termform access.'''

    def setUp(self):
        # Entity 'X1' with a Russian nominal form and one manually supplied form.
        self.alias = 'X1'
        self.nominal = 'человек'
        self.text1 = 'test1'
        self.form1 = 'sing,datv'
        self.entity = Entity(self.alias, self.nominal, [TermForm(self.text1, self.form1)])

    def test_attributes(self):
        ''' Constructor arguments are exposed via attributes and getters. '''
        self.assertEqual(self.entity.alias, self.alias)
        self.assertEqual(self.entity.get_nominal(), self.nominal)
        self.assertEqual(self.entity.manual, [TermForm(self.text1, self.form1)])

    def test_get_form(self):
        ''' Empty form returns the nominal; manual forms take precedence;
        unknown grammemes yield an error marker; other forms are inflected. '''
        self.assertEqual(self.entity.get_form(''), self.nominal)
        self.assertEqual(self.entity.get_form(self.form1), self.text1)
        self.assertEqual(self.entity.get_form('invalid tags'), '!Неизвестная граммема: invalid tags!')
        self.assertEqual(self.entity.get_form('plur'), 'люди')

    def test_set_nominal(self):
        ''' Changing the nominal clears manual and cached forms. '''
        new_nomial = 'TEST'
        self.assertEqual(self.entity.get_form('plur'), 'люди')
        self.entity.set_nominal(new_nomial)
        self.assertEqual(self.entity.get_nominal(), new_nomial)
        self.assertEqual(self.entity.get_form('plur'), new_nomial)
        self.assertEqual(self.entity.manual, [])

View File

@ -0,0 +1,43 @@
''' Unit tests: reference. '''
import unittest
from cctext import EntityReference, ReferenceType, SyntacticReference, parse_reference
class TestReferences(unittest.TestCase):
    ''' Test class for references. '''

    def test_EntityReference(self):
        ''' Testing EntityReference basics. '''
        ref = EntityReference('X1', 'sing,nomn')
        self.assertEqual(ref.get_type(), ReferenceType.entity)
        self.assertEqual(ref.to_text(), '@{X1|sing,nomn}')

    def test_SyntacticReference(self):
        ''' Testing SyntacticReference basics. '''
        ref = SyntacticReference(-1, 'черный')
        self.assertEqual(ref.get_type(), ReferenceType.syntactic)
        self.assertEqual(ref.to_text(), '@{-1|черный}')

    def test_parse_reference_invalid(self):
        ''' Testing parsing reference invalid input. '''
        self.assertIsNone(parse_reference(''))
        self.assertIsNone(parse_reference('X1'))
        self.assertIsNone(parse_reference('invalid'))
        self.assertIsNone(parse_reference(' '))
        self.assertIsNone(parse_reference('@{|}'))
        self.assertIsNone(parse_reference('@{ | }'))
        self.assertIsNone(parse_reference('@{-1| }'))
        self.assertIsNone(parse_reference('@{1| }'))
        # Zero offset is rejected: syntactic offsets are 1-based in both directions.
        self.assertIsNone(parse_reference('@{0|черный}'))

    def test_parse_reference(self):
        ''' Testing parsing reference text. '''
        # Whitespace inside the braces is stripped during parsing.
        ref = parse_reference('@{1| черный }')
        self.assertIsNotNone(ref)
        self.assertEqual(ref.to_text(), '@{1|черный}')
        self.assertEqual(ref.get_type(), ReferenceType.syntactic)
        ref = parse_reference('@{X1 | VERB, past, sing}')
        self.assertIsNotNone(ref)
        self.assertEqual(ref.to_text(), '@{X1|VERB,past,sing}')
        self.assertEqual(ref.get_type(), ReferenceType.entity)

View File

@ -0,0 +1,76 @@
''' Unit tests: resolver. '''
import unittest
from typing import cast
from cctext import (
EntityReference, TermContext, Entity, SyntacticReference,
Resolver, ResolvedReference, Position,
resolve_entity, resolve_syntactic
)
class TestResolver(unittest.TestCase):
    '''Test reference Resolver.'''

    def setUp(self):
        # Context with a single entity: X1 -> 'человек'.
        self.context = cast(TermContext, {})
        self.context['X1'] = Entity('X1', 'человек')
        self.resolver = Resolver(self.context)

    def test_resolve_entity(self):
        ''' Entity references resolve via the context; unknown grammemes and
        unknown aliases produce inline error markers. '''
        self.assertEqual(resolve_entity(EntityReference('X1', ''), self.context), 'человек')
        self.assertEqual(resolve_entity(EntityReference('X1', 'plur'), self.context), 'люди')
        self.assertEqual(resolve_entity(EntityReference('X1', 'invalid'), self.context), '!Неизвестная граммема: invalid!')
        self.assertEqual(resolve_entity(EntityReference('X123', 'plur'), self.context), '!Неизвестная сущность: X123!')

    def test_resolve_syntactic(self):
        ''' Offsets count entity references forward (positive) or backward
        (negative); out-of-range offsets produce an error marker. '''
        ref = ResolvedReference(ref=EntityReference('X1', 'sing,datv'), resolved='человеку')
        allrefs = [ref, ref, ref, ref]
        self.assertEqual(resolve_syntactic(SyntacticReference(text='умный', referal_offset=-1), 0, allrefs), '!Некорректное смещение: -1!')
        self.assertEqual(resolve_syntactic(SyntacticReference(text='умный', referal_offset=1), 3, allrefs), '!Некорректное смещение: 1!')
        self.assertEqual(resolve_syntactic(SyntacticReference(text='умный', referal_offset=1), 0, allrefs), 'умному')
        self.assertEqual(resolve_syntactic(SyntacticReference(text='умный', referal_offset=2), 0, allrefs), 'умному')
        self.assertEqual(resolve_syntactic(SyntacticReference(text='умный', referal_offset=3), 0, allrefs), 'умному')
        self.assertEqual(resolve_syntactic(SyntacticReference(text='умный', referal_offset=-1), 3, allrefs), 'умному')
        self.assertEqual(resolve_syntactic(SyntacticReference(text='умный', referal_offset=-2), 3, allrefs), 'умному')
        self.assertEqual(resolve_syntactic(SyntacticReference(text='умный', referal_offset=-3), 3, allrefs), 'умному')

    def test_resolve_invalid(self):
        ''' Text without parsable references passes through unchanged. '''
        self.assertEqual(self.resolver.resolve(''), '')
        self.assertEqual(len(self.resolver.refs), 0)
        self.assertEqual(self.resolver.resolve('simple text'), 'simple text')
        self.assertEqual(len(self.resolver.refs), 0)
        self.assertEqual(self.resolver.resolve('simple @{unparsable ref} text'), 'simple @{unparsable ref} text')
        self.assertEqual(len(self.resolver.refs), 0)

    def test_resolve_single(self):
        ''' Single references: positions are tracked in both input and output. '''
        self.assertEqual(self.resolver.resolve('просто @{-1|умный} текст'), 'просто !Некорректное смещение: -1! текст')
        self.assertEqual(len(self.resolver.refs), 1)
        self.assertEqual(self.resolver.refs[0].pos_input, Position(7, 18))
        self.assertEqual(self.resolver.refs[0].pos_output, Position(7, 34))
        self.assertEqual(self.resolver.resolve('просто @{X123|sing,nomn} текст'), 'просто !Неизвестная сущность: X123! текст')
        self.assertEqual(len(self.resolver.refs), 1)
        self.assertEqual(self.resolver.refs[0].pos_input, Position(7, 24))
        self.assertEqual(self.resolver.refs[0].pos_output, Position(7, 35))
        self.assertEqual(self.resolver.resolve('@{X1|sing,nomn}'), 'человек')
        self.assertEqual(len(self.resolver.refs), 1)
        self.assertEqual(self.resolver.refs[0].pos_input, Position(0, 15))
        self.assertEqual(self.resolver.refs[0].pos_output, Position(0, 7))
        self.assertEqual(self.resolver.resolve('просто @{X1|sing,nomn} текст'), 'просто человек текст')
        self.assertEqual(len(self.resolver.refs), 1)
        self.assertEqual(self.resolver.refs[0].pos_input, Position(7, 22))
        self.assertEqual(self.resolver.refs[0].pos_output, Position(7, 14))

    def test_resolve_multiple(self):
        ''' Mixed entity and syntactic references in one text. '''
        # NOTE(review): 'input' shadows the builtin; rename locally when touching this test.
        input = '@{X1|sing,datv} @{-1|умный} @{X1|plur} завидуют'
        self.assertEqual(self.resolver.resolve(input), 'человеку умному люди завидуют')
        self.assertEqual(len(self.resolver.refs), 3)
        self.assertEqual(self.resolver.refs[0].pos_input, Position(0, 15))
        self.assertEqual(self.resolver.refs[0].pos_output, Position(0, 8))
        self.assertEqual(self.resolver.refs[1].pos_input, Position(16, 27))
        self.assertEqual(self.resolver.refs[1].pos_output, Position(9, 15))
        self.assertEqual(self.resolver.refs[2].pos_input, Position(28, 38))
        self.assertEqual(self.resolver.refs[2].pos_output, Position(16, 20))

View File

@ -0,0 +1,18 @@
''' Unit tests: rumodel. '''
import unittest
from cctext import split_tags, combine_tags
class TestTags(unittest.TestCase):
    '''Test tags manipulation.'''

    def test_split_tags(self):
        ''' split_tags turns a comma-separated grammeme string into a list;
        empty input yields an empty list. '''
        self.assertEqual(split_tags(''), [])
        self.assertEqual(split_tags('NOUN'), ['NOUN'])
        self.assertEqual(split_tags('NOUN,plur,sing'), ['NOUN','plur','sing'])

    def test_combine_tags(self):
        ''' combine_tags joins grammemes with commas (inverse of split_tags). '''
        self.assertEqual(combine_tags([]), '')
        self.assertEqual(combine_tags(['NOUN']), 'NOUN')
        self.assertEqual(combine_tags(['NOUN','plur','sing']), 'NOUN,plur,sing')

View File

@ -1,4 +1,4 @@
''' Test russian language parsing. ''' ''' Unit tests: ruparser. '''
import unittest import unittest
from typing import Iterable, Optional from typing import Iterable, Optional
@ -10,7 +10,7 @@ parser = PhraseParser()
class TestRuParser(unittest.TestCase): class TestRuParser(unittest.TestCase):
''' Test class for russian parsing. ''' ''' Test class for russian parsing. '''
def _assert_parse(self, text: str, expected: list[str], def _assert_parse(self, text: str, expected: Iterable[str],
require_index: int = -1, require_index: int = -1,
require_tags: Optional[Iterable[str]] = None): require_tags: Optional[Iterable[str]] = None):
phrase = parser.parse(text, require_index, require_tags) phrase = parser.parse(text, require_index, require_tags)

View File

@ -1,10 +1,10 @@
'''Test module for Russian syntax''' ''' Unit tests: syntax. '''
import unittest import unittest
from cctext import RuSyntax, Capitalization from cctext import RuSyntax, Capitalization
class TestRusParser(unittest.TestCase): class TestRusSyntax(unittest.TestCase):
''' Test class for russian syntax. ''' ''' Test class for russian syntax. '''
def test_capitalization(self): def test_capitalization(self):