mirror of https://github.com/IRBorisov/ConceptPortal.git
synced 2025-06-26 04:50:36 +03:00

Implement text reference resolution for backend

This commit is contained in:
  parent 7cd76f6004
  commit 7dbbbab15a

@@ -40,6 +40,7 @@ coverage.xml
 *.py,cover
 .hypothesis/
 .pytest_cache/
+.mypy_cache/
 cover/
 
 

1  .gitignore (vendored)

@@ -35,6 +35,7 @@ coverage.xml
 .hypothesis/
 .pytest_cache/
 cover/
+.mypy_cache/
 
 
 # Django

17  .vscode/settings.json (vendored)

@@ -1,4 +1,8 @@
 {
+    "search.exclude": {
+        ".mypy_cache/": true,
+        ".pytest_cache/": true
+    },
     "python.testing.unittestArgs": [
         "-v",
         "-s",

@@ -14,5 +18,16 @@
         }
     ],
     "python.linting.enabled": true,
-    "python.linting.mypyEnabled": true
+    "python.linting.mypyEnabled": true,
+    "python.analysis.typeCheckingMode": "off",
+    "python.analysis.diagnosticSeverityOverrides": {
+        // "reportOptionalMemberAccess": "none"
+    },
+    "python.analysis.ignore": ["**/tests/**", "**/node_modules/**", "**/venv/**"],
+    "python.analysis.packageIndexDepths": [
+        {
+            "name": "django",
+            "depth": 5
+        }
+    ]
 }

@@ -70,6 +70,8 @@ This readme file is used mostly to document project dependencies
 - gunicorn
 - coreapi
 - psycopg2-binary
+- pymorphy2
+- razdel
 </pre>
 </details>
 <details>

@@ -87,6 +89,7 @@ This readme file is used mostly to document project dependencies
 <pre>
 - Pylance
 - Pylint
+- Django
 </pre>
 </details>
 

@@ -1,5 +1,4 @@
 ''' Tests. '''
 # flake8: noqa
 from .t_imports import *
 from .t_views import *
 from .t_models import *

@@ -8,13 +8,21 @@ from rest_framework.permissions import BasePermission
 class ObjectOwnerOrAdmin(BasePermission):
     ''' Permission for object ownership restriction '''
     def has_object_permission(self, request, view, obj):
-        return request.user == obj.owner or request.user.is_staff
+        if request.user == obj.owner:
+            return True
+        if not hasattr(request.user, 'is_staff'):
+            return False
+        return request.user.is_staff # type: ignore
 
 
 class SchemaOwnerOrAdmin(BasePermission):
     ''' Permission for object ownership restriction '''
     def has_object_permission(self, request, view, obj):
-        return request.user == obj.schema.owner or request.user.is_staff
+        if request.user == obj.schema.owner:
+            return True
+        if not hasattr(request.user, 'is_staff'):
+            return False
+        return request.user.is_staff # type: ignore
 
 
 def read_trs(file) -> dict:
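
For orientation, these are standard DRF object-level permissions and plug into views through permission_classes. A minimal usage sketch; the view, model, serializer and import paths below are illustrative assumptions, not part of this commit:

    from rest_framework import generics

    from .models import Constituenta                 # assumed model exposing .schema.owner
    from .serializers import ConstituentaSerializer  # assumed serializer
    from .utils import SchemaOwnerOrAdmin            # permission class from the hunk above (module path assumed)

    class ConstituentaDetail(generics.RetrieveUpdateAPIView):
        ''' Detail endpoint restricted to the schema owner or staff. '''
        queryset = Constituenta.objects.all()
        serializer_class = ConstituentaSerializer
        permission_classes = [SchemaOwnerOrAdmin]
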
@@ -1,14 +1,16 @@
 ''' Concept core text processing library. '''
 # pylint: skip-file
 from .syntax import RuSyntax, Capitalization
-from .rumodel import Morphology, SemanticRole, WordTag, morpho
+from .rumodel import Morphology, SemanticRole, WordTag, morpho, split_tags, combine_tags
 from .ruparser import PhraseParser, WordToken, Collation
+from .reference import EntityReference, ReferenceType, SyntacticReference, parse_reference
+from .context import TermForm, Entity, TermContext
+from .resolver import Position, Resolver, ResolvedReference, resolve_entity, resolve_syntactic
 
 from .conceptapi import (
     parse, normalize,
     get_all_forms, inflect, inflect_context, inflect_substitute, inflect_dependant,
-    match_all_morpho, find_substr,
-    split_tags
+    match_all_morpho, find_substr
 )
 
 # TODO: implement Part of speech transition for VERB <-> NOUN

@@ -1,27 +1,23 @@
 '''
 Concept API Python functions.
 
-::guarantee:: doesnt raise exceptions and returns workable outputs in situations where empty string would be returned
+::guarantee:: doesnt raise exceptions and returns workable outputs
 '''
 from cctext.rumodel import Morphology
 from .syntax import RuSyntax
 from .ruparser import PhraseParser
+from .rumodel import split_tags
 
 parser = PhraseParser()
 
 
-def split_tags(tags: str) -> frozenset[str]:
-    ''' Split grammemes string into set of items. '''
-    return frozenset([tag.strip() for tag in filter(None, tags.split(','))])
-
-
 def parse(text: str, require_tags: str = '') -> str:
     ''' Determine morpho tags for input text.
     ::returns:: string of comma separated grammar tags or empty string '''
     model = parser.parse(text, require_tags=split_tags(require_tags))
     if model is None:
         return ''
-    result = model.get_morpho().as_str()
+    result = model.get_morpho().to_text()
     return result if result != 'UNKN' else ''

@@ -32,7 +28,7 @@ def get_all_forms(text_normal: str) -> list[tuple[str, str]]:
         return []
     result = []
     for form in model.get_form().lexeme:
-        result.append((form.word, Morphology(form.tag).as_str()))
+        result.append((form.word, Morphology(form.tag).to_text()))
     return result
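
A quick usage sketch of the conceptapi surface touched above (illustrative only; the exact grammemes returned depend on the installed pymorphy2 dictionaries):

    import cctext as cc

    print(cc.parse('слон'))                 # e.g. 'NOUN,anim,masc,sing,nomn'
    print(cc.inflect('слон', 'plur,datv'))  # e.g. 'слонам'
    for word, tags in cc.get_all_forms('слон')[:3]:
        print(word, tags)                   # each lexeme form with its comma separated tags
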
62  rsconcept/backend/cctext/context.py (new file)

@@ -0,0 +1,62 @@
+''' Term context for reference resolution. '''
+from typing import Iterable, Dict, Optional
+from dataclasses import dataclass
+
+from .conceptapi import inflect
+
+
+@dataclass
+class TermForm:
+    ''' Term in a specific form. '''
+    text: str
+    form: str
+
+
+def _search_form(query: str, data: Iterable[TermForm]) -> Optional[str]:
+    for tf in data:
+        if tf.form == query:
+            return tf.text
+    return None
+
+
+class Entity:
+    ''' Text entity. '''
+    def __init__(self, alias: str, nominal: str, manual_forms: Optional[Iterable[TermForm]]=None):
+        if manual_forms is None:
+            self.manual = []
+        else:
+            self.manual = list(manual_forms)
+        self.alias = alias
+        self._nominal = nominal
+        self._cached: list[TermForm] = []
+
+    def get_nominal(self) -> str:
+        ''' Getter for _nominal. '''
+        return self._nominal
+
+    def set_nominal(self, new_text: str):
+        ''' Setter for _nominal.
+        Note: clears manual and cached forms. '''
+        if self._nominal == new_text:
+            return
+        self._nominal = new_text
+        self.manual = []
+        self._cached = []
+
+    def get_form(self, form: str) -> str:
+        ''' Get specific term form. '''
+        if form == '':
+            return self._nominal
+        text = _search_form(form, self.manual)
+        if text is None:
+            text = _search_form(form, self._cached)
+            if text is None:
+                try:
+                    text = inflect(self._nominal, form)
+                except ValueError as error:
+                    text = f'!{error}!'.replace('Unknown grammeme', 'Неизвестная граммема')
+                self._cached.append(TermForm(text=text, form=form))
+        return text
+
+
+# Term context for resolving entity references.
+TermContext = Dict[str, Entity]
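
A minimal usage sketch of the new Entity and TermContext API (it mirrors the unit tests added below; the inflected outputs assume the standard pymorphy2 dictionaries):

    from cctext import Entity, TermContext, TermForm

    context: TermContext = {}
    context['X1'] = Entity('X1', 'человек', manual_forms=[TermForm(text='люди', form='plur')])

    print(context['X1'].get_form(''))      # 'человек'  (empty form returns the nominal)
    print(context['X1'].get_form('plur'))  # 'люди'     (served from the manual forms)
    print(context['X1'].get_form('datv'))  # inflected on demand and cached, e.g. 'человеку'
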
60  rsconcept/backend/cctext/reference.py (new file)

@@ -0,0 +1,60 @@
+''' Text reference API. '''
+from enum import Enum, unique
+from typing import Optional, Union
+
+
+@unique
+class ReferenceType(Enum):
+    ''' Text reference types. '''
+    entity = 'entity'
+    syntactic = 'syntax'
+
+
+class EntityReference:
+    ''' Reference to entity. '''
+
+    def __init__(self, identifier: str, form: str):
+        self.entity = identifier
+        self.form = form
+
+    def get_type(self) -> ReferenceType:
+        return ReferenceType.entity
+
+    def to_text(self) -> str:
+        return f'@{{{self.entity}|{self.form}}}'
+
+
+class SyntacticReference:
+    ''' Reference to syntactic dependcy on EntityReference. '''
+
+    def __init__(self, referal_offset: int, text: str):
+        self.nominal = text
+        self.offset = referal_offset
+
+    def get_type(self) -> ReferenceType:
+        return ReferenceType.syntactic
+
+    def to_text(self) -> str:
+        return f'@{{{self.offset}|{self.nominal}}}'
+
+
+Reference = Union[EntityReference, SyntacticReference]
+
+
+def parse_reference(text: str) -> Optional[Reference]:
+    if len(text) < 4 or text[-1] != '}' or text[0:2] != '@{':
+        return None
+    blocks: list[str] = [block.strip() for block in text[2:-1].split('|')]
+    if len(blocks) != 2 or blocks[0] == '' or blocks[0][0] in '0':
+        return None
+    if blocks[0][0] in '-123456789':
+        if blocks[1] == '':
+            return None
+        try:
+            offset = int(blocks[0])
+            return SyntacticReference(offset, blocks[1])
+        except ValueError:
+            return None
+    else:
+        form = blocks[1].replace(' ', '')
+        return EntityReference(blocks[0], form)
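
For illustration, the reference grammar in one round trip (the calls and expected values come from the unit tests added below):

    from cctext import parse_reference, ReferenceType

    ref = parse_reference('@{X1 | VERB, past, sing}')    # entity reference, form gets normalized
    assert ref is not None
    assert ref.get_type() == ReferenceType.entity
    assert ref.to_text() == '@{X1|VERB,past,sing}'

    ref = parse_reference('@{-1|умный}')                 # syntactic reference to the previous entity reference
    assert ref is not None and ref.get_type() == ReferenceType.syntactic
    assert parse_reference('@{0|черный}') is None        # zero offset is rejected
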
114  rsconcept/backend/cctext/resolver.py (new file)

@@ -0,0 +1,114 @@
+''' Reference resolution API. '''
+import re
+from typing import cast, Optional
+from dataclasses import dataclass
+
+from .conceptapi import inflect_dependant
+from .context import TermContext
+from .reference import EntityReference, SyntacticReference, parse_reference, Reference
+
+
+def resolve_entity(ref: EntityReference, context: TermContext) -> str:
+    ''' Resolve entity reference. '''
+    alias = ref.entity
+    if alias not in context:
+        return f'!Неизвестная сущность: {alias}!'
+    return context[alias].get_form(ref.form)
+
+
+def resolve_syntactic(ref: SyntacticReference, index: int, allrefs: list['ResolvedReference']) -> str:
+    ''' Resolve syntactic reference. '''
+    offset = ref.offset
+    mainref: Optional['ResolvedReference'] = None
+    if offset > 0:
+        index += 1
+        while index < len(allrefs):
+            if isinstance(allrefs[index].ref, EntityReference):
+                if offset == 1:
+                    mainref = allrefs[index]
+                else:
+                    offset -= 1
+            index += 1
+    else:
+        index -= 1
+        while index >= 0:
+            if isinstance(allrefs[index].ref, EntityReference):
+                if offset == -1:
+                    mainref = allrefs[index]
+                else:
+                    offset += 1
+            index -= 1
+    if mainref is None:
+        return f'!Некорректное смещение: {ref.offset}!'
+    return inflect_dependant(ref.nominal, mainref.resolved)
+
+
+@dataclass
+class Position:
+    ''' 0-indexed contiguous segment position in text. '''
+    start: int = 0
+    finish: int = 0
+
+
+@dataclass
+class ResolvedReference:
+    ''' Resolved reference data '''
+    ref: Reference
+    resolved: str = ''
+    pos_input: Position = Position()
+    pos_output: Position = Position()
+
+
+class Resolver:
+    ''' Text reference resolver '''
+    REFERENCE_PATTERN = re.compile(r'@{.*?}')
+
+    def __init__(self, context: TermContext):
+        self.context = context
+        self.refs = cast(list[ResolvedReference], [])
+        self.input = ''
+        self.output = ''
+
+    def resolve(self, text: str) -> str:
+        ''' Resolve references in input text.
+        Note: data on references positions is accessed through class attributes '''
+        self._reset(text)
+        self._parse_refs()
+        if len(self.refs) == 0:
+            self.output = self.input
+            return self.output
+        else:
+            self._resolve_refs()
+            self._combine_output()
+            return self.output
+
+    def _reset(self, input_text: str):
+        self.refs = cast(list[ResolvedReference], [])
+        self.input = input_text
+        self.output = ''
+
+    def _parse_refs(self):
+        for segment in re.finditer(Resolver.REFERENCE_PATTERN, self.input):
+            parse = parse_reference(segment[0])
+            if parse is not None:
+                ref_info = ResolvedReference(ref=parse,
+                                             resolved='',
+                                             pos_input=Position(segment.start(0), segment.end(0)))
+                self.refs.append(ref_info)
+
+    def _resolve_refs(self):
+        for ref in self.refs:
+            if isinstance(ref.ref, EntityReference):
+                ref.resolved = resolve_entity(ref.ref, self.context)
+        for (index, ref) in enumerate(self.refs):
+            if isinstance(ref.ref, SyntacticReference):
+                ref.resolved = resolve_syntactic(ref.ref, index, self.refs)
+
+    def _combine_output(self):
+        pos_in = 0
+        for ref in self.refs:
+            self.output += self.input[pos_in : ref.pos_input.start]
+            self.output += ref.resolved
+            ref.pos_output = Position(len(self.output) - len(ref.resolved), len(self.output))
+            pos_in = ref.pos_input.finish
+        self.output += self.input[pos_in : len(self.input)]
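
Putting it together, a short usage sketch of the Resolver (the input string and expected outputs are taken from the unit tests added below):

    from cctext import Entity, Resolver, TermContext

    context: TermContext = {'X1': Entity('X1', 'человек')}
    resolver = Resolver(context)

    text = '@{X1|sing,datv} @{-1|умный} @{X1|plur} завидуют'
    print(resolver.resolve(text))        # 'человеку умному люди завидуют'
    print(resolver.refs[0].pos_input)    # Position(start=0, finish=15)
    print(resolver.refs[0].pos_output)   # Position(start=0, finish=8)
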
@@ -1,7 +1,7 @@
 ''' Russian language models. '''
 from __future__ import annotations
 from enum import Enum, unique
-from typing import Iterable
+from typing import Iterable, Optional
 
 from pymorphy2 import MorphAnalyzer
 from pymorphy2.tagset import OpencorporaTag as WordTag

@@ -10,6 +10,16 @@ from pymorphy2.tagset import OpencorporaTag as WordTag
 morpho = MorphAnalyzer()
 
 
+def split_tags(text: str) -> list[str]:
+    ''' Split grammemes string into set of items. '''
+    return [tag.strip() for tag in filter(None, text.split(','))]
+
+
+def combine_tags(tags: Iterable[str]) -> str:
+    ''' Combine grammemes into string. '''
+    return ','.join(tags)
+
+
 @unique
 class SemanticRole(Enum):
     ''' Enumerating semantic types for different parse patterns. '''

@@ -19,8 +29,8 @@ class SemanticRole(Enum):
     definition = 3
 
     @staticmethod
-    def from_pos(pos: str) -> SemanticRole:
-        ''' Fabric method to produce types from part of speech. '''
+    def from_POS(pos: Optional[str]) -> SemanticRole:
+        ''' Production method: types from part of speech. '''
         if pos in ['NOUN', 'NPRO']:
             return SemanticRole.term
         elif pos in ['VERB', 'INFN', 'PRTF', 'PRTS']:

@@ -36,10 +46,7 @@ class Morphology:
     '''
     def __init__(self, tag: WordTag, semantic=SemanticRole.unknwn):
         self.tag = tag
-        self.semantic = semantic if semantic != SemanticRole.unknwn else SemanticRole.from_pos(tag.POS)
-
-    def __del__(self):
-        pass
+        self.semantic = semantic if semantic != SemanticRole.unknwn else SemanticRole.from_POS(tag.POS)
 
     _TAGS_IMMUTABLE = frozenset(['INFN', 'ADVB', 'COMP', 'PNCT', 'PREP', 'CONJ', 'PRCL', 'INTJ'])
 

@@ -60,9 +67,9 @@ class Morphology:
         return pos in ['ADJF', 'ADJS', 'PRTF', 'PRTS']
 
     @property
-    def effective_POS(self) -> str:
+    def effective_POS(self) -> Optional[str]:
         ''' Access part of speech. Pronouns are considered as nouns '''
-        pos: str = self.tag.POS
+        pos: Optional[str] = self.tag.POS
         if pos and self.tag.POS == 'NPRO':
             return 'NOUN'
         return pos

@@ -105,14 +112,6 @@ class Morphology:
         result.add(self.tag.gender)
         return result
 
-    def as_str(self) -> str:
+    def to_text(self) -> str:
         ''' Produce string of all grammemes. '''
-        grammemes = self.tag.grammemes
-        count = len(grammemes)
-        if count == 0:
-            return ''
-        elif count == 1:
-            result: str = next(iter(grammemes))
-            return result
-        else:
-            return ','.join(grammemes)
+        return combine_tags(self.tag.grammemes)
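
The two new module-level helpers are straightforward inverses of each other, as exercised by the new unit tests:

    from cctext import split_tags, combine_tags

    tags = split_tags('NOUN,plur,sing')        # ['NOUN', 'plur', 'sing']
    assert combine_tags(tags) == 'NOUN,plur,sing'
    assert split_tags('') == []
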
@@ -381,7 +381,7 @@ class PhraseParser:
             case = form.tag.case
             if pos not in ['ADJF', 'ADJS', 'PRTF', 'PRTS']:
                 continue
-            if SemanticRole.from_pos(pos) == SemanticRole.term and case == 'gent':
+            if SemanticRole.from_POS(pos) == SemanticRole.term and case == 'gent':
                 if before_main:
                     continue
                 else:

@@ -1 +1,8 @@
 ''' Tests. '''
+from .t_reference import *
+from .t_ruparser import *
+from .t_syntax import *
+from .t_conceptapi import *
+from .t_rumodel import *
+from .t_context import *
+from .t_resolver import *

@@ -1,4 +1,4 @@
-'''Test Concept Text API'''
+''' Unit tests: conceptapi. '''
 import unittest
 
 import cctext as cc

@@ -7,12 +7,12 @@ import cctext as cc
 class TestConceptAPI(unittest.TestCase):
     '''Test class for Concept API.'''
     def _assert_tags(self, actual: str, expected: str):
-        self.assertEqual(cc.split_tags(actual), cc.split_tags(expected))
+        self.assertEqual(set(cc.split_tags(actual)), set(cc.split_tags(expected)))
 
     def test_parse(self):
         ''' Test parsing. '''
         self._assert_tags(cc.parse(''), '')
-        self._assert_tags(cc.parse('1'), 'intg,NUMB')
+        self._assert_tags(cc.parse('1'), 'NUMB,intg')
         self._assert_tags(cc.parse('слон', require_tags='masc'), 'NOUN,anim,masc,sing,nomn')
 
     def test_normalize_word(self):

32  rsconcept/backend/cctext/tests/t_context.py (new file)

@@ -0,0 +1,32 @@
+''' Unit tests: context. '''
+import unittest
+
+from cctext.context import TermForm, Entity, TermContext
+
+class TestEntity(unittest.TestCase):
+    '''Test Entity termform access.'''
+    def setUp(self):
+        self.alias = 'X1'
+        self.nominal = 'человек'
+        self.text1 = 'test1'
+        self.form1 = 'sing,datv'
+        self.entity = Entity(self.alias, self.nominal, [TermForm(self.text1, self.form1)])
+
+    def test_attributes(self):
+        self.assertEqual(self.entity.alias, self.alias)
+        self.assertEqual(self.entity.get_nominal(), self.nominal)
+        self.assertEqual(self.entity.manual, [TermForm(self.text1, self.form1)])
+
+    def test_get_form(self):
+        self.assertEqual(self.entity.get_form(''), self.nominal)
+        self.assertEqual(self.entity.get_form(self.form1), self.text1)
+        self.assertEqual(self.entity.get_form('invalid tags'), '!Неизвестная граммема: invalid tags!')
+        self.assertEqual(self.entity.get_form('plur'), 'люди')
+
+    def test_set_nominal(self):
+        new_nomial = 'TEST'
+        self.assertEqual(self.entity.get_form('plur'), 'люди')
+        self.entity.set_nominal(new_nomial)
+        self.assertEqual(self.entity.get_nominal(), new_nomial)
+        self.assertEqual(self.entity.get_form('plur'), new_nomial)
+        self.assertEqual(self.entity.manual, [])

43  rsconcept/backend/cctext/tests/t_reference.py (new file)

@@ -0,0 +1,43 @@
+''' Unit tests: reference. '''
+import unittest
+
+from cctext import EntityReference, ReferenceType, SyntacticReference, parse_reference
+
+class TestReferences(unittest.TestCase):
+    ''' Test class for references. '''
+
+    def test_EntityReference(self):
+        ''' Testing EntityRefence basics. '''
+        ref = EntityReference('X1', 'sing,nomn')
+        self.assertEqual(ref.get_type(), ReferenceType.entity)
+        self.assertEqual(ref.to_text(), '@{X1|sing,nomn}')
+
+    def test_SyntacticReference(self):
+        ''' Testing SyntacticReference basics. '''
+        ref = SyntacticReference(-1, 'черный')
+        self.assertEqual(ref.get_type(), ReferenceType.syntactic)
+        self.assertEqual(ref.to_text(), '@{-1|черный}')
+
+    def test_parse_reference_invalid(self):
+        ''' Testing parsing reference invalid input. '''
+        self.assertIsNone(parse_reference(''))
+        self.assertIsNone(parse_reference('X1'))
+        self.assertIsNone(parse_reference('invalid'))
+        self.assertIsNone(parse_reference(' '))
+        self.assertIsNone(parse_reference('@{|}'))
+        self.assertIsNone(parse_reference('@{ | }'))
+        self.assertIsNone(parse_reference('@{-1| }'))
+        self.assertIsNone(parse_reference('@{1| }'))
+        self.assertIsNone(parse_reference('@{0|черный}'))
+
+    def test_parse_reference(self):
+        ''' Testing parsing reference text. '''
+        ref = parse_reference('@{1| черный }')
+        self.assertIsNotNone(ref)
+        self.assertEqual(ref.to_text(), '@{1|черный}')
+        self.assertEqual(ref.get_type(), ReferenceType.syntactic)
+
+        ref = parse_reference('@{X1 | VERB, past, sing}')
+        self.assertIsNotNone(ref)
+        self.assertEqual(ref.to_text(), '@{X1|VERB,past,sing}')
+        self.assertEqual(ref.get_type(), ReferenceType.entity)

76  rsconcept/backend/cctext/tests/t_resolver.py (new file)

@@ -0,0 +1,76 @@
+''' Unit tests: resolver. '''
+import unittest
+from typing import cast
+
+from cctext import (
+    EntityReference, TermContext, Entity, SyntacticReference,
+    Resolver, ResolvedReference, Position,
+    resolve_entity, resolve_syntactic
+)
+
+class TestResolver(unittest.TestCase):
+    '''Test reference Resolver.'''
+    def setUp(self):
+        self.context = cast(TermContext, {})
+        self.context['X1'] = Entity('X1', 'человек')
+        self.resolver = Resolver(self.context)
+
+    def test_resolve_entity(self):
+        self.assertEqual(resolve_entity(EntityReference('X1', ''), self.context), 'человек')
+        self.assertEqual(resolve_entity(EntityReference('X1', 'plur'), self.context), 'люди')
+        self.assertEqual(resolve_entity(EntityReference('X1', 'invalid'), self.context), '!Неизвестная граммема: invalid!')
+        self.assertEqual(resolve_entity(EntityReference('X123', 'plur'), self.context), '!Неизвестная сущность: X123!')
+
+    def test_resolve_syntactic(self):
+        ref = ResolvedReference(ref=EntityReference('X1', 'sing,datv'), resolved='человеку')
+        allrefs = [ref, ref, ref, ref]
+        self.assertEqual(resolve_syntactic(SyntacticReference(text='умный', referal_offset=-1), 0, allrefs), '!Некорректное смещение: -1!')
+        self.assertEqual(resolve_syntactic(SyntacticReference(text='умный', referal_offset=1), 3, allrefs), '!Некорректное смещение: 1!')
+        self.assertEqual(resolve_syntactic(SyntacticReference(text='умный', referal_offset=1), 0, allrefs), 'умному')
+        self.assertEqual(resolve_syntactic(SyntacticReference(text='умный', referal_offset=2), 0, allrefs), 'умному')
+        self.assertEqual(resolve_syntactic(SyntacticReference(text='умный', referal_offset=3), 0, allrefs), 'умному')
+        self.assertEqual(resolve_syntactic(SyntacticReference(text='умный', referal_offset=-1), 3, allrefs), 'умному')
+        self.assertEqual(resolve_syntactic(SyntacticReference(text='умный', referal_offset=-2), 3, allrefs), 'умному')
+        self.assertEqual(resolve_syntactic(SyntacticReference(text='умный', referal_offset=-3), 3, allrefs), 'умному')
+
+    def test_resolve_invalid(self):
+        self.assertEqual(self.resolver.resolve(''), '')
+        self.assertEqual(len(self.resolver.refs), 0)
+
+        self.assertEqual(self.resolver.resolve('simple text'), 'simple text')
+        self.assertEqual(len(self.resolver.refs), 0)
+
+        self.assertEqual(self.resolver.resolve('simple @{unparsable ref} text'), 'simple @{unparsable ref} text')
+        self.assertEqual(len(self.resolver.refs), 0)
+
+    def test_resolve_single(self):
+        self.assertEqual(self.resolver.resolve('просто @{-1|умный} текст'), 'просто !Некорректное смещение: -1! текст')
+        self.assertEqual(len(self.resolver.refs), 1)
+        self.assertEqual(self.resolver.refs[0].pos_input, Position(7, 18))
+        self.assertEqual(self.resolver.refs[0].pos_output, Position(7, 34))
+
+        self.assertEqual(self.resolver.resolve('просто @{X123|sing,nomn} текст'), 'просто !Неизвестная сущность: X123! текст')
+        self.assertEqual(len(self.resolver.refs), 1)
+        self.assertEqual(self.resolver.refs[0].pos_input, Position(7, 24))
+        self.assertEqual(self.resolver.refs[0].pos_output, Position(7, 35))
+
+        self.assertEqual(self.resolver.resolve('@{X1|sing,nomn}'), 'человек')
+        self.assertEqual(len(self.resolver.refs), 1)
+        self.assertEqual(self.resolver.refs[0].pos_input, Position(0, 15))
+        self.assertEqual(self.resolver.refs[0].pos_output, Position(0, 7))
+
+        self.assertEqual(self.resolver.resolve('просто @{X1|sing,nomn} текст'), 'просто человек текст')
+        self.assertEqual(len(self.resolver.refs), 1)
+        self.assertEqual(self.resolver.refs[0].pos_input, Position(7, 22))
+        self.assertEqual(self.resolver.refs[0].pos_output, Position(7, 14))
+
+    def test_resolve_multiple(self):
+        input = '@{X1|sing,datv} @{-1|умный} @{X1|plur} завидуют'
+        self.assertEqual(self.resolver.resolve(input), 'человеку умному люди завидуют')
+        self.assertEqual(len(self.resolver.refs), 3)
+        self.assertEqual(self.resolver.refs[0].pos_input, Position(0, 15))
+        self.assertEqual(self.resolver.refs[0].pos_output, Position(0, 8))
+        self.assertEqual(self.resolver.refs[1].pos_input, Position(16, 27))
+        self.assertEqual(self.resolver.refs[1].pos_output, Position(9, 15))
+        self.assertEqual(self.resolver.refs[2].pos_input, Position(28, 38))
+        self.assertEqual(self.resolver.refs[2].pos_output, Position(16, 20))

18  rsconcept/backend/cctext/tests/t_rumodel.py (new file)

@@ -0,0 +1,18 @@
+''' Unit tests: rumodel. '''
+import unittest
+
+from cctext import split_tags, combine_tags
+
+
+class TestTags(unittest.TestCase):
+    '''Test tags manipulation.'''
+
+    def test_split_tags(self):
+        self.assertEqual(split_tags(''), [])
+        self.assertEqual(split_tags('NOUN'), ['NOUN'])
+        self.assertEqual(split_tags('NOUN,plur,sing'), ['NOUN','plur','sing'])
+
+    def test_combine_tags(self):
+        self.assertEqual(combine_tags([]), '')
+        self.assertEqual(combine_tags(['NOUN']), 'NOUN')
+        self.assertEqual(combine_tags(['NOUN','plur','sing']), 'NOUN,plur,sing')

@@ -1,4 +1,4 @@
-''' Test russian language parsing. '''
+''' Unit tests: ruparser. '''
 import unittest
 
 from typing import Iterable, Optional

@@ -10,7 +10,7 @@ parser = PhraseParser()
 class TestRuParser(unittest.TestCase):
     ''' Test class for russian parsing. '''
 
-    def _assert_parse(self, text: str, expected: list[str],
+    def _assert_parse(self, text: str, expected: Iterable[str],
                       require_index: int = -1,
                       require_tags: Optional[Iterable[str]] = None):
        phrase = parser.parse(text, require_index, require_tags)

@@ -1,10 +1,10 @@
-'''Test module for Russian syntax'''
+''' Unit tests: syntax. '''
 import unittest
 
 from cctext import RuSyntax, Capitalization
 
 
-class TestRusParser(unittest.TestCase):
+class TestRusSyntax(unittest.TestCase):
     ''' Test class for russian syntax. '''
 
     def test_capitalization(self):