Setup tools and fix linter issues

This commit is contained in:
IRBorisov 2024-04-11 23:25:09 +03:00
parent cc94f805b8
commit 1e8166c9ac
21 changed files with 221 additions and 102 deletions

6
.flake8 Normal file
View File

@ -0,0 +1,6 @@
[flake8]
# E303 - too many blank lines
# E203 - whitespace before ',', ';' or ':'
ignore = E303, E203
exclude = __init__.py
max-line-length = 120

7
.vscode/launch.json vendored
View File

@ -17,6 +17,13 @@
"request": "launch", "request": "launch",
"script": "${workspaceFolder}/scripts/RunLint.ps1", "script": "${workspaceFolder}/scripts/RunLint.ps1",
"args": [] "args": []
},
{
"name": "Setup",
"type": "PowerShell",
"request": "launch",
"script": "${workspaceFolder}/scripts/Setup.ps1",
"args": []
} }
] ]
} }

40
.vscode/settings.json vendored
View File

@ -3,38 +3,58 @@
".mypy_cache/": true, ".mypy_cache/": true,
".pytest_cache/": true ".pytest_cache/": true
}, },
"python.testing.unittestArgs": ["-v", "-s", "./tests", "-p", "test*.py"], "python.testing.unittestArgs": ["-v", "-s", "./tests", "-p", "t_*.py"],
"python.testing.pytestEnabled": false, "python.testing.pytestEnabled": false,
"python.testing.unittestEnabled": true, "python.testing.unittestEnabled": true,
"eslint.workingDirectories": [
{
"mode": "auto"
}
],
"python.analysis.typeCheckingMode": "off", "python.analysis.typeCheckingMode": "off",
"python.analysis.ignore": ["**/tests/**", "**/node_modules/**", "**/venv/**"], "python.analysis.ignore": ["**/tests/**", "**/venv/**"],
"cSpell.words": [ "cSpell.words": [
"ablt",
"accs",
"actv",
"ADJF", "ADJF",
"ADJS", "ADJS",
"ADVB", "ADVB",
"Anph",
"cctext",
"datv",
"femn",
"Fixd",
"Geox",
"grammeme",
"Grammemes", "Grammemes",
"GRND", "GRND",
"Impe",
"impr",
"inan",
"indc",
"INFN", "INFN",
"intg",
"INTJ", "INTJ",
"loct",
"moprho",
"multiword", "multiword",
"nomn",
"NPRO", "NPRO",
"NUMR", "NUMR",
"Opencorpora", "Opencorpora",
"Pltm",
"PNCT", "PNCT",
"PRCL", "PRCL",
"Prnt",
"PRTF", "PRTF",
"PRTS", "PRTS",
"pssv",
"pymorphy", "pymorphy",
"razdel", "razdel",
"rumodel", "rumodel",
"ruparser",
"Sgtm",
"tagset", "tagset",
"unknwn" "termform",
"unknwn",
"круть",
"фторметил"
], ],
"cSpell.language": "en,ru", "cSpell.language": "en,ru"
"cSpell.ignorePaths": ["node_modules/**", "*.json"]
} }

1
TODO.txt Normal file
View File

@ -0,0 +1 @@
- Implement part-of-speech transition for VERB <-> NOUN

View File

@ -1,16 +1,51 @@
''' Concept core text processing library. ''' ''' Concept core text processing library. '''
# pylint: skip-file # pylint: skip-file
from .syntax import RuSyntax, Capitalization from .syntax import (
from .rumodel import Morphology, SemanticRole, WordTag, morpho, split_grams, combine_grams RuSyntax,
from .ruparser import PhraseParser, WordToken, Collation Capitalization
from .reference import EntityReference, ReferenceType, SyntacticReference, parse_reference )
from .context import TermForm, Entity, TermContext from .rumodel import (
from .resolver import Reference, Position, Resolver, ResolvedReference, resolve_entity, resolve_syntactic, extract_entities Morphology,
SemanticRole,
from .conceptapi import ( WordTag,
parse, normalize, morpho,
generate_lexeme, inflect, inflect_context, inflect_substitute, inflect_dependant, split_grams,
match_all_morpho, find_substr combine_grams
)
from .ruparser import (
PhraseParser,
WordToken,
Collation
)
from .reference import (
EntityReference,
ReferenceType,
SyntacticReference,
parse_reference
)
from .context import (
TermForm,
Entity,
TermContext
)
from .resolver import (
Reference,
Position,
Resolver,
ResolvedReference,
resolve_entity,
resolve_syntactic,
extract_entities
) )
# TODO: implement Part of speech transition for VERB <-> NOUN from .api import (
parse,
normalize,
generate_lexeme,
inflect,
inflect_context,
inflect_substitute,
inflect_dependant,
match_all_morpho,
find_substr
)

View File

@ -1,5 +1,5 @@
''' '''
Concept API Python functions. Core API Python procedures.
::guarantee:: doesn't raise exceptions and returns workable outputs ::guarantee:: doesn't raise exceptions and returns workable outputs
''' '''
@ -21,11 +21,6 @@ def parse(text: str, require_grams: str = '') -> str:
return result if result != 'UNKN' else '' return result if result != 'UNKN' else ''
# def parse_variants(text: str, require_grams: str = '') -> list[tuple[str, str]]:
# ''' Get all variants of a parse.
# ::returns:: string of comma separated grammar tags or empty string '''
def generate_lexeme(text_normal: str) -> list[tuple[str, str]]: def generate_lexeme(text_normal: str) -> list[tuple[str, str]]:
''' Get all inflected forms belonging to same Lexeme. ''' ''' Get all inflected forms belonging to same Lexeme. '''
model = parser.parse(text_normal) model = parser.parse(text_normal)

View File

@ -17,8 +17,8 @@ class TermForm(TypedDict):
def _match_grams(query: Iterable[str], test: Iterable[str]) -> bool: def _match_grams(query: Iterable[str], test: Iterable[str]) -> bool:
''' Check if grams from test fit query. ''' ''' Check if grams from test fit query. '''
for gram in test: for gram in test:
if not gram in query: if gram not in query:
if not gram in WordTag.PARTS_OF_SPEECH: if gram not in WordTag.PARTS_OF_SPEECH:
return False return False
for pos in WordTag.PARTS_OF_SPEECH: for pos in WordTag.PARTS_OF_SPEECH:
if pos in query: if pos in query:
@ -35,7 +35,7 @@ def _search_form(query: Iterable[str], data: Iterable[TermForm]) -> Optional[str
class Entity: class Entity:
''' Represents text entity. ''' ''' Represents text entity. '''
def __init__(self, alias: str, nominal: str, manual_forms: Optional[Iterable[TermForm]]=None): def __init__(self, alias: str, nominal: str, manual_forms: Optional[Iterable[TermForm]] = None):
if manual_forms is None: if manual_forms is None:
self.manual = [] self.manual = []
else: else:

View File

@ -5,7 +5,7 @@ from dataclasses import dataclass
from .rumodel import split_grams from .rumodel import split_grams
from .conceptapi import inflect_dependant from .api import inflect_dependant
from .context import TermContext from .context import TermContext
from .reference import EntityReference, SyntacticReference, parse_reference, Reference from .reference import EntityReference, SyntacticReference, parse_reference, Reference

View File

@ -1,4 +1,4 @@
''' Parsing russian language using pymorphy3 library. ''' ''' Parsing russian language based on pymorphy3 library. '''
from __future__ import annotations from __future__ import annotations
from typing import Optional from typing import Optional
@ -155,9 +155,9 @@ class PhraseParser:
def parse(self, text: str, def parse(self, text: str,
require_index: int = INDEX_NONE, require_index: int = INDEX_NONE,
require_grams: Optional[Grammemes] = None) -> Optional[Collation]: require_grams: Optional[Grammemes] = None) -> Optional[Collation]:
''' '''
Determine morpho tags for input text. Determine morpho tags for input text.
::returns:: Morphology of a text or None if no suitable form is available ::returns:: Morphology of a text or None if no suitable form is available
''' '''
segments = list(RuSyntax.tokenize(text)) segments = list(RuSyntax.tokenize(text))
if len(segments) == 0: if len(segments) == 0:

View File

@ -1,4 +1,4 @@
''' Russian language syntax incapsulation. ''' ''' Russian language syntax. '''
from __future__ import annotations from __future__ import annotations
from enum import Enum, unique from enum import Enum, unique

Binary file not shown.

View File

@ -1,3 +1,2 @@
pymorphy3 pymorphy3
pymorphy3-dicts-ru
razdel razdel

11
scripts/Setup.ps1 Normal file
View File

@ -0,0 +1,11 @@
# Recreate the project's Python virtual environment from scratch and install
# the build requirements into it. Paths are relative to the repository root,
# so the script switches there first (it lives in the scripts/ subfolder).
Set-Location $PSScriptRoot\..

$venvDir = '.\venv'
$python = "$venvDir\Scripts\python.exe"

# Remove any existing environment directory, including a partially created
# one that has no python.exe yet, so stale packages never leak into the new one.
if (Test-Path -Path $venvDir -PathType Container) {
    Remove-Item $venvDir -Recurse -Force
}

# Create the environment using whichever 'python' is found on PATH.
& 'python' -m venv $venvDir

# Stop early if creation failed (e.g. python is not on PATH); otherwise the
# pip steps below would run against a missing interpreter.
if (-not (Test-Path -Path $python -PathType Leaf)) {
    Write-Error "Virtual environment was not created: $python is missing"
    exit 1
}

# Use the environment's own interpreter so packages install into the venv.
& $python -m pip install --upgrade pip
& $python -m pip install -r requirements-build.txt

View File

@ -1,10 +0,0 @@
# Run lint
function RunLinters() {
$pylint = "$PSScriptRoot\..\venv\Scripts\pylint.exe"
$mypy = "$PSScriptRoot\..\venv\Scripts\mypy.exe"
& $pylint cctext
& $mypy cctext
}
RunLinters

View File

@ -2,7 +2,7 @@
from .t_reference import * from .t_reference import *
from .t_ruparser import * from .t_ruparser import *
from .t_syntax import * from .t_syntax import *
from .t_conceptapi import * from .t_api import *
from .t_rumodel import * from .t_rumodel import *
from .t_context import * from .t_context import *
from .t_resolver import * from .t_resolver import *

View File

@ -1,4 +1,4 @@
''' Unit tests: conceptapi. ''' ''' Unit tests: api. '''
import unittest import unittest
import cctext as cc import cctext as cc
@ -8,19 +8,22 @@ class TestConceptAPI(unittest.TestCase):
'''Test class for Concept API.''' '''Test class for Concept API.'''
def _assert_tags(self, actual: str, expected: str): def _assert_tags(self, actual: str, expected: str):
self.assertEqual(set(cc.split_grams(actual)), set(cc.split_grams(expected))) self.assertEqual(set(cc.split_grams(actual)), set(cc.split_grams(expected)))
def test_parse(self): def test_parse(self):
''' Test parsing. ''' ''' Test parsing. '''
self._assert_tags(cc.parse(''), '') self._assert_tags(cc.parse(''), '')
self._assert_tags(cc.parse('1'), 'NUMB,intg') self._assert_tags(cc.parse('1'), 'NUMB,intg')
self._assert_tags(cc.parse('слон', require_grams='masc'), 'NOUN,anim,masc,sing,nomn') self._assert_tags(cc.parse('слон', require_grams='masc'), 'NOUN,anim,masc,sing,nomn')
def test_normalize_word(self): def test_normalize_word(self):
''' Test normalize for single word. ''' ''' Test normalize for single word. '''
self.assertEqual(cc.normalize(''), '') self.assertEqual(cc.normalize(''), '')
self.assertEqual(cc.normalize('первого'), 'первый') self.assertEqual(cc.normalize('первого'), 'первый')
self.assertEqual(cc.normalize('диких людей'), 'дикий человек') self.assertEqual(cc.normalize('диких людей'), 'дикий человек')
def test_generate_lexeme(self): def test_generate_lexeme(self):
''' Test all lexical forms. ''' ''' Test all lexical forms. '''
self.assertEqual(cc.generate_lexeme(''), []) self.assertEqual(cc.generate_lexeme(''), [])
@ -35,6 +38,7 @@ class TestConceptAPI(unittest.TestCase):
self.assertEqual(forms[0][0], 'молодой человек') self.assertEqual(forms[0][0], 'молодой человек')
self._assert_tags(forms[0][1], 'nomn,masc,sing,anim,NOUN') self._assert_tags(forms[0][1], 'nomn,masc,sing,anim,NOUN')
def test_inflect(self): def test_inflect(self):
''' Test inflection. ''' ''' Test inflection. '''
self.assertEqual(cc.inflect('', ''), '') self.assertEqual(cc.inflect('', ''), '')
@ -44,16 +48,19 @@ class TestConceptAPI(unittest.TestCase):
self.assertEqual(cc.inflect('слона', 'nomn, plur'), 'слоны') self.assertEqual(cc.inflect('слона', 'nomn, plur'), 'слоны')
self.assertEqual(cc.inflect('шкала оценок', 'loct,plur'), 'шкалах оценок') self.assertEqual(cc.inflect('шкала оценок', 'loct,plur'), 'шкалах оценок')
def test_find_substr(self): def test_find_substr(self):
'''Test substring search''' '''Test substring search'''
self.assertEqual(cc.find_substr('', ''), (0, 0)) self.assertEqual(cc.find_substr('', ''), (0, 0))
self.assertEqual(cc.find_substr('сложного красивого слона', 'красивые слоном'), (9, 24)) self.assertEqual(cc.find_substr('сложного красивого слона', 'красивые слоном'), (9, 24))
def test_inflect_context(self): def test_inflect_context(self):
'''Test contex inflection''' '''Test context inflection'''
self.assertEqual(cc.inflect_context('', '', ''), '') self.assertEqual(cc.inflect_context('', '', ''), '')
self.assertEqual(cc.inflect_context('красивый', '', 'чашка'), 'красивая') self.assertEqual(cc.inflect_context('красивый', '', 'чашка'), 'красивая')
def test_inflect_substitute(self): def test_inflect_substitute(self):
'''Test substitute inflection''' '''Test substitute inflection'''
self.assertEqual(cc.inflect_substitute('', ''), '') self.assertEqual(cc.inflect_substitute('', ''), '')
@ -61,6 +68,7 @@ class TestConceptAPI(unittest.TestCase):
self.assertEqual(cc.inflect_substitute('слон', ''), 'слон') self.assertEqual(cc.inflect_substitute('слон', ''), 'слон')
self.assertEqual(cc.inflect_substitute('красивый бантик', 'кошкой'), 'красивым бантиком') self.assertEqual(cc.inflect_substitute('красивый бантик', 'кошкой'), 'красивым бантиком')
def test_inflect_dependant(self): def test_inflect_dependant(self):
''' Test coordination inflection. ''' ''' Test coordination inflection. '''
self.assertEqual(cc.inflect_dependant('', ''), '') self.assertEqual(cc.inflect_dependant('', ''), '')
@ -69,6 +77,7 @@ class TestConceptAPI(unittest.TestCase):
self.assertEqual(cc.inflect_dependant('общий', 'мать'), 'общая') self.assertEqual(cc.inflect_dependant('общий', 'мать'), 'общая')
self.assertEqual(cc.inflect_dependant('синий', 'слонов'), 'синих') self.assertEqual(cc.inflect_dependant('синий', 'слонов'), 'синих')
def test_match_all_morpho(self): def test_match_all_morpho(self):
''' Test extracting matching morpho. ''' ''' Test extracting matching morpho. '''
self.assertEqual(cc.match_all_morpho('', ''), []) self.assertEqual(cc.match_all_morpho('', ''), [])

View File

@ -1,7 +1,8 @@
''' Unit tests: context. ''' ''' Unit tests: context. '''
import unittest import unittest
from cctext.context import Entity, TermContext from cctext.context import Entity
class TestEntity(unittest.TestCase): class TestEntity(unittest.TestCase):
'''Test Entity termform access.''' '''Test Entity termform access.'''
@ -9,24 +10,27 @@ class TestEntity(unittest.TestCase):
self.alias = 'X1' self.alias = 'X1'
self.nominal = 'человек' self.nominal = 'человек'
self.text1 = 'test1' self.text1 = 'test1'
self.form1 = ['sing','datv'] self.form1 = ['sing', 'datv']
self.entity = Entity(self.alias, self.nominal, [{'text': self.text1, 'grams': self.form1}]) self.entity = Entity(self.alias, self.nominal, [{'text': self.text1, 'grams': self.form1}])
def test_attributes(self): def test_attributes(self):
self.assertEqual(self.entity.alias, self.alias) self.assertEqual(self.entity.alias, self.alias)
self.assertEqual(self.entity.get_nominal(), self.nominal) self.assertEqual(self.entity.get_nominal(), self.nominal)
self.assertEqual(self.entity.manual, [{'text': self.text1, 'grams': self.form1}]) self.assertEqual(self.entity.manual, [{'text': self.text1, 'grams': self.form1}])
def test_get_form(self): def test_get_form(self):
self.assertEqual(self.entity.get_form([]), self.nominal) self.assertEqual(self.entity.get_form([]), self.nominal)
self.assertEqual(self.entity.get_form(self.form1), self.text1) self.assertEqual(self.entity.get_form(self.form1), self.text1)
self.assertEqual(self.entity.get_form(['invalid tags']), '!Неизвестная граммема: invalid tags!') self.assertEqual(self.entity.get_form(['invalid tags']), '!Неизвестная граммема: invalid tags!')
self.assertEqual(self.entity.get_form(['plur']), 'люди') self.assertEqual(self.entity.get_form(['plur']), 'люди')
def test_set_nominal(self): def test_set_nominal(self):
new_nomial = 'TEST' new_nominal = 'TEST'
self.assertEqual(self.entity.get_form(['plur']), 'люди') self.assertEqual(self.entity.get_form(['plur']), 'люди')
self.entity.set_nominal(new_nomial) self.entity.set_nominal(new_nominal)
self.assertEqual(self.entity.get_nominal(), new_nomial) self.assertEqual(self.entity.get_nominal(), new_nominal)
self.assertEqual(self.entity.get_form(['plur']), new_nomial) self.assertEqual(self.entity.get_form(['plur']), new_nominal)
self.assertEqual(self.entity.manual, []) self.assertEqual(self.entity.manual, [])

View File

@ -3,21 +3,25 @@ import unittest
from cctext import EntityReference, ReferenceType, SyntacticReference, parse_reference from cctext import EntityReference, ReferenceType, SyntacticReference, parse_reference
class TestReferences(unittest.TestCase): class TestReferences(unittest.TestCase):
''' Test class for references. ''' ''' Test class for references. '''
def test_EntityReference(self): def test_EntityReference(self):
''' Testing EntityRefence basics. ''' ''' Testing EntityReference basics. '''
ref = EntityReference('X1', 'sing,nomn') ref = EntityReference('X1', 'sing,nomn')
self.assertEqual(ref.get_type(), ReferenceType.entity) self.assertEqual(ref.get_type(), ReferenceType.entity)
self.assertEqual(ref.to_text(), '@{X1|sing,nomn}') self.assertEqual(ref.to_text(), '@{X1|sing,nomn}')
def test_SyntacticReference(self): def test_SyntacticReference(self):
''' Testing SyntacticReference basics. ''' ''' Testing SyntacticReference basics. '''
ref = SyntacticReference(-1, 'черный') ref = SyntacticReference(-1, 'черный')
self.assertEqual(ref.get_type(), ReferenceType.syntactic) self.assertEqual(ref.get_type(), ReferenceType.syntactic)
self.assertEqual(ref.to_text(), '@{-1|черный}') self.assertEqual(ref.to_text(), '@{-1|черный}')
def test_parse_reference_invalid(self): def test_parse_reference_invalid(self):
''' Testing parsing reference invalid input. ''' ''' Testing parsing reference invalid input. '''
self.assertIsNone(parse_reference('')) self.assertIsNone(parse_reference(''))
@ -31,14 +35,15 @@ class TestReferences(unittest.TestCase):
self.assertIsNone(parse_reference('@{1| }')) self.assertIsNone(parse_reference('@{1| }'))
self.assertIsNone(parse_reference('@{0|черный}')) self.assertIsNone(parse_reference('@{0|черный}'))
def test_parse_reference(self): def test_parse_reference(self):
''' Testing parsing reference text. ''' ''' Testing parsing reference text. '''
ref = parse_reference('@{1| черный }') ref = parse_reference('@{1| черный }')
self.assertIsNotNone(ref) self.assertIsNotNone(ref)
self.assertEqual(ref.to_text(), '@{1|черный}') self.assertEqual(ref.to_text(), '@{1|черный}')
self.assertEqual(ref.get_type(), ReferenceType.syntactic) self.assertEqual(ref.get_type(), ReferenceType.syntactic)
ref = parse_reference('@{X1 | VERB, past, sing}') ref = parse_reference('@{X1 | VERB, past, sing}')
self.assertIsNotNone(ref) self.assertIsNotNone(ref)
self.assertEqual(ref.to_text(), '@{X1|VERB,past,sing}') self.assertEqual(ref.to_text(), '@{X1|VERB,past,sing}')
self.assertEqual(ref.get_type(), ReferenceType.entity) self.assertEqual(ref.get_type(), ReferenceType.entity)

View File

@ -28,24 +28,52 @@ class TestResolver(unittest.TestCase):
self.context['X2'] = Entity('X2', '') self.context['X2'] = Entity('X2', '')
self.resolver = Resolver(self.context) self.resolver = Resolver(self.context)
def test_resolve_entity(self): def test_resolve_entity(self):
self.assertEqual(resolve_entity(EntityReference('X1', ''), self.context), 'человек') ref = EntityReference('X1', '')
self.assertEqual(resolve_entity(EntityReference('X1', 'plur'), self.context), 'люди') self.assertEqual(resolve_entity(ref, self.context), 'человек')
self.assertEqual(resolve_entity(EntityReference('X2', ''), self.context), '!Отсутствует термин: X2!')
self.assertEqual(resolve_entity(EntityReference('X1', 'invalid'), self.context), '!Неизвестная граммема: invalid!') ref = EntityReference('X1', 'plur')
self.assertEqual(resolve_entity(EntityReference('X123', 'plur'), self.context), '!Неизвестная сущность: X123!') self.assertEqual(resolve_entity(ref, self.context), 'люди')
ref = EntityReference('X2', '')
self.assertEqual(resolve_entity(ref, self.context), '!Отсутствует термин: X2!')
ref = EntityReference('X1', 'invalid')
self.assertEqual(resolve_entity(ref, self.context), '!Неизвестная граммема: invalid!')
ref = EntityReference('X123', 'plur')
self.assertEqual(resolve_entity(ref, self.context), '!Неизвестная сущность: X123!')
def test_resolve_syntactic(self): def test_resolve_syntactic(self):
ref = ResolvedReference(ref=EntityReference('X1', 'sing,datv'), resolved='человеку') ref = ResolvedReference(ref=EntityReference('X1', 'sing,datv'), resolved='человеку')
refs_list = [ref, ref, ref, ref] refs_list = [ref, ref, ref, ref]
self.assertEqual(resolve_syntactic(SyntacticReference(text='умный', referral_offset=-1), 0, refs_list), '!Некорректное смещение: -1!')
self.assertEqual(resolve_syntactic(SyntacticReference(text='умный', referral_offset=1), 3, refs_list), '!Некорректное смещение: 1!') ref = SyntacticReference(text='умный', referral_offset=-1)
self.assertEqual(resolve_syntactic(SyntacticReference(text='умный', referral_offset=1), 0, refs_list), 'умному') self.assertEqual(resolve_syntactic(ref, 0, refs_list), '!Некорректное смещение: -1!')
self.assertEqual(resolve_syntactic(SyntacticReference(text='умный', referral_offset=2), 0, refs_list), 'умному')
self.assertEqual(resolve_syntactic(SyntacticReference(text='умный', referral_offset=3), 0, refs_list), 'умному') ref = SyntacticReference(text='умный', referral_offset=1)
self.assertEqual(resolve_syntactic(SyntacticReference(text='умный', referral_offset=-1), 3, refs_list), 'умному') self.assertEqual(resolve_syntactic(ref, 3, refs_list), '!Некорректное смещение: 1!')
self.assertEqual(resolve_syntactic(SyntacticReference(text='умный', referral_offset=-2), 3, refs_list), 'умному')
self.assertEqual(resolve_syntactic(SyntacticReference(text='умный', referral_offset=-3), 3, refs_list), 'умному') ref = SyntacticReference(text='умный', referral_offset=1)
self.assertEqual(resolve_syntactic(ref, 0, refs_list), 'умному')
ref = SyntacticReference(text='умный', referral_offset=2)
self.assertEqual(resolve_syntactic(ref, 0, refs_list), 'умному')
ref = SyntacticReference(text='умный', referral_offset=3)
self.assertEqual(resolve_syntactic(ref, 0, refs_list), 'умному')
ref = SyntacticReference(text='умный', referral_offset=-1)
self.assertEqual(resolve_syntactic(ref, 3, refs_list), 'умному')
ref = SyntacticReference(text='умный', referral_offset=-2)
self.assertEqual(resolve_syntactic(ref, 3, refs_list), 'умному')
ref = SyntacticReference(text='умный', referral_offset=-3)
self.assertEqual(resolve_syntactic(ref, 3, refs_list), 'умному')
def test_resolve_invalid(self): def test_resolve_invalid(self):
self.assertEqual(self.resolver.resolve(''), '') self.assertEqual(self.resolver.resolve(''), '')
@ -57,29 +85,35 @@ class TestResolver(unittest.TestCase):
self.assertEqual(self.resolver.resolve('simple @{unparsable ref} text'), 'simple @{unparsable ref} text') self.assertEqual(self.resolver.resolve('simple @{unparsable ref} text'), 'simple @{unparsable ref} text')
self.assertEqual(len(self.resolver.refs), 0) self.assertEqual(len(self.resolver.refs), 0)
def test_resolve_single(self): def test_resolve_single(self):
self.assertEqual(self.resolver.resolve('просто @{-1|умный} текст'), 'просто !Некорректное смещение: -1! текст') resolved = self.resolver.resolve('просто @{-1|умный} текст')
self.assertEqual(resolved, 'просто !Некорректное смещение: -1! текст')
self.assertEqual(len(self.resolver.refs), 1) self.assertEqual(len(self.resolver.refs), 1)
self.assertEqual(self.resolver.refs[0].pos_input, Position(7, 18)) self.assertEqual(self.resolver.refs[0].pos_input, Position(7, 18))
self.assertEqual(self.resolver.refs[0].pos_output, Position(7, 34)) self.assertEqual(self.resolver.refs[0].pos_output, Position(7, 34))
self.assertEqual(self.resolver.resolve('просто @{X123|sing,nomn} текст'), 'просто !Неизвестная сущность: X123! текст') resolved = self.resolver.resolve('просто @{X123|sing,nomn} текст')
self.assertEqual(resolved, 'просто !Неизвестная сущность: X123! текст')
self.assertEqual(len(self.resolver.refs), 1) self.assertEqual(len(self.resolver.refs), 1)
self.assertEqual(self.resolver.refs[0].pos_input, Position(7, 24)) self.assertEqual(self.resolver.refs[0].pos_input, Position(7, 24))
self.assertEqual(self.resolver.refs[0].pos_output, Position(7, 35)) self.assertEqual(self.resolver.refs[0].pos_output, Position(7, 35))
self.assertEqual(self.resolver.resolve('@{X1|sing,nomn}'), 'человек') resolved = self.resolver.resolve('@{X1|sing,nomn}')
self.assertEqual(resolved, 'человек')
self.assertEqual(len(self.resolver.refs), 1) self.assertEqual(len(self.resolver.refs), 1)
self.assertEqual(self.resolver.refs[0].pos_input, Position(0, 15)) self.assertEqual(self.resolver.refs[0].pos_input, Position(0, 15))
self.assertEqual(self.resolver.refs[0].pos_output, Position(0, 7)) self.assertEqual(self.resolver.refs[0].pos_output, Position(0, 7))
self.assertEqual(self.resolver.resolve('просто @{X1|sing,nomn} текст'), 'просто человек текст') resolved = self.resolver.resolve('просто @{X1|sing,nomn} текст')
self.assertEqual(resolved, 'просто человек текст')
self.assertEqual(len(self.resolver.refs), 1) self.assertEqual(len(self.resolver.refs), 1)
self.assertEqual(self.resolver.refs[0].pos_input, Position(7, 22)) self.assertEqual(self.resolver.refs[0].pos_input, Position(7, 22))
self.assertEqual(self.resolver.refs[0].pos_output, Position(7, 14)) self.assertEqual(self.resolver.refs[0].pos_output, Position(7, 14))
def test_resolve_multiple(self): def test_resolve_multiple(self):
input = '@{X1|sing,datv} @{-1|умный} @{X1|plur} завидуют' input = '@{X1|sing,datv} @{-1|умный} @{X1|plur} завидуют'
self.assertEqual(self.resolver.resolve(input), 'человеку умному люди завидуют') self.assertEqual(self.resolver.resolve(input), 'человеку умному люди завидуют')
self.assertEqual(len(self.resolver.refs), 3) self.assertEqual(len(self.resolver.refs), 3)
self.assertEqual(self.resolver.refs[0].pos_input, Position(0, 15)) self.assertEqual(self.resolver.refs[0].pos_input, Position(0, 15))
@ -89,6 +123,7 @@ class TestResolver(unittest.TestCase):
self.assertEqual(self.resolver.refs[2].pos_input, Position(28, 38)) self.assertEqual(self.resolver.refs[2].pos_input, Position(28, 38))
self.assertEqual(self.resolver.refs[2].pos_output, Position(16, 20)) self.assertEqual(self.resolver.refs[2].pos_output, Position(16, 20))
def test_resolve_manual_forms(self): def test_resolve_manual_forms(self):
self.context['X1'] = Entity( self.context['X1'] = Entity(
alias='X1', alias='X1',

View File

@ -10,9 +10,9 @@ class TestTags(unittest.TestCase):
def test_split_tags(self): def test_split_tags(self):
self.assertEqual(split_grams(''), []) self.assertEqual(split_grams(''), [])
self.assertEqual(split_grams('NOUN'), ['NOUN']) self.assertEqual(split_grams('NOUN'), ['NOUN'])
self.assertEqual(split_grams('NOUN,plur,sing'), ['NOUN','plur','sing']) self.assertEqual(split_grams('NOUN,plur,sing'), ['NOUN', 'plur', 'sing'])
def test_combine_tags(self): def test_combine_tags(self):
self.assertEqual(combine_grams([]), '') self.assertEqual(combine_grams([]), '')
self.assertEqual(combine_grams(['NOUN']), 'NOUN') self.assertEqual(combine_grams(['NOUN']), 'NOUN')
self.assertEqual(combine_grams(['NOUN','plur','sing']), 'NOUN,plur,sing') self.assertEqual(combine_grams(['NOUN', 'plur', 'sing']), 'NOUN,plur,sing')

View File

@ -7,6 +7,7 @@ from cctext import PhraseParser
parser = PhraseParser() parser = PhraseParser()
# cSpell:disable
class TestRuParser(unittest.TestCase): class TestRuParser(unittest.TestCase):
''' Test class for russian parsing. ''' ''' Test class for russian parsing. '''
@ -28,25 +29,25 @@ class TestRuParser(unittest.TestCase):
def test_parse_word(self): def test_parse_word(self):
''' Test parse for single word. ''' ''' Test parse for single word. '''
self._assert_parse('1', ['NUMB', 'intg']) self._assert_parse('1', ['NUMB', 'intg'])
self._assert_parse('пять', ['NUMR', 'nomn']) self._assert_parse('пять', ['NUMR', 'nomn'])
self._assert_parse('трёх', ['NUMR', 'gent']) self._assert_parse('трёх', ['NUMR', 'gent'])
self._assert_parse('трех', ['NUMR', 'gent']) self._assert_parse('трех', ['NUMR', 'gent'])
self._assert_parse('круча', ['NOUN', 'femn', 'sing', 'nomn', 'inan']) self._assert_parse('круча', ['NOUN', 'femn', 'sing', 'nomn', 'inan'])
self._assert_parse('круть', ['NOUN', 'femn', 'sing', 'nomn', 'inan', 'Sgtm', 'Geox']) self._assert_parse('круть', ['NOUN', 'femn', 'sing', 'nomn', 'inan', 'Sgtm', 'Geox'])
self._assert_parse('ПВО', ['NOUN', 'femn', 'sing', 'nomn', 'inan', 'Sgtm', 'Abbr', 'Fixd']) self._assert_parse('ПВО', ['NOUN', 'femn', 'sing', 'nomn', 'inan', 'Sgtm', 'Abbr', 'Fixd'])
self._assert_parse('СМИ', ['NOUN', 'plur', 'nomn', 'inan', 'Pltm', 'Abbr', 'Fixd', 'GNdr']) self._assert_parse('СМИ', ['NOUN', 'plur', 'nomn', 'inan', 'Pltm', 'Abbr', 'Fixd', 'GNdr'])
self._assert_parse('ему', ['NPRO', 'masc', 'sing', 'datv', '3per', 'Anph']) self._assert_parse('ему', ['NPRO', 'masc', 'sing', 'datv', '3per', 'Anph'])
self._assert_parse('крутит', ['VERB', 'sing', '3per', 'pres', 'impf', 'tran', 'indc']) self._assert_parse('крутит', ['VERB', 'sing', '3per', 'pres', 'impf', 'tran', 'indc'])
self._assert_parse('смеркалось', ['VERB', 'neut', 'sing', 'Impe', 'past', 'impf', 'intr', 'indc']) self._assert_parse('смеркалось', ['VERB', 'neut', 'sing', 'Impe', 'past', 'impf', 'intr', 'indc'])
self._assert_parse('крутить', ['INFN', 'impf', 'tran']) self._assert_parse('крутить', ['INFN', 'impf', 'tran'])
self._assert_parse('крученый', ['ADJF', 'masc', 'sing', 'nomn']) self._assert_parse('крученый', ['ADJF', 'masc', 'sing', 'nomn'])
self._assert_parse('крут', ['ADJS', 'masc', 'sing', 'Qual']) self._assert_parse('крут', ['ADJS', 'masc', 'sing', 'Qual'])
self._assert_parse('крутящего', ['PRTF', 'masc', 'sing', 'gent', 'pres', 'impf', 'tran', 'actv']) self._assert_parse('крутящего', ['PRTF', 'masc', 'sing', 'gent', 'pres', 'impf', 'tran', 'actv'])
self._assert_parse('откручен', ['PRTS', 'masc', 'sing', 'past', 'perf', 'pssv']) self._assert_parse('откручен', ['PRTS', 'masc', 'sing', 'past', 'perf', 'pssv'])
self._assert_parse('крутя', ['GRND', 'pres', 'impf', 'tran']) self._assert_parse('крутя', ['GRND', 'pres', 'impf', 'tran'])
self._assert_parse('круто', ['ADVB']) self._assert_parse('круто', ['ADVB'])
self._assert_parse('круче', ['COMP', 'Qual']) self._assert_parse('круче', ['COMP', 'Qual'])
self._assert_parse(',', ['PNCT']) self._assert_parse(',', ['PNCT'])
self._assert_parse('32-', ['intg', 'NUMB']) self._assert_parse('32-', ['intg', 'NUMB'])
@ -199,7 +200,7 @@ class TestRuParser(unittest.TestCase):
self._assert_inflect('три', ['loct'], 'трёх') self._assert_inflect('три', ['loct'], 'трёх')
def test_inflect_adjf(self): def test_inflect_adjf(self):
''' Test inflection for single adjectif. ''' ''' Test inflection for single adjective. '''
self._assert_inflect('хороший', ['nomn'], 'хороший') self._assert_inflect('хороший', ['nomn'], 'хороший')
self._assert_inflect('хороший', ['gent'], 'хорошего') self._assert_inflect('хороший', ['gent'], 'хорошего')
self._assert_inflect('хороший', ['datv'], 'хорошему') self._assert_inflect('хороший', ['datv'], 'хорошему')
@ -317,8 +318,8 @@ class TestRuParser(unittest.TestCase):
# self._assert_inflect('реципиенту воздействия', ['nomn'], 'реципиент воздействия') # self._assert_inflect('реципиенту воздействия', ['nomn'], 'реципиент воздействия')
def test_inflect_complex_mainword(self): def test_inflect_complex_main(self):
''' Test inflection of mainword conmprised of multiple words. ''' ''' Test inflection of main word comprised of multiple words. '''
# Do not parse complex main words # Do not parse complex main words
self._assert_inflect('слона и кота', ['nomn'], 'слон и кота') self._assert_inflect('слона и кота', ['nomn'], 'слон и кота')
self._assert_inflect('сказал и поехал', ['INFN'], 'сказать и поехал') self._assert_inflect('сказал и поехал', ['INFN'], 'сказать и поехал')
@ -440,6 +441,7 @@ class TestRuParser(unittest.TestCase):
self.assertEqual(parser.find_substr('сложного слона', 'слоном'), (9, 14)) self.assertEqual(parser.find_substr('сложного слона', 'слоном'), (9, 14))
self.assertEqual(parser.find_substr('сложного красивого слона', 'красивые слоном'), (9, 24)) self.assertEqual(parser.find_substr('сложного красивого слона', 'красивые слоном'), (9, 24))
self.assertEqual(parser.find_substr('человек', 'люди'), (0, 7)) self.assertEqual(parser.find_substr('человек', 'люди'), (0, 7))
# cSpell:enable
if __name__ == '__main__': if __name__ == '__main__':