diff --git a/.flake8 b/.flake8 new file mode 100644 index 0000000..b8518ca --- /dev/null +++ b/.flake8 @@ -0,0 +1,6 @@ +[flake8] +# E303 - too many blank lines +# E203 - whitespace before ':' +ignore = E303, E203 +exclude = __init__.py +max-line-length = 120 \ No newline at end of file diff --git a/.vscode/launch.json b/.vscode/launch.json index 26a2d2e..8f01fdd 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -17,6 +17,13 @@ "request": "launch", "script": "${workspaceFolder}/scripts/RunLint.ps1", "args": [] + }, + { + "name": "Setup", + "type": "PowerShell", + "request": "launch", + "script": "${workspaceFolder}/scripts/Setup.ps1", + "args": [] } ] } diff --git a/.vscode/settings.json b/.vscode/settings.json index 184603b..c1025e3 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -3,38 +3,58 @@ ".mypy_cache/": true, ".pytest_cache/": true }, - "python.testing.unittestArgs": ["-v", "-s", "./tests", "-p", "test*.py"], + "python.testing.unittestArgs": ["-v", "-s", "./tests", "-p", "t_*.py"], "python.testing.pytestEnabled": false, "python.testing.unittestEnabled": true, - "eslint.workingDirectories": [ - { - "mode": "auto" - } - ], "python.analysis.typeCheckingMode": "off", - "python.analysis.ignore": ["**/tests/**", "**/node_modules/**", "**/venv/**"], + "python.analysis.ignore": ["**/tests/**", "**/venv/**"], "cSpell.words": [ + "ablt", + "accs", + "actv", "ADJF", "ADJS", "ADVB", + "Anph", + "cctext", + "datv", + "femn", + "Fixd", + "Geox", + "grammeme", "Grammemes", "GRND", + "Impe", + "impr", + "inan", + "indc", "INFN", + "intg", "INTJ", + "loct", + "moprho", "multiword", + "nomn", "NPRO", "NUMR", "Opencorpora", + "Pltm", "PNCT", "PRCL", + "Prnt", "PRTF", "PRTS", + "pssv", "pymorphy", "razdel", "rumodel", + "ruparser", + "Sgtm", "tagset", - "unknwn" + "termform", + "unknwn", + "круть", + "фторметил" ], - "cSpell.language": "en,ru", - "cSpell.ignorePaths": ["node_modules/**", "*.json"] + "cSpell.language": "en,ru" } diff --git a/TODO.txt b/TODO.txt new file mode 100644 index 0000000..82e05a6 --- /dev/null +++ b/TODO.txt @@ -0,0 +1 @@ +- implement Part of speech transition for VERB <-> NOUN \ No newline at end of file diff --git a/cctext/__init__.py b/cctext/__init__.py index 35bcebe..fac06bd 100644 --- a/cctext/__init__.py +++ b/cctext/__init__.py @@ -1,16 +1,51 @@ ''' Concept core text processing library. 
''' # pylint: skip-file -from .syntax import RuSyntax, Capitalization -from .rumodel import Morphology, SemanticRole, WordTag, morpho, split_grams, combine_grams -from .ruparser import PhraseParser, WordToken, Collation -from .reference import EntityReference, ReferenceType, SyntacticReference, parse_reference -from .context import TermForm, Entity, TermContext -from .resolver import Reference, Position, Resolver, ResolvedReference, resolve_entity, resolve_syntactic, extract_entities - -from .conceptapi import ( - parse, normalize, - generate_lexeme, inflect, inflect_context, inflect_substitute, inflect_dependant, - match_all_morpho, find_substr +from .syntax import ( + RuSyntax, + Capitalization +) +from .rumodel import ( + Morphology, + SemanticRole, + WordTag, + morpho, + split_grams, + combine_grams +) +from .ruparser import ( + PhraseParser, + WordToken, + Collation +) +from .reference import ( + EntityReference, + ReferenceType, + SyntacticReference, + parse_reference +) +from .context import ( + TermForm, + Entity, + TermContext +) +from .resolver import ( + Reference, + Position, + Resolver, + ResolvedReference, + resolve_entity, + resolve_syntactic, + extract_entities ) -# TODO: implement Part of speech transition for VERB <-> NOUN +from .api import ( + parse, + normalize, + generate_lexeme, + inflect, + inflect_context, + inflect_substitute, + inflect_dependant, + match_all_morpho, + find_substr +) diff --git a/cctext/conceptapi.py b/cctext/api.py similarity index 92% rename from cctext/conceptapi.py rename to cctext/api.py index f6f8e2d..b03ad81 100644 --- a/cctext/conceptapi.py +++ b/cctext/api.py @@ -1,5 +1,5 @@ ''' -Concept API Python functions. +Core API Python procedures. ::guarantee:: doesn't raise exceptions and returns workable outputs ''' @@ -21,11 +21,6 @@ def parse(text: str, require_grams: str = '') -> str: return result if result != 'UNKN' else '' -# def parse_variants(text: str, require_grams: str = '') -> list[tuple[str, str]]: -# ''' Get all variants of a parse. -# ::returns:: string of comma separated grammar tags or empty string ''' - - def generate_lexeme(text_normal: str) -> list[tuple[str, str]]: ''' Get all inflected forms belonging to same Lexeme. ''' model = parser.parse(text_normal) diff --git a/cctext/context.py b/cctext/context.py index de487cd..705a9dc 100644 --- a/cctext/context.py +++ b/cctext/context.py @@ -17,8 +17,8 @@ class TermForm(TypedDict): def _match_grams(query: Iterable[str], test: Iterable[str]) -> bool: ''' Check if grams from test fit query. ''' for gram in test: - if not gram in query: - if not gram in WordTag.PARTS_OF_SPEECH: + if gram not in query: + if gram not in WordTag.PARTS_OF_SPEECH: return False for pos in WordTag.PARTS_OF_SPEECH: if pos in query: @@ -35,7 +35,7 @@ def _search_form(query: Iterable[str], data: Iterable[TermForm]) -> Optional[str class Entity: ''' Represents text entity. 
''' - def __init__(self, alias: str, nominal: str, manual_forms: Optional[Iterable[TermForm]]=None): + def __init__(self, alias: str, nominal: str, manual_forms: Optional[Iterable[TermForm]] = None): if manual_forms is None: self.manual = [] else: diff --git a/cctext/resolver.py b/cctext/resolver.py index e9df3df..922b30e 100644 --- a/cctext/resolver.py +++ b/cctext/resolver.py @@ -5,7 +5,7 @@ from dataclasses import dataclass from .rumodel import split_grams -from .conceptapi import inflect_dependant +from .api import inflect_dependant from .context import TermContext from .reference import EntityReference, SyntacticReference, parse_reference, Reference diff --git a/cctext/ruparser.py b/cctext/ruparser.py index 7b64cd2..2427466 100644 --- a/cctext/ruparser.py +++ b/cctext/ruparser.py @@ -1,4 +1,4 @@ -''' Parsing russian language using pymorphy3 library. ''' +''' Parsing russian language based on pymorphy3 library. ''' from __future__ import annotations from typing import Optional @@ -155,9 +155,9 @@ class PhraseParser: def parse(self, text: str, require_index: int = INDEX_NONE, require_grams: Optional[Grammemes] = None) -> Optional[Collation]: - ''' + ''' Determine morpho tags for input text. - ::returns:: Morphology of a text or None if no suitable form is available + ::returns:: Morphology of a text or None if no suitable form is available ''' segments = list(RuSyntax.tokenize(text)) if len(segments) == 0: diff --git a/cctext/syntax.py b/cctext/syntax.py index 012b396..369413e 100644 --- a/cctext/syntax.py +++ b/cctext/syntax.py @@ -1,4 +1,4 @@ -''' Russian language syntax incapsulation. ''' +''' Russian language syntax. ''' from __future__ import annotations from enum import Enum, unique diff --git a/requirements-build.txt b/requirements-build.txt index 001f66e..7a89acf 100644 Binary files a/requirements-build.txt and b/requirements-build.txt differ diff --git a/requirements.txt b/requirements.txt index 1ee73e3..46df764 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,2 @@ pymorphy3 -pymorphy3-dicts-ru razdel \ No newline at end of file diff --git a/scripts/Setup.ps1 b/scripts/Setup.ps1 new file mode 100644 index 0000000..d02ac17 --- /dev/null +++ b/scripts/Setup.ps1 @@ -0,0 +1,11 @@ +Set-Location $PSScriptRoot\.. + +$python = '.\venv\Scripts\python.exe' +$env = '.\venv' +if (Test-Path -Path $python -PathType Leaf) { + Remove-Item $env -Recurse -Force +} + +& 'python' -m venv .\venv +& $python -m pip install --upgrade pip +& $python -m pip install -r requirements-build.txt \ No newline at end of file diff --git a/scripts/Test.ps1 b/scripts/Test.ps1 deleted file mode 100644 index 7f93dff..0000000 --- a/scripts/Test.ps1 +++ /dev/null @@ -1,10 +0,0 @@ -# Run lint -function RunLinters() { - $pylint = "$PSScriptRoot\..\venv\Scripts\pylint.exe" - $mypy = "$PSScriptRoot\..\venv\Scripts\mypy.exe" - - & $pylint cctext - & $mypy cctext -} - -RunLinters \ No newline at end of file diff --git a/tests/__init__.py b/tests/__init__.py index b554047..c4de9d3 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -2,7 +2,7 @@ from .t_reference import * from .t_ruparser import * from .t_syntax import * -from .t_conceptapi import * +from .t_api import * from .t_rumodel import * from .t_context import * from .t_resolver import * diff --git a/tests/t_conceptapi.py b/tests/t_api.py similarity index 98% rename from tests/t_conceptapi.py rename to tests/t_api.py index becbe09..c65dc1d 100644 --- a/tests/t_conceptapi.py +++ b/tests/t_api.py @@ -1,4 +1,4 @@ -''' Unit tests: conceptapi. 
''' +''' Unit tests: api. ''' import unittest import cctext as cc @@ -8,19 +8,22 @@ class TestConceptAPI(unittest.TestCase): '''Test class for Concept API.''' def _assert_tags(self, actual: str, expected: str): self.assertEqual(set(cc.split_grams(actual)), set(cc.split_grams(expected))) - + + def test_parse(self): ''' Test parsing. ''' self._assert_tags(cc.parse(''), '') self._assert_tags(cc.parse('1'), 'NUMB,intg') self._assert_tags(cc.parse('слон', require_grams='masc'), 'NOUN,anim,masc,sing,nomn') + def test_normalize_word(self): ''' Test normalize for single word. ''' self.assertEqual(cc.normalize(''), '') self.assertEqual(cc.normalize('первого'), 'первый') self.assertEqual(cc.normalize('диких людей'), 'дикий человек') + def test_generate_lexeme(self): ''' Test all lexical forms. ''' self.assertEqual(cc.generate_lexeme(''), []) @@ -35,6 +38,7 @@ class TestConceptAPI(unittest.TestCase): self.assertEqual(forms[0][0], 'молодой человек') self._assert_tags(forms[0][1], 'nomn,masc,sing,anim,NOUN') + def test_inflect(self): ''' Test inflection. ''' self.assertEqual(cc.inflect('', ''), '') @@ -44,16 +48,19 @@ class TestConceptAPI(unittest.TestCase): self.assertEqual(cc.inflect('слона', 'nomn, plur'), 'слоны') self.assertEqual(cc.inflect('шкала оценок', 'loct,plur'), 'шкалах оценок') + def test_find_substr(self): '''Test substring search''' self.assertEqual(cc.find_substr('', ''), (0, 0)) self.assertEqual(cc.find_substr('сложного красивого слона', 'красивые слоном'), (9, 24)) + def test_inflect_context(self): - '''Test contex inflection''' + '''Test context inflection''' self.assertEqual(cc.inflect_context('', '', ''), '') self.assertEqual(cc.inflect_context('красивый', '', 'чашка'), 'красивая') + def test_inflect_substitute(self): '''Test substitute inflection''' self.assertEqual(cc.inflect_substitute('', ''), '') @@ -61,6 +68,7 @@ class TestConceptAPI(unittest.TestCase): self.assertEqual(cc.inflect_substitute('слон', ''), 'слон') self.assertEqual(cc.inflect_substitute('красивый бантик', 'кошкой'), 'красивым бантиком') + def test_inflect_dependant(self): ''' Test coordination inflection. ''' self.assertEqual(cc.inflect_dependant('', ''), '') @@ -69,6 +77,7 @@ class TestConceptAPI(unittest.TestCase): self.assertEqual(cc.inflect_dependant('общий', 'мать'), 'общая') self.assertEqual(cc.inflect_dependant('синий', 'слонов'), 'синих') + def test_match_all_morpho(self): ''' Test extracting matching morpho. ''' self.assertEqual(cc.match_all_morpho('', ''), []) diff --git a/tests/t_context.py b/tests/t_context.py index 8d996e9..4df1a50 100644 --- a/tests/t_context.py +++ b/tests/t_context.py @@ -1,7 +1,8 @@ ''' Unit tests: context. 
''' import unittest -from cctext.context import Entity, TermContext +from cctext.context import Entity + class TestEntity(unittest.TestCase): '''Test Entity termform access.''' @@ -9,24 +10,27 @@ class TestEntity(unittest.TestCase): self.alias = 'X1' self.nominal = 'человек' self.text1 = 'test1' - self.form1 = ['sing','datv'] + self.form1 = ['sing', 'datv'] self.entity = Entity(self.alias, self.nominal, [{'text': self.text1, 'grams': self.form1}]) + def test_attributes(self): self.assertEqual(self.entity.alias, self.alias) self.assertEqual(self.entity.get_nominal(), self.nominal) self.assertEqual(self.entity.manual, [{'text': self.text1, 'grams': self.form1}]) + def test_get_form(self): self.assertEqual(self.entity.get_form([]), self.nominal) self.assertEqual(self.entity.get_form(self.form1), self.text1) self.assertEqual(self.entity.get_form(['invalid tags']), '!Неизвестная граммема: invalid tags!') self.assertEqual(self.entity.get_form(['plur']), 'люди') + def test_set_nominal(self): - new_nomial = 'TEST' + new_nominal = 'TEST' self.assertEqual(self.entity.get_form(['plur']), 'люди') - self.entity.set_nominal(new_nomial) - self.assertEqual(self.entity.get_nominal(), new_nomial) - self.assertEqual(self.entity.get_form(['plur']), new_nomial) + self.entity.set_nominal(new_nominal) + self.assertEqual(self.entity.get_nominal(), new_nominal) + self.assertEqual(self.entity.get_form(['plur']), new_nominal) self.assertEqual(self.entity.manual, []) diff --git a/tests/t_reference.py b/tests/t_reference.py index ae7c3e6..8f601f5 100644 --- a/tests/t_reference.py +++ b/tests/t_reference.py @@ -3,21 +3,25 @@ import unittest from cctext import EntityReference, ReferenceType, SyntacticReference, parse_reference + class TestReferences(unittest.TestCase): ''' Test class for references. ''' + def test_EntityReference(self): - ''' Testing EntityRefence basics. ''' + ''' Testing EntityReference basics. ''' ref = EntityReference('X1', 'sing,nomn') self.assertEqual(ref.get_type(), ReferenceType.entity) self.assertEqual(ref.to_text(), '@{X1|sing,nomn}') + def test_SyntacticReference(self): ''' Testing SyntacticReference basics. ''' ref = SyntacticReference(-1, 'черный') self.assertEqual(ref.get_type(), ReferenceType.syntactic) self.assertEqual(ref.to_text(), '@{-1|черный}') + def test_parse_reference_invalid(self): ''' Testing parsing reference invalid input. ''' self.assertIsNone(parse_reference('')) @@ -31,14 +35,15 @@ class TestReferences(unittest.TestCase): self.assertIsNone(parse_reference('@{1| }')) self.assertIsNone(parse_reference('@{0|черный}')) + def test_parse_reference(self): ''' Testing parsing reference text. 
''' ref = parse_reference('@{1| черный }') self.assertIsNotNone(ref) - self.assertEqual(ref.to_text(), '@{1|черный}') + self.assertEqual(ref.to_text(), '@{1|черный}') self.assertEqual(ref.get_type(), ReferenceType.syntactic) ref = parse_reference('@{X1 | VERB, past, sing}') self.assertIsNotNone(ref) - self.assertEqual(ref.to_text(), '@{X1|VERB,past,sing}') + self.assertEqual(ref.to_text(), '@{X1|VERB,past,sing}') self.assertEqual(ref.get_type(), ReferenceType.entity) diff --git a/tests/t_resolver.py b/tests/t_resolver.py index 92f000c..7e644c4 100644 --- a/tests/t_resolver.py +++ b/tests/t_resolver.py @@ -28,24 +28,52 @@ class TestResolver(unittest.TestCase): self.context['X2'] = Entity('X2', '') self.resolver = Resolver(self.context) + def test_resolve_entity(self): - self.assertEqual(resolve_entity(EntityReference('X1', ''), self.context), 'человек') - self.assertEqual(resolve_entity(EntityReference('X1', 'plur'), self.context), 'люди') - self.assertEqual(resolve_entity(EntityReference('X2', ''), self.context), '!Отсутствует термин: X2!') - self.assertEqual(resolve_entity(EntityReference('X1', 'invalid'), self.context), '!Неизвестная граммема: invalid!') - self.assertEqual(resolve_entity(EntityReference('X123', 'plur'), self.context), '!Неизвестная сущность: X123!') + ref = EntityReference('X1', '') + self.assertEqual(resolve_entity(ref, self.context), 'человек') + + ref = EntityReference('X1', 'plur') + self.assertEqual(resolve_entity(ref, self.context), 'люди') + + ref = EntityReference('X2', '') + self.assertEqual(resolve_entity(ref, self.context), '!Отсутствует термин: X2!') + + ref = EntityReference('X1', 'invalid') + self.assertEqual(resolve_entity(ref, self.context), '!Неизвестная граммема: invalid!') + + ref = EntityReference('X123', 'plur') + self.assertEqual(resolve_entity(ref, self.context), '!Неизвестная сущность: X123!') + def test_resolve_syntactic(self): ref = ResolvedReference(ref=EntityReference('X1', 'sing,datv'), resolved='человеку') refs_list = [ref, ref, ref, ref] - self.assertEqual(resolve_syntactic(SyntacticReference(text='умный', referral_offset=-1), 0, refs_list), '!Некорректное смещение: -1!') - self.assertEqual(resolve_syntactic(SyntacticReference(text='умный', referral_offset=1), 3, refs_list), '!Некорректное смещение: 1!') - self.assertEqual(resolve_syntactic(SyntacticReference(text='умный', referral_offset=1), 0, refs_list), 'умному') - self.assertEqual(resolve_syntactic(SyntacticReference(text='умный', referral_offset=2), 0, refs_list), 'умному') - self.assertEqual(resolve_syntactic(SyntacticReference(text='умный', referral_offset=3), 0, refs_list), 'умному') - self.assertEqual(resolve_syntactic(SyntacticReference(text='умный', referral_offset=-1), 3, refs_list), 'умному') - self.assertEqual(resolve_syntactic(SyntacticReference(text='умный', referral_offset=-2), 3, refs_list), 'умному') - self.assertEqual(resolve_syntactic(SyntacticReference(text='умный', referral_offset=-3), 3, refs_list), 'умному') + + ref = SyntacticReference(text='умный', referral_offset=-1) + self.assertEqual(resolve_syntactic(ref, 0, refs_list), '!Некорректное смещение: -1!') + + ref = SyntacticReference(text='умный', referral_offset=1) + self.assertEqual(resolve_syntactic(ref, 3, refs_list), '!Некорректное смещение: 1!') + + ref = SyntacticReference(text='умный', referral_offset=1) + self.assertEqual(resolve_syntactic(ref, 0, refs_list), 'умному') + + ref = SyntacticReference(text='умный', referral_offset=2) + self.assertEqual(resolve_syntactic(ref, 0, refs_list), 'умному') + + ref = 
SyntacticReference(text='умный', referral_offset=3) + self.assertEqual(resolve_syntactic(ref, 0, refs_list), 'умному') + + ref = SyntacticReference(text='умный', referral_offset=-1) + self.assertEqual(resolve_syntactic(ref, 3, refs_list), 'умному') + + ref = SyntacticReference(text='умный', referral_offset=-2) + self.assertEqual(resolve_syntactic(ref, 3, refs_list), 'умному') + + ref = SyntacticReference(text='умный', referral_offset=-3) + self.assertEqual(resolve_syntactic(ref, 3, refs_list), 'умному') + def test_resolve_invalid(self): self.assertEqual(self.resolver.resolve(''), '') @@ -57,29 +85,35 @@ class TestResolver(unittest.TestCase): self.assertEqual(self.resolver.resolve('simple @{unparsable ref} text'), 'simple @{unparsable ref} text') self.assertEqual(len(self.resolver.refs), 0) + def test_resolve_single(self): - self.assertEqual(self.resolver.resolve('просто @{-1|умный} текст'), 'просто !Некорректное смещение: -1! текст') + resolved = self.resolver.resolve('просто @{-1|умный} текст') + self.assertEqual(resolved, 'просто !Некорректное смещение: -1! текст') self.assertEqual(len(self.resolver.refs), 1) self.assertEqual(self.resolver.refs[0].pos_input, Position(7, 18)) self.assertEqual(self.resolver.refs[0].pos_output, Position(7, 34)) - self.assertEqual(self.resolver.resolve('просто @{X123|sing,nomn} текст'), 'просто !Неизвестная сущность: X123! текст') + resolved = self.resolver.resolve('просто @{X123|sing,nomn} текст') + self.assertEqual(resolved, 'просто !Неизвестная сущность: X123! текст') self.assertEqual(len(self.resolver.refs), 1) self.assertEqual(self.resolver.refs[0].pos_input, Position(7, 24)) self.assertEqual(self.resolver.refs[0].pos_output, Position(7, 35)) - self.assertEqual(self.resolver.resolve('@{X1|sing,nomn}'), 'человек') + resolved = self.resolver.resolve('@{X1|sing,nomn}') + self.assertEqual(resolved, 'человек') self.assertEqual(len(self.resolver.refs), 1) self.assertEqual(self.resolver.refs[0].pos_input, Position(0, 15)) self.assertEqual(self.resolver.refs[0].pos_output, Position(0, 7)) - self.assertEqual(self.resolver.resolve('просто @{X1|sing,nomn} текст'), 'просто человек текст') + resolved = self.resolver.resolve('просто @{X1|sing,nomn} текст') + self.assertEqual(resolved, 'просто человек текст') self.assertEqual(len(self.resolver.refs), 1) self.assertEqual(self.resolver.refs[0].pos_input, Position(7, 22)) self.assertEqual(self.resolver.refs[0].pos_output, Position(7, 14)) + def test_resolve_multiple(self): - input = '@{X1|sing,datv} @{-1|умный} @{X1|plur} завидуют' + input = '@{X1|sing,datv} @{-1|умный} @{X1|plur} завидуют' self.assertEqual(self.resolver.resolve(input), 'человеку умному люди завидуют') self.assertEqual(len(self.resolver.refs), 3) self.assertEqual(self.resolver.refs[0].pos_input, Position(0, 15)) @@ -89,6 +123,7 @@ class TestResolver(unittest.TestCase): self.assertEqual(self.resolver.refs[2].pos_input, Position(28, 38)) self.assertEqual(self.resolver.refs[2].pos_output, Position(16, 20)) + def test_resolve_manual_forms(self): self.context['X1'] = Entity( alias='X1', diff --git a/tests/t_rumodel.py b/tests/t_rumodel.py index 870960b..77bc44e 100644 --- a/tests/t_rumodel.py +++ b/tests/t_rumodel.py @@ -10,9 +10,9 @@ class TestTags(unittest.TestCase): def test_split_tags(self): self.assertEqual(split_grams(''), []) self.assertEqual(split_grams('NOUN'), ['NOUN']) - self.assertEqual(split_grams('NOUN,plur,sing'), ['NOUN','plur','sing']) + self.assertEqual(split_grams('NOUN,plur,sing'), ['NOUN', 'plur', 'sing']) def test_combine_tags(self): 
self.assertEqual(combine_grams([]), '') self.assertEqual(combine_grams(['NOUN']), 'NOUN') - self.assertEqual(combine_grams(['NOUN','plur','sing']), 'NOUN,plur,sing') + self.assertEqual(combine_grams(['NOUN', 'plur', 'sing']), 'NOUN,plur,sing') diff --git a/tests/t_ruparser.py b/tests/t_ruparser.py index eab2d77..0dc8122 100644 --- a/tests/t_ruparser.py +++ b/tests/t_ruparser.py @@ -7,6 +7,7 @@ from cctext import PhraseParser parser = PhraseParser() +# cSpell:disable class TestRuParser(unittest.TestCase): ''' Test class for russian parsing. ''' @@ -28,25 +29,25 @@ class TestRuParser(unittest.TestCase): def test_parse_word(self): ''' Test parse for single word. ''' - self._assert_parse('1', ['NUMB', 'intg']) - self._assert_parse('пять', ['NUMR', 'nomn']) - self._assert_parse('трёх', ['NUMR', 'gent']) - self._assert_parse('трех', ['NUMR', 'gent']) - self._assert_parse('круча', ['NOUN', 'femn', 'sing', 'nomn', 'inan']) - self._assert_parse('круть', ['NOUN', 'femn', 'sing', 'nomn', 'inan', 'Sgtm', 'Geox']) - self._assert_parse('ПВО', ['NOUN', 'femn', 'sing', 'nomn', 'inan', 'Sgtm', 'Abbr', 'Fixd']) - self._assert_parse('СМИ', ['NOUN', 'plur', 'nomn', 'inan', 'Pltm', 'Abbr', 'Fixd', 'GNdr']) - self._assert_parse('ему', ['NPRO', 'masc', 'sing', 'datv', '3per', 'Anph']) - self._assert_parse('крутит', ['VERB', 'sing', '3per', 'pres', 'impf', 'tran', 'indc']) - self._assert_parse('смеркалось', ['VERB', 'neut', 'sing', 'Impe', 'past', 'impf', 'intr', 'indc']) - self._assert_parse('крутить', ['INFN', 'impf', 'tran']) - self._assert_parse('крученый', ['ADJF', 'masc', 'sing', 'nomn']) - self._assert_parse('крут', ['ADJS', 'masc', 'sing', 'Qual']) - self._assert_parse('крутящего', ['PRTF', 'masc', 'sing', 'gent', 'pres', 'impf', 'tran', 'actv']) - self._assert_parse('откручен', ['PRTS', 'masc', 'sing', 'past', 'perf', 'pssv']) - self._assert_parse('крутя', ['GRND', 'pres', 'impf', 'tran']) - self._assert_parse('круто', ['ADVB']) - self._assert_parse('круче', ['COMP', 'Qual']) + self._assert_parse('1', ['NUMB', 'intg']) + self._assert_parse('пять', ['NUMR', 'nomn']) + self._assert_parse('трёх', ['NUMR', 'gent']) + self._assert_parse('трех', ['NUMR', 'gent']) + self._assert_parse('круча', ['NOUN', 'femn', 'sing', 'nomn', 'inan']) + self._assert_parse('круть', ['NOUN', 'femn', 'sing', 'nomn', 'inan', 'Sgtm', 'Geox']) + self._assert_parse('ПВО', ['NOUN', 'femn', 'sing', 'nomn', 'inan', 'Sgtm', 'Abbr', 'Fixd']) + self._assert_parse('СМИ', ['NOUN', 'plur', 'nomn', 'inan', 'Pltm', 'Abbr', 'Fixd', 'GNdr']) + self._assert_parse('ему', ['NPRO', 'masc', 'sing', 'datv', '3per', 'Anph']) + self._assert_parse('крутит', ['VERB', 'sing', '3per', 'pres', 'impf', 'tran', 'indc']) + self._assert_parse('смеркалось', ['VERB', 'neut', 'sing', 'Impe', 'past', 'impf', 'intr', 'indc']) + self._assert_parse('крутить', ['INFN', 'impf', 'tran']) + self._assert_parse('крученый', ['ADJF', 'masc', 'sing', 'nomn']) + self._assert_parse('крут', ['ADJS', 'masc', 'sing', 'Qual']) + self._assert_parse('крутящего', ['PRTF', 'masc', 'sing', 'gent', 'pres', 'impf', 'tran', 'actv']) + self._assert_parse('откручен', ['PRTS', 'masc', 'sing', 'past', 'perf', 'pssv']) + self._assert_parse('крутя', ['GRND', 'pres', 'impf', 'tran']) + self._assert_parse('круто', ['ADVB']) + self._assert_parse('круче', ['COMP', 'Qual']) self._assert_parse(',', ['PNCT']) self._assert_parse('32-', ['intg', 'NUMB']) @@ -199,7 +200,7 @@ class TestRuParser(unittest.TestCase): self._assert_inflect('три', ['loct'], 'трёх') def test_inflect_adjf(self): - ''' Test inflection 
for single adjectif. ''' + ''' Test inflection for single adjective. ''' self._assert_inflect('хороший', ['nomn'], 'хороший') self._assert_inflect('хороший', ['gent'], 'хорошего') self._assert_inflect('хороший', ['datv'], 'хорошему') @@ -317,8 +318,8 @@ class TestRuParser(unittest.TestCase): # self._assert_inflect('реципиенту воздействия', ['nomn'], 'реципиент воздействия') - def test_inflect_complex_mainword(self): - ''' Test inflection of mainword conmprised of multiple words. ''' + def test_inflect_complex_main(self): + ''' Test inflection of main word comprised of multiple words. ''' # Do not parse complex main words self._assert_inflect('слона и кота', ['nomn'], 'слон и кота') self._assert_inflect('сказал и поехал', ['INFN'], 'сказать и поехал') @@ -440,6 +441,7 @@ class TestRuParser(unittest.TestCase): self.assertEqual(parser.find_substr('сложного слона', 'слоном'), (9, 14)) self.assertEqual(parser.find_substr('сложного красивого слона', 'красивые слоном'), (9, 24)) self.assertEqual(parser.find_substr('человек', 'люди'), (0, 7)) +# cSpell:enable if __name__ == '__main__':
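
Usage sketch (not part of the patch): the calls below exercise the renamed public API from cctext/api.py as re-exported by cctext/__init__.py. Inputs and expected results are taken from the assertions in tests/t_api.py above; parse() tag order may differ in practice, since the tests compare grammeme sets rather than strings.

    import cctext as cc

    # Morphological tags for a single word, constrained to masculine forms.
    print(cc.parse('слон', require_grams='masc'))   # e.g. 'NOUN,anim,masc,sing,nomn'

    # Dictionary (normal) form of an inflected phrase.
    print(cc.normalize('диких людей'))              # 'дикий человек'

    # Inflect a phrase into the requested form: locative plural.
    print(cc.inflect('шкала оценок', 'loct,plur'))  # 'шкалах оценок'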