mirror of
https://github.com/IRBorisov/cctext.git
synced 2025-06-25 21:20:36 +03:00
Setup tools and fix linter issues
This commit is contained in:
parent
cc94f805b8
commit
1e8166c9ac
6
.flake8
Normal file
6
.flake8
Normal file
|
@ -0,0 +1,6 @@
|
||||||
|
[flake8]
|
||||||
|
# E303 - too many blank lines
|
||||||
|
# E203 - whitespace before ',', ';' or ':'
|
||||||
|
ignore = E303, E203
|
||||||
|
exclude = __init__.py
|
||||||
|
max-line-length = 120
|
7
.vscode/launch.json
vendored
7
.vscode/launch.json
vendored
|
@ -17,6 +17,13 @@
|
||||||
"request": "launch",
|
"request": "launch",
|
||||||
"script": "${workspaceFolder}/scripts/RunLint.ps1",
|
"script": "${workspaceFolder}/scripts/RunLint.ps1",
|
||||||
"args": []
|
"args": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "Setup",
|
||||||
|
"type": "PowerShell",
|
||||||
|
"request": "launch",
|
||||||
|
"script": "${workspaceFolder}/scripts/Setup.ps1",
|
||||||
|
"args": []
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|
40
.vscode/settings.json
vendored
40
.vscode/settings.json
vendored
|
@ -3,38 +3,58 @@
|
||||||
".mypy_cache/": true,
|
".mypy_cache/": true,
|
||||||
".pytest_cache/": true
|
".pytest_cache/": true
|
||||||
},
|
},
|
||||||
"python.testing.unittestArgs": ["-v", "-s", "./tests", "-p", "test*.py"],
|
"python.testing.unittestArgs": ["-v", "-s", "./tests", "-p", "t_*.py"],
|
||||||
"python.testing.pytestEnabled": false,
|
"python.testing.pytestEnabled": false,
|
||||||
"python.testing.unittestEnabled": true,
|
"python.testing.unittestEnabled": true,
|
||||||
"eslint.workingDirectories": [
|
|
||||||
{
|
|
||||||
"mode": "auto"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"python.analysis.typeCheckingMode": "off",
|
"python.analysis.typeCheckingMode": "off",
|
||||||
"python.analysis.ignore": ["**/tests/**", "**/node_modules/**", "**/venv/**"],
|
"python.analysis.ignore": ["**/tests/**", "**/venv/**"],
|
||||||
"cSpell.words": [
|
"cSpell.words": [
|
||||||
|
"ablt",
|
||||||
|
"accs",
|
||||||
|
"actv",
|
||||||
"ADJF",
|
"ADJF",
|
||||||
"ADJS",
|
"ADJS",
|
||||||
"ADVB",
|
"ADVB",
|
||||||
|
"Anph",
|
||||||
|
"cctext",
|
||||||
|
"datv",
|
||||||
|
"femn",
|
||||||
|
"Fixd",
|
||||||
|
"Geox",
|
||||||
|
"grammeme",
|
||||||
"Grammemes",
|
"Grammemes",
|
||||||
"GRND",
|
"GRND",
|
||||||
|
"Impe",
|
||||||
|
"impr",
|
||||||
|
"inan",
|
||||||
|
"indc",
|
||||||
"INFN",
|
"INFN",
|
||||||
|
"intg",
|
||||||
"INTJ",
|
"INTJ",
|
||||||
|
"loct",
|
||||||
|
"moprho",
|
||||||
"multiword",
|
"multiword",
|
||||||
|
"nomn",
|
||||||
"NPRO",
|
"NPRO",
|
||||||
"NUMR",
|
"NUMR",
|
||||||
"Opencorpora",
|
"Opencorpora",
|
||||||
|
"Pltm",
|
||||||
"PNCT",
|
"PNCT",
|
||||||
"PRCL",
|
"PRCL",
|
||||||
|
"Prnt",
|
||||||
"PRTF",
|
"PRTF",
|
||||||
"PRTS",
|
"PRTS",
|
||||||
|
"pssv",
|
||||||
"pymorphy",
|
"pymorphy",
|
||||||
"razdel",
|
"razdel",
|
||||||
"rumodel",
|
"rumodel",
|
||||||
|
"ruparser",
|
||||||
|
"Sgtm",
|
||||||
"tagset",
|
"tagset",
|
||||||
"unknwn"
|
"termform",
|
||||||
|
"unknwn",
|
||||||
|
"круть",
|
||||||
|
"фторметил"
|
||||||
],
|
],
|
||||||
"cSpell.language": "en,ru",
|
"cSpell.language": "en,ru"
|
||||||
"cSpell.ignorePaths": ["node_modules/**", "*.json"]
|
|
||||||
}
|
}
|
||||||
|
|
1
TODO.txt
Normal file
1
TODO.txt
Normal file
|
@ -0,0 +1 @@
|
||||||
|
- implement Part of speech transition for VERB <-> NOUN
|
|
@ -1,16 +1,51 @@
|
||||||
''' Concept core text processing library. '''
|
''' Concept core text processing library. '''
|
||||||
# pylint: skip-file
|
# pylint: skip-file
|
||||||
from .syntax import RuSyntax, Capitalization
|
from .syntax import (
|
||||||
from .rumodel import Morphology, SemanticRole, WordTag, morpho, split_grams, combine_grams
|
RuSyntax,
|
||||||
from .ruparser import PhraseParser, WordToken, Collation
|
Capitalization
|
||||||
from .reference import EntityReference, ReferenceType, SyntacticReference, parse_reference
|
)
|
||||||
from .context import TermForm, Entity, TermContext
|
from .rumodel import (
|
||||||
from .resolver import Reference, Position, Resolver, ResolvedReference, resolve_entity, resolve_syntactic, extract_entities
|
Morphology,
|
||||||
|
SemanticRole,
|
||||||
from .conceptapi import (
|
WordTag,
|
||||||
parse, normalize,
|
morpho,
|
||||||
generate_lexeme, inflect, inflect_context, inflect_substitute, inflect_dependant,
|
split_grams,
|
||||||
match_all_morpho, find_substr
|
combine_grams
|
||||||
|
)
|
||||||
|
from .ruparser import (
|
||||||
|
PhraseParser,
|
||||||
|
WordToken,
|
||||||
|
Collation
|
||||||
|
)
|
||||||
|
from .reference import (
|
||||||
|
EntityReference,
|
||||||
|
ReferenceType,
|
||||||
|
SyntacticReference,
|
||||||
|
parse_reference
|
||||||
|
)
|
||||||
|
from .context import (
|
||||||
|
TermForm,
|
||||||
|
Entity,
|
||||||
|
TermContext
|
||||||
|
)
|
||||||
|
from .resolver import (
|
||||||
|
Reference,
|
||||||
|
Position,
|
||||||
|
Resolver,
|
||||||
|
ResolvedReference,
|
||||||
|
resolve_entity,
|
||||||
|
resolve_syntactic,
|
||||||
|
extract_entities
|
||||||
)
|
)
|
||||||
|
|
||||||
# TODO: implement Part of speech transition for VERB <-> NOUN
|
from .api import (
|
||||||
|
parse,
|
||||||
|
normalize,
|
||||||
|
generate_lexeme,
|
||||||
|
inflect,
|
||||||
|
inflect_context,
|
||||||
|
inflect_substitute,
|
||||||
|
inflect_dependant,
|
||||||
|
match_all_morpho,
|
||||||
|
find_substr
|
||||||
|
)
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
'''
|
'''
|
||||||
Concept API Python functions.
|
Core API Python procedures.
|
||||||
|
|
||||||
::guarantee:: doesn't raise exceptions and returns workable outputs
|
::guarantee:: doesn't raise exceptions and returns workable outputs
|
||||||
'''
|
'''
|
||||||
|
@ -21,11 +21,6 @@ def parse(text: str, require_grams: str = '') -> str:
|
||||||
return result if result != 'UNKN' else ''
|
return result if result != 'UNKN' else ''
|
||||||
|
|
||||||
|
|
||||||
# def parse_variants(text: str, require_grams: str = '') -> list[tuple[str, str]]:
|
|
||||||
# ''' Get all variants of a parse.
|
|
||||||
# ::returns:: string of comma separated grammar tags or empty string '''
|
|
||||||
|
|
||||||
|
|
||||||
def generate_lexeme(text_normal: str) -> list[tuple[str, str]]:
|
def generate_lexeme(text_normal: str) -> list[tuple[str, str]]:
|
||||||
''' Get all inflected forms belonging to same Lexeme. '''
|
''' Get all inflected forms belonging to same Lexeme. '''
|
||||||
model = parser.parse(text_normal)
|
model = parser.parse(text_normal)
|
|
@ -17,8 +17,8 @@ class TermForm(TypedDict):
|
||||||
def _match_grams(query: Iterable[str], test: Iterable[str]) -> bool:
|
def _match_grams(query: Iterable[str], test: Iterable[str]) -> bool:
|
||||||
''' Check if grams from test fit query. '''
|
''' Check if grams from test fit query. '''
|
||||||
for gram in test:
|
for gram in test:
|
||||||
if not gram in query:
|
if gram not in query:
|
||||||
if not gram in WordTag.PARTS_OF_SPEECH:
|
if gram not in WordTag.PARTS_OF_SPEECH:
|
||||||
return False
|
return False
|
||||||
for pos in WordTag.PARTS_OF_SPEECH:
|
for pos in WordTag.PARTS_OF_SPEECH:
|
||||||
if pos in query:
|
if pos in query:
|
||||||
|
@ -35,7 +35,7 @@ def _search_form(query: Iterable[str], data: Iterable[TermForm]) -> Optional[str
|
||||||
|
|
||||||
class Entity:
|
class Entity:
|
||||||
''' Represents text entity. '''
|
''' Represents text entity. '''
|
||||||
def __init__(self, alias: str, nominal: str, manual_forms: Optional[Iterable[TermForm]]=None):
|
def __init__(self, alias: str, nominal: str, manual_forms: Optional[Iterable[TermForm]] = None):
|
||||||
if manual_forms is None:
|
if manual_forms is None:
|
||||||
self.manual = []
|
self.manual = []
|
||||||
else:
|
else:
|
||||||
|
|
|
@ -5,7 +5,7 @@ from dataclasses import dataclass
|
||||||
|
|
||||||
from .rumodel import split_grams
|
from .rumodel import split_grams
|
||||||
|
|
||||||
from .conceptapi import inflect_dependant
|
from .api import inflect_dependant
|
||||||
from .context import TermContext
|
from .context import TermContext
|
||||||
from .reference import EntityReference, SyntacticReference, parse_reference, Reference
|
from .reference import EntityReference, SyntacticReference, parse_reference, Reference
|
||||||
|
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
''' Parsing russian language using pymorphy3 library. '''
|
''' Parsing the Russian language based on the pymorphy3 library. '''
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
|
@ -155,9 +155,9 @@ class PhraseParser:
|
||||||
def parse(self, text: str,
|
def parse(self, text: str,
|
||||||
require_index: int = INDEX_NONE,
|
require_index: int = INDEX_NONE,
|
||||||
require_grams: Optional[Grammemes] = None) -> Optional[Collation]:
|
require_grams: Optional[Grammemes] = None) -> Optional[Collation]:
|
||||||
'''
|
'''
|
||||||
Determine morpho tags for input text.
|
Determine morpho tags for input text.
|
||||||
::returns:: Morphology of a text or None if no suitable form is available
|
::returns:: Morphology of a text or None if no suitable form is available
|
||||||
'''
|
'''
|
||||||
segments = list(RuSyntax.tokenize(text))
|
segments = list(RuSyntax.tokenize(text))
|
||||||
if len(segments) == 0:
|
if len(segments) == 0:
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
''' Russian language syntax incapsulation. '''
|
''' Russian language syntax. '''
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
from enum import Enum, unique
|
from enum import Enum, unique
|
||||||
|
|
||||||
|
|
Binary file not shown.
|
@ -1,3 +1,2 @@
|
||||||
pymorphy3
|
pymorphy3
|
||||||
pymorphy3-dicts-ru
|
|
||||||
razdel
|
razdel
|
11
scripts/Setup.ps1
Normal file
11
scripts/Setup.ps1
Normal file
|
@ -0,0 +1,11 @@
|
||||||
|
Set-Location $PSScriptRoot\..
|
||||||
|
|
||||||
|
$python = '.\venv\Scripts\python.exe'
|
||||||
|
$env = '.\venv'
|
||||||
|
if (Test-Path -Path $python -PathType Leaf) {
|
||||||
|
Remove-Item $env -Recurse -Force
|
||||||
|
}
|
||||||
|
|
||||||
|
& 'python' -m venv .\venv
|
||||||
|
& $python -m pip install --upgrade pip
|
||||||
|
& $python -m pip install -r requirements-build.txt
|
|
@ -1,10 +0,0 @@
|
||||||
# Run lint
|
|
||||||
function RunLinters() {
|
|
||||||
$pylint = "$PSScriptRoot\..\venv\Scripts\pylint.exe"
|
|
||||||
$mypy = "$PSScriptRoot\..\venv\Scripts\mypy.exe"
|
|
||||||
|
|
||||||
& $pylint cctext
|
|
||||||
& $mypy cctext
|
|
||||||
}
|
|
||||||
|
|
||||||
RunLinters
|
|
|
@ -2,7 +2,7 @@
|
||||||
from .t_reference import *
|
from .t_reference import *
|
||||||
from .t_ruparser import *
|
from .t_ruparser import *
|
||||||
from .t_syntax import *
|
from .t_syntax import *
|
||||||
from .t_conceptapi import *
|
from .t_api import *
|
||||||
from .t_rumodel import *
|
from .t_rumodel import *
|
||||||
from .t_context import *
|
from .t_context import *
|
||||||
from .t_resolver import *
|
from .t_resolver import *
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
''' Unit tests: conceptapi. '''
|
''' Unit tests: api. '''
|
||||||
import unittest
|
import unittest
|
||||||
|
|
||||||
import cctext as cc
|
import cctext as cc
|
||||||
|
@ -8,19 +8,22 @@ class TestConceptAPI(unittest.TestCase):
|
||||||
'''Test class for Concept API.'''
|
'''Test class for Concept API.'''
|
||||||
def _assert_tags(self, actual: str, expected: str):
|
def _assert_tags(self, actual: str, expected: str):
|
||||||
self.assertEqual(set(cc.split_grams(actual)), set(cc.split_grams(expected)))
|
self.assertEqual(set(cc.split_grams(actual)), set(cc.split_grams(expected)))
|
||||||
|
|
||||||
|
|
||||||
def test_parse(self):
|
def test_parse(self):
|
||||||
''' Test parsing. '''
|
''' Test parsing. '''
|
||||||
self._assert_tags(cc.parse(''), '')
|
self._assert_tags(cc.parse(''), '')
|
||||||
self._assert_tags(cc.parse('1'), 'NUMB,intg')
|
self._assert_tags(cc.parse('1'), 'NUMB,intg')
|
||||||
self._assert_tags(cc.parse('слон', require_grams='masc'), 'NOUN,anim,masc,sing,nomn')
|
self._assert_tags(cc.parse('слон', require_grams='masc'), 'NOUN,anim,masc,sing,nomn')
|
||||||
|
|
||||||
|
|
||||||
def test_normalize_word(self):
|
def test_normalize_word(self):
|
||||||
''' Test normalize for single word. '''
|
''' Test normalize for single word. '''
|
||||||
self.assertEqual(cc.normalize(''), '')
|
self.assertEqual(cc.normalize(''), '')
|
||||||
self.assertEqual(cc.normalize('первого'), 'первый')
|
self.assertEqual(cc.normalize('первого'), 'первый')
|
||||||
self.assertEqual(cc.normalize('диких людей'), 'дикий человек')
|
self.assertEqual(cc.normalize('диких людей'), 'дикий человек')
|
||||||
|
|
||||||
|
|
||||||
def test_generate_lexeme(self):
|
def test_generate_lexeme(self):
|
||||||
''' Test all lexical forms. '''
|
''' Test all lexical forms. '''
|
||||||
self.assertEqual(cc.generate_lexeme(''), [])
|
self.assertEqual(cc.generate_lexeme(''), [])
|
||||||
|
@ -35,6 +38,7 @@ class TestConceptAPI(unittest.TestCase):
|
||||||
self.assertEqual(forms[0][0], 'молодой человек')
|
self.assertEqual(forms[0][0], 'молодой человек')
|
||||||
self._assert_tags(forms[0][1], 'nomn,masc,sing,anim,NOUN')
|
self._assert_tags(forms[0][1], 'nomn,masc,sing,anim,NOUN')
|
||||||
|
|
||||||
|
|
||||||
def test_inflect(self):
|
def test_inflect(self):
|
||||||
''' Test inflection. '''
|
''' Test inflection. '''
|
||||||
self.assertEqual(cc.inflect('', ''), '')
|
self.assertEqual(cc.inflect('', ''), '')
|
||||||
|
@ -44,16 +48,19 @@ class TestConceptAPI(unittest.TestCase):
|
||||||
self.assertEqual(cc.inflect('слона', 'nomn, plur'), 'слоны')
|
self.assertEqual(cc.inflect('слона', 'nomn, plur'), 'слоны')
|
||||||
self.assertEqual(cc.inflect('шкала оценок', 'loct,plur'), 'шкалах оценок')
|
self.assertEqual(cc.inflect('шкала оценок', 'loct,plur'), 'шкалах оценок')
|
||||||
|
|
||||||
|
|
||||||
def test_find_substr(self):
|
def test_find_substr(self):
|
||||||
'''Test substring search'''
|
'''Test substring search'''
|
||||||
self.assertEqual(cc.find_substr('', ''), (0, 0))
|
self.assertEqual(cc.find_substr('', ''), (0, 0))
|
||||||
self.assertEqual(cc.find_substr('сложного красивого слона', 'красивые слоном'), (9, 24))
|
self.assertEqual(cc.find_substr('сложного красивого слона', 'красивые слоном'), (9, 24))
|
||||||
|
|
||||||
|
|
||||||
def test_inflect_context(self):
|
def test_inflect_context(self):
|
||||||
'''Test contex inflection'''
|
'''Test context inflection'''
|
||||||
self.assertEqual(cc.inflect_context('', '', ''), '')
|
self.assertEqual(cc.inflect_context('', '', ''), '')
|
||||||
self.assertEqual(cc.inflect_context('красивый', '', 'чашка'), 'красивая')
|
self.assertEqual(cc.inflect_context('красивый', '', 'чашка'), 'красивая')
|
||||||
|
|
||||||
|
|
||||||
def test_inflect_substitute(self):
|
def test_inflect_substitute(self):
|
||||||
'''Test substitute inflection'''
|
'''Test substitute inflection'''
|
||||||
self.assertEqual(cc.inflect_substitute('', ''), '')
|
self.assertEqual(cc.inflect_substitute('', ''), '')
|
||||||
|
@ -61,6 +68,7 @@ class TestConceptAPI(unittest.TestCase):
|
||||||
self.assertEqual(cc.inflect_substitute('слон', ''), 'слон')
|
self.assertEqual(cc.inflect_substitute('слон', ''), 'слон')
|
||||||
self.assertEqual(cc.inflect_substitute('красивый бантик', 'кошкой'), 'красивым бантиком')
|
self.assertEqual(cc.inflect_substitute('красивый бантик', 'кошкой'), 'красивым бантиком')
|
||||||
|
|
||||||
|
|
||||||
def test_inflect_dependant(self):
|
def test_inflect_dependant(self):
|
||||||
''' Test coordination inflection. '''
|
''' Test coordination inflection. '''
|
||||||
self.assertEqual(cc.inflect_dependant('', ''), '')
|
self.assertEqual(cc.inflect_dependant('', ''), '')
|
||||||
|
@ -69,6 +77,7 @@ class TestConceptAPI(unittest.TestCase):
|
||||||
self.assertEqual(cc.inflect_dependant('общий', 'мать'), 'общая')
|
self.assertEqual(cc.inflect_dependant('общий', 'мать'), 'общая')
|
||||||
self.assertEqual(cc.inflect_dependant('синий', 'слонов'), 'синих')
|
self.assertEqual(cc.inflect_dependant('синий', 'слонов'), 'синих')
|
||||||
|
|
||||||
|
|
||||||
def test_match_all_morpho(self):
|
def test_match_all_morpho(self):
|
||||||
''' Test extracting matching morpho. '''
|
''' Test extracting matching morpho. '''
|
||||||
self.assertEqual(cc.match_all_morpho('', ''), [])
|
self.assertEqual(cc.match_all_morpho('', ''), [])
|
|
@ -1,7 +1,8 @@
|
||||||
''' Unit tests: context. '''
|
''' Unit tests: context. '''
|
||||||
import unittest
|
import unittest
|
||||||
|
|
||||||
from cctext.context import Entity, TermContext
|
from cctext.context import Entity
|
||||||
|
|
||||||
|
|
||||||
class TestEntity(unittest.TestCase):
|
class TestEntity(unittest.TestCase):
|
||||||
'''Test Entity termform access.'''
|
'''Test Entity termform access.'''
|
||||||
|
@ -9,24 +10,27 @@ class TestEntity(unittest.TestCase):
|
||||||
self.alias = 'X1'
|
self.alias = 'X1'
|
||||||
self.nominal = 'человек'
|
self.nominal = 'человек'
|
||||||
self.text1 = 'test1'
|
self.text1 = 'test1'
|
||||||
self.form1 = ['sing','datv']
|
self.form1 = ['sing', 'datv']
|
||||||
self.entity = Entity(self.alias, self.nominal, [{'text': self.text1, 'grams': self.form1}])
|
self.entity = Entity(self.alias, self.nominal, [{'text': self.text1, 'grams': self.form1}])
|
||||||
|
|
||||||
|
|
||||||
def test_attributes(self):
|
def test_attributes(self):
|
||||||
self.assertEqual(self.entity.alias, self.alias)
|
self.assertEqual(self.entity.alias, self.alias)
|
||||||
self.assertEqual(self.entity.get_nominal(), self.nominal)
|
self.assertEqual(self.entity.get_nominal(), self.nominal)
|
||||||
self.assertEqual(self.entity.manual, [{'text': self.text1, 'grams': self.form1}])
|
self.assertEqual(self.entity.manual, [{'text': self.text1, 'grams': self.form1}])
|
||||||
|
|
||||||
|
|
||||||
def test_get_form(self):
|
def test_get_form(self):
|
||||||
self.assertEqual(self.entity.get_form([]), self.nominal)
|
self.assertEqual(self.entity.get_form([]), self.nominal)
|
||||||
self.assertEqual(self.entity.get_form(self.form1), self.text1)
|
self.assertEqual(self.entity.get_form(self.form1), self.text1)
|
||||||
self.assertEqual(self.entity.get_form(['invalid tags']), '!Неизвестная граммема: invalid tags!')
|
self.assertEqual(self.entity.get_form(['invalid tags']), '!Неизвестная граммема: invalid tags!')
|
||||||
self.assertEqual(self.entity.get_form(['plur']), 'люди')
|
self.assertEqual(self.entity.get_form(['plur']), 'люди')
|
||||||
|
|
||||||
|
|
||||||
def test_set_nominal(self):
|
def test_set_nominal(self):
|
||||||
new_nomial = 'TEST'
|
new_nominal = 'TEST'
|
||||||
self.assertEqual(self.entity.get_form(['plur']), 'люди')
|
self.assertEqual(self.entity.get_form(['plur']), 'люди')
|
||||||
self.entity.set_nominal(new_nomial)
|
self.entity.set_nominal(new_nominal)
|
||||||
self.assertEqual(self.entity.get_nominal(), new_nomial)
|
self.assertEqual(self.entity.get_nominal(), new_nominal)
|
||||||
self.assertEqual(self.entity.get_form(['plur']), new_nomial)
|
self.assertEqual(self.entity.get_form(['plur']), new_nominal)
|
||||||
self.assertEqual(self.entity.manual, [])
|
self.assertEqual(self.entity.manual, [])
|
||||||
|
|
|
@ -3,21 +3,25 @@ import unittest
|
||||||
|
|
||||||
from cctext import EntityReference, ReferenceType, SyntacticReference, parse_reference
|
from cctext import EntityReference, ReferenceType, SyntacticReference, parse_reference
|
||||||
|
|
||||||
|
|
||||||
class TestReferences(unittest.TestCase):
|
class TestReferences(unittest.TestCase):
|
||||||
''' Test class for references. '''
|
''' Test class for references. '''
|
||||||
|
|
||||||
|
|
||||||
def test_EntityReference(self):
|
def test_EntityReference(self):
|
||||||
''' Testing EntityRefence basics. '''
|
''' Testing EntityReference basics. '''
|
||||||
ref = EntityReference('X1', 'sing,nomn')
|
ref = EntityReference('X1', 'sing,nomn')
|
||||||
self.assertEqual(ref.get_type(), ReferenceType.entity)
|
self.assertEqual(ref.get_type(), ReferenceType.entity)
|
||||||
self.assertEqual(ref.to_text(), '@{X1|sing,nomn}')
|
self.assertEqual(ref.to_text(), '@{X1|sing,nomn}')
|
||||||
|
|
||||||
|
|
||||||
def test_SyntacticReference(self):
|
def test_SyntacticReference(self):
|
||||||
''' Testing SyntacticReference basics. '''
|
''' Testing SyntacticReference basics. '''
|
||||||
ref = SyntacticReference(-1, 'черный')
|
ref = SyntacticReference(-1, 'черный')
|
||||||
self.assertEqual(ref.get_type(), ReferenceType.syntactic)
|
self.assertEqual(ref.get_type(), ReferenceType.syntactic)
|
||||||
self.assertEqual(ref.to_text(), '@{-1|черный}')
|
self.assertEqual(ref.to_text(), '@{-1|черный}')
|
||||||
|
|
||||||
|
|
||||||
def test_parse_reference_invalid(self):
|
def test_parse_reference_invalid(self):
|
||||||
''' Testing parsing reference invalid input. '''
|
''' Testing parsing reference invalid input. '''
|
||||||
self.assertIsNone(parse_reference(''))
|
self.assertIsNone(parse_reference(''))
|
||||||
|
@ -31,14 +35,15 @@ class TestReferences(unittest.TestCase):
|
||||||
self.assertIsNone(parse_reference('@{1| }'))
|
self.assertIsNone(parse_reference('@{1| }'))
|
||||||
self.assertIsNone(parse_reference('@{0|черный}'))
|
self.assertIsNone(parse_reference('@{0|черный}'))
|
||||||
|
|
||||||
|
|
||||||
def test_parse_reference(self):
|
def test_parse_reference(self):
|
||||||
''' Testing parsing reference text. '''
|
''' Testing parsing reference text. '''
|
||||||
ref = parse_reference('@{1| черный }')
|
ref = parse_reference('@{1| черный }')
|
||||||
self.assertIsNotNone(ref)
|
self.assertIsNotNone(ref)
|
||||||
self.assertEqual(ref.to_text(), '@{1|черный}')
|
self.assertEqual(ref.to_text(), '@{1|черный}')
|
||||||
self.assertEqual(ref.get_type(), ReferenceType.syntactic)
|
self.assertEqual(ref.get_type(), ReferenceType.syntactic)
|
||||||
|
|
||||||
ref = parse_reference('@{X1 | VERB, past, sing}')
|
ref = parse_reference('@{X1 | VERB, past, sing}')
|
||||||
self.assertIsNotNone(ref)
|
self.assertIsNotNone(ref)
|
||||||
self.assertEqual(ref.to_text(), '@{X1|VERB,past,sing}')
|
self.assertEqual(ref.to_text(), '@{X1|VERB,past,sing}')
|
||||||
self.assertEqual(ref.get_type(), ReferenceType.entity)
|
self.assertEqual(ref.get_type(), ReferenceType.entity)
|
||||||
|
|
|
@ -28,24 +28,52 @@ class TestResolver(unittest.TestCase):
|
||||||
self.context['X2'] = Entity('X2', '')
|
self.context['X2'] = Entity('X2', '')
|
||||||
self.resolver = Resolver(self.context)
|
self.resolver = Resolver(self.context)
|
||||||
|
|
||||||
|
|
||||||
def test_resolve_entity(self):
|
def test_resolve_entity(self):
|
||||||
self.assertEqual(resolve_entity(EntityReference('X1', ''), self.context), 'человек')
|
ref = EntityReference('X1', '')
|
||||||
self.assertEqual(resolve_entity(EntityReference('X1', 'plur'), self.context), 'люди')
|
self.assertEqual(resolve_entity(ref, self.context), 'человек')
|
||||||
self.assertEqual(resolve_entity(EntityReference('X2', ''), self.context), '!Отсутствует термин: X2!')
|
|
||||||
self.assertEqual(resolve_entity(EntityReference('X1', 'invalid'), self.context), '!Неизвестная граммема: invalid!')
|
ref = EntityReference('X1', 'plur')
|
||||||
self.assertEqual(resolve_entity(EntityReference('X123', 'plur'), self.context), '!Неизвестная сущность: X123!')
|
self.assertEqual(resolve_entity(ref, self.context), 'люди')
|
||||||
|
|
||||||
|
ref = EntityReference('X2', '')
|
||||||
|
self.assertEqual(resolve_entity(ref, self.context), '!Отсутствует термин: X2!')
|
||||||
|
|
||||||
|
ref = EntityReference('X1', 'invalid')
|
||||||
|
self.assertEqual(resolve_entity(ref, self.context), '!Неизвестная граммема: invalid!')
|
||||||
|
|
||||||
|
ref = EntityReference('X123', 'plur')
|
||||||
|
self.assertEqual(resolve_entity(ref, self.context), '!Неизвестная сущность: X123!')
|
||||||
|
|
||||||
|
|
||||||
def test_resolve_syntactic(self):
|
def test_resolve_syntactic(self):
|
||||||
ref = ResolvedReference(ref=EntityReference('X1', 'sing,datv'), resolved='человеку')
|
ref = ResolvedReference(ref=EntityReference('X1', 'sing,datv'), resolved='человеку')
|
||||||
refs_list = [ref, ref, ref, ref]
|
refs_list = [ref, ref, ref, ref]
|
||||||
self.assertEqual(resolve_syntactic(SyntacticReference(text='умный', referral_offset=-1), 0, refs_list), '!Некорректное смещение: -1!')
|
|
||||||
self.assertEqual(resolve_syntactic(SyntacticReference(text='умный', referral_offset=1), 3, refs_list), '!Некорректное смещение: 1!')
|
ref = SyntacticReference(text='умный', referral_offset=-1)
|
||||||
self.assertEqual(resolve_syntactic(SyntacticReference(text='умный', referral_offset=1), 0, refs_list), 'умному')
|
self.assertEqual(resolve_syntactic(ref, 0, refs_list), '!Некорректное смещение: -1!')
|
||||||
self.assertEqual(resolve_syntactic(SyntacticReference(text='умный', referral_offset=2), 0, refs_list), 'умному')
|
|
||||||
self.assertEqual(resolve_syntactic(SyntacticReference(text='умный', referral_offset=3), 0, refs_list), 'умному')
|
ref = SyntacticReference(text='умный', referral_offset=1)
|
||||||
self.assertEqual(resolve_syntactic(SyntacticReference(text='умный', referral_offset=-1), 3, refs_list), 'умному')
|
self.assertEqual(resolve_syntactic(ref, 3, refs_list), '!Некорректное смещение: 1!')
|
||||||
self.assertEqual(resolve_syntactic(SyntacticReference(text='умный', referral_offset=-2), 3, refs_list), 'умному')
|
|
||||||
self.assertEqual(resolve_syntactic(SyntacticReference(text='умный', referral_offset=-3), 3, refs_list), 'умному')
|
ref = SyntacticReference(text='умный', referral_offset=1)
|
||||||
|
self.assertEqual(resolve_syntactic(ref, 0, refs_list), 'умному')
|
||||||
|
|
||||||
|
ref = SyntacticReference(text='умный', referral_offset=2)
|
||||||
|
self.assertEqual(resolve_syntactic(ref, 0, refs_list), 'умному')
|
||||||
|
|
||||||
|
ref = SyntacticReference(text='умный', referral_offset=3)
|
||||||
|
self.assertEqual(resolve_syntactic(ref, 0, refs_list), 'умному')
|
||||||
|
|
||||||
|
ref = SyntacticReference(text='умный', referral_offset=-1)
|
||||||
|
self.assertEqual(resolve_syntactic(ref, 3, refs_list), 'умному')
|
||||||
|
|
||||||
|
ref = SyntacticReference(text='умный', referral_offset=-2)
|
||||||
|
self.assertEqual(resolve_syntactic(ref, 3, refs_list), 'умному')
|
||||||
|
|
||||||
|
ref = SyntacticReference(text='умный', referral_offset=-3)
|
||||||
|
self.assertEqual(resolve_syntactic(ref, 3, refs_list), 'умному')
|
||||||
|
|
||||||
|
|
||||||
def test_resolve_invalid(self):
|
def test_resolve_invalid(self):
|
||||||
self.assertEqual(self.resolver.resolve(''), '')
|
self.assertEqual(self.resolver.resolve(''), '')
|
||||||
|
@ -57,29 +85,35 @@ class TestResolver(unittest.TestCase):
|
||||||
self.assertEqual(self.resolver.resolve('simple @{unparsable ref} text'), 'simple @{unparsable ref} text')
|
self.assertEqual(self.resolver.resolve('simple @{unparsable ref} text'), 'simple @{unparsable ref} text')
|
||||||
self.assertEqual(len(self.resolver.refs), 0)
|
self.assertEqual(len(self.resolver.refs), 0)
|
||||||
|
|
||||||
|
|
||||||
def test_resolve_single(self):
|
def test_resolve_single(self):
|
||||||
self.assertEqual(self.resolver.resolve('просто @{-1|умный} текст'), 'просто !Некорректное смещение: -1! текст')
|
resolved = self.resolver.resolve('просто @{-1|умный} текст')
|
||||||
|
self.assertEqual(resolved, 'просто !Некорректное смещение: -1! текст')
|
||||||
self.assertEqual(len(self.resolver.refs), 1)
|
self.assertEqual(len(self.resolver.refs), 1)
|
||||||
self.assertEqual(self.resolver.refs[0].pos_input, Position(7, 18))
|
self.assertEqual(self.resolver.refs[0].pos_input, Position(7, 18))
|
||||||
self.assertEqual(self.resolver.refs[0].pos_output, Position(7, 34))
|
self.assertEqual(self.resolver.refs[0].pos_output, Position(7, 34))
|
||||||
|
|
||||||
self.assertEqual(self.resolver.resolve('просто @{X123|sing,nomn} текст'), 'просто !Неизвестная сущность: X123! текст')
|
resolved = self.resolver.resolve('просто @{X123|sing,nomn} текст')
|
||||||
|
self.assertEqual(resolved, 'просто !Неизвестная сущность: X123! текст')
|
||||||
self.assertEqual(len(self.resolver.refs), 1)
|
self.assertEqual(len(self.resolver.refs), 1)
|
||||||
self.assertEqual(self.resolver.refs[0].pos_input, Position(7, 24))
|
self.assertEqual(self.resolver.refs[0].pos_input, Position(7, 24))
|
||||||
self.assertEqual(self.resolver.refs[0].pos_output, Position(7, 35))
|
self.assertEqual(self.resolver.refs[0].pos_output, Position(7, 35))
|
||||||
|
|
||||||
self.assertEqual(self.resolver.resolve('@{X1|sing,nomn}'), 'человек')
|
resolved = self.resolver.resolve('@{X1|sing,nomn}')
|
||||||
|
self.assertEqual(resolved, 'человек')
|
||||||
self.assertEqual(len(self.resolver.refs), 1)
|
self.assertEqual(len(self.resolver.refs), 1)
|
||||||
self.assertEqual(self.resolver.refs[0].pos_input, Position(0, 15))
|
self.assertEqual(self.resolver.refs[0].pos_input, Position(0, 15))
|
||||||
self.assertEqual(self.resolver.refs[0].pos_output, Position(0, 7))
|
self.assertEqual(self.resolver.refs[0].pos_output, Position(0, 7))
|
||||||
|
|
||||||
self.assertEqual(self.resolver.resolve('просто @{X1|sing,nomn} текст'), 'просто человек текст')
|
resolved = self.resolver.resolve('просто @{X1|sing,nomn} текст')
|
||||||
|
self.assertEqual(resolved, 'просто человек текст')
|
||||||
self.assertEqual(len(self.resolver.refs), 1)
|
self.assertEqual(len(self.resolver.refs), 1)
|
||||||
self.assertEqual(self.resolver.refs[0].pos_input, Position(7, 22))
|
self.assertEqual(self.resolver.refs[0].pos_input, Position(7, 22))
|
||||||
self.assertEqual(self.resolver.refs[0].pos_output, Position(7, 14))
|
self.assertEqual(self.resolver.refs[0].pos_output, Position(7, 14))
|
||||||
|
|
||||||
|
|
||||||
def test_resolve_multiple(self):
|
def test_resolve_multiple(self):
|
||||||
input = '@{X1|sing,datv} @{-1|умный} @{X1|plur} завидуют'
|
input = '@{X1|sing,datv} @{-1|умный} @{X1|plur} завидуют'
|
||||||
self.assertEqual(self.resolver.resolve(input), 'человеку умному люди завидуют')
|
self.assertEqual(self.resolver.resolve(input), 'человеку умному люди завидуют')
|
||||||
self.assertEqual(len(self.resolver.refs), 3)
|
self.assertEqual(len(self.resolver.refs), 3)
|
||||||
self.assertEqual(self.resolver.refs[0].pos_input, Position(0, 15))
|
self.assertEqual(self.resolver.refs[0].pos_input, Position(0, 15))
|
||||||
|
@ -89,6 +123,7 @@ class TestResolver(unittest.TestCase):
|
||||||
self.assertEqual(self.resolver.refs[2].pos_input, Position(28, 38))
|
self.assertEqual(self.resolver.refs[2].pos_input, Position(28, 38))
|
||||||
self.assertEqual(self.resolver.refs[2].pos_output, Position(16, 20))
|
self.assertEqual(self.resolver.refs[2].pos_output, Position(16, 20))
|
||||||
|
|
||||||
|
|
||||||
def test_resolve_manual_forms(self):
|
def test_resolve_manual_forms(self):
|
||||||
self.context['X1'] = Entity(
|
self.context['X1'] = Entity(
|
||||||
alias='X1',
|
alias='X1',
|
||||||
|
|
|
@ -10,9 +10,9 @@ class TestTags(unittest.TestCase):
|
||||||
def test_split_tags(self):
|
def test_split_tags(self):
|
||||||
self.assertEqual(split_grams(''), [])
|
self.assertEqual(split_grams(''), [])
|
||||||
self.assertEqual(split_grams('NOUN'), ['NOUN'])
|
self.assertEqual(split_grams('NOUN'), ['NOUN'])
|
||||||
self.assertEqual(split_grams('NOUN,plur,sing'), ['NOUN','plur','sing'])
|
self.assertEqual(split_grams('NOUN,plur,sing'), ['NOUN', 'plur', 'sing'])
|
||||||
|
|
||||||
def test_combine_tags(self):
|
def test_combine_tags(self):
|
||||||
self.assertEqual(combine_grams([]), '')
|
self.assertEqual(combine_grams([]), '')
|
||||||
self.assertEqual(combine_grams(['NOUN']), 'NOUN')
|
self.assertEqual(combine_grams(['NOUN']), 'NOUN')
|
||||||
self.assertEqual(combine_grams(['NOUN','plur','sing']), 'NOUN,plur,sing')
|
self.assertEqual(combine_grams(['NOUN', 'plur', 'sing']), 'NOUN,plur,sing')
|
||||||
|
|
|
@ -7,6 +7,7 @@ from cctext import PhraseParser
|
||||||
parser = PhraseParser()
|
parser = PhraseParser()
|
||||||
|
|
||||||
|
|
||||||
|
# cSpell:disable
|
||||||
class TestRuParser(unittest.TestCase):
|
class TestRuParser(unittest.TestCase):
|
||||||
''' Test class for russian parsing. '''
|
''' Test class for russian parsing. '''
|
||||||
|
|
||||||
|
@ -28,25 +29,25 @@ class TestRuParser(unittest.TestCase):
|
||||||
|
|
||||||
def test_parse_word(self):
|
def test_parse_word(self):
|
||||||
''' Test parse for single word. '''
|
''' Test parse for single word. '''
|
||||||
self._assert_parse('1', ['NUMB', 'intg'])
|
self._assert_parse('1', ['NUMB', 'intg'])
|
||||||
self._assert_parse('пять', ['NUMR', 'nomn'])
|
self._assert_parse('пять', ['NUMR', 'nomn'])
|
||||||
self._assert_parse('трёх', ['NUMR', 'gent'])
|
self._assert_parse('трёх', ['NUMR', 'gent'])
|
||||||
self._assert_parse('трех', ['NUMR', 'gent'])
|
self._assert_parse('трех', ['NUMR', 'gent'])
|
||||||
self._assert_parse('круча', ['NOUN', 'femn', 'sing', 'nomn', 'inan'])
|
self._assert_parse('круча', ['NOUN', 'femn', 'sing', 'nomn', 'inan'])
|
||||||
self._assert_parse('круть', ['NOUN', 'femn', 'sing', 'nomn', 'inan', 'Sgtm', 'Geox'])
|
self._assert_parse('круть', ['NOUN', 'femn', 'sing', 'nomn', 'inan', 'Sgtm', 'Geox'])
|
||||||
self._assert_parse('ПВО', ['NOUN', 'femn', 'sing', 'nomn', 'inan', 'Sgtm', 'Abbr', 'Fixd'])
|
self._assert_parse('ПВО', ['NOUN', 'femn', 'sing', 'nomn', 'inan', 'Sgtm', 'Abbr', 'Fixd'])
|
||||||
self._assert_parse('СМИ', ['NOUN', 'plur', 'nomn', 'inan', 'Pltm', 'Abbr', 'Fixd', 'GNdr'])
|
self._assert_parse('СМИ', ['NOUN', 'plur', 'nomn', 'inan', 'Pltm', 'Abbr', 'Fixd', 'GNdr'])
|
||||||
self._assert_parse('ему', ['NPRO', 'masc', 'sing', 'datv', '3per', 'Anph'])
|
self._assert_parse('ему', ['NPRO', 'masc', 'sing', 'datv', '3per', 'Anph'])
|
||||||
self._assert_parse('крутит', ['VERB', 'sing', '3per', 'pres', 'impf', 'tran', 'indc'])
|
self._assert_parse('крутит', ['VERB', 'sing', '3per', 'pres', 'impf', 'tran', 'indc'])
|
||||||
self._assert_parse('смеркалось', ['VERB', 'neut', 'sing', 'Impe', 'past', 'impf', 'intr', 'indc'])
|
self._assert_parse('смеркалось', ['VERB', 'neut', 'sing', 'Impe', 'past', 'impf', 'intr', 'indc'])
|
||||||
self._assert_parse('крутить', ['INFN', 'impf', 'tran'])
|
self._assert_parse('крутить', ['INFN', 'impf', 'tran'])
|
||||||
self._assert_parse('крученый', ['ADJF', 'masc', 'sing', 'nomn'])
|
self._assert_parse('крученый', ['ADJF', 'masc', 'sing', 'nomn'])
|
||||||
self._assert_parse('крут', ['ADJS', 'masc', 'sing', 'Qual'])
|
self._assert_parse('крут', ['ADJS', 'masc', 'sing', 'Qual'])
|
||||||
self._assert_parse('крутящего', ['PRTF', 'masc', 'sing', 'gent', 'pres', 'impf', 'tran', 'actv'])
|
self._assert_parse('крутящего', ['PRTF', 'masc', 'sing', 'gent', 'pres', 'impf', 'tran', 'actv'])
|
||||||
self._assert_parse('откручен', ['PRTS', 'masc', 'sing', 'past', 'perf', 'pssv'])
|
self._assert_parse('откручен', ['PRTS', 'masc', 'sing', 'past', 'perf', 'pssv'])
|
||||||
self._assert_parse('крутя', ['GRND', 'pres', 'impf', 'tran'])
|
self._assert_parse('крутя', ['GRND', 'pres', 'impf', 'tran'])
|
||||||
self._assert_parse('круто', ['ADVB'])
|
self._assert_parse('круто', ['ADVB'])
|
||||||
self._assert_parse('круче', ['COMP', 'Qual'])
|
self._assert_parse('круче', ['COMP', 'Qual'])
|
||||||
self._assert_parse(',', ['PNCT'])
|
self._assert_parse(',', ['PNCT'])
|
||||||
self._assert_parse('32-', ['intg', 'NUMB'])
|
self._assert_parse('32-', ['intg', 'NUMB'])
|
||||||
|
|
||||||
|
@ -199,7 +200,7 @@ class TestRuParser(unittest.TestCase):
|
||||||
self._assert_inflect('три', ['loct'], 'трёх')
|
self._assert_inflect('три', ['loct'], 'трёх')
|
||||||
|
|
||||||
def test_inflect_adjf(self):
|
def test_inflect_adjf(self):
|
||||||
''' Test inflection for single adjectif. '''
|
''' Test inflection for single adjective. '''
|
||||||
self._assert_inflect('хороший', ['nomn'], 'хороший')
|
self._assert_inflect('хороший', ['nomn'], 'хороший')
|
||||||
self._assert_inflect('хороший', ['gent'], 'хорошего')
|
self._assert_inflect('хороший', ['gent'], 'хорошего')
|
||||||
self._assert_inflect('хороший', ['datv'], 'хорошему')
|
self._assert_inflect('хороший', ['datv'], 'хорошему')
|
||||||
|
@ -317,8 +318,8 @@ class TestRuParser(unittest.TestCase):
|
||||||
|
|
||||||
# self._assert_inflect('реципиенту воздействия', ['nomn'], 'реципиент воздействия')
|
# self._assert_inflect('реципиенту воздействия', ['nomn'], 'реципиент воздействия')
|
||||||
|
|
||||||
def test_inflect_complex_mainword(self):
|
def test_inflect_complex_main(self):
|
||||||
''' Test inflection of mainword conmprised of multiple words. '''
|
''' Test inflection of main word comprised of multiple words. '''
|
||||||
# Do not parse complex main words
|
# Do not parse complex main words
|
||||||
self._assert_inflect('слона и кота', ['nomn'], 'слон и кота')
|
self._assert_inflect('слона и кота', ['nomn'], 'слон и кота')
|
||||||
self._assert_inflect('сказал и поехал', ['INFN'], 'сказать и поехал')
|
self._assert_inflect('сказал и поехал', ['INFN'], 'сказать и поехал')
|
||||||
|
@ -440,6 +441,7 @@ class TestRuParser(unittest.TestCase):
|
||||||
self.assertEqual(parser.find_substr('сложного слона', 'слоном'), (9, 14))
|
self.assertEqual(parser.find_substr('сложного слона', 'слоном'), (9, 14))
|
||||||
self.assertEqual(parser.find_substr('сложного красивого слона', 'красивые слоном'), (9, 24))
|
self.assertEqual(parser.find_substr('сложного красивого слона', 'красивые слоном'), (9, 24))
|
||||||
self.assertEqual(parser.find_substr('человек', 'люди'), (0, 7))
|
self.assertEqual(parser.find_substr('человек', 'люди'), (0, 7))
|
||||||
|
# cSpell:enable
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
|
Loading…
Reference in New Issue
Block a user