Add mypy linter to Python toolchain

IRBorisov 2023-08-17 21:23:54 +03:00
parent 37d9b74cb6
commit 7cd76f6004
16 changed files with 176 additions and 120 deletions

View File

@@ -13,6 +13,6 @@
             "mode": "auto"
         }
     ],
-    "python.linting.pylintEnabled": true,
-    "python.linting.enabled": true
+    "python.linting.enabled": true,
+    "python.linting.mypyEnabled": true
 }

View File

@@ -76,6 +76,10 @@ This readme file is used mostly to document project dependencies
 <summary>requirements_dev</summary>
 <pre>
 - coverage
+- pylint
+- mypy
+- django-stubs[compatible-mypy]
+- djangorestframework-stubs[compatible-mypy]
 </pre>
 </details>
 <details>

View File

@@ -2,5 +2,7 @@
 Set-Location $PSScriptRoot\backend
 $pylint = "$PSScriptRoot\backend\venv\Scripts\pylint.exe"
+$mypy = "$PSScriptRoot\backend\venv\Scripts\mypy.exe"
 & $pylint cctext project apps
+& $mypy cctext project apps

View File

@@ -1,14 +1,18 @@
 ''' Models: RSForms for conceptual schemas. '''
 import json
 import pyconcept
-from django.db import models, transaction
+from django.db import transaction
+from django.db.models import (
+    CASCADE, SET_NULL, ForeignKey, Model, PositiveIntegerField, QuerySet,
+    TextChoices, TextField, BooleanField, CharField, DateTimeField, JSONField
+)
 from django.core.validators import MinValueValidator
 from django.core.exceptions import ValidationError
 from django.urls import reverse
 from apps.users.models import User

-class CstType(models.TextChoices):
+class CstType(TextChoices):
     ''' Type of constituenta '''
     BASE = 'basic'
     CONSTANT = 'constant'
@@ -20,7 +24,7 @@ class CstType(models.TextChoices):
     THEOREM = 'theorem'

-class Syntax(models.TextChoices):
+class Syntax(TextChoices):
     ''' Syntax types '''
     UNDEF = 'undefined'
     ASCII = 'ascii'
@@ -31,35 +35,35 @@ def _empty_forms():
     return []

-class RSForm(models.Model):
+class RSForm(Model):
     ''' RSForm is a math form of capturing conceptual schema '''
-    owner = models.ForeignKey(
+    owner: ForeignKey = ForeignKey(
         verbose_name='Владелец',
         to=User,
-        on_delete=models.SET_NULL,
+        on_delete=SET_NULL,
         null=True
     )
-    title = models.TextField(
+    title: TextField = TextField(
         verbose_name='Название'
     )
-    alias = models.CharField(
+    alias: CharField = CharField(
         verbose_name='Шифр',
         max_length=255,
         blank=True
     )
-    comment = models.TextField(
+    comment: TextField = TextField(
         verbose_name='Комментарий',
         blank=True
     )
-    is_common = models.BooleanField(
+    is_common: BooleanField = BooleanField(
         verbose_name='Общая',
         default=False
    )
-    time_create = models.DateTimeField(
+    time_create: DateTimeField = DateTimeField(
         verbose_name='Дата создания',
         auto_now_add=True
     )
-    time_update = models.DateTimeField(
+    time_update: DateTimeField = DateTimeField(
         verbose_name='Дата изменения',
         auto_now=True
     )
@@ -69,7 +73,7 @@ class RSForm(models.Model):
         verbose_name = 'Схема'
         verbose_name_plural = 'Схемы'

-    def constituents(self) -> models.QuerySet:
+    def constituents(self) -> QuerySet:
         ''' Get QuerySet containing all constituents of current RSForm '''
         return Constituenta.objects.filter(schema=self)
@@ -162,7 +166,7 @@ class RSForm(models.Model):
             else:
                 cst = Constituenta.create_from_trs(cst_data, self, order)
                 cst.save()
-            uid = cst.id
+            uid = cst.pk
             loaded_ids.add(uid)
             order += 1
         for prev_cst in prev_constituents:
@@ -186,10 +190,10 @@ class RSForm(models.Model):
         schema._create_items_from_trs(data['items'])
         return schema

-    def to_trs(self) -> str:
+    def to_trs(self) -> dict:
         ''' Generate JSON string containing all data from RSForm '''
         result = self._prepare_json_rsform()
-        items: list['Constituenta'] = self.constituents().order_by('order')
+        items = self.constituents().order_by('order')
         for cst in items:
             result['items'].append(cst.to_trs())
         return result
@@ -200,7 +204,7 @@ class RSForm(models.Model):
     def get_absolute_url(self):
         return reverse('rsform-detail', kwargs={'pk': self.pk})

-    def _prepare_json_rsform(self: 'Constituenta') -> dict:
+    def _prepare_json_rsform(self: 'RSForm') -> dict:
         return {
             'type': 'rsform',
             'title': self.title,
@@ -211,10 +215,10 @@ class RSForm(models.Model):
     @transaction.atomic
     def _update_from_core(self) -> dict:
-        checked = json.loads(pyconcept.check_schema(json.dumps(self.to_trs())))
+        checked: dict = json.loads(pyconcept.check_schema(json.dumps(self.to_trs())))
         update_list = self.constituents().only('id', 'order')
         if len(checked['items']) != update_list.count():
-            raise ValidationError
+            raise ValidationError('Invalid constituents count')
         order = 1
         for cst in checked['items']:
             cst_id = cst['entityUID']
@@ -235,59 +239,59 @@ class RSForm(models.Model):
             order += 1

-class Constituenta(models.Model):
+class Constituenta(Model):
     ''' Constituenta is the base unit for every conceptual schema '''
-    schema = models.ForeignKey(
+    schema: ForeignKey = ForeignKey(
         verbose_name='Концептуальная схема',
         to=RSForm,
-        on_delete=models.CASCADE
+        on_delete=CASCADE
     )
-    order = models.PositiveIntegerField(
+    order: PositiveIntegerField = PositiveIntegerField(
         verbose_name='Позиция',
         validators=[MinValueValidator(1)],
         default=-1,
     )
-    alias = models.CharField(
+    alias: CharField = CharField(
         verbose_name='Имя',
         max_length=8,
         default='undefined'
     )
-    cst_type = models.CharField(
+    cst_type: CharField = CharField(
         verbose_name='Тип',
         max_length=10,
         choices=CstType.choices,
         default=CstType.BASE
     )
-    convention = models.TextField(
+    convention: TextField = TextField(
         verbose_name='Комментарий/Конвенция',
         default='',
         blank=True
     )
-    term_raw = models.TextField(
+    term_raw: TextField = TextField(
         verbose_name='Термин (с отсылками)',
         default='',
         blank=True
     )
-    term_resolved = models.TextField(
+    term_resolved: TextField = TextField(
         verbose_name='Термин',
         default='',
         blank=True
     )
-    term_forms = models.JSONField(
+    term_forms: JSONField = JSONField(
         verbose_name='Словоформы',
         default=_empty_forms
     )
-    definition_formal = models.TextField(
+    definition_formal: TextField = TextField(
         verbose_name='Родоструктурное определение',
         default='',
         blank=True
     )
-    definition_raw = models.TextField(
+    definition_raw: TextField = TextField(
         verbose_name='Текстовое определние (с отсылками)',
         default='',
         blank=True
     )
-    definition_resolved = models.TextField(
+    definition_resolved: TextField = TextField(
         verbose_name='Текстовое определние',
         default='',
         blank=True
@@ -342,9 +346,9 @@ class Constituenta(models.Model):
         self.term_resolved = ''
         self.term_forms = []

-    def to_trs(self) -> str:
+    def to_trs(self) -> dict:
         return {
-            'entityUID': self.id,
+            'entityUID': self.pk,
             'type': 'constituenta',
             'cstType': self.cst_type,
             'alias': self.alias,
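The model changes above switch from the models.X attribute style to direct imports from django.db.models and give every field an explicit class-level annotation, so mypy has a concrete declaration for each attribute. A minimal sketch of the same declaration style on an illustrative model (not part of this commit; the app_label is assumed):

from django.db.models import BooleanField, CharField, Model

class Tag(Model):
    ''' Hypothetical model used only to illustrate the annotated-field pattern. '''
    name: CharField = CharField(max_length=64)
    is_active: BooleanField = BooleanField(default=True)

    class Meta:
        app_label = 'rsform'  # assumed label so the sketch fits this project layout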

View File

@@ -73,7 +73,7 @@ class RSFormDetailsSerlializer(serializers.BaseSerializer):
         trs = pyconcept.check_schema(json.dumps(instance.to_trs()))
         trs = trs.replace('entityUID', 'id')
         result = json.loads(trs)
-        result['id'] = instance.id
+        result['id'] = instance.pk
         result['time_update'] = instance.time_update
         result['time_create'] = instance.time_create
         result['is_common'] = instance.is_common
@@ -101,7 +101,7 @@ class ConstituentaSerializer(serializers.ModelSerializer):
         if 'definition_raw' in validated_data:
             validated_data['definition_resolved'] = validated_data['definition_raw']
-        result = super().update(instance, validated_data)
+        result: Constituenta = super().update(instance, validated_data)
         instance.schema.save()
         return result

View File

@@ -16,7 +16,7 @@ from apps.rsform.views import (
 def _response_contains(response, schema: RSForm) -> bool:
-    return any(x for x in response.data if x['id'] == schema.id)
+    return any(x for x in response.data if x['id'] == schema.pk)

 class TestConstituentaAPI(APITestCase):
@@ -25,8 +25,8 @@ class TestConstituentaAPI(APITestCase):
         self.user = User.objects.create(username='UserTest')
         self.client = APIClient()
         self.client.force_authenticate(user=self.user)
-        self.rsform_owned: RSForm = RSForm.objects.create(title='Test', alias='T1', owner=self.user)
-        self.rsform_unowned: RSForm = RSForm.objects.create(title='Test2', alias='T2')
+        self.rsform_owned = RSForm.objects.create(title='Test', alias='T1', owner=self.user)
+        self.rsform_unowned = RSForm.objects.create(title='Test2', alias='T2')
         self.cst1 = Constituenta.objects.create(
             alias='X1', schema=self.rsform_owned, order=1, convention='Test')
         self.cst2 = Constituenta.objects.create(
@@ -87,8 +87,8 @@ class TestRSFormViewset(APITestCase):
         self.user = User.objects.create(username='UserTest')
         self.client = APIClient()
         self.client.force_authenticate(user=self.user)
-        self.rsform_owned: RSForm = RSForm.objects.create(title='Test', alias='T1', owner=self.user)
-        self.rsform_unowned: RSForm = RSForm.objects.create(title='Test2', alias='T2')
+        self.rsform_owned = RSForm.objects.create(title='Test', alias='T1', owner=self.user)
+        self.rsform_unowned = RSForm.objects.create(title='Test2', alias='T2')

     def test_create_anonymous(self):
         self.client.logout()
@@ -131,7 +131,7 @@ class TestRSFormViewset(APITestCase):
     def test_contents(self):
         schema = RSForm.objects.create(title='Title1')
-        schema.insert_last(alias='X1', type=CstType.BASE)
+        schema.insert_last(alias='X1', insert_type=CstType.BASE)
         response = self.client.get(f'/api/rsforms/{schema.id}/contents/')
         self.assertEqual(response.status_code, 200)
@@ -418,9 +418,9 @@ class TestLibraryAPI(APITestCase):
         self.user = User.objects.create(username='UserTest')
         self.client = APIClient()
         self.client.force_authenticate(user=self.user)
-        self.rsform_owned: RSForm = RSForm.objects.create(title='Test', alias='T1', owner=self.user)
-        self.rsform_unowned: RSForm = RSForm.objects.create(title='Test2', alias='T2')
-        self.rsform_common: RSForm = RSForm.objects.create(title='Test3', alias='T3', is_common=True)
+        self.rsform_owned = RSForm.objects.create(title='Test', alias='T1', owner=self.user)
+        self.rsform_unowned = RSForm.objects.create(title='Test2', alias='T2')
+        self.rsform_common = RSForm.objects.create(title='Test3', alias='T3', is_common=True)

     def test_retrieve_common(self):
         self.client.logout()

View File

@@ -21,7 +21,8 @@ def read_trs(file) -> dict:
     ''' Read JSON from TRS file '''
     with ZipFile(file, 'r') as archive:
         json_data = archive.read('document.json')
-        return json.loads(json_data)
+        result: dict = json.loads(json_data)
+        return result

 def write_trs(json_data: dict) -> bytes:
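The read_trs change follows from warn_return_any in the new mypy.ini: json.loads() is typed as returning Any, and returning an Any value from a function declared -> dict triggers that warning, so the value is first bound to an explicitly annotated local. A minimal sketch of the same pattern outside the repo (function name is illustrative):

import json

def parse_payload(raw: str) -> dict:
    # The annotation gives the return statement a concrete type instead of
    # propagating the Any produced by json.loads, satisfying warn_return_any.
    payload: dict = json.loads(raw)
    return payload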

View File

@@ -53,7 +53,7 @@ class RSFormViewSet(viewsets.ModelViewSet):
     ordering = '-time_update'

     def _get_schema(self) -> models.RSForm:
-        return self.get_object()
+        return self.get_object() # type: ignore

     def perform_create(self, serializer):
         if not self.request.user.is_anonymous and 'owner' not in self.request.POST:
@@ -114,7 +114,7 @@ class RSFormViewSet(viewsets.ModelViewSet):
     @action(detail=True, methods=['patch'], url_path='cst-moveto')
     def cst_moveto(self, request, pk):
         ''' Endpoint: Move multiple constituents. '''
-        schema: models.RSForm = self._get_schema()
+        schema = self._get_schema()
         serializer = serializers.CstMoveSerlializer(data=request.data, context={'schema': schema})
         serializer.is_valid(raise_exception=True)
         schema.move_cst(serializer.validated_data['constituents'], serializer.validated_data['move_to'])

View File

@@ -1,4 +1,5 @@
 ''' Concept core text processing library. '''
+# pylint: skip-file
 from .syntax import RuSyntax, Capitalization
 from .rumodel import Morphology, SemanticRole, WordTag, morpho
 from .ruparser import PhraseParser, WordToken, Collation

View File

@@ -1,6 +1,7 @@
 ''' Russian language models. '''
 from __future__ import annotations
 from enum import Enum, unique
+from typing import Iterable

 from pymorphy2 import MorphAnalyzer
 from pymorphy2.tagset import OpencorporaTag as WordTag
@@ -59,14 +60,14 @@ class Morphology:
         return pos in ['ADJF', 'ADJS', 'PRTF', 'PRTS']

     @property
-    def effective_pos(self) -> str:
+    def effective_POS(self) -> str:
         ''' Access part of speech. Pronouns are considered as nouns '''
-        pos = self.tag.POS
+        pos: str = self.tag.POS
         if pos and self.tag.POS == 'NPRO':
             return 'NOUN'
         return pos

-    def complete_tags(self, tags: frozenset[str]) -> set[str]:
+    def complete_tags(self, tags: Iterable[str]) -> set[str]:
         ''' Add missing tags before inflection. '''
         result = set(tags)
         pos = self.tag.POS
@@ -111,6 +112,7 @@ class Morphology:
         if count == 0:
             return ''
         elif count == 1:
-            return next(iter(grammemes))
+            result: str = next(iter(grammemes))
+            return result
         else:
             return ','.join(grammemes)

View File

@@ -1,5 +1,6 @@
 ''' Parsing russian language using pymorphy2 and natasha libraries. '''
 from __future__ import annotations
+from typing import Iterable, Optional

 from razdel.substring import Substring as Segment
 from pymorphy2.analyzer import Parse as WordForm
@@ -11,17 +12,16 @@ INDEX_NONE = -1
 NO_COORDINATION = -1
 WORD_NONE = -1

+Tags = Iterable[str]

 class WordToken:
-    ''' Minimal text token. '''
+    ''' Atomic text token. '''
     def __init__(self, segment: Segment, forms: list[WordForm], main_form: int = 0):
         self.segment: Segment = segment
         self.forms: list[WordForm] = forms
         self.main: int = main_form

-    def __del__(self):
-        pass
-
     def get_morpho(self) -> Morphology:
         ''' Return morphology for current token. '''
         return Morphology(self.get_form().tag)
@@ -30,7 +30,7 @@ class WordToken:
         ''' Access main form. '''
         return self.forms[self.main]

-    def inflect(self, inflection_tags: set[str]):
+    def inflect(self, inflection_tags: set[str]) -> Optional[WordForm]:
         ''' Apply inflection to segment text. Does not modify forms '''
         inflected = self.get_form().inflect(inflection_tags)
         if not inflected:
@@ -43,21 +43,20 @@ class Collation:
     ''' Parsed data for input coordinated text. '''
     def __init__(self, text: str):
         self.text = text
-        self.words = []
-        self.coordination = []
+        self.words: list[WordToken] = []
+        self.coordination: list[int] = []
         self.main_word: int = WORD_NONE

-    def __del__(self):
-        pass
+    def is_valid(self) -> bool:
+        ''' Check if data is parsed correctly '''
+        return self.main_word != WORD_NONE

     def get_form(self) -> WordForm:
-        ''' Access main form. '''
+        ''' Access WordForm. '''
         return self.words[self.main_word].get_form()

     def get_morpho(self) -> Morphology:
         ''' Access parsed main mrophology. '''
-        if self.main_word == WORD_NONE:
-            return None
         return self.words[self.main_word].get_morpho()

     def add_word(self, segment, forms: list, main_form: int, need_coordination: bool = True):
@@ -65,28 +64,29 @@ class Collation:
         self.words.append(WordToken(segment, forms, main_form))
         self.coordination.append(NO_COORDINATION if not need_coordination else 0)

-    def inflect(self, target_tags: frozenset[str]) -> str:
+    def inflect(self, target_tags: Tags) -> str:
         ''' Inflect text to match required tags. '''
-        origin = self.get_morpho()
-        if not origin or origin.tag.grammemes.issuperset(target_tags):
-            return self.text
-        if not self._apply_inflection(origin, target_tags):
-            return self.text
-        new_text = self._generate_text()
-        return new_text
+        if self.is_valid():
+            origin = self.get_morpho()
+            if not origin.tag.grammemes.issuperset(target_tags):
+                if self._apply_inflection(origin, target_tags):
+                    return self._generate_text()
+        return self.text

     def inflect_like(self, base_model: Collation) -> str:
         ''' Create inflection to substitute base_model form. '''
-        morph = base_model.get_morpho()
-        if morph.effective_pos is None:
-            return self.text
-        tags = set()
-        tags.add(morph.effective_pos)
-        tags = morph.complete_tags(tags)
-        return self.inflect(tags)
+        if self.is_valid():
+            morph = base_model.get_morpho()
+            if morph.effective_POS:
+                tags = set()
+                tags.add(morph.effective_POS)
+                tags = morph.complete_tags(tags)
+                return self.inflect(tags)
+        return self.text

     def inflect_dependant(self, master_model: Collation) -> str:
         ''' Create inflection to coordinate with master_model form. '''
+        assert self.is_valid()
         morph = master_model.get_morpho()
         tags = morph.coordination_tags()
         tags = self.get_morpho().complete_tags(tags)
@@ -94,12 +94,12 @@ class Collation:
     def normal_form(self) -> str:
         ''' Generate normal form. '''
-        main_form = self.get_form()
-        if not main_form:
-            return self.text
-        new_morpho = Morphology(main_form.normalized.tag)
-        new_tags = new_morpho.complete_tags(frozenset())
-        return self.inflect(new_tags)
+        if self.is_valid():
+            main_form = self.get_form()
+            new_morpho = Morphology(main_form.normalized.tag)
+            new_tags = new_morpho.complete_tags(frozenset())
+            return self.inflect(new_tags)
+        return self.text

     def _iterate_coordinated(self):
         words_count = len(self.words)
@@ -108,21 +108,20 @@ class Collation:
             yield self.words[current_word]
             current_word += self.coordination[current_word]

-    def _inflect_main_word(self, origin: Morphology, target_tags: frozenset[str]) -> Morphology:
+    def _inflect_main_word(self, origin: Morphology, target_tags: Tags) -> Optional[Morphology]:
         full_tags = origin.complete_tags(target_tags)
         inflected = self.words[self.main_word].inflect(full_tags)
         if not inflected:
             return None
         return Morphology(inflected.tag)

-    def _apply_inflection(self, origin: Morphology, target_tags: frozenset[str]) -> bool:
+    def _apply_inflection(self, origin: Morphology, target_tags: Tags) -> bool:
         new_moprho = self._inflect_main_word(origin, target_tags)
         if not new_moprho:
             return False
         inflection_tags = new_moprho.coordination_tags()
         if len(inflection_tags) == 0:
             return True
         for word in self._iterate_coordinated():
             word.inflect(inflection_tags)
         return True
@@ -155,13 +154,17 @@ class PhraseParser:
     _MAIN_WAIT_LIMIT = 10 # count words untill fixing main
     _MAIN_MAX_FOLLOWERS = 3 # count words after main as coordination candidates

-    def parse(self, text: str, require_index: int = INDEX_NONE, require_tags: frozenset[str] = None) -> Collation:
-        ''' Determine morpho tags for input text.
-        ::returns:: Morphology of a text or None if no suitable form is available '''
-        if text == '':
-            return None
+    def parse(self, text: str,
+              require_index: int = INDEX_NONE,
+              require_tags: Optional[Tags] = None) -> Optional[Collation]:
+        '''
+        Determine morpho tags for input text.
+        ::returns:: Morphology of a text or None if no suitable form is available
+        '''
         segments = list(RuSyntax.tokenize(text))
-        if len(segments) == 1:
+        if len(segments) == 0:
+            return None
+        elif len(segments) == 1:
             return self._parse_single(segments[0], require_index, require_tags)
         else:
             return self._parse_multiword(text, segments, require_index, require_tags)
@@ -169,9 +172,9 @@ class PhraseParser:
     def normalize(self, text: str):
         ''' Get normal form for target text. '''
         processed = self.parse(text)
-        if not processed:
-            return text
-        return processed.normal_form()
+        if processed:
+            return processed.normal_form()
+        return text

     def find_substr(self, text: str, sub: str) -> tuple[int, int]:
         ''' Search for substring position in text regardless of morphology. '''
@@ -234,7 +237,7 @@ class PhraseParser:
             return dependant_normal
         return dependant_model.inflect_dependant(master_model)

-    def _parse_single(self, segment, require_index: int, require_tags: frozenset[str]) -> Collation:
+    def _parse_single(self, segment, require_index: int, require_tags: Optional[Tags]) -> Optional[Collation]:
         forms = list(self._filtered_parse(segment.text))
         parse_index = INDEX_NONE
         if len(forms) == 0 or require_index >= len(forms):
@@ -266,9 +269,10 @@ class PhraseParser:
         result.main_word = 0
         return result

-    def _parse_multiword(self, text: str, segments: list, require_index: int, require_tags: frozenset[str]):
+    def _parse_multiword(self, text: str, segments: list, require_index: int,
+                         require_tags: Optional[Tags]) -> Optional[Collation]:
         result = Collation(text)
-        priority_main = self._PRIORITY_NONE
+        priority_main: float = self._PRIORITY_NONE
         segment_index = 0
         main_wait = 0
         word_index = 0
@@ -295,20 +299,20 @@ class PhraseParser:
                             output: Collation,
                             segment: Segment,
                             require_index: int,
-                            require_tags: frozenset[str]) -> float:
+                            require_tags: Optional[Tags]) -> Optional[float]:
         ''' Return priority for this can be a new main word '''
         forms = list(self._filtered_parse(segment.text))
         if len(forms) == 0:
             return None
-        main_index = INDEX_NONE
-        segment_score = self._PRIORITY_NONE
+        main_index: int = INDEX_NONE
+        segment_score: float = self._PRIORITY_NONE
         needs_coordination = False
-        local_sum = 0
-        score_sum = 0
+        local_sum: float = 0
+        score_sum: float = 0
         if require_index != INDEX_NONE:
             form = forms[require_index]
             if not require_tags or form.tag.grammemes.issuperset(require_tags):
-                (local_max, segment_score) = PhraseParser._get_priority_for(form.tag)
+                (local_max, segment_score) = PhraseParser._get_priorities_for(form.tag)
                 main_index = require_index
                 needs_coordination = Morphology.is_dependable(form.tag.POS)
             else:
@@ -316,7 +320,7 @@ class PhraseParser:
             for (index, form) in enumerate(forms):
                 if require_tags and not form.tag.grammemes.issuperset(require_tags):
                     continue
-                (local_priority, global_priority) = PhraseParser._get_priority_for(form.tag)
+                (local_priority, global_priority) = PhraseParser._get_priorities_for(form.tag)
                 needs_coordination = needs_coordination or Morphology.is_dependable(form.tag.POS)
                 local_sum += global_priority * form.score
                 score_sum += form.score
@@ -414,7 +418,8 @@ class PhraseParser:
                 yield form

     @staticmethod
-    def _parse_word(text: str, require_index: int = INDEX_NONE, require_tags: frozenset[str] = None) -> Morphology:
+    def _parse_word(text: str, require_index: int = INDEX_NONE,
+                    require_tags: Optional[Tags] = None) -> Optional[Morphology]:
         parsed_variants = morpho.parse(text)
         if not parsed_variants or require_index >= len(parsed_variants):
             return None
@@ -432,7 +437,7 @@ class PhraseParser:
         return None

     @staticmethod
-    def _get_priority_for(tag: WordTag) -> tuple[float, float]:
+    def _get_priorities_for(tag: WordTag) -> tuple[float, float]:
         ''' Return pair of local and global priorities. '''
         if tag.POS in ['VERB', 'INFN']:
             return (9, 10)
@@ -447,7 +452,9 @@ class PhraseParser:
         return (0, 0)

     @staticmethod
-    def _choose_context_etalon(target: Morphology, before: Collation, after: Collation) -> Collation:
+    def _choose_context_etalon(target: Morphology,
+                               before: Optional[Collation],
+                               after: Optional[Collation]) -> Optional[Collation]:
         if not before or not before.get_morpho().can_coordinate:
             return after
         if not after or not after.get_morpho().can_coordinate:
@@ -473,7 +480,7 @@ class PhraseParser:
         return before

     @staticmethod
-    def _combine_morpho(target: Morphology, etalon: WordTag) -> str:
+    def _combine_morpho(target: Morphology, etalon: WordTag) -> frozenset[str]:
         part_of_speech = target.tag.POS
         number = etalon.number
         if number == 'plur':

View File

@@ -68,7 +68,11 @@ class RuSyntax:
         ''' Test if text is a single word. '''
         try:
             gen = tokenize(text)
-            return next(gen) == '' or next(gen) == ''
+            if next(gen) == '':
+                return True
+            if next(gen) == '':
+                return True
+            return False
         except StopIteration:
             return True

View File

@@ -1,6 +1,7 @@
 ''' Test russian language parsing. '''
 import unittest
+from typing import Iterable, Optional

 from cctext import PhraseParser

 parser = PhraseParser()
@@ -9,8 +10,12 @@ parser = PhraseParser()
 class TestRuParser(unittest.TestCase):
     ''' Test class for russian parsing. '''
-    def _assert_parse(self, text: str, expected: list[str], require_index: int = -1, require_tags: list[str] = None):
+    def _assert_parse(self, text: str, expected: list[str],
+                      require_index: int = -1,
+                      require_tags: Optional[Iterable[str]] = None):
         phrase = parser.parse(text, require_index, require_tags)
-        self.assertEqual(phrase.get_morpho().tag.grammemes, set(expected))
+        self.assertIsNotNone(phrase)
+        if phrase:
+            self.assertEqual(phrase.get_morpho().tag.grammemes, set(expected))

     def _assert_inflect(self, text: str, tags: list[str], expected: str):
@@ -18,7 +23,7 @@ class TestRuParser(unittest.TestCase):
         if not model:
             result = text
         else:
-            result = model.inflect(set(tags))
+            result = model.inflect(frozenset(tags))
         self.assertEqual(result, expected)

     def test_parse_word(self):

View File

@@ -0,0 +1,23 @@
+# Global options:
+
+[mypy]
+warn_return_any = True
+warn_unused_configs = True
+plugins = mypy_drf_plugin.main, mypy_django_plugin.main
+
+# Per-module options:
+
+[mypy.plugins.django-stubs]
+django_settings_module = "project.settings"
+
+[mypy-django_filters.*]
+ignore_missing_imports = True
+
+[mypy-pyconcept.*]
+ignore_missing_imports = True
+
+[mypy-razdel.*]
+ignore_missing_imports = True
+
+[mypy-pymorphy2.*]
+ignore_missing_imports = True
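The [mypy.plugins.django-stubs] section points the plugin at project.settings so it can resolve model managers and fields, while the per-package ignore_missing_imports entries cover dependencies that ship no type stubs (django_filters, pyconcept, razdel, pymorphy2). A hedged sketch of what the plugin enables once configured — the helper function is illustrative and assumes RSForm lives in apps.rsform.models, as the rest of this diff suggests:

from django.db.models import QuerySet

from apps.rsform.models import RSForm

def common_schemas() -> 'QuerySet[RSForm]':
    # With mypy_django_plugin active, RSForm.objects is a typed manager, so
    # .filter() is known to yield RSForm rows instead of an untyped result.
    return RSForm.objects.filter(is_common=True)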

View File

@@ -123,7 +123,7 @@ DATABASES = {
 # Password validation
 # https://docs.djangoproject.com/en/4.1/ref/settings/#auth-password-validators

-AUTH_PASSWORD_VALIDATORS = [
+AUTH_PASSWORD_VALIDATORS: list[str] = [
     # NOTE: Password validators disabled
     # {
     #     'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator',

View File

@@ -9,5 +9,8 @@ pymorphy2-dicts-ru
 pymorphy2-dicts-uk
 razdel
+mypy
 pylint
 coverage
+django-stubs[compatible-mypy]
+djangorestframework-stubs[compatible-mypy]