Mirror of https://github.com/IRBorisov/ConceptPortal.git (synced 2025-06-26 13:00:39 +03:00)

Add mypy linter to Python toolchain

This commit is contained in:
parent 37d9b74cb6
commit 7cd76f6004
.vscode/settings.json (vendored, 4 changed lines)

@@ -13,6 +13,6 @@
 "mode": "auto"
 }
 ],
-"python.linting.pylintEnabled": true,
-"python.linting.enabled": true
+"python.linting.enabled": true,
+"python.linting.mypyEnabled": true
 }
@@ -76,6 +76,10 @@ This readme file is used mostly to document project dependencies
 <summary>requirements_dev</summary>
 <pre>
 - coverage
+- pylint
+- mypy
+- django-stubs[compatible-mypy]
+- djangorestframework-stubs[compatible-mypy]
 </pre>
 </details>
 <details>
@@ -2,5 +2,7 @@
 Set-Location $PSScriptRoot\backend

 $pylint = "$PSScriptRoot\backend\venv\Scripts\pylint.exe"
+$mypy = "$PSScriptRoot\backend\venv\Scripts\mypy.exe"

 & $pylint cctext project apps
+& $mypy cctext project apps
@@ -1,14 +1,18 @@
 ''' Models: RSForms for conceptual schemas. '''
 import json
 import pyconcept
-from django.db import models, transaction
+from django.db import transaction
+from django.db.models import (
+CASCADE, SET_NULL, ForeignKey, Model, PositiveIntegerField, QuerySet,
+TextChoices, TextField, BooleanField, CharField, DateTimeField, JSONField
+)
 from django.core.validators import MinValueValidator
 from django.core.exceptions import ValidationError
 from django.urls import reverse
 from apps.users.models import User


-class CstType(models.TextChoices):
+class CstType(TextChoices):
 ''' Type of constituenta '''
 BASE = 'basic'
 CONSTANT = 'constant'

@@ -20,7 +24,7 @@ class CstType(models.TextChoices):
 THEOREM = 'theorem'


-class Syntax(models.TextChoices):
+class Syntax(TextChoices):
 ''' Syntax types '''
 UNDEF = 'undefined'
 ASCII = 'ascii'

@@ -31,35 +35,35 @@ def _empty_forms():
 return []


-class RSForm(models.Model):
+class RSForm(Model):
 ''' RSForm is a math form of capturing conceptual schema '''
-owner = models.ForeignKey(
+owner: ForeignKey = ForeignKey(
 verbose_name='Владелец',
 to=User,
-on_delete=models.SET_NULL,
+on_delete=SET_NULL,
 null=True
 )
-title = models.TextField(
+title: TextField = TextField(
 verbose_name='Название'
 )
-alias = models.CharField(
+alias: CharField = CharField(
 verbose_name='Шифр',
 max_length=255,
 blank=True
 )
-comment = models.TextField(
+comment: TextField = TextField(
 verbose_name='Комментарий',
 blank=True
 )
-is_common = models.BooleanField(
+is_common: BooleanField = BooleanField(
 verbose_name='Общая',
 default=False
 )
-time_create = models.DateTimeField(
+time_create: DateTimeField = DateTimeField(
 verbose_name='Дата создания',
 auto_now_add=True
 )
-time_update = models.DateTimeField(
+time_update: DateTimeField = DateTimeField(
 verbose_name='Дата изменения',
 auto_now=True
 )

@@ -69,7 +73,7 @@ class RSForm(models.Model):
 verbose_name = 'Схема'
 verbose_name_plural = 'Схемы'

-def constituents(self) -> models.QuerySet:
+def constituents(self) -> QuerySet:
 ''' Get QuerySet containing all constituents of current RSForm '''
 return Constituenta.objects.filter(schema=self)

@@ -162,7 +166,7 @@ class RSForm(models.Model):
 else:
 cst = Constituenta.create_from_trs(cst_data, self, order)
 cst.save()
-uid = cst.id
+uid = cst.pk
 loaded_ids.add(uid)
 order += 1
 for prev_cst in prev_constituents:

@@ -186,10 +190,10 @@ class RSForm(models.Model):
 schema._create_items_from_trs(data['items'])
 return schema

-def to_trs(self) -> str:
+def to_trs(self) -> dict:
 ''' Generate JSON string containing all data from RSForm '''
 result = self._prepare_json_rsform()
-items: list['Constituenta'] = self.constituents().order_by('order')
+items = self.constituents().order_by('order')
 for cst in items:
 result['items'].append(cst.to_trs())
 return result

@@ -200,7 +204,7 @@ class RSForm(models.Model):
 def get_absolute_url(self):
 return reverse('rsform-detail', kwargs={'pk': self.pk})

-def _prepare_json_rsform(self: 'Constituenta') -> dict:
+def _prepare_json_rsform(self: 'RSForm') -> dict:
 return {
 'type': 'rsform',
 'title': self.title,

@@ -211,10 +215,10 @@ class RSForm(models.Model):

 @transaction.atomic
 def _update_from_core(self) -> dict:
-checked = json.loads(pyconcept.check_schema(json.dumps(self.to_trs())))
+checked: dict = json.loads(pyconcept.check_schema(json.dumps(self.to_trs())))
 update_list = self.constituents().only('id', 'order')
 if len(checked['items']) != update_list.count():
-raise ValidationError
+raise ValidationError('Invalid constituents count')
 order = 1
 for cst in checked['items']:
 cst_id = cst['entityUID']

@@ -235,59 +239,59 @@ class RSForm(models.Model):
 order += 1


-class Constituenta(models.Model):
+class Constituenta(Model):
 ''' Constituenta is the base unit for every conceptual schema '''
-schema = models.ForeignKey(
+schema: ForeignKey = ForeignKey(
 verbose_name='Концептуальная схема',
 to=RSForm,
-on_delete=models.CASCADE
+on_delete=CASCADE
 )
-order = models.PositiveIntegerField(
+order: PositiveIntegerField = PositiveIntegerField(
 verbose_name='Позиция',
 validators=[MinValueValidator(1)],
 default=-1,
 )
-alias = models.CharField(
+alias: CharField = CharField(
 verbose_name='Имя',
 max_length=8,
 default='undefined'
 )
-cst_type = models.CharField(
+cst_type: CharField = CharField(
 verbose_name='Тип',
 max_length=10,
 choices=CstType.choices,
 default=CstType.BASE
 )
-convention = models.TextField(
+convention: TextField = TextField(
 verbose_name='Комментарий/Конвенция',
 default='',
 blank=True
 )
-term_raw = models.TextField(
+term_raw: TextField = TextField(
 verbose_name='Термин (с отсылками)',
 default='',
 blank=True
 )
-term_resolved = models.TextField(
+term_resolved: TextField = TextField(
 verbose_name='Термин',
 default='',
 blank=True
 )
-term_forms = models.JSONField(
+term_forms: JSONField = JSONField(
 verbose_name='Словоформы',
 default=_empty_forms
 )
-definition_formal = models.TextField(
+definition_formal: TextField = TextField(
 verbose_name='Родоструктурное определение',
 default='',
 blank=True
 )
-definition_raw = models.TextField(
+definition_raw: TextField = TextField(
 verbose_name='Текстовое определние (с отсылками)',
 default='',
 blank=True
 )
-definition_resolved = models.TextField(
+definition_resolved: TextField = TextField(
 verbose_name='Текстовое определние',
 default='',
 blank=True

@@ -342,9 +346,9 @@ class Constituenta(models.Model):
 self.term_resolved = ''
 self.term_forms = []

-def to_trs(self) -> str:
+def to_trs(self) -> dict:
 return {
-'entityUID': self.id,
+'entityUID': self.pk,
 'type': 'constituenta',
 'cstType': self.cst_type,
 'alias': self.alias,
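The model changes above follow one pattern throughout: field classes are imported directly from django.db.models and every model attribute carries an explicit annotation (owner: ForeignKey = ForeignKey(...)), which makes the declared field types visible to mypy. A minimal sketch of the same style, assuming a configured Django project with the django-stubs plugin from mypy.ini below; the Note model and its fields are hypothetical and not part of this commit:

''' Hypothetical sketch of the annotation style used in the models above. '''
from django.db.models import CASCADE, CharField, ForeignKey, Model, TextField


class Note(Model):
    ''' Hypothetical model annotated in the same way as RSForm and Constituenta. '''
    owner: ForeignKey = ForeignKey(to='auth.User', on_delete=CASCADE)
    title: CharField = CharField(max_length=255, blank=True)
    comment: TextField = TextField(blank=True)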
@@ -73,7 +73,7 @@ class RSFormDetailsSerlializer(serializers.BaseSerializer):
 trs = pyconcept.check_schema(json.dumps(instance.to_trs()))
 trs = trs.replace('entityUID', 'id')
 result = json.loads(trs)
-result['id'] = instance.id
+result['id'] = instance.pk
 result['time_update'] = instance.time_update
 result['time_create'] = instance.time_create
 result['is_common'] = instance.is_common

@@ -101,7 +101,7 @@ class ConstituentaSerializer(serializers.ModelSerializer):
 if 'definition_raw' in validated_data:
 validated_data['definition_resolved'] = validated_data['definition_raw']

-result = super().update(instance, validated_data)
+result: Constituenta = super().update(instance, validated_data)
 instance.schema.save()
 return result
@@ -16,7 +16,7 @@ from apps.rsform.views import (


 def _response_contains(response, schema: RSForm) -> bool:
-return any(x for x in response.data if x['id'] == schema.id)
+return any(x for x in response.data if x['id'] == schema.pk)


 class TestConstituentaAPI(APITestCase):

@@ -25,8 +25,8 @@ class TestConstituentaAPI(APITestCase):
 self.user = User.objects.create(username='UserTest')
 self.client = APIClient()
 self.client.force_authenticate(user=self.user)
-self.rsform_owned: RSForm = RSForm.objects.create(title='Test', alias='T1', owner=self.user)
-self.rsform_unowned: RSForm = RSForm.objects.create(title='Test2', alias='T2')
+self.rsform_owned = RSForm.objects.create(title='Test', alias='T1', owner=self.user)
+self.rsform_unowned = RSForm.objects.create(title='Test2', alias='T2')
 self.cst1 = Constituenta.objects.create(
 alias='X1', schema=self.rsform_owned, order=1, convention='Test')
 self.cst2 = Constituenta.objects.create(

@@ -87,8 +87,8 @@ class TestRSFormViewset(APITestCase):
 self.user = User.objects.create(username='UserTest')
 self.client = APIClient()
 self.client.force_authenticate(user=self.user)
-self.rsform_owned: RSForm = RSForm.objects.create(title='Test', alias='T1', owner=self.user)
-self.rsform_unowned: RSForm = RSForm.objects.create(title='Test2', alias='T2')
+self.rsform_owned = RSForm.objects.create(title='Test', alias='T1', owner=self.user)
+self.rsform_unowned = RSForm.objects.create(title='Test2', alias='T2')

 def test_create_anonymous(self):
 self.client.logout()

@@ -131,7 +131,7 @@ class TestRSFormViewset(APITestCase):

 def test_contents(self):
 schema = RSForm.objects.create(title='Title1')
-schema.insert_last(alias='X1', type=CstType.BASE)
+schema.insert_last(alias='X1', insert_type=CstType.BASE)
 response = self.client.get(f'/api/rsforms/{schema.id}/contents/')
 self.assertEqual(response.status_code, 200)


@@ -418,9 +418,9 @@ class TestLibraryAPI(APITestCase):
 self.user = User.objects.create(username='UserTest')
 self.client = APIClient()
 self.client.force_authenticate(user=self.user)
-self.rsform_owned: RSForm = RSForm.objects.create(title='Test', alias='T1', owner=self.user)
-self.rsform_unowned: RSForm = RSForm.objects.create(title='Test2', alias='T2')
-self.rsform_common: RSForm = RSForm.objects.create(title='Test3', alias='T3', is_common=True)
+self.rsform_owned = RSForm.objects.create(title='Test', alias='T1', owner=self.user)
+self.rsform_unowned = RSForm.objects.create(title='Test2', alias='T2')
+self.rsform_common = RSForm.objects.create(title='Test3', alias='T3', is_common=True)

 def test_retrieve_common(self):
 self.client.logout()
@@ -21,7 +21,8 @@ def read_trs(file) -> dict:
 ''' Read JSON from TRS file '''
 with ZipFile(file, 'r') as archive:
 json_data = archive.read('document.json')
-return json.loads(json_data)
+result: dict = json.loads(json_data)
+return result


 def write_trs(json_data: dict) -> bytes:
@@ -53,7 +53,7 @@ class RSFormViewSet(viewsets.ModelViewSet):
 ordering = '-time_update'

 def _get_schema(self) -> models.RSForm:
-return self.get_object()
+return self.get_object() # type: ignore

 def perform_create(self, serializer):
 if not self.request.user.is_anonymous and 'owner' not in self.request.POST:

@@ -114,7 +114,7 @@ class RSFormViewSet(viewsets.ModelViewSet):
 @action(detail=True, methods=['patch'], url_path='cst-moveto')
 def cst_moveto(self, request, pk):
 ''' Endpoint: Move multiple constituents. '''
-schema: models.RSForm = self._get_schema()
+schema = self._get_schema()
 serializer = serializers.CstMoveSerlializer(data=request.data, context={'schema': schema})
 serializer.is_valid(raise_exception=True)
 schema.move_cst(serializer.validated_data['constituents'], serializer.validated_data['move_to'])
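In the viewset hunk above, _get_schema() keeps its models.RSForm return annotation but silences mypy with # type: ignore, presumably because the stubbed get_object() return type does not match. A hypothetical alternative (not what this commit does) is to narrow the type with typing.cast; a self-contained sketch with stand-in types:

''' Hypothetical sketch: typing.cast as an alternative to the # type: ignore above. '''
from typing import Any, cast


class Schema:
    ''' Stand-in for models.RSForm. '''
    title = 'Test'


def get_object() -> Any:
    ''' Stand-in for DRF's get_object(), whose return type mypy cannot narrow here. '''
    return Schema()


def get_schema() -> Schema:
    # cast() narrows the type for mypy only; it performs no runtime check.
    return cast(Schema, get_object())


print(get_schema().title)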
@@ -1,4 +1,5 @@
 ''' Concept core text processing library. '''
+# pylint: skip-file
 from .syntax import RuSyntax, Capitalization
 from .rumodel import Morphology, SemanticRole, WordTag, morpho
 from .ruparser import PhraseParser, WordToken, Collation
@@ -1,6 +1,7 @@
 ''' Russian language models. '''
 from __future__ import annotations
 from enum import Enum, unique
+from typing import Iterable

 from pymorphy2 import MorphAnalyzer
 from pymorphy2.tagset import OpencorporaTag as WordTag

@@ -59,14 +60,14 @@ class Morphology:
 return pos in ['ADJF', 'ADJS', 'PRTF', 'PRTS']

 @property
-def effective_pos(self) -> str:
+def effective_POS(self) -> str:
 ''' Access part of speech. Pronouns are considered as nouns '''
-pos = self.tag.POS
+pos: str = self.tag.POS
 if pos and self.tag.POS == 'NPRO':
 return 'NOUN'
 return pos

-def complete_tags(self, tags: frozenset[str]) -> set[str]:
+def complete_tags(self, tags: Iterable[str]) -> set[str]:
 ''' Add missing tags before inflection. '''
 result = set(tags)
 pos = self.tag.POS

@@ -111,6 +112,7 @@ class Morphology:
 if count == 0:
 return ''
 elif count == 1:
-return next(iter(grammemes))
+result: str = next(iter(grammemes))
+return result
 else:
 return ','.join(grammemes)
@@ -1,5 +1,6 @@
 ''' Parsing russian language using pymorphy2 and natasha libraries. '''
 from __future__ import annotations
+from typing import Iterable, Optional

 from razdel.substring import Substring as Segment
 from pymorphy2.analyzer import Parse as WordForm

@@ -11,17 +12,16 @@ INDEX_NONE = -1
 NO_COORDINATION = -1
 WORD_NONE = -1

+Tags = Iterable[str]
+

 class WordToken:
-''' Minimal text token. '''
+''' Atomic text token. '''
 def __init__(self, segment: Segment, forms: list[WordForm], main_form: int = 0):
 self.segment: Segment = segment
 self.forms: list[WordForm] = forms
 self.main: int = main_form

-def __del__(self):
-pass
-
 def get_morpho(self) -> Morphology:
 ''' Return morphology for current token. '''
 return Morphology(self.get_form().tag)

@@ -30,7 +30,7 @@ class WordToken:
 ''' Access main form. '''
 return self.forms[self.main]

-def inflect(self, inflection_tags: set[str]):
+def inflect(self, inflection_tags: set[str]) -> Optional[WordForm]:
 ''' Apply inflection to segment text. Does not modify forms '''
 inflected = self.get_form().inflect(inflection_tags)
 if not inflected:

@@ -43,21 +43,20 @@ class Collation:
 ''' Parsed data for input coordinated text. '''
 def __init__(self, text: str):
 self.text = text
-self.words = []
-self.coordination = []
+self.words: list[WordToken] = []
+self.coordination: list[int] = []
 self.main_word: int = WORD_NONE

-def __del__(self):
-pass
+def is_valid(self) -> bool:
+''' Check if data is parsed correctly '''
+return self.main_word != WORD_NONE

 def get_form(self) -> WordForm:
-''' Access main form. '''
+''' Access WordForm. '''
 return self.words[self.main_word].get_form()

 def get_morpho(self) -> Morphology:
 ''' Access parsed main mrophology. '''
-if self.main_word == WORD_NONE:
-return None
 return self.words[self.main_word].get_morpho()

 def add_word(self, segment, forms: list, main_form: int, need_coordination: bool = True):

@@ -65,28 +64,29 @@ class Collation:
 self.words.append(WordToken(segment, forms, main_form))
 self.coordination.append(NO_COORDINATION if not need_coordination else 0)

-def inflect(self, target_tags: frozenset[str]) -> str:
+def inflect(self, target_tags: Tags) -> str:
 ''' Inflect text to match required tags. '''
-origin = self.get_morpho()
-if not origin or origin.tag.grammemes.issuperset(target_tags):
-return self.text
-if not self._apply_inflection(origin, target_tags):
-return self.text
-new_text = self._generate_text()
-return new_text
+if self.is_valid():
+origin = self.get_morpho()
+if not origin.tag.grammemes.issuperset(target_tags):
+if self._apply_inflection(origin, target_tags):
+return self._generate_text()
+return self.text

 def inflect_like(self, base_model: Collation) -> str:
 ''' Create inflection to substitute base_model form. '''
-morph = base_model.get_morpho()
-if morph.effective_pos is None:
-return self.text
+if self.is_valid():
+morph = base_model.get_morpho()
+if morph.effective_POS:
 tags = set()
-tags.add(morph.effective_pos)
+tags.add(morph.effective_POS)
 tags = morph.complete_tags(tags)
 return self.inflect(tags)
+return self.text

 def inflect_dependant(self, master_model: Collation) -> str:
 ''' Create inflection to coordinate with master_model form. '''
+assert self.is_valid()
 morph = master_model.get_morpho()
 tags = morph.coordination_tags()
 tags = self.get_morpho().complete_tags(tags)

@@ -94,12 +94,12 @@ class Collation:

 def normal_form(self) -> str:
 ''' Generate normal form. '''
-main_form = self.get_form()
-if not main_form:
-return self.text
-new_morpho = Morphology(main_form.normalized.tag)
-new_tags = new_morpho.complete_tags(frozenset())
-return self.inflect(new_tags)
+if self.is_valid():
+main_form = self.get_form()
+new_morpho = Morphology(main_form.normalized.tag)
+new_tags = new_morpho.complete_tags(frozenset())
+return self.inflect(new_tags)
+return self.text

 def _iterate_coordinated(self):
 words_count = len(self.words)

@@ -108,21 +108,20 @@ class Collation:
 yield self.words[current_word]
 current_word += self.coordination[current_word]

-def _inflect_main_word(self, origin: Morphology, target_tags: frozenset[str]) -> Morphology:
+def _inflect_main_word(self, origin: Morphology, target_tags: Tags) -> Optional[Morphology]:
 full_tags = origin.complete_tags(target_tags)
 inflected = self.words[self.main_word].inflect(full_tags)
 if not inflected:
 return None
 return Morphology(inflected.tag)

-def _apply_inflection(self, origin: Morphology, target_tags: frozenset[str]) -> bool:
+def _apply_inflection(self, origin: Morphology, target_tags: Tags) -> bool:
 new_moprho = self._inflect_main_word(origin, target_tags)
 if not new_moprho:
 return False
 inflection_tags = new_moprho.coordination_tags()
 if len(inflection_tags) == 0:
 return True

 for word in self._iterate_coordinated():
 word.inflect(inflection_tags)
 return True

@@ -155,13 +154,17 @@ class PhraseParser:
 _MAIN_WAIT_LIMIT = 10 # count words untill fixing main
 _MAIN_MAX_FOLLOWERS = 3 # count words after main as coordination candidates

-def parse(self, text: str, require_index: int = INDEX_NONE, require_tags: frozenset[str] = None) -> Collation:
-''' Determine morpho tags for input text.
-::returns:: Morphology of a text or None if no suitable form is available '''
-if text == '':
-return None
+def parse(self, text: str,
+require_index: int = INDEX_NONE,
+require_tags: Optional[Tags] = None) -> Optional[Collation]:
+'''
+Determine morpho tags for input text.
+::returns:: Morphology of a text or None if no suitable form is available
+'''
 segments = list(RuSyntax.tokenize(text))
-if len(segments) == 1:
+if len(segments) == 0:
+return None
+elif len(segments) == 1:
 return self._parse_single(segments[0], require_index, require_tags)
 else:
 return self._parse_multiword(text, segments, require_index, require_tags)

@@ -169,9 +172,9 @@ class PhraseParser:
 def normalize(self, text: str):
 ''' Get normal form for target text. '''
 processed = self.parse(text)
-if not processed:
-return text
-return processed.normal_form()
+if processed:
+return processed.normal_form()
+return text

 def find_substr(self, text: str, sub: str) -> tuple[int, int]:
 ''' Search for substring position in text regardless of morphology. '''

@@ -234,7 +237,7 @@ class PhraseParser:
 return dependant_normal
 return dependant_model.inflect_dependant(master_model)

-def _parse_single(self, segment, require_index: int, require_tags: frozenset[str]) -> Collation:
+def _parse_single(self, segment, require_index: int, require_tags: Optional[Tags]) -> Optional[Collation]:
 forms = list(self._filtered_parse(segment.text))
 parse_index = INDEX_NONE
 if len(forms) == 0 or require_index >= len(forms):

@@ -266,9 +269,10 @@ class PhraseParser:
 result.main_word = 0
 return result

-def _parse_multiword(self, text: str, segments: list, require_index: int, require_tags: frozenset[str]):
+def _parse_multiword(self, text: str, segments: list, require_index: int,
+require_tags: Optional[Tags]) -> Optional[Collation]:
 result = Collation(text)
-priority_main = self._PRIORITY_NONE
+priority_main: float = self._PRIORITY_NONE
 segment_index = 0
 main_wait = 0
 word_index = 0

@@ -295,20 +299,20 @@ class PhraseParser:
 output: Collation,
 segment: Segment,
 require_index: int,
-require_tags: frozenset[str]) -> float:
+require_tags: Optional[Tags]) -> Optional[float]:
 ''' Return priority for this can be a new main word '''
 forms = list(self._filtered_parse(segment.text))
 if len(forms) == 0:
 return None
-main_index = INDEX_NONE
-segment_score = self._PRIORITY_NONE
+main_index: int = INDEX_NONE
+segment_score: float = self._PRIORITY_NONE
 needs_coordination = False
-local_sum = 0
-score_sum = 0
+local_sum: float = 0
+score_sum: float = 0
 if require_index != INDEX_NONE:
 form = forms[require_index]
 if not require_tags or form.tag.grammemes.issuperset(require_tags):
-(local_max, segment_score) = PhraseParser._get_priority_for(form.tag)
+(local_max, segment_score) = PhraseParser._get_priorities_for(form.tag)
 main_index = require_index
 needs_coordination = Morphology.is_dependable(form.tag.POS)
 else:

@@ -316,7 +320,7 @@ class PhraseParser:
 for (index, form) in enumerate(forms):
 if require_tags and not form.tag.grammemes.issuperset(require_tags):
 continue
-(local_priority, global_priority) = PhraseParser._get_priority_for(form.tag)
+(local_priority, global_priority) = PhraseParser._get_priorities_for(form.tag)
 needs_coordination = needs_coordination or Morphology.is_dependable(form.tag.POS)
 local_sum += global_priority * form.score
 score_sum += form.score

@@ -414,7 +418,8 @@ class PhraseParser:
 yield form

 @staticmethod
-def _parse_word(text: str, require_index: int = INDEX_NONE, require_tags: frozenset[str] = None) -> Morphology:
+def _parse_word(text: str, require_index: int = INDEX_NONE,
+require_tags: Optional[Tags] = None) -> Optional[Morphology]:
 parsed_variants = morpho.parse(text)
 if not parsed_variants or require_index >= len(parsed_variants):
 return None

@@ -432,7 +437,7 @@ class PhraseParser:
 return None

 @staticmethod
-def _get_priority_for(tag: WordTag) -> tuple[float, float]:
+def _get_priorities_for(tag: WordTag) -> tuple[float, float]:
 ''' Return pair of local and global priorities. '''
 if tag.POS in ['VERB', 'INFN']:
 return (9, 10)

@@ -447,7 +452,9 @@ class PhraseParser:
 return (0, 0)

 @staticmethod
-def _choose_context_etalon(target: Morphology, before: Collation, after: Collation) -> Collation:
+def _choose_context_etalon(target: Morphology,
+before: Optional[Collation],
+after: Optional[Collation]) -> Optional[Collation]:
 if not before or not before.get_morpho().can_coordinate:
 return after
 if not after or not after.get_morpho().can_coordinate:

@@ -473,7 +480,7 @@ class PhraseParser:
 return before

 @staticmethod
-def _combine_morpho(target: Morphology, etalon: WordTag) -> str:
+def _combine_morpho(target: Morphology, etalon: WordTag) -> frozenset[str]:
 part_of_speech = target.tag.POS
 number = etalon.number
 if number == 'plur':
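The ruparser changes above make the None paths explicit for mypy: PhraseParser.parse() now returns Optional[Collation], Collation gained is_valid(), and the inflection methods branch on it before touching members. A hedged usage sketch, assuming the cctext package from this repository is importable; the sample text is arbitrary:

''' Hypothetical usage sketch for the Optional-returning parse() above. '''
from typing import Optional

from cctext import Collation, PhraseParser

parser = PhraseParser()
phrase: Optional[Collation] = parser.parse('синяя птица')  # arbitrary sample text
if phrase is not None:
    # mypy now requires a guard like this before calling Collation methods.
    print(phrase.normal_form())
else:
    print('no suitable parse')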
@@ -68,7 +68,11 @@ class RuSyntax:
 ''' Test if text is a single word. '''
 try:
 gen = tokenize(text)
-return next(gen) == '' or next(gen) == ''
+if next(gen) == '':
+return True
+if next(gen) == '':
+return True
+return False
 except StopIteration:
 return True
@@ -1,6 +1,7 @@
 ''' Test russian language parsing. '''
 import unittest

+from typing import Iterable, Optional
 from cctext import PhraseParser

 parser = PhraseParser()

@@ -9,16 +10,20 @@ parser = PhraseParser()
 class TestRuParser(unittest.TestCase):
 ''' Test class for russian parsing. '''

-def _assert_parse(self, text: str, expected: list[str], require_index: int = -1, require_tags: list[str] = None):
+def _assert_parse(self, text: str, expected: list[str],
+require_index: int = -1,
+require_tags: Optional[Iterable[str]] = None):
 phrase = parser.parse(text, require_index, require_tags)
-self.assertEqual(phrase.get_morpho().tag.grammemes, set(expected))
+self.assertIsNotNone(phrase)
+if phrase:
+self.assertEqual(phrase.get_morpho().tag.grammemes, set(expected))

 def _assert_inflect(self, text: str, tags: list[str], expected: str):
 model = parser.parse(text)
 if not model:
 result = text
 else:
-result = model.inflect(set(tags))
+result = model.inflect(frozenset(tags))
 self.assertEqual(result, expected)

 def test_parse_word(self):
rsconcept/backend/mypy.ini (new file, 23 lines)

@@ -0,0 +1,23 @@
+# Global options:
+
+[mypy]
+warn_return_any = True
+warn_unused_configs = True
+
+plugins = mypy_drf_plugin.main, mypy_django_plugin.main
+
+# Per-module options:
+[mypy.plugins.django-stubs]
+django_settings_module = "project.settings"
+
+[mypy-django_filters.*]
+ignore_missing_imports = True
+
+[mypy-pyconcept.*]
+ignore_missing_imports = True
+
+[mypy-razdel.*]
+ignore_missing_imports = True
+
+[mypy-pymorphy2.*]
+ignore_missing_imports = True
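The new mypy.ini enables the django-stubs and djangorestframework-stubs plugins and ignores missing stubs for the untyped dependencies (django_filters, pyconcept, razdel, pymorphy2). Besides the PowerShell lint script above, the same check can be run through mypy's programmatic API; a hedged sketch, assuming it is executed from the backend directory that contains this mypy.ini:

''' Hypothetical helper (not part of the commit): run the same mypy check as the lint script. '''
from mypy import api

# Same targets as '& $mypy cctext project apps' in the PowerShell script above.
report, errors, exit_status = api.run(['cctext', 'project', 'apps'])
print(report, end='')
print(errors, end='')
print(f'mypy exited with status {exit_status}')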
@@ -123,7 +123,7 @@ DATABASES = {
 # Password validation
 # https://docs.djangoproject.com/en/4.1/ref/settings/#auth-password-validators

-AUTH_PASSWORD_VALIDATORS = [
+AUTH_PASSWORD_VALIDATORS: list[str] = [
 # NOTE: Password validators disabled
 # {
 # 'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator',
@@ -9,5 +9,8 @@ pymorphy2-dicts-ru
 pymorphy2-dicts-uk
 razdel

+mypy
 pylint
 coverage
+django-stubs[compatible-mypy]
+djangorestframework-stubs[compatible-mypy]