mirror of
https://github.com/IRBorisov/ConceptPortal.git
synced 2025-06-26 04:50:36 +03:00
Add mypy linter to Python toolchain
This commit is contained in:
parent
37d9b74cb6
commit
7cd76f6004
4
.vscode/settings.json
vendored
4
.vscode/settings.json
vendored
|
@ -13,6 +13,6 @@
|
|||
"mode": "auto"
|
||||
}
|
||||
],
|
||||
"python.linting.pylintEnabled": true,
|
||||
"python.linting.enabled": true
|
||||
"python.linting.enabled": true,
|
||||
"python.linting.mypyEnabled": true
|
||||
}
|
|
@ -76,6 +76,10 @@ This readme file is used mostly to document project dependencies
|
|||
<summary>requirements_dev</summary>
|
||||
<pre>
|
||||
- coverage
|
||||
- pylint
|
||||
- mypy
|
||||
- django-stubs[compatible-mypy]
|
||||
- djangorestframework-stubs[compatible-mypy]
|
||||
</pre>
|
||||
</details>
|
||||
<details>
|
||||
|
|
|
@ -2,5 +2,7 @@
|
|||
Set-Location $PSScriptRoot\backend
|
||||
|
||||
$pylint = "$PSScriptRoot\backend\venv\Scripts\pylint.exe"
|
||||
$mypy = "$PSScriptRoot\backend\venv\Scripts\mypy.exe"
|
||||
|
||||
& $pylint cctext project apps
|
||||
& $mypy cctext project apps
|
|
@ -1,14 +1,18 @@
|
|||
''' Models: RSForms for conceptual schemas. '''
|
||||
import json
|
||||
import pyconcept
|
||||
from django.db import models, transaction
|
||||
from django.db import transaction
|
||||
from django.db.models import (
|
||||
CASCADE, SET_NULL, ForeignKey, Model, PositiveIntegerField, QuerySet,
|
||||
TextChoices, TextField, BooleanField, CharField, DateTimeField, JSONField
|
||||
)
|
||||
from django.core.validators import MinValueValidator
|
||||
from django.core.exceptions import ValidationError
|
||||
from django.urls import reverse
|
||||
from apps.users.models import User
|
||||
|
||||
|
||||
class CstType(models.TextChoices):
|
||||
class CstType(TextChoices):
|
||||
''' Type of constituenta '''
|
||||
BASE = 'basic'
|
||||
CONSTANT = 'constant'
|
||||
|
@ -20,7 +24,7 @@ class CstType(models.TextChoices):
|
|||
THEOREM = 'theorem'
|
||||
|
||||
|
||||
class Syntax(models.TextChoices):
|
||||
class Syntax(TextChoices):
|
||||
''' Syntax types '''
|
||||
UNDEF = 'undefined'
|
||||
ASCII = 'ascii'
|
||||
|
@ -31,35 +35,35 @@ def _empty_forms():
|
|||
return []
|
||||
|
||||
|
||||
class RSForm(models.Model):
|
||||
class RSForm(Model):
|
||||
''' RSForm is a math form of capturing conceptual schema '''
|
||||
owner = models.ForeignKey(
|
||||
owner: ForeignKey = ForeignKey(
|
||||
verbose_name='Владелец',
|
||||
to=User,
|
||||
on_delete=models.SET_NULL,
|
||||
on_delete=SET_NULL,
|
||||
null=True
|
||||
)
|
||||
title = models.TextField(
|
||||
title: TextField = TextField(
|
||||
verbose_name='Название'
|
||||
)
|
||||
alias = models.CharField(
|
||||
alias: CharField = CharField(
|
||||
verbose_name='Шифр',
|
||||
max_length=255,
|
||||
blank=True
|
||||
)
|
||||
comment = models.TextField(
|
||||
comment: TextField = TextField(
|
||||
verbose_name='Комментарий',
|
||||
blank=True
|
||||
)
|
||||
is_common = models.BooleanField(
|
||||
is_common: BooleanField = BooleanField(
|
||||
verbose_name='Общая',
|
||||
default=False
|
||||
)
|
||||
time_create = models.DateTimeField(
|
||||
time_create: DateTimeField = DateTimeField(
|
||||
verbose_name='Дата создания',
|
||||
auto_now_add=True
|
||||
)
|
||||
time_update = models.DateTimeField(
|
||||
time_update: DateTimeField = DateTimeField(
|
||||
verbose_name='Дата изменения',
|
||||
auto_now=True
|
||||
)
|
||||
|
@ -69,7 +73,7 @@ class RSForm(models.Model):
|
|||
verbose_name = 'Схема'
|
||||
verbose_name_plural = 'Схемы'
|
||||
|
||||
def constituents(self) -> models.QuerySet:
|
||||
def constituents(self) -> QuerySet:
|
||||
''' Get QuerySet containing all constituents of current RSForm '''
|
||||
return Constituenta.objects.filter(schema=self)
|
||||
|
||||
|
@ -162,7 +166,7 @@ class RSForm(models.Model):
|
|||
else:
|
||||
cst = Constituenta.create_from_trs(cst_data, self, order)
|
||||
cst.save()
|
||||
uid = cst.id
|
||||
uid = cst.pk
|
||||
loaded_ids.add(uid)
|
||||
order += 1
|
||||
for prev_cst in prev_constituents:
|
||||
|
@ -186,10 +190,10 @@ class RSForm(models.Model):
|
|||
schema._create_items_from_trs(data['items'])
|
||||
return schema
|
||||
|
||||
def to_trs(self) -> str:
|
||||
def to_trs(self) -> dict:
|
||||
''' Generate JSON-serializable dict containing all data from RSForm '''
|
||||
result = self._prepare_json_rsform()
|
||||
items: list['Constituenta'] = self.constituents().order_by('order')
|
||||
items = self.constituents().order_by('order')
|
||||
for cst in items:
|
||||
result['items'].append(cst.to_trs())
|
||||
return result
|
||||
|
@ -200,7 +204,7 @@ class RSForm(models.Model):
|
|||
def get_absolute_url(self):
|
||||
return reverse('rsform-detail', kwargs={'pk': self.pk})
|
||||
|
||||
def _prepare_json_rsform(self: 'Constituenta') -> dict:
|
||||
def _prepare_json_rsform(self: 'RSForm') -> dict:
|
||||
return {
|
||||
'type': 'rsform',
|
||||
'title': self.title,
|
||||
|
@ -211,10 +215,10 @@ class RSForm(models.Model):
|
|||
|
||||
@transaction.atomic
|
||||
def _update_from_core(self) -> dict:
|
||||
checked = json.loads(pyconcept.check_schema(json.dumps(self.to_trs())))
|
||||
checked: dict = json.loads(pyconcept.check_schema(json.dumps(self.to_trs())))
|
||||
update_list = self.constituents().only('id', 'order')
|
||||
if len(checked['items']) != update_list.count():
|
||||
raise ValidationError
|
||||
raise ValidationError('Invalid constituents count')
|
||||
order = 1
|
||||
for cst in checked['items']:
|
||||
cst_id = cst['entityUID']
|
||||
|
@ -235,59 +239,59 @@ class RSForm(models.Model):
|
|||
order += 1
|
||||
|
||||
|
||||
class Constituenta(models.Model):
|
||||
class Constituenta(Model):
|
||||
''' Constituenta is the base unit for every conceptual schema '''
|
||||
schema = models.ForeignKey(
|
||||
schema: ForeignKey = ForeignKey(
|
||||
verbose_name='Концептуальная схема',
|
||||
to=RSForm,
|
||||
on_delete=models.CASCADE
|
||||
on_delete=CASCADE
|
||||
)
|
||||
order = models.PositiveIntegerField(
|
||||
order: PositiveIntegerField = PositiveIntegerField(
|
||||
verbose_name='Позиция',
|
||||
validators=[MinValueValidator(1)],
|
||||
default=-1,
|
||||
)
|
||||
alias = models.CharField(
|
||||
alias: CharField = CharField(
|
||||
verbose_name='Имя',
|
||||
max_length=8,
|
||||
default='undefined'
|
||||
)
|
||||
cst_type = models.CharField(
|
||||
cst_type: CharField = CharField(
|
||||
verbose_name='Тип',
|
||||
max_length=10,
|
||||
choices=CstType.choices,
|
||||
default=CstType.BASE
|
||||
)
|
||||
convention = models.TextField(
|
||||
convention: TextField = TextField(
|
||||
verbose_name='Комментарий/Конвенция',
|
||||
default='',
|
||||
blank=True
|
||||
)
|
||||
term_raw = models.TextField(
|
||||
term_raw: TextField = TextField(
|
||||
verbose_name='Термин (с отсылками)',
|
||||
default='',
|
||||
blank=True
|
||||
)
|
||||
term_resolved = models.TextField(
|
||||
term_resolved: TextField = TextField(
|
||||
verbose_name='Термин',
|
||||
default='',
|
||||
blank=True
|
||||
)
|
||||
term_forms = models.JSONField(
|
||||
term_forms: JSONField = JSONField(
|
||||
verbose_name='Словоформы',
|
||||
default=_empty_forms
|
||||
)
|
||||
definition_formal = models.TextField(
|
||||
definition_formal: TextField = TextField(
|
||||
verbose_name='Родоструктурное определение',
|
||||
default='',
|
||||
blank=True
|
||||
)
|
||||
definition_raw = models.TextField(
|
||||
definition_raw: TextField = TextField(
|
||||
verbose_name='Текстовое определние (с отсылками)',
|
||||
default='',
|
||||
blank=True
|
||||
)
|
||||
definition_resolved = models.TextField(
|
||||
definition_resolved: TextField = TextField(
|
||||
verbose_name='Текстовое определние',
|
||||
default='',
|
||||
blank=True
|
||||
|
@ -342,9 +346,9 @@ class Constituenta(models.Model):
|
|||
self.term_resolved = ''
|
||||
self.term_forms = []
|
||||
|
||||
def to_trs(self) -> str:
|
||||
def to_trs(self) -> dict:
|
||||
return {
|
||||
'entityUID': self.id,
|
||||
'entityUID': self.pk,
|
||||
'type': 'constituenta',
|
||||
'cstType': self.cst_type,
|
||||
'alias': self.alias,
|
||||
|
|
|
@ -73,7 +73,7 @@ class RSFormDetailsSerlializer(serializers.BaseSerializer):
|
|||
trs = pyconcept.check_schema(json.dumps(instance.to_trs()))
|
||||
trs = trs.replace('entityUID', 'id')
|
||||
result = json.loads(trs)
|
||||
result['id'] = instance.id
|
||||
result['id'] = instance.pk
|
||||
result['time_update'] = instance.time_update
|
||||
result['time_create'] = instance.time_create
|
||||
result['is_common'] = instance.is_common
|
||||
|
@ -101,7 +101,7 @@ class ConstituentaSerializer(serializers.ModelSerializer):
|
|||
if 'definition_raw' in validated_data:
|
||||
validated_data['definition_resolved'] = validated_data['definition_raw']
|
||||
|
||||
result = super().update(instance, validated_data)
|
||||
result: Constituenta = super().update(instance, validated_data)
|
||||
instance.schema.save()
|
||||
return result
|
||||
|
||||
|
|
|
@ -16,7 +16,7 @@ from apps.rsform.views import (
|
|||
|
||||
|
||||
def _response_contains(response, schema: RSForm) -> bool:
|
||||
return any(x for x in response.data if x['id'] == schema.id)
|
||||
return any(x for x in response.data if x['id'] == schema.pk)
|
||||
|
||||
|
||||
class TestConstituentaAPI(APITestCase):
|
||||
|
@ -25,8 +25,8 @@ class TestConstituentaAPI(APITestCase):
|
|||
self.user = User.objects.create(username='UserTest')
|
||||
self.client = APIClient()
|
||||
self.client.force_authenticate(user=self.user)
|
||||
self.rsform_owned: RSForm = RSForm.objects.create(title='Test', alias='T1', owner=self.user)
|
||||
self.rsform_unowned: RSForm = RSForm.objects.create(title='Test2', alias='T2')
|
||||
self.rsform_owned = RSForm.objects.create(title='Test', alias='T1', owner=self.user)
|
||||
self.rsform_unowned = RSForm.objects.create(title='Test2', alias='T2')
|
||||
self.cst1 = Constituenta.objects.create(
|
||||
alias='X1', schema=self.rsform_owned, order=1, convention='Test')
|
||||
self.cst2 = Constituenta.objects.create(
|
||||
|
@ -87,8 +87,8 @@ class TestRSFormViewset(APITestCase):
|
|||
self.user = User.objects.create(username='UserTest')
|
||||
self.client = APIClient()
|
||||
self.client.force_authenticate(user=self.user)
|
||||
self.rsform_owned: RSForm = RSForm.objects.create(title='Test', alias='T1', owner=self.user)
|
||||
self.rsform_unowned: RSForm = RSForm.objects.create(title='Test2', alias='T2')
|
||||
self.rsform_owned = RSForm.objects.create(title='Test', alias='T1', owner=self.user)
|
||||
self.rsform_unowned = RSForm.objects.create(title='Test2', alias='T2')
|
||||
|
||||
def test_create_anonymous(self):
|
||||
self.client.logout()
|
||||
|
@ -131,7 +131,7 @@ class TestRSFormViewset(APITestCase):
|
|||
|
||||
def test_contents(self):
|
||||
schema = RSForm.objects.create(title='Title1')
|
||||
schema.insert_last(alias='X1', type=CstType.BASE)
|
||||
schema.insert_last(alias='X1', insert_type=CstType.BASE)
|
||||
response = self.client.get(f'/api/rsforms/{schema.id}/contents/')
|
||||
self.assertEqual(response.status_code, 200)
|
||||
|
||||
|
@ -418,9 +418,9 @@ class TestLibraryAPI(APITestCase):
|
|||
self.user = User.objects.create(username='UserTest')
|
||||
self.client = APIClient()
|
||||
self.client.force_authenticate(user=self.user)
|
||||
self.rsform_owned: RSForm = RSForm.objects.create(title='Test', alias='T1', owner=self.user)
|
||||
self.rsform_unowned: RSForm = RSForm.objects.create(title='Test2', alias='T2')
|
||||
self.rsform_common: RSForm = RSForm.objects.create(title='Test3', alias='T3', is_common=True)
|
||||
self.rsform_owned = RSForm.objects.create(title='Test', alias='T1', owner=self.user)
|
||||
self.rsform_unowned = RSForm.objects.create(title='Test2', alias='T2')
|
||||
self.rsform_common = RSForm.objects.create(title='Test3', alias='T3', is_common=True)
|
||||
|
||||
def test_retrieve_common(self):
|
||||
self.client.logout()
|
||||
|
|
|
@ -21,7 +21,8 @@ def read_trs(file) -> dict:
|
|||
''' Read JSON from TRS file '''
|
||||
with ZipFile(file, 'r') as archive:
|
||||
json_data = archive.read('document.json')
|
||||
return json.loads(json_data)
|
||||
result: dict = json.loads(json_data)
|
||||
return result
|
||||
|
||||
|
||||
def write_trs(json_data: dict) -> bytes:
|
||||
|
|
|
@ -53,7 +53,7 @@ class RSFormViewSet(viewsets.ModelViewSet):
|
|||
ordering = '-time_update'
|
||||
|
||||
def _get_schema(self) -> models.RSForm:
|
||||
return self.get_object()
|
||||
return self.get_object() # type: ignore
|
||||
|
||||
def perform_create(self, serializer):
|
||||
if not self.request.user.is_anonymous and 'owner' not in self.request.POST:
|
||||
|
@ -114,7 +114,7 @@ class RSFormViewSet(viewsets.ModelViewSet):
|
|||
@action(detail=True, methods=['patch'], url_path='cst-moveto')
|
||||
def cst_moveto(self, request, pk):
|
||||
''' Endpoint: Move multiple constituents. '''
|
||||
schema: models.RSForm = self._get_schema()
|
||||
schema = self._get_schema()
|
||||
serializer = serializers.CstMoveSerlializer(data=request.data, context={'schema': schema})
|
||||
serializer.is_valid(raise_exception=True)
|
||||
schema.move_cst(serializer.validated_data['constituents'], serializer.validated_data['move_to'])
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
''' Concept core text processing library. '''
|
||||
# pylint: skip-file
|
||||
from .syntax import RuSyntax, Capitalization
|
||||
from .rumodel import Morphology, SemanticRole, WordTag, morpho
|
||||
from .ruparser import PhraseParser, WordToken, Collation
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
''' Russian language models. '''
|
||||
from __future__ import annotations
|
||||
from enum import Enum, unique
|
||||
from typing import Iterable
|
||||
|
||||
from pymorphy2 import MorphAnalyzer
|
||||
from pymorphy2.tagset import OpencorporaTag as WordTag
|
||||
|
@ -59,14 +60,14 @@ class Morphology:
|
|||
return pos in ['ADJF', 'ADJS', 'PRTF', 'PRTS']
|
||||
|
||||
@property
|
||||
def effective_pos(self) -> str:
|
||||
def effective_POS(self) -> str:
|
||||
''' Access part of speech. Pronouns are considered as nouns '''
|
||||
pos = self.tag.POS
|
||||
pos: str = self.tag.POS
|
||||
if pos and self.tag.POS == 'NPRO':
|
||||
return 'NOUN'
|
||||
return pos
|
||||
|
||||
def complete_tags(self, tags: frozenset[str]) -> set[str]:
|
||||
def complete_tags(self, tags: Iterable[str]) -> set[str]:
|
||||
''' Add missing tags before inflection. '''
|
||||
result = set(tags)
|
||||
pos = self.tag.POS
|
||||
|
@ -111,6 +112,7 @@ class Morphology:
|
|||
if count == 0:
|
||||
return ''
|
||||
elif count == 1:
|
||||
return next(iter(grammemes))
|
||||
result: str = next(iter(grammemes))
|
||||
return result
|
||||
else:
|
||||
return ','.join(grammemes)
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
''' Parsing Russian language using pymorphy2 and natasha libraries. '''
|
||||
from __future__ import annotations
|
||||
from typing import Iterable, Optional
|
||||
|
||||
from razdel.substring import Substring as Segment
|
||||
from pymorphy2.analyzer import Parse as WordForm
|
||||
|
@ -11,17 +12,16 @@ INDEX_NONE = -1
|
|||
NO_COORDINATION = -1
|
||||
WORD_NONE = -1
|
||||
|
||||
Tags = Iterable[str]
|
||||
|
||||
|
||||
class WordToken:
|
||||
''' Minimal text token. '''
|
||||
''' Atomic text token. '''
|
||||
def __init__(self, segment: Segment, forms: list[WordForm], main_form: int = 0):
|
||||
self.segment: Segment = segment
|
||||
self.forms: list[WordForm] = forms
|
||||
self.main: int = main_form
|
||||
|
||||
def __del__(self):
|
||||
pass
|
||||
|
||||
def get_morpho(self) -> Morphology:
|
||||
''' Return morphology for current token. '''
|
||||
return Morphology(self.get_form().tag)
|
||||
|
@ -30,7 +30,7 @@ class WordToken:
|
|||
''' Access main form. '''
|
||||
return self.forms[self.main]
|
||||
|
||||
def inflect(self, inflection_tags: set[str]):
|
||||
def inflect(self, inflection_tags: set[str]) -> Optional[WordForm]:
|
||||
''' Apply inflection to segment text. Does not modify forms '''
|
||||
inflected = self.get_form().inflect(inflection_tags)
|
||||
if not inflected:
|
||||
|
@ -43,21 +43,20 @@ class Collation:
|
|||
''' Parsed data for input coordinated text. '''
|
||||
def __init__(self, text: str):
|
||||
self.text = text
|
||||
self.words = []
|
||||
self.coordination = []
|
||||
self.words: list[WordToken] = []
|
||||
self.coordination: list[int] = []
|
||||
self.main_word: int = WORD_NONE
|
||||
|
||||
def __del__(self):
|
||||
pass
|
||||
def is_valid(self) -> bool:
|
||||
''' Check if data is parsed correctly '''
|
||||
return self.main_word != WORD_NONE
|
||||
|
||||
def get_form(self) -> WordForm:
|
||||
''' Access main form. '''
|
||||
''' Access WordForm. '''
|
||||
return self.words[self.main_word].get_form()
|
||||
|
||||
def get_morpho(self) -> Morphology:
|
||||
''' Access parsed main morphology. '''
|
||||
if self.main_word == WORD_NONE:
|
||||
return None
|
||||
return self.words[self.main_word].get_morpho()
|
||||
|
||||
def add_word(self, segment, forms: list, main_form: int, need_coordination: bool = True):
|
||||
|
@ -65,28 +64,29 @@ class Collation:
|
|||
self.words.append(WordToken(segment, forms, main_form))
|
||||
self.coordination.append(NO_COORDINATION if not need_coordination else 0)
|
||||
|
||||
def inflect(self, target_tags: frozenset[str]) -> str:
|
||||
def inflect(self, target_tags: Tags) -> str:
|
||||
''' Inflect text to match required tags. '''
|
||||
origin = self.get_morpho()
|
||||
if not origin or origin.tag.grammemes.issuperset(target_tags):
|
||||
return self.text
|
||||
if not self._apply_inflection(origin, target_tags):
|
||||
return self.text
|
||||
new_text = self._generate_text()
|
||||
return new_text
|
||||
if self.is_valid():
|
||||
origin = self.get_morpho()
|
||||
if not origin.tag.grammemes.issuperset(target_tags):
|
||||
if self._apply_inflection(origin, target_tags):
|
||||
return self._generate_text()
|
||||
return self.text
|
||||
|
||||
def inflect_like(self, base_model: Collation) -> str:
|
||||
''' Create inflection to substitute base_model form. '''
|
||||
morph = base_model.get_morpho()
|
||||
if morph.effective_pos is None:
|
||||
return self.text
|
||||
tags = set()
|
||||
tags.add(morph.effective_pos)
|
||||
tags = morph.complete_tags(tags)
|
||||
return self.inflect(tags)
|
||||
if self.is_valid():
|
||||
morph = base_model.get_morpho()
|
||||
if morph.effective_POS:
|
||||
tags = set()
|
||||
tags.add(morph.effective_POS)
|
||||
tags = morph.complete_tags(tags)
|
||||
return self.inflect(tags)
|
||||
return self.text
|
||||
|
||||
def inflect_dependant(self, master_model: Collation) -> str:
|
||||
''' Create inflection to coordinate with master_model form. '''
|
||||
assert self.is_valid()
|
||||
morph = master_model.get_morpho()
|
||||
tags = morph.coordination_tags()
|
||||
tags = self.get_morpho().complete_tags(tags)
|
||||
|
@ -94,12 +94,12 @@ class Collation:
|
|||
|
||||
def normal_form(self) -> str:
|
||||
''' Generate normal form. '''
|
||||
main_form = self.get_form()
|
||||
if not main_form:
|
||||
return self.text
|
||||
new_morpho = Morphology(main_form.normalized.tag)
|
||||
new_tags = new_morpho.complete_tags(frozenset())
|
||||
return self.inflect(new_tags)
|
||||
if self.is_valid():
|
||||
main_form = self.get_form()
|
||||
new_morpho = Morphology(main_form.normalized.tag)
|
||||
new_tags = new_morpho.complete_tags(frozenset())
|
||||
return self.inflect(new_tags)
|
||||
return self.text
|
||||
|
||||
def _iterate_coordinated(self):
|
||||
words_count = len(self.words)
|
||||
|
@ -108,21 +108,20 @@ class Collation:
|
|||
yield self.words[current_word]
|
||||
current_word += self.coordination[current_word]
|
||||
|
||||
def _inflect_main_word(self, origin: Morphology, target_tags: frozenset[str]) -> Morphology:
|
||||
def _inflect_main_word(self, origin: Morphology, target_tags: Tags) -> Optional[Morphology]:
|
||||
full_tags = origin.complete_tags(target_tags)
|
||||
inflected = self.words[self.main_word].inflect(full_tags)
|
||||
if not inflected:
|
||||
return None
|
||||
return Morphology(inflected.tag)
|
||||
|
||||
def _apply_inflection(self, origin: Morphology, target_tags: frozenset[str]) -> bool:
|
||||
def _apply_inflection(self, origin: Morphology, target_tags: Tags) -> bool:
|
||||
new_moprho = self._inflect_main_word(origin, target_tags)
|
||||
if not new_moprho:
|
||||
return False
|
||||
inflection_tags = new_moprho.coordination_tags()
|
||||
if len(inflection_tags) == 0:
|
||||
return True
|
||||
|
||||
for word in self._iterate_coordinated():
|
||||
word.inflect(inflection_tags)
|
||||
return True
|
||||
|
@ -155,13 +154,17 @@ class PhraseParser:
|
|||
_MAIN_WAIT_LIMIT = 10 # count words until fixing main
|
||||
_MAIN_MAX_FOLLOWERS = 3 # count words after main as coordination candidates
|
||||
|
||||
def parse(self, text: str, require_index: int = INDEX_NONE, require_tags: frozenset[str] = None) -> Collation:
|
||||
''' Determine morpho tags for input text.
|
||||
::returns:: Morphology of a text or None if no suitable form is available '''
|
||||
if text == '':
|
||||
return None
|
||||
def parse(self, text: str,
|
||||
require_index: int = INDEX_NONE,
|
||||
require_tags: Optional[Tags] = None) -> Optional[Collation]:
|
||||
'''
|
||||
Determine morpho tags for input text.
|
||||
::returns:: Collation (parsed morphology) of a text or None if no suitable form is available
|
||||
'''
|
||||
segments = list(RuSyntax.tokenize(text))
|
||||
if len(segments) == 1:
|
||||
if len(segments) == 0:
|
||||
return None
|
||||
elif len(segments) == 1:
|
||||
return self._parse_single(segments[0], require_index, require_tags)
|
||||
else:
|
||||
return self._parse_multiword(text, segments, require_index, require_tags)
|
||||
|
@ -169,9 +172,9 @@ class PhraseParser:
|
|||
def normalize(self, text: str):
|
||||
''' Get normal form for target text. '''
|
||||
processed = self.parse(text)
|
||||
if not processed:
|
||||
return text
|
||||
return processed.normal_form()
|
||||
if processed:
|
||||
return processed.normal_form()
|
||||
return text
|
||||
|
||||
def find_substr(self, text: str, sub: str) -> tuple[int, int]:
|
||||
''' Search for substring position in text regardless of morphology. '''
|
||||
|
@ -234,7 +237,7 @@ class PhraseParser:
|
|||
return dependant_normal
|
||||
return dependant_model.inflect_dependant(master_model)
|
||||
|
||||
def _parse_single(self, segment, require_index: int, require_tags: frozenset[str]) -> Collation:
|
||||
def _parse_single(self, segment, require_index: int, require_tags: Optional[Tags]) -> Optional[Collation]:
|
||||
forms = list(self._filtered_parse(segment.text))
|
||||
parse_index = INDEX_NONE
|
||||
if len(forms) == 0 or require_index >= len(forms):
|
||||
|
@ -266,9 +269,10 @@ class PhraseParser:
|
|||
result.main_word = 0
|
||||
return result
|
||||
|
||||
def _parse_multiword(self, text: str, segments: list, require_index: int, require_tags: frozenset[str]):
|
||||
def _parse_multiword(self, text: str, segments: list, require_index: int,
|
||||
require_tags: Optional[Tags]) -> Optional[Collation]:
|
||||
result = Collation(text)
|
||||
priority_main = self._PRIORITY_NONE
|
||||
priority_main: float = self._PRIORITY_NONE
|
||||
segment_index = 0
|
||||
main_wait = 0
|
||||
word_index = 0
|
||||
|
@ -295,20 +299,20 @@ class PhraseParser:
|
|||
output: Collation,
|
||||
segment: Segment,
|
||||
require_index: int,
|
||||
require_tags: frozenset[str]) -> float:
|
||||
require_tags: Optional[Tags]) -> Optional[float]:
|
||||
''' Return the priority with which this segment can become the new main word '''
|
||||
forms = list(self._filtered_parse(segment.text))
|
||||
if len(forms) == 0:
|
||||
return None
|
||||
main_index = INDEX_NONE
|
||||
segment_score = self._PRIORITY_NONE
|
||||
main_index: int = INDEX_NONE
|
||||
segment_score: float = self._PRIORITY_NONE
|
||||
needs_coordination = False
|
||||
local_sum = 0
|
||||
score_sum = 0
|
||||
local_sum: float = 0
|
||||
score_sum: float = 0
|
||||
if require_index != INDEX_NONE:
|
||||
form = forms[require_index]
|
||||
if not require_tags or form.tag.grammemes.issuperset(require_tags):
|
||||
(local_max, segment_score) = PhraseParser._get_priority_for(form.tag)
|
||||
(local_max, segment_score) = PhraseParser._get_priorities_for(form.tag)
|
||||
main_index = require_index
|
||||
needs_coordination = Morphology.is_dependable(form.tag.POS)
|
||||
else:
|
||||
|
@ -316,7 +320,7 @@ class PhraseParser:
|
|||
for (index, form) in enumerate(forms):
|
||||
if require_tags and not form.tag.grammemes.issuperset(require_tags):
|
||||
continue
|
||||
(local_priority, global_priority) = PhraseParser._get_priority_for(form.tag)
|
||||
(local_priority, global_priority) = PhraseParser._get_priorities_for(form.tag)
|
||||
needs_coordination = needs_coordination or Morphology.is_dependable(form.tag.POS)
|
||||
local_sum += global_priority * form.score
|
||||
score_sum += form.score
|
||||
|
@ -414,7 +418,8 @@ class PhraseParser:
|
|||
yield form
|
||||
|
||||
@staticmethod
|
||||
def _parse_word(text: str, require_index: int = INDEX_NONE, require_tags: frozenset[str] = None) -> Morphology:
|
||||
def _parse_word(text: str, require_index: int = INDEX_NONE,
|
||||
require_tags: Optional[Tags] = None) -> Optional[Morphology]:
|
||||
parsed_variants = morpho.parse(text)
|
||||
if not parsed_variants or require_index >= len(parsed_variants):
|
||||
return None
|
||||
|
@ -432,7 +437,7 @@ class PhraseParser:
|
|||
return None
|
||||
|
||||
@staticmethod
|
||||
def _get_priority_for(tag: WordTag) -> tuple[float, float]:
|
||||
def _get_priorities_for(tag: WordTag) -> tuple[float, float]:
|
||||
''' Return pair of local and global priorities. '''
|
||||
if tag.POS in ['VERB', 'INFN']:
|
||||
return (9, 10)
|
||||
|
@ -447,7 +452,9 @@ class PhraseParser:
|
|||
return (0, 0)
|
||||
|
||||
@staticmethod
|
||||
def _choose_context_etalon(target: Morphology, before: Collation, after: Collation) -> Collation:
|
||||
def _choose_context_etalon(target: Morphology,
|
||||
before: Optional[Collation],
|
||||
after: Optional[Collation]) -> Optional[Collation]:
|
||||
if not before or not before.get_morpho().can_coordinate:
|
||||
return after
|
||||
if not after or not after.get_morpho().can_coordinate:
|
||||
|
@ -473,7 +480,7 @@ class PhraseParser:
|
|||
return before
|
||||
|
||||
@staticmethod
|
||||
def _combine_morpho(target: Morphology, etalon: WordTag) -> str:
|
||||
def _combine_morpho(target: Morphology, etalon: WordTag) -> frozenset[str]:
|
||||
part_of_speech = target.tag.POS
|
||||
number = etalon.number
|
||||
if number == 'plur':
|
||||
|
|
|
@ -68,7 +68,11 @@ class RuSyntax:
|
|||
''' Test if text is a single word. '''
|
||||
try:
|
||||
gen = tokenize(text)
|
||||
return next(gen) == '' or next(gen) == ''
|
||||
if next(gen) == '':
|
||||
return True
|
||||
if next(gen) == '':
|
||||
return True
|
||||
return False
|
||||
except StopIteration:
|
||||
return True
|
||||
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
''' Test Russian language parsing. '''
|
||||
import unittest
|
||||
|
||||
from typing import Iterable, Optional
|
||||
from cctext import PhraseParser
|
||||
|
||||
parser = PhraseParser()
|
||||
|
@ -9,16 +10,20 @@ parser = PhraseParser()
|
|||
class TestRuParser(unittest.TestCase):
|
||||
''' Test class for Russian parsing. '''
|
||||
|
||||
def _assert_parse(self, text: str, expected: list[str], require_index: int = -1, require_tags: list[str] = None):
|
||||
def _assert_parse(self, text: str, expected: list[str],
|
||||
require_index: int = -1,
|
||||
require_tags: Optional[Iterable[str]] = None):
|
||||
phrase = parser.parse(text, require_index, require_tags)
|
||||
self.assertEqual(phrase.get_morpho().tag.grammemes, set(expected))
|
||||
self.assertIsNotNone(phrase)
|
||||
if phrase:
|
||||
self.assertEqual(phrase.get_morpho().tag.grammemes, set(expected))
|
||||
|
||||
def _assert_inflect(self, text: str, tags: list[str], expected: str):
|
||||
model = parser.parse(text)
|
||||
if not model:
|
||||
result = text
|
||||
else:
|
||||
result = model.inflect(set(tags))
|
||||
result = model.inflect(frozenset(tags))
|
||||
self.assertEqual(result, expected)
|
||||
|
||||
def test_parse_word(self):
|
||||
|
|
23
rsconcept/backend/mypy.ini
Normal file
23
rsconcept/backend/mypy.ini
Normal file
|
@ -0,0 +1,23 @@
|
|||
# Global options:
|
||||
|
||||
[mypy]
|
||||
warn_return_any = True
|
||||
warn_unused_configs = True
|
||||
|
||||
plugins = mypy_drf_plugin.main, mypy_django_plugin.main
|
||||
|
||||
# Per-module options:
|
||||
[mypy.plugins.django-stubs]
|
||||
django_settings_module = "project.settings"
|
||||
|
||||
[mypy-django_filters.*]
|
||||
ignore_missing_imports = True
|
||||
|
||||
[mypy-pyconcept.*]
|
||||
ignore_missing_imports = True
|
||||
|
||||
[mypy-razdel.*]
|
||||
ignore_missing_imports = True
|
||||
|
||||
[mypy-pymorphy2.*]
|
||||
ignore_missing_imports = True
|
|
@ -123,7 +123,7 @@ DATABASES = {
|
|||
# Password validation
|
||||
# https://docs.djangoproject.com/en/4.1/ref/settings/#auth-password-validators
|
||||
|
||||
AUTH_PASSWORD_VALIDATORS = [
|
||||
AUTH_PASSWORD_VALIDATORS: list[str] = [
|
||||
# NOTE: Password validators disabled
|
||||
# {
|
||||
# 'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator',
|
||||
|
|
|
@ -9,5 +9,8 @@ pymorphy2-dicts-ru
|
|||
pymorphy2-dicts-uk
|
||||
razdel
|
||||
|
||||
mypy
|
||||
pylint
|
||||
coverage
|
||||
django-stubs[compatible-mypy]
|
||||
djangorestframework-stubs[compatible-mypy]
|
Loading…
Reference in New Issue
Block a user