Add backend support for text parsing

This commit is contained in:
IRBorisov 2023-09-24 19:08:17 +03:00
parent f8c087ad87
commit f7a7a1b173
8 changed files with 162 additions and 22 deletions

View File

@ -27,6 +27,30 @@ class ExpressionSerializer(serializers.Serializer):
expression = serializers.CharField()
class WordFormSerializer(serializers.Serializer):
    ''' Serializer: single wordform - text paired with its grammemes. '''
    # Text of the wordform (or text to be inflected when used as a request)
    text = serializers.CharField()
    # Grammemes as a single string, e.g. 'plur,datv'
    grams = serializers.CharField()
class MultiFormSerializer(serializers.Serializer):
    ''' Serializer: list of wordforms. '''
    # Every entry is validated as a WordFormSerializer ('text' + 'grams')
    items = serializers.ListField(
        child=WordFormSerializer()
    )

    @staticmethod
    def from_list(data: list[tuple[str, str]]) -> dict:
        ''' Convert list of (text, grams) pairs into serializer-shaped data.

        ::returns:: dict with single key 'items' holding a list of
        {'text': ..., 'grams': ...} dicts in the same order as input. '''
        return {
            'items': [
                {'text': item[0], 'grams': item[1]}
                for item in data
            ]
        }
class TextSerializer(serializers.Serializer):
    ''' Serializer: Text with references. '''
    # The only field: required string
    text = serializers.CharField()

View File

@ -6,14 +6,17 @@ from zipfile import ZipFile
from rest_framework.test import APITestCase, APIRequestFactory, APIClient
from rest_framework.exceptions import ErrorDetail
from cctext import ReferenceType
from cctext import ReferenceType, split_grams
from apps.users.models import User
from apps.rsform.models import Syntax, RSForm, Constituenta, CstType, LibraryItem, LibraryItemType, Subscription
from apps.rsform.views import (
convert_to_ascii,
convert_to_math,
parse_expression
parse_expression,
inflect,
parse_text,
generate_lexeme
)
@ -572,7 +575,7 @@ class TestRSFormViewset(APITestCase):
self.assertEqual(response.data['items'][1]['term_resolved'], d1.term_resolved)
class TestFunctionalViews(APITestCase):
class TestRSLanguageViews(APITestCase):
def setUp(self):
self.factory = APIRequestFactory()
self.user = User.objects.create(username='UserTest')
@ -601,35 +604,35 @@ class TestFunctionalViews(APITestCase):
def test_convert_to_ascii(self):
data = {'expression': '1=1'}
request = self.factory.post('/api/func/to-ascii', data)
request = self.factory.post('/api/rslang/to-ascii', data)
response = convert_to_ascii(request)
self.assertEqual(response.status_code, 200)
self.assertEqual(response.data['result'], r'1 \eq 1')
def test_convert_to_ascii_missing_data(self):
data = {'data': '1=1'}
request = self.factory.post('/api/func/to-ascii', data)
request = self.factory.post('/api/rslang/to-ascii', data)
response = convert_to_ascii(request)
self.assertEqual(response.status_code, 400)
self.assertIsInstance(response.data['expression'][0], ErrorDetail)
def test_convert_to_math(self):
data = {'expression': r'1 \eq 1'}
request = self.factory.post('/api/func/to-math', data)
request = self.factory.post('/api/rslang/to-math', data)
response = convert_to_math(request)
self.assertEqual(response.status_code, 200)
self.assertEqual(response.data['result'], r'1=1')
def test_convert_to_math_missing_data(self):
data = {'data': r'1 \eq 1'}
request = self.factory.post('/api/func/to-math', data)
request = self.factory.post('/api/rslang/to-math', data)
response = convert_to_math(request)
self.assertEqual(response.status_code, 400)
self.assertIsInstance(response.data['expression'][0], ErrorDetail)
def test_parse_expression(self):
data = {'expression': r'1=1'}
request = self.factory.post('/api/func/parse-expression', data)
request = self.factory.post('/api/rslang/parse-expression', data)
response = parse_expression(request)
self.assertEqual(response.status_code, 200)
self.assertEqual(response.data['parseResult'], True)
@ -638,7 +641,38 @@ class TestFunctionalViews(APITestCase):
def test_parse_expression_missing_data(self):
data = {'data': r'1=1'}
request = self.factory.post('/api/func/parse-expression', data)
request = self.factory.post('/api/rslang/parse-expression', data)
response = parse_expression(request)
self.assertEqual(response.status_code, 400)
self.assertIsInstance(response.data['expression'][0], ErrorDetail)
class TestNaturalLanguageViews(APITestCase):
    ''' Testing natural language views: parse, inflect, generate lexeme. '''

    def setUp(self):
        self.factory = APIRequestFactory()
        self.client = APIClient()

    def _assert_tags(self, actual: str, expected: str):
        ''' Compare two grammeme strings as sets, ignoring tag order. '''
        actual_tags = set(split_grams(actual))
        expected_tags = set(split_grams(expected))
        self.assertEqual(actual_tags, expected_tags)

    def test_parse_text(self):
        ''' Parse endpoint returns grammemes of the given text. '''
        payload = {'text': 'синим слонам'}
        response = parse_text(self.factory.post('/api/cctext/parse', payload))
        self.assertEqual(response.status_code, 200)
        self._assert_tags(response.data['result'], 'datv,NOUN,plur,anim,masc')

    def test_inflect(self):
        ''' Inflect endpoint returns text in the requested form. '''
        payload = {'text': 'синий слон', 'grams': 'plur,datv'}
        response = inflect(self.factory.post('/api/cctext/inflect', payload))
        self.assertEqual(response.status_code, 200)
        self.assertEqual(response.data['result'], 'синим слонам')

    def test_generate_lexeme(self):
        ''' Lexeme endpoint returns list of wordforms starting with the nominal one. '''
        payload = {'text': 'синий слон'}
        response = generate_lexeme(self.factory.post('/api/cctext/generate-lexeme', payload))
        self.assertEqual(response.status_code, 200)
        wordforms = response.data['items']
        self.assertEqual(len(wordforms), 12)
        self.assertEqual(wordforms[0]['text'], 'синий слон')

View File

@ -12,8 +12,14 @@ urlpatterns = [
path('constituents/<int:pk>', views.ConstituentAPIView.as_view(), name='constituenta-detail'),
path('rsforms/import-trs', views.TrsImportView.as_view()),
path('rsforms/create-detailed', views.create_rsform),
path('func/parse-expression', views.parse_expression),
path('func/to-ascii', views.convert_to_ascii),
path('func/to-math', views.convert_to_math),
path('rslang/parse-expression', views.parse_expression),
path('rslang/to-ascii', views.convert_to_ascii),
path('rslang/to-math', views.convert_to_math),
path('cctext/inflect', views.inflect),
path('cctext/generate-lexeme', views.generate_lexeme),
path('cctext/parse', views.parse_text),
path('', include(library_router.urls)),
]

View File

@ -13,6 +13,7 @@ from drf_spectacular.utils import extend_schema, extend_schema_view
from rest_framework import status as c
import pyconcept
import cctext
from . import models as m
from . import serializers as s
from . import utils
@ -527,7 +528,10 @@ def convert_to_ascii(request):
serializer.is_valid(raise_exception=True)
expression = serializer.validated_data['expression']
result = pyconcept.convert_to_ascii(expression)
return Response({'result': result})
return Response(
status=c.HTTP_200_OK,
data={'result': result}
)
@extend_schema(
@ -544,4 +548,67 @@ def convert_to_math(request):
serializer.is_valid(raise_exception=True)
expression = serializer.validated_data['expression']
result = pyconcept.convert_to_math(expression)
return Response({'result': result})
return Response(
status=c.HTTP_200_OK,
data={'result': result}
)
@extend_schema(
    summary='generate wordform',
    tags=['NaturalLanguage'],
    request=s.WordFormSerializer,
    responses={200: s.ResultTextResponse},
    auth=None
)
@api_view(['POST'])
def inflect(request):
    ''' Endpoint: Inflect text into the wordform defined by given grammemes. '''
    form_request = s.WordFormSerializer(data=request.data)
    # Reject payloads lacking 'text' or 'grams' before touching cctext
    form_request.is_valid(raise_exception=True)
    params = form_request.validated_data
    wordform = cctext.inflect(params['text'], params['grams'])
    return Response(data={'result': wordform}, status=c.HTTP_200_OK)
@extend_schema(
    summary='basic set of wordforms',
    tags=['NaturalLanguage'],
    request=s.TextSerializer,
    responses={200: s.MultiFormSerializer},
    auth=None
)
@api_view(['POST'])
def generate_lexeme(request):
    ''' Endpoint: Produce wordforms of the lexeme for given nominal text. '''
    text_request = s.TextSerializer(data=request.data)
    # Reject payloads lacking 'text' before touching cctext
    text_request.is_valid(raise_exception=True)
    wordforms = cctext.generate_lexeme(text_request.validated_data['text'])
    # Reshape (text, grams) pairs into the {'items': [...]} response layout
    payload = s.MultiFormSerializer.from_list(wordforms)
    return Response(data=payload, status=c.HTTP_200_OK)
@extend_schema(
    summary='get all language parse variants',
    tags=['NaturalLanguage'],
    request=s.TextSerializer,
    responses={200: s.ResultTextResponse},
    auth=None
)
@api_view(['POST'])
def parse_text(request):
    ''' Endpoint: Parse text and return result of cctext.parse. '''
    text_request = s.TextSerializer(data=request.data)
    # Reject payloads lacking 'text' before touching cctext
    text_request.is_valid(raise_exception=True)
    parsed = cctext.parse(text_request.validated_data['text'])
    return Response(data={'result': parsed}, status=c.HTTP_200_OK)

View File

@ -9,7 +9,7 @@ from .resolver import Reference, Position, Resolver, ResolvedReference, resolve_
from .conceptapi import (
parse, normalize,
get_all_forms, inflect, inflect_context, inflect_substitute, inflect_dependant,
generate_lexeme, inflect, inflect_context, inflect_substitute, inflect_dependant,
match_all_morpho, find_substr
)

View File

@ -21,14 +21,19 @@ def parse(text: str, require_grams: str = '') -> str:
return result if result != 'UNKN' else ''
def get_all_forms(text_normal: str) -> list[tuple[str, str]]:
''' Get all infeclted forms. '''
# def parse_variants(text: str, require_grams: str = '') -> list[tuple[str, str]]:
# ''' Get all variants of a parse.
# ::returns:: string of comma separated grammar tags or empty string '''
def generate_lexeme(text_normal: str) -> list[tuple[str, str]]:
''' Get all inflected forms belonging to same Lexeme. '''
model = parser.parse(text_normal)
if not model:
return []
result = []
for form in model.get_form().lexeme:
result.append((form.word, Morphology(form.tag).to_text()))
result.append((model.inflect(form.tag.grammemes), Morphology(form.tag).to_text()))
return result

View File

@ -21,15 +21,20 @@ class TestConceptAPI(unittest.TestCase):
self.assertEqual(cc.normalize('первого'), 'первый')
self.assertEqual(cc.normalize('диких людей'), 'дикий человек')
def test_get_all_forms(self):
def test_generate_lexeme(self):
''' Test all lexical forms. '''
self.assertEqual(cc.get_all_forms(''), [])
self.assertEqual(cc.generate_lexeme(''), [])
forms = cc.get_all_forms('наверное')
forms = cc.generate_lexeme('наверное')
self.assertEqual(len(forms), 1)
self.assertEqual(forms[0][0], 'наверное')
self._assert_tags(forms[0][1], 'CONJ,Prnt')
forms = cc.generate_lexeme('молодой человек')
self.assertEqual(len(forms), 19)
self.assertEqual(forms[0][0], 'молодой человек')
self._assert_tags(forms[0][1], 'nomn,masc,sing,anim,NOUN')
def test_inflect(self):
''' Test inflection. '''
self.assertEqual(cc.inflect('', ''), '')

View File

@ -1,6 +1,5 @@
''' Main URL router '''
from django.contrib import admin
from django.shortcuts import redirect
from django.urls import path, include
from django.conf import settings
from django.conf.urls.static import static