mirror of
https://github.com/IRBorisov/ConceptPortal.git
synced 2025-06-26 13:00:39 +03:00
Add backend support for text parsing
This commit is contained in:
parent
f8c087ad87
commit
f7a7a1b173
|
@ -27,6 +27,30 @@ class ExpressionSerializer(serializers.Serializer):
|
|||
expression = serializers.CharField()
|
||||
|
||||
|
||||
class WordFormSerializer(serializers.Serializer):
    ''' Serializer: request to inflect a text into a given wordform. '''
    # Text that should be inflected.
    text = serializers.CharField()
    # Target grammemes as a single string (the tests send e.g. 'plur,datv').
    grams = serializers.CharField()
|
||||
|
||||
|
||||
class MultiFormSerializer(serializers.Serializer):
    ''' Serializer: list of wordforms, each one a text with its grammemes. '''
    items = serializers.ListField(
        child=WordFormSerializer()
    )

    @staticmethod
    def from_list(data: list[tuple[str, str]]) -> dict:
        ''' Pack (text, grams) pairs into the payload shape of this serializer.

        ::param data:: sequence of pairs, text first and grammemes second
        ::returns:: dict with a single 'items' key holding one
        {'text': ..., 'grams': ...} entry per input pair, in input order
        '''
        # Index access (not unpacking) keeps the original tolerance for
        # items that carry more than two elements.
        return {
            'items': [
                {'text': item[0], 'grams': item[1]}
                for item in data
            ]
        }
|
||||
|
||||
|
||||
class TextSerializer(serializers.Serializer):
    ''' Serializer: Text with references. '''
    # The only field of the payload: the text to be processed.
    text = serializers.CharField()
|
||||
|
|
|
@ -6,14 +6,17 @@ from zipfile import ZipFile
|
|||
from rest_framework.test import APITestCase, APIRequestFactory, APIClient
|
||||
from rest_framework.exceptions import ErrorDetail
|
||||
|
||||
from cctext import ReferenceType
|
||||
from cctext import ReferenceType, split_grams
|
||||
|
||||
from apps.users.models import User
|
||||
from apps.rsform.models import Syntax, RSForm, Constituenta, CstType, LibraryItem, LibraryItemType, Subscription
|
||||
from apps.rsform.views import (
|
||||
convert_to_ascii,
|
||||
convert_to_math,
|
||||
parse_expression
|
||||
parse_expression,
|
||||
inflect,
|
||||
parse_text,
|
||||
generate_lexeme
|
||||
)
|
||||
|
||||
|
||||
|
@ -572,7 +575,7 @@ class TestRSFormViewset(APITestCase):
|
|||
self.assertEqual(response.data['items'][1]['term_resolved'], d1.term_resolved)
|
||||
|
||||
|
||||
class TestFunctionalViews(APITestCase):
|
||||
class TestRSLanguageViews(APITestCase):
|
||||
def setUp(self):
|
||||
self.factory = APIRequestFactory()
|
||||
self.user = User.objects.create(username='UserTest')
|
||||
|
@ -601,35 +604,35 @@ class TestFunctionalViews(APITestCase):
|
|||
|
||||
def test_convert_to_ascii(self):
|
||||
data = {'expression': '1=1'}
|
||||
request = self.factory.post('/api/func/to-ascii', data)
|
||||
request = self.factory.post('/api/rslang/to-ascii', data)
|
||||
response = convert_to_ascii(request)
|
||||
self.assertEqual(response.status_code, 200)
|
||||
self.assertEqual(response.data['result'], r'1 \eq 1')
|
||||
|
||||
def test_convert_to_ascii_missing_data(self):
|
||||
data = {'data': '1=1'}
|
||||
request = self.factory.post('/api/func/to-ascii', data)
|
||||
request = self.factory.post('/api/rslang/to-ascii', data)
|
||||
response = convert_to_ascii(request)
|
||||
self.assertEqual(response.status_code, 400)
|
||||
self.assertIsInstance(response.data['expression'][0], ErrorDetail)
|
||||
|
||||
def test_convert_to_math(self):
|
||||
data = {'expression': r'1 \eq 1'}
|
||||
request = self.factory.post('/api/func/to-math', data)
|
||||
request = self.factory.post('/api/rslang/to-math', data)
|
||||
response = convert_to_math(request)
|
||||
self.assertEqual(response.status_code, 200)
|
||||
self.assertEqual(response.data['result'], r'1=1')
|
||||
|
||||
def test_convert_to_math_missing_data(self):
|
||||
data = {'data': r'1 \eq 1'}
|
||||
request = self.factory.post('/api/func/to-math', data)
|
||||
request = self.factory.post('/api/rslang/to-math', data)
|
||||
response = convert_to_math(request)
|
||||
self.assertEqual(response.status_code, 400)
|
||||
self.assertIsInstance(response.data['expression'][0], ErrorDetail)
|
||||
|
||||
def test_parse_expression(self):
|
||||
data = {'expression': r'1=1'}
|
||||
request = self.factory.post('/api/func/parse-expression', data)
|
||||
request = self.factory.post('/api/rslang/parse-expression', data)
|
||||
response = parse_expression(request)
|
||||
self.assertEqual(response.status_code, 200)
|
||||
self.assertEqual(response.data['parseResult'], True)
|
||||
|
@ -638,7 +641,38 @@ class TestFunctionalViews(APITestCase):
|
|||
|
||||
def test_parse_expression_missing_data(self):
|
||||
data = {'data': r'1=1'}
|
||||
request = self.factory.post('/api/func/parse-expression', data)
|
||||
request = self.factory.post('/api/rslang/parse-expression', data)
|
||||
response = parse_expression(request)
|
||||
self.assertEqual(response.status_code, 400)
|
||||
self.assertIsInstance(response.data['expression'][0], ErrorDetail)
|
||||
|
||||
|
||||
class TestNaturalLanguageViews(APITestCase):
    ''' Testing natural language endpoints by calling the views directly. '''

    def setUp(self):
        self.factory = APIRequestFactory()
        self.client = APIClient()

    def _assert_tags(self, actual: str, expected: str):
        ''' Assert that two grammeme strings hold the same tags, ignoring order. '''
        actual_tags = set(split_grams(actual))
        expected_tags = set(split_grams(expected))
        self.assertEqual(actual_tags, expected_tags)

    def test_parse_text(self):
        ''' Parsing an inflected phrase yields its grammemes. '''
        payload = {'text': 'синим слонам'}
        response = parse_text(self.factory.post('/api/cctext/parse', payload))
        self.assertEqual(response.status_code, 200)
        self._assert_tags(response.data['result'], 'datv,NOUN,plur,anim,masc')

    def test_inflect(self):
        ''' Inflecting a phrase with given grammemes yields the expected wordform. '''
        payload = {'text': 'синий слон', 'grams': 'plur,datv'}
        response = inflect(self.factory.post('/api/cctext/inflect', payload))
        self.assertEqual(response.status_code, 200)
        self.assertEqual(response.data['result'], 'синим слонам')

    def test_generate_lexeme(self):
        ''' Generating a lexeme yields the full list of forms, the input form first. '''
        payload = {'text': 'синий слон'}
        response = generate_lexeme(self.factory.post('/api/cctext/generate-lexeme', payload))
        self.assertEqual(response.status_code, 200)
        forms = response.data['items']
        self.assertEqual(len(forms), 12)
        self.assertEqual(forms[0]['text'], 'синий слон')
|
||||
|
|
|
@ -12,8 +12,14 @@ urlpatterns = [
|
|||
path('constituents/<int:pk>', views.ConstituentAPIView.as_view(), name='constituenta-detail'),
|
||||
path('rsforms/import-trs', views.TrsImportView.as_view()),
|
||||
path('rsforms/create-detailed', views.create_rsform),
|
||||
path('func/parse-expression', views.parse_expression),
|
||||
path('func/to-ascii', views.convert_to_ascii),
|
||||
path('func/to-math', views.convert_to_math),
|
||||
|
||||
path('rslang/parse-expression', views.parse_expression),
|
||||
path('rslang/to-ascii', views.convert_to_ascii),
|
||||
path('rslang/to-math', views.convert_to_math),
|
||||
|
||||
path('cctext/inflect', views.inflect),
|
||||
path('cctext/generate-lexeme', views.generate_lexeme),
|
||||
path('cctext/parse', views.parse_text),
|
||||
|
||||
path('', include(library_router.urls)),
|
||||
]
|
||||
|
|
|
@ -13,6 +13,7 @@ from drf_spectacular.utils import extend_schema, extend_schema_view
|
|||
from rest_framework import status as c
|
||||
|
||||
import pyconcept
|
||||
import cctext
|
||||
from . import models as m
|
||||
from . import serializers as s
|
||||
from . import utils
|
||||
|
@ -527,7 +528,10 @@ def convert_to_ascii(request):
|
|||
serializer.is_valid(raise_exception=True)
|
||||
expression = serializer.validated_data['expression']
|
||||
result = pyconcept.convert_to_ascii(expression)
|
||||
return Response({'result': result})
|
||||
return Response(
|
||||
status=c.HTTP_200_OK,
|
||||
data={'result': result}
|
||||
)
|
||||
|
||||
|
||||
@extend_schema(
|
||||
|
@ -544,4 +548,67 @@ def convert_to_math(request):
|
|||
serializer.is_valid(raise_exception=True)
|
||||
expression = serializer.validated_data['expression']
|
||||
result = pyconcept.convert_to_math(expression)
|
||||
return Response({'result': result})
|
||||
return Response(
|
||||
status=c.HTTP_200_OK,
|
||||
data={'result': result}
|
||||
)
|
||||
|
||||
@extend_schema(
    summary='generate wordform',
    tags=['NaturalLanguage'],
    request=s.WordFormSerializer,
    responses={200: s.ResultTextResponse},
    auth=None
)
@api_view(['POST'])
def inflect(request):
    ''' Endpoint: Inflect the given text according to the given grammemes. '''
    serializer = s.WordFormSerializer(data=request.data)
    # Validation failure raises and never reaches the inflection call.
    serializer.is_valid(raise_exception=True)
    payload = serializer.validated_data
    wordform = cctext.inflect(payload['text'], payload['grams'])
    return Response(
        status=c.HTTP_200_OK,
        data={'result': wordform}
    )
|
||||
|
||||
|
||||
@extend_schema(
    summary='basic set of wordforms',
    tags=['NaturalLanguage'],
    request=s.TextSerializer,
    responses={200: s.MultiFormSerializer},
    auth=None
)
@api_view(['POST'])
def generate_lexeme(request):
    ''' Endpoint: Produce the wordforms of the lexeme for the given text. '''
    serializer = s.TextSerializer(data=request.data)
    # Validation failure raises and never reaches the generation call.
    serializer.is_valid(raise_exception=True)
    forms = cctext.generate_lexeme(serializer.validated_data['text'])
    # The library result is repacked into the {'items': [...]} response shape.
    return Response(
        status=c.HTTP_200_OK,
        data=s.MultiFormSerializer.from_list(forms)
    )
|
||||
|
||||
|
||||
# NOTE(review): the schema summary below speaks of "all ... variants", while the
# view returns the single result of cctext.parse - confirm the intended wording.
@extend_schema(
    summary='get all language parse variants',
    tags=['NaturalLanguage'],
    request=s.TextSerializer,
    responses={200: s.ResultTextResponse},
    auth=None
)
@api_view(['POST'])
def parse_text(request):
    ''' Endpoint: Parse the given text and return the parse result. '''
    serializer = s.TextSerializer(data=request.data)
    # Validation failure raises and never reaches the parser call.
    serializer.is_valid(raise_exception=True)
    parsed = cctext.parse(serializer.validated_data['text'])
    return Response(
        status=c.HTTP_200_OK,
        data={'result': parsed}
    )
|
||||
|
|
|
@ -9,7 +9,7 @@ from .resolver import Reference, Position, Resolver, ResolvedReference, resolve_
|
|||
|
||||
from .conceptapi import (
|
||||
parse, normalize,
|
||||
get_all_forms, inflect, inflect_context, inflect_substitute, inflect_dependant,
|
||||
generate_lexeme, inflect, inflect_context, inflect_substitute, inflect_dependant,
|
||||
match_all_morpho, find_substr
|
||||
)
|
||||
|
||||
|
|
|
@ -21,14 +21,19 @@ def parse(text: str, require_grams: str = '') -> str:
|
|||
return result if result != 'UNKN' else ''
|
||||
|
||||
|
||||
def get_all_forms(text_normal: str) -> list[tuple[str, str]]:
|
||||
''' Get all infeclted forms. '''
|
||||
# def parse_variants(text: str, require_grams: str = '') -> list[tuple[str, str]]:
|
||||
# ''' Get all variants of a parse.
|
||||
# ::returns:: string of comma separated grammar tags or empty string '''
|
||||
|
||||
|
||||
def generate_lexeme(text_normal: str) -> list[tuple[str, str]]:
|
||||
''' Get all inflected forms belonging to same Lexeme. '''
|
||||
model = parser.parse(text_normal)
|
||||
if not model:
|
||||
return []
|
||||
result = []
|
||||
for form in model.get_form().lexeme:
|
||||
result.append((form.word, Morphology(form.tag).to_text()))
|
||||
result.append((model.inflect(form.tag.grammemes), Morphology(form.tag).to_text()))
|
||||
return result
|
||||
|
||||
|
||||
|
|
|
@ -21,15 +21,20 @@ class TestConceptAPI(unittest.TestCase):
|
|||
self.assertEqual(cc.normalize('первого'), 'первый')
|
||||
self.assertEqual(cc.normalize('диких людей'), 'дикий человек')
|
||||
|
||||
def test_get_all_forms(self):
|
||||
def test_generate_lexeme(self):
|
||||
''' Test all lexical forms. '''
|
||||
self.assertEqual(cc.get_all_forms(''), [])
|
||||
self.assertEqual(cc.generate_lexeme(''), [])
|
||||
|
||||
forms = cc.get_all_forms('наверное')
|
||||
forms = cc.generate_lexeme('наверное')
|
||||
self.assertEqual(len(forms), 1)
|
||||
self.assertEqual(forms[0][0], 'наверное')
|
||||
self._assert_tags(forms[0][1], 'CONJ,Prnt')
|
||||
|
||||
forms = cc.generate_lexeme('молодой человек')
|
||||
self.assertEqual(len(forms), 19)
|
||||
self.assertEqual(forms[0][0], 'молодой человек')
|
||||
self._assert_tags(forms[0][1], 'nomn,masc,sing,anim,NOUN')
|
||||
|
||||
def test_inflect(self):
|
||||
''' Test inflection. '''
|
||||
self.assertEqual(cc.inflect('', ''), '')
|
||||
|
|
|
@ -1,6 +1,5 @@
|
|||
''' Main URL router '''
|
||||
from django.contrib import admin
|
||||
from django.shortcuts import redirect
|
||||
from django.urls import path, include
|
||||
from django.conf import settings
|
||||
from django.conf.urls.static import static
|
||||
|
|
Loading…
Reference in New Issue
Block a user