Fix regexps and update old reference format

This commit is contained in:
IRBorisov 2023-08-22 18:47:19 +03:00
parent cff07788e5
commit 1cfab50d43
6 changed files with 62 additions and 8 deletions

View File

@ -3,6 +3,7 @@ from typing import Optional
from rest_framework import serializers from rest_framework import serializers
from django.db import transaction from django.db import transaction
from .utils import fix_old_references
from .models import Constituenta, RSForm from .models import Constituenta, RSForm
_CST_TYPE = 'constituenta' _CST_TYPE = 'constituenta'
@ -197,11 +198,11 @@ class RSFormTRSSerializer(serializers.Serializer):
if 'definition' in data: if 'definition' in data:
cst.definition_formal = data['definition'].get('formal', '') cst.definition_formal = data['definition'].get('formal', '')
if 'text' in data['definition']: if 'text' in data['definition']:
cst.definition_raw = data['definition']['text'].get('raw', '') cst.definition_raw = fix_old_references(data['definition']['text'].get('raw', ''))
else: else:
cst.definition_raw = '' cst.definition_raw = ''
if 'term' in data: if 'term' in data:
cst.term_raw = data['term'].get('raw', '') cst.term_raw = fix_old_references(data['term'].get('raw', ''))
cst.term_forms = data['term'].get('forms', []) cst.term_forms = data['term'].get('forms', [])
else: else:
cst.term_raw = '' cst.term_raw = ''

View File

@ -2,7 +2,7 @@
import unittest import unittest
import re import re
from apps.rsform.utils import apply_mapping_pattern from apps.rsform.utils import apply_mapping_pattern, fix_old_references
class TestUtils(unittest.TestCase): class TestUtils(unittest.TestCase):
@ -14,3 +14,10 @@ class TestUtils(unittest.TestCase):
self.assertEqual(apply_mapping_pattern('X20', mapping, pattern), 'X20') self.assertEqual(apply_mapping_pattern('X20', mapping, pattern), 'X20')
self.assertEqual(apply_mapping_pattern('X101', mapping, pattern), 'X20') self.assertEqual(apply_mapping_pattern('X101', mapping, pattern), 'X20')
self.assertEqual(apply_mapping_pattern('asdf X101 asdf', mapping, pattern), 'asdf X20 asdf') self.assertEqual(apply_mapping_pattern('asdf X101 asdf', mapping, pattern), 'asdf X20 asdf')
def test_fix_old_references(self):
    ''' Check that only old-format references are rewritten by fix_old_references. '''
    # Each pair is (input text, expected output); order matches the original checks.
    cases = [
        ('', ''),
        ('X20', 'X20'),
        ('@{X1|nomn,sing}', '@{X1|nomn,sing}'),
        ('@{X1|sing,ablt} @{X1|sing,ablt}', '@{X1|sing,ablt} @{X1|sing,ablt}'),
        ('@{X1|nomn|sing}', '@{X1|nomn,sing}'),
    ]
    for source, expected in cases:
        self.assertEqual(fix_old_references(source), expected)

View File

@ -72,7 +72,9 @@ class TestConstituentaAPI(APITestCase):
response = self.client.patch(f'/api/constituents/{self.cst3.id}/', data, content_type='application/json') response = self.client.patch(f'/api/constituents/{self.cst3.id}/', data, content_type='application/json')
self.assertEqual(response.status_code, 200) self.assertEqual(response.status_code, 200)
self.cst3.refresh_from_db() self.cst3.refresh_from_db()
self.assertEqual(response.data['term_resolved'], 'New term')
self.assertEqual(self.cst3.term_resolved, 'New term') self.assertEqual(self.cst3.term_resolved, 'New term')
self.assertEqual(response.data['definition_resolved'], 'New def')
self.assertEqual(self.cst3.definition_resolved, 'New def') self.assertEqual(self.cst3.definition_resolved, 'New def')
def test_update_resolved_refs(self): def test_update_resolved_refs(self):
@ -84,7 +86,9 @@ class TestConstituentaAPI(APITestCase):
self.assertEqual(response.status_code, 200) self.assertEqual(response.status_code, 200)
self.cst3.refresh_from_db() self.cst3.refresh_from_db()
self.assertEqual(self.cst3.term_resolved, self.cst1.term_resolved) self.assertEqual(self.cst3.term_resolved, self.cst1.term_resolved)
self.assertEqual(response.data['term_resolved'], self.cst1.term_resolved)
self.assertEqual(self.cst3.definition_resolved, f'{self.cst1.term_resolved} form1') self.assertEqual(self.cst3.definition_resolved, f'{self.cst1.term_resolved} form1')
self.assertEqual(response.data['definition_resolved'], f'{self.cst1.term_resolved} form1')
def test_readonly_cst_fields(self): def test_readonly_cst_fields(self):
data = json.dumps({'alias': 'X33', 'order': 10}) data = json.dumps({'alias': 'X33', 'order': 10})
@ -151,14 +155,27 @@ class TestRSFormViewset(APITestCase):
def test_details(self): def test_details(self):
schema = RSForm.objects.create(title='Test') schema = RSForm.objects.create(title='Test')
cst = schema.insert_at(1, 'X1', CstType.BASE) x1 = schema.insert_at(1, 'X1', CstType.BASE)
schema.insert_at(2, 'X2', CstType.BASE) x2 = schema.insert_at(2, 'X2', CstType.BASE)
x1.term_raw = 'человек'
x1.term_resolved = 'человек'
x2.term_raw = '@{X1|plur}'
x2.term_resolved = 'люди'
x1.save()
x2.save()
response = self.client.get(f'/api/rsforms/{schema.id}/details/') response = self.client.get(f'/api/rsforms/{schema.id}/details/')
self.assertEqual(response.status_code, 200) self.assertEqual(response.status_code, 200)
self.assertEqual(response.data['title'], 'Test') self.assertEqual(response.data['title'], 'Test')
self.assertEqual(len(response.data['items']), 2) self.assertEqual(len(response.data['items']), 2)
self.assertEqual(response.data['items'][0]['id'], x1.id)
self.assertEqual(response.data['items'][0]['parse']['status'], 'verified') self.assertEqual(response.data['items'][0]['parse']['status'], 'verified')
self.assertEqual(response.data['items'][0]['id'], cst.id) self.assertEqual(response.data['items'][0]['term']['raw'], x1.term_raw)
self.assertEqual(response.data['items'][0]['term']['resolved'], x1.term_resolved)
self.assertEqual(response.data['items'][1]['id'], x2.id)
self.assertEqual(response.data['items'][1]['term']['raw'], x2.term_raw)
self.assertEqual(response.data['items'][1]['term']['resolved'], x2.term_resolved)
def test_check(self): def test_check(self):
schema = RSForm.objects.create(title='Test') schema = RSForm.objects.create(title='Test')
@ -347,11 +364,24 @@ class TestRSFormViewset(APITestCase):
schema.title = 'Testt11' schema.title = 'Testt11'
schema.save() schema.save()
x1 = Constituenta.objects.create(schema=schema, alias='X12', cst_type='basic', order=1) x1 = Constituenta.objects.create(schema=schema, alias='X12', cst_type='basic', order=1)
d1 = Constituenta.objects.create(schema=schema, alias='D2', cst_type='term', order=1)
x1.term_raw = 'человек'
x1.term_resolved = 'человек'
d1.term_raw = '@{X12|plur}'
d1.term_resolved = 'люди'
x1.save()
d1.save()
data = json.dumps({'title': 'Title'}) data = json.dumps({'title': 'Title'})
response = self.client.post(f'/api/rsforms/{schema.id}/clone/', data=data, content_type='application/json') response = self.client.post(f'/api/rsforms/{schema.id}/clone/', data=data, content_type='application/json')
self.assertEqual(response.status_code, 201) self.assertEqual(response.status_code, 201)
self.assertEqual(response.data['title'], 'Title') self.assertEqual(response.data['title'], 'Title')
self.assertEqual(response.data['items'][0]['alias'], x1.alias) self.assertEqual(response.data['items'][0]['alias'], x1.alias)
self.assertEqual(response.data['items'][0]['term']['raw'], x1.term_raw)
self.assertEqual(response.data['items'][0]['term']['resolved'], x1.term_resolved)
self.assertEqual(response.data['items'][1]['term']['raw'], d1.term_raw)
self.assertEqual(response.data['items'][1]['term']['resolved'], d1.term_resolved)
class TestFunctionalViews(APITestCase): class TestFunctionalViews(APITestCase):

View File

@ -57,3 +57,18 @@ def apply_mapping_pattern(text: str, mapping: dict[str, str], pattern: re.Patter
pos_input = segment.end(0) pos_input = segment.end(0)
output += text[pos_input : len(text)] output += text[pos_input : len(text)]
return output return output
_REF_OLD_PATTERN = re.compile(r'@{([^0-9\-][^\}\|\{]*?)\|([^\}\|\{]*?)\|([^\}\|\{]*?)}')


def fix_old_references(text: str) -> str:
    ''' Fix reference format: @{X1|nomn|sing} -> @{X1|nomn,sing}.

    Every substring matching the old format (an entity followed by exactly
    two '|'-separated parts) is rewritten so that the two trailing parts are
    joined with a comma. Text that does not match is returned unchanged,
    including the empty string.
    '''
    # Pattern.sub performs the same left-to-right, non-overlapping replacement
    # as a manual finditer loop, without repeated string concatenation.
    # Captured group text is inserted literally into the template.
    return _REF_OLD_PATTERN.sub(r'@{\g<1>|\g<2>,\g<3>}', text)

View File

@ -7,7 +7,7 @@ from .conceptapi import inflect_dependant
from .context import TermContext from .context import TermContext
from .reference import EntityReference, SyntacticReference, parse_reference, Reference from .reference import EntityReference, SyntacticReference, parse_reference, Reference
_REF_ENTITY_PATTERN = re.compile(r'@{([^0-9\-].*?)\|.*?}') _REF_ENTITY_PATTERN = re.compile(r'@{([^0-9\-][^\}\|\{]*?)\|([^\}\|\{]*?)}')
def extract_entities(text: str) -> list[str]: def extract_entities(text: str) -> list[str]:
''' Extract list of entities that are referenced. ''' ''' Extract list of entities that are referenced. '''
@ -76,7 +76,7 @@ class ResolvedReference:
class Resolver: class Resolver:
''' Text reference resolver ''' ''' Text reference resolver '''
REFERENCE_PATTERN = re.compile(r'@{.*?}') REFERENCE_PATTERN = re.compile(r'@{[^\}\{]*?}')
def __init__(self, context: TermContext): def __init__(self, context: TermContext):
self.context = context self.context = context

View File

@ -17,6 +17,7 @@ class TestUtils(unittest.TestCase):
self.assertEqual(extract_entities('@{X1|nomn}'), ['X1']) self.assertEqual(extract_entities('@{X1|nomn}'), ['X1'])
self.assertEqual(extract_entities('@{X1|datv}'), ['X1']) self.assertEqual(extract_entities('@{X1|datv}'), ['X1'])
self.assertEqual(extract_entities('@{X1|datv} @{X1|datv} @{X2|datv}'), ['X1', 'X2']) self.assertEqual(extract_entities('@{X1|datv} @{X1|datv} @{X2|datv}'), ['X1', 'X2'])
self.assertEqual(extract_entities('@{X1} | @{X1} | @{X2}'), [])
class TestResolver(unittest.TestCase): class TestResolver(unittest.TestCase):