2023-11-17 20:51:13 +03:00
/**
 * Module: API for RSLanguage.
 */
2023-11-06 20:21:30 +03:00
2025-02-22 14:04:01 +03:00
import { type Tree } from '@lezer/common' ;
2025-02-12 20:53:31 +03:00
import { cursorNode } from '@/utils/codemirror' ;
import { PARAMETER } from '@/utils/constants' ;
2025-02-22 14:04:01 +03:00
import { CstType , type IRSErrorDescription , type RSErrorType } from '../backend/types' ;
2025-02-17 14:40:18 +03:00
2025-04-16 21:06:59 +03:00
import { type IRSForm } from './rsform' ;
2025-02-21 21:15:05 +03:00
import { type AliasMapping , type IArgumentValue , RSErrorClass , type SyntaxTree } from './rslang' ;
2023-11-06 20:21:30 +03:00
2024-04-05 15:53:05 +03:00
// cspell:disable
/**
 * Local identifier: an underscore or a lowercase Latin/Greek letter, followed by
 * lowercase Latin/Greek letters and optional trailing digits.
 * The character ranges must not contain spaces: a ` -ω` range would span from
 * space to ω and match digits, uppercase letters and punctuation.
 */
const LOCALS_REGEXP = /[_a-zα-ω][a-zα-ω]*\d*/g;

/** Global identifier: one of the letters `XCSADFPT` followed by one or more digits. */
const GLOBALS_REGEXP = /[XCSADFPT]\d+/g;

/** Symbols whose presence makes an expression non-simple (see `isSimpleExpression`). */
const COMPLEX_SYMBOLS_REGEXP = /[∀∃×ℬ;|:]/g;

/**
 * Whole-string shape of a set typification: one or more `ℬ` immediately followed
 * by a parenthesized run of the characters `ℬ`, `(`, `)`, `X`, digits, `+` and `×`.
 */
const TYPIFICATION_SET = /^ℬ+\([ℬ\(X\d+\)×]*\)$/g;
// cspell:enable
2023-11-06 22:21:36 +03:00
2025-04-16 21:06:59 +03:00
/**
 * Collects the distinct global identifiers occurring in an expression.
 *
 * @param expression - text to scan with `GLOBALS_REGEXP`.
 * @returns set of matched identifiers; empty when nothing matches.
 */
export function extractGlobals(expression: string): Set<string> {
  const found = expression.match(GLOBALS_REGEXP);
  return new Set(found ?? []);
}
2025-04-16 21:06:59 +03:00
/**
 * Tells whether an expression is a simple derivation.
 *
 * @param text - expression text.
 * @returns `true` when `text` contains none of the symbols matched by `COMPLEX_SYMBOLS_REGEXP`.
 */
export function isSimpleExpression(text: string): boolean {
  const complexSymbols = text.match(COMPLEX_SYMBOLS_REGEXP);
  return complexSymbols === null;
}
2025-04-16 21:06:59 +03:00
/**
 * Tells whether a typification text denotes a set.
 *
 * @param text - typification text.
 * @returns `true` when the whole `text` matches `TYPIFICATION_SET`.
 */
export function isSetTypification(text: string): boolean {
  const matched = text.match(TYPIFICATION_SET);
  return matched !== null;
}
2025-04-16 21:06:59 +03:00
/**
 * Determines the constituent type produced by a template once arguments are supplied.
 *
 * @param templateType - type of the template constituent.
 * @param args - argument values; an argument without a value counts as not supplied.
 * @returns `templateType` unchanged when there are no arguments or any argument lacks a value;
 * otherwise `CstType.AXIOM` for a predicate template and `CstType.TERM` for any other template.
 */
export function inferTemplatedType(templateType: CstType, args: IArgumentValue[]): CstType {
  const fullyInstantiated = args.length > 0 && args.every(arg => !!arg.value);
  if (!fullyInstantiated) {
    return templateType;
  }
  return templateType === CstType.PREDICATE ? CstType.AXIOM : CstType.TERM;
}
2023-11-17 20:51:13 +03:00
/**
 * Breaks a template definition of the form `[head] body` into its two parts.
 *
 * The head is the text between the first `[` and its matching `]` (nested square
 * brackets are balanced); the body is everything after that closing bracket.
 * Both parts are trimmed. Text preceding the first `[` is not included in either part.
 * When there is no `[`, or no matching `]` is found, the head is empty and the body
 * is the input string as given (not trimmed).
 *
 * @example
 * const template = '[header] body content';
 * const result = splitTemplateDefinition(template);
 * // result: `{ head: 'header', body: 'body content' }`
 */
export function splitTemplateDefinition(target: string) {
  const open = target.indexOf('[');
  const close = open === -1 ? -1 : findMatchingBracket(target, open);
  if (close === -1) {
    return { head: '', body: target };
  }
  return {
    head: target.substring(open + 1, close).trim(),
    body: target.substring(close + 1).trim()
  };
}

/** Returns the index of the `]` that closes the `[` at position `open`, or -1 if there is none. */
function findMatchingBracket(text: string, open: number): number {
  let depth = 0;
  let pos = open + 1;
  while (pos < text.length) {
    const symbol = text[pos];
    if (symbol === '[') {
      depth += 1;
    } else if (symbol === ']') {
      if (depth === 0) {
        return pos;
      }
      depth -= 1;
    }
    pos += 1;
  }
  return -1;
}
2023-11-17 20:51:13 +03:00
/**
 * Substitutes values for template arguments in a given expression.
 *
 * The expression is split into `[head] body`. Every local identifier in the body
 * whose alias has a supplied value is replaced by that value, and every head
 * declaration that mentions a substituted alias is dropped. When no declarations
 * remain, only the body is returned.
 *
 * @param expression - template definition text.
 * @param args - argument values; arguments without a value are left untouched.
 * @returns the expression unchanged when no argument has a value, otherwise the
 * substituted text as `[remaining head] body` or just `body`.
 */
export function substituteTemplateArgs(expression: string, args: IArgumentValue[]): string {
  if (args.every(arg => !arg.value)) {
    return expression;
  }

  // Null-prototype object: `in` lookups below and in applyPattern must not see keys
  // inherited from Object.prototype (a local named `constructor` is not an argument).
  const mapping = Object.create(null) as AliasMapping;
  for (const arg of args) {
    if (arg.value) {
      mapping[arg.alias] = arg.value;
    }
  }

  const { head, body } = splitTemplateDefinition(expression);
  const substitutedBody = applyPattern(body, mapping, LOCALS_REGEXP);

  // Keep only the declarations that do not mention any substituted alias.
  const remainingHead = head
    .split(',')
    .map(text => text.trim())
    .filter(declaration => [...declaration.matchAll(LOCALS_REGEXP)].every(local => !(local[0] in mapping)))
    .join(', ');

  if (!remainingHead) {
    return substitutedBody;
  }
  return `[${remainingHead}] ${substitutedBody}`;
}
2023-11-17 20:51:13 +03:00
2024-01-04 14:35:46 +03:00
/**
 * Builds the ErrorID label: a one-letter error class prefix followed by the
 * error code in hexadecimal.
 *
 * @param error - error description whose `errorType` supplies the code.
 * @returns `L…` for lexer, `P…` for parser, `S…` for semantic and `U…` for unknown errors.
 */
export function getRSErrorPrefix(error: IRSErrorDescription): string {
  const hexCode = error.errorType.toString(16);
  const errorClass = inferErrorClass(error.errorType);
  // prettier-ignore
  switch (errorClass) {
    case RSErrorClass.LEXER:    return `L${hexCode}`;
    case RSErrorClass.PARSER:   return `P${hexCode}`;
    case RSErrorClass.SEMANTIC: return `S${hexCode}`;
    case RSErrorClass.UNKNOWN:  return `U${hexCode}`;
  }
}
2024-08-26 22:53:27 +03:00
2025-04-16 21:06:59 +03:00
/**
 * Renames global identifiers in a text.
 *
 * @param target - text to process.
 * @param mapping - replacement text keyed by global identifier.
 * @returns `target` with every `GLOBALS_REGEXP` match that is a key of `mapping` replaced.
 */
export function applyAliasMapping(target: string, mapping: AliasMapping): string {
  const renamed = applyPattern(target, mapping, GLOBALS_REGEXP);
  return renamed;
}
2025-04-16 21:06:59 +03:00
/**
 * Applies an alias mapping to a typification and removes bracket pairs that the
 * substitution made redundant.
 *
 * Two kinds of brackets are removed:
 * - a pair that directly wraps another bracketed group preceded only by `ℬ` symbols,
 *   e.g. `ℬ(ℬ(X2))` becomes `ℬℬ(X2)`;
 * - a pair that wraps the entire result, e.g. `(X2×X3)` becomes `X2×X3`.
 *
 * @param target - typification text.
 * @param mapping - replacement text keyed by global identifier.
 * @returns `target` itself when the mapping changes nothing, otherwise the simplified text.
 */
export function applyTypificationMapping(target: string, mapping: AliasMapping): string {
  const modified = applyAliasMapping(target, mapping);
  if (modified === target) {
    return target;
  }

  const deleteBrackets = new Set<number>(); // indices of characters to drop
  const positions: number[] = []; // indices of currently open '('
  const booleans: number[] = []; // number of 'ℬ' directly preceding each open '('
  let boolCount = 0; // length of the current run of consecutive 'ℬ'
  let stackSize = 0;
  for (let i = 0; i < modified.length; i++) {
    const char = modified[i];
    // Single-character comparison: a literal with a trailing space could never match.
    if (char === 'ℬ') {
      boolCount++;
      continue;
    }
    if (char === '(') {
      stackSize++;
      positions.push(i);
      booleans.push(boolCount);
    }
    boolCount = 0;
    if (char === ')') {
      // The enclosing '(' is followed only by 'ℬ' symbols and then this group's '(',
      // and this ')' is directly followed by the enclosing ')': one pair is redundant.
      if (
        i < modified.length - 1 &&
        modified[i + 1] === ')' &&
        stackSize > 1 &&
        positions[stackSize - 2] + booleans[stackSize - 1] + 1 === positions[stackSize - 1]
      ) {
        deleteBrackets.add(i);
        deleteBrackets.add(positions[stackSize - 2]);
      }
      // The outermost pair spans the entire text.
      if (i === modified.length - 1 && stackSize === 1 && positions[0] === 0) {
        deleteBrackets.add(i);
        deleteBrackets.add(positions[0]);
      }
      stackSize--;
      positions.pop();
      booleans.pop();
    }
  }

  let result = '';
  for (let i = 0; i < modified.length; i++) {
    if (!deleteBrackets.has(i)) {
      result += modified[i];
    }
  }
  return result;
}
2025-02-12 20:53:31 +03:00
2025-04-16 21:06:59 +03:00
/** Transform Tree to {@link SyntaxTree}. */
2025-02-12 20:53:31 +03:00
export function transformAST ( tree : Tree ) : SyntaxTree {
// Flattens the parser tree into an array of nodes by walking it with a cursor.
// Each emitted node stores its own index as `uid` and the index of its parent node.
const result : SyntaxTree = [ ] ;
// Stack of `result` indices for the nodes that enclose the cursor position.
const parents : number [ ] = [ ] ;
const cursor = tree . cursor ( ) ;
let finished = false ;
// When true, the next step of the climbing loop pops one entry from `parents`.
let leave = true ;
while ( ! finished ) {
let node = cursorNode ( cursor ) ;
// Attempts to descend; presumably firstChild() returns false when there is no child
// (Lezer TreeCursor semantics — confirm), in which case the node is recorded as a leaf.
node . isLeaf = ! cursor . firstChild ( ) ;
leave = true ;
result . push ( {
uid : result.length ,
// With an empty stack the node references itself as parent (the first node pushed).
parent : parents.length > 0 ? parents [ parents . length - 1 ] : result . length ,
typeID : node.type.id ,
start : node.from ,
finish : node.to ,
data : {
dataType : 'string' ,
// Nodes whose type name is '⚠' get the configured error label instead of the name.
value : node.type.name == '⚠' ? PARAMETER.errorNodeLabel : node.type.name
}
} ) ;
parents . push ( result . length - 1 ) ;
// The cursor already stands on the first child: emit it on the next iteration.
if ( ! node . isLeaf ) continue ;
// Climb: move to the next sibling, or go up level by level until a sibling exists
// or the root is passed.
for ( ; ; ) {
node = cursorNode ( cursor , node . isLeaf ) ;
if ( leave ) {
parents . pop ( ) ;
}
// NOTE(review): this value appears to be overwritten (by `leave = true` above or
// below) before it is read again — confirm whether the assignment is still needed.
leave = cursor . type . isAnonymous ;
node . isLeaf = false ;
if ( cursor . nextSibling ( ) ) {
break ;
}
if ( ! cursor . parent ( ) ) {
// No parent left: the whole tree has been visited.
finished = true ;
break ;
}
leave = true ;
}
}
return result ;
}
2025-04-13 23:14:00 +03:00
2025-04-16 21:06:59 +03:00
/**
 * Generates an LLM prompt describing a conceptual schema.
 *
 * The prompt consists of a fixed introduction explaining the record format,
 * the schema title, alias and description, one line per schema item, and a
 * fixed closing instruction.
 *
 * @param schema - schema whose title, alias, description and items are rendered.
 * @returns the prompt text; every item occupies its own line in the format
 * `alias - "typification" - "term" - "formal definition" - "text definition" - "convention"`.
 */
export function generatePrompt(schema: IRSForm): string {
  const intro =
    'Концептуальная схема — это формализованная модель предметной области, выраженная с помощью языка родов структур, основанного на аппарате формальной логики и теории множеств, и дополненная естественно-языковыми пояснениями. Она представляет собой систему взаимосвязанных определений, где каждое понятие или утверждение задаётся в строгом формате Обозначение - "Типизация" - "Термин" - "Определение в языке родов структур" - "Определение в естественном языке" - "Конвенция или комментарий".\nОбозначение — уникальный идентификатор понятия (например, X1, S3, F14).\nТипизация — структура элементов множества, моделирующего данное понятие (например, ℬ(X1) для подмножества индивидов или ℬ(X1×X1) для бинарных отношений).\nТермин — название понятия в естественном языке.\nКонвенция описывает неопределяемые понятия предметным языком, включая уточнения, ограничения или примеры, включая ссылки на внешние данные (например, документы).\n------------\nДалее приведена концептуальная схема, описывающая некоторую предметную область.\n';
  const outro =
    '\n------\nПри ответе на следующий вопрос используй представленные в концептуальной схеме понятия и определения.\n';

  const header =
    `Название концептуальной схемы: ${schema.title}\n` +
    `[${schema.alias}] Описание: "${schema.description}"\n\n` +
    'Понятия:\n';

  // One record per line; the trailing newline escape must stay intact.
  const records = schema.items
    .map(
      item =>
        `${item.alias} - "${item.parse.typification}" - "${item.term_resolved}" - "${item.definition_formal}" - "${item.definition_resolved}" - "${item.convention}"\n`
    )
    .join('');

  return `${intro}${header}${records}${outro}`;
}
2025-04-13 23:14:00 +03:00
// ====== Internals =========
/**
 * Text substitution guided by mapping and regular expression.
 *
 * Every match of `pattern` in `text` that is an own key of `mapping` is replaced
 * by the mapped value; all other text is copied unchanged.
 *
 * @param text - input text.
 * @param mapping - replacement text keyed by matched entity.
 * @param pattern - regular expression with the global flag (required by `matchAll`).
 * @returns the substituted text; `text` itself when it is empty or `pattern` is `null`.
 */
function applyPattern(text: string, mapping: AliasMapping, pattern: RegExp): string {
  if (text === '' || pattern === null) {
    return text;
  }
  let posInput = 0;
  let output = '';
  for (const segment of text.matchAll(pattern)) {
    const entity = segment[0];
    // Own-property check: `in` would also accept keys inherited from Object.prototype
    // (e.g. `constructor`) and substitute garbage for them.
    if (!Object.prototype.hasOwnProperty.call(mapping, entity)) {
      continue;
    }
    const start = segment.index ?? 0;
    output += text.substring(posInput, start);
    output += mapping[entity];
    posInput = start + entity.length;
  }
  output += text.substring(posInput);
  return output;
}
const ERROR_LEXER_MASK = 512;
const ERROR_PARSER_MASK = 1024;
const ERROR_SEMANTIC_MASK = 2048;

/**
 * Classifies an error code by its mask bits.
 *
 * The masks are tested in the order lexer, parser, semantic; the first one with
 * a set bit decides the class.
 *
 * @param error - numeric error code.
 * @returns the matching error class, or `RSErrorClass.UNKNOWN` when no mask bit is set.
 */
function inferErrorClass(error: RSErrorType): RSErrorClass {
  const hasBit = (mask: number): boolean => (error & mask) !== 0;
  if (hasBit(ERROR_LEXER_MASK)) {
    return RSErrorClass.LEXER;
  }
  if (hasBit(ERROR_PARSER_MASK)) {
    return RSErrorClass.PARSER;
  }
  if (hasBit(ERROR_SEMANTIC_MASK)) {
    return RSErrorClass.SEMANTIC;
  }
  return RSErrorClass.UNKNOWN;
}