VBCommons/parsers/z_ParserRegex.bas
2024-06-07 20:46:40 +03:00

233 lines
7.0 KiB
QBasic

Attribute VB_Name = "z_ParserRegex"
' ======== Parser regex masks ========
' Shared module version: 20210909
' Tested in:
' Depends on:
' Required reference: Microsoft VBScript Regular Expressions 5.5 (provides the early-bound RegExp type used below)
Option Private Module
Option Explicit
' ---- Primitive building blocks used by the composite masks in this module ----
' NOTE(review): the non-ASCII literals in this module look like Cyrillic text shown
' in the wrong single-byte code page (probably Windows-1251) - confirm the file encoding.

' One or more whitespace characters, matched lazily (non-capturing).
Private Const P_SPACE = "(?:\s+?)"
' A single opening quotation mark (the doubled "" is one escaped double quote).
Private Const P_QUOTE_OPEN = "[""'«“]"
' A single closing quotation mark.
Private Const P_QUOTE_CLOSE = "[""'»”]"
' Any run of characters: shortest possible / longest possible.
Private Const P_ANY_SHORTEST = ".*?"
Private Const P_ANY = ".*"
' Alternation of twelve month names (presumably Russian, genitive case - confirm encoding).
Private Const P_MONTH_RUS = "(?:ÿíâàðÿ|ôåâðàëÿ|ìàðòà|àïðåëÿ|ìàÿ|èþíÿ|èþëÿ|àâãóñòà|ñåíòÿáðÿ|îêòÿáðÿ|íîÿáðÿ|äåêàáðÿ)"
' One or two digits with the first limited to 0-1; the value is not range-checked (e.g. "19" matches).
Private Const P_MONTH_DIGITS = "[0-1]?\d"
' One or two digits with the first limited to 0-3; the value is not range-checked (e.g. "39" matches).
Private Const P_DAY_DIGITS = "[0-3]?\d"
' Four-digit year starting with 1 or 2.
Private Const P_YEAR_LONG = "[1-2]\d\d\d"
' Two-digit year.
Private Const P_YEAR_SHORT = "\d\d"
' Either year form; the long form is tried first so four digits are not split.
Private Const P_YEAR_DIGITS = "(?:" & P_YEAR_LONG & "|" & P_YEAR_SHORT & ")"
' Textual date "<day> <month name> <4-digit year>" with three capturing groups: day, month, year.
Private Const P_DATE_TEXT = _
"(" & P_DAY_DIGITS & ")" & P_SPACE & _
"(" & P_MONTH_RUS & ")" & P_SPACE & _
"(" & P_YEAR_LONG & ")"
' Numeric date "<day>.<month>.<year>" with three capturing groups: day, month, year.
Private Const P_DATE_NUMERIC = _
"(" & P_DAY_DIGITS & ")" & "\." & _
"(" & P_MONTH_DIGITS & ")" & "\." & _
"(" & P_YEAR_DIGITS & ")"
' Leading part of a document reference: one letter from the two high-range letter classes,
' then lazily one or more letters, digits, commas, hyphens, dots, quotes or whitespace.
Private Const P_NPA_PREFIX = "[À-ßà-ÿ][\dÀ-ßà-ÿA-Za-z,\-\.""'«»“”\s]+?"
' One document identifier: a number sign (#, N or the third symbol in the class),
' optional whitespace, then a run of letters, digits and _ @ . - \ / characters.
Private Const P_NPA_ID_ELEMENT = "[#N¹]" & P_SPACE & "?" & "[\dÀ-ßà-ÿA-Za-z_@\.\-\\/]+"
' One identifier followed by zero or more ", <identifier>" repetitions.
Private Const P_NPA_ID_SEQ = P_NPA_ID_ELEMENT & "(?:," & P_SPACE & P_NPA_ID_ELEMENT & ")*"
' Introductory phrase that precedes a document reference inside parentheses.
' Structure of the alternation (outer non-capturing group):
'   1. a fixed phrase (presumably "Appendix to"),
'   2. a fixed phrase (presumably "as amended by"),
'   3. a participle group followed by an optional trailing " xx" word:
'        - "recommended" form, or
'        - "approved ... and put into effect" form, or
'        - optional "adopted and " + one of approved / endorsed / put into effect, or
'        - bare "adopted" form.
' The longer "approved and put into effect" branch is listed before the plain
' "approved" branch so that the longer phrase wins when both could match.
' NOTE(review): two alternatives (L52 and L55 forms, plus the optional "adopted and " lead)
' accept only a lower-case first letter while their siblings accept both cases -
' confirm whether that asymmetry is intended.
Private Const P_NPA_INTRO = _
"(?:" & _
"[Ïï]ðèëîæåíèå ê" & "|" & _
"[Ââ] ðåäàêöèè" & "|" & _
"(?:" & _
"[Ðð]åêîìåíäîâàí[àîû]?" & "|" & _
"(?:" & _
"[Óó]òâ\." & "|" & _
"[Óó]òâåðæä(?:[å¸]í|åíà|åíî|åíû)" & _
")" & " è ââåä(?:[å¸]í|åíà|åíî|åíû) â äåéñòâèå" & "|" & _
"(?:ïðèíÿò[àîû]? è )?" & _
"(?:" & _
"[Óó]òâ\." & "|" & _
"[Óó]òâåðæä(?:[å¸]í|åíà|åíî|åíû)" & "|" & _
"îäîáðåí[àîû]?" & "|" & _
"[Ââ]âåä(?:[å¸]í|åíà|åíî|åíû) â äåéñòâèå" & _
")" & "|" & _
"ïðèíÿò[àîû]?" & _
")" & "(?: íà)?" & _
")"
' Alternation of recognised document-type names (with their case endings), wrapped in
' one non-capturing group. Presumably covers: constitution, federal law, federal
' constitutional law, law, directive, code, convention, instruction, conclusion, letter,
' clarification, resolution, order, decree, information letter, disposition, agreement,
' decision - confirm against the decoded literals.
' The "code" alternative is surrounded by P_ANY and the "convention" alternative is
' preceded by P_ANY, so those two accept arbitrary text around the keyword.
' Longer stems are listed before stems that are their prefixes so the longer one is tried first.
' Fixes relative to the previous revision:
'   * the "constitution" stem had an extra letter and could never match the real word;
'   * a stray "]" after the adjective group of the "federal constitutional law"
'     alternative was a required literal and made that alternative unmatchable.
Private Const P_MASK_DOCTYPE = _
    "(?:" & _
    "[Êê]îíñòèòóöè[ÿè]" & "|" & _
    "[Ôô]åäåðàëüí(?:ûé|ûì|îãî|îìó) çàêîí(?:îì|à|ó)?" & "|" & _
    "[Ôô]åäåðàëüí(?:ûé|ûì|îãî|îìó) [Êê]îíñòèòóöèîíí(?:ûé|ûì|îãî|îìó) çàêîí(?:îì|à|ó)?" & "|" & _
    "[Çç]àêîí(?:îì|à|ó)?(?: ÐÔ| Ðîññèéñêîé Ôåäåðàöèè)?" & "|" & _
    "[Ää]èðåêòèâ(?:à|îé|û|å)" & "|" & _
    P_ANY & "[Êê]îäåêñ(?:îì)?" & P_ANY & "|" & _
    P_ANY & "[Êê]îíâåíöè(?:ÿ|åé|èè)" & "|" & _
    "[Óó]êàçàíè(?:åì|å|ÿ|þ)" & "|" & _
    "[Çç]àêëþ÷åíè(?:åì|å|ÿ|þ)" & "|" & _
    "[Ïï]èñüì(?:îì|î|à|ó)" & "|" & _
    "[Ðð]àçúÿñíåíè(?:åì|å|ÿ|þ)" & "|" & _
    "[Ïï]îñòàíîâëåíè(?:åì|å|ÿ|þ)" & "|" & _
    "[Ïï]ðèêàç(?:îì|à|ó)?" & "|" & _
    "[Óó]êàç(?:îì|à|ó)?" & "|" & _
    "[Èè]íôîðìàöèîíí(?:îå|ûì|îãî|îìó) ïèñüì(?:îì|î|à|ó)" & "|" & _
    "[Ðð]àñïîðÿæåíè(?:åì|å|ÿ|þ)" & "|" & _
    "[Ññ]îãëàøåíè(?:åì|å|ÿ|þ)(?: î âçàèìîäåéñòâèè)?" & "|" & _
    "[Ðð]åøåíè(?:åì|å|ÿ|þ)" & _
    ")"
' Standard opening phrase of a composite document title, followed by exactly one space.
' Three phrases are accepted (presumably "On putting into effect [a new edition]",
' "On issuing and applying", "On approving [and putting into effect]" - confirm
' against the decoded literals).
' The phrases are wrapped in their own inner group so that the trailing space applies
' to every alternative. Previously the space was concatenated onto the last alternative
' only (concatenation binds tighter than "|"), so after the first two phrases the
' separating space was left unconsumed and ended up in whatever pattern followed.
Private Const P_NPA_TITLE_PREFIX = _
    "(?:" & _
        "(?:" & _
            "[Îî] ââåäåíèè â äåéñòâèå" & "(?: íîâîé ðåäàêöèè)?" & "|" & _
            "[Îî]á èçäàíèè è ïðèìåíåíèè" & "|" & _
            "[Îî]á óòâåðæäåíèè(?: è(?: î)? ââåäåíèè â äåéñòâèå)?" & _
        ")" & _
        " " & _
    ")"
' ======================================
' ======== Resulting expressions =======
' ======================================
' ======= Date ==========
' Date mask without capturing groups, suitable for scanning inside longer text.
' Accepts "<day> <month name> <4-digit year>" or "<day>.<month>.<year>", optionally
' followed by whitespace and a year-word suffix (full word or abbreviation with/without dot).
Public Const P_DATE_NO_CAPTURE = _
"(?:" & _
P_DAY_DIGITS & P_SPACE & P_MONTH_RUS & P_SPACE & P_YEAR_LONG & _
"|" & _
P_DAY_DIGITS & "\." & P_MONTH_DIGITS & "\." & P_YEAR_DIGITS & _
")" & _
"(?:" & P_SPACE & "?" & "(?:ãîäà|ã\.|ã))?"
' Anchored (^...$) date mask with capturing groups for parsing a string that is only a date.
' Groups 1-3 are day / month name / year when the textual form matched;
' groups 4-6 are day / month / year when the numeric form matched.
' The optional year-word suffix is not captured.
Public Const P_DATE_CAPTURE = _
"^" & _
"(?:" & _
P_DATE_TEXT & "|" & _
P_DATE_NUMERIC & _
")" & _
"(?:" & P_SPACE & "?" & "(?:ãîäà|ã\.|ã))?" & _
"$"
' ====== Legal act (NPA) reference ======
' Scanning mask that locates a document reference inside running text.
' Optional, non-captured lead-ins: an abbreviation ending in a dot followed by
' whitespace (presumably "see"), then one of two fixed phrases (presumably
' "as amended by " / "in accordance with ").
' The single capturing group holds one of two shapes:
'   a) <prefix> <"from" word> <date> [<id sequence>] [<quoted title>]
'      - note the id sequence is mandatory here, only the whitespace before it is optional;
'   b) <any text> ( <any text> <intro phrase> <prefix> [<"from" word>] <date> [<id sequence>] <any text> )
' NOTE(review): the lead-ins use \b directly before Cyrillic letters. If the regex engine
' treats only [A-Za-z0-9_] as word characters, \b cannot match there after whitespace or
' at the start of the text, so the lead-ins would be absorbed by the prefix instead of
' being skipped - confirm against the engine in use.
Public Const P_NPA_SCAN = _
"(?:" & "\b[Ññ]ì\." & P_SPACE & ")?" & _
"(?:" & _
"\bâ ðåäàêöèè " & "|" & _
"\bâ ñîîòâåòñòâèè ñ " & _
")?" & _
"(" & _
P_NPA_PREFIX & P_SPACE & "îò" & P_SPACE & P_DATE_NO_CAPTURE & P_SPACE & "?" & _
P_NPA_ID_SEQ & "(?:" & P_SPACE & P_QUOTE_OPEN & P_ANY & P_QUOTE_CLOSE & ")?" & _
"|" & _
P_ANY_SHORTEST & _
"\(" & _
P_ANY_SHORTEST & P_NPA_INTRO & P_SPACE & P_NPA_PREFIX & _
"(?:" & P_SPACE & "îò" & ")?" & P_SPACE & P_DATE_NO_CAPTURE & _
"(?:" & P_SPACE & "?" & "(?:" & P_NPA_ID_SEQ & "))?" & P_ANY_SHORTEST & _
"\)" & _
")"
' Anchored mask for a reference of shape (a) above, with its parts captured:
'   group 1 - prefix, group 2 - date, group 3 - id sequence,
'   group 4 - text between the quotes (only when a quoted title is present).
' Any trailing text after these parts is accepted and not captured.
Public Const P_NPA_PROPS_FIRST = _
"^" & _
"(" & P_NPA_PREFIX & ")" & _
P_SPACE & "îò" & P_SPACE & "(" & P_DATE_NO_CAPTURE & ")" & _
P_SPACE & "?" & "(" & P_NPA_ID_SEQ & ")" & _
"(?:" & P_SPACE & P_QUOTE_OPEN & "(" & P_ANY & ")" & P_QUOTE_CLOSE & ")?" & _
P_ANY_SHORTEST & _
"$"
' Anchored mask for a reference of shape (b) above, with its parts captured:
'   group 1 - text before the opening parenthesis, group 2 - prefix,
'   group 3 - date, group 4 - id sequence (only when present).
' Text between the parenthesis and the intro phrase, after the id sequence, and after
' the closing parenthesis is accepted and not captured.
Public Const P_NPA_TITLE_FIRST = _
"^" & _
"(" & P_ANY_SHORTEST & ")" & _
"\(" & _
P_ANY_SHORTEST & P_NPA_INTRO & P_SPACE & _
"(" & P_NPA_PREFIX & ")" & P_SPACE & _
"(?:îò" & P_SPACE & ")?" & "(" & P_DATE_NO_CAPTURE & ")" & _
"(?:" & P_SPACE & "?" & "(" & P_NPA_ID_SEQ & "))?" & _
P_ANY_SHORTEST & _
"\)" & _
P_ANY_SHORTEST & _
"$"
' ===== Document type =========
' Matches a document-type name at the start of the text; group 1 is the matched type.
' The mask is anchored at the start only, so text after the type is ignored.
Public Const P_NPA_DOCTYPE = "^" & "(" & P_MASK_DOCTYPE & ")"
' ======== Document title ======
' Splits a whole title string (anchored ^...$) into:
'   group 1 - the standard opening phrase (P_NPA_TITLE_PREFIX),
'   group 2 - the remaining text, matched lazily.
' A single quote mark right after the opening phrase and a single quote mark at the very
' end are optional and are not captured.
Public Const P_NPA_COMPOSITE_TITLE = _
"^" & _
"(" & P_NPA_TITLE_PREFIX & ")" & _
P_QUOTE_OPEN & "?" & _
"(" & P_ANY_SHORTEST & ")" & _
P_QUOTE_CLOSE & "?" & _
"$"
' ======== Time-bound document marker ===
' Matches a year or year-range phrase inside text. No capturing groups. Shape:
'   <space> (<one of four short prepositions + space> | "(")
'   [<optional planning-period wording>]
'   <year> [<"-" | " - " | two connecting words> <year>]
'   <space> <year-word in full or abbreviated, singular or plural>
' Years use P_YEAR_DIGITS, so both four-digit and two-digit years are accepted.
' NOTE(review): presumably used to flag documents that apply to a specific year or
' period - confirm against the caller.
Public Const P_NPA_IMMEDIATE = _
" " & _
"(?:(?:ñ|íà|â|äî) |\()" & _
"(?:ïëàíîâûé |ïåðèîä |ïëàíîâûé ïåðèîä )?" & _
P_YEAR_DIGITS & _
"(?:" & _
"(?:\-| \- | è | ïî )" & _
P_YEAR_DIGITS & _
")?" & _
" " & _
"(?:ãîä(?:îâ|à|û|àõ)?|ãã\.|ãã|ã\.|ã)"
' ======== Static access for compiled regexp ======
Public Function GlobalDateRegex() As RegExp
    ' Returns the shared RegExp for P_DATE_NO_CAPTURE with Global = True.
    ' The object is created on the first call and the same instance is returned afterwards.
    Static s_Cached As RegExp

    ' Fast path: already built by an earlier call.
    If Not s_Cached Is Nothing Then
        Set GlobalDateRegex = s_Cached
        Exit Function
    End If

    Set s_Cached = New RegExp
    With s_Cached
        .Global = True
        .Pattern = P_DATE_NO_CAPTURE
    End With

    Set GlobalDateRegex = s_Cached
End Function
Public Function GlobalNPARegex() As RegExp
    ' Returns the shared RegExp for P_NPA_SCAN with Global = True.
    ' The object is created on the first call and the same instance is returned afterwards.
    Static s_Cached As RegExp
    Dim scanner As RegExp

    If Not s_Cached Is Nothing Then
        Set scanner = s_Cached
    Else
        Set scanner = New RegExp
        scanner.Global = True
        scanner.Pattern = P_NPA_SCAN
        ' Remember the instance for subsequent calls.
        Set s_Cached = scanner
    End If

    Set GlobalNPARegex = scanner
End Function
' ============= Types of defining documents (title openings not yet covered by the masks above)
' Ïîëîæåí(?:èÿ|èå|èè) îá?
' Ïîðÿä(?:êà|îê|êå)
' Ïðàâèëà?
' (?:Àäìèíèñòðàòèâíîãî)?[Ðð]åãëàìåíòà?(?: ïî)?
' Ðåêîìåíäàöè[èé] ïî
' Ñòðàòåãè[èÿ]
' Ñòðóêòóð[àû]
' Òèïîâî(?:ãî|å) ïîëîæåíè[ÿå] îá?
' Òðåáîâàíèé(?:(?:, ïðåäúÿâëÿåìûõ)? ê)?
' Ôîðì[àû]? è ïîðÿäêà
' Ôîðì[àû]?
' Ïåðå÷åíü
' ============ Types of action documents (title openings not yet covered by the masks above)
' Î âíåñåíèè èçìåíåíèé â
' Î(?: íåêîòîðûõ| íåîòëîæíûõ| ïåðâîî÷åðåäíûõ)? ìåðàõ ïî (?:ðåàëèçàöèè|ñòèìóëèðîâàíèþ|ñîâåðøåíñòâîâàíèþ)
' Î ìåðîïðèÿòèÿõ ïî
' Î ïåðå÷íå
' Î ïîäãîòîâêå ïðîåêòà
' Î (ïîäïèñàíèè|ïðèíÿòèè|ðàòèôèêàöèè)
' Î ðåàëèçàöèè
' Î ñîçäàíèè(?: è âåäåíèè)?
' Îá èçìåíåíèè ñîñòàâà
' Îá îáðàçîâàíèè ' of something
' Îá îáúÿâëåíèè ' of another document
' Îá îðãàíèçàöèè(?: ðàáîòû? ïî)?
' Îá îñóùåñòâëåíèè
' Îá óñèëåíèè ' = same as improvement (ñîâåðøåíñòâîâàíèå)?
' Îá óñòàíîâëåíèè ' setting a value
' Îá îïðåäåëåíèè ' procedure for setting a value
' ========= TODO
' Parse while extracting the acting authorities (issuing bodies)
' Private Const P_ANY_SPECIAL_SPACE = "[ \xA0\u1680\u180E\u2000-\u200B\u202F\u205F\u3000\uFEFF]+"