From 0c6d998cc5d8151d50b2f0d91f57aba898beb793 Mon Sep 17 00:00:00 2001 From: Ivan <8611739+IRBorisov@users.noreply.github.com> Date: Sun, 14 Jul 2024 12:30:40 +0300 Subject: [PATCH] Update re-flex --- ccl/rslang/header/AsciiLexerImpl.hpp | 1186 ++-- ccl/rslang/header/MathLexerImpl.hpp | 1230 ++-- .../import/reflex/include/reflex/absmatcher.h | 167 +- .../reflex/include/reflex/fuzzymatcher.h | 1178 ++++ .../reflex/include/reflex/linematcher.h | 4 +- .../import/reflex/include/reflex/matcher.h | 1129 +++- .../import/reflex/include/reflex/pattern.h | 10 +- .../import/reflex/include/reflex/simd.h | 24 +- ccl/rslang/import/reflex/lib/convert.cpp | 6 +- ccl/rslang/import/reflex/lib/input.cpp | 25 +- ccl/rslang/import/reflex/lib/matcher.cpp | 5939 +++++++++-------- ccl/rslang/import/reflex/lib/matcher_avx2.cpp | 1000 ++- .../import/reflex/lib/matcher_avx512bw.cpp | 347 +- ccl/rslang/import/reflex/lib/pattern.cpp | 217 +- ccl/rslang/import/reflex/lib/simd.cpp | 157 + ccl/rslang/import/reflex/lib/simd_avx2.cpp | 51 +- .../import/reflex/lib/simd_avx512bw.cpp | 11 +- ccl/rslang/import/reflex/lib/unicode.cpp | 12 +- .../reflex/unicode/language_scripts.cpp | 58 + .../import/reflex/unicode/letter_scripts.cpp | 371 + ccl/rslang/unity/reflex_unity2.cpp | 1 + scripts/BuildLexers.ps1 | 7 +- 22 files changed, 8736 insertions(+), 4394 deletions(-) create mode 100644 ccl/rslang/import/reflex/include/reflex/fuzzymatcher.h create mode 100644 ccl/rslang/import/reflex/lib/simd.cpp diff --git a/ccl/rslang/header/AsciiLexerImpl.hpp b/ccl/rslang/header/AsciiLexerImpl.hpp index c4f4bfd..f36c788 100644 --- a/ccl/rslang/header/AsciiLexerImpl.hpp +++ b/ccl/rslang/header/AsciiLexerImpl.hpp @@ -1,6 +1,6 @@ -// AsciiLexerImpl.hpp generated by reflex 4.2.1 from AsciiLexerImpl.l +// AsciiLexerImpl.hpp generated by reflex 4.4.0 from AsciiLexerImpl.l -#define REFLEX_VERSION "4.2.1" +#define REFLEX_VERSION "4.4.0" //////////////////////////////////////////////////////////////////////////////// // // @@ 
-376,65 +376,65 @@ namespace asciilex { void reflex_code_INITIAL(reflex::Matcher& m) { - int c0 = 0, c1 = 0; - m.FSM_INIT(c1); + int c = 0; + m.FSM_INIT(c); S0: m.FSM_FIND(); - c1 = m.FSM_CHAR(); - if (c1 == '}') goto S148; - if (c1 == '|') goto S154; - if (c1 == '{') goto S126; - if (c1 == 'r') goto S88; - if (c1 == 'p') goto S53; - if (c1 == 'd') goto S95; - if (c1 == 'c') goto S74; - if (c1 == 'b') goto S81; - if ('a' <= c1 && c1 <= 'z') goto S138; - if (c1 == '_') goto S138; - if (c1 == ']') goto S152; - if (c1 == '\\') goto S32; - if (c1 == '[') goto S150; - if (c1 == 'Z') goto S120; - if (c1 == 'R') goto S108; - if (c1 == 'P') goto S60; - if (c1 == 'I') goto S114; - if (c1 == 'F') goto S67; - if (c1 == 'D') goto S102; - if (c1 == 'B') goto S51; - if ('A' <= c1 && c1 <= 'Y') goto S132; - if (c1 == ';') goto S158; - if ('0' <= c1 && c1 <= '9') goto S129; - if (c1 == ',') goto S156; - if (c1 == '*') goto S49; - if (c1 == ')') goto S146; - if (c1 == '(') goto S144; - if (c1 == ' ') goto S165; - if (c1 == '\r') goto S165; - if (c1 == '\n') goto S160; - if (c1 == '\t') goto S165; - if (0 <= c1) goto S170; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == '}') goto S148; + if (c == '|') goto S154; + if (c == '{') goto S126; + if (c == 'r') goto S88; + if (c == 'p') goto S53; + if (c == 'd') goto S95; + if (c == 'c') goto S74; + if (c == 'b') goto S81; + if ('a' <= c && c <= 'z') goto S138; + if (c == '_') goto S138; + if (c == ']') goto S152; + if (c == '\\') goto S32; + if (c == '[') goto S150; + if (c == 'Z') goto S120; + if (c == 'R') goto S108; + if (c == 'P') goto S60; + if (c == 'I') goto S114; + if (c == 'F') goto S67; + if (c == 'D') goto S102; + if (c == 'B') goto S51; + if ('A' <= c && c <= 'Y') goto S132; + if (c == ';') goto S158; + if ('0' <= c && c <= '9') goto S129; + if (c == ',') goto S156; + if (c == '*') goto S49; + if (c == ')') goto S146; + if (c == '(') goto S144; + if (c == ' ') goto S165; + if (c == '\r') goto S165; + if (c == '\n') 
goto S160; + if (c == '\t') goto S165; + if (0 <= c) goto S170; + return m.FSM_HALT(c); S32: m.FSM_TAKE(60); - c1 = m.FSM_CHAR(); - if (c1 == 'u') goto S204; - if (c1 == 's') goto S200; - if (c1 == 'p') goto S189; - if (c1 == 'o') goto S182; - if (c1 == 'n') goto S176; - if (c1 == 'm') goto S191; - if (c1 == 'l') goto S197; - if (c1 == 'i') goto S184; - if (c1 == 'g') goto S194; - if (c1 == 'f') goto S206; - if (c1 == 'e') goto S187; - if (c1 == 'd') goto S208; - if (c1 == 'a') goto S179; - if (c1 == 'E') goto S174; - if (c1 == 'A') goto S172; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 'u') goto S204; + if (c == 's') goto S200; + if (c == 'p') goto S189; + if (c == 'o') goto S182; + if (c == 'n') goto S176; + if (c == 'm') goto S191; + if (c == 'l') goto S197; + if (c == 'i') goto S184; + if (c == 'g') goto S194; + if (c == 'f') goto S206; + if (c == 'e') goto S187; + if (c == 'd') goto S208; + if (c == 'a') goto S179; + if (c == 'E') goto S174; + if (c == 'A') goto S172; + return m.FSM_HALT(c); S49: m.FSM_TAKE(22); @@ -446,139 +446,139 @@ S51: S53: m.FSM_TAKE(45); - c1 = m.FSM_CHAR(); - if (c1 == 'r') goto S210; - if ('a' <= c1 && c1 <= 'z') goto S216; - if (c1 == '_') goto S216; - if ('A' <= c1 && c1 <= 'Z') goto S216; - if ('0' <= c1 && c1 <= '9') goto S216; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 'r') goto S210; + if ('a' <= c && c <= 'z') goto S216; + if (c == '_') goto S216; + if ('A' <= c && c <= 'Z') goto S216; + if ('0' <= c && c <= '9') goto S216; + return m.FSM_HALT(c); S60: m.FSM_TAKE(44); - c1 = m.FSM_CHAR(); - if (c1 == 'r') goto S222; - if ('a' <= c1 && c1 <= 'z') goto S234; - if (c1 == '_') goto S234; - if ('A' <= c1 && c1 <= 'Z') goto S234; - if ('0' <= c1 && c1 <= '9') goto S228; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 'r') goto S222; + if ('a' <= c && c <= 'z') goto S234; + if (c == '_') goto S234; + if ('A' <= c && c <= 'Z') goto S234; + if ('0' <= c && c <= '9') goto S228; + return m.FSM_HALT(c); 
S67: m.FSM_TAKE(44); - c1 = m.FSM_CHAR(); - if (c1 == 'i') goto S240; - if ('a' <= c1 && c1 <= 'z') goto S234; - if (c1 == '_') goto S234; - if ('A' <= c1 && c1 <= 'Z') goto S234; - if ('0' <= c1 && c1 <= '9') goto S246; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 'i') goto S240; + if ('a' <= c && c <= 'z') goto S234; + if (c == '_') goto S234; + if ('A' <= c && c <= 'Z') goto S234; + if ('0' <= c && c <= '9') goto S246; + return m.FSM_HALT(c); S74: m.FSM_TAKE(45); - c1 = m.FSM_CHAR(); - if ('b' <= c1 && c1 <= 'z') goto S216; - if (c1 == 'a') goto S252; - if (c1 == '_') goto S216; - if ('A' <= c1 && c1 <= 'Z') goto S216; - if ('0' <= c1 && c1 <= '9') goto S216; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if ('b' <= c && c <= 'z') goto S216; + if (c == 'a') goto S252; + if (c == '_') goto S216; + if ('A' <= c && c <= 'Z') goto S216; + if ('0' <= c && c <= '9') goto S216; + return m.FSM_HALT(c); S81: m.FSM_TAKE(45); - c1 = m.FSM_CHAR(); - if (c1 == 'o') goto S259; - if ('a' <= c1 && c1 <= 'z') goto S216; - if (c1 == '_') goto S216; - if ('A' <= c1 && c1 <= 'Z') goto S216; - if ('0' <= c1 && c1 <= '9') goto S216; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 'o') goto S259; + if ('a' <= c && c <= 'z') goto S216; + if (c == '_') goto S216; + if ('A' <= c && c <= 'Z') goto S216; + if ('0' <= c && c <= '9') goto S216; + return m.FSM_HALT(c); S88: m.FSM_TAKE(45); - c1 = m.FSM_CHAR(); - if (c1 == 'e') goto S266; - if ('a' <= c1 && c1 <= 'z') goto S216; - if (c1 == '_') goto S216; - if ('A' <= c1 && c1 <= 'Z') goto S216; - if ('0' <= c1 && c1 <= '9') goto S216; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 'e') goto S266; + if ('a' <= c && c <= 'z') goto S216; + if (c == '_') goto S216; + if ('A' <= c && c <= 'Z') goto S216; + if ('0' <= c && c <= '9') goto S216; + return m.FSM_HALT(c); S95: m.FSM_TAKE(45); - c1 = m.FSM_CHAR(); - if (c1 == 'e') goto S273; - if ('a' <= c1 && c1 <= 'z') goto S216; - if (c1 == '_') goto S216; - if ('A' <= c1 
&& c1 <= 'Z') goto S216; - if ('0' <= c1 && c1 <= '9') goto S216; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 'e') goto S273; + if ('a' <= c && c <= 'z') goto S216; + if (c == '_') goto S216; + if ('A' <= c && c <= 'Z') goto S216; + if ('0' <= c && c <= '9') goto S216; + return m.FSM_HALT(c); S102: m.FSM_TAKE(35); - c1 = m.FSM_CHAR(); - if ('a' <= c1 && c1 <= 'z') goto S234; - if (c1 == '_') goto S234; - if ('A' <= c1 && c1 <= 'Z') goto S234; - if ('0' <= c1 && c1 <= '9') goto S234; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if ('a' <= c && c <= 'z') goto S234; + if (c == '_') goto S234; + if ('A' <= c && c <= 'Z') goto S234; + if ('0' <= c && c <= '9') goto S234; + return m.FSM_HALT(c); S108: m.FSM_TAKE(36); - c1 = m.FSM_CHAR(); - if ('a' <= c1 && c1 <= 'z') goto S234; - if (c1 == '_') goto S234; - if ('A' <= c1 && c1 <= 'Z') goto S234; - if ('0' <= c1 && c1 <= '9') goto S280; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if ('a' <= c && c <= 'z') goto S234; + if (c == '_') goto S234; + if ('A' <= c && c <= 'Z') goto S234; + if ('0' <= c && c <= '9') goto S280; + return m.FSM_HALT(c); S114: m.FSM_TAKE(37); - c1 = m.FSM_CHAR(); - if ('a' <= c1 && c1 <= 'z') goto S234; - if (c1 == '_') goto S234; - if ('A' <= c1 && c1 <= 'Z') goto S234; - if ('0' <= c1 && c1 <= '9') goto S234; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if ('a' <= c && c <= 'z') goto S234; + if (c == '_') goto S234; + if ('A' <= c && c <= 'Z') goto S234; + if ('0' <= c && c <= '9') goto S234; + return m.FSM_HALT(c); S120: m.FSM_TAKE(38); - c1 = m.FSM_CHAR(); - if ('a' <= c1 && c1 <= 'z') goto S234; - if (c1 == '_') goto S234; - if ('A' <= c1 && c1 <= 'Z') goto S234; - if ('0' <= c1 && c1 <= '9') goto S234; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if ('a' <= c && c <= 'z') goto S234; + if (c == '_') goto S234; + if ('A' <= c && c <= 'Z') goto S234; + if ('0' <= c && c <= '9') goto S234; + return m.FSM_HALT(c); S126: m.FSM_TAKE(52); - c1 = m.FSM_CHAR(); - if (c1 == '}') goto 
S286; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == '}') goto S286; + return m.FSM_HALT(c); S129: m.FSM_TAKE(40); - c1 = m.FSM_CHAR(); - if ('0' <= c1 && c1 <= '9') goto S288; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if ('0' <= c && c <= '9') goto S288; + return m.FSM_HALT(c); S132: m.FSM_TAKE(44); - c1 = m.FSM_CHAR(); - if ('a' <= c1 && c1 <= 'z') goto S234; - if (c1 == '_') goto S234; - if ('A' <= c1 && c1 <= 'Z') goto S234; - if ('0' <= c1 && c1 <= '9') goto S234; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if ('a' <= c && c <= 'z') goto S234; + if (c == '_') goto S234; + if ('A' <= c && c <= 'Z') goto S234; + if ('0' <= c && c <= '9') goto S234; + return m.FSM_HALT(c); S138: m.FSM_TAKE(45); - c1 = m.FSM_CHAR(); - if ('a' <= c1 && c1 <= 'z') goto S216; - if (c1 == '_') goto S216; - if ('A' <= c1 && c1 <= 'Z') goto S216; - if ('0' <= c1 && c1 <= '9') goto S216; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if ('a' <= c && c <= 'z') goto S216; + if (c == '_') goto S216; + if ('A' <= c && c <= 'Z') goto S216; + if ('0' <= c && c <= '9') goto S216; + return m.FSM_HALT(c); S144: m.FSM_TAKE(50); @@ -614,19 +614,19 @@ S158: S160: m.FSM_TAKE(59); - c1 = m.FSM_CHAR(); - if (c1 == ' ') goto S160; - if (c1 == '\r') goto S160; - if ('\t' <= c1 && c1 <= '\n') goto S160; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == ' ') goto S160; + if (c == '\r') goto S160; + if ('\t' <= c && c <= '\n') goto S160; + return m.FSM_HALT(c); S165: m.FSM_TAKE(59); - c1 = m.FSM_CHAR(); - if (c1 == ' ') goto S160; - if (c1 == '\r') goto S160; - if ('\t' <= c1 && c1 <= '\n') goto S160; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == ' ') goto S160; + if (c == '\r') goto S160; + if ('\t' <= c && c <= '\n') goto S160; + return m.FSM_HALT(c); S170: m.FSM_TAKE(60); @@ -641,189 +641,189 @@ S174: return m.FSM_HALT(); S176: - c1 = m.FSM_CHAR(); - if (c1 == 'o') goto S293; - if (c1 == 'e') goto S291; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 'o') goto 
S293; + if (c == 'e') goto S291; + return m.FSM_HALT(c); S179: - c1 = m.FSM_CHAR(); - if (c1 == 's') goto S297; - if (c1 == 'n') goto S295; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 's') goto S297; + if (c == 'n') goto S295; + return m.FSM_HALT(c); S182: - c1 = m.FSM_CHAR(); - if (c1 == 'r') goto S299; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 'r') goto S299; + return m.FSM_HALT(c); S184: - c1 = m.FSM_CHAR(); - if (c1 == 'n') goto S303; - if (c1 == 'm') goto S301; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 'n') goto S303; + if (c == 'm') goto S301; + return m.FSM_HALT(c); S187: - c1 = m.FSM_CHAR(); - if (c1 == 'q') goto S306; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 'q') goto S306; + return m.FSM_HALT(c); S189: - c1 = m.FSM_CHAR(); - if (c1 == 'l') goto S309; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 'l') goto S309; + return m.FSM_HALT(c); S191: - c1 = m.FSM_CHAR(); - if (c1 == 'u') goto S313; - if (c1 == 'i') goto S311; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 'u') goto S313; + if (c == 'i') goto S311; + return m.FSM_HALT(c); S194: - c1 = m.FSM_CHAR(); - if (c1 == 'r') goto S315; - if (c1 == 'e') goto S317; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 'r') goto S315; + if (c == 'e') goto S317; + return m.FSM_HALT(c); S197: - c1 = m.FSM_CHAR(); - if (c1 == 's') goto S319; - if (c1 == 'e') goto S321; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 's') goto S319; + if (c == 'e') goto S321; + return m.FSM_HALT(c); S200: - c1 = m.FSM_CHAR(); - if (c1 == 'y') goto S327; - if (c1 == 'u') goto S323; - if (c1 == 'e') goto S325; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 'y') goto S327; + if (c == 'u') goto S323; + if (c == 'e') goto S325; + return m.FSM_HALT(c); S204: - c1 = m.FSM_CHAR(); - if (c1 == 'n') goto S329; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 'n') goto S329; + return m.FSM_HALT(c); S206: - c1 = m.FSM_CHAR(); - if (c1 == 'r') 
goto S331; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 'r') goto S331; + return m.FSM_HALT(c); S208: - c1 = m.FSM_CHAR(); - if (c1 == 'e') goto S333; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 'e') goto S333; + return m.FSM_HALT(c); S210: m.FSM_TAKE(45); - c1 = m.FSM_CHAR(); - if ('a' <= c1 && c1 <= 'z') goto S216; - if (c1 == '_') goto S216; - if ('A' <= c1 && c1 <= 'Z') goto S216; - if ('0' <= c1 && c1 <= '9') goto S335; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if ('a' <= c && c <= 'z') goto S216; + if (c == '_') goto S216; + if ('A' <= c && c <= 'Z') goto S216; + if ('0' <= c && c <= '9') goto S335; + return m.FSM_HALT(c); S216: m.FSM_TAKE(45); - c1 = m.FSM_CHAR(); - if ('a' <= c1 && c1 <= 'z') goto S216; - if (c1 == '_') goto S216; - if ('A' <= c1 && c1 <= 'Z') goto S216; - if ('0' <= c1 && c1 <= '9') goto S216; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if ('a' <= c && c <= 'z') goto S216; + if (c == '_') goto S216; + if ('A' <= c && c <= 'Z') goto S216; + if ('0' <= c && c <= '9') goto S216; + return m.FSM_HALT(c); S222: m.FSM_TAKE(44); - c1 = m.FSM_CHAR(); - if ('a' <= c1 && c1 <= 'z') goto S234; - if (c1 == '_') goto S234; - if ('A' <= c1 && c1 <= 'Z') goto S234; - if ('0' <= c1 && c1 <= '9') goto S342; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if ('a' <= c && c <= 'z') goto S234; + if (c == '_') goto S234; + if ('A' <= c && c <= 'Z') goto S234; + if ('0' <= c && c <= '9') goto S342; + return m.FSM_HALT(c); S228: m.FSM_TAKE(42); - c1 = m.FSM_CHAR(); - if ('a' <= c1 && c1 <= 'z') goto S234; - if (c1 == '_') goto S234; - if ('A' <= c1 && c1 <= 'Z') goto S234; - if ('0' <= c1 && c1 <= '9') goto S228; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if ('a' <= c && c <= 'z') goto S234; + if (c == '_') goto S234; + if ('A' <= c && c <= 'Z') goto S234; + if ('0' <= c && c <= '9') goto S228; + return m.FSM_HALT(c); S234: m.FSM_TAKE(44); - c1 = m.FSM_CHAR(); - if ('a' <= c1 && c1 <= 'z') goto S234; - if (c1 == '_') goto S234; - 
if ('A' <= c1 && c1 <= 'Z') goto S234; - if ('0' <= c1 && c1 <= '9') goto S234; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if ('a' <= c && c <= 'z') goto S234; + if (c == '_') goto S234; + if ('A' <= c && c <= 'Z') goto S234; + if ('0' <= c && c <= '9') goto S234; + return m.FSM_HALT(c); S240: m.FSM_TAKE(44); - c1 = m.FSM_CHAR(); - if ('a' <= c1 && c1 <= 'z') goto S234; - if (c1 == '_') goto S234; - if ('A' <= c1 && c1 <= 'Z') goto S234; - if ('0' <= c1 && c1 <= '9') goto S349; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if ('a' <= c && c <= 'z') goto S234; + if (c == '_') goto S234; + if ('A' <= c && c <= 'Z') goto S234; + if ('0' <= c && c <= '9') goto S349; + return m.FSM_HALT(c); S246: m.FSM_TAKE(41); - c1 = m.FSM_CHAR(); - if ('a' <= c1 && c1 <= 'z') goto S234; - if (c1 == '_') goto S234; - if ('A' <= c1 && c1 <= 'Z') goto S234; - if ('0' <= c1 && c1 <= '9') goto S246; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if ('a' <= c && c <= 'z') goto S234; + if (c == '_') goto S234; + if ('A' <= c && c <= 'Z') goto S234; + if ('0' <= c && c <= '9') goto S246; + return m.FSM_HALT(c); S252: m.FSM_TAKE(45); - c1 = m.FSM_CHAR(); - if (c1 == 'r') goto S356; - if ('a' <= c1 && c1 <= 'z') goto S216; - if (c1 == '_') goto S216; - if ('A' <= c1 && c1 <= 'Z') goto S216; - if ('0' <= c1 && c1 <= '9') goto S216; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 'r') goto S356; + if ('a' <= c && c <= 'z') goto S216; + if (c == '_') goto S216; + if ('A' <= c && c <= 'Z') goto S216; + if ('0' <= c && c <= '9') goto S216; + return m.FSM_HALT(c); S259: m.FSM_TAKE(45); - c1 = m.FSM_CHAR(); - if (c1 == 'o') goto S363; - if ('a' <= c1 && c1 <= 'z') goto S216; - if (c1 == '_') goto S216; - if ('A' <= c1 && c1 <= 'Z') goto S216; - if ('0' <= c1 && c1 <= '9') goto S216; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 'o') goto S363; + if ('a' <= c && c <= 'z') goto S216; + if (c == '_') goto S216; + if ('A' <= c && c <= 'Z') goto S216; + if ('0' <= c && c <= '9') 
goto S216; + return m.FSM_HALT(c); S266: m.FSM_TAKE(45); - c1 = m.FSM_CHAR(); - if (c1 == 'd') goto S370; - if ('a' <= c1 && c1 <= 'z') goto S216; - if (c1 == '_') goto S216; - if ('A' <= c1 && c1 <= 'Z') goto S216; - if ('0' <= c1 && c1 <= '9') goto S216; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 'd') goto S370; + if ('a' <= c && c <= 'z') goto S216; + if (c == '_') goto S216; + if ('A' <= c && c <= 'Z') goto S216; + if ('0' <= c && c <= '9') goto S216; + return m.FSM_HALT(c); S273: m.FSM_TAKE(45); - c1 = m.FSM_CHAR(); - if (c1 == 'b') goto S376; - if ('a' <= c1 && c1 <= 'z') goto S216; - if (c1 == '_') goto S216; - if ('A' <= c1 && c1 <= 'Z') goto S216; - if ('0' <= c1 && c1 <= '9') goto S216; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 'b') goto S376; + if ('a' <= c && c <= 'z') goto S216; + if (c == '_') goto S216; + if ('A' <= c && c <= 'Z') goto S216; + if ('0' <= c && c <= '9') goto S216; + return m.FSM_HALT(c); S280: m.FSM_TAKE(43); - c1 = m.FSM_CHAR(); - if ('a' <= c1 && c1 <= 'z') goto S234; - if (c1 == '_') goto S234; - if ('A' <= c1 && c1 <= 'Z') goto S234; - if ('0' <= c1 && c1 <= '9') goto S280; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if ('a' <= c && c <= 'z') goto S234; + if (c == '_') goto S234; + if ('A' <= c && c <= 'Z') goto S234; + if ('0' <= c && c <= '9') goto S280; + return m.FSM_HALT(c); S286: m.FSM_TAKE(39); @@ -831,65 +831,65 @@ S286: S288: m.FSM_TAKE(40); - c1 = m.FSM_CHAR(); - if ('0' <= c1 && c1 <= '9') goto S288; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if ('0' <= c && c <= '9') goto S288; + return m.FSM_HALT(c); S291: - c1 = m.FSM_CHAR(); - if (c1 == 'g') goto S383; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 'g') goto S383; + return m.FSM_HALT(c); S293: - c1 = m.FSM_CHAR(); - if (c1 == 't') goto S385; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 't') goto S385; + return m.FSM_HALT(c); S295: - c1 = m.FSM_CHAR(); - if (c1 == 'd') goto S389; - return m.FSM_HALT(c1); + c = 
m.FSM_CHAR(); + if (c == 'd') goto S389; + return m.FSM_HALT(c); S297: - c1 = m.FSM_CHAR(); - if (c1 == 's') goto S391; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 's') goto S391; + return m.FSM_HALT(c); S299: m.FSM_TAKE(5); return m.FSM_HALT(); S301: - c1 = m.FSM_CHAR(); - if (c1 == 'p') goto S393; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 'p') goto S393; + return m.FSM_HALT(c); S303: m.FSM_TAKE(17); - c1 = m.FSM_CHAR(); - if (c1 == 't') goto S395; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 't') goto S395; + return m.FSM_HALT(c); S306: m.FSM_TAKE(15); - c1 = m.FSM_CHAR(); - if (c1 == 'u') goto S397; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 'u') goto S397; + return m.FSM_HALT(c); S309: - c1 = m.FSM_CHAR(); - if (c1 == 'u') goto S399; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 'u') goto S399; + return m.FSM_HALT(c); S311: - c1 = m.FSM_CHAR(); - if (c1 == 'n') goto S401; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 'n') goto S401; + return m.FSM_HALT(c); S313: - c1 = m.FSM_CHAR(); - if (c1 == 'l') goto S403; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 'l') goto S403; + return m.FSM_HALT(c); S315: m.FSM_TAKE(11); @@ -908,340 +908,340 @@ S321: return m.FSM_HALT(); S323: - c1 = m.FSM_CHAR(); - if (c1 == 'b') goto S405; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 'b') goto S405; + return m.FSM_HALT(c); S325: - c1 = m.FSM_CHAR(); - if (c1 == 't') goto S407; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 't') goto S407; + return m.FSM_HALT(c); S327: - c1 = m.FSM_CHAR(); - if (c1 == 'm') goto S409; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 'm') goto S409; + return m.FSM_HALT(c); S329: - c1 = m.FSM_CHAR(); - if (c1 == 'i') goto S411; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 'i') goto S411; + return m.FSM_HALT(c); S331: - c1 = m.FSM_CHAR(); - if (c1 == 'o') goto S413; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 'o') goto S413; 
+ return m.FSM_HALT(c); S333: - c1 = m.FSM_CHAR(); - if (c1 == 'f') goto S415; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 'f') goto S415; + return m.FSM_HALT(c); S335: m.FSM_TAKE(28); - c1 = m.FSM_CHAR(); - if ('a' <= c1 && c1 <= 'z') goto S216; - if (c1 == '_') goto S216; - if ('A' <= c1 && c1 <= 'Z') goto S216; - if ('0' <= c1 && c1 <= '9') goto S335; - if (c1 == ',') goto S418; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if ('a' <= c && c <= 'z') goto S216; + if (c == '_') goto S216; + if ('A' <= c && c <= 'Z') goto S216; + if ('0' <= c && c <= '9') goto S335; + if (c == ',') goto S418; + return m.FSM_HALT(c); S342: m.FSM_TAKE(29); - c1 = m.FSM_CHAR(); - if ('a' <= c1 && c1 <= 'z') goto S234; - if (c1 == '_') goto S234; - if ('A' <= c1 && c1 <= 'Z') goto S234; - if ('0' <= c1 && c1 <= '9') goto S342; - if (c1 == ',') goto S420; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if ('a' <= c && c <= 'z') goto S234; + if (c == '_') goto S234; + if ('A' <= c && c <= 'Z') goto S234; + if ('0' <= c && c <= '9') goto S342; + if (c == ',') goto S420; + return m.FSM_HALT(c); S349: m.FSM_TAKE(30); - c1 = m.FSM_CHAR(); - if ('a' <= c1 && c1 <= 'z') goto S234; - if (c1 == '_') goto S234; - if ('A' <= c1 && c1 <= 'Z') goto S234; - if ('0' <= c1 && c1 <= '9') goto S349; - if (c1 == ',') goto S422; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if ('a' <= c && c <= 'z') goto S234; + if (c == '_') goto S234; + if ('A' <= c && c <= 'Z') goto S234; + if ('0' <= c && c <= '9') goto S349; + if (c == ',') goto S422; + return m.FSM_HALT(c); S356: m.FSM_TAKE(45); - c1 = m.FSM_CHAR(); - if (c1 == 'd') goto S424; - if ('a' <= c1 && c1 <= 'z') goto S216; - if (c1 == '_') goto S216; - if ('A' <= c1 && c1 <= 'Z') goto S216; - if ('0' <= c1 && c1 <= '9') goto S216; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 'd') goto S424; + if ('a' <= c && c <= 'z') goto S216; + if (c == '_') goto S216; + if ('A' <= c && c <= 'Z') goto S216; + if ('0' <= c && c <= '9') goto S216; 
+ return m.FSM_HALT(c); S363: m.FSM_TAKE(45); - c1 = m.FSM_CHAR(); - if (c1 == 'l') goto S430; - if ('a' <= c1 && c1 <= 'z') goto S216; - if (c1 == '_') goto S216; - if ('A' <= c1 && c1 <= 'Z') goto S216; - if ('0' <= c1 && c1 <= '9') goto S216; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 'l') goto S430; + if ('a' <= c && c <= 'z') goto S216; + if (c == '_') goto S216; + if ('A' <= c && c <= 'Z') goto S216; + if ('0' <= c && c <= '9') goto S216; + return m.FSM_HALT(c); S370: m.FSM_TAKE(33); - c1 = m.FSM_CHAR(); - if ('a' <= c1 && c1 <= 'z') goto S216; - if (c1 == '_') goto S216; - if ('A' <= c1 && c1 <= 'Z') goto S216; - if ('0' <= c1 && c1 <= '9') goto S216; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if ('a' <= c && c <= 'z') goto S216; + if (c == '_') goto S216; + if ('A' <= c && c <= 'Z') goto S216; + if ('0' <= c && c <= '9') goto S216; + return m.FSM_HALT(c); S376: m.FSM_TAKE(45); - c1 = m.FSM_CHAR(); - if (c1 == 'o') goto S436; - if ('a' <= c1 && c1 <= 'z') goto S216; - if (c1 == '_') goto S216; - if ('A' <= c1 && c1 <= 'Z') goto S216; - if ('0' <= c1 && c1 <= '9') goto S216; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 'o') goto S436; + if ('a' <= c && c <= 'z') goto S216; + if (c == '_') goto S216; + if ('A' <= c && c <= 'Z') goto S216; + if ('0' <= c && c <= '9') goto S216; + return m.FSM_HALT(c); S383: m.FSM_TAKE(3); return m.FSM_HALT(); S385: - c1 = m.FSM_CHAR(); - if (c1 == 's') goto S447; - if (c1 == 'i') goto S445; - if (c1 == 'e') goto S443; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 's') goto S447; + if (c == 'i') goto S445; + if (c == 'e') goto S443; + return m.FSM_HALT(c); S389: m.FSM_TAKE(4); return m.FSM_HALT(); S391: - c1 = m.FSM_CHAR(); - if (c1 == 'i') goto S449; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 'i') goto S449; + return m.FSM_HALT(c); S393: - c1 = m.FSM_CHAR(); - if (c1 == 'l') goto S451; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 'l') goto S451; + return 
m.FSM_HALT(c); S395: - c1 = m.FSM_CHAR(); - if (c1 == 'e') goto S453; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 'e') goto S453; + return m.FSM_HALT(c); S397: - c1 = m.FSM_CHAR(); - if (c1 == 'i') goto S455; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 'i') goto S455; + return m.FSM_HALT(c); S399: - c1 = m.FSM_CHAR(); - if (c1 == 's') goto S457; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 's') goto S457; + return m.FSM_HALT(c); S401: - c1 = m.FSM_CHAR(); - if (c1 == 'u') goto S459; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 'u') goto S459; + return m.FSM_HALT(c); S403: - c1 = m.FSM_CHAR(); - if (c1 == 't') goto S461; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 't') goto S461; + return m.FSM_HALT(c); S405: - c1 = m.FSM_CHAR(); - if (c1 == 's') goto S463; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 's') goto S463; + return m.FSM_HALT(c); S407: - c1 = m.FSM_CHAR(); - if (c1 == 'm') goto S465; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 'm') goto S465; + return m.FSM_HALT(c); S409: - c1 = m.FSM_CHAR(); - if (c1 == 'm') goto S467; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 'm') goto S467; + return m.FSM_HALT(c); S411: - c1 = m.FSM_CHAR(); - if (c1 == 'o') goto S469; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 'o') goto S469; + return m.FSM_HALT(c); S413: - c1 = m.FSM_CHAR(); - if (c1 == 'm') goto S471; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 'm') goto S471; + return m.FSM_HALT(c); S415: - c1 = m.FSM_CHAR(); - if (c1 == 't') goto S475; - if (c1 == 'e') goto S473; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 't') goto S475; + if (c == 'e') goto S473; + return m.FSM_HALT(c); S418: - c1 = m.FSM_CHAR(); - if ('0' <= c1 && c1 <= '9') goto S477; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if ('0' <= c && c <= '9') goto S477; + return m.FSM_HALT(c); S420: - c1 = m.FSM_CHAR(); - if ('0' <= c1 && c1 <= '9') goto S481; - return m.FSM_HALT(c1); 
+ c = m.FSM_CHAR(); + if ('0' <= c && c <= '9') goto S481; + return m.FSM_HALT(c); S422: - c1 = m.FSM_CHAR(); - if ('0' <= c1 && c1 <= '9') goto S485; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if ('0' <= c && c <= '9') goto S485; + return m.FSM_HALT(c); S424: m.FSM_TAKE(31); - c1 = m.FSM_CHAR(); - if ('a' <= c1 && c1 <= 'z') goto S216; - if (c1 == '_') goto S216; - if ('A' <= c1 && c1 <= 'Z') goto S216; - if ('0' <= c1 && c1 <= '9') goto S216; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if ('a' <= c && c <= 'z') goto S216; + if (c == '_') goto S216; + if ('A' <= c && c <= 'Z') goto S216; + if ('0' <= c && c <= '9') goto S216; + return m.FSM_HALT(c); S430: m.FSM_TAKE(32); - c1 = m.FSM_CHAR(); - if ('a' <= c1 && c1 <= 'z') goto S216; - if (c1 == '_') goto S216; - if ('A' <= c1 && c1 <= 'Z') goto S216; - if ('0' <= c1 && c1 <= '9') goto S216; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if ('a' <= c && c <= 'z') goto S216; + if (c == '_') goto S216; + if ('A' <= c && c <= 'Z') goto S216; + if ('0' <= c && c <= '9') goto S216; + return m.FSM_HALT(c); S436: m.FSM_TAKE(45); - c1 = m.FSM_CHAR(); - if (c1 == 'o') goto S489; - if ('a' <= c1 && c1 <= 'z') goto S216; - if (c1 == '_') goto S216; - if ('A' <= c1 && c1 <= 'Z') goto S216; - if ('0' <= c1 && c1 <= '9') goto S216; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 'o') goto S489; + if ('a' <= c && c <= 'z') goto S216; + if (c == '_') goto S216; + if ('A' <= c && c <= 'Z') goto S216; + if ('0' <= c && c <= '9') goto S216; + return m.FSM_HALT(c); S443: - c1 = m.FSM_CHAR(); - if (c1 == 'q') goto S496; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 'q') goto S496; + return m.FSM_HALT(c); S445: - c1 = m.FSM_CHAR(); - if (c1 == 'n') goto S498; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 'n') goto S498; + return m.FSM_HALT(c); S447: - c1 = m.FSM_CHAR(); - if (c1 == 'u') goto S500; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 'u') goto S500; + return m.FSM_HALT(c); S449: - c1 
= m.FSM_CHAR(); - if (c1 == 'g') goto S502; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 'g') goto S502; + return m.FSM_HALT(c); S451: m.FSM_TAKE(6); return m.FSM_HALT(); S453: - c1 = m.FSM_CHAR(); - if (c1 == 'r') goto S504; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 'r') goto S504; + return m.FSM_HALT(c); S455: - c1 = m.FSM_CHAR(); - if (c1 == 'v') goto S506; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 'v') goto S506; + return m.FSM_HALT(c); S457: m.FSM_TAKE(8); return m.FSM_HALT(); S459: - c1 = m.FSM_CHAR(); - if (c1 == 's') goto S508; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 's') goto S508; + return m.FSM_HALT(c); S461: - c1 = m.FSM_CHAR(); - if (c1 == 'i') goto S510; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 'i') goto S510; + return m.FSM_HALT(c); S463: - c1 = m.FSM_CHAR(); - if (c1 == 'e') goto S512; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 'e') goto S512; + return m.FSM_HALT(c); S465: - c1 = m.FSM_CHAR(); - if (c1 == 'i') goto S514; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 'i') goto S514; + return m.FSM_HALT(c); S467: - c1 = m.FSM_CHAR(); - if (c1 == 'd') goto S516; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 'd') goto S516; + return m.FSM_HALT(c); S469: - c1 = m.FSM_CHAR(); - if (c1 == 'n') goto S518; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 'n') goto S518; + return m.FSM_HALT(c); S471: m.FSM_TAKE(47); return m.FSM_HALT(); S473: - c1 = m.FSM_CHAR(); - if (c1 == 'x') goto S520; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 'x') goto S520; + return m.FSM_HALT(c); S475: - c1 = m.FSM_CHAR(); - if (c1 == 'y') goto S522; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 'y') goto S522; + return m.FSM_HALT(c); S477: m.FSM_TAKE(28); - c1 = m.FSM_CHAR(); - if ('0' <= c1 && c1 <= '9') goto S477; - if (c1 == ',') goto S418; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if ('0' <= c && c <= '9') goto S477; + if (c == ',') goto S418; 
+ return m.FSM_HALT(c); S481: m.FSM_TAKE(29); - c1 = m.FSM_CHAR(); - if ('0' <= c1 && c1 <= '9') goto S481; - if (c1 == ',') goto S420; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if ('0' <= c && c <= '9') goto S481; + if (c == ',') goto S420; + return m.FSM_HALT(c); S485: m.FSM_TAKE(30); - c1 = m.FSM_CHAR(); - if ('0' <= c1 && c1 <= '9') goto S485; - if (c1 == ',') goto S422; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if ('0' <= c && c <= '9') goto S485; + if (c == ',') goto S422; + return m.FSM_HALT(c); S489: m.FSM_TAKE(45); - c1 = m.FSM_CHAR(); - if (c1 == 'l') goto S524; - if ('a' <= c1 && c1 <= 'z') goto S216; - if (c1 == '_') goto S216; - if ('A' <= c1 && c1 <= 'Z') goto S216; - if ('0' <= c1 && c1 <= '9') goto S216; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 'l') goto S524; + if ('a' <= c && c <= 'z') goto S216; + if (c == '_') goto S216; + if ('A' <= c && c <= 'Z') goto S216; + if ('0' <= c && c <= '9') goto S216; + return m.FSM_HALT(c); S496: m.FSM_TAKE(16); @@ -1252,19 +1252,19 @@ S498: return m.FSM_HALT(); S500: - c1 = m.FSM_CHAR(); - if (c1 == 'b') goto S530; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 'b') goto S530; + return m.FSM_HALT(c); S502: - c1 = m.FSM_CHAR(); - if (c1 == 'n') goto S532; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 'n') goto S532; + return m.FSM_HALT(c); S504: - c1 = m.FSM_CHAR(); - if (c1 == 's') goto S534; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 's') goto S534; + return m.FSM_HALT(c); S506: m.FSM_TAKE(7); @@ -1275,122 +1275,122 @@ S508: return m.FSM_HALT(); S510: - c1 = m.FSM_CHAR(); - if (c1 == 'p') goto S536; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 'p') goto S536; + return m.FSM_HALT(c); S512: - c1 = m.FSM_CHAR(); - if (c1 == 't') goto S538; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 't') goto S538; + return m.FSM_HALT(c); S514: - c1 = m.FSM_CHAR(); - if (c1 == 'n') goto S541; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 'n') 
goto S541; + return m.FSM_HALT(c); S516: - c1 = m.FSM_CHAR(); - if (c1 == 'i') goto S543; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 'i') goto S543; + return m.FSM_HALT(c); S518: m.FSM_TAKE(23); return m.FSM_HALT(); S520: - c1 = m.FSM_CHAR(); - if (c1 == 'p') goto S545; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 'p') goto S545; + return m.FSM_HALT(c); S522: - c1 = m.FSM_CHAR(); - if (c1 == 'p') goto S547; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 'p') goto S547; + return m.FSM_HALT(c); S524: m.FSM_TAKE(34); - c1 = m.FSM_CHAR(); - if ('a' <= c1 && c1 <= 'z') goto S216; - if (c1 == '_') goto S216; - if ('A' <= c1 && c1 <= 'Z') goto S216; - if ('0' <= c1 && c1 <= '9') goto S216; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if ('a' <= c && c <= 'z') goto S216; + if (c == '_') goto S216; + if ('A' <= c && c <= 'Z') goto S216; + if ('0' <= c && c <= '9') goto S216; + return m.FSM_HALT(c); S530: - c1 = m.FSM_CHAR(); - if (c1 == 's') goto S549; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 's') goto S549; + return m.FSM_HALT(c); S532: m.FSM_TAKE(46); return m.FSM_HALT(); S534: - c1 = m.FSM_CHAR(); - if (c1 == 'e') goto S551; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 'e') goto S551; + return m.FSM_HALT(c); S536: - c1 = m.FSM_CHAR(); - if (c1 == 'l') goto S553; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 'l') goto S553; + return m.FSM_HALT(c); S538: m.FSM_TAKE(20); - c1 = m.FSM_CHAR(); - if (c1 == 'e') goto S555; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 'e') goto S555; + return m.FSM_HALT(c); S541: - c1 = m.FSM_CHAR(); - if (c1 == 'u') goto S557; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 'u') goto S557; + return m.FSM_HALT(c); S543: - c1 = m.FSM_CHAR(); - if (c1 == 'f') goto S559; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 'f') goto S559; + return m.FSM_HALT(c); S545: - c1 = m.FSM_CHAR(); - if (c1 == 'r') goto S561; - return m.FSM_HALT(c1); + c = 
m.FSM_CHAR(); + if (c == 'r') goto S561; + return m.FSM_HALT(c); S547: - c1 = m.FSM_CHAR(); - if (c1 == 'e') goto S563; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 'e') goto S563; + return m.FSM_HALT(c); S549: - c1 = m.FSM_CHAR(); - if (c1 == 'e') goto S565; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 'e') goto S565; + return m.FSM_HALT(c); S551: - c1 = m.FSM_CHAR(); - if (c1 == 'c') goto S567; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 'c') goto S567; + return m.FSM_HALT(c); S553: - c1 = m.FSM_CHAR(); - if (c1 == 'y') goto S569; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 'y') goto S569; + return m.FSM_HALT(c); S555: - c1 = m.FSM_CHAR(); - if (c1 == 'q') goto S571; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 'q') goto S571; + return m.FSM_HALT(c); S557: - c1 = m.FSM_CHAR(); - if (c1 == 's') goto S573; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 's') goto S573; + return m.FSM_HALT(c); S559: - c1 = m.FSM_CHAR(); - if (c1 == 'f') goto S575; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 'f') goto S575; + return m.FSM_HALT(c); S561: m.FSM_TAKE(48); @@ -1401,14 +1401,14 @@ S563: return m.FSM_HALT(); S565: - c1 = m.FSM_CHAR(); - if (c1 == 't') goto S577; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 't') goto S577; + return m.FSM_HALT(c); S567: - c1 = m.FSM_CHAR(); - if (c1 == 't') goto S579; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 't') goto S579; + return m.FSM_HALT(c); S569: m.FSM_TAKE(10); diff --git a/ccl/rslang/header/MathLexerImpl.hpp b/ccl/rslang/header/MathLexerImpl.hpp index 05a9f5c..3a55eff 100644 --- a/ccl/rslang/header/MathLexerImpl.hpp +++ b/ccl/rslang/header/MathLexerImpl.hpp @@ -1,6 +1,6 @@ -// MathLexerImpl.hpp generated by reflex 4.2.1 from MathLexerImpl.l +// MathLexerImpl.hpp generated by reflex 4.4.0 from MathLexerImpl.l -#define REFLEX_VERSION "4.2.1" +#define REFLEX_VERSION "4.4.0" 
//////////////////////////////////////////////////////////////////////////////// // // @@ -385,376 +385,376 @@ namespace rslex { void reflex_code_INITIAL(reflex::Matcher& m) { - int c0 = 0, c1 = 0; - m.FSM_INIT(c1); + int c = 0; + m.FSM_INIT(c); S0: m.FSM_FIND(); - c1 = m.FSM_CHAR(); - if (c1 == 226) goto S58; - if (c1 == 207) goto S221; - if (c1 == 206) goto S217; - if (c1 == 195) goto S82; - if (c1 == 194) goto S69; - if (c1 == '}') goto S234; - if (c1 == '|') goto S243; - if (c1 == '{') goto S231; - if (c1 == 'r') goto S139; - if (c1 == 'p') goto S89; - if (c1 == 'd') goto S149; - if (c1 == 'c') goto S119; - if (c1 == 'b') goto S129; - if ('a' <= c1 && c1 <= 'z') goto S208; - if (c1 == '_') goto S208; - if (c1 == ']') goto S240; - if (c1 == '\\') goto S86; - if (c1 == '[') goto S237; - if (c1 == 'Z') goto S186; - if (c1 == 'R') goto S168; - if (c1 == 'P') goto S99; - if (c1 == 'I') goto S177; - if (c1 == 'F') goto S109; - if (c1 == 'D') goto S159; - if ('C' <= c1 && c1 <= 'Y') goto S199; - if (c1 == 'A') goto S199; - if (c1 == '>') goto S52; - if (c1 == '=') goto S66; - if (c1 == '<') goto S55; - if (c1 == ';') goto S249; - if (c1 == ':') goto S76; - if ('0' <= c1 && c1 <= '9') goto S195; - if (c1 == '-') goto S46; - if (c1 == ',') goto S246; - if (c1 == '+') goto S43; - if (c1 == '*') goto S49; - if (c1 == ')') goto S228; - if (c1 == '(') goto S225; - if (c1 == '&') goto S73; - if (c1 == ' ') goto S254; - if (c1 == '\n') goto S252; - if (c1 == '\t') goto S254; - if (0 <= c1) goto S259; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 226) goto S58; + if (c == 207) goto S221; + if (c == 206) goto S217; + if (c == 195) goto S82; + if (c == 194) goto S69; + if (c == '}') goto S234; + if (c == '|') goto S243; + if (c == '{') goto S231; + if (c == 'r') goto S139; + if (c == 'p') goto S89; + if (c == 'd') goto S149; + if (c == 'c') goto S119; + if (c == 'b') goto S129; + if ('a' <= c && c <= 'z') goto S208; + if (c == '_') goto S208; + if (c == ']') goto S240; 
+ if (c == '\\') goto S86; + if (c == '[') goto S237; + if (c == 'Z') goto S186; + if (c == 'R') goto S168; + if (c == 'P') goto S99; + if (c == 'I') goto S177; + if (c == 'F') goto S109; + if (c == 'D') goto S159; + if ('C' <= c && c <= 'Y') goto S199; + if (c == 'A') goto S199; + if (c == '>') goto S52; + if (c == '=') goto S66; + if (c == '<') goto S55; + if (c == ';') goto S249; + if (c == ':') goto S76; + if ('0' <= c && c <= '9') goto S195; + if (c == '-') goto S46; + if (c == ',') goto S246; + if (c == '+') goto S43; + if (c == '*') goto S49; + if (c == ')') goto S228; + if (c == '(') goto S225; + if (c == '&') goto S73; + if (c == ' ') goto S254; + if (c == '\n') goto S252; + if (c == '\t') goto S254; + if (0 <= c) goto S259; + return m.FSM_HALT(c); S43: m.FSM_TAKE(1); - c1 = m.FSM_CHAR(); - if (128 <= c1 && c1 <= 191) goto S259; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (128 <= c && c <= 191) goto S259; + return m.FSM_HALT(c); S46: m.FSM_TAKE(2); - c1 = m.FSM_CHAR(); - if (128 <= c1 && c1 <= 191) goto S259; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (128 <= c && c <= 191) goto S259; + return m.FSM_HALT(c); S49: m.FSM_TAKE(3); - c1 = m.FSM_CHAR(); - if (128 <= c1 && c1 <= 191) goto S259; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (128 <= c && c <= 191) goto S259; + return m.FSM_HALT(c); S52: m.FSM_TAKE(4); - c1 = m.FSM_CHAR(); - if (128 <= c1 && c1 <= 191) goto S259; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (128 <= c && c <= 191) goto S259; + return m.FSM_HALT(c); S55: m.FSM_TAKE(5); - c1 = m.FSM_CHAR(); - if (128 <= c1 && c1 <= 191) goto S259; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (128 <= c && c <= 191) goto S259; + return m.FSM_HALT(c); S58: m.FSM_TAKE(61); - c1 = m.FSM_CHAR(); - if (c1 == 138) goto S285; - if (c1 == 137) goto S262; - if (c1 == 136) goto S268; - if (c1 == 135) goto S280; - if (c1 == 132) goto S291; - if (128 <= c1 && c1 <= 191) goto S259; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 
138) goto S285; + if (c == 137) goto S262; + if (c == 136) goto S268; + if (c == 135) goto S280; + if (c == 132) goto S291; + if (128 <= c && c <= 191) goto S259; + return m.FSM_HALT(c); S66: m.FSM_TAKE(8); - c1 = m.FSM_CHAR(); - if (128 <= c1 && c1 <= 191) goto S259; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (128 <= c && c <= 191) goto S259; + return m.FSM_HALT(c); S69: m.FSM_TAKE(61); - c1 = m.FSM_CHAR(); - if (c1 == 172) goto S295; - if (128 <= c1 && c1 <= 191) goto S259; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 172) goto S295; + if (128 <= c && c <= 191) goto S259; + return m.FSM_HALT(c); S73: m.FSM_TAKE(13); - c1 = m.FSM_CHAR(); - if (128 <= c1 && c1 <= 191) goto S259; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (128 <= c && c <= 191) goto S259; + return m.FSM_HALT(c); S76: m.FSM_TAKE(61); - c1 = m.FSM_CHAR(); - if (c1 == 226) goto S298; - if (128 <= c1 && c1 <= 191) goto S259; - if (c1 == '=') goto S300; - if (c1 == ':') goto S303; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 226) goto S298; + if (128 <= c && c <= 191) goto S259; + if (c == '=') goto S300; + if (c == ':') goto S303; + return m.FSM_HALT(c); S82: m.FSM_TAKE(61); - c1 = m.FSM_CHAR(); - if (c1 == 151) goto S305; - if (128 <= c1 && c1 <= 191) goto S259; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 151) goto S305; + if (128 <= c && c <= 191) goto S259; + return m.FSM_HALT(c); S86: m.FSM_TAKE(26); - c1 = m.FSM_CHAR(); - if (128 <= c1 && c1 <= 191) goto S259; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (128 <= c && c <= 191) goto S259; + return m.FSM_HALT(c); S89: m.FSM_TAKE(46); - c1 = m.FSM_CHAR(); - if (c1 == 207) goto S326; - if (c1 == 206) goto S324; - if (128 <= c1 && c1 <= 191) goto S259; - if (c1 == 'r') goto S308; - if ('a' <= c1 && c1 <= 'z') goto S316; - if (c1 == '_') goto S316; - if ('A' <= c1 && c1 <= 'Z') goto S316; - if ('0' <= c1 && c1 <= '9') goto S316; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 207) goto S326; + 
if (c == 206) goto S324; + if (128 <= c && c <= 191) goto S259; + if (c == 'r') goto S308; + if ('a' <= c && c <= 'z') goto S316; + if (c == '_') goto S316; + if ('A' <= c && c <= 'Z') goto S316; + if ('0' <= c && c <= '9') goto S316; + return m.FSM_HALT(c); S99: m.FSM_TAKE(45); - c1 = m.FSM_CHAR(); - if (c1 == 207) goto S354; - if (c1 == 206) goto S352; - if (128 <= c1 && c1 <= 191) goto S259; - if (c1 == 'r') goto S328; - if ('a' <= c1 && c1 <= 'z') goto S344; - if (c1 == '_') goto S344; - if ('A' <= c1 && c1 <= 'Z') goto S344; - if ('0' <= c1 && c1 <= '9') goto S336; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 207) goto S354; + if (c == 206) goto S352; + if (128 <= c && c <= 191) goto S259; + if (c == 'r') goto S328; + if ('a' <= c && c <= 'z') goto S344; + if (c == '_') goto S344; + if ('A' <= c && c <= 'Z') goto S344; + if ('0' <= c && c <= '9') goto S336; + return m.FSM_HALT(c); S109: m.FSM_TAKE(45); - c1 = m.FSM_CHAR(); - if (c1 == 207) goto S354; - if (c1 == 206) goto S352; - if (128 <= c1 && c1 <= 191) goto S259; - if (c1 == 'i') goto S356; - if ('a' <= c1 && c1 <= 'z') goto S344; - if (c1 == '_') goto S344; - if ('A' <= c1 && c1 <= 'Z') goto S344; - if ('0' <= c1 && c1 <= '9') goto S364; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 207) goto S354; + if (c == 206) goto S352; + if (128 <= c && c <= 191) goto S259; + if (c == 'i') goto S356; + if ('a' <= c && c <= 'z') goto S344; + if (c == '_') goto S344; + if ('A' <= c && c <= 'Z') goto S344; + if ('0' <= c && c <= '9') goto S364; + return m.FSM_HALT(c); S119: m.FSM_TAKE(46); - c1 = m.FSM_CHAR(); - if (c1 == 207) goto S326; - if (c1 == 206) goto S324; - if (128 <= c1 && c1 <= 191) goto S259; - if ('b' <= c1 && c1 <= 'z') goto S316; - if (c1 == 'a') goto S372; - if (c1 == '_') goto S316; - if ('A' <= c1 && c1 <= 'Z') goto S316; - if ('0' <= c1 && c1 <= '9') goto S316; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 207) goto S326; + if (c == 206) goto S324; + if (128 <= c && c 
<= 191) goto S259; + if ('b' <= c && c <= 'z') goto S316; + if (c == 'a') goto S372; + if (c == '_') goto S316; + if ('A' <= c && c <= 'Z') goto S316; + if ('0' <= c && c <= '9') goto S316; + return m.FSM_HALT(c); S129: m.FSM_TAKE(46); - c1 = m.FSM_CHAR(); - if (c1 == 207) goto S326; - if (c1 == 206) goto S324; - if (128 <= c1 && c1 <= 191) goto S259; - if (c1 == 'o') goto S381; - if ('a' <= c1 && c1 <= 'z') goto S316; - if (c1 == '_') goto S316; - if ('A' <= c1 && c1 <= 'Z') goto S316; - if ('0' <= c1 && c1 <= '9') goto S316; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 207) goto S326; + if (c == 206) goto S324; + if (128 <= c && c <= 191) goto S259; + if (c == 'o') goto S381; + if ('a' <= c && c <= 'z') goto S316; + if (c == '_') goto S316; + if ('A' <= c && c <= 'Z') goto S316; + if ('0' <= c && c <= '9') goto S316; + return m.FSM_HALT(c); S139: m.FSM_TAKE(46); - c1 = m.FSM_CHAR(); - if (c1 == 207) goto S326; - if (c1 == 206) goto S324; - if (128 <= c1 && c1 <= 191) goto S259; - if (c1 == 'e') goto S390; - if ('a' <= c1 && c1 <= 'z') goto S316; - if (c1 == '_') goto S316; - if ('A' <= c1 && c1 <= 'Z') goto S316; - if ('0' <= c1 && c1 <= '9') goto S316; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 207) goto S326; + if (c == 206) goto S324; + if (128 <= c && c <= 191) goto S259; + if (c == 'e') goto S390; + if ('a' <= c && c <= 'z') goto S316; + if (c == '_') goto S316; + if ('A' <= c && c <= 'Z') goto S316; + if ('0' <= c && c <= '9') goto S316; + return m.FSM_HALT(c); S149: m.FSM_TAKE(46); - c1 = m.FSM_CHAR(); - if (c1 == 207) goto S326; - if (c1 == 206) goto S324; - if (128 <= c1 && c1 <= 191) goto S259; - if (c1 == 'e') goto S399; - if ('a' <= c1 && c1 <= 'z') goto S316; - if (c1 == '_') goto S316; - if ('A' <= c1 && c1 <= 'Z') goto S316; - if ('0' <= c1 && c1 <= '9') goto S316; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 207) goto S326; + if (c == 206) goto S324; + if (128 <= c && c <= 191) goto S259; + if (c == 'e') goto 
S399; + if ('a' <= c && c <= 'z') goto S316; + if (c == '_') goto S316; + if ('A' <= c && c <= 'Z') goto S316; + if ('0' <= c && c <= '9') goto S316; + return m.FSM_HALT(c); S159: m.FSM_TAKE(36); - c1 = m.FSM_CHAR(); - if (c1 == 207) goto S354; - if (c1 == 206) goto S352; - if (128 <= c1 && c1 <= 191) goto S259; - if ('a' <= c1 && c1 <= 'z') goto S344; - if (c1 == '_') goto S344; - if ('A' <= c1 && c1 <= 'Z') goto S344; - if ('0' <= c1 && c1 <= '9') goto S344; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 207) goto S354; + if (c == 206) goto S352; + if (128 <= c && c <= 191) goto S259; + if ('a' <= c && c <= 'z') goto S344; + if (c == '_') goto S344; + if ('A' <= c && c <= 'Z') goto S344; + if ('0' <= c && c <= '9') goto S344; + return m.FSM_HALT(c); S168: m.FSM_TAKE(37); - c1 = m.FSM_CHAR(); - if (c1 == 207) goto S354; - if (c1 == 206) goto S352; - if (128 <= c1 && c1 <= 191) goto S259; - if ('a' <= c1 && c1 <= 'z') goto S344; - if (c1 == '_') goto S344; - if ('A' <= c1 && c1 <= 'Z') goto S344; - if ('0' <= c1 && c1 <= '9') goto S408; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 207) goto S354; + if (c == 206) goto S352; + if (128 <= c && c <= 191) goto S259; + if ('a' <= c && c <= 'z') goto S344; + if (c == '_') goto S344; + if ('A' <= c && c <= 'Z') goto S344; + if ('0' <= c && c <= '9') goto S408; + return m.FSM_HALT(c); S177: m.FSM_TAKE(38); - c1 = m.FSM_CHAR(); - if (c1 == 207) goto S354; - if (c1 == 206) goto S352; - if (128 <= c1 && c1 <= 191) goto S259; - if ('a' <= c1 && c1 <= 'z') goto S344; - if (c1 == '_') goto S344; - if ('A' <= c1 && c1 <= 'Z') goto S344; - if ('0' <= c1 && c1 <= '9') goto S344; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 207) goto S354; + if (c == 206) goto S352; + if (128 <= c && c <= 191) goto S259; + if ('a' <= c && c <= 'z') goto S344; + if (c == '_') goto S344; + if ('A' <= c && c <= 'Z') goto S344; + if ('0' <= c && c <= '9') goto S344; + return m.FSM_HALT(c); S186: m.FSM_TAKE(39); - c1 = 
m.FSM_CHAR(); - if (c1 == 207) goto S354; - if (c1 == 206) goto S352; - if (128 <= c1 && c1 <= 191) goto S259; - if ('a' <= c1 && c1 <= 'z') goto S344; - if (c1 == '_') goto S344; - if ('A' <= c1 && c1 <= 'Z') goto S344; - if ('0' <= c1 && c1 <= '9') goto S344; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 207) goto S354; + if (c == 206) goto S352; + if (128 <= c && c <= 191) goto S259; + if ('a' <= c && c <= 'z') goto S344; + if (c == '_') goto S344; + if ('A' <= c && c <= 'Z') goto S344; + if ('0' <= c && c <= '9') goto S344; + return m.FSM_HALT(c); S195: m.FSM_TAKE(41); - c1 = m.FSM_CHAR(); - if (128 <= c1 && c1 <= 191) goto S259; - if ('0' <= c1 && c1 <= '9') goto S416; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (128 <= c && c <= 191) goto S259; + if ('0' <= c && c <= '9') goto S416; + return m.FSM_HALT(c); S199: m.FSM_TAKE(45); - c1 = m.FSM_CHAR(); - if (c1 == 207) goto S354; - if (c1 == 206) goto S352; - if (128 <= c1 && c1 <= 191) goto S259; - if ('a' <= c1 && c1 <= 'z') goto S344; - if (c1 == '_') goto S344; - if ('A' <= c1 && c1 <= 'Z') goto S344; - if ('0' <= c1 && c1 <= '9') goto S344; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 207) goto S354; + if (c == 206) goto S352; + if (128 <= c && c <= 191) goto S259; + if ('a' <= c && c <= 'z') goto S344; + if (c == '_') goto S344; + if ('A' <= c && c <= 'Z') goto S344; + if ('0' <= c && c <= '9') goto S344; + return m.FSM_HALT(c); S208: m.FSM_TAKE(46); - c1 = m.FSM_CHAR(); - if (c1 == 207) goto S326; - if (c1 == 206) goto S324; - if (128 <= c1 && c1 <= 191) goto S259; - if ('a' <= c1 && c1 <= 'z') goto S316; - if (c1 == '_') goto S316; - if ('A' <= c1 && c1 <= 'Z') goto S316; - if ('0' <= c1 && c1 <= '9') goto S316; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 207) goto S326; + if (c == 206) goto S324; + if (128 <= c && c <= 191) goto S259; + if ('a' <= c && c <= 'z') goto S316; + if (c == '_') goto S316; + if ('A' <= c && c <= 'Z') goto S316; + if ('0' <= c && c <= '9') 
goto S316; + return m.FSM_HALT(c); S217: m.FSM_TAKE(61); - c1 = m.FSM_CHAR(); - if (177 <= c1 && c1 <= 191) goto S208; - if (128 <= c1 && c1 <= 176) goto S259; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (177 <= c && c <= 191) goto S208; + if (128 <= c && c <= 176) goto S259; + return m.FSM_HALT(c); S221: m.FSM_TAKE(61); - c1 = m.FSM_CHAR(); - if (138 <= c1 && c1 <= 191) goto S259; - if (128 <= c1 && c1 <= 137) goto S208; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (138 <= c && c <= 191) goto S259; + if (128 <= c && c <= 137) goto S208; + return m.FSM_HALT(c); S225: m.FSM_TAKE(50); - c1 = m.FSM_CHAR(); - if (128 <= c1 && c1 <= 191) goto S259; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (128 <= c && c <= 191) goto S259; + return m.FSM_HALT(c); S228: m.FSM_TAKE(51); - c1 = m.FSM_CHAR(); - if (128 <= c1 && c1 <= 191) goto S259; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (128 <= c && c <= 191) goto S259; + return m.FSM_HALT(c); S231: m.FSM_TAKE(52); - c1 = m.FSM_CHAR(); - if (128 <= c1 && c1 <= 191) goto S259; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (128 <= c && c <= 191) goto S259; + return m.FSM_HALT(c); S234: m.FSM_TAKE(53); - c1 = m.FSM_CHAR(); - if (128 <= c1 && c1 <= 191) goto S259; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (128 <= c && c <= 191) goto S259; + return m.FSM_HALT(c); S237: m.FSM_TAKE(54); - c1 = m.FSM_CHAR(); - if (128 <= c1 && c1 <= 191) goto S259; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (128 <= c && c <= 191) goto S259; + return m.FSM_HALT(c); S240: m.FSM_TAKE(55); - c1 = m.FSM_CHAR(); - if (128 <= c1 && c1 <= 191) goto S259; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (128 <= c && c <= 191) goto S259; + return m.FSM_HALT(c); S243: m.FSM_TAKE(56); - c1 = m.FSM_CHAR(); - if (128 <= c1 && c1 <= 191) goto S259; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (128 <= c && c <= 191) goto S259; + return m.FSM_HALT(c); S246: m.FSM_TAKE(57); - c1 = m.FSM_CHAR(); - if (128 <= c1 && c1 <= 191) 
goto S259; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (128 <= c && c <= 191) goto S259; + return m.FSM_HALT(c); S249: m.FSM_TAKE(58); - c1 = m.FSM_CHAR(); - if (128 <= c1 && c1 <= 191) goto S259; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (128 <= c && c <= 191) goto S259; + return m.FSM_HALT(c); S252: m.FSM_TAKE(59); @@ -762,375 +762,375 @@ S252: S254: m.FSM_TAKE(60); - c1 = m.FSM_CHAR(); - if (128 <= c1 && c1 <= 191) goto S259; - if (c1 == ' ') goto S419; - if (c1 == '\t') goto S419; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (128 <= c && c <= 191) goto S259; + if (c == ' ') goto S419; + if (c == '\t') goto S419; + return m.FSM_HALT(c); S259: m.FSM_TAKE(61); - c1 = m.FSM_CHAR(); - if (128 <= c1 && c1 <= 191) goto S259; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (128 <= c && c <= 191) goto S259; + return m.FSM_HALT(c); S262: m.FSM_TAKE(61); - c1 = m.FSM_CHAR(); - if (c1 == 165) goto S423; - if (c1 == 164) goto S426; - if (c1 == 160) goto S429; - if (128 <= c1 && c1 <= 191) goto S259; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 165) goto S423; + if (c == 164) goto S426; + if (c == 160) goto S429; + if (128 <= c && c <= 191) goto S259; + return m.FSM_HALT(c); S268: m.FSM_TAKE(61); - c1 = m.FSM_CHAR(); - if (c1 == 170) goto S447; - if (c1 == 169) goto S450; - if (c1 == 168) goto S438; - if (c1 == 137) goto S444; - if (c1 == 136) goto S441; - if (c1 == 134) goto S453; - if (c1 == 133) goto S456; - if (c1 == 131) goto S435; - if (129 <= c1 && c1 <= 191) goto S259; - if (c1 == 128) goto S432; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 170) goto S447; + if (c == 169) goto S450; + if (c == 168) goto S438; + if (c == 137) goto S444; + if (c == 136) goto S441; + if (c == 134) goto S453; + if (c == 133) goto S456; + if (c == 131) goto S435; + if (129 <= c && c <= 191) goto S259; + if (c == 128) goto S432; + return m.FSM_HALT(c); S280: m.FSM_TAKE(61); - c1 = m.FSM_CHAR(); - if (c1 == 148) goto S462; - if (c1 == 146) goto 
S459; - if (128 <= c1 && c1 <= 191) goto S259; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 148) goto S462; + if (c == 146) goto S459; + if (128 <= c && c <= 191) goto S259; + return m.FSM_HALT(c); S285: m.FSM_TAKE(61); - c1 = m.FSM_CHAR(); - if (c1 == 134) goto S465; - if (c1 == 132) goto S471; - if (c1 == 130) goto S468; - if (128 <= c1 && c1 <= 191) goto S259; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 134) goto S465; + if (c == 132) goto S471; + if (c == 130) goto S468; + if (128 <= c && c <= 191) goto S259; + return m.FSM_HALT(c); S291: m.FSM_TAKE(61); - c1 = m.FSM_CHAR(); - if (c1 == 172) goto S474; - if (128 <= c1 && c1 <= 191) goto S259; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 172) goto S474; + if (128 <= c && c <= 191) goto S259; + return m.FSM_HALT(c); S295: m.FSM_TAKE(12); - c1 = m.FSM_CHAR(); - if (128 <= c1 && c1 <= 191) goto S259; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (128 <= c && c <= 191) goto S259; + return m.FSM_HALT(c); S298: - c1 = m.FSM_CHAR(); - if (c1 == 136) goto S477; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 136) goto S477; + return m.FSM_HALT(c); S300: m.FSM_TAKE(47); - c1 = m.FSM_CHAR(); - if (c1 == '=') goto S479; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == '=') goto S479; + return m.FSM_HALT(c); S303: - c1 = m.FSM_CHAR(); - if (c1 == '=') goto S481; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == '=') goto S481; + return m.FSM_HALT(c); S305: m.FSM_TAKE(23); - c1 = m.FSM_CHAR(); - if (128 <= c1 && c1 <= 191) goto S259; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (128 <= c && c <= 191) goto S259; + return m.FSM_HALT(c); S308: m.FSM_TAKE(46); - c1 = m.FSM_CHAR(); - if (c1 == 207) goto S326; - if (c1 == 206) goto S324; - if ('a' <= c1 && c1 <= 'z') goto S316; - if (c1 == '_') goto S316; - if ('A' <= c1 && c1 <= 'Z') goto S316; - if ('0' <= c1 && c1 <= '9') goto S483; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 207) goto S326; + if (c == 
206) goto S324; + if ('a' <= c && c <= 'z') goto S316; + if (c == '_') goto S316; + if ('A' <= c && c <= 'Z') goto S316; + if ('0' <= c && c <= '9') goto S483; + return m.FSM_HALT(c); S316: m.FSM_TAKE(46); - c1 = m.FSM_CHAR(); - if (c1 == 207) goto S326; - if (c1 == 206) goto S324; - if ('a' <= c1 && c1 <= 'z') goto S316; - if (c1 == '_') goto S316; - if ('A' <= c1 && c1 <= 'Z') goto S316; - if ('0' <= c1 && c1 <= '9') goto S316; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 207) goto S326; + if (c == 206) goto S324; + if ('a' <= c && c <= 'z') goto S316; + if (c == '_') goto S316; + if ('A' <= c && c <= 'Z') goto S316; + if ('0' <= c && c <= '9') goto S316; + return m.FSM_HALT(c); S324: - c1 = m.FSM_CHAR(); - if (177 <= c1 && c1 <= 191) goto S316; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (177 <= c && c <= 191) goto S316; + return m.FSM_HALT(c); S326: - c1 = m.FSM_CHAR(); - if (128 <= c1 && c1 <= 137) goto S316; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (128 <= c && c <= 137) goto S316; + return m.FSM_HALT(c); S328: m.FSM_TAKE(45); - c1 = m.FSM_CHAR(); - if (c1 == 207) goto S354; - if (c1 == 206) goto S352; - if ('a' <= c1 && c1 <= 'z') goto S344; - if (c1 == '_') goto S344; - if ('A' <= c1 && c1 <= 'Z') goto S344; - if ('0' <= c1 && c1 <= '9') goto S492; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 207) goto S354; + if (c == 206) goto S352; + if ('a' <= c && c <= 'z') goto S344; + if (c == '_') goto S344; + if ('A' <= c && c <= 'Z') goto S344; + if ('0' <= c && c <= '9') goto S492; + return m.FSM_HALT(c); S336: m.FSM_TAKE(43); - c1 = m.FSM_CHAR(); - if (c1 == 207) goto S354; - if (c1 == 206) goto S352; - if ('a' <= c1 && c1 <= 'z') goto S344; - if (c1 == '_') goto S344; - if ('A' <= c1 && c1 <= 'Z') goto S344; - if ('0' <= c1 && c1 <= '9') goto S336; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 207) goto S354; + if (c == 206) goto S352; + if ('a' <= c && c <= 'z') goto S344; + if (c == '_') goto S344; + if ('A' <= c 
&& c <= 'Z') goto S344; + if ('0' <= c && c <= '9') goto S336; + return m.FSM_HALT(c); S344: m.FSM_TAKE(45); - c1 = m.FSM_CHAR(); - if (c1 == 207) goto S354; - if (c1 == 206) goto S352; - if ('a' <= c1 && c1 <= 'z') goto S344; - if (c1 == '_') goto S344; - if ('A' <= c1 && c1 <= 'Z') goto S344; - if ('0' <= c1 && c1 <= '9') goto S344; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 207) goto S354; + if (c == 206) goto S352; + if ('a' <= c && c <= 'z') goto S344; + if (c == '_') goto S344; + if ('A' <= c && c <= 'Z') goto S344; + if ('0' <= c && c <= '9') goto S344; + return m.FSM_HALT(c); S352: - c1 = m.FSM_CHAR(); - if (177 <= c1 && c1 <= 191) goto S344; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (177 <= c && c <= 191) goto S344; + return m.FSM_HALT(c); S354: - c1 = m.FSM_CHAR(); - if (128 <= c1 && c1 <= 137) goto S344; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (128 <= c && c <= 137) goto S344; + return m.FSM_HALT(c); S356: m.FSM_TAKE(45); - c1 = m.FSM_CHAR(); - if (c1 == 207) goto S354; - if (c1 == 206) goto S352; - if ('a' <= c1 && c1 <= 'z') goto S344; - if (c1 == '_') goto S344; - if ('A' <= c1 && c1 <= 'Z') goto S344; - if ('0' <= c1 && c1 <= '9') goto S501; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 207) goto S354; + if (c == 206) goto S352; + if ('a' <= c && c <= 'z') goto S344; + if (c == '_') goto S344; + if ('A' <= c && c <= 'Z') goto S344; + if ('0' <= c && c <= '9') goto S501; + return m.FSM_HALT(c); S364: m.FSM_TAKE(42); - c1 = m.FSM_CHAR(); - if (c1 == 207) goto S354; - if (c1 == 206) goto S352; - if ('a' <= c1 && c1 <= 'z') goto S344; - if (c1 == '_') goto S344; - if ('A' <= c1 && c1 <= 'Z') goto S344; - if ('0' <= c1 && c1 <= '9') goto S364; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 207) goto S354; + if (c == 206) goto S352; + if ('a' <= c && c <= 'z') goto S344; + if (c == '_') goto S344; + if ('A' <= c && c <= 'Z') goto S344; + if ('0' <= c && c <= '9') goto S364; + return m.FSM_HALT(c); S372: 
m.FSM_TAKE(46); - c1 = m.FSM_CHAR(); - if (c1 == 207) goto S326; - if (c1 == 206) goto S324; - if (c1 == 'r') goto S510; - if ('a' <= c1 && c1 <= 'z') goto S316; - if (c1 == '_') goto S316; - if ('A' <= c1 && c1 <= 'Z') goto S316; - if ('0' <= c1 && c1 <= '9') goto S316; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 207) goto S326; + if (c == 206) goto S324; + if (c == 'r') goto S510; + if ('a' <= c && c <= 'z') goto S316; + if (c == '_') goto S316; + if ('A' <= c && c <= 'Z') goto S316; + if ('0' <= c && c <= '9') goto S316; + return m.FSM_HALT(c); S381: m.FSM_TAKE(46); - c1 = m.FSM_CHAR(); - if (c1 == 207) goto S326; - if (c1 == 206) goto S324; - if (c1 == 'o') goto S519; - if ('a' <= c1 && c1 <= 'z') goto S316; - if (c1 == '_') goto S316; - if ('A' <= c1 && c1 <= 'Z') goto S316; - if ('0' <= c1 && c1 <= '9') goto S316; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 207) goto S326; + if (c == 206) goto S324; + if (c == 'o') goto S519; + if ('a' <= c && c <= 'z') goto S316; + if (c == '_') goto S316; + if ('A' <= c && c <= 'Z') goto S316; + if ('0' <= c && c <= '9') goto S316; + return m.FSM_HALT(c); S390: m.FSM_TAKE(46); - c1 = m.FSM_CHAR(); - if (c1 == 207) goto S326; - if (c1 == 206) goto S324; - if (c1 == 'd') goto S528; - if ('a' <= c1 && c1 <= 'z') goto S316; - if (c1 == '_') goto S316; - if ('A' <= c1 && c1 <= 'Z') goto S316; - if ('0' <= c1 && c1 <= '9') goto S316; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 207) goto S326; + if (c == 206) goto S324; + if (c == 'd') goto S528; + if ('a' <= c && c <= 'z') goto S316; + if (c == '_') goto S316; + if ('A' <= c && c <= 'Z') goto S316; + if ('0' <= c && c <= '9') goto S316; + return m.FSM_HALT(c); S399: m.FSM_TAKE(46); - c1 = m.FSM_CHAR(); - if (c1 == 207) goto S326; - if (c1 == 206) goto S324; - if (c1 == 'b') goto S536; - if ('a' <= c1 && c1 <= 'z') goto S316; - if (c1 == '_') goto S316; - if ('A' <= c1 && c1 <= 'Z') goto S316; - if ('0' <= c1 && c1 <= '9') goto S316; - return 
m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 207) goto S326; + if (c == 206) goto S324; + if (c == 'b') goto S536; + if ('a' <= c && c <= 'z') goto S316; + if (c == '_') goto S316; + if ('A' <= c && c <= 'Z') goto S316; + if ('0' <= c && c <= '9') goto S316; + return m.FSM_HALT(c); S408: m.FSM_TAKE(44); - c1 = m.FSM_CHAR(); - if (c1 == 207) goto S354; - if (c1 == 206) goto S352; - if ('a' <= c1 && c1 <= 'z') goto S344; - if (c1 == '_') goto S344; - if ('A' <= c1 && c1 <= 'Z') goto S344; - if ('0' <= c1 && c1 <= '9') goto S408; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 207) goto S354; + if (c == 206) goto S352; + if ('a' <= c && c <= 'z') goto S344; + if (c == '_') goto S344; + if ('A' <= c && c <= 'Z') goto S344; + if ('0' <= c && c <= '9') goto S408; + return m.FSM_HALT(c); S416: m.FSM_TAKE(41); - c1 = m.FSM_CHAR(); - if ('0' <= c1 && c1 <= '9') goto S416; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if ('0' <= c && c <= '9') goto S416; + return m.FSM_HALT(c); S419: m.FSM_TAKE(60); - c1 = m.FSM_CHAR(); - if (c1 == ' ') goto S419; - if (c1 == '\t') goto S419; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == ' ') goto S419; + if (c == '\t') goto S419; + return m.FSM_HALT(c); S423: m.FSM_TAKE(6); - c1 = m.FSM_CHAR(); - if (128 <= c1 && c1 <= 191) goto S259; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (128 <= c && c <= 191) goto S259; + return m.FSM_HALT(c); S426: m.FSM_TAKE(7); - c1 = m.FSM_CHAR(); - if (128 <= c1 && c1 <= 191) goto S259; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (128 <= c && c <= 191) goto S259; + return m.FSM_HALT(c); S429: m.FSM_TAKE(9); - c1 = m.FSM_CHAR(); - if (128 <= c1 && c1 <= 191) goto S259; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (128 <= c && c <= 191) goto S259; + return m.FSM_HALT(c); S432: m.FSM_TAKE(10); - c1 = m.FSM_CHAR(); - if (128 <= c1 && c1 <= 191) goto S259; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (128 <= c && c <= 191) goto S259; + return m.FSM_HALT(c); S435: 
m.FSM_TAKE(11); - c1 = m.FSM_CHAR(); - if (128 <= c1 && c1 <= 191) goto S259; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (128 <= c && c <= 191) goto S259; + return m.FSM_HALT(c); S438: m.FSM_TAKE(14); - c1 = m.FSM_CHAR(); - if (128 <= c1 && c1 <= 191) goto S259; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (128 <= c && c <= 191) goto S259; + return m.FSM_HALT(c); S441: m.FSM_TAKE(18); - c1 = m.FSM_CHAR(); - if (128 <= c1 && c1 <= 191) goto S259; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (128 <= c && c <= 191) goto S259; + return m.FSM_HALT(c); S444: m.FSM_TAKE(19); - c1 = m.FSM_CHAR(); - if (128 <= c1 && c1 <= 191) goto S259; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (128 <= c && c <= 191) goto S259; + return m.FSM_HALT(c); S447: m.FSM_TAKE(24); - c1 = m.FSM_CHAR(); - if (128 <= c1 && c1 <= 191) goto S259; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (128 <= c && c <= 191) goto S259; + return m.FSM_HALT(c); S450: m.FSM_TAKE(25); - c1 = m.FSM_CHAR(); - if (128 <= c1 && c1 <= 191) goto S259; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (128 <= c && c <= 191) goto S259; + return m.FSM_HALT(c); S453: m.FSM_TAKE(27); - c1 = m.FSM_CHAR(); - if (128 <= c1 && c1 <= 191) goto S259; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (128 <= c && c <= 191) goto S259; + return m.FSM_HALT(c); S456: m.FSM_TAKE(40); - c1 = m.FSM_CHAR(); - if (128 <= c1 && c1 <= 191) goto S259; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (128 <= c && c <= 191) goto S259; + return m.FSM_HALT(c); S459: m.FSM_TAKE(15); - c1 = m.FSM_CHAR(); - if (128 <= c1 && c1 <= 191) goto S259; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (128 <= c && c <= 191) goto S259; + return m.FSM_HALT(c); S462: m.FSM_TAKE(16); - c1 = m.FSM_CHAR(); - if (128 <= c1 && c1 <= 191) goto S259; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (128 <= c && c <= 191) goto S259; + return m.FSM_HALT(c); S465: m.FSM_TAKE(20); - c1 = m.FSM_CHAR(); - if (128 <= c1 && c1 <= 191) goto S259; - 
return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (128 <= c && c <= 191) goto S259; + return m.FSM_HALT(c); S468: m.FSM_TAKE(21); - c1 = m.FSM_CHAR(); - if (128 <= c1 && c1 <= 191) goto S259; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (128 <= c && c <= 191) goto S259; + return m.FSM_HALT(c); S471: m.FSM_TAKE(22); - c1 = m.FSM_CHAR(); - if (128 <= c1 && c1 <= 191) goto S259; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (128 <= c && c <= 191) goto S259; + return m.FSM_HALT(c); S474: m.FSM_TAKE(28); - c1 = m.FSM_CHAR(); - if (128 <= c1 && c1 <= 191) goto S259; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (128 <= c && c <= 191) goto S259; + return m.FSM_HALT(c); S477: - c1 = m.FSM_CHAR(); - if (c1 == 136) goto S545; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 136) goto S545; + return m.FSM_HALT(c); S479: m.FSM_TAKE(48); @@ -1142,183 +1142,183 @@ S481: S483: m.FSM_TAKE(29); - c1 = m.FSM_CHAR(); - if (c1 == 207) goto S326; - if (c1 == 206) goto S324; - if ('a' <= c1 && c1 <= 'z') goto S316; - if (c1 == '_') goto S316; - if ('A' <= c1 && c1 <= 'Z') goto S316; - if ('0' <= c1 && c1 <= '9') goto S483; - if (c1 == ',') goto S547; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 207) goto S326; + if (c == 206) goto S324; + if ('a' <= c && c <= 'z') goto S316; + if (c == '_') goto S316; + if ('A' <= c && c <= 'Z') goto S316; + if ('0' <= c && c <= '9') goto S483; + if (c == ',') goto S547; + return m.FSM_HALT(c); S492: m.FSM_TAKE(30); - c1 = m.FSM_CHAR(); - if (c1 == 207) goto S354; - if (c1 == 206) goto S352; - if ('a' <= c1 && c1 <= 'z') goto S344; - if (c1 == '_') goto S344; - if ('A' <= c1 && c1 <= 'Z') goto S344; - if ('0' <= c1 && c1 <= '9') goto S492; - if (c1 == ',') goto S549; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 207) goto S354; + if (c == 206) goto S352; + if ('a' <= c && c <= 'z') goto S344; + if (c == '_') goto S344; + if ('A' <= c && c <= 'Z') goto S344; + if ('0' <= c && c <= '9') goto S492; + if (c == ',') 
goto S549; + return m.FSM_HALT(c); S501: m.FSM_TAKE(31); - c1 = m.FSM_CHAR(); - if (c1 == 207) goto S354; - if (c1 == 206) goto S352; - if ('a' <= c1 && c1 <= 'z') goto S344; - if (c1 == '_') goto S344; - if ('A' <= c1 && c1 <= 'Z') goto S344; - if ('0' <= c1 && c1 <= '9') goto S501; - if (c1 == ',') goto S551; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 207) goto S354; + if (c == 206) goto S352; + if ('a' <= c && c <= 'z') goto S344; + if (c == '_') goto S344; + if ('A' <= c && c <= 'Z') goto S344; + if ('0' <= c && c <= '9') goto S501; + if (c == ',') goto S551; + return m.FSM_HALT(c); S510: m.FSM_TAKE(46); - c1 = m.FSM_CHAR(); - if (c1 == 207) goto S326; - if (c1 == 206) goto S324; - if (c1 == 'd') goto S553; - if ('a' <= c1 && c1 <= 'z') goto S316; - if (c1 == '_') goto S316; - if ('A' <= c1 && c1 <= 'Z') goto S316; - if ('0' <= c1 && c1 <= '9') goto S316; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 207) goto S326; + if (c == 206) goto S324; + if (c == 'd') goto S553; + if ('a' <= c && c <= 'z') goto S316; + if (c == '_') goto S316; + if ('A' <= c && c <= 'Z') goto S316; + if ('0' <= c && c <= '9') goto S316; + return m.FSM_HALT(c); S519: m.FSM_TAKE(46); - c1 = m.FSM_CHAR(); - if (c1 == 207) goto S326; - if (c1 == 206) goto S324; - if (c1 == 'l') goto S561; - if ('a' <= c1 && c1 <= 'z') goto S316; - if (c1 == '_') goto S316; - if ('A' <= c1 && c1 <= 'Z') goto S316; - if ('0' <= c1 && c1 <= '9') goto S316; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 207) goto S326; + if (c == 206) goto S324; + if (c == 'l') goto S561; + if ('a' <= c && c <= 'z') goto S316; + if (c == '_') goto S316; + if ('A' <= c && c <= 'Z') goto S316; + if ('0' <= c && c <= '9') goto S316; + return m.FSM_HALT(c); S528: m.FSM_TAKE(34); - c1 = m.FSM_CHAR(); - if (c1 == 207) goto S326; - if (c1 == 206) goto S324; - if ('a' <= c1 && c1 <= 'z') goto S316; - if (c1 == '_') goto S316; - if ('A' <= c1 && c1 <= 'Z') goto S316; - if ('0' <= c1 && c1 <= '9') goto 
S316; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 207) goto S326; + if (c == 206) goto S324; + if ('a' <= c && c <= 'z') goto S316; + if (c == '_') goto S316; + if ('A' <= c && c <= 'Z') goto S316; + if ('0' <= c && c <= '9') goto S316; + return m.FSM_HALT(c); S536: m.FSM_TAKE(46); - c1 = m.FSM_CHAR(); - if (c1 == 207) goto S326; - if (c1 == 206) goto S324; - if (c1 == 'o') goto S569; - if ('a' <= c1 && c1 <= 'z') goto S316; - if (c1 == '_') goto S316; - if ('A' <= c1 && c1 <= 'Z') goto S316; - if ('0' <= c1 && c1 <= '9') goto S316; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 207) goto S326; + if (c == 206) goto S324; + if (c == 'o') goto S569; + if ('a' <= c && c <= 'z') goto S316; + if (c == '_') goto S316; + if ('A' <= c && c <= 'Z') goto S316; + if ('0' <= c && c <= '9') goto S316; + return m.FSM_HALT(c); S545: m.FSM_TAKE(17); return m.FSM_HALT(); S547: - c1 = m.FSM_CHAR(); - if ('0' <= c1 && c1 <= '9') goto S578; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if ('0' <= c && c <= '9') goto S578; + return m.FSM_HALT(c); S549: - c1 = m.FSM_CHAR(); - if ('0' <= c1 && c1 <= '9') goto S582; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if ('0' <= c && c <= '9') goto S582; + return m.FSM_HALT(c); S551: - c1 = m.FSM_CHAR(); - if ('0' <= c1 && c1 <= '9') goto S586; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if ('0' <= c && c <= '9') goto S586; + return m.FSM_HALT(c); S553: m.FSM_TAKE(32); - c1 = m.FSM_CHAR(); - if (c1 == 207) goto S326; - if (c1 == 206) goto S324; - if ('a' <= c1 && c1 <= 'z') goto S316; - if (c1 == '_') goto S316; - if ('A' <= c1 && c1 <= 'Z') goto S316; - if ('0' <= c1 && c1 <= '9') goto S316; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 207) goto S326; + if (c == 206) goto S324; + if ('a' <= c && c <= 'z') goto S316; + if (c == '_') goto S316; + if ('A' <= c && c <= 'Z') goto S316; + if ('0' <= c && c <= '9') goto S316; + return m.FSM_HALT(c); S561: m.FSM_TAKE(33); - c1 = m.FSM_CHAR(); - if (c1 == 207) goto 
S326; - if (c1 == 206) goto S324; - if ('a' <= c1 && c1 <= 'z') goto S316; - if (c1 == '_') goto S316; - if ('A' <= c1 && c1 <= 'Z') goto S316; - if ('0' <= c1 && c1 <= '9') goto S316; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 207) goto S326; + if (c == 206) goto S324; + if ('a' <= c && c <= 'z') goto S316; + if (c == '_') goto S316; + if ('A' <= c && c <= 'Z') goto S316; + if ('0' <= c && c <= '9') goto S316; + return m.FSM_HALT(c); S569: m.FSM_TAKE(46); - c1 = m.FSM_CHAR(); - if (c1 == 207) goto S326; - if (c1 == 206) goto S324; - if (c1 == 'o') goto S590; - if ('a' <= c1 && c1 <= 'z') goto S316; - if (c1 == '_') goto S316; - if ('A' <= c1 && c1 <= 'Z') goto S316; - if ('0' <= c1 && c1 <= '9') goto S316; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 207) goto S326; + if (c == 206) goto S324; + if (c == 'o') goto S590; + if ('a' <= c && c <= 'z') goto S316; + if (c == '_') goto S316; + if ('A' <= c && c <= 'Z') goto S316; + if ('0' <= c && c <= '9') goto S316; + return m.FSM_HALT(c); S578: m.FSM_TAKE(29); - c1 = m.FSM_CHAR(); - if ('0' <= c1 && c1 <= '9') goto S578; - if (c1 == ',') goto S547; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if ('0' <= c && c <= '9') goto S578; + if (c == ',') goto S547; + return m.FSM_HALT(c); S582: m.FSM_TAKE(30); - c1 = m.FSM_CHAR(); - if ('0' <= c1 && c1 <= '9') goto S582; - if (c1 == ',') goto S549; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if ('0' <= c && c <= '9') goto S582; + if (c == ',') goto S549; + return m.FSM_HALT(c); S586: m.FSM_TAKE(31); - c1 = m.FSM_CHAR(); - if ('0' <= c1 && c1 <= '9') goto S586; - if (c1 == ',') goto S551; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if ('0' <= c && c <= '9') goto S586; + if (c == ',') goto S551; + return m.FSM_HALT(c); S590: m.FSM_TAKE(46); - c1 = m.FSM_CHAR(); - if (c1 == 207) goto S326; - if (c1 == 206) goto S324; - if (c1 == 'l') goto S599; - if ('a' <= c1 && c1 <= 'z') goto S316; - if (c1 == '_') goto S316; - if ('A' <= c1 && c1 <= 'Z') goto 
S316; - if ('0' <= c1 && c1 <= '9') goto S316; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 207) goto S326; + if (c == 206) goto S324; + if (c == 'l') goto S599; + if ('a' <= c && c <= 'z') goto S316; + if (c == '_') goto S316; + if ('A' <= c && c <= 'Z') goto S316; + if ('0' <= c && c <= '9') goto S316; + return m.FSM_HALT(c); S599: m.FSM_TAKE(35); - c1 = m.FSM_CHAR(); - if (c1 == 207) goto S326; - if (c1 == 206) goto S324; - if ('a' <= c1 && c1 <= 'z') goto S316; - if (c1 == '_') goto S316; - if ('A' <= c1 && c1 <= 'Z') goto S316; - if ('0' <= c1 && c1 <= '9') goto S316; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if (c == 207) goto S326; + if (c == 206) goto S324; + if ('a' <= c && c <= 'z') goto S316; + if (c == '_') goto S316; + if ('A' <= c && c <= 'Z') goto S316; + if ('0' <= c && c <= '9') goto S316; + return m.FSM_HALT(c); } } // namespace ccl diff --git a/ccl/rslang/import/reflex/include/reflex/absmatcher.h b/ccl/rslang/import/reflex/include/reflex/absmatcher.h index 6f3a135..bd8dbc7 100644 --- a/ccl/rslang/import/reflex/include/reflex/absmatcher.h +++ b/ccl/rslang/import/reflex/include/reflex/absmatcher.h @@ -30,7 +30,7 @@ @file absmatcher.h @brief RE/flex abstract matcher base class and pattern matcher class @author Robert van Engelen - engelen@genivia.com -@copyright (c) 2016-2022, Robert van Engelen, Genivia Inc. All rights reserved. +@copyright (c) 2016-2024, Robert van Engelen, Genivia Inc. All rights reserved. @copyright (c) BSD-3 License - see LICENSE.txt */ @@ -42,12 +42,12 @@ #define WITH_REALLOC 1 #endif -/// This compile-time option speeds up matching, but slows input(). +/// This compile-time option speeds up matching, but slows input() somewhat. #ifndef WITH_FAST_GET #define WITH_FAST_GET 1 #endif -/// This compile-time option adds span(), line(), wline(), bol(), eol() +/// This compile-time option adds span(), line(), wline(), bol(), eol(). 
#ifndef WITH_SPAN #define WITH_SPAN 1 #endif @@ -111,7 +111,7 @@ class AbstractMatcher { static const int EOB = EOF; ///< end of buffer meta-char marker static const size_t BLOCK = 4096; ///< minimum remaining unused space in the buffer, to prevent excessive shifting #ifndef REFLEX_BUFSZ - static const size_t BUFSZ = (128*1024); ///< initial buffer size, at least 4096 bytes + static const size_t BUFSZ = (256*1024); ///< initial buffer size, at least 4096 bytes #else static const size_t BUFSZ = REFLEX_BUFSZ; #endif @@ -154,11 +154,13 @@ class AbstractMatcher { A(false), N(false), W(false), + X(false), T(8) { } bool A; ///< accept any/all (?^X) negative patterns as Const::REDO accept index codes bool N; ///< nullable, find may return empty match (N/A to scan, split, matches) - bool W; ///< half-check for "whole words", check only left of \< and right of \> for non-word character + bool W; ///< reflex::Matcher matches whole words as if bound by \< and \> + bool X; ///< reflex::LineMatcher matches empty lines char T; ///< tab size, must be a power of 2, default is 8, for column count and indent \i, \j, and \k }; /// AbstractMatcher::Iterator class for scanning, searching, and splitting input character sequences. @@ -356,7 +358,8 @@ class AbstractMatcher { { opt_.A = false; // when true: accept any/all (?^X) negative patterns as Const::REDO accept index codes opt_.N = false; // when true: find may return empty match (N/A to scan, split, matches) - opt_.W = false; // when true: half-check for "whole words", check only left of \< and right of \> for non-word character + opt_.W = false; // when true: reflex::Matcher matches whole words as if bound by \< and \> + opt_.X = false; // when true: reflex::LineMatcher matches empty lines opt_.T = 8; // tab size 1, 2, 4, or 8 if (opt) { @@ -373,6 +376,9 @@ class AbstractMatcher { case 'W': opt_.W = true; break; + case 'X': + opt_.X = true; + break; case 'T': opt_.T = isdigit(*(s += (s[1] == '=') + 1)) ? 
static_cast(*s - '0') : 0; break; @@ -422,6 +428,7 @@ class AbstractMatcher { own_ = true; eof_ = false; mat_ = false; + cml_ = false; } /// Set buffer block size for reading: use 0 (or omit argument) to buffer all input in which case returns true if all the data could be read and false if a read error occurred. bool buffer(size_t blk = 0) ///< new block size between 1 and Const::BLOCK, or 0 to buffer all input (default) @@ -486,7 +493,7 @@ class AbstractMatcher { (void)buffer(1); } /// Flush the buffer's remaining content. - void flush() + inline void flush() { DBGLOG("AbstractMatcher::flush()"); pos_ = end_; @@ -560,6 +567,7 @@ class AbstractMatcher { own_ = false; eof_ = true; mat_ = false; + cml_ = false; } return *this; } @@ -645,6 +653,13 @@ class AbstractMatcher { { return utf8(txt_); } +#if WITH_SPAN + /// Set or reset mode to count matching lines only and skip other (e.g. for speed). + inline void lineno_skip(bool f = false) + { + cml_ = f; + } +#endif /// Set or change the starting line number of the last match. inline void lineno(size_t n) ///< new line number { @@ -652,7 +667,7 @@ class AbstractMatcher { lno_ = n; } /// Updates and returns the starting line number of the match in the input character sequence. 
- inline size_t lineno() + size_t lineno() /// @returns line number { #if WITH_SPAN @@ -661,55 +676,16 @@ class AbstractMatcher { const char *s = lpb_; const char *t = txt_; size_t n = 0; -#if defined(HAVE_AVX512BW) && (!defined(_MSC_VER) || defined(_WIN64)) - if (have_HW_AVX512BW()) - n = simd_nlcount_avx512bw(s, t); - else if (have_HW_AVX2()) - n = simd_nlcount_avx2(s, t); + if (cml_) + { + // count number of matching lines only, not line numbers + n = std::memchr(s, '\n', t - s) != NULL; + } else - n = simd_nlcount_sse2(s, t); -#elif defined(HAVE_AVX2) - if (have_HW_AVX2()) - n = simd_nlcount_avx2(s, t); - else - n = simd_nlcount_sse2(s, t); -#elif defined(HAVE_SSE2) - n = simd_nlcount_sse2(s, t); -#endif -#if defined(HAVE_NEON) - // no ARM AArch64/NEON SIMD optimized loop? - no code that runs faster than the code below?! - uint32_t n0 = 0, n1 = 0; - while (s < t - 1) { - n0 += s[0] == '\n'; - n1 += s[1] == '\n'; - s += 2; + // count line numbers + n = nlcount(s, t); } - n += n0 + n1 + (s < t && *s == '\n'); -#else - // clang/gcc 4-way auto-vectorizable loop - uint32_t n0 = 0, n1 = 0, n2 = 0, n3 = 0; - while (s < t - 3) - { - n0 += s[0] == '\n'; - n1 += s[1] == '\n'; - n2 += s[2] == '\n'; - n3 += s[3] == '\n'; - s += 4; - } - n += n0 + n1 + n2 + n3; - // epilogue - if (s < t) - { - n += *s == '\n'; - if (++s < t) - { - n += *s == '\n'; - if (++s < t) - n += *s == '\n'; - } - } -#endif // if newlines are detected, then find begin of the last line to adjust bol if (n > 0) { @@ -956,13 +932,13 @@ class AbstractMatcher { else if (got_ == '\n') got_ = Const::UNK; } - /// Returns true if this matcher matched text that begins a word. + /// Returns true if this matcher matched text that begins an ASCII word. inline bool at_bow() /// @returns true if this matcher matched text that begins a word { return !isword(got_) && isword(txt_ < buf_ + end_ ? static_cast(*txt_) : peek_more()); } - /// Returns true if this matcher matched text that ends a word. 
+ /// Returns true if this matcher matched text that ends an ASCII word. inline bool at_eow() /// @returns true if this matcher matched text that ends a word { @@ -1116,21 +1092,37 @@ class AbstractMatcher { } return buf_ + end_; } + /// Return number of bytes available given number of bytes to fetch ahead, limited by input size and buffer size + inline size_t fetch(size_t len) + /// @returns number of bytes available after fetching. + { + DBGLOG("AbstractMatcher::fetch(%zu)", len); + if (eof_) + return 0; + if (len <= end_ - (txt_ - buf_)) + return end_ - (txt_ - buf_); + if (end_ + len + 1 >= max_) + (void)grow(); + if (end_ + len + 1 >= max_) + len = max_ - end_ - 1; + end_ += get(buf_ + end_, len); + return avail(); + } /// Returns the number of bytes in the buffer available to search from the current begin()/text() position. - size_t avail() + inline size_t avail() { if (peek() == EOF) return 0; return end_ - (txt_ - buf_); } /// Returns the byte offset of the match from the start of the line. - size_t border() + inline size_t border() /// @returns border offset { return txt_ - bol(); } /// Enlarge the match to span the entire line of input (excluding \n), return text(). - const char *span() + inline const char *span() /// @returns const char* span of text for the entire line { DBGLOG("AbstractMatcher::span()"); @@ -1146,7 +1138,7 @@ class AbstractMatcher { return text(); } /// Returns the line of input (excluding \n) as a string containing the matched text as a substring. - std::string line() + inline std::string line() /// @returns matching line as a string { DBGLOG("AbstractMatcher::line()"); @@ -1156,7 +1148,7 @@ class AbstractMatcher { return std::string(b, e - b); } /// Returns the line of input (excluding \n) as a wide string containing the matched text as a substring. 
- std::wstring wline() + inline std::wstring wline() /// @returns matching line as a wide string { DBGLOG("AbstractMatcher::wline()"); @@ -1252,12 +1244,12 @@ class AbstractMatcher { return text(); } /// Append the next match to the currently matched text returned by AbstractMatcher::text, when the next match found is adjacent to the current match. - void more() + inline void more() { cur_ = txt_ - buf_; } /// Truncate the AbstractMatcher::text length of the match to n characters in length and reposition for next match. - void less(size_t n) ///< truncated string length + inline void less(size_t n) ///< truncated string length { if (n < len_) { @@ -1270,80 +1262,80 @@ class AbstractMatcher { } } /// Cast this matcher to positive integer indicating the nonzero capture index of the matched text in the pattern, same as AbstractMatcher::accept. - operator size_t() const + inline operator size_t() const /// @returns nonzero capture index of a match, which may be matcher dependent, or zero for a mismatch { return accept(); } /// Cast this matcher to a std::string of the text matched by this matcher. - operator std::string() const + inline operator std::string() const /// @returns std::string with matched text { return str(); } /// Cast this matcher to a std::wstring of the text matched by this matcher. - operator std::wstring() const + inline operator std::wstring() const /// @returns std::wstring converted to UCS from the 0-terminated matched UTF-8 text { return wstr(); } /// Cast the match to std::pair(accept(), wstr()), useful for tokenization into containers. - operator std::pair() const + inline operator std::pair() const /// @returns std::pair(accept(), wstr()) { return pair(); } /// Returns true if matched text is equal to a string, useful for std::algorithm. 
- bool operator==(const char *rhs) ///< rhs string to compare to + inline bool operator==(const char *rhs) ///< rhs string to compare to /// @returns true if matched text is equal to rhs string const { return std::strncmp(rhs, txt_, len_) == 0 && rhs[len_] == '\0'; } /// Returns true if matched text is equalt to a string, useful for std::algorithm. - bool operator==(const std::string& rhs) ///< rhs string to compare to + inline bool operator==(const std::string& rhs) ///< rhs string to compare to /// @returns true if matched text is equal to rhs string const { return rhs.size() == len_ && rhs.compare(0, std::string::npos, txt_, len_) == 0; } /// Returns true if capture index is equal to a given size_t value, useful for std::algorithm. - bool operator==(size_t rhs) ///< capture index to compare accept() to + inline bool operator==(size_t rhs) ///< capture index to compare accept() to /// @returns true if capture index is equal to rhs const { return accept() == rhs; } /// Returns true if capture index is equal to a given int value, useful for std::algorithm. - bool operator==(int rhs) ///< capture index to compare accept() to + inline bool operator==(int rhs) ///< capture index to compare accept() to /// @returns true if capture index is equal to rhs const { return static_cast(accept()) == rhs; } /// Returns true if matched text is not equal to a string, useful for std::algorithm. - bool operator!=(const char *rhs) ///< rhs string to compare to + inline bool operator!=(const char *rhs) ///< rhs string to compare to /// @returns true if matched text is not equal to rhs string const { return std::strncmp(rhs, txt_, len_) != 0 || rhs[len_] != '\0'; // if static checkers complain here, they are wrong } /// Returns true if matched text is not equal to a string, useful for std::algorithm. 
- bool operator!=(const std::string& rhs) ///< rhs string to compare to + inline bool operator!=(const std::string& rhs) ///< rhs string to compare to /// @returns true if matched text is not equal to rhs string const { return rhs.size() > len_ || rhs.compare(0, std::string::npos, txt_, len_) != 0; } /// Returns true if capture index is not equal to a given size_t value, useful for std::algorithm. - bool operator!=(size_t rhs) ///< capture index to compare accept() to + inline bool operator!=(size_t rhs) ///< capture index to compare accept() to /// @returns true if capture index is not equal to rhs const { return accept() != rhs; } /// Returns true if capture index is not equal to a given int value, useful for std::algorithm. - bool operator!=(int rhs) ///< capture index to compare accept() to + inline bool operator!=(int rhs) ///< capture index to compare accept() to /// @returns true if capture index is not equal to rhs const { @@ -1632,6 +1624,7 @@ class AbstractMatcher { bool own_; ///< true if AbstractMatcher::buf_ was allocated and should be deleted bool eof_; ///< input has reached EOF bool mat_; ///< true if AbstractMatcher::matches() was successful + bool cml_; ///< true when counting matching lines instead of line numbers }; /// The pattern matcher class template extends abstract matcher base class. @@ -1656,7 +1649,7 @@ class PatternMatcher : public AbstractMatcher { delete pat_; } /// Assign a matcher, the underlying pattern object is shared (not deep copied). 
- PatternMatcher& operator=(const PatternMatcher& matcher) ///< matcher with pattern to use (pattern may be shared) + virtual PatternMatcher& operator=(const PatternMatcher& matcher) ///< matcher with pattern to use (pattern may be shared) { scan.init(this, Const::SCAN); find.init(this, Const::FIND); @@ -1664,9 +1657,7 @@ class PatternMatcher : public AbstractMatcher { in = matcher.in; reset(); opt_ = matcher.opt_; - pat_ = matcher.pat_, - own_ = false; - return *this; + return pattern(matcher.pat_); } /// Set the pattern to use with this matcher as a shared pointer to another matcher pattern. virtual PatternMatcher& pattern(const PatternMatcher& matcher) ///< the other matcher @@ -1726,19 +1717,19 @@ class PatternMatcher : public AbstractMatcher { return *this; } /// Returns true if this matcher has a pattern. - bool has_pattern() const + inline bool has_pattern() const /// @returns true if this matcher has a pattern { return pat_ != NULL; } /// Returns true if this matcher has its own pattern not received from another matcher (responsible to delete). - bool own_pattern() const + inline bool own_pattern() const /// @returns true if this matcher has its own pattern { return own_ && pat_ != NULL; } /// Returns a reference to the pattern object associated with this matcher. - const Pattern& pattern() const + virtual const Pattern& pattern() const /// @returns reference to pattern object { ASSERT(pat_ != NULL); @@ -1809,7 +1800,7 @@ class PatternMatcher : public AbstractMatcher { delete pat_; } /// Assign a matcher, the underlying pattern string is shared (not deep copied). 
- PatternMatcher& operator=(const PatternMatcher& matcher) ///< matcher with pattern to use (pattern may be shared) + virtual PatternMatcher& operator=(const PatternMatcher& matcher) ///< matcher with pattern to use (pattern may be shared) { scan.init(this, Const::SCAN); find.init(this, Const::FIND); @@ -1817,9 +1808,7 @@ class PatternMatcher : public AbstractMatcher { in = matcher.in; reset(); opt_ = matcher.opt_; - pat_ = matcher.pat_, - own_ = false; - return *this; + return pattern(matcher.pat_); } /// Set the pattern to use with this matcher as a shared pointer to another matcher pattern. virtual PatternMatcher& pattern(const PatternMatcher& matcher) ///< the other matcher @@ -1865,19 +1854,19 @@ class PatternMatcher : public AbstractMatcher { return *this; } /// Returns true if this matcher has a pattern. - bool has_pattern() const + inline bool has_pattern() const /// @returns true if this matcher has a pattern { return pat_ != NULL; } /// Returns true if this matcher has its own pattern not received from another matcher (responsible to delete). - bool own_pattern() const + inline bool own_pattern() const /// @returns true if this matcher has its own pattern { return own_ && pat_ != NULL; } /// Returns a reference to the pattern string associated with this matcher. - const Pattern& pattern() const + virtual const Pattern& pattern() const /// @returns reference to pattern string { ASSERT(pat_ != NULL); diff --git a/ccl/rslang/import/reflex/include/reflex/fuzzymatcher.h b/ccl/rslang/import/reflex/include/reflex/fuzzymatcher.h new file mode 100644 index 0000000..b332ede --- /dev/null +++ b/ccl/rslang/import/reflex/include/reflex/fuzzymatcher.h @@ -0,0 +1,1178 @@ +/******************************************************************************\ +* Copyright (c) 2016, Robert van Engelen, Genivia Inc. All rights reserved. 
* +* * +* Redistribution and use in source and binary forms, with or without * +* modification, are permitted provided that the following conditions are met: * +* * +* (1) Redistributions of source code must retain the above copyright notice, * +* this list of conditions and the following disclaimer. * +* * +* (2) Redistributions in binary form must reproduce the above copyright * +* notice, this list of conditions and the following disclaimer in the * +* documentation and/or other materials provided with the distribution. * +* * +* (3) The name of the author may not be used to endorse or promote products * +* derived from this software without specific prior written permission. * +* * +* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED * +* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF * +* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO * +* EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * +* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * +* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * +* OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * +* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * +* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * +* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * +\******************************************************************************/ + +/** +@file fuzzymatcher.h +@brief RE/flex fuzzy matcher engine +@author Robert van Engelen - engelen@genivia.com +@copyright (c) 2016-2020, Robert van Engelen, Genivia Inc. All rights reserved. 
+@copyright (c) BSD-3 License - see LICENSE.txt +*/ + +#ifndef REFLEX_FUZZYMATCHER_H +#define REFLEX_FUZZYMATCHER_H + +#include +#include + +namespace reflex { + +/// RE/flex fuzzy matcher engine class, implements reflex::Matcher fuzzy pattern matching interface with scan, find, split functors and iterators. +/** More info TODO */ +class FuzzyMatcher : public Matcher { + public: + /// Optional flags for the max parameter to constrain fuzzy matching, otherwise no constraints + static const uint16_t INS = 0x1000; ///< fuzzy match allows character insertions (default) + static const uint16_t DEL = 0x2000; ///< fuzzy match allows character deletions (default) + static const uint16_t SUB = 0x4000; ///< character substitutions count as one edit, not two (insert+delete) (default) + static const uint16_t BIN = 0x8000; ///< binary matching without UTF-8 multibyte encodings + /// Default constructor. + FuzzyMatcher() + : + Matcher() + { + distance(1); + } + /// Construct matcher engine from a pattern or a string regex, and an input character sequence. + template /// @tparam

a reflex::Pattern or a string regex + FuzzyMatcher( + const P *pattern, ///< points to a reflex::Pattern or a string regex for this matcher + const Input& input = Input(), ///< input character sequence for this matcher + const char *opt = NULL) ///< option string of the form `(A|N|T(=[[:digit:]])?|;)*` + : + Matcher(pattern, input, opt) + { + distance(1); + } + /// Construct matcher engine from a pattern or a string regex, and an input character sequence. + template /// @tparam

a reflex::Pattern or a string regex + FuzzyMatcher( + const P *pattern, ///< points to a reflex::Pattern or a string regex for this matcher + uint16_t max, ///< max errors + const Input& input = Input(), ///< input character sequence for this matcher + const char *opt = NULL) ///< option string of the form `(A|N|T(=[[:digit:]])?|;)*` + : + Matcher(pattern, input, opt) + { + distance(max); + } + /// Construct matcher engine from a pattern or a string regex, and an input character sequence. + template /// @tparam

a reflex::Pattern or a string regex + FuzzyMatcher( + const P& pattern, ///< a reflex::Pattern or a string regex for this matcher + const Input& input = Input(), ///< input character sequence for this matcher + const char *opt = NULL) ///< option string of the form `(A|N|T(=[[:digit:]])?|;)*` + : + Matcher(pattern, input, opt) + { + distance(1); + } + /// Construct matcher engine from a pattern or a string regex, and an input character sequence. + template /// @tparam

a reflex::Pattern or a string regex + FuzzyMatcher( + const P& pattern, ///< a reflex::Pattern or a string regex for this matcher + uint16_t max, ///< max errors + const Input& input = Input(), ///< input character sequence for this matcher + const char *opt = NULL) ///< option string of the form `(A|N|T(=[[:digit:]])?|;)*` + : + Matcher(pattern, input, opt) + { + distance(max); + } + /// Copy constructor. + FuzzyMatcher(const FuzzyMatcher& matcher) ///< matcher to copy with pattern (pattern may be shared) + : + Matcher(matcher), + max_(matcher.max_), + err_(0), + ins_(matcher.ins_), + del_(matcher.del_), + sub_(matcher.sub_), + bin_(matcher.bin_) + { + DBGLOG("FuzzyMatcher::FuzzyMatcher(matcher)"); + bpt_.resize(max_); + } + /// Assign a matcher. + FuzzyMatcher& operator=(const FuzzyMatcher& matcher) ///< matcher to copy + { + Matcher::operator=(matcher); + max_ = matcher.max_; + err_ = 0; + ins_ = matcher.ins_; + del_ = matcher.del_; + sub_ = matcher.sub_; + bin_ = matcher.bin_; + bpt_.resize(max_); + return *this; + } + /// Polymorphic cloning. + virtual FuzzyMatcher *clone() + { + return new FuzzyMatcher(*this); + } + /// Returns the number of edits made for the match, edits() <= max, not guaranteed to be the minimum edit distance. 
+ uint8_t edits() + /// @returns 0 to max edit distance + const + { + return err_; + } + /// Set or update fuzzy distance parameters + void distance(uint16_t max) ///< max errors, INS, DEL, SUB + { + max_ = static_cast(max); + err_ = 0; + ins_ = ((max & (INS | DEL | SUB)) == 0 || (max & INS)); + del_ = ((max & (INS | DEL | SUB)) == 0 || (max & DEL)); + sub_ = ((max & (INS | DEL | SUB)) == 0 || (max & SUB)); + bin_ = (max & BIN); + bpt_.resize(max_); + } + /// Get the fuzzy distance parameters, the max is stored in the lower byte and INS, DEL, SUB are hi byte bits + uint16_t distance() + { + return max_; + } + protected: + /// Save state to restore fuzzy matcher state after a second pass + struct SaveState { + SaveState(size_t ded) + : + use(false), + loc(0), + cap(0), + txt(0), + cur(0), + pos(0), + ded(ded), + mrk(false), + err(0) + { } + bool use; + size_t loc; + size_t cap; + size_t txt; + size_t cur; + size_t pos; + size_t ded; + bool mrk; + uint8_t err; + }; + /// Backtrack point. + struct BacktrackPoint { + BacktrackPoint() + : + pc0(NULL), + pc1(NULL), + len(0), + err(0), + alt(true), + sub(true) + { } + const Pattern::Opcode *pc0; ///< start of opcode + const Pattern::Opcode *pc1; ///< pointer to opcode to rerun on backtracking + size_t len; ///< length of string matched so far + uint8_t err; ///< to restore errors + bool alt; ///< true if alternating between pattern char substitution and insertion, otherwise insertion only + bool sub; ///< flag alternates between pattern char substitution (true) and insertion (false) + }; + /// Set backtrack point. 
+ void point(BacktrackPoint& bpt, const Pattern::Opcode *pc, size_t len, bool alternate = true, bool eof = false) + { + // advance to a goto opcode + while (!Pattern::is_opcode_goto(*pc)) + ++pc; + bpt.pc0 = pc; + bpt.pc1 = pc; + bpt.len = len - !eof; + bpt.err = err_; + bpt.alt = sub_ && alternate; + bpt.sub = bpt.alt; + } + /// backtrack on a backtrack point to insert or substitute a pattern char, restoring current text char matched and errors. + const Pattern::Opcode *backtrack(BacktrackPoint& bpt, int& ch) + { + // no more alternatives + if (bpt.pc1 == NULL) + return NULL; + // done when no more goto opcodes on characters remain + if (!Pattern::is_opcode_goto(*bpt.pc1)) + return bpt.pc1 = NULL; + Pattern::Index jump = Pattern::index_of(*bpt.pc1); + // last opcode is a HALT? + if (jump == Pattern::Const::HALT) + { + if (bin_ || !Pattern::is_opcode_goto(*bpt.pc0) || (Pattern::lo_of(*bpt.pc0) & 0xC0) != 0xC0 || (Pattern::hi_of(*bpt.pc0) & 0xC0) != 0xC0) + return bpt.pc1 = NULL; + // loop over UTF-8 multibytes, checking linear case only (i.e. 
one wide char or a short range) + for (int i = 0; i < 3; ++i) + { + jump = Pattern::index_of(*bpt.pc0); + if (jump == Pattern::Const::HALT || pat_->opc_ + jump == bpt.pc0) + return bpt.pc1 = NULL; + if (jump == Pattern::Const::LONG) + jump = Pattern::long_index_of(bpt.pc0[1]); + const Pattern::Opcode *pc0 = pat_->opc_ + jump; + const Pattern::Opcode *pc1 = pc0; + while (!Pattern::is_opcode_goto(*pc1)) + ++pc1; + if (Pattern::is_meta(Pattern::lo_of(*pc1)) || ((Pattern::lo_of(*pc1) & 0xC0) != 0x80 && (Pattern::hi_of(*pc1) & 0xC0) != 0x80)) + break; + bpt.pc0 = pc0; + bpt.pc1 = pc1; + } + jump = Pattern::index_of(*bpt.pc1); + if (jump == Pattern::Const::HALT) + return bpt.pc1 = NULL; + if (jump == Pattern::Const::LONG) + jump = Pattern::long_index_of(*++bpt.pc1); + bpt.sub = bpt.alt; + DBGLOG("Multibyte jump to %u", jump); + } + else if (jump == Pattern::Const::LONG) + { + jump = Pattern::long_index_of(*++bpt.pc1); + } + // restore errors + err_ = bpt.err; + // restore pos in the input + pos_ = (txt_ - buf_) + bpt.len; + // set ch to previous char before pos + if (pos_ > 0) + ch = static_cast(buf_[pos_ - 1]); + else + ch = got_; + // substitute or insert a pattern char in the text? 
+ if (bpt.sub) + { + // try substituting a pattern char for a mismatching char in the text + DBGLOG("Substitute: jump to %u at pos %zu char %d (0x%x)", jump, pos_, ch, ch); + int c = get(); + if (!bin_ && c != EOF) + { + // skip UTF-8 multibytes + if (c >= 0xC0) + { + int n = (c >= 0xE0) + (c >= 0xF0); + while (n-- >= 0) + if ((c = get()) == EOF) + break; + } + else + { + while ((peek() & 0xC0) == 0x80) + if ((c = get()) == EOF) + break; + } + } + bpt.sub = false; + bpt.pc1 += !bpt.alt; + } + else if (del_) + { + // try inserting a pattern char in the text to match a missing char in the text + DBGLOG("Delete: jump to %u at pos %zu char %d (0x%x)", jump, pos_, ch, ch); + bpt.sub = bpt.alt; + ++bpt.pc1; + } + else + { + // no more alternatives + return NULL; + } + return pat_->opc_ + jump; + } + /// Returns true if input fuzzy-matched the pattern using method Const::SCAN, Const::FIND, Const::SPLIT, or Const::MATCH. + virtual size_t match(Method method) ///< Const::SCAN, Const::FIND, Const::SPLIT, or Const::MATCH + /// @returns nonzero if input matched the pattern + { + DBGLOG("BEGIN FuzzyMatcher::match()"); + reset_text(); + SaveState sst(ded_); + len_ = 0; // split text length starts with 0 + anc_ = false; // no word boundary anchor found and applied +scan: + txt_ = buf_ + cur_; +#if !defined(WITH_NO_INDENT) + mrk_ = false; + ind_ = pos_; // ind scans input in buf[] in newline() up to pos - 1 + col_ = 0; // count columns for indent matching +#endif +find: + int ch = got_; + bool bol = at_bol(); // at begin of line? 
+#if !defined(WITH_NO_INDENT) +redo: +#endif + lap_.resize(0); + cap_ = 0; + bool nul = method == Const::MATCH; + if (pat_->opc_ != NULL && (!opt_.W || at_wb())) + { + // skip to next line and keep searching if matching on anchor ^ and not at begin of line + if (method == Const::FIND && pat_->bol_ && !bol) + if (skip('\n')) + goto scan; + err_ = 0; + uint8_t stack = 0; + const Pattern::Opcode *pc = pat_->opc_; + // backtrack point (DFA and relative position in the match) + const Pattern::Opcode *pc0 = pc; + size_t len0 = pos_ - (txt_ - buf_); + while (true) + { + Pattern::Index back = Pattern::Const::IMAX; // where to jump back to + size_t bpos = 0; // backtrack position in the input + while (true) + { + Pattern::Opcode opcode = *pc; + Pattern::Index jump; + DBGLOG("Fetch: code[%zu] = 0x%08X", pc - pat_->opc_, opcode); + if (!Pattern::is_opcode_goto(opcode)) + { + // save backtrack point (DFA and relative position in the match) + pc0 = pc; + len0 = pos_ - (txt_ - buf_); + switch (opcode >> 24) + { + case 0xFE: // TAKE + int c; + if (!opt_.W || (c = peek(), at_we(c, pos_))) + { + cap_ = Pattern::long_index_of(opcode); + DBGLOG("Take: cap = %zu", cap_); + cur_ = pos_; + } + ++pc; + continue; + case 0xFD: // REDO + cap_ = Const::REDO; + DBGLOG("Redo"); + cur_ = pos_; + ++pc; + continue; + case 0xFC: // TAIL + { + Pattern::Lookahead la = Pattern::lookahead_of(opcode); + DBGLOG("Tail: %u", la); + if (lap_.size() > la && lap_[la] >= 0) + cur_ = txt_ - buf_ + static_cast(lap_[la]); // mind the (new) gap + ++pc; + continue; + } + case 0xFB: // HEAD + { + Pattern::Lookahead la = Pattern::lookahead_of(opcode); + DBGLOG("Head: lookahead[%u] = %zu", la, pos_ - (txt_ - buf_)); + if (lap_.size() <= la) + lap_.resize(la + 1, -1); + lap_[la] = static_cast(pos_ - (txt_ - buf_)); // mind the gap + ++pc; + continue; + } +#if !defined(WITH_NO_INDENT) + case Pattern::META_DED - Pattern::META_MIN: + if (ded_ > 0) + { + jump = Pattern::index_of(opcode); + if (jump == 
Pattern::Const::LONG) + jump = Pattern::long_index_of(pc[1]); + DBGLOG("Dedent ded = %zu", ded_); // unconditional dedent matching \j + nul = true; + pc = pat_->opc_ + jump; + continue; + } +#endif + } + if (ch == EOF) + break; + ch = get(); + DBGLOG("Get: ch = %d (0x%x)", ch, ch); + // to jump to longest sequence of matching metas + jump = Pattern::Const::IMAX; + while (true) + { + if (jump == Pattern::Const::IMAX || back == Pattern::Const::IMAX) + { + if (!Pattern::is_opcode_goto(opcode)) + { + // we no longer have to pass through all if jump and back are set + switch (opcode >> 24) + { + case 0xFE: // TAKE + if (!opt_.W || at_we(ch, pos_ - 1)) + { + cap_ = Pattern::long_index_of(opcode); + DBGLOG("Take: cap = %zu", cap_); + cur_ = pos_; + if (ch != EOF) + --cur_; // must unget one char + } + opcode = *++pc; + continue; + case 0xFD: // REDO + cap_ = Const::REDO; + DBGLOG("Redo"); + cur_ = pos_; + if (ch != EOF) + --cur_; // must unget one char + opcode = *++pc; + continue; + case 0xFC: // TAIL + { + Pattern::Lookahead la = Pattern::lookahead_of(opcode); + DBGLOG("Tail: %u", la); + if (lap_.size() > la && lap_[la] >= 0) + cur_ = txt_ - buf_ + static_cast(lap_[la]); // mind the (new) gap + opcode = *++pc; + continue; + } + case 0xFB: // HEAD + opcode = *++pc; + continue; +#if !defined(WITH_NO_INDENT) + case Pattern::META_DED - Pattern::META_MIN: + DBGLOG("DED? %d", ch); + if (jump == Pattern::Const::IMAX && back == Pattern::Const::IMAX && bol && dedent()) + { + jump = Pattern::index_of(opcode); + if (jump == Pattern::Const::LONG) + jump = Pattern::long_index_of(*++pc); + } + opcode = *++pc; + continue; + case Pattern::META_IND - Pattern::META_MIN: + DBGLOG("IND? 
%d", ch); + if (jump == Pattern::Const::IMAX && back == Pattern::Const::IMAX && bol && indent()) + { + jump = Pattern::index_of(opcode); + if (jump == Pattern::Const::LONG) + jump = Pattern::long_index_of(*++pc); + } + opcode = *++pc; + continue; + case Pattern::META_UND - Pattern::META_MIN: + DBGLOG("UND"); + if (mrk_) + { + jump = Pattern::index_of(opcode); + if (jump == Pattern::Const::LONG) + jump = Pattern::long_index_of(*++pc); + } + mrk_ = false; + ded_ = 0; + opcode = *++pc; + continue; +#endif + case Pattern::META_EOB - Pattern::META_MIN: + DBGLOG("EOB? %d", ch); + if (jump == Pattern::Const::IMAX && ch == EOF) + { + jump = Pattern::index_of(opcode); + if (jump == Pattern::Const::LONG) + jump = Pattern::long_index_of(*++pc); + } + opcode = *++pc; + continue; + case Pattern::META_BOB - Pattern::META_MIN: + DBGLOG("BOB? %d", at_bob()); + if (jump == Pattern::Const::IMAX && at_bob()) + { + jump = Pattern::index_of(opcode); + if (jump == Pattern::Const::LONG) + jump = Pattern::long_index_of(*++pc); + } + opcode = *++pc; + continue; + case Pattern::META_EOL - Pattern::META_MIN: + DBGLOG("EOL? %d", ch); + anc_ = true; + if (jump == Pattern::Const::IMAX && (ch == EOF || ch == '\n' || (ch == '\r' && peek() == '\n'))) + { + jump = Pattern::index_of(opcode); + if (jump == Pattern::Const::LONG) + jump = Pattern::long_index_of(*++pc); + } + opcode = *++pc; + continue; + case Pattern::META_BOL - Pattern::META_MIN: + DBGLOG("BOL? %d", bol); + anc_ = true; + if (jump == Pattern::Const::IMAX && bol) + { + jump = Pattern::index_of(opcode); + if (jump == Pattern::Const::LONG) + jump = Pattern::long_index_of(*++pc); + } + opcode = *++pc; + continue; + case Pattern::META_EWE - Pattern::META_MIN: + DBGLOG("EWE? 
%d", at_ewe(ch)); + anc_ = true; + if (jump == Pattern::Const::IMAX && at_ewe(ch)) + { + jump = Pattern::index_of(opcode); + if (jump == Pattern::Const::LONG) + jump = Pattern::long_index_of(*++pc); + } + opcode = *++pc; + continue; + case Pattern::META_BWE - Pattern::META_MIN: + DBGLOG("BWE? %d", at_bwe(ch)); + anc_ = true; + if (jump == Pattern::Const::IMAX && at_bwe(ch)) + { + jump = Pattern::index_of(opcode); + if (jump == Pattern::Const::LONG) + jump = Pattern::long_index_of(*++pc); + } + opcode = *++pc; + continue; + case Pattern::META_EWB - Pattern::META_MIN: + DBGLOG("EWB? %d", at_ewb()); + anc_ = true; + if (jump == Pattern::Const::IMAX && at_ewb()) + { + jump = Pattern::index_of(opcode); + if (jump == Pattern::Const::LONG) + jump = Pattern::long_index_of(*++pc); + } + opcode = *++pc; + continue; + case Pattern::META_BWB - Pattern::META_MIN: + DBGLOG("BWB? %d", at_bwb()); + anc_ = true; + if (jump == Pattern::Const::IMAX && at_bwb()) + { + jump = Pattern::index_of(opcode); + if (jump == Pattern::Const::LONG) + jump = Pattern::long_index_of(*++pc); + } + opcode = *++pc; + continue; + case Pattern::META_NWE - Pattern::META_MIN: + DBGLOG("NWE? %d", at_nwe(ch)); + anc_ = true; + if (jump == Pattern::Const::IMAX && at_nwe(ch)) + { + jump = Pattern::index_of(opcode); + if (jump == Pattern::Const::LONG) + jump = Pattern::long_index_of(*++pc); + } + opcode = *++pc; + continue; + case Pattern::META_NWB - Pattern::META_MIN: + DBGLOG("NWB? %d", at_nwb()); + anc_ = true; + if (jump == Pattern::Const::IMAX && at_nwb()) + { + jump = Pattern::index_of(opcode); + if (jump == Pattern::Const::LONG) + jump = Pattern::long_index_of(*++pc); + } + opcode = *++pc; + continue; + case Pattern::META_WBE - Pattern::META_MIN: + DBGLOG("WBE? 
%d", at_wbe(ch)); + anc_ = true; + if (jump == Pattern::Const::IMAX && at_wbe(ch)) + { + jump = Pattern::index_of(opcode); + if (jump == Pattern::Const::LONG) + jump = Pattern::long_index_of(*++pc); + } + opcode = *++pc; + continue; + case Pattern::META_WBB - Pattern::META_MIN: + DBGLOG("WBB? %d", at_wbb()); + anc_ = true; + if (jump == Pattern::Const::IMAX && at_wbb()) + { + jump = Pattern::index_of(opcode); + if (jump == Pattern::Const::LONG) + jump = Pattern::long_index_of(*++pc); + } + opcode = *++pc; + continue; + case 0xFF: // LONG + opcode = *++pc; + continue; + } + } + else if (ch != EOF && !Pattern::is_opcode_halt(opcode)) + { + if (jump == Pattern::Const::IMAX) + break; + if (back == Pattern::Const::IMAX) + { + back = static_cast(pc - pat_->opc_); + bpos = pos_ - (txt_ - buf_) - 1; + DBGLOG("Backtrack point: back = %u pos = %zu", back, bpos); + } + pc = pat_->opc_ + jump; + opcode = *pc; + } + } + if (jump == Pattern::Const::IMAX) + { + if (back != Pattern::Const::IMAX) + { + pc = pat_->opc_ + back; + opcode = *pc; + back = Pattern::Const::IMAX; + } + break; + } + DBGLOG("Try jump = %u", jump); + if (back == Pattern::Const::IMAX) + { + back = static_cast(pc - pat_->opc_); + bpos = pos_ - (txt_ - buf_) - 1; + DBGLOG("Backtrack point: back = %u pos = %zu", back, bpos); + } + pc = pat_->opc_ + jump; + opcode = *pc; + jump = Pattern::Const::IMAX; + } + if (ch == EOF) + break; + } + else + { + if (ch == EOF) + break; + ch = get(); + if (Pattern::is_opcode_halt(opcode)) + { + if (back != Pattern::Const::IMAX) + { + pos_ = (txt_ - buf_) + bpos; + pc = pat_->opc_ + back; + DBGLOG("Backtrack: back = %u pos = %zu ch = %d", back, pos_, ch); + back = Pattern::Const::IMAX; + continue; + } + break; + } + DBGLOG("Get: ch = %d (0x%x) at pos %zu", ch, ch, pos_ - 1); + if (bin_ || (ch & 0xC0) != 0x80 || ch == EOF) + { + // save backtrack point (DFA and relative position in the match) + pc0 = pc; + len0 = pos_ - (txt_ - buf_); + } + if (ch == EOF) + break; + } + { + 
Pattern::Opcode lo = ch << 24; + Pattern::Opcode hi = lo | 0x00FFFFFF; +unrolled: + if (hi < opcode || lo > (opcode << 8)) + { + opcode = *++pc; + if (hi < opcode || lo > (opcode << 8)) + { + opcode = *++pc; + if (hi < opcode || lo > (opcode << 8)) + { + opcode = *++pc; + if (hi < opcode || lo > (opcode << 8)) + { + opcode = *++pc; + if (hi < opcode || lo > (opcode << 8)) + { + opcode = *++pc; + if (hi < opcode || lo > (opcode << 8)) + { + opcode = *++pc; + if (hi < opcode || lo > (opcode << 8)) + { + opcode = *++pc; + if (hi < opcode || lo > (opcode << 8)) + { + opcode = *++pc; + goto unrolled; + } + } + } + } + } + } + } + } + } + jump = Pattern::index_of(opcode); + if (jump == 0) + { + // loop back to start state w/o full match: advance to avoid backtracking + if (cap_ == 0 && method == Const::FIND) + { + if (cur_ + 1 == pos_) + { + // matched one char in a loop, do not backtrack here + ++cur_; + } + else + { + // check each char in buf_[cur_+1..pos_-1] if it is a starting char, if not then increase cur_ + while (cur_ + 1 < pos_ && !pat_->fst_.test(static_cast(buf_[cur_ + 1]))) + ++cur_; + } + } + } + else if (jump >= Pattern::Const::LONG) + { + if (jump == Pattern::Const::HALT) + { + if (back != Pattern::Const::IMAX) + { + pc = pat_->opc_ + back; + pos_ = (txt_ - buf_) + bpos; + DBGLOG("Backtrack: back = %u pos = %zu ch = %d", back, pos_, ch); + back = Pattern::Const::IMAX; + continue; + } + break; + } + jump = Pattern::long_index_of(pc[1]); + } + pc = pat_->opc_ + jump; + } + // exit fuzzy loop if nothing consumed + if (pos_ == static_cast(txt_ + len_ - buf_)) + break; + // match, i.e. cap_ > 0? + if (method == Const::MATCH) + { + // exit fuzzy loop if fuzzy match succeeds till end of input when insertions are allowed + if (cap_ > 0) + { + if (ch != EOF && ins_) + { + // text insertions are allowed + while (err_ < max_) + { + ++err_; + ch = get(); + // reached the end? 
+ if (ch == EOF) + break; + // skip one (multibyte) char + if (!bin_ && ch >= 0xC0) + { + int n = (ch >= 0xE0) + (ch >= 0xF0); + while (n-- >= 0) + if ((ch = get()) == EOF) + break; + } + } + } + if (ch == EOF || ins_) + { + // reached the end? + if (at_end()) + { + DBGLOG("Match pos = %zu", pos_); + set_current(pos_); + break; + } + } + cap_ = 0; + } + } + else + { + // exit fuzzy loop if match or if first char mismatched + if (cap_ > 0 || pos_ == static_cast(txt_ + len_ - buf_ + 1)) + break; + } + // no match, use fuzzy matching with max error + if (ch == '\0' || ch == '\n' || ch == EOF) + { + // do not try to fuzzy match NUL, LF, or EOF + if (err_ < max_ && del_) + { + ++err_; + // set backtrack point to insert pattern char only, not substitute, if pc0 os a different point than the last + if (stack == 0 || bpt_[stack - 1].pc0 != pc0) + { + point(bpt_[stack++], pc0, len0, false, ch == EOF); + DBGLOG("Point[%u] at %zu pos %zu (\\0|\\nEOF)", stack - 1, pc0 - pat_->opc_, pos_ - 1); + } + } + else + { + // backtrack to try insertion or substitution of pattern char + pc = NULL; + while (stack > 0 && pc == NULL) + { + pc = backtrack(bpt_[stack - 1], ch); + if (pc == NULL) + --stack; + } + // exhausted all backtracking points? 
+ if (pc == NULL) + break; + } + } + else + { + if (err_ < max_) + { + ++err_; + if (del_ || sub_) + { + // set backtrack point if pc0 is a different point than the last + if (stack == 0 || bpt_[stack - 1].pc0 != pc0) + { + point(bpt_[stack++], pc0, len0); + DBGLOG("Point[%u] at %zu pos %zu", stack - 1, pc0 - pat_->opc_, pos_ - 1); + } + } + if (ins_) + { + if (!bin_) + { + // try pattern char deletion (text insertion): skip one (multibyte) char then rerun opcode at pc0 + if (ch >= 0xC0) + { + int n = (ch >= 0xE0) + (ch >= 0xF0); + while (n-- >= 0) + if ((ch = get()) == EOF) + break; + } + else + { + while ((peek() & 0xC0) == 0x80) + if ((ch = get()) == EOF) + break; + } + } + pc = pc0; + DBGLOG("Insert: %d (0x%x) at pos %zu", ch, ch, pos_ - 1); + } + } + else + { + // backtrack to try insertion or substitution of pattern char + pc = NULL; + while (stack > 0 && pc == NULL) + { + pc = backtrack(bpt_[stack - 1], ch); + if (pc == NULL) + --stack; + } + // exhausted all backtracking points? 
+ if (pc == NULL) + break; + } + } + } + } + // if fuzzy find/split with errors then perform a second pass ahead of this match to check for an exact match + if (cap_ > 0 && err_ > 0 && !sst.use && (method == Const::FIND || method == Const::SPLIT)) + { + // this part is based on advance() in matcher.cpp, limited to advancing ahead till the one of the first pattern char(s) match excluding \n + size_t loc = txt_ - buf_ + 1; + const char *s = buf_ + loc; + const char *e = static_cast(std::memchr(s, '\n', cur_ - loc)); + if (e == NULL) + e = buf_ + cur_; + if (pat_->len_ == 0) + { + if (pat_->min_ > 0) + { + while (s < e && !pat_->fst_.test(static_cast(*s))) + ++s; + if (s < e) + { + loc = s - buf_; + sst.use = true; + sst.loc = loc; + sst.cap = cap_; + sst.txt = txt_ - buf_; + sst.cur = cur_; + sst.pos = pos_; + size_t tmp = ded_; + ded_ = sst.ded; + sst.ded = tmp; + sst.mrk = mrk_; + sst.err = err_; + set_current(loc); + goto scan; + } + } + } + else if (s < e) + { + s = static_cast(std::memchr(s, *pat_->chr_, e - s)); + if (s != NULL) + { + loc = s - buf_; + sst.use = true; + sst.loc = loc; + sst.cap = cap_; + sst.txt = txt_ - buf_; + sst.cur = cur_; + sst.pos = pos_; + size_t tmp = ded_; + ded_ = sst.ded; + sst.ded = tmp; + sst.mrk = mrk_; + sst.err = err_; + set_current(loc); + goto scan; + } + } + } + else if (sst.use && (cap_ == 0 || err_ >= sst.err)) + { + // if the buffer was shifted then cur_, pos_ and txt_ are no longer at the same location in the buffer, we must adjust for this + size_t loc = txt_ - buf_; + size_t shift = sst.loc - loc; + cap_ = sst.cap; + cur_ = sst.cur - shift; + pos_ = sst.pos - shift; + ded_ = sst.ded; + mrk_ = sst.mrk; + err_ = sst.err; + txt_ = buf_ + sst.txt - shift; + } + else if (sst.use && cap_ > 0 && method == Const::SPLIT) + { + size_t loc = txt_ - buf_; + size_t shift = sst.loc - loc; + len_ = loc - sst.txt + shift; + } +#if !defined(WITH_NO_INDENT) + if (mrk_ && cap_ != Const::REDO) + { + if (col_ > 0 && (tab_.empty() || 
tab_.back() < col_)) + { + DBGLOG("Set new stop: tab_[%zu] = %zu", tab_.size(), col_); + tab_.push_back(col_); + } + else if (!tab_.empty() && tab_.back() > col_) + { + size_t n; + for (n = tab_.size() - 1; n > 0; --n) + if (tab_.at(n - 1) <= col_) + break; + ded_ += tab_.size() - n; + DBGLOG("Dedents: ded = %zu tab_ = %zu", ded_, tab_.size()); + tab_.resize(n); + // adjust stop when indents are not aligned (Python would give an error) + if (n > 0) + tab_.back() = col_; + } + } + if (ded_ > 0) + { + DBGLOG("Dedents: ded = %zu", ded_); + if (col_ == 0 && bol) + { + ded_ += tab_.size(); + tab_.resize(0); + DBGLOG("Rescan for pending dedents: ded = %zu", ded_); + pos_ = ind_; + // avoid looping, match \j exactly + bol = false; + goto redo; + } + --ded_; + } +#endif + if (method == Const::SPLIT) + { + DBGLOG("Split: len = %zu cap = %zu cur = %zu pos = %zu end = %zu txt-buf = %zu eob = %d got = %d", len_, cap_, cur_, pos_, end_, txt_-buf_, (int)eof_, got_); + if (cap_ == 0 || (cur_ == static_cast(txt_ - buf_) && !at_bob())) + { + if (!hit_end() && (txt_ + len_ < buf_ + end_ || peek() != EOF)) + { + ++len_; + DBGLOG("Split continue: len = %zu", len_); + set_current(++cur_); + goto find; + } + if (got_ != Const::EOB) + cap_ = Const::EMPTY; + else + cap_ = 0; + set_current(end_); + got_ = Const::EOB; + DBGLOG("Split at eof: cap = %zu txt = '%s' len = %zu", cap_, std::string(txt_, len_).c_str(), len_); + DBGLOG("END FuzzyMatcher::match()"); + return cap_; + } + if (cur_ == 0 && at_bob() && at_end()) + { + cap_ = Const::EMPTY; + got_ = Const::EOB; + } + else + { + set_current(cur_); + } + DBGLOG("Split: txt = '%s' len = %zu", std::string(txt_, len_).c_str(), len_); + DBGLOG("END FuzzyMatcher::match()"); + return cap_; + } + if (cap_ == 0) + { + if (method == Const::FIND) + { + if (!at_end()) + { + if (anc_) + { + cur_ = txt_ - buf_; // reset current to pattern start when a word boundary was encountered + anc_ = false; + } + // fuzzy search with find() can safely advance on a 
single prefix char of the regex + if (pos_ > cur_) + { + // this part is based on advance() in matcher.cpp, limited to advancing ahead till the one of the first pattern char(s) match + size_t loc = cur_ + 1; + if (pat_->len_ == 0) + { + if (pat_->min_ > 0) + { + while (true) + { + const char *s = buf_ + loc; + const char *e = buf_ + end_; + while (s < e && !pat_->fst_.test(static_cast(*s))) + ++s; + if (s < e) + { + loc = s - buf_; + set_current(loc); + goto scan; + } + loc = e - buf_; + set_current_and_peek_more(loc - 1); + loc = cur_ + 1; + if (loc >= end_) + break; + } + } + } + else + { + while (true) + { + const char *s = buf_ + loc; + const char *e = buf_ + end_; + s = static_cast(std::memchr(s, *pat_->chr_, e - s)); + if (s != NULL) + { + loc = s - buf_; + set_current(loc); + goto scan; + } + loc = e - buf_; + set_current_and_peek_more(loc - 1); + loc = cur_ + 1; + if (loc + pat_->len_ > end_) + break; + } + } + } + } + txt_ = buf_ + cur_; + } + else + { + // SCAN and MATCH: no match: backup to begin of unmatched text to report as error + cur_ = txt_ - buf_; + } + } + len_ = cur_ - (txt_ - buf_); + if (len_ == 0 && !nul) + { + DBGLOG("Empty or no match cur = %zu pos = %zu end = %zu", cur_, pos_, end_); + pos_ = cur_; + if (at_end()) + { + set_current(cur_); + DBGLOG("Reject empty match at EOF"); + cap_ = 0; + } + else if (method == Const::FIND) + { + DBGLOG("Reject empty match and continue?"); + // skip one char to keep searching + set_current(++cur_); + // allow FIND with "N" to match an empty line, with ^$ etc. 
+ if (cap_ == 0 || !opt_.N || (!bol && (ch == '\n' || (ch == '\r' && peek() == '\n')))) + goto scan; + DBGLOG("Accept empty match"); + } + else + { + set_current(cur_); + DBGLOG("Reject empty match"); + cap_ = 0; + } + } + else if (len_ == 0 && cur_ == end_) + { + DBGLOG("Hit end: got = %d", got_); + if (cap_ == Const::REDO && !opt_.A) + cap_ = 0; + } + else + { + set_current(cur_); + if (len_ > 0 && cap_ == Const::REDO && !opt_.A) + { + DBGLOG("Ignore accept and continue: len = %zu", len_); + len_ = 0; + if (method != Const::MATCH) + goto scan; + cap_ = 0; + } + } + DBGLOG("Return: cap = %zu txt = '%s' len = %zu pos = %zu got = %d", cap_, std::string(txt_, len_).c_str(), len_, pos_, got_); + DBGLOG("END match()"); + return cap_; + } + std::vector bpt_; ///< vector of backtrack points, max_ size + uint8_t max_; ///< max errors + uint8_t err_; ///< accumulated edit distance (not guaranteed minimal) + bool ins_; ///< fuzzy match permits inserted chars (extra chars in the input) + bool del_; ///< fuzzy match permits deleted chars (missing chars in the input) + bool sub_; ///< fuzzy match permits substituted chars + bool bin_; ///< fuzzy match bytes, not UTF-8 multibyte encodings +}; + +} // namespace reflex + +#endif diff --git a/ccl/rslang/import/reflex/include/reflex/linematcher.h b/ccl/rslang/import/reflex/include/reflex/linematcher.h index 3ab7f63..cf222fe 100644 --- a/ccl/rslang/import/reflex/include/reflex/linematcher.h +++ b/ccl/rslang/import/reflex/include/reflex/linematcher.h @@ -138,8 +138,8 @@ find: // option N also finds empty lines if (n == 0 && !opt_.N) goto find; - // option W only finds empty lines - if (n > 0 && opt_.W) + // option X only finds empty lines + if (n > 0 && opt_.X) goto find; break; case Const::SPLIT: diff --git a/ccl/rslang/import/reflex/include/reflex/matcher.h b/ccl/rslang/import/reflex/include/reflex/matcher.h index 224501c..e92766c 100644 --- a/ccl/rslang/import/reflex/include/reflex/matcher.h +++ 
b/ccl/rslang/import/reflex/include/reflex/matcher.h @@ -30,13 +30,18 @@ @file matcher.h @brief RE/flex matcher engine @author Robert van Engelen - engelen@genivia.com -@copyright (c) 2016-2022, Robert van Engelen, Genivia Inc. All rights reserved. +@copyright (c) 2016-2024, Robert van Engelen, Genivia Inc. All rights reserved. @copyright (c) BSD-3 License - see LICENSE.txt */ #ifndef REFLEX_MATCHER_H #define REFLEX_MATCHER_H +#if !defined(HAVE_NEON) +/// enable predict match patterns after strings longer than 4 chars (this is a bit slower on ARM NEON/AArch64) +#define WITH_STRING_PM +#endif + #include #include #include @@ -63,7 +68,7 @@ class Matcher : public PatternMatcher { const Input& input = Input(), ///< input character sequence for this matcher const char *opt = NULL) ///< option string of the form `(A|N|T(=[[:digit:]])?|;)*` : - PatternMatcher(pattern, input, opt) + PatternMatcher(pattern, input) { reset(opt); } @@ -73,7 +78,7 @@ class Matcher : public PatternMatcher { const Input& input = Input(), ///< input character sequence for this matcher const char *opt = NULL) ///< option string of the form `(A|N|T(=[[:digit:]])?|;)*` : - PatternMatcher(pattern, input, opt) + PatternMatcher(pattern, input) { reset(opt); } @@ -83,7 +88,7 @@ class Matcher : public PatternMatcher { const Input& input = Input(), ///< input character sequence for this matcher const char *opt = NULL) ///< option string of the form `(A|N|T(=[[:digit:]])?|;)*` : - PatternMatcher(pattern, input, opt) + PatternMatcher(pattern, input) { reset(opt); } @@ -93,7 +98,7 @@ class Matcher : public PatternMatcher { const Input& input = Input(), ///< input character sequence for this matcher const char *opt = NULL) ///< option string of the form `(A|N|T(=[[:digit:]])?|;)*` : - PatternMatcher(pattern, input, opt) + PatternMatcher(pattern, input) { reset(opt); } @@ -105,15 +110,66 @@ class Matcher : public PatternMatcher { tab_(matcher.tab_) { DBGLOG("Matcher::Matcher(matcher)"); + init_advance(); } - 
/// Assign a matcher. + /// Assign a matcher, the underlying pattern string is shared (not deep copied). Matcher& operator=(const Matcher& matcher) ///< matcher to copy { PatternMatcher::operator=(matcher); ded_ = matcher.ded_; tab_ = matcher.tab_; + init_advance(); return *this; } + /// Set the pattern to use with this matcher (the given pattern is shared and must be persistent). + Matcher& pattern(const Pattern& pattern) ///< pattern object for this matcher + /// @returns this matcher + { + DBGLOG("Matcher::pattern()"); + if (pat_ != &pattern) + { + PatternMatcher::pattern(pattern); + init_advance(); + } + return *this; + } + /// Set the pattern to use with this matcher (the given pattern is shared and must be persistent). + Matcher& pattern(const Pattern *pattern) ///< pattern object for this matcher + /// @returns this matcher + { + DBGLOG("Matcher::pattern()"); + if (pat_ != pattern) + { + PatternMatcher::pattern(pattern); + init_advance(); + } + return *this; + } + /// Set the pattern from a regex string to use with this matcher. + Matcher& pattern(const char *pattern) ///< regex string to instantiate internal pattern object + /// @returns this matcher + { + DBGLOG("Matcher::pattern(\"%s\")", pattern); + PatternMatcher::pattern(pattern); + init_advance(); + return *this; + } + /// Set the pattern from a regex string to use with this matcher. + Matcher& pattern(const std::string& pattern) ///< regex string to instantiate internal pattern object + /// @returns this matcher + { + DBGLOG("Matcher::pattern(\"%s\")", pattern.c_str()); + PatternMatcher::pattern(pattern); + init_advance(); + return *this; + } + /// Returns a reference to the pattern associated with this matcher. + virtual const Pattern& pattern() const + /// @returns reference to pattern + { + ASSERT(pat_ != NULL); + return *pat_; + } /// Polymorphic cloning. 
virtual Matcher *clone() { @@ -126,6 +182,7 @@ class Matcher : public PatternMatcher { PatternMatcher::reset(opt); ded_ = 0; tab_.resize(0); + init_advance(); } /// Returns captured text as a std::pair with string pointer (non-0-terminated) and length. virtual std::pair operator[](size_t n) const @@ -218,9 +275,9 @@ class Matcher : public PatternMatcher { stk_.pop(); } /// FSM code INIT. - inline void FSM_INIT(int& c1) + inline void FSM_INIT(int& c) { - c1 = fsm_.c1; + c = fsm_.ch; } /// FSM code FIND. inline void FSM_FIND() @@ -238,23 +295,30 @@ class Matcher : public PatternMatcher { return get(); } /// FSM code HALT. - inline void FSM_HALT(int c1 = AbstractMatcher::Const::UNK) + inline void FSM_HALT(int c = AbstractMatcher::Const::UNK) { - fsm_.c1 = c1; + fsm_.ch = c; } /// FSM code TAKE. inline void FSM_TAKE(Pattern::Accept cap) { - cap_ = cap; - cur_ = pos_; + int ch = peek(); + if (!opt_.W || at_we(ch, pos_)) + { + cap_ = cap; + cur_ = pos_; + } } /// FSM code TAKE. - inline void FSM_TAKE(Pattern::Accept cap, int c1) + inline void FSM_TAKE(Pattern::Accept cap, int c) { - cap_ = cap; - cur_ = pos_; - if (c1 != EOF) - --cur_; + if (!opt_.W || at_we(c, pos_ - 1)) + { + cap_ = cap; + cur_ = pos_; + if (c != EOF) + --cur_; + } } /// FSM code REDO. inline void FSM_REDO() @@ -263,11 +327,11 @@ class Matcher : public PatternMatcher { cur_ = pos_; } /// FSM code REDO. - inline void FSM_REDO(int c1) + inline void FSM_REDO(int c) { cap_ = Const::REDO; cur_ = pos_; - if (c1 != EOF) + if (c != EOF) --cur_; } /// FSM code HEAD. @@ -324,9 +388,9 @@ class Matcher : public PatternMatcher { } #endif /// FSM code META EOB. - inline bool FSM_META_EOB(int c1) + inline bool FSM_META_EOB(int c) { - return c1 == EOF; + return c == EOF; } /// FSM code META BOB. inline bool FSM_META_BOB() @@ -334,10 +398,10 @@ class Matcher : public PatternMatcher { return at_bob(); } /// FSM code META EOL. 
- inline bool FSM_META_EOL(int c1) + inline bool FSM_META_EOL(int c) { anc_ = true; - return c1 == EOF || c1 == '\n' || (c1 == '\r' && peek() == '\n'); + return c == EOF || c == '\n' || (c == '\r' && peek() == '\n'); } /// FSM code META BOL. inline bool FSM_META_BOL() @@ -346,78 +410,1048 @@ class Matcher : public PatternMatcher { return fsm_.bol; } /// FSM code META EWE. - inline bool FSM_META_EWE(int c0, int c1) + inline bool FSM_META_EWE(int c) { anc_ = true; - return (isword(c0) || opt_.W) && !isword(c1); + return at_ewe(c); } /// FSM code META BWE. - inline bool FSM_META_BWE(int c0, int c1) + inline bool FSM_META_BWE(int c) { anc_ = true; - return !isword(c0) && isword(c1); + return at_bwe(c); } /// FSM code META EWB. inline bool FSM_META_EWB() { anc_ = true; - return isword(got_) && !isword(static_cast(txt_[len_])); + return at_ewb(); } /// FSM code META BWB. inline bool FSM_META_BWB() { anc_ = true; - return !isword(got_) && (opt_.W || isword(static_cast(txt_[len_]))); + return at_bwb(); } /// FSM code META NWE. - inline bool FSM_META_NWE(int c0, int c1) + inline bool FSM_META_NWE(int c) { anc_ = true; - return isword(c0) == isword(c1); + return at_nwe(c); } /// FSM code META NWB. inline bool FSM_META_NWB() { anc_ = true; - return isword(got_) == isword(static_cast(txt_[len_])); + return at_nwb(); } /// FSM code META WBE. - inline bool FSM_META_WBE(int c0, int c1) + inline bool FSM_META_WBE(int c) { anc_ = true; - return isword(c0) != isword(c1); + return at_wbe(c); } /// FSM code META WBB. inline bool FSM_META_WBB() { anc_ = true; - return isword(got_) != isword(static_cast(txt_[len_])); + return at_wbb(); } protected: typedef std::vector Stops; ///< indent margin/tab stops /// FSM data for FSM code struct FSM { - FSM() : bol(), nul(), c1() { } + FSM() : bol(), nul(), ch() { } bool bol; bool nul; - int c1; + int ch; }; + /// Return true if Unicode word character. 
+ static bool iswword(int c) ///< character to test + { + // table source: unicode/language_scripts.cpp Word[] array updated to Unicode 15.1 + static const int word[2*712] = { + 48, 57, + 65, 90, + 95, 95, + 97, 122, + 170, 170, + 181, 181, + 186, 186, + 192, 214, + 216, 246, + 248, 705, + 710, 721, + 736, 740, + 748, 748, + 750, 750, + 880, 884, + 886, 887, + 890, 893, + 895, 895, + 902, 902, + 904, 906, + 908, 908, + 910, 929, + 931, 1013, + 1015, 1153, + 1162, 1327, + 1329, 1366, + 1369, 1369, + 1376, 1416, + 1488, 1514, + 1519, 1522, + 1568, 1610, + 1632, 1641, + 1646, 1647, + 1649, 1747, + 1749, 1749, + 1765, 1766, + 1774, 1788, + 1791, 1791, + 1808, 1808, + 1810, 1839, + 1869, 1957, + 1969, 1969, + 1984, 2026, + 2036, 2037, + 2042, 2042, + 2048, 2069, + 2074, 2074, + 2084, 2084, + 2088, 2088, + 2112, 2136, + 2144, 2154, + 2160, 2183, + 2185, 2190, + 2208, 2249, + 2308, 2361, + 2365, 2365, + 2384, 2384, + 2392, 2401, + 2406, 2415, + 2417, 2432, + 2437, 2444, + 2447, 2448, + 2451, 2472, + 2474, 2480, + 2482, 2482, + 2486, 2489, + 2493, 2493, + 2510, 2510, + 2524, 2525, + 2527, 2529, + 2534, 2545, + 2556, 2556, + 2565, 2570, + 2575, 2576, + 2579, 2600, + 2602, 2608, + 2610, 2611, + 2613, 2614, + 2616, 2617, + 2649, 2652, + 2654, 2654, + 2662, 2671, + 2674, 2676, + 2693, 2701, + 2703, 2705, + 2707, 2728, + 2730, 2736, + 2738, 2739, + 2741, 2745, + 2749, 2749, + 2768, 2768, + 2784, 2785, + 2790, 2799, + 2809, 2809, + 2821, 2828, + 2831, 2832, + 2835, 2856, + 2858, 2864, + 2866, 2867, + 2869, 2873, + 2877, 2877, + 2908, 2909, + 2911, 2913, + 2918, 2927, + 2929, 2929, + 2947, 2947, + 2949, 2954, + 2958, 2960, + 2962, 2965, + 2969, 2970, + 2972, 2972, + 2974, 2975, + 2979, 2980, + 2984, 2986, + 2990, 3001, + 3024, 3024, + 3046, 3055, + 3077, 3084, + 3086, 3088, + 3090, 3112, + 3114, 3129, + 3133, 3133, + 3160, 3162, + 3165, 3165, + 3168, 3169, + 3174, 3183, + 3200, 3200, + 3205, 3212, + 3214, 3216, + 3218, 3240, + 3242, 3251, + 3253, 3257, + 3261, 3261, + 3293, 3294, 
+ 3296, 3297, + 3302, 3311, + 3313, 3314, + 3332, 3340, + 3342, 3344, + 3346, 3386, + 3389, 3389, + 3406, 3406, + 3412, 3414, + 3423, 3425, + 3430, 3439, + 3450, 3455, + 3461, 3478, + 3482, 3505, + 3507, 3515, + 3517, 3517, + 3520, 3526, + 3558, 3567, + 3585, 3632, + 3634, 3635, + 3648, 3654, + 3664, 3673, + 3713, 3714, + 3716, 3716, + 3718, 3722, + 3724, 3747, + 3749, 3749, + 3751, 3760, + 3762, 3763, + 3773, 3773, + 3776, 3780, + 3782, 3782, + 3792, 3801, + 3804, 3807, + 3840, 3840, + 3872, 3881, + 3904, 3911, + 3913, 3948, + 3976, 3980, + 4096, 4138, + 4159, 4169, + 4176, 4181, + 4186, 4189, + 4193, 4193, + 4197, 4198, + 4206, 4208, + 4213, 4225, + 4238, 4238, + 4240, 4249, + 4256, 4293, + 4295, 4295, + 4301, 4301, + 4304, 4346, + 4348, 4680, + 4682, 4685, + 4688, 4694, + 4696, 4696, + 4698, 4701, + 4704, 4744, + 4746, 4749, + 4752, 4784, + 4786, 4789, + 4792, 4798, + 4800, 4800, + 4802, 4805, + 4808, 4822, + 4824, 4880, + 4882, 4885, + 4888, 4954, + 4992, 5007, + 5024, 5109, + 5112, 5117, + 5121, 5740, + 5743, 5759, + 5761, 5786, + 5792, 5866, + 5873, 5880, + 5888, 5905, + 5919, 5937, + 5952, 5969, + 5984, 5996, + 5998, 6000, + 6016, 6067, + 6103, 6103, + 6108, 6108, + 6112, 6121, + 6160, 6169, + 6176, 6264, + 6272, 6276, + 6279, 6312, + 6314, 6314, + 6320, 6389, + 6400, 6430, + 6470, 6509, + 6512, 6516, + 6528, 6571, + 6576, 6601, + 6608, 6617, + 6656, 6678, + 6688, 6740, + 6784, 6793, + 6800, 6809, + 6823, 6823, + 6917, 6963, + 6981, 6988, + 6992, 7001, + 7043, 7072, + 7086, 7141, + 7168, 7203, + 7232, 7241, + 7245, 7293, + 7296, 7304, + 7312, 7354, + 7357, 7359, + 7401, 7404, + 7406, 7411, + 7413, 7414, + 7418, 7418, + 7424, 7615, + 7680, 7957, + 7960, 7965, + 7968, 8005, + 8008, 8013, + 8016, 8023, + 8025, 8025, + 8027, 8027, + 8029, 8029, + 8031, 8061, + 8064, 8116, + 8118, 8124, + 8126, 8126, + 8130, 8132, + 8134, 8140, + 8144, 8147, + 8150, 8155, + 8160, 8172, + 8178, 8180, + 8182, 8188, + 8255, 8256, + 8276, 8276, + 8305, 8305, + 8319, 8319, + 8336, 
8348, + 8450, 8450, + 8455, 8455, + 8458, 8467, + 8469, 8469, + 8473, 8477, + 8484, 8484, + 8486, 8486, + 8488, 8488, + 8490, 8493, + 8495, 8505, + 8508, 8511, + 8517, 8521, + 8526, 8526, + 8579, 8580, + 11264, 11492, + 11499, 11502, + 11506, 11507, + 11520, 11557, + 11559, 11559, + 11565, 11565, + 11568, 11623, + 11631, 11631, + 11648, 11670, + 11680, 11686, + 11688, 11694, + 11696, 11702, + 11704, 11710, + 11712, 11718, + 11720, 11726, + 11728, 11734, + 11736, 11742, + 11823, 11823, + 12293, 12294, + 12337, 12341, + 12347, 12348, + 12353, 12438, + 12445, 12447, + 12449, 12538, + 12540, 12543, + 12549, 12591, + 12593, 12686, + 12704, 12735, + 12784, 12799, + 13312, 19903, + 19968, 42124, + 42192, 42237, + 42240, 42508, + 42512, 42539, + 42560, 42606, + 42623, 42653, + 42656, 42725, + 42775, 42783, + 42786, 42888, + 42891, 42954, + 42960, 42961, + 42963, 42963, + 42965, 42969, + 42994, 43009, + 43011, 43013, + 43015, 43018, + 43020, 43042, + 43072, 43123, + 43138, 43187, + 43216, 43225, + 43250, 43255, + 43259, 43259, + 43261, 43262, + 43264, 43301, + 43312, 43334, + 43360, 43388, + 43396, 43442, + 43471, 43481, + 43488, 43492, + 43494, 43518, + 43520, 43560, + 43584, 43586, + 43588, 43595, + 43600, 43609, + 43616, 43638, + 43642, 43642, + 43646, 43695, + 43697, 43697, + 43701, 43702, + 43705, 43709, + 43712, 43712, + 43714, 43714, + 43739, 43741, + 43744, 43754, + 43762, 43764, + 43777, 43782, + 43785, 43790, + 43793, 43798, + 43808, 43814, + 43816, 43822, + 43824, 43866, + 43868, 43881, + 43888, 44002, + 44016, 44025, + 44032, 55203, + 55216, 55238, + 55243, 55291, + 63744, 64109, + 64112, 64217, + 64256, 64262, + 64275, 64279, + 64285, 64285, + 64287, 64296, + 64298, 64310, + 64312, 64316, + 64318, 64318, + 64320, 64321, + 64323, 64324, + 64326, 64433, + 64467, 64829, + 64848, 64911, + 64914, 64967, + 65008, 65019, + 65075, 65076, + 65101, 65103, + 65136, 65140, + 65142, 65276, + 65296, 65305, + 65313, 65338, + 65343, 65343, + 65345, 65370, + 65382, 65470, + 
65474, 65479, + 65482, 65487, + 65490, 65495, + 65498, 65500, + 65536, 65547, + 65549, 65574, + 65576, 65594, + 65596, 65597, + 65599, 65613, + 65616, 65629, + 65664, 65786, + 66176, 66204, + 66208, 66256, + 66304, 66335, + 66349, 66368, + 66370, 66377, + 66384, 66421, + 66432, 66461, + 66464, 66499, + 66504, 66511, + 66560, 66717, + 66720, 66729, + 66736, 66771, + 66776, 66811, + 66816, 66855, + 66864, 66915, + 66928, 66938, + 66940, 66954, + 66956, 66962, + 66964, 66965, + 66967, 66977, + 66979, 66993, + 66995, 67001, + 67003, 67004, + 67072, 67382, + 67392, 67413, + 67424, 67431, + 67456, 67461, + 67463, 67504, + 67506, 67514, + 67584, 67589, + 67592, 67592, + 67594, 67637, + 67639, 67640, + 67644, 67644, + 67647, 67669, + 67680, 67702, + 67712, 67742, + 67808, 67826, + 67828, 67829, + 67840, 67861, + 67872, 67897, + 67968, 68023, + 68030, 68031, + 68096, 68096, + 68112, 68115, + 68117, 68119, + 68121, 68149, + 68192, 68220, + 68224, 68252, + 68288, 68295, + 68297, 68324, + 68352, 68405, + 68416, 68437, + 68448, 68466, + 68480, 68497, + 68608, 68680, + 68736, 68786, + 68800, 68850, + 68864, 68899, + 68912, 68921, + 69248, 69289, + 69296, 69297, + 69376, 69404, + 69415, 69415, + 69424, 69445, + 69488, 69505, + 69552, 69572, + 69600, 69622, + 69635, 69687, + 69734, 69743, + 69745, 69746, + 69749, 69749, + 69763, 69807, + 69840, 69864, + 69872, 69881, + 69891, 69926, + 69942, 69951, + 69956, 69956, + 69959, 69959, + 69968, 70002, + 70006, 70006, + 70019, 70066, + 70081, 70084, + 70096, 70106, + 70108, 70108, + 70144, 70161, + 70163, 70187, + 70207, 70208, + 70272, 70278, + 70280, 70280, + 70282, 70285, + 70287, 70301, + 70303, 70312, + 70320, 70366, + 70384, 70393, + 70405, 70412, + 70415, 70416, + 70419, 70440, + 70442, 70448, + 70450, 70451, + 70453, 70457, + 70461, 70461, + 70480, 70480, + 70493, 70497, + 70656, 70708, + 70727, 70730, + 70736, 70745, + 70751, 70753, + 70784, 70831, + 70852, 70853, + 70855, 70855, + 70864, 70873, + 71040, 71086, + 71128, 71131, + 
71168, 71215, + 71236, 71236, + 71248, 71257, + 71296, 71338, + 71352, 71352, + 71360, 71369, + 71424, 71450, + 71472, 71481, + 71488, 71494, + 71680, 71723, + 71840, 71913, + 71935, 71942, + 71945, 71945, + 71948, 71955, + 71957, 71958, + 71960, 71983, + 71999, 71999, + 72001, 72001, + 72016, 72025, + 72096, 72103, + 72106, 72144, + 72161, 72161, + 72163, 72163, + 72192, 72192, + 72203, 72242, + 72250, 72250, + 72272, 72272, + 72284, 72329, + 72349, 72349, + 72368, 72440, + 72704, 72712, + 72714, 72750, + 72768, 72768, + 72784, 72793, + 72818, 72847, + 72960, 72966, + 72968, 72969, + 72971, 73008, + 73030, 73030, + 73040, 73049, + 73056, 73061, + 73063, 73064, + 73066, 73097, + 73112, 73112, + 73120, 73129, + 73440, 73458, + 73474, 73474, + 73476, 73488, + 73490, 73523, + 73552, 73561, + 73648, 73648, + 73728, 74649, + 74880, 75075, + 77712, 77808, + 77824, 78895, + 78913, 78918, + 82944, 83526, + 92160, 92728, + 92736, 92766, + 92768, 92777, + 92784, 92862, + 92864, 92873, + 92880, 92909, + 92928, 92975, + 92992, 92995, + 93008, 93017, + 93027, 93047, + 93053, 93071, + 93760, 93823, + 93952, 94026, + 94032, 94032, + 94099, 94111, + 94176, 94177, + 94179, 94179, + 94208, 100343, + 100352, 101589, + 101632, 101640, + 110576, 110579, + 110581, 110587, + 110589, 110590, + 110592, 110882, + 110898, 110898, + 110928, 110930, + 110933, 110933, + 110948, 110951, + 110960, 111355, + 113664, 113770, + 113776, 113788, + 113792, 113800, + 113808, 113817, + 119808, 119892, + 119894, 119964, + 119966, 119967, + 119970, 119970, + 119973, 119974, + 119977, 119980, + 119982, 119993, + 119995, 119995, + 119997, 120003, + 120005, 120069, + 120071, 120074, + 120077, 120084, + 120086, 120092, + 120094, 120121, + 120123, 120126, + 120128, 120132, + 120134, 120134, + 120138, 120144, + 120146, 120485, + 120488, 120512, + 120514, 120538, + 120540, 120570, + 120572, 120596, + 120598, 120628, + 120630, 120654, + 120656, 120686, + 120688, 120712, + 120714, 120744, + 120746, 120770, + 
120772, 120779, + 120782, 120831, + 122624, 122654, + 122661, 122666, + 122928, 122989, + 123136, 123180, + 123191, 123197, + 123200, 123209, + 123214, 123214, + 123536, 123565, + 123584, 123627, + 123632, 123641, + 124112, 124139, + 124144, 124153, + 124896, 124902, + 124904, 124907, + 124909, 124910, + 124912, 124926, + 124928, 125124, + 125184, 125251, + 125259, 125259, + 125264, 125273, + 126464, 126467, + 126469, 126495, + 126497, 126498, + 126500, 126500, + 126503, 126503, + 126505, 126514, + 126516, 126519, + 126521, 126521, + 126523, 126523, + 126530, 126530, + 126535, 126535, + 126537, 126537, + 126539, 126539, + 126541, 126543, + 126545, 126546, + 126548, 126548, + 126551, 126551, + 126553, 126553, + 126555, 126555, + 126557, 126557, + 126559, 126559, + 126561, 126562, + 126564, 126564, + 126567, 126570, + 126572, 126578, + 126580, 126583, + 126585, 126588, + 126590, 126590, + 126592, 126601, + 126603, 126619, + 126625, 126627, + 126629, 126633, + 126635, 126651, + 130032, 130041, + 131072, 173791, + 173824, 177977, + 177984, 178205, + 178208, 183969, + 183984, 191456, + 191472, 192093, + 194560, 195101, + 196608, 201546, + 201552, 205743, + }; + static const uint16_t num = sizeof(word) / sizeof(int) / 2; + uint16_t min = 0; + uint16_t max = num - 1; + // binary search in table + if (c >= word[0] && c <= word[2 * num - 1]) + { + while (max >= min) + { + uint16_t mid = (min + max) / 2; + if (c < word[2 * mid]) + max = mid - 1; + else if (c > word[2 * mid + 1]) + min = mid + 1; + else + return true; + } + } + return false; + } + /// Check if a word begins before a match. 
+ inline bool at_wb() + { +#if WITH_SPAN + int c = got_; + if (c == Const::BOB || c == Const::UNK || c == '\n') + return true; + if (c == '_') + return false; + if ((c & 0xc0) == 0x80 && cur_ > 0) + { + size_t k = cur_ - 1; + if (k > 0 && (buf_[--k] & 0xc0) == 0x80) + if (k > 0 && (buf_[--k] & 0xc0) == 0x80) + if (k > 0) + --k; + c = utf8(&buf_[k]); + return !iswword(c); + } + return !std::isalnum(static_cast<unsigned char>(c)); +#else + return !isword(got_); +#endif + } + /// Check if a word ends after the match (true at EOF or when the character after the match is not a word character). + inline bool at_we( + int c, ///< character after the match + size_t k) ///< position in the buffer of the character after the match + { +#if WITH_SPAN + if (c == EOF) + return true; + if (c == '_') + return false; + if ((c & 0xc0) == 0xc0) + { + c = utf8(&buf_[k]); + return !iswword(c); + } + return !std::isalnum(static_cast<unsigned char>(c)); +#else + (void)k; + return !isword(c); +#endif + } + /// Check if match begins a word (after split with len_ > 0 or len_ = 0 for find). + inline bool at_bw() + { +#if WITH_SPAN + int c = static_cast<unsigned char>(txt_[len_]); + if (c == '_') + return true; + if ((c & 0xc0) == 0xc0) + { + c = utf8(&txt_[len_]); + return iswword(c); + } + return std::isalnum(static_cast<unsigned char>(c)); +#else + return isword(static_cast<unsigned char>(txt_[len_])); +#endif + } + /// Check if match ends a word. + inline bool at_ew(int c) + { + size_t k = pos_ + (c == EOF); + c = k > 1 ? static_cast<unsigned char>(buf_[k - 2]) : got_; +#if WITH_SPAN + if (c == Const::BOB || c == Const::UNK || c == '\n') + return false; + if (c == '_') + return true; + if ((c & 0xc0) == 0x80 && k > 2) + { + k -= 3; + if ((buf_[k] & 0xc0) == 0x80) + if (k > 0 && (buf_[--k] & 0xc0) == 0x80) + if (k > 0) + --k; + c = utf8(&buf_[k]); + return iswword(c); + } + return std::isalnum(static_cast<unsigned char>(c)); +#else + return isword(c); +#endif + } + /// Check end of word at match end boundary MATCH\>.
+ inline bool at_ewe(int c) ///< character last read with get() + { + return at_we(c, pos_) && at_ew(c); + } + /// Check begin of word at match end boundary MATCH\<. + inline bool at_bwe(int c) ///< character last read with get() + { + return !at_we(c, pos_) && !at_ew(c); + } + /// Check end of word at match begin boundary \>MATCH (after split with len_ > 0 or len_ = 0 for find). + inline bool at_ewb() + { + return !at_bw() && !at_wb(); + } + /// Check begin of word at match begin boundary \<MATCH (after split with len_ > 0 or len_ = 0 for find). + inline bool at_bwb() + { + return at_bw() && at_wb(); + } + /// Check not a word boundary at match end MATCH\B. + inline bool at_nwe(int c) ///< character last read with get() + { + return at_we(c, pos_) != at_ew(c); + } + /// Check not a word boundary at match begin \BMATCH (after split with len_ > 0 or len_ = 0 for find). + inline bool at_nwb() + { + return at_bw() != at_wb(); + } + /// Check word boundary at match end MATCH\b. + inline bool at_wbe(int c) ///< character last read with get() + { + return at_we(c, pos_) == at_ew(c); + } + /// Check word boundary at match begin \bMATCH (after split with len_ > 0 or len_ = 0 for find). + inline bool at_wbb() + { + return at_bw() == at_wb(); + } /// Returns true if input matched the pattern using method Const::SCAN, Const::FIND, Const::SPLIT, or Const::MATCH.
virtual size_t match(Method method) ///< Const::SCAN, Const::FIND, Const::SPLIT, or Const::MATCH /// @returns nonzero if input matched the pattern ; - // match() with optimized AVX512BW string search scheme defined in matcher_avx512bw.cpp + /// match() with optimized AVX512BW string search scheme defined in matcher_avx512bw.cpp size_t simd_match_avx512bw(Method method); - // match() with optimized AVX2 string search scheme defined in matcher_avx2.cpp + /// match() with optimized AVX2 string search scheme defined in matcher_avx2.cpp size_t simd_match_avx2(Method method); - /// Returns true if able to advance to next possible match - bool advance() - /// @returns true if possible match found - ; - /// optimized AVX512BW version of advance() defined in matcher_avx512bw.cpp - bool simd_advance_avx512bw(); - /// optimized AVX2 version of advance() defined in matcher_avx2.cpp - bool simd_advance_avx2(); + /// Initialize specialized (+ SSE2/NEON) pattern search methods to advance the engine to a possible match + void init_advance(); + /// Initialize specialized AVX2 pattern search methods to advance the engine to a possible match + void simd_init_advance_avx2(); + /// Initialize specialized AVX512BW pattern search methods to advance the engine to a possible match + void simd_init_advance_avx512bw(); + /// Default method is none (unset) + bool advance_none(size_t loc); + // Single needle (SSE2/NEON) methods + bool advance_pattern_pin1_pma(size_t loc); + bool advance_pattern_pin1_pmh(size_t loc); + // Generated multi-needle SSE2 or NEON methods + bool advance_pattern_pin2_one(size_t loc); + bool advance_pattern_pin2_pma(size_t loc); + bool advance_pattern_pin2_pmh(size_t loc); + bool advance_pattern_pin3_one(size_t loc); + bool advance_pattern_pin3_pma(size_t loc); + bool advance_pattern_pin3_pmh(size_t loc); + bool advance_pattern_pin4_one(size_t loc); + bool advance_pattern_pin4_pma(size_t loc); + bool advance_pattern_pin4_pmh(size_t loc); + bool 
advance_pattern_pin5_one(size_t loc); + bool advance_pattern_pin5_pma(size_t loc); + bool advance_pattern_pin5_pmh(size_t loc); + bool advance_pattern_pin6_one(size_t loc); + bool advance_pattern_pin6_pma(size_t loc); + bool advance_pattern_pin6_pmh(size_t loc); + bool advance_pattern_pin7_one(size_t loc); + bool advance_pattern_pin7_pma(size_t loc); + bool advance_pattern_pin7_pmh(size_t loc); + bool advance_pattern_pin8_one(size_t loc); + bool advance_pattern_pin8_pma(size_t loc); + bool advance_pattern_pin8_pmh(size_t loc); + // Single needle AVX2 methods + bool simd_advance_pattern_pin1_pma_avx2(size_t loc); + bool simd_advance_pattern_pin1_pmh_avx2(size_t loc); + // Generated AVX2 multi-needle methods + bool simd_advance_pattern_pin2_one_avx2(size_t loc); + bool simd_advance_pattern_pin2_pma_avx2(size_t loc); + bool simd_advance_pattern_pin2_pmh_avx2(size_t loc); + bool simd_advance_pattern_pin3_one_avx2(size_t loc); + bool simd_advance_pattern_pin3_pma_avx2(size_t loc); + bool simd_advance_pattern_pin3_pmh_avx2(size_t loc); + bool simd_advance_pattern_pin4_one_avx2(size_t loc); + bool simd_advance_pattern_pin4_pma_avx2(size_t loc); + bool simd_advance_pattern_pin4_pmh_avx2(size_t loc); + bool simd_advance_pattern_pin5_one_avx2(size_t loc); + bool simd_advance_pattern_pin5_pma_avx2(size_t loc); + bool simd_advance_pattern_pin5_pmh_avx2(size_t loc); + bool simd_advance_pattern_pin6_one_avx2(size_t loc); + bool simd_advance_pattern_pin6_pma_avx2(size_t loc); + bool simd_advance_pattern_pin6_pmh_avx2(size_t loc); + bool simd_advance_pattern_pin7_one_avx2(size_t loc); + bool simd_advance_pattern_pin7_pma_avx2(size_t loc); + bool simd_advance_pattern_pin7_pmh_avx2(size_t loc); + bool simd_advance_pattern_pin8_one_avx2(size_t loc); + bool simd_advance_pattern_pin8_pma_avx2(size_t loc); + bool simd_advance_pattern_pin8_pmh_avx2(size_t loc); + bool simd_advance_pattern_pin16_one_avx2(size_t loc); + bool simd_advance_pattern_pin16_pma_avx2(size_t loc); + bool 
simd_advance_pattern_pin16_pmh_avx2(size_t loc); + // Minimal long patterns + bool advance_pattern_min1(size_t loc); + bool advance_pattern_min2(size_t loc); + bool advance_pattern_min3(size_t loc); + bool advance_pattern_min4(size_t loc); + // Minimal long patterns + bool advance_pattern(size_t loc); + // One char methods + bool advance_char(size_t loc); + bool advance_char_pma(size_t loc); + bool advance_char_pmh(size_t loc); + // Few chars methods + template bool advance_chars(size_t loc); + template bool advance_chars_pma(size_t loc); + template bool advance_chars_pmh(size_t loc); + // Few chars AVX2 methods + template bool simd_advance_chars_avx2(size_t loc); + template bool simd_advance_chars_pma_avx2(size_t loc); + template bool simd_advance_chars_pmh_avx2(size_t loc); + // Few chars AVX512BW methods + template bool simd_advance_chars_avx512bw(size_t loc); + template bool simd_advance_chars_pma_avx512bw(size_t loc); + template bool simd_advance_chars_pmh_avx512bw(size_t loc); + // String methods + bool advance_string(size_t loc); +#if defined(WITH_STRING_PM) + bool advance_string_pma(size_t loc); + bool advance_string_pmh(size_t loc); +#endif + // String AVX2 methods + bool simd_advance_string_avx2(size_t loc); +#if defined(WITH_STRING_PM) + bool simd_advance_string_pma_avx2(size_t loc); + bool simd_advance_string_pmh_avx2(size_t loc); +#endif + // String AVX512BW methods + bool simd_advance_string_avx512bw(size_t loc); +#if defined(WITH_STRING_PM) + bool simd_advance_string_pma_avx512bw(size_t loc); + bool simd_advance_string_pmh_avx512bw(size_t loc); +#endif + // String NEON methods + bool simd_advance_string_neon(const char *&s, const char *e); +#if defined(WITH_STRING_PM) + bool simd_advance_string_pma_neon(const char *&s, const char *e); + bool simd_advance_string_pmh_neon(const char *&s, const char *e); +#endif + // Fallback Boyer-Moore methods + bool advance_string_bm(size_t loc); +#if defined(WITH_STRING_PM) + bool advance_string_bm_pma(size_t loc);
+ bool advance_string_bm_pmh(size_t loc); +#endif #if !defined(WITH_NO_INDENT) /// Update indentation column counter for indent() and dedent(). inline void newline() @@ -455,6 +1489,7 @@ class Matcher : public PatternMatcher { std::vector lap_; ///< lookahead position in input that heads a lookahead match (indexed by lookahead number) std::stack stk_; ///< stack to push/pop stops FSM fsm_; ///< local state for FSM code + bool (Matcher::* adv_)(size_t loc); ///< advance FIND bool mrk_; ///< indent \i or dedent \j in pattern found: should check and update indent stops bool anc_; ///< match is anchored, advance slowly to retry when searching }; diff --git a/ccl/rslang/import/reflex/include/reflex/pattern.h b/ccl/rslang/import/reflex/include/reflex/pattern.h index 577f470..4d53488 100644 --- a/ccl/rslang/import/reflex/include/reflex/pattern.h +++ b/ccl/rslang/import/reflex/include/reflex/pattern.h @@ -365,8 +365,8 @@ class Pattern { f |= pmh[h] & 4; h = hash(h, static_cast(*++s)); f |= pmh[h] & 8; - Pred m = 16; const char *e = s + n - 3; + Pred m = 16; while (f == 0 && ++s < e) { h = hash(h, static_cast(*s)); @@ -941,8 +941,7 @@ class Pattern { void check_dfa_closure( const DFA::State *state, int nest, - bool& peek, - bool& prev) const; + bool& peek) const; void gencode_dfa_closure( FILE *fd, const DFA::State *start, @@ -1172,7 +1171,7 @@ class Pattern { Index cut_; ///< DFA s-t cut to improve predict match and HFA accuracy together with lbk_ and cbk_ size_t len_; ///< length of chr_[], less or equal to 255 size_t min_; ///< patterns after the prefix are at least this long but no more than 8 - size_t pin_; ///< number of needles + size_t pin_; ///< number of needles, 0 to 16 std::bitset<256> cbk_; ///< characters to look back over when lbk_ > 0, never includes \n std::bitset<256> fst_; ///< the beginning characters of the pattern char chr_[256]; ///< pattern prefix string or character needles for needle-based search @@ -1183,7 +1182,7 @@ class Pattern { uint16_t lbm_; 
///< loopback minimum distance when lbk_ > 0 uint16_t lcp_; ///< primary least common character position in the pattern or 0xffff uint16_t lcs_; ///< secondary least common character position in the pattern or 0xffff - size_t bmd_; ///< Boyer-Moore jump distance on mismatch, B-M is enabled when bmd_ > 0 + size_t bmd_; ///< Boyer-Moore jump distance on mismatch, B-M is enabled when bmd_ > 0 (<= 255) uint8_t bms_[256]; ///< Boyer-Moore skip array float pms_; ///< ms elapsed time to parse regex float vms_; ///< ms elapsed time to compile DFA vertices @@ -1192,6 +1191,7 @@ class Pattern { float ams_; ///< ms elapsed time to analyze DFA for predict match and HFA size_t npy_; ///< entropy derived from the bitap array bit_[] bool one_; ///< true if matching one string stored in chr_[] without meta/anchors + bool bol_; ///< true if matching all patterns at the begin of a line with anchor ^ }; } // namespace reflex diff --git a/ccl/rslang/import/reflex/include/reflex/simd.h b/ccl/rslang/import/reflex/include/reflex/simd.h index 0fc2f7f..f5c805b 100644 --- a/ccl/rslang/import/reflex/include/reflex/simd.h +++ b/ccl/rslang/import/reflex/include/reflex/simd.h @@ -28,15 +28,19 @@ /** @file simd.h -@brief RE/flex SIMD intrinsics +@brief RE/flex SIMD primitives @author Robert van Engelen - engelen@genivia.com -@copyright (c) 2016-2022, Robert van Engelen, Genivia Inc. All rights reserved. +@copyright (c) 2016-2024, Robert van Engelen, Genivia Inc. All rights reserved. 
@copyright (c) BSD-3 License - see LICENSE.txt */ #ifndef SIMD_H #define SIMD_H +#include +#include +#include + #if defined(HAVE_AVX512BW) # include #elif defined(HAVE_AVX2) @@ -133,15 +137,19 @@ inline uint32_t popcountl(uint64_t x) } #endif -// Partially count newlines in string b up to and including position e in b, updates b close to e with uncounted part -extern size_t simd_nlcount_sse2(const char*& b, const char *e); -extern size_t simd_nlcount_avx2(const char*& b, const char *e); -extern size_t simd_nlcount_avx512bw(const char*& b, const char *e); +// Partially count newlines in string b up to e, updates b close to e with uncounted part +extern size_t simd_nlcount_avx2(const char *&b, const char *e); +extern size_t simd_nlcount_avx512bw(const char *&b, const char *e); } // namespace reflex #endif +namespace reflex { + +// Count newlines in string s up to t +extern size_t nlcount(const char *s, const char *t); + +} // namespace reflex + #endif - - diff --git a/ccl/rslang/import/reflex/lib/convert.cpp b/ccl/rslang/import/reflex/lib/convert.cpp index 206236b..f4589c7 100644 --- a/ccl/rslang/import/reflex/lib/convert.cpp +++ b/ccl/rslang/import/reflex/lib/convert.cpp @@ -778,7 +778,11 @@ static void insert_posix_class(const char *pattern, size_t len, size_t& pos, con else if (name[0] == 'A' && name[1] == 's') name = const_cast("ASCII"); } - const int *wc = Posix::range(name); + const int *wc = NULL; + if ((flags & convert_flag::unicode)) + wc = Unicode::range(name); + if (wc == NULL) + wc = Posix::range(name); if (wc == NULL) throw regex_error(regex_error::invalid_class, pattern, pos); if (*buf == '^') diff --git a/ccl/rslang/import/reflex/lib/input.cpp b/ccl/rslang/import/reflex/lib/input.cpp index 01a0845..bf33a49 100644 --- a/ccl/rslang/import/reflex/lib/input.cpp +++ b/ccl/rslang/import/reflex/lib/input.cpp @@ -28,7 +28,7 @@ /** @file input.cpp -@brief RE/flex input character sequence class and simd.h CPUID check +@brief RE/flex input character sequence 
class @author Robert van Engelen - engelen@genivia.com @copyright (c) 2016-2020, Robert van Engelen, Genivia Inc. All rights reserved. @copyright (c) BSD-3 License - see LICENSE.txt @@ -1361,27 +1361,4 @@ void Input::file_encoding(unsigned short enc, const unsigned short *page) } } -#if defined(HAVE_AVX512BW) || defined(HAVE_AVX2) || defined(HAVE_SSE2) - -#include - -// simd.h get_HW() -static uint64_t get_HW() -{ - int CPUInfo1[4] = { 0, 0, 0, 0 }; - int CPUInfo7[4] = { 0, 0, 0, 0 }; - cpuidex(CPUInfo1, 0, 0); - int n = CPUInfo1[0]; - if (n <= 0) - return 0ULL; - cpuidex(CPUInfo1, 1, 0); // cpuid EAX=1 - if (n >= 7) - cpuidex(CPUInfo7, 7, 0); // cpuid EAX=7, ECX=0 - return static_cast(CPUInfo1[2]) | (static_cast(static_cast(CPUInfo7[1])) << 32); -} - -uint64_t HW = get_HW(); - -#endif - } // namespace reflex diff --git a/ccl/rslang/import/reflex/lib/matcher.cpp b/ccl/rslang/import/reflex/lib/matcher.cpp index 3918c00..87358f2 100644 --- a/ccl/rslang/import/reflex/lib/matcher.cpp +++ b/ccl/rslang/import/reflex/lib/matcher.cpp @@ -27,68 +27,21 @@ \******************************************************************************/ /** -@file matcher.cpp, matcher_avx2.cpp, matcher_avx512bw.cpp +@file matcher.cpp regex engine @brief RE/flex matcher engine @author Robert van Engelen - engelen@genivia.com -@copyright (c) 2016-2022, Robert van Engelen, Genivia Inc. All rights reserved. +@copyright (c) 2016-2024, Robert van Engelen, Genivia Inc. All rights reserved. @copyright (c) BSD-3 License - see LICENSE.txt */ -#if defined(COMPILE_AVX512BW) && !defined(HAVE_AVX512BW) - -// appease ranlib "has no symbols" -void matcher_not_compiled_with_avx512bw() { } - -#elif defined(COMPILE_AVX2) && !defined(HAVE_AVX2) && !defined(HAVE_AVX512BW) - -// appease ranlib "has no symbols" -void matcher_not_compiled_with_avx2() { } - -#else - #include namespace reflex { -/* - The simd_match_avx512bw() and simd_match_avx2() methods are AVX-optimized - versions of the match() method. 
To compile these methods separately with - the appropriate compilation flags, this file is copied to - matcher_avx512bw.cpp and matcher_avx2.cpp then compiled with -mavx512bw - -DCOMPILE_AVX512BW -DHAVE_AVX512BW and with -mavx2 -DCOMPILE_AVX2 - -DHAVE_AVX2, respectively. Likewise, the simd_advance_avx512bw() and - simd_advance_avx2() methods are optimized versions and separately compiled. - This approach is preferred over maintaining three separate copies of source - code files with these methods that only slightly differ. On the other hand, - combining these versions into one source file means more #if branches. - - If -DHAVE_AVX512BW is not defined, -DCOMPILE_AVX512BW has no effect. - Likewise, if -DHAVE_AVX2 is not defined, -DCOMPILE_AVX2 has no effect. -*/ - -#if defined(COMPILE_AVX512BW) -/// Compile an optimized AVX512BW version defined in matcher_avx2.cpp -size_t Matcher::simd_match_avx512bw(Method method) -{ -#elif defined(COMPILE_AVX2) -/// Compile an optimized AVX2 version defined in matcher_avx512bw.cpp -size_t Matcher::simd_match_avx2(Method method) -{ -#else /// Returns true if input matched the pattern using method Const::SCAN, Const::FIND, Const::SPLIT, or Const::MATCH. size_t Matcher::match(Method method) { DBGLOG("BEGIN Matcher::match()"); -#if defined(HAVE_AVX512BW) && (!defined(_MSC_VER) || defined(_WIN64)) - if (have_HW_AVX512BW()) - return simd_match_avx512bw(method); - if (have_HW_AVX2()) - return simd_match_avx2(method); -#elif defined(HAVE_AVX2) - if (have_HW_AVX2()) - return simd_match_avx2(method); -#endif -#endif reset_text(); len_ = 0; // split text length starts with 0 anc_ = false; // no word boundary anchor found and applied @@ -101,11 +54,11 @@ scan: col_ = 0; // count columns for indent matching #endif find: - int c1 = got_; + int ch = got_; bool bol = at_bol(); // at begin of line? 
#if !defined(WITH_NO_CODEGEN) if (pat_->fsm_ != NULL) - fsm_.c1 = c1; + fsm_.ch = ch; #endif #if !defined(WITH_NO_INDENT) redo: @@ -113,359 +66,367 @@ redo: lap_.resize(0); cap_ = 0; bool nul = method == Const::MATCH; + if (!opt_.W || at_wb()) + { + // skip to next line and keep searching if matching on anchor ^ and not at begin of line + if (method == Const::FIND && pat_->bol_ && !bol) + if (skip('\n')) + goto scan; #if !defined(WITH_NO_CODEGEN) - if (pat_->fsm_ != NULL) - { - DBGLOG("FSM code %p", pat_->fsm_); - fsm_.bol = bol; - fsm_.nul = nul; - pat_->fsm_(*this); - nul = fsm_.nul; - c1 = fsm_.c1; - } - else -#endif - if (pat_->opc_ != NULL) - { - const Pattern::Opcode *pc = pat_->opc_; - Pattern::Index back = Pattern::Const::IMAX; // where to jump back to - size_t bpos = 0; // backtrack position in the input - while (true) + if (pat_->fsm_ != NULL) { - Pattern::Index jump; - Pattern::Opcode opcode = *pc; - DBGLOG("Fetch: code[%zu] = 0x%08X", pc - pat_->opc_, opcode); - if (!Pattern::is_opcode_goto(opcode)) - { - switch (opcode >> 24) - { - case 0xFE: // TAKE - cap_ = Pattern::long_index_of(opcode); - cur_ = pos_; - ++pc; - DBGLOG("Take: cap = %zu", cap_); - continue; - case 0xFD: // REDO - cap_ = Const::REDO; - DBGLOG("Redo"); - cur_ = pos_; - ++pc; - continue; - case 0xFC: // TAIL - { - Pattern::Lookahead la = Pattern::lookahead_of(opcode); - DBGLOG("Tail: %u", la); - if (lap_.size() > la && lap_[la] >= 0) - cur_ = txt_ - buf_ + static_cast(lap_[la]); // mind the (new) gap - ++pc; - continue; - } - case 0xFB: // HEAD - { - Pattern::Lookahead la = Pattern::lookahead_of(opcode); - DBGLOG("Head: lookahead[%u] = %zu", la, pos_ - (txt_ - buf_)); - if (lap_.size() <= la) - lap_.resize(la + 1, -1); - lap_[la] = static_cast(pos_ - (txt_ - buf_)); // mind the gap - ++pc; - continue; - } -#if !defined(WITH_NO_INDENT) - case Pattern::META_DED - Pattern::META_MIN: - if (ded_ > 0) - { - jump = Pattern::index_of(opcode); - if (jump == Pattern::Const::LONG) - jump = 
Pattern::long_index_of(pc[1]); - DBGLOG("Dedent ded = %zu", ded_); // unconditional dedent matching \j - nul = true; - pc = pat_->opc_ + jump; - continue; - } + DBGLOG("FSM code %p", pat_->fsm_); + fsm_.bol = bol; + fsm_.nul = nul; + pat_->fsm_(*this); + nul = fsm_.nul; + ch = fsm_.ch; + } + else #endif - } - if (c1 == EOF) - break; - int c0 = c1; - c1 = get(); - DBGLOG("Get: c1 = %d", c1); - // to jump to longest sequence of matching metas - jump = Pattern::Const::IMAX; - while (true) + if (pat_->opc_ != NULL) + { + const Pattern::Opcode *pc = pat_->opc_; + Pattern::Index back = Pattern::Const::IMAX; // where to jump back to + size_t bpos = 0; // backtrack position in the input + while (true) + { + Pattern::Index jump; + Pattern::Opcode opcode = *pc; + DBGLOG("Fetch: code[%zu] = 0x%08X", pc - pat_->opc_, opcode); + if (!Pattern::is_opcode_goto(opcode)) { - if (jump == Pattern::Const::IMAX || back == Pattern::Const::IMAX) + switch (opcode >> 24) { - if (!Pattern::is_opcode_goto(opcode)) - { - // we no longer have to pass through all if jump and back are set - switch (opcode >> 24) + case 0xFE: // TAKE { - case 0xFE: // TAKE + int c; + if (!opt_.W || (c = peek(), at_we(c, pos_))) + { cap_ = Pattern::long_index_of(opcode); - cur_ = pos_; - if (c1 != EOF) - --cur_; // must unget one char - opcode = *++pc; DBGLOG("Take: cap = %zu", cap_); - continue; - case 0xFD: // REDO - cap_ = Const::REDO; - DBGLOG("Redo"); cur_ = pos_; - if (c1 != EOF) - --cur_; // must unget one char - opcode = *++pc; - continue; - case 0xFC: // TAIL - { - Pattern::Lookahead la = Pattern::lookahead_of(opcode); - DBGLOG("Tail: %u", la); - if (lap_.size() > la && lap_[la] >= 0) - cur_ = txt_ - buf_ + static_cast(lap_[la]); // mind the (new) gap + } + } + ++pc; + continue; + case 0xFD: // REDO + cap_ = Const::REDO; + DBGLOG("Redo"); + cur_ = pos_; + ++pc; + continue; + case 0xFC: // TAIL + { + Pattern::Lookahead la = Pattern::lookahead_of(opcode); + DBGLOG("Tail: %u", la); + if (lap_.size() > la && 
lap_[la] >= 0) + cur_ = txt_ - buf_ + static_cast(lap_[la]); // mind the (new) gap + ++pc; + continue; + } + case 0xFB: // HEAD + { + Pattern::Lookahead la = Pattern::lookahead_of(opcode); + DBGLOG("Head: lookahead[%u] = %zu", la, pos_ - (txt_ - buf_)); + if (lap_.size() <= la) + lap_.resize(la + 1, -1); + lap_[la] = static_cast(pos_ - (txt_ - buf_)); // mind the gap + ++pc; + continue; + } +#if !defined(WITH_NO_INDENT) + case Pattern::META_DED - Pattern::META_MIN: + if (ded_ > 0) + { + jump = Pattern::index_of(opcode); + if (jump == Pattern::Const::LONG) + jump = Pattern::long_index_of(pc[1]); + DBGLOG("Dedent ded = %zu", ded_); // unconditional dedent matching \j + nul = true; + pc = pat_->opc_ + jump; + continue; + } +#endif + } + if (ch == EOF) + break; + ch = get(); + DBGLOG("Get: ch = %d", ch); + // to jump to longest sequence of matching metas + jump = Pattern::Const::IMAX; + while (true) + { + if (jump == Pattern::Const::IMAX || back == Pattern::Const::IMAX) + { + if (!Pattern::is_opcode_goto(opcode)) + { + // we no longer have to pass through all if jump and back are set + switch (opcode >> 24) + { + case 0xFE: // TAKE + if (!opt_.W || at_we(ch, pos_ - 1)) + { + cap_ = Pattern::long_index_of(opcode); + DBGLOG("Take: cap = %zu", cap_); + cur_ = pos_; + if (ch != EOF) + --cur_; // must unget one char + } + opcode = *++pc; + continue; + case 0xFD: // REDO + cap_ = Const::REDO; + DBGLOG("Redo"); + cur_ = pos_; + if (ch != EOF) + --cur_; // must unget one char + opcode = *++pc; + continue; + case 0xFC: // TAIL + { + Pattern::Lookahead la = Pattern::lookahead_of(opcode); + DBGLOG("Tail: %u", la); + if (lap_.size() > la && lap_[la] >= 0) + cur_ = txt_ - buf_ + static_cast(lap_[la]); // mind the (new) gap + opcode = *++pc; + continue; + } + case 0xFB: // HEAD opcode = *++pc; continue; - } - case 0xFB: // HEAD - opcode = *++pc; - continue; #if !defined(WITH_NO_INDENT) - case Pattern::META_DED - Pattern::META_MIN: - DBGLOG("DED? 
%d", c1); - if (jump == Pattern::Const::IMAX && back == Pattern::Const::IMAX && bol && dedent()) - { - jump = Pattern::index_of(opcode); - if (jump == Pattern::Const::LONG) - jump = Pattern::long_index_of(*++pc); - } - opcode = *++pc; - continue; - case Pattern::META_IND - Pattern::META_MIN: - DBGLOG("IND? %d", c1); - if (jump == Pattern::Const::IMAX && back == Pattern::Const::IMAX && bol && indent()) - { - jump = Pattern::index_of(opcode); - if (jump == Pattern::Const::LONG) - jump = Pattern::long_index_of(*++pc); - } - opcode = *++pc; - continue; - case Pattern::META_UND - Pattern::META_MIN: - DBGLOG("UND"); - if (mrk_) - { - jump = Pattern::index_of(opcode); - if (jump == Pattern::Const::LONG) - jump = Pattern::long_index_of(*++pc); - } - mrk_ = false; - ded_ = 0; - opcode = *++pc; - continue; + case Pattern::META_DED - Pattern::META_MIN: + DBGLOG("DED? %d", ch); + if (jump == Pattern::Const::IMAX && back == Pattern::Const::IMAX && bol && dedent()) + { + jump = Pattern::index_of(opcode); + if (jump == Pattern::Const::LONG) + jump = Pattern::long_index_of(*++pc); + } + opcode = *++pc; + continue; + case Pattern::META_IND - Pattern::META_MIN: + DBGLOG("IND? %d", ch); + if (jump == Pattern::Const::IMAX && back == Pattern::Const::IMAX && bol && indent()) + { + jump = Pattern::index_of(opcode); + if (jump == Pattern::Const::LONG) + jump = Pattern::long_index_of(*++pc); + } + opcode = *++pc; + continue; + case Pattern::META_UND - Pattern::META_MIN: + DBGLOG("UND"); + if (mrk_) + { + jump = Pattern::index_of(opcode); + if (jump == Pattern::Const::LONG) + jump = Pattern::long_index_of(*++pc); + } + mrk_ = false; + ded_ = 0; + opcode = *++pc; + continue; #endif - case Pattern::META_EOB - Pattern::META_MIN: - DBGLOG("EOB? 
%d", c1); - if (jump == Pattern::Const::IMAX && c1 == EOF) - { - jump = Pattern::index_of(opcode); - if (jump == Pattern::Const::LONG) - jump = Pattern::long_index_of(*++pc); - } - opcode = *++pc; - continue; - case Pattern::META_BOB - Pattern::META_MIN: - DBGLOG("BOB? %d", at_bob()); - if (jump == Pattern::Const::IMAX && at_bob()) - { - jump = Pattern::index_of(opcode); - if (jump == Pattern::Const::LONG) - jump = Pattern::long_index_of(*++pc); - } - opcode = *++pc; - continue; - case Pattern::META_EOL - Pattern::META_MIN: - DBGLOG("EOL? %d", c1); - anc_ = true; - if (jump == Pattern::Const::IMAX && (c1 == EOF || c1 == '\n' || (c1 == '\r' && peek() == '\n'))) - { - jump = Pattern::index_of(opcode); - if (jump == Pattern::Const::LONG) - jump = Pattern::long_index_of(*++pc); - } - opcode = *++pc; - continue; - case Pattern::META_BOL - Pattern::META_MIN: - DBGLOG("BOL? %d", bol); - anc_ = true; - if (jump == Pattern::Const::IMAX && bol) - { - jump = Pattern::index_of(opcode); - if (jump == Pattern::Const::LONG) - jump = Pattern::long_index_of(*++pc); - } - opcode = *++pc; - continue; - case Pattern::META_EWE - Pattern::META_MIN: - DBGLOG("EWE? %d %d %d", c0, c1, isword(c0) && !isword(c1)); - anc_ = true; - if (jump == Pattern::Const::IMAX && (isword(c0) || opt_.W) && !isword(c1)) - { - jump = Pattern::index_of(opcode); - if (jump == Pattern::Const::LONG) - jump = Pattern::long_index_of(*++pc); - } - opcode = *++pc; - continue; - case Pattern::META_BWE - Pattern::META_MIN: - DBGLOG("BWE? %d %d %d", c0, c1, !isword(c0) && isword(c1)); - anc_ = true; - if (jump == Pattern::Const::IMAX && !isword(c0) && isword(c1)) - { - jump = Pattern::index_of(opcode); - if (jump == Pattern::Const::LONG) - jump = Pattern::long_index_of(*++pc); - } - opcode = *++pc; - continue; - case Pattern::META_EWB - Pattern::META_MIN: - DBGLOG("EWB? 
%d", at_eow()); - anc_ = true; - if (jump == Pattern::Const::IMAX && isword(got_) && - !isword(static_cast(txt_[len_]))) - { - jump = Pattern::index_of(opcode); - if (jump == Pattern::Const::LONG) - jump = Pattern::long_index_of(*++pc); - } - opcode = *++pc; - continue; - case Pattern::META_BWB - Pattern::META_MIN: - DBGLOG("BWB? %d", at_bow()); - anc_ = true; - if (jump == Pattern::Const::IMAX && !isword(got_) && - (opt_.W || isword(static_cast(txt_[len_])))) - { - jump = Pattern::index_of(opcode); - if (jump == Pattern::Const::LONG) - jump = Pattern::long_index_of(*++pc); - } - opcode = *++pc; - continue; - case Pattern::META_NWE - Pattern::META_MIN: - DBGLOG("NWE? %d %d %d", c0, c1, isword(c0) == isword(c1)); - anc_ = true; - if (jump == Pattern::Const::IMAX && isword(c0) == isword(c1)) - { - jump = Pattern::index_of(opcode); - if (jump == Pattern::Const::LONG) - jump = Pattern::long_index_of(*++pc); - } - opcode = *++pc; - continue; - case Pattern::META_NWB - Pattern::META_MIN: - DBGLOG("NWB? %d %d", at_bow(), at_eow()); - anc_ = true; - if (jump == Pattern::Const::IMAX && - isword(got_) == isword(static_cast(txt_[len_]))) - { - jump = Pattern::index_of(opcode); - if (jump == Pattern::Const::LONG) - jump = Pattern::long_index_of(*++pc); - } - opcode = *++pc; - continue; - case Pattern::META_WBE - Pattern::META_MIN: - DBGLOG("WBE? %d %d %d", c0, c1, isword(c0) != isword(c1)); - anc_ = true; - if (jump == Pattern::Const::IMAX && isword(c0) != isword(c1)) - { - jump = Pattern::index_of(opcode); - if (jump == Pattern::Const::LONG) - jump = Pattern::long_index_of(*++pc); - } - opcode = *++pc; - continue; - case Pattern::META_WBB - Pattern::META_MIN: - DBGLOG("WBB? 
%d %d", at_bow(), at_eow()); - anc_ = true; - if (jump == Pattern::Const::IMAX && - isword(got_) != isword(static_cast(txt_[len_]))) - { - jump = Pattern::index_of(opcode); - if (jump == Pattern::Const::LONG) - jump = Pattern::long_index_of(*++pc); - } - opcode = *++pc; - continue; - case 0xFF: // LONG - opcode = *++pc; - continue; + case Pattern::META_EOB - Pattern::META_MIN: + DBGLOG("EOB? %d", ch); + if (jump == Pattern::Const::IMAX && ch == EOF) + { + jump = Pattern::index_of(opcode); + if (jump == Pattern::Const::LONG) + jump = Pattern::long_index_of(*++pc); + } + opcode = *++pc; + continue; + case Pattern::META_BOB - Pattern::META_MIN: + DBGLOG("BOB? %d", at_bob()); + if (jump == Pattern::Const::IMAX && at_bob()) + { + jump = Pattern::index_of(opcode); + if (jump == Pattern::Const::LONG) + jump = Pattern::long_index_of(*++pc); + } + opcode = *++pc; + continue; + case Pattern::META_EOL - Pattern::META_MIN: + DBGLOG("EOL? %d", ch); + anc_ = true; + if (jump == Pattern::Const::IMAX && + (ch == EOF || ch == '\n' || (ch == '\r' && peek() == '\n'))) + { + jump = Pattern::index_of(opcode); + if (jump == Pattern::Const::LONG) + jump = Pattern::long_index_of(*++pc); + } + opcode = *++pc; + continue; + case Pattern::META_BOL - Pattern::META_MIN: + DBGLOG("BOL? %d", bol); + anc_ = true; + if (jump == Pattern::Const::IMAX && bol) + { + jump = Pattern::index_of(opcode); + if (jump == Pattern::Const::LONG) + jump = Pattern::long_index_of(*++pc); + } + opcode = *++pc; + continue; + case Pattern::META_EWE - Pattern::META_MIN: + DBGLOG("EWE? %d", at_ewe(ch)); + anc_ = true; + if (jump == Pattern::Const::IMAX && at_ewe(ch)) + { + jump = Pattern::index_of(opcode); + if (jump == Pattern::Const::LONG) + jump = Pattern::long_index_of(*++pc); + } + opcode = *++pc; + continue; + case Pattern::META_BWE - Pattern::META_MIN: + DBGLOG("BWE? 
%d", at_bwe(ch)); + anc_ = true; + if (jump == Pattern::Const::IMAX && at_bwe(ch)) + { + jump = Pattern::index_of(opcode); + if (jump == Pattern::Const::LONG) + jump = Pattern::long_index_of(*++pc); + } + opcode = *++pc; + continue; + case Pattern::META_EWB - Pattern::META_MIN: + DBGLOG("EWB? %d", at_ewb()); + anc_ = true; + if (jump == Pattern::Const::IMAX && at_ewb()) + { + jump = Pattern::index_of(opcode); + if (jump == Pattern::Const::LONG) + jump = Pattern::long_index_of(*++pc); + } + opcode = *++pc; + continue; + case Pattern::META_BWB - Pattern::META_MIN: + DBGLOG("BWB? %d", at_bwb()); + anc_ = true; + if (jump == Pattern::Const::IMAX && at_bwb()) + { + jump = Pattern::index_of(opcode); + if (jump == Pattern::Const::LONG) + jump = Pattern::long_index_of(*++pc); + } + opcode = *++pc; + continue; + case Pattern::META_NWE - Pattern::META_MIN: + DBGLOG("NWE? %d", at_nwe(ch)); + anc_ = true; + if (jump == Pattern::Const::IMAX && at_nwe(ch)) + { + jump = Pattern::index_of(opcode); + if (jump == Pattern::Const::LONG) + jump = Pattern::long_index_of(*++pc); + } + opcode = *++pc; + continue; + case Pattern::META_NWB - Pattern::META_MIN: + DBGLOG("NWB? %d", at_nwb()); + anc_ = true; + if (jump == Pattern::Const::IMAX && at_nwb()) + { + jump = Pattern::index_of(opcode); + if (jump == Pattern::Const::LONG) + jump = Pattern::long_index_of(*++pc); + } + opcode = *++pc; + continue; + case Pattern::META_WBE - Pattern::META_MIN: + DBGLOG("WBE? %d", at_wbe(ch)); + anc_ = true; + if (jump == Pattern::Const::IMAX && at_wbe(ch)) + { + jump = Pattern::index_of(opcode); + if (jump == Pattern::Const::LONG) + jump = Pattern::long_index_of(*++pc); + } + opcode = *++pc; + continue; + case Pattern::META_WBB - Pattern::META_MIN: + DBGLOG("WBB? 
%d", at_wbb()); + anc_ = true; + if (jump == Pattern::Const::IMAX && at_wbb()) + { + jump = Pattern::index_of(opcode); + if (jump == Pattern::Const::LONG) + jump = Pattern::long_index_of(*++pc); + } + opcode = *++pc; + continue; + case 0xFF: // LONG + opcode = *++pc; + continue; + } } - } - else if (c1 != EOF && !Pattern::is_opcode_halt(opcode)) - { - if (jump == Pattern::Const::IMAX) - break; - if (back == Pattern::Const::IMAX) + else if (ch != EOF && !Pattern::is_opcode_halt(opcode)) { - back = static_cast(pc - pat_->opc_); - bpos = pos_ - (txt_ - buf_) - 1; - DBGLOG("Backtrack point: back = %u pos = %zu", back, bpos); + if (jump == Pattern::Const::IMAX) + break; + if (back == Pattern::Const::IMAX) + { + back = static_cast(pc - pat_->opc_); + bpos = pos_ - (txt_ - buf_) - 1; + DBGLOG("Backtrack point: back = %u pos = %zu", back, bpos); + } + pc = pat_->opc_ + jump; + opcode = *pc; } - pc = pat_->opc_ + jump; - opcode = *pc; } + if (jump == Pattern::Const::IMAX) + { + if (back != Pattern::Const::IMAX) + { + pc = pat_->opc_ + back; + opcode = *pc; + back = Pattern::Const::IMAX; + } + break; + } + DBGLOG("Try jump = %u", jump); + if (back == Pattern::Const::IMAX) + { + back = static_cast(pc - pat_->opc_); + bpos = pos_ - (txt_ - buf_) - 1; + DBGLOG("Backtrack point: back = %u pos = %zu", back, bpos); + } + pc = pat_->opc_ + jump; + opcode = *pc; + jump = Pattern::Const::IMAX; } - if (jump == Pattern::Const::IMAX) + if (ch == EOF) + break; + } + else + { + if (Pattern::is_opcode_halt(opcode)) { if (back != Pattern::Const::IMAX) { + pos_ = (txt_ - buf_) + bpos; pc = pat_->opc_ + back; - opcode = *pc; + DBGLOG("Backtrack: back = %u pos = %zu ch = %d", back, pos_, ch); back = Pattern::Const::IMAX; + continue; } break; } - DBGLOG("Try jump = %u", jump); - if (back == Pattern::Const::IMAX) - { - back = static_cast(pc - pat_->opc_); - bpos = pos_ - (txt_ - buf_) - 1; - DBGLOG("Backtrack point: back = %u pos = %zu", back, bpos); - } - pc = pat_->opc_ + jump; - opcode = *pc; 
- jump = Pattern::Const::IMAX; + if (ch == EOF) + break; + ch = get(); + DBGLOG("Get: ch = %d (0x%x) at pos %zu", ch, ch, pos_ - 1); + if (ch == EOF) + break; } - if (c1 == EOF) - break; - } - else - { - if (Pattern::is_opcode_halt(opcode)) - { - if (cap_ == 0 && back != Pattern::Const::IMAX) - { - pos_ = (txt_ - buf_) + bpos; - pc = pat_->opc_ + back; - DBGLOG("Backtrack: back = %u pos = %zu c1 = %d", back, pos_, c1); - back = Pattern::Const::IMAX; - continue; - } - break; - } - if (c1 == EOF) - break; - c1 = get(); - DBGLOG("Get: c1 = %d (0x%x) at pos %zu", c1, c1, pos_ - 1); - if (c1 == EOF) - break; - } - Pattern::Opcode lo = c1 << 24; - Pattern::Opcode hi = lo | 0x00FFFFFF; -unrolled: - if (hi < opcode || lo > (opcode << 8)) - { - opcode = *++pc; + Pattern::Opcode lo = ch << 24; + Pattern::Opcode hi = lo | 0x00FFFFFF; + unrolled: if (hi < opcode || lo > (opcode << 8)) { opcode = *++pc; @@ -487,7 +448,11 @@ unrolled: if (hi < opcode || lo > (opcode << 8)) { opcode = *++pc; - goto unrolled; + if (hi < opcode || lo > (opcode << 8)) + { + opcode = *++pc; + goto unrolled; + } } } } @@ -495,36 +460,49 @@ unrolled: } } } - } - jump = Pattern::index_of(opcode); - if (jump == 0) - { - // loop back to start state w/o full match: advance to avoid backtracking, not used for lookback - if (cap_ == 0 && pos_ > cur_ && method == Const::FIND) + jump = Pattern::index_of(opcode); + if (jump == 0) { - // use bit_[] to check each char in buf_[cur_+1..pos_-1] if it is a starting char, if not then increase cur_ - while (++cur_ < pos_ && !pat_->fst_.test(static_cast(buf_[cur_]))) - if (retry > 0) - --retry; - } - } - else if (jump >= Pattern::Const::LONG) - { - if (jump == Pattern::Const::HALT) - { - if (cap_ == 0 && back != Pattern::Const::IMAX) + // loop back to start state w/o full match: advance to avoid backtracking + if (cap_ == 0 && method == Const::FIND) { - pc = pat_->opc_ + back; - pos_ = (txt_ - buf_) + bpos; - DBGLOG("Backtrack: back = %u pos = %zu c1 = %d", back, pos_, 
c1); - back = Pattern::Const::IMAX; - continue; + if (cur_ + 1 == pos_) + { + // matched one char in a loop, do not backtrack here + ++cur_; + if (retry > 0) + --retry; + } + else + { + // check each char in buf_[cur_+1..pos_-1] if it is a starting char, if not then increase cur_ + while (cur_ + 1 < pos_ && !pat_->fst_.test(static_cast(buf_[cur_ + 1]))) + { + ++cur_; + if (retry > 0) + --retry; + } + } } - break; } - jump = Pattern::long_index_of(pc[1]); + else if (jump >= Pattern::Const::LONG) + { + if (jump == Pattern::Const::HALT) + { + if (back != Pattern::Const::IMAX) + { + pc = pat_->opc_ + back; + pos_ = (txt_ - buf_) + bpos; + DBGLOG("Backtrack: back = %u pos = %zu ch = %d", back, pos_, ch); + back = Pattern::Const::IMAX; + continue; + } + break; + } + jump = Pattern::long_index_of(pc[1]); + } + pc = pat_->opc_ + jump; } - pc = pat_->opc_ + jump; } } #if !defined(WITH_NO_INDENT) @@ -615,23 +593,15 @@ unrolled: DBGLOG("Find: try next pos %zu", cur_); goto scan; } - // + // anchor or boundary? if (anc_) { cur_ = txt_ - buf_; // reset current to pattern start when a word boundary was encountered anc_ = false; } - if (pos_ > cur_) // if we didn't fail on META alone + if (cur_ < pos_) // if we didn't fail on META alone { - if ( -#if defined(COMPILE_AVX512BW) - simd_advance_avx512bw() -#elif defined(COMPILE_AVX2) - simd_advance_avx2() -#else - advance() -#endif - ) + if ((this->*adv_)(cur_ + 1)) { if (pat_->lbk_ > 0) { @@ -655,14 +625,18 @@ unrolled: } if (!pat_->one_) goto scan; + size_t k = cur_ + pat_->len_; + ch = k < end_ ? 
static_cast(buf_[k]) : EOF; + if (opt_.W && (!at_wb() || !(at_end() || at_we(ch, k)))) + goto scan; txt_ = buf_ + cur_; len_ = pat_->len_; - set_current(cur_ + len_); + set_current(k); return cap_ = 1; } } - txt_ = buf_ + cur_; } + txt_ = buf_ + cur_; } else { @@ -690,18 +664,8 @@ unrolled: // if we found an empty match, we keep looking for non-empty matches when "N" is off if (cap_ != 0) { - if ( -#if defined(COMPILE_AVX512BW) - simd_advance_avx512bw() -#elif defined(COMPILE_AVX2) - simd_advance_avx2() -#else - advance() -#endif - ) - { + if ((this->*adv_)(cur_ + 1)) goto scan; - } set_current(++cur_); // at end of input, no matches remain cap_ = 0; @@ -717,8 +681,8 @@ unrolled: { // advance one char to keep searching at the next character position when we return set_current(++cur_); + DBGLOG("Accept empty match"); } - DBGLOG("Accept empty match"); } else { @@ -750,2457 +714,2746 @@ unrolled: return cap_; } -#if defined(COMPILE_AVX512BW) -/// Compile an optimized AVX512BW version defined in matcher_avx512bw.cpp -bool Matcher::simd_advance_avx512bw() +/// Initialize specialized pattern search methods to advance the engine to a possible match +void Matcher::init_advance() { -#elif defined(COMPILE_AVX2) -/// Compile an optimized AVX2 version defined in matcher_avx2.cpp -bool Matcher::simd_advance_avx2() -{ -#else -/// advance input cursor position after mismatch to align input for the next match -bool Matcher::advance() -{ -#endif - size_t loc = cur_ + 1; - size_t min = pat_->min_; - const Pattern::Pred *pma = pat_->pma_; - const Pattern::Pred *pmh = pat_->pmh_; + adv_ = &Matcher::advance_none; + if (pat_ == NULL) + return; if (pat_->len_ == 0) { - if (min == 0) + if (pat_->min_ == 0 && opt_.N) + return; + switch (pat_->pin_) { - // if "N" is on (non-empty pattern matches only), then there is nothing to match - if (opt_.N) - return false; - // if "N" is off, then match an empty-matching pattern as if non-empty - min = 1; + case 1: + if (pat_->min_ < 4) + adv_ = 
&Matcher::advance_pattern_pin1_pma; + else + adv_ = &Matcher::advance_pattern_pin1_pmh; + break; +#if defined(HAVE_AVX512BW) || defined(HAVE_AVX2) || defined(HAVE_SSE2) || defined(HAVE_NEON) + case 2: + if (pat_->min_ == 1) + adv_ = &Matcher::advance_pattern_pin2_one; + else if (pat_->min_ < 4) + adv_ = &Matcher::advance_pattern_pin2_pma; + else + adv_ = &Matcher::advance_pattern_pin2_pmh; + break; + case 3: + if (pat_->min_ == 1) + adv_ = &Matcher::advance_pattern_pin3_one; + else if (pat_->min_ < 4) + adv_ = &Matcher::advance_pattern_pin3_pma; + else + adv_ = &Matcher::advance_pattern_pin3_pmh; + break; + case 4: + if (pat_->min_ == 1) + adv_ = &Matcher::advance_pattern_pin4_one; + else if (pat_->min_ < 4) + adv_ = &Matcher::advance_pattern_pin4_pma; + else + adv_ = &Matcher::advance_pattern_pin4_pmh; + break; + case 5: + if (pat_->min_ == 1) + adv_ = &Matcher::advance_pattern_pin5_one; + else if (pat_->min_ < 4) + adv_ = &Matcher::advance_pattern_pin5_pma; + else + adv_ = &Matcher::advance_pattern_pin5_pmh; + break; + case 6: + if (pat_->min_ == 1) + adv_ = &Matcher::advance_pattern_pin6_one; + else if (pat_->min_ < 4) + adv_ = &Matcher::advance_pattern_pin6_pma; + else + adv_ = &Matcher::advance_pattern_pin6_pmh; + break; + case 7: + if (pat_->min_ == 1) + adv_ = &Matcher::advance_pattern_pin7_one; + else if (pat_->min_ < 4) + adv_ = &Matcher::advance_pattern_pin7_pma; + else + adv_ = &Matcher::advance_pattern_pin7_pmh; + break; + case 8: + if (pat_->min_ == 1) + adv_ = &Matcher::advance_pattern_pin8_one; + else if (pat_->min_ < 4) + adv_ = &Matcher::advance_pattern_pin8_pma; + else + adv_ = &Matcher::advance_pattern_pin8_pmh; + break; +#endif + default: + if (pat_->min_ >= 4 || pat_->npy_ < 16 || (pat_->min_ >= 2 && pat_->npy_ >= 56)) + { + switch (pat_->min_) + { + case 0: + case 1: + adv_ = &Matcher::advance_pattern_min1; + break; + case 2: + adv_ = &Matcher::advance_pattern_min2; + break; + case 3: + adv_ = &Matcher::advance_pattern_min3; + break; + 
default: + adv_ = &Matcher::advance_pattern_min4; + break; + } + } + else + { + adv_ = &Matcher::advance_pattern; + } } - if (loc + min > end_) + } + else if (pat_->len_ == 1) + { + if (pat_->min_ == 0) + adv_ = &Matcher::advance_char; + else if (pat_->min_ < 4) + adv_ = &Matcher::advance_char_pma; + else + adv_ = &Matcher::advance_char_pmh; + } + else if (pat_->len_ == 2) + { + if (pat_->min_ == 0) + adv_ = &Matcher::advance_chars<2>; + else if (pat_->min_ < 4) + adv_ = &Matcher::advance_chars_pma<2>; + else + adv_ = &Matcher::advance_chars_pmh<2>; + } + else if (pat_->len_ == 3) + { + if (pat_->min_ == 0) + adv_ = &Matcher::advance_chars<3>; + else if (pat_->min_ < 4) + adv_ = &Matcher::advance_chars_pma<3>; + else + adv_ = &Matcher::advance_chars_pmh<3>; + } + else if (pat_->bmd_ == 0) + { +#if defined(WITH_STRING_PM) + if (pat_->min_ >= 4) + adv_ = &Matcher::advance_string_pmh; + else if (pat_->min_ > 0) + adv_ = &Matcher::advance_string_pma; + else +#endif + adv_ = &Matcher::advance_string; + } + else + { +#if defined(WITH_STRING_PM) + if (pat_->min_ >= 4) + adv_ = &Matcher::advance_string_bm_pmh; + else if (pat_->min_ > 0) + adv_ = &Matcher::advance_string_bm_pma; + else +#endif + adv_ = &Matcher::advance_string_bm; + } +#if defined(HAVE_AVX512BW) || defined(HAVE_AVX2) + // AVX2 runtime optimized function callback overrides + if (have_HW_AVX2()) + simd_init_advance_avx2(); +#endif +#if defined(HAVE_AVX512BW) && (!defined(_MSC_VER) || defined(_WIN64)) + // AVX512BW runtime optimized function callback overrides + if (have_HW_AVX512BW()) + simd_init_advance_avx512bw(); +#endif +} + +/// Default method is none (unset) +bool Matcher::advance_none(size_t) +{ + return false; +} + +/// My "needle search" method when pin=1 +bool Matcher::advance_pattern_pin1_pma(size_t loc) +{ + const Pattern::Pred *pma = pat_->pma_; + const char *chr = pat_->chr_; + size_t min = pat_->min_; + uint16_t lcp = pat_->lcp_; + uint16_t lcs = pat_->lcs_; +#if defined(HAVE_AVX512BW) || 
defined(HAVE_AVX2) || defined(HAVE_SSE2) + __m128i vlcp = _mm_set1_epi8(chr[0]); + __m128i vlcs = _mm_set1_epi8(chr[1]); + while (true) + { + const char *s = buf_ + loc + lcp; + const char *e = buf_ + end_ + lcp - min + 1; + while (s <= e - 16) { + __m128i vstrlcp = _mm_loadu_si128(reinterpret_cast(s)); + __m128i vstrlcs = _mm_loadu_si128(reinterpret_cast(s + lcs - lcp)); + __m128i veqlcp = _mm_cmpeq_epi8(vlcp, vstrlcp); + __m128i veqlcs = _mm_cmpeq_epi8(vlcs, vstrlcs); + uint32_t mask = _mm_movemask_epi8(_mm_and_si128(veqlcp, veqlcs)); + while (mask != 0) + { + uint32_t offset = ctz(mask); + loc = s - lcp + offset - buf_; + if (loc + 4 > end_ || Pattern::predict_match(pma, &buf_[loc]) == 0) + { + set_current(loc); + return true; + } + mask &= mask - 1; + } + s += 16; + } + s -= lcp; + loc = s - buf_; + set_current_and_peek_more(loc - 1); + loc = cur_ + 1; + if (loc + min > end_) + return false; + if (loc + min + 15 > end_) + break; + } +#elif defined(HAVE_NEON) + uint8x16_t vlcp = vdupq_n_u8(chr[0]); + uint8x16_t vlcs = vdupq_n_u8(chr[1]); + while (true) + { + const char *s = buf_ + loc + lcp; + const char *e = buf_ + end_ + lcp - min + 1; + while (s <= e - 16) + { + uint8x16_t vstrlcp = vld1q_u8(reinterpret_cast(s)); + uint8x16_t vstrlcs = vld1q_u8(reinterpret_cast(s + lcs - lcp)); + uint8x16_t vmasklcp8 = vceqq_u8(vlcp, vstrlcp); + uint8x16_t vmasklcs8 = vceqq_u8(vlcs, vstrlcs); + uint64x2_t vmask64 = vreinterpretq_u64_u8(vandq_u8(vmasklcp8, vmasklcs8)); + uint64_t mask = vgetq_lane_u64(vmask64, 0); + if (mask != 0) + { + for (uint16_t i = 0; i < 8; ++i) + { + if ((mask & 0xff)) + { + loc = s - lcp + i - buf_; + if (loc + 4 > end_ || Pattern::predict_match(pma, &buf_[loc]) == 0) + { + set_current(loc); + return true; + } + } + mask >>= 8; + } + } + mask = vgetq_lane_u64(vmask64, 1); + if (mask != 0) + { + for (uint16_t i = 8; i < 16; ++i) + { + if ((mask & 0xff)) + { + loc = s - lcp + i - buf_; + if (loc + 4 > end_ || Pattern::predict_match(pma, &buf_[loc]) == 
0) + { + set_current(loc); + return true; + } + } + mask >>= 8; + } + } + s += 16; + } + s -= lcp; + loc = s - buf_; + set_current_and_peek_more(loc - 1); + loc = cur_ + 1; + if (loc + min > end_) + return false; + if (loc + min + 15 > end_) + break; + } +#endif + char chr0 = chr[0]; + char chr1 = chr[1]; + while (true) + { + const char *s = buf_ + loc + lcp; + const char *e = buf_ + end_; + if (s < e && (s = static_cast(std::memchr(s, chr0, e - s))) != NULL) + { + s -= lcp; + loc = s - buf_; + if (s > e - 4 || (s[lcs] == chr1 && Pattern::predict_match(pma, s) == 0)) + { + set_current(loc); + return true; + } + ++loc; + } + else + { + loc = e - buf_; set_current_and_peek_more(loc - 1); loc = cur_ + 1; if (loc + min > end_) return false; } - // look for a needle - if (pat_->pin_ == 1) + } +} + +/// My "needle search" method when pin=1 +bool Matcher::advance_pattern_pin1_pmh(size_t loc) +{ + const Pattern::Pred *pmh = pat_->pmh_; + const char *chr = pat_->chr_; + size_t min = pat_->min_; + uint16_t lcp = pat_->lcp_; + uint16_t lcs = pat_->lcs_; +#if defined(HAVE_AVX512BW) || defined(HAVE_AVX2) || defined(HAVE_SSE2) + __m128i vlcp = _mm_set1_epi8(chr[0]); + __m128i vlcs = _mm_set1_epi8(chr[1]); + while (true) + { + const char *s = buf_ + loc + lcp; + const char *e = buf_ + end_ + lcp - min + 1; + while (s <= e - 16) { - size_t lcp = pat_->lcp_; - size_t lcs = pat_->lcs_; - const char *chr = pat_->chr_; -#if defined(COMPILE_AVX512BW) || defined(COMPILE_AVX2) - __m256i vlcp = _mm256_set1_epi8(chr[0]); - __m256i vlcs = _mm256_set1_epi8(chr[1]); - while (true) + __m128i vstrlcp = _mm_loadu_si128(reinterpret_cast(s)); + __m128i vstrlcs = _mm_loadu_si128(reinterpret_cast(s + lcs - lcp)); + __m128i veqlcp = _mm_cmpeq_epi8(vlcp, vstrlcp); + __m128i veqlcs = _mm_cmpeq_epi8(vlcs, vstrlcs); + uint32_t mask = _mm_movemask_epi8(_mm_and_si128(veqlcp, veqlcs)); + while (mask != 0) { - const char *s = buf_ + loc + lcp; - const char *e = buf_ + end_ + lcp - min + 1; - while (s <= e - 
32) + uint32_t offset = ctz(mask); + loc = s - lcp + offset - buf_; + if (Pattern::predict_match(pmh, &buf_[loc], min)) { - __m256i vstrlcp = _mm256_loadu_si256(reinterpret_cast(s)); - __m256i vstrlcs = _mm256_loadu_si256(reinterpret_cast(s + lcs - lcp)); - __m256i veqlcp = _mm256_cmpeq_epi8(vlcp, vstrlcp); - __m256i veqlcs = _mm256_cmpeq_epi8(vlcs, vstrlcs); - uint32_t mask = _mm256_movemask_epi8(_mm256_and_si256(veqlcp, veqlcs)); - while (mask != 0) - { - uint32_t offset = ctz(mask); - loc = s - lcp + offset - buf_; - set_current(loc); - if (min >= 4) - { - if (Pattern::predict_match(pmh, &buf_[loc], min)) - return true; - } - else - { - if (loc + 4 > end_ || Pattern::predict_match(pma, &buf_[loc]) == 0) - return true; - } - mask &= mask - 1; - } - s += 32; - } - s -= lcp; - loc = s - buf_; - set_current_and_peek_more(loc - 1); - loc = cur_ + 1; - if (loc + min > end_) - return false; - if (loc + min + 31 > end_) - break; - } -#elif defined(HAVE_AVX512BW) || defined(HAVE_AVX2) || defined(HAVE_SSE2) - __m128i vlcp = _mm_set1_epi8(chr[0]); - __m128i vlcs = _mm_set1_epi8(chr[1]); - while (true) - { - const char *s = buf_ + loc + lcp; - const char *e = buf_ + end_ + lcp - min + 1; - while (s <= e - 16) - { - __m128i vstrlcp = _mm_loadu_si128(reinterpret_cast(s)); - __m128i vstrlcs = _mm_loadu_si128(reinterpret_cast(s + lcs - lcp)); - __m128i veqlcp = _mm_cmpeq_epi8(vlcp, vstrlcp); - __m128i veqlcs = _mm_cmpeq_epi8(vlcs, vstrlcs); - uint32_t mask = _mm_movemask_epi8(_mm_and_si128(veqlcp, veqlcs)); - while (mask != 0) - { - uint32_t offset = ctz(mask); - loc = s - lcp + offset - buf_; - set_current(loc); - if (min >= 4) - { - if (Pattern::predict_match(pmh, &buf_[loc], min)) - return true; - } - else - { - if (loc + 4 > end_ || Pattern::predict_match(pma, &buf_[loc]) == 0) - return true; - } - mask &= mask - 1; - } - s += 16; - } - s -= lcp; - loc = s - buf_; - set_current_and_peek_more(loc - 1); - loc = cur_ + 1; - if (loc + min > end_) - return false; - if (loc + min 
+ 15 > end_) - break; - } -#elif defined(HAVE_NEON) - uint8x16_t vlcp = vdupq_n_u8(chr[0]); - uint8x16_t vlcs = vdupq_n_u8(chr[1]); - while (true) - { - const char *s = buf_ + loc + lcp; - const char *e = buf_ + end_ + lcp - min + 1; - while (s <= e - 16) - { - uint8x16_t vstrlcp = vld1q_u8(reinterpret_cast(s)); - uint8x16_t vstrlcs = vld1q_u8(reinterpret_cast(s + lcs - lcp)); - uint8x16_t vmasklcp8 = vceqq_u8(vlcp, vstrlcp); - uint8x16_t vmasklcs8 = vceqq_u8(vlcs, vstrlcs); - uint64x2_t vmask64 = vreinterpretq_u64_u8(vandq_u8(vmasklcp8, vmasklcs8)); - uint64_t mask = vgetq_lane_u64(vmask64, 0); - if (mask != 0) - { - for (int i = 0; i < 8; ++i) - { - if ((mask & 0xff)) - { - loc = s - lcp + i - buf_; - set_current(loc); - if (min >= 4) - { - if (Pattern::predict_match(pmh, &buf_[loc], min)) - return true; - } - else - { - if (loc + 4 > end_ || Pattern::predict_match(pma, &buf_[loc]) == 0) - return true; - } - } - mask >>= 8; - } - } - mask = vgetq_lane_u64(vmask64, 1); - if (mask != 0) - { - for (int i = 0; i < 8; ++i) - { - if ((mask & 0xff)) - { - loc = s - lcp + i + 8 - buf_; - set_current(loc); - if (min >= 4) - { - if (Pattern::predict_match(pmh, &buf_[loc], min)) - return true; - } - else - { - if (loc + 4 > end_ || Pattern::predict_match(pma, &buf_[loc]) == 0) - return true; - } - } - mask >>= 8; - } - } - s += 16; - } - s -= lcp; - loc = s - buf_; - set_current_and_peek_more(loc - 1); - loc = cur_ + 1; - if (loc + min > end_) - return false; - if (loc + min + 15 > end_) - break; - } -#else - while (true) - { - const char *s = buf_ + loc + lcp; - const char *e = buf_ + end_; - if (s < e && (s = static_cast(std::memchr(s, chr[0], e - s))) != NULL) - { - s -= lcp; - loc = s - buf_; - set_current(loc); - if (min >= 4) - { - if (s + min > e || (s[lcs] == chr[1] && Pattern::predict_match(pmh, s, min))) - return true; - } - else - { - if (s > e - 4 || Pattern::predict_match(pma, s) == 0) - return true; - } - ++loc; - } - else - { - loc = e - buf_; - 
set_current_and_peek_more(loc - 1); - loc = cur_ + 1; - if (loc + min > end_) - return false; - } - } -#endif - } -#if defined(COMPILE_AVX512BW) || defined(COMPILE_AVX2) - // look for needles - else if (pat_->pin_ == 2) - { - size_t lcp = pat_->lcp_; - size_t lcs = pat_->lcs_; - const char *chr = pat_->chr_; - __m256i vlcp0 = _mm256_set1_epi8(chr[0]); - __m256i vlcp1 = _mm256_set1_epi8(chr[1]); - __m256i vlcs0 = _mm256_set1_epi8(chr[2]); - __m256i vlcs1 = _mm256_set1_epi8(chr[3]); - while (true) - { - const char *s = buf_ + loc + lcp; - const char *e = buf_ + end_ + lcp - min + 1; - while (s <= e - 32) - { - __m256i vstrlcp = _mm256_loadu_si256(reinterpret_cast(s)); - __m256i vstrlcs = _mm256_loadu_si256(reinterpret_cast(s + lcs - lcp)); - __m256i veqlcp = _mm256_cmpeq_epi8(vlcp0, vstrlcp); - __m256i veqlcs = _mm256_cmpeq_epi8(vlcs0, vstrlcs); - veqlcp = _mm256_or_si256(veqlcp, _mm256_cmpeq_epi8(vlcp1, vstrlcp)); - veqlcs = _mm256_or_si256(veqlcs, _mm256_cmpeq_epi8(vlcs1, vstrlcs)); - uint32_t mask = _mm256_movemask_epi8(_mm256_and_si256(veqlcp, veqlcs)); - while (mask != 0) - { - uint32_t offset = ctz(mask); - loc = s - lcp + offset - buf_; - set_current(loc); - if (min >= 4) - { - if (Pattern::predict_match(pmh, &buf_[loc], min)) - return true; - } - else - { - if (loc + 4 > end_ || Pattern::predict_match(pma, &buf_[loc]) == 0) - return true; - } - mask &= mask - 1; - } - s += 32; - } - s -= lcp; - loc = s - buf_; - set_current_and_peek_more(loc - 1); - loc = cur_ + 1; - if (loc + min > end_) - return false; - if (loc + min + 31 > end_) - break; - } - } - else if (pat_->pin_ == 3) - { - size_t lcp = pat_->lcp_; - size_t lcs = pat_->lcs_; - const char *chr = pat_->chr_; - __m256i vlcp0 = _mm256_set1_epi8(chr[0]); - __m256i vlcp1 = _mm256_set1_epi8(chr[1]); - __m256i vlcp2 = _mm256_set1_epi8(chr[2]); - __m256i vlcs0 = _mm256_set1_epi8(chr[3]); - __m256i vlcs1 = _mm256_set1_epi8(chr[4]); - __m256i vlcs2 = _mm256_set1_epi8(chr[5]); - while (true) - { - const char *s 
= buf_ + loc + lcp; - const char *e = buf_ + end_ + lcp - min + 1; - while (s <= e - 32) - { - __m256i vstrlcp = _mm256_loadu_si256(reinterpret_cast(s)); - __m256i vstrlcs = _mm256_loadu_si256(reinterpret_cast(s + lcs - lcp)); - __m256i veqlcp = _mm256_cmpeq_epi8(vlcp0, vstrlcp); - __m256i veqlcs = _mm256_cmpeq_epi8(vlcs0, vstrlcs); - veqlcp = _mm256_or_si256(veqlcp, _mm256_cmpeq_epi8(vlcp1, vstrlcp)); - veqlcp = _mm256_or_si256(veqlcp, _mm256_cmpeq_epi8(vlcp2, vstrlcp)); - veqlcs = _mm256_or_si256(veqlcs, _mm256_cmpeq_epi8(vlcs1, vstrlcs)); - veqlcs = _mm256_or_si256(veqlcs, _mm256_cmpeq_epi8(vlcs2, vstrlcs)); - uint32_t mask = _mm256_movemask_epi8(_mm256_and_si256(veqlcp, veqlcs)); - while (mask != 0) - { - uint32_t offset = ctz(mask); - loc = s - lcp + offset - buf_; - set_current(loc); - if (min >= 4) - { - if (Pattern::predict_match(pmh, &buf_[loc], min)) - return true; - } - else - { - if (loc + 4 > end_ || Pattern::predict_match(pma, &buf_[loc]) == 0) - return true; - } - mask &= mask - 1; - } - s += 32; - } - s -= lcp; - loc = s - buf_; - set_current_and_peek_more(loc - 1); - loc = cur_ + 1; - if (loc + min > end_) - return false; - if (loc + min + 31 > end_) - break; - } - } - else if (pat_->pin_ == 4) - { - size_t lcp = pat_->lcp_; - size_t lcs = pat_->lcs_; - const char *chr = pat_->chr_; - __m256i vlcp0 = _mm256_set1_epi8(chr[0]); - __m256i vlcp1 = _mm256_set1_epi8(chr[1]); - __m256i vlcp2 = _mm256_set1_epi8(chr[2]); - __m256i vlcp3 = _mm256_set1_epi8(chr[3]); - __m256i vlcs0 = _mm256_set1_epi8(chr[4]); - __m256i vlcs1 = _mm256_set1_epi8(chr[5]); - __m256i vlcs2 = _mm256_set1_epi8(chr[6]); - __m256i vlcs3 = _mm256_set1_epi8(chr[7]); - while (true) - { - const char *s = buf_ + loc + lcp; - const char *e = buf_ + end_ + lcp - min + 1; - while (s <= e - 32) - { - __m256i vstrlcp = _mm256_loadu_si256(reinterpret_cast(s)); - __m256i vstrlcs = _mm256_loadu_si256(reinterpret_cast(s + lcs - lcp)); - __m256i veqlcp = _mm256_cmpeq_epi8(vlcp0, vstrlcp); - __m256i 
veqlcs = _mm256_cmpeq_epi8(vlcs0, vstrlcs); - veqlcp = _mm256_or_si256(veqlcp, _mm256_cmpeq_epi8(vlcp1, vstrlcp)); - veqlcp = _mm256_or_si256(veqlcp, _mm256_cmpeq_epi8(vlcp2, vstrlcp)); - veqlcp = _mm256_or_si256(veqlcp, _mm256_cmpeq_epi8(vlcp3, vstrlcp)); - veqlcs = _mm256_or_si256(veqlcs, _mm256_cmpeq_epi8(vlcs1, vstrlcs)); - veqlcs = _mm256_or_si256(veqlcs, _mm256_cmpeq_epi8(vlcs2, vstrlcs)); - veqlcs = _mm256_or_si256(veqlcs, _mm256_cmpeq_epi8(vlcs3, vstrlcs)); - uint32_t mask = _mm256_movemask_epi8(_mm256_and_si256(veqlcp, veqlcs)); - while (mask != 0) - { - uint32_t offset = ctz(mask); - loc = s - lcp + offset - buf_; - set_current(loc); - if (min >= 4) - { - if (Pattern::predict_match(pmh, &buf_[loc], min)) - return true; - } - else - { - if (loc + 4 > end_ || Pattern::predict_match(pma, &buf_[loc]) == 0) - return true; - } - mask &= mask - 1; - } - s += 32; - } - s -= lcp; - loc = s - buf_; - set_current_and_peek_more(loc - 1); - loc = cur_ + 1; - if (loc + min > end_) - return false; - if (loc + min + 31 > end_) - break; - } - } - else if (pat_->pin_ == 5) - { - size_t lcp = pat_->lcp_; - size_t lcs = pat_->lcs_; - const char *chr = pat_->chr_; - __m256i vlcp0 = _mm256_set1_epi8(chr[0]); - __m256i vlcp1 = _mm256_set1_epi8(chr[1]); - __m256i vlcp2 = _mm256_set1_epi8(chr[2]); - __m256i vlcp3 = _mm256_set1_epi8(chr[3]); - __m256i vlcp4 = _mm256_set1_epi8(chr[4]); - __m256i vlcs0 = _mm256_set1_epi8(chr[5]); - __m256i vlcs1 = _mm256_set1_epi8(chr[6]); - __m256i vlcs2 = _mm256_set1_epi8(chr[7]); - __m256i vlcs3 = _mm256_set1_epi8(chr[8]); - __m256i vlcs4 = _mm256_set1_epi8(chr[9]); - while (true) - { - const char *s = buf_ + loc + lcp; - const char *e = buf_ + end_ + lcp - min + 1; - while (s <= e - 32) - { - __m256i vstrlcp = _mm256_loadu_si256(reinterpret_cast(s)); - __m256i vstrlcs = _mm256_loadu_si256(reinterpret_cast(s + lcs - lcp)); - __m256i veqlcp = _mm256_cmpeq_epi8(vlcp0, vstrlcp); - __m256i veqlcs = _mm256_cmpeq_epi8(vlcs0, vstrlcs); - veqlcp = 
_mm256_or_si256(veqlcp, _mm256_cmpeq_epi8(vlcp1, vstrlcp)); - veqlcp = _mm256_or_si256(veqlcp, _mm256_cmpeq_epi8(vlcp2, vstrlcp)); - veqlcp = _mm256_or_si256(veqlcp, _mm256_cmpeq_epi8(vlcp3, vstrlcp)); - veqlcp = _mm256_or_si256(veqlcp, _mm256_cmpeq_epi8(vlcp4, vstrlcp)); - veqlcs = _mm256_or_si256(veqlcs, _mm256_cmpeq_epi8(vlcs1, vstrlcs)); - veqlcs = _mm256_or_si256(veqlcs, _mm256_cmpeq_epi8(vlcs2, vstrlcs)); - veqlcs = _mm256_or_si256(veqlcs, _mm256_cmpeq_epi8(vlcs3, vstrlcs)); - veqlcs = _mm256_or_si256(veqlcs, _mm256_cmpeq_epi8(vlcs4, vstrlcs)); - uint32_t mask = _mm256_movemask_epi8(_mm256_and_si256(veqlcp, veqlcs)); - while (mask != 0) - { - uint32_t offset = ctz(mask); - loc = s - lcp + offset - buf_; - set_current(loc); - if (min >= 4) - { - if (Pattern::predict_match(pmh, &buf_[loc], min)) - return true; - } - else - { - if (loc + 4 > end_ || Pattern::predict_match(pma, &buf_[loc]) == 0) - return true; - } - mask &= mask - 1; - } - s += 32; - } - s -= lcp; - loc = s - buf_; - set_current_and_peek_more(loc - 1); - loc = cur_ + 1; - if (loc + min > end_) - return false; - if (loc + min + 31 > end_) - break; - } - } - else if (pat_->pin_ == 6) - { - size_t lcp = pat_->lcp_; - size_t lcs = pat_->lcs_; - const char *chr = pat_->chr_; - __m256i vlcp0 = _mm256_set1_epi8(chr[0]); - __m256i vlcp1 = _mm256_set1_epi8(chr[1]); - __m256i vlcp2 = _mm256_set1_epi8(chr[2]); - __m256i vlcp3 = _mm256_set1_epi8(chr[3]); - __m256i vlcp4 = _mm256_set1_epi8(chr[4]); - __m256i vlcp5 = _mm256_set1_epi8(chr[5]); - __m256i vlcs0 = _mm256_set1_epi8(chr[6]); - __m256i vlcs1 = _mm256_set1_epi8(chr[7]); - __m256i vlcs2 = _mm256_set1_epi8(chr[8]); - __m256i vlcs3 = _mm256_set1_epi8(chr[9]); - __m256i vlcs4 = _mm256_set1_epi8(chr[10]); - __m256i vlcs5 = _mm256_set1_epi8(chr[11]); - while (true) - { - const char *s = buf_ + loc + lcp; - const char *e = buf_ + end_ + lcp - min + 1; - while (s <= e - 32) - { - __m256i vstrlcp = _mm256_loadu_si256(reinterpret_cast(s)); - __m256i vstrlcs = 
_mm256_loadu_si256(reinterpret_cast(s + lcs - lcp)); - __m256i veqlcp = _mm256_cmpeq_epi8(vlcp0, vstrlcp); - __m256i veqlcs = _mm256_cmpeq_epi8(vlcs0, vstrlcs); - veqlcp = _mm256_or_si256(veqlcp, _mm256_cmpeq_epi8(vlcp1, vstrlcp)); - veqlcp = _mm256_or_si256(veqlcp, _mm256_cmpeq_epi8(vlcp2, vstrlcp)); - veqlcp = _mm256_or_si256(veqlcp, _mm256_cmpeq_epi8(vlcp3, vstrlcp)); - veqlcp = _mm256_or_si256(veqlcp, _mm256_cmpeq_epi8(vlcp4, vstrlcp)); - veqlcp = _mm256_or_si256(veqlcp, _mm256_cmpeq_epi8(vlcp5, vstrlcp)); - veqlcs = _mm256_or_si256(veqlcs, _mm256_cmpeq_epi8(vlcs1, vstrlcs)); - veqlcs = _mm256_or_si256(veqlcs, _mm256_cmpeq_epi8(vlcs2, vstrlcs)); - veqlcs = _mm256_or_si256(veqlcs, _mm256_cmpeq_epi8(vlcs3, vstrlcs)); - veqlcs = _mm256_or_si256(veqlcs, _mm256_cmpeq_epi8(vlcs4, vstrlcs)); - veqlcs = _mm256_or_si256(veqlcs, _mm256_cmpeq_epi8(vlcs5, vstrlcs)); - uint32_t mask = _mm256_movemask_epi8(_mm256_and_si256(veqlcp, veqlcs)); - while (mask != 0) - { - uint32_t offset = ctz(mask); - loc = s - lcp + offset - buf_; - set_current(loc); - if (min >= 4) - { - if (Pattern::predict_match(pmh, &buf_[loc], min)) - return true; - } - else - { - if (loc + 4 > end_ || Pattern::predict_match(pma, &buf_[loc]) == 0) - return true; - } - mask &= mask - 1; - } - s += 32; - } - s -= lcp; - loc = s - buf_; - set_current_and_peek_more(loc - 1); - loc = cur_ + 1; - if (loc + min > end_) - return false; - if (loc + min + 31 > end_) - break; - } - } - else if (pat_->pin_ == 7) - { - size_t lcp = pat_->lcp_; - size_t lcs = pat_->lcs_; - const char *chr = pat_->chr_; - __m256i vlcp0 = _mm256_set1_epi8(chr[0]); - __m256i vlcp1 = _mm256_set1_epi8(chr[1]); - __m256i vlcp2 = _mm256_set1_epi8(chr[2]); - __m256i vlcp3 = _mm256_set1_epi8(chr[3]); - __m256i vlcp4 = _mm256_set1_epi8(chr[4]); - __m256i vlcp5 = _mm256_set1_epi8(chr[5]); - __m256i vlcp6 = _mm256_set1_epi8(chr[6]); - __m256i vlcs0 = _mm256_set1_epi8(chr[7]); - __m256i vlcs1 = _mm256_set1_epi8(chr[8]); - __m256i vlcs2 = 
_mm256_set1_epi8(chr[9]); - __m256i vlcs3 = _mm256_set1_epi8(chr[10]); - __m256i vlcs4 = _mm256_set1_epi8(chr[11]); - __m256i vlcs5 = _mm256_set1_epi8(chr[12]); - __m256i vlcs6 = _mm256_set1_epi8(chr[13]); - while (true) - { - const char *s = buf_ + loc + lcp; - const char *e = buf_ + end_ + lcp - min + 1; - while (s <= e - 32) - { - __m256i vstrlcp = _mm256_loadu_si256(reinterpret_cast(s)); - __m256i vstrlcs = _mm256_loadu_si256(reinterpret_cast(s + lcs - lcp)); - __m256i veqlcp = _mm256_cmpeq_epi8(vlcp0, vstrlcp); - __m256i veqlcs = _mm256_cmpeq_epi8(vlcs0, vstrlcs); - veqlcp = _mm256_or_si256(veqlcp, _mm256_cmpeq_epi8(vlcp1, vstrlcp)); - veqlcp = _mm256_or_si256(veqlcp, _mm256_cmpeq_epi8(vlcp2, vstrlcp)); - veqlcp = _mm256_or_si256(veqlcp, _mm256_cmpeq_epi8(vlcp3, vstrlcp)); - veqlcp = _mm256_or_si256(veqlcp, _mm256_cmpeq_epi8(vlcp4, vstrlcp)); - veqlcp = _mm256_or_si256(veqlcp, _mm256_cmpeq_epi8(vlcp5, vstrlcp)); - veqlcp = _mm256_or_si256(veqlcp, _mm256_cmpeq_epi8(vlcp6, vstrlcp)); - veqlcs = _mm256_or_si256(veqlcs, _mm256_cmpeq_epi8(vlcs1, vstrlcs)); - veqlcs = _mm256_or_si256(veqlcs, _mm256_cmpeq_epi8(vlcs2, vstrlcs)); - veqlcs = _mm256_or_si256(veqlcs, _mm256_cmpeq_epi8(vlcs3, vstrlcs)); - veqlcs = _mm256_or_si256(veqlcs, _mm256_cmpeq_epi8(vlcs4, vstrlcs)); - veqlcs = _mm256_or_si256(veqlcs, _mm256_cmpeq_epi8(vlcs5, vstrlcs)); - veqlcs = _mm256_or_si256(veqlcs, _mm256_cmpeq_epi8(vlcs6, vstrlcs)); - uint32_t mask = _mm256_movemask_epi8(_mm256_and_si256(veqlcp, veqlcs)); - while (mask != 0) - { - uint32_t offset = ctz(mask); - loc = s - lcp + offset - buf_; - set_current(loc); - if (min >= 4) - { - if (Pattern::predict_match(pmh, &buf_[loc], min)) - return true; - } - else - { - if (loc + 4 > end_ || Pattern::predict_match(pma, &buf_[loc]) == 0) - return true; - } - mask &= mask - 1; - } - s += 32; - } - s -= lcp; - loc = s - buf_; - set_current_and_peek_more(loc - 1); - loc = cur_ + 1; - if (loc + min > end_) - return false; - if (loc + min + 31 > end_) - 
break; - } - } - else if (pat_->pin_ == 8) - { - size_t lcp = pat_->lcp_; - size_t lcs = pat_->lcs_; - const char *chr = pat_->chr_; - __m256i vlcp0 = _mm256_set1_epi8(chr[0]); - __m256i vlcp1 = _mm256_set1_epi8(chr[1]); - __m256i vlcp2 = _mm256_set1_epi8(chr[2]); - __m256i vlcp3 = _mm256_set1_epi8(chr[3]); - __m256i vlcp4 = _mm256_set1_epi8(chr[4]); - __m256i vlcp5 = _mm256_set1_epi8(chr[5]); - __m256i vlcp6 = _mm256_set1_epi8(chr[6]); - __m256i vlcp7 = _mm256_set1_epi8(chr[7]); - __m256i vlcs0 = _mm256_set1_epi8(chr[8]); - __m256i vlcs1 = _mm256_set1_epi8(chr[9]); - __m256i vlcs2 = _mm256_set1_epi8(chr[10]); - __m256i vlcs3 = _mm256_set1_epi8(chr[11]); - __m256i vlcs4 = _mm256_set1_epi8(chr[12]); - __m256i vlcs5 = _mm256_set1_epi8(chr[13]); - __m256i vlcs6 = _mm256_set1_epi8(chr[14]); - __m256i vlcs7 = _mm256_set1_epi8(chr[15]); - while (true) - { - const char *s = buf_ + loc + lcp; - const char *e = buf_ + end_ + lcp - min + 1; - while (s <= e - 32) - { - __m256i vstrlcp = _mm256_loadu_si256(reinterpret_cast(s)); - __m256i vstrlcs = _mm256_loadu_si256(reinterpret_cast(s + lcs - lcp)); - __m256i veqlcp = _mm256_cmpeq_epi8(vlcp0, vstrlcp); - __m256i veqlcs = _mm256_cmpeq_epi8(vlcs0, vstrlcs); - veqlcp = _mm256_or_si256(veqlcp, _mm256_cmpeq_epi8(vlcp1, vstrlcp)); - veqlcp = _mm256_or_si256(veqlcp, _mm256_cmpeq_epi8(vlcp2, vstrlcp)); - veqlcp = _mm256_or_si256(veqlcp, _mm256_cmpeq_epi8(vlcp3, vstrlcp)); - veqlcp = _mm256_or_si256(veqlcp, _mm256_cmpeq_epi8(vlcp4, vstrlcp)); - veqlcp = _mm256_or_si256(veqlcp, _mm256_cmpeq_epi8(vlcp5, vstrlcp)); - veqlcp = _mm256_or_si256(veqlcp, _mm256_cmpeq_epi8(vlcp6, vstrlcp)); - veqlcp = _mm256_or_si256(veqlcp, _mm256_cmpeq_epi8(vlcp7, vstrlcp)); - veqlcs = _mm256_or_si256(veqlcs, _mm256_cmpeq_epi8(vlcs1, vstrlcs)); - veqlcs = _mm256_or_si256(veqlcs, _mm256_cmpeq_epi8(vlcs2, vstrlcs)); - veqlcs = _mm256_or_si256(veqlcs, _mm256_cmpeq_epi8(vlcs3, vstrlcs)); - veqlcs = _mm256_or_si256(veqlcs, _mm256_cmpeq_epi8(vlcs4, vstrlcs)); - 
veqlcs = _mm256_or_si256(veqlcs, _mm256_cmpeq_epi8(vlcs5, vstrlcs)); - veqlcs = _mm256_or_si256(veqlcs, _mm256_cmpeq_epi8(vlcs6, vstrlcs)); - veqlcs = _mm256_or_si256(veqlcs, _mm256_cmpeq_epi8(vlcs7, vstrlcs)); - uint32_t mask = _mm256_movemask_epi8(_mm256_and_si256(veqlcp, veqlcs)); - while (mask != 0) - { - uint32_t offset = ctz(mask); - loc = s - lcp + offset - buf_; - set_current(loc); - if (min >= 4) - { - if (Pattern::predict_match(pmh, &buf_[loc], min)) - return true; - } - else - { - if (loc + 4 > end_ || Pattern::predict_match(pma, &buf_[loc]) == 0) - return true; - } - mask &= mask - 1; - } - s += 32; - } - s -= lcp; - loc = s - buf_; - set_current_and_peek_more(loc - 1); - loc = cur_ + 1; - if (loc + min > end_) - return false; - if (loc + min + 31 > end_) - break; - } - } - else if (pat_->pin_ == 16) - { - size_t lcp = pat_->lcp_; - size_t lcs = pat_->lcs_; - const char *chr = pat_->chr_; - __m256i vlcp0 = _mm256_set1_epi8(chr[0]); - __m256i vlcp1 = _mm256_set1_epi8(chr[1]); - __m256i vlcp2 = _mm256_set1_epi8(chr[2]); - __m256i vlcp3 = _mm256_set1_epi8(chr[3]); - __m256i vlcp4 = _mm256_set1_epi8(chr[4]); - __m256i vlcp5 = _mm256_set1_epi8(chr[5]); - __m256i vlcp6 = _mm256_set1_epi8(chr[6]); - __m256i vlcp7 = _mm256_set1_epi8(chr[7]); - __m256i vlcp8 = _mm256_set1_epi8(chr[8]); - __m256i vlcp9 = _mm256_set1_epi8(chr[9]); - __m256i vlcpa = _mm256_set1_epi8(chr[10]); - __m256i vlcpb = _mm256_set1_epi8(chr[11]); - __m256i vlcpc = _mm256_set1_epi8(chr[12]); - __m256i vlcpd = _mm256_set1_epi8(chr[13]); - __m256i vlcpe = _mm256_set1_epi8(chr[14]); - __m256i vlcpf = _mm256_set1_epi8(chr[15]); - __m256i vlcs0 = _mm256_set1_epi8(chr[16]); - __m256i vlcs1 = _mm256_set1_epi8(chr[17]); - __m256i vlcs2 = _mm256_set1_epi8(chr[18]); - __m256i vlcs3 = _mm256_set1_epi8(chr[19]); - __m256i vlcs4 = _mm256_set1_epi8(chr[20]); - __m256i vlcs5 = _mm256_set1_epi8(chr[21]); - __m256i vlcs6 = _mm256_set1_epi8(chr[22]); - __m256i vlcs7 = _mm256_set1_epi8(chr[23]); - __m256i vlcs8 
= _mm256_set1_epi8(chr[24]); - __m256i vlcs9 = _mm256_set1_epi8(chr[25]); - __m256i vlcsa = _mm256_set1_epi8(chr[26]); - __m256i vlcsb = _mm256_set1_epi8(chr[27]); - __m256i vlcsc = _mm256_set1_epi8(chr[28]); - __m256i vlcsd = _mm256_set1_epi8(chr[29]); - __m256i vlcse = _mm256_set1_epi8(chr[30]); - __m256i vlcsf = _mm256_set1_epi8(chr[31]); - while (true) - { - const char *s = buf_ + loc + lcp; - const char *e = buf_ + end_ + lcp - min + 1; - while (s <= e - 32) - { - __m256i vstrlcp = _mm256_loadu_si256(reinterpret_cast(s)); - __m256i vstrlcs = _mm256_loadu_si256(reinterpret_cast(s + lcs - lcp)); - __m256i veqlcp = _mm256_cmpeq_epi8(vlcp0, vstrlcp); - __m256i veqlcs = _mm256_cmpeq_epi8(vlcs0, vstrlcs); - veqlcp = _mm256_or_si256(veqlcp, _mm256_cmpeq_epi8(vlcp1, vstrlcp)); - veqlcp = _mm256_or_si256(veqlcp, _mm256_cmpeq_epi8(vlcp2, vstrlcp)); - veqlcp = _mm256_or_si256(veqlcp, _mm256_cmpeq_epi8(vlcp3, vstrlcp)); - veqlcp = _mm256_or_si256(veqlcp, _mm256_cmpeq_epi8(vlcp4, vstrlcp)); - veqlcp = _mm256_or_si256(veqlcp, _mm256_cmpeq_epi8(vlcp5, vstrlcp)); - veqlcp = _mm256_or_si256(veqlcp, _mm256_cmpeq_epi8(vlcp6, vstrlcp)); - veqlcp = _mm256_or_si256(veqlcp, _mm256_cmpeq_epi8(vlcp7, vstrlcp)); - veqlcp = _mm256_or_si256(veqlcp, _mm256_cmpeq_epi8(vlcp8, vstrlcp)); - veqlcp = _mm256_or_si256(veqlcp, _mm256_cmpeq_epi8(vlcp9, vstrlcp)); - veqlcp = _mm256_or_si256(veqlcp, _mm256_cmpeq_epi8(vlcpa, vstrlcp)); - veqlcp = _mm256_or_si256(veqlcp, _mm256_cmpeq_epi8(vlcpb, vstrlcp)); - veqlcp = _mm256_or_si256(veqlcp, _mm256_cmpeq_epi8(vlcpc, vstrlcp)); - veqlcp = _mm256_or_si256(veqlcp, _mm256_cmpeq_epi8(vlcpd, vstrlcp)); - veqlcp = _mm256_or_si256(veqlcp, _mm256_cmpeq_epi8(vlcpe, vstrlcp)); - veqlcp = _mm256_or_si256(veqlcp, _mm256_cmpeq_epi8(vlcpf, vstrlcp)); - veqlcs = _mm256_or_si256(veqlcs, _mm256_cmpeq_epi8(vlcs1, vstrlcs)); - veqlcs = _mm256_or_si256(veqlcs, _mm256_cmpeq_epi8(vlcs2, vstrlcs)); - veqlcs = _mm256_or_si256(veqlcs, _mm256_cmpeq_epi8(vlcs3, vstrlcs)); - 
veqlcs = _mm256_or_si256(veqlcs, _mm256_cmpeq_epi8(vlcs4, vstrlcs)); - veqlcs = _mm256_or_si256(veqlcs, _mm256_cmpeq_epi8(vlcs5, vstrlcs)); - veqlcs = _mm256_or_si256(veqlcs, _mm256_cmpeq_epi8(vlcs6, vstrlcs)); - veqlcs = _mm256_or_si256(veqlcs, _mm256_cmpeq_epi8(vlcs7, vstrlcs)); - veqlcs = _mm256_or_si256(veqlcs, _mm256_cmpeq_epi8(vlcs8, vstrlcs)); - veqlcs = _mm256_or_si256(veqlcs, _mm256_cmpeq_epi8(vlcs9, vstrlcs)); - veqlcs = _mm256_or_si256(veqlcs, _mm256_cmpeq_epi8(vlcsa, vstrlcs)); - veqlcs = _mm256_or_si256(veqlcs, _mm256_cmpeq_epi8(vlcsb, vstrlcs)); - veqlcs = _mm256_or_si256(veqlcs, _mm256_cmpeq_epi8(vlcsc, vstrlcs)); - veqlcs = _mm256_or_si256(veqlcs, _mm256_cmpeq_epi8(vlcsd, vstrlcs)); - veqlcs = _mm256_or_si256(veqlcs, _mm256_cmpeq_epi8(vlcse, vstrlcs)); - veqlcs = _mm256_or_si256(veqlcs, _mm256_cmpeq_epi8(vlcsf, vstrlcs)); - uint32_t mask = _mm256_movemask_epi8(_mm256_and_si256(veqlcp, veqlcs)); - while (mask != 0) - { - uint32_t offset = ctz(mask); - loc = s - lcp + offset - buf_; - set_current(loc); - if (min >= 4) - { - if (Pattern::predict_match(pmh, &buf_[loc], min)) - return true; - } - else - { - if (loc + 4 > end_ || Pattern::predict_match(pma, &buf_[loc]) == 0) - return true; - } - mask &= mask - 1; - } - s += 32; - } - s -= lcp; - loc = s - buf_; - set_current_and_peek_more(loc - 1); - loc = cur_ + 1; - if (loc + min > end_) - return false; - if (loc + min + 31 > end_) - break; - } - } -#elif defined(HAVE_AVX512BW) || defined(HAVE_AVX2) || defined(HAVE_SSE2) - // look for needles - else if (pat_->pin_ == 2) - { - size_t lcp = pat_->lcp_; - size_t lcs = pat_->lcs_; - const char *chr = pat_->chr_; - __m128i vlcp0 = _mm_set1_epi8(chr[0]); - __m128i vlcp1 = _mm_set1_epi8(chr[1]); - __m128i vlcs0 = _mm_set1_epi8(chr[2]); - __m128i vlcs1 = _mm_set1_epi8(chr[3]); - while (true) - { - const char *s = buf_ + loc + lcp; - const char *e = buf_ + end_ + lcp - min + 1; - while (s <= e - 16) - { - __m128i vstrlcp = _mm_loadu_si128(reinterpret_cast(s)); - 
__m128i vstrlcs = _mm_loadu_si128(reinterpret_cast(s + lcs - lcp)); - __m128i veqlcp = _mm_cmpeq_epi8(vlcp0, vstrlcp); - __m128i veqlcs = _mm_cmpeq_epi8(vlcs0, vstrlcs); - veqlcp = _mm_or_si128(veqlcp, _mm_cmpeq_epi8(vlcp1, vstrlcp)); - veqlcs = _mm_or_si128(veqlcs, _mm_cmpeq_epi8(vlcs1, vstrlcs)); - uint32_t mask = _mm_movemask_epi8(_mm_and_si128(veqlcp, veqlcs)); - while (mask != 0) - { - uint32_t offset = ctz(mask); - loc = s - lcp + offset - buf_; - set_current(loc); - if (min >= 4) - { - if (Pattern::predict_match(pmh, &buf_[loc], min)) - return true; - } - else - { - if (loc + 4 > end_ || Pattern::predict_match(pma, &buf_[loc]) == 0) - return true; - } - mask &= mask - 1; - } - s += 16; - } - s -= lcp; - loc = s - buf_; - set_current_and_peek_more(loc - 1); - loc = cur_ + 1; - if (loc + min > end_) - return false; - if (loc + min + 15 > end_) - break; - } - } - else if (pat_->pin_ == 3) - { - size_t lcp = pat_->lcp_; - size_t lcs = pat_->lcs_; - const char *chr = pat_->chr_; - __m128i vlcp0 = _mm_set1_epi8(chr[0]); - __m128i vlcp1 = _mm_set1_epi8(chr[1]); - __m128i vlcp2 = _mm_set1_epi8(chr[2]); - __m128i vlcs0 = _mm_set1_epi8(chr[3]); - __m128i vlcs1 = _mm_set1_epi8(chr[4]); - __m128i vlcs2 = _mm_set1_epi8(chr[5]); - while (true) - { - const char *s = buf_ + loc + lcp; - const char *e = buf_ + end_ + lcp - min + 1; - while (s <= e - 16) - { - __m128i vstrlcp = _mm_loadu_si128(reinterpret_cast(s)); - __m128i vstrlcs = _mm_loadu_si128(reinterpret_cast(s + lcs - lcp)); - __m128i veqlcp = _mm_cmpeq_epi8(vlcp0, vstrlcp); - __m128i veqlcs = _mm_cmpeq_epi8(vlcs0, vstrlcs); - veqlcp = _mm_or_si128(veqlcp, _mm_cmpeq_epi8(vlcp1, vstrlcp)); - veqlcp = _mm_or_si128(veqlcp, _mm_cmpeq_epi8(vlcp2, vstrlcp)); - veqlcs = _mm_or_si128(veqlcs, _mm_cmpeq_epi8(vlcs1, vstrlcs)); - veqlcs = _mm_or_si128(veqlcs, _mm_cmpeq_epi8(vlcs2, vstrlcs)); - uint32_t mask = _mm_movemask_epi8(_mm_and_si128(veqlcp, veqlcs)); - while (mask != 0) - { - uint32_t offset = ctz(mask); - loc = s - lcp 
+ offset - buf_; - set_current(loc); - if (min >= 4) - { - if (Pattern::predict_match(pmh, &buf_[loc], min)) - return true; - } - else - { - if (loc + 4 > end_ || Pattern::predict_match(pma, &buf_[loc]) == 0) - return true; - } - mask &= mask - 1; - } - s += 16; - } - s -= lcp; - loc = s - buf_; - set_current_and_peek_more(loc - 1); - loc = cur_ + 1; - if (loc + min > end_) - return false; - if (loc + min + 15 > end_) - break; - } - } - else if (pat_->pin_ == 4) - { - size_t lcp = pat_->lcp_; - size_t lcs = pat_->lcs_; - const char *chr = pat_->chr_; - __m128i vlcp0 = _mm_set1_epi8(chr[0]); - __m128i vlcp1 = _mm_set1_epi8(chr[1]); - __m128i vlcp2 = _mm_set1_epi8(chr[2]); - __m128i vlcp3 = _mm_set1_epi8(chr[3]); - __m128i vlcs0 = _mm_set1_epi8(chr[4]); - __m128i vlcs1 = _mm_set1_epi8(chr[5]); - __m128i vlcs2 = _mm_set1_epi8(chr[6]); - __m128i vlcs3 = _mm_set1_epi8(chr[7]); - while (true) - { - const char *s = buf_ + loc + lcp; - const char *e = buf_ + end_ + lcp - min + 1; - while (s <= e - 16) - { - __m128i vstrlcp = _mm_loadu_si128(reinterpret_cast(s)); - __m128i vstrlcs = _mm_loadu_si128(reinterpret_cast(s + lcs - lcp)); - __m128i veqlcp = _mm_cmpeq_epi8(vlcp0, vstrlcp); - __m128i veqlcs = _mm_cmpeq_epi8(vlcs0, vstrlcs); - veqlcp = _mm_or_si128(veqlcp, _mm_cmpeq_epi8(vlcp1, vstrlcp)); - veqlcp = _mm_or_si128(veqlcp, _mm_cmpeq_epi8(vlcp2, vstrlcp)); - veqlcp = _mm_or_si128(veqlcp, _mm_cmpeq_epi8(vlcp3, vstrlcp)); - veqlcs = _mm_or_si128(veqlcs, _mm_cmpeq_epi8(vlcs1, vstrlcs)); - veqlcs = _mm_or_si128(veqlcs, _mm_cmpeq_epi8(vlcs2, vstrlcs)); - veqlcs = _mm_or_si128(veqlcs, _mm_cmpeq_epi8(vlcs3, vstrlcs)); - uint32_t mask = _mm_movemask_epi8(_mm_and_si128(veqlcp, veqlcs)); - while (mask != 0) - { - uint32_t offset = ctz(mask); - loc = s - lcp + offset - buf_; - set_current(loc); - if (min >= 4) - { - if (Pattern::predict_match(pmh, &buf_[loc], min)) - return true; - } - else - { - if (loc + 4 > end_ || Pattern::predict_match(pma, &buf_[loc]) == 0) - return true; - } 
- mask &= mask - 1; - } - s += 16; - } - s -= lcp; - loc = s - buf_; - set_current_and_peek_more(loc - 1); - loc = cur_ + 1; - if (loc + min > end_) - return false; - if (loc + min + 15 > end_) - break; - } - } - else if (pat_->pin_ == 5) - { - size_t lcp = pat_->lcp_; - size_t lcs = pat_->lcs_; - const char *chr = pat_->chr_; - __m128i vlcp0 = _mm_set1_epi8(chr[0]); - __m128i vlcp1 = _mm_set1_epi8(chr[1]); - __m128i vlcp2 = _mm_set1_epi8(chr[2]); - __m128i vlcp3 = _mm_set1_epi8(chr[3]); - __m128i vlcp4 = _mm_set1_epi8(chr[4]); - __m128i vlcs0 = _mm_set1_epi8(chr[5]); - __m128i vlcs1 = _mm_set1_epi8(chr[6]); - __m128i vlcs2 = _mm_set1_epi8(chr[7]); - __m128i vlcs3 = _mm_set1_epi8(chr[8]); - __m128i vlcs4 = _mm_set1_epi8(chr[9]); - while (true) - { - const char *s = buf_ + loc + lcp; - const char *e = buf_ + end_ + lcp - min + 1; - while (s <= e - 16) - { - __m128i vstrlcp = _mm_loadu_si128(reinterpret_cast(s)); - __m128i vstrlcs = _mm_loadu_si128(reinterpret_cast(s + lcs - lcp)); - __m128i veqlcp = _mm_cmpeq_epi8(vlcp0, vstrlcp); - __m128i veqlcs = _mm_cmpeq_epi8(vlcs0, vstrlcs); - veqlcp = _mm_or_si128(veqlcp, _mm_cmpeq_epi8(vlcp1, vstrlcp)); - veqlcp = _mm_or_si128(veqlcp, _mm_cmpeq_epi8(vlcp2, vstrlcp)); - veqlcp = _mm_or_si128(veqlcp, _mm_cmpeq_epi8(vlcp3, vstrlcp)); - veqlcp = _mm_or_si128(veqlcp, _mm_cmpeq_epi8(vlcp4, vstrlcp)); - veqlcs = _mm_or_si128(veqlcs, _mm_cmpeq_epi8(vlcs1, vstrlcs)); - veqlcs = _mm_or_si128(veqlcs, _mm_cmpeq_epi8(vlcs2, vstrlcs)); - veqlcs = _mm_or_si128(veqlcs, _mm_cmpeq_epi8(vlcs3, vstrlcs)); - veqlcs = _mm_or_si128(veqlcs, _mm_cmpeq_epi8(vlcs4, vstrlcs)); - uint32_t mask = _mm_movemask_epi8(_mm_and_si128(veqlcp, veqlcs)); - while (mask != 0) - { - uint32_t offset = ctz(mask); - loc = s - lcp + offset - buf_; - set_current(loc); - if (min >= 4) - { - if (Pattern::predict_match(pmh, &buf_[loc], min)) - return true; - } - else - { - if (loc + 4 > end_ || Pattern::predict_match(pma, &buf_[loc]) == 0) - return true; - } - mask &= mask 
- 1; - } - s += 16; - } - s -= lcp; - loc = s - buf_; - set_current_and_peek_more(loc - 1); - loc = cur_ + 1; - if (loc + min > end_) - return false; - if (loc + min + 15 > end_) - break; - } - } - else if (pat_->pin_ == 6) - { - size_t lcp = pat_->lcp_; - size_t lcs = pat_->lcs_; - const char *chr = pat_->chr_; - __m128i vlcp0 = _mm_set1_epi8(chr[0]); - __m128i vlcp1 = _mm_set1_epi8(chr[1]); - __m128i vlcp2 = _mm_set1_epi8(chr[2]); - __m128i vlcp3 = _mm_set1_epi8(chr[3]); - __m128i vlcp4 = _mm_set1_epi8(chr[4]); - __m128i vlcp5 = _mm_set1_epi8(chr[5]); - __m128i vlcs0 = _mm_set1_epi8(chr[6]); - __m128i vlcs1 = _mm_set1_epi8(chr[7]); - __m128i vlcs2 = _mm_set1_epi8(chr[8]); - __m128i vlcs3 = _mm_set1_epi8(chr[9]); - __m128i vlcs4 = _mm_set1_epi8(chr[10]); - __m128i vlcs5 = _mm_set1_epi8(chr[11]); - while (true) - { - const char *s = buf_ + loc + lcp; - const char *e = buf_ + end_ + lcp - min + 1; - while (s <= e - 16) - { - __m128i vstrlcp = _mm_loadu_si128(reinterpret_cast(s)); - __m128i vstrlcs = _mm_loadu_si128(reinterpret_cast(s + lcs - lcp)); - __m128i veqlcp = _mm_cmpeq_epi8(vlcp0, vstrlcp); - __m128i veqlcs = _mm_cmpeq_epi8(vlcs0, vstrlcs); - veqlcp = _mm_or_si128(veqlcp, _mm_cmpeq_epi8(vlcp1, vstrlcp)); - veqlcp = _mm_or_si128(veqlcp, _mm_cmpeq_epi8(vlcp2, vstrlcp)); - veqlcp = _mm_or_si128(veqlcp, _mm_cmpeq_epi8(vlcp3, vstrlcp)); - veqlcp = _mm_or_si128(veqlcp, _mm_cmpeq_epi8(vlcp4, vstrlcp)); - veqlcp = _mm_or_si128(veqlcp, _mm_cmpeq_epi8(vlcp5, vstrlcp)); - veqlcs = _mm_or_si128(veqlcs, _mm_cmpeq_epi8(vlcs1, vstrlcs)); - veqlcs = _mm_or_si128(veqlcs, _mm_cmpeq_epi8(vlcs2, vstrlcs)); - veqlcs = _mm_or_si128(veqlcs, _mm_cmpeq_epi8(vlcs3, vstrlcs)); - veqlcs = _mm_or_si128(veqlcs, _mm_cmpeq_epi8(vlcs4, vstrlcs)); - veqlcs = _mm_or_si128(veqlcs, _mm_cmpeq_epi8(vlcs5, vstrlcs)); - uint32_t mask = _mm_movemask_epi8(_mm_and_si128(veqlcp, veqlcs)); - while (mask != 0) - { - uint32_t offset = ctz(mask); - loc = s - lcp + offset - buf_; - set_current(loc); - if 
(min >= 4) - { - if (Pattern::predict_match(pmh, &buf_[loc], min)) - return true; - } - else - { - if (loc + 4 > end_ || Pattern::predict_match(pma, &buf_[loc]) == 0) - return true; - } - mask &= mask - 1; - } - s += 16; - } - s -= lcp; - loc = s - buf_; - set_current_and_peek_more(loc - 1); - loc = cur_ + 1; - if (loc + min > end_) - return false; - if (loc + min + 15 > end_) - break; - } - } - else if (pat_->pin_ == 7) - { - size_t lcp = pat_->lcp_; - size_t lcs = pat_->lcs_; - const char *chr = pat_->chr_; - __m128i vlcp0 = _mm_set1_epi8(chr[0]); - __m128i vlcp1 = _mm_set1_epi8(chr[1]); - __m128i vlcp2 = _mm_set1_epi8(chr[2]); - __m128i vlcp3 = _mm_set1_epi8(chr[3]); - __m128i vlcp4 = _mm_set1_epi8(chr[4]); - __m128i vlcp5 = _mm_set1_epi8(chr[5]); - __m128i vlcp6 = _mm_set1_epi8(chr[6]); - __m128i vlcs0 = _mm_set1_epi8(chr[7]); - __m128i vlcs1 = _mm_set1_epi8(chr[8]); - __m128i vlcs2 = _mm_set1_epi8(chr[9]); - __m128i vlcs3 = _mm_set1_epi8(chr[10]); - __m128i vlcs4 = _mm_set1_epi8(chr[11]); - __m128i vlcs5 = _mm_set1_epi8(chr[12]); - __m128i vlcs6 = _mm_set1_epi8(chr[13]); - while (true) - { - const char *s = buf_ + loc + lcp; - const char *e = buf_ + end_ + lcp - min + 1; - while (s <= e - 16) - { - __m128i vstrlcp = _mm_loadu_si128(reinterpret_cast(s)); - __m128i vstrlcs = _mm_loadu_si128(reinterpret_cast(s + lcs - lcp)); - __m128i veqlcp = _mm_cmpeq_epi8(vlcp0, vstrlcp); - __m128i veqlcs = _mm_cmpeq_epi8(vlcs0, vstrlcs); - veqlcp = _mm_or_si128(veqlcp, _mm_cmpeq_epi8(vlcp1, vstrlcp)); - veqlcp = _mm_or_si128(veqlcp, _mm_cmpeq_epi8(vlcp2, vstrlcp)); - veqlcp = _mm_or_si128(veqlcp, _mm_cmpeq_epi8(vlcp3, vstrlcp)); - veqlcp = _mm_or_si128(veqlcp, _mm_cmpeq_epi8(vlcp4, vstrlcp)); - veqlcp = _mm_or_si128(veqlcp, _mm_cmpeq_epi8(vlcp5, vstrlcp)); - veqlcp = _mm_or_si128(veqlcp, _mm_cmpeq_epi8(vlcp6, vstrlcp)); - veqlcs = _mm_or_si128(veqlcs, _mm_cmpeq_epi8(vlcs1, vstrlcs)); - veqlcs = _mm_or_si128(veqlcs, _mm_cmpeq_epi8(vlcs2, vstrlcs)); - veqlcs = 
_mm_or_si128(veqlcs, _mm_cmpeq_epi8(vlcs3, vstrlcs)); - veqlcs = _mm_or_si128(veqlcs, _mm_cmpeq_epi8(vlcs4, vstrlcs)); - veqlcs = _mm_or_si128(veqlcs, _mm_cmpeq_epi8(vlcs5, vstrlcs)); - veqlcs = _mm_or_si128(veqlcs, _mm_cmpeq_epi8(vlcs6, vstrlcs)); - uint32_t mask = _mm_movemask_epi8(_mm_and_si128(veqlcp, veqlcs)); - while (mask != 0) - { - uint32_t offset = ctz(mask); - loc = s - lcp + offset - buf_; - set_current(loc); - if (min >= 4) - { - if (Pattern::predict_match(pmh, &buf_[loc], min)) - return true; - } - else - { - if (loc + 4 > end_ || Pattern::predict_match(pma, &buf_[loc]) == 0) - return true; - } - mask &= mask - 1; - } - s += 16; - } - s -= lcp; - loc = s - buf_; - set_current_and_peek_more(loc - 1); - loc = cur_ + 1; - if (loc + min > end_) - return false; - if (loc + min + 15 > end_) - break; - } - } - else if (pat_->pin_ == 8) - { - size_t lcp = pat_->lcp_; - size_t lcs = pat_->lcs_; - const char *chr = pat_->chr_; - __m128i vlcp0 = _mm_set1_epi8(chr[0]); - __m128i vlcp1 = _mm_set1_epi8(chr[1]); - __m128i vlcp2 = _mm_set1_epi8(chr[2]); - __m128i vlcp3 = _mm_set1_epi8(chr[3]); - __m128i vlcp4 = _mm_set1_epi8(chr[4]); - __m128i vlcp5 = _mm_set1_epi8(chr[5]); - __m128i vlcp6 = _mm_set1_epi8(chr[6]); - __m128i vlcp7 = _mm_set1_epi8(chr[7]); - __m128i vlcs0 = _mm_set1_epi8(chr[8]); - __m128i vlcs1 = _mm_set1_epi8(chr[9]); - __m128i vlcs2 = _mm_set1_epi8(chr[10]); - __m128i vlcs3 = _mm_set1_epi8(chr[11]); - __m128i vlcs4 = _mm_set1_epi8(chr[12]); - __m128i vlcs5 = _mm_set1_epi8(chr[13]); - __m128i vlcs6 = _mm_set1_epi8(chr[14]); - __m128i vlcs7 = _mm_set1_epi8(chr[15]); - while (true) - { - const char *s = buf_ + loc + lcp; - const char *e = buf_ + end_ + lcp - min + 1; - while (s <= e - 16) - { - __m128i vstrlcp = _mm_loadu_si128(reinterpret_cast(s)); - __m128i vstrlcs = _mm_loadu_si128(reinterpret_cast(s + lcs - lcp)); - __m128i veqlcp = _mm_cmpeq_epi8(vlcp0, vstrlcp); - __m128i veqlcs = _mm_cmpeq_epi8(vlcs0, vstrlcs); - veqlcp = _mm_or_si128(veqlcp, 
_mm_cmpeq_epi8(vlcp1, vstrlcp)); - veqlcp = _mm_or_si128(veqlcp, _mm_cmpeq_epi8(vlcp2, vstrlcp)); - veqlcp = _mm_or_si128(veqlcp, _mm_cmpeq_epi8(vlcp3, vstrlcp)); - veqlcp = _mm_or_si128(veqlcp, _mm_cmpeq_epi8(vlcp4, vstrlcp)); - veqlcp = _mm_or_si128(veqlcp, _mm_cmpeq_epi8(vlcp5, vstrlcp)); - veqlcp = _mm_or_si128(veqlcp, _mm_cmpeq_epi8(vlcp6, vstrlcp)); - veqlcp = _mm_or_si128(veqlcp, _mm_cmpeq_epi8(vlcp7, vstrlcp)); - veqlcs = _mm_or_si128(veqlcs, _mm_cmpeq_epi8(vlcs1, vstrlcs)); - veqlcs = _mm_or_si128(veqlcs, _mm_cmpeq_epi8(vlcs2, vstrlcs)); - veqlcs = _mm_or_si128(veqlcs, _mm_cmpeq_epi8(vlcs3, vstrlcs)); - veqlcs = _mm_or_si128(veqlcs, _mm_cmpeq_epi8(vlcs4, vstrlcs)); - veqlcs = _mm_or_si128(veqlcs, _mm_cmpeq_epi8(vlcs5, vstrlcs)); - veqlcs = _mm_or_si128(veqlcs, _mm_cmpeq_epi8(vlcs6, vstrlcs)); - veqlcs = _mm_or_si128(veqlcs, _mm_cmpeq_epi8(vlcs7, vstrlcs)); - uint32_t mask = _mm_movemask_epi8(_mm_and_si128(veqlcp, veqlcs)); - while (mask != 0) - { - uint32_t offset = ctz(mask); - loc = s - lcp + offset - buf_; - set_current(loc); - if (min >= 4) - { - if (Pattern::predict_match(pmh, &buf_[loc], min)) - return true; - } - else - { - if (loc + 4 > end_ || Pattern::predict_match(pma, &buf_[loc]) == 0) - return true; - } - mask &= mask - 1; - } - s += 16; - } - s -= lcp; - loc = s - buf_; - set_current_and_peek_more(loc - 1); - loc = cur_ + 1; - if (loc + min > end_) - return false; - if (loc + min + 15 > end_) - break; - } - } -#elif defined(HAVE_NEON) - // look for needles - else if (pat_->pin_ == 2) - { - size_t lcp = pat_->lcp_; - size_t lcs = pat_->lcs_; - const char *chr = pat_->chr_; - uint8x16_t vlcp0 = vdupq_n_u8(chr[0]); - uint8x16_t vlcp1 = vdupq_n_u8(chr[1]); - uint8x16_t vlcs0 = vdupq_n_u8(chr[2]); - uint8x16_t vlcs1 = vdupq_n_u8(chr[3]); - while (true) - { - const char *s = buf_ + loc + lcp; - const char *e = buf_ + end_ + lcp - min + 1; - while (s <= e - 16) - { - uint8x16_t vstrlcp = vld1q_u8(reinterpret_cast(s)); - uint8x16_t vstrlcs = 
vld1q_u8(reinterpret_cast(s + lcs - lcp)); - uint8x16_t vmasklcp8 = vorrq_u8(vceqq_u8(vlcp0, vstrlcp), vceqq_u8(vlcp1, vstrlcp)); - uint8x16_t vmasklcs8 = vorrq_u8(vceqq_u8(vlcs0, vstrlcs), vceqq_u8(vlcs1, vstrlcs)); - uint64x2_t vmask64 = vreinterpretq_u64_u8(vandq_u8(vmasklcp8, vmasklcs8)); - uint64_t mask = vgetq_lane_u64(vmask64, 0); - if (mask != 0) - { - for (int i = 0; i < 8; ++i) - { - if ((mask & 0xff)) - { - loc = s - lcp + i - buf_; - set_current(loc); - if (min >= 4) - { - if (Pattern::predict_match(pmh, &buf_[loc], min)) - return true; - } - else - { - if (loc + 4 > end_ || Pattern::predict_match(pma, &buf_[loc]) == 0) - return true; - } - } - mask >>= 8; - } - } - mask = vgetq_lane_u64(vmask64, 1); - if (mask != 0) - { - for (int i = 0; i < 8; ++i) - { - if ((mask & 0xff)) - { - loc = s - lcp + i + 8 - buf_; - set_current(loc); - if (min >= 4) - { - if (Pattern::predict_match(pmh, &buf_[loc], min)) - return true; - } - else - { - if (loc + 4 > end_ || Pattern::predict_match(pma, &buf_[loc]) == 0) - return true; - } - } - mask >>= 8; - } - } - s += 16; - } - s -= lcp; - loc = s - buf_; - set_current_and_peek_more(loc - 1); - loc = cur_ + 1; - if (loc + min > end_) - return false; - if (loc + min + 15 > end_) - break; - } - } - else if (pat_->pin_ == 3) - { - size_t lcp = pat_->lcp_; - size_t lcs = pat_->lcs_; - const char *chr = pat_->chr_; - uint8x16_t vlcp0 = vdupq_n_u8(chr[0]); - uint8x16_t vlcp1 = vdupq_n_u8(chr[1]); - uint8x16_t vlcp2 = vdupq_n_u8(chr[2]); - uint8x16_t vlcs0 = vdupq_n_u8(chr[3]); - uint8x16_t vlcs1 = vdupq_n_u8(chr[4]); - uint8x16_t vlcs2 = vdupq_n_u8(chr[5]); - while (true) - { - const char *s = buf_ + loc + lcp; - const char *e = buf_ + end_ + lcp - min + 1; - while (s <= e - 16) - { - uint8x16_t vstrlcp = vld1q_u8(reinterpret_cast(s)); - uint8x16_t vstrlcs = vld1q_u8(reinterpret_cast(s + lcs - lcp)); - uint8x16_t vmasklcp8 = - vorrq_u8( - vorrq_u8( - vceqq_u8(vlcp0, vstrlcp), - vceqq_u8(vlcp1, vstrlcp)), - vceqq_u8(vlcp2, 
vstrlcp)); - uint8x16_t vmasklcs8 = - vorrq_u8( - vorrq_u8( - vceqq_u8(vlcs0, vstrlcs), - vceqq_u8(vlcs1, vstrlcs)), - vceqq_u8(vlcs2, vstrlcs)); - uint64x2_t vmask64 = vreinterpretq_u64_u8(vandq_u8(vmasklcp8, vmasklcs8)); - uint64_t mask = vgetq_lane_u64(vmask64, 0); - if (mask != 0) - { - for (int i = 0; i < 8; ++i) - { - if ((mask & 0xff)) - { - loc = s - lcp + i - buf_; - set_current(loc); - if (min >= 4) - { - if (Pattern::predict_match(pmh, &buf_[loc], min)) - return true; - } - else - { - if (loc + 4 > end_ || Pattern::predict_match(pma, &buf_[loc]) == 0) - return true; - } - } - mask >>= 8; - } - } - mask = vgetq_lane_u64(vmask64, 1); - if (mask != 0) - { - for (int i = 0; i < 8; ++i) - { - if ((mask & 0xff)) - { - loc = s - lcp + i + 8 - buf_; - set_current(loc); - if (min >= 4) - { - if (Pattern::predict_match(pmh, &buf_[loc], min)) - return true; - } - else - { - if (loc + 4 > end_ || Pattern::predict_match(pma, &buf_[loc]) == 0) - return true; - } - } - mask >>= 8; - } - } - s += 16; - } - s -= lcp; - loc = s - buf_; - set_current_and_peek_more(loc - 1); - loc = cur_ + 1; - if (loc + min > end_) - return false; - if (loc + min + 15 > end_) - break; - } - } - else if (pat_->pin_ == 4) - { - size_t lcp = pat_->lcp_; - size_t lcs = pat_->lcs_; - const char *chr = pat_->chr_; - uint8x16_t vlcp0 = vdupq_n_u8(chr[0]); - uint8x16_t vlcp1 = vdupq_n_u8(chr[1]); - uint8x16_t vlcp2 = vdupq_n_u8(chr[2]); - uint8x16_t vlcp3 = vdupq_n_u8(chr[3]); - uint8x16_t vlcs0 = vdupq_n_u8(chr[4]); - uint8x16_t vlcs1 = vdupq_n_u8(chr[5]); - uint8x16_t vlcs2 = vdupq_n_u8(chr[6]); - uint8x16_t vlcs3 = vdupq_n_u8(chr[7]); - while (true) - { - const char *s = buf_ + loc + lcp; - const char *e = buf_ + end_ + lcp - min + 1; - while (s <= e - 16) - { - uint8x16_t vstrlcp = vld1q_u8(reinterpret_cast(s)); - uint8x16_t vstrlcs = vld1q_u8(reinterpret_cast(s + lcs - lcp)); - uint8x16_t vmasklcp8 = - vorrq_u8( - vorrq_u8( - vorrq_u8( - vceqq_u8(vlcp0, vstrlcp), - vceqq_u8(vlcp1, vstrlcp)), 
- vceqq_u8(vlcp2, vstrlcp)), - vceqq_u8(vlcp3, vstrlcp)); - uint8x16_t vmasklcs8 = - vorrq_u8( - vorrq_u8( - vorrq_u8( - vceqq_u8(vlcs0, vstrlcs), - vceqq_u8(vlcs1, vstrlcs)), - vceqq_u8(vlcs2, vstrlcs)), - vceqq_u8(vlcs3, vstrlcs)); - uint64x2_t vmask64 = vreinterpretq_u64_u8(vandq_u8(vmasklcp8, vmasklcs8)); - uint64_t mask = vgetq_lane_u64(vmask64, 0); - if (mask != 0) - { - for (int i = 0; i < 8; ++i) - { - if ((mask & 0xff)) - { - loc = s - lcp + i - buf_; - set_current(loc); - if (min >= 4) - { - if (Pattern::predict_match(pmh, &buf_[loc], min)) - return true; - } - else - { - if (loc + 4 > end_ || Pattern::predict_match(pma, &buf_[loc]) == 0) - return true; - } - } - mask >>= 8; - } - } - mask = vgetq_lane_u64(vmask64, 1); - if (mask != 0) - { - for (int i = 0; i < 8; ++i) - { - if ((mask & 0xff)) - { - loc = s - lcp + i + 8 - buf_; - set_current(loc); - if (min >= 4) - { - if (Pattern::predict_match(pmh, &buf_[loc], min)) - return true; - } - else - { - if (loc + 4 > end_ || Pattern::predict_match(pma, &buf_[loc]) == 0) - return true; - } - } - mask >>= 8; - } - } - s += 16; - } - s -= lcp; - loc = s - buf_; - set_current_and_peek_more(loc - 1); - loc = cur_ + 1; - if (loc + min > end_) - return false; - if (loc + min + 15 > end_) - break; - } - } - else if (pat_->pin_ == 5) - { - size_t lcp = pat_->lcp_; - size_t lcs = pat_->lcs_; - const char *chr = pat_->chr_; - uint8x16_t vlcp0 = vdupq_n_u8(chr[0]); - uint8x16_t vlcp1 = vdupq_n_u8(chr[1]); - uint8x16_t vlcp2 = vdupq_n_u8(chr[2]); - uint8x16_t vlcp3 = vdupq_n_u8(chr[3]); - uint8x16_t vlcp4 = vdupq_n_u8(chr[4]); - uint8x16_t vlcs0 = vdupq_n_u8(chr[5]); - uint8x16_t vlcs1 = vdupq_n_u8(chr[6]); - uint8x16_t vlcs2 = vdupq_n_u8(chr[7]); - uint8x16_t vlcs3 = vdupq_n_u8(chr[8]); - uint8x16_t vlcs4 = vdupq_n_u8(chr[9]); - while (true) - { - const char *s = buf_ + loc + lcp; - const char *e = buf_ + end_ + lcp - min + 1; - while (s <= e - 16) - { - uint8x16_t vstrlcp = vld1q_u8(reinterpret_cast(s)); - uint8x16_t 
vstrlcs = vld1q_u8(reinterpret_cast(s + lcs - lcp)); - uint8x16_t vmasklcp8 = - vorrq_u8( - vorrq_u8( - vorrq_u8( - vorrq_u8( - vceqq_u8(vlcp0, vstrlcp), - vceqq_u8(vlcp1, vstrlcp)), - vceqq_u8(vlcp2, vstrlcp)), - vceqq_u8(vlcp3, vstrlcp)), - vceqq_u8(vlcp4, vstrlcp)); - uint8x16_t vmasklcs8 = - vorrq_u8( - vorrq_u8( - vorrq_u8( - vorrq_u8( - vceqq_u8(vlcs0, vstrlcs), - vceqq_u8(vlcs1, vstrlcs)), - vceqq_u8(vlcs2, vstrlcs)), - vceqq_u8(vlcs3, vstrlcs)), - vceqq_u8(vlcs4, vstrlcs)); - uint64x2_t vmask64 = vreinterpretq_u64_u8(vandq_u8(vmasklcp8, vmasklcs8)); - uint64_t mask = vgetq_lane_u64(vmask64, 0); - if (mask != 0) - { - for (int i = 0; i < 8; ++i) - { - if ((mask & 0xff)) - { - loc = s - lcp + i - buf_; - set_current(loc); - if (min >= 4) - { - if (Pattern::predict_match(pmh, &buf_[loc], min)) - return true; - } - else - { - if (loc + 4 > end_ || Pattern::predict_match(pma, &buf_[loc]) == 0) - return true; - } - } - mask >>= 8; - } - } - mask = vgetq_lane_u64(vmask64, 1); - if (mask != 0) - { - for (int i = 0; i < 8; ++i) - { - if ((mask & 0xff)) - { - loc = s - lcp + i + 8 - buf_; - set_current(loc); - if (min >= 4) - { - if (Pattern::predict_match(pmh, &buf_[loc], min)) - return true; - } - else - { - if (loc + 4 > end_ || Pattern::predict_match(pma, &buf_[loc]) == 0) - return true; - } - } - mask >>= 8; - } - } - s += 16; - } - s -= lcp; - loc = s - buf_; - set_current_and_peek_more(loc - 1); - loc = cur_ + 1; - if (loc + min > end_) - return false; - if (loc + min + 15 > end_) - break; - } - } - else if (pat_->pin_ == 6) - { - size_t lcp = pat_->lcp_; - size_t lcs = pat_->lcs_; - const char *chr = pat_->chr_; - uint8x16_t vlcp0 = vdupq_n_u8(chr[0]); - uint8x16_t vlcp1 = vdupq_n_u8(chr[1]); - uint8x16_t vlcp2 = vdupq_n_u8(chr[2]); - uint8x16_t vlcp3 = vdupq_n_u8(chr[3]); - uint8x16_t vlcp4 = vdupq_n_u8(chr[4]); - uint8x16_t vlcp5 = vdupq_n_u8(chr[5]); - uint8x16_t vlcs0 = vdupq_n_u8(chr[6]); - uint8x16_t vlcs1 = vdupq_n_u8(chr[7]); - uint8x16_t vlcs2 = 
vdupq_n_u8(chr[8]); - uint8x16_t vlcs3 = vdupq_n_u8(chr[9]); - uint8x16_t vlcs4 = vdupq_n_u8(chr[10]); - uint8x16_t vlcs5 = vdupq_n_u8(chr[11]); - while (true) - { - const char *s = buf_ + loc + lcp; - const char *e = buf_ + end_ + lcp - min + 1; - while (s <= e - 16) - { - uint8x16_t vstrlcp = vld1q_u8(reinterpret_cast(s)); - uint8x16_t vstrlcs = vld1q_u8(reinterpret_cast(s + lcs - lcp)); - uint8x16_t vmasklcp8 = - vorrq_u8( - vorrq_u8( - vorrq_u8( - vorrq_u8( - vorrq_u8( - vceqq_u8(vlcp0, vstrlcp), - vceqq_u8(vlcp1, vstrlcp)), - vceqq_u8(vlcp2, vstrlcp)), - vceqq_u8(vlcp3, vstrlcp)), - vceqq_u8(vlcp4, vstrlcp)), - vceqq_u8(vlcp5, vstrlcp)); - uint8x16_t vmasklcs8 = - vorrq_u8( - vorrq_u8( - vorrq_u8( - vorrq_u8( - vorrq_u8( - vceqq_u8(vlcs0, vstrlcs), - vceqq_u8(vlcs1, vstrlcs)), - vceqq_u8(vlcs2, vstrlcs)), - vceqq_u8(vlcs3, vstrlcs)), - vceqq_u8(vlcs4, vstrlcs)), - vceqq_u8(vlcs5, vstrlcs)); - uint64x2_t vmask64 = vreinterpretq_u64_u8(vandq_u8(vmasklcp8, vmasklcs8)); - uint64_t mask = vgetq_lane_u64(vmask64, 0); - if (mask != 0) - { - for (int i = 0; i < 8; ++i) - { - if ((mask & 0xff)) - { - loc = s - lcp + i - buf_; - set_current(loc); - if (min >= 4) - { - if (Pattern::predict_match(pmh, &buf_[loc], min)) - return true; - } - else - { - if (loc + 4 > end_ || Pattern::predict_match(pma, &buf_[loc]) == 0) - return true; - } - } - mask >>= 8; - } - } - mask = vgetq_lane_u64(vmask64, 1); - if (mask != 0) - { - for (int i = 0; i < 8; ++i) - { - if ((mask & 0xff)) - { - loc = s - lcp + i + 8 - buf_; - set_current(loc); - if (min >= 4) - { - if (Pattern::predict_match(pmh, &buf_[loc], min)) - return true; - } - else - { - if (loc + 4 > end_ || Pattern::predict_match(pma, &buf_[loc]) == 0) - return true; - } - } - mask >>= 8; - } - } - s += 16; - } - s -= lcp; - loc = s - buf_; - set_current_and_peek_more(loc - 1); - loc = cur_ + 1; - if (loc + min > end_) - return false; - if (loc + min + 15 > end_) - break; - } - } - else if (pat_->pin_ == 7) - { - size_t lcp = 
pat_->lcp_; - size_t lcs = pat_->lcs_; - const char *chr = pat_->chr_; - uint8x16_t vlcp0 = vdupq_n_u8(chr[0]); - uint8x16_t vlcp1 = vdupq_n_u8(chr[1]); - uint8x16_t vlcp2 = vdupq_n_u8(chr[2]); - uint8x16_t vlcp3 = vdupq_n_u8(chr[3]); - uint8x16_t vlcp4 = vdupq_n_u8(chr[4]); - uint8x16_t vlcp5 = vdupq_n_u8(chr[5]); - uint8x16_t vlcp6 = vdupq_n_u8(chr[6]); - uint8x16_t vlcs0 = vdupq_n_u8(chr[7]); - uint8x16_t vlcs1 = vdupq_n_u8(chr[8]); - uint8x16_t vlcs2 = vdupq_n_u8(chr[9]); - uint8x16_t vlcs3 = vdupq_n_u8(chr[10]); - uint8x16_t vlcs4 = vdupq_n_u8(chr[11]); - uint8x16_t vlcs5 = vdupq_n_u8(chr[12]); - uint8x16_t vlcs6 = vdupq_n_u8(chr[13]); - while (true) - { - const char *s = buf_ + loc + lcp; - const char *e = buf_ + end_ + lcp - min + 1; - while (s <= e - 16) - { - uint8x16_t vstrlcp = vld1q_u8(reinterpret_cast(s)); - uint8x16_t vstrlcs = vld1q_u8(reinterpret_cast(s + lcs - lcp)); - uint8x16_t vmasklcp8 = - vorrq_u8( - vorrq_u8( - vorrq_u8( - vorrq_u8( - vorrq_u8( - vorrq_u8( - vceqq_u8(vlcp0, vstrlcp), - vceqq_u8(vlcp1, vstrlcp)), - vceqq_u8(vlcp2, vstrlcp)), - vceqq_u8(vlcp3, vstrlcp)), - vceqq_u8(vlcp4, vstrlcp)), - vceqq_u8(vlcp5, vstrlcp)), - vceqq_u8(vlcp6, vstrlcp)); - uint8x16_t vmasklcs8 = - vorrq_u8( - vorrq_u8( - vorrq_u8( - vorrq_u8( - vorrq_u8( - vorrq_u8( - vceqq_u8(vlcs0, vstrlcs), - vceqq_u8(vlcs1, vstrlcs)), - vceqq_u8(vlcs2, vstrlcs)), - vceqq_u8(vlcs3, vstrlcs)), - vceqq_u8(vlcs4, vstrlcs)), - vceqq_u8(vlcs5, vstrlcs)), - vceqq_u8(vlcs6, vstrlcs)); - uint64x2_t vmask64 = vreinterpretq_u64_u8(vandq_u8(vmasklcp8, vmasklcs8)); - uint64_t mask = vgetq_lane_u64(vmask64, 0); - if (mask != 0) - { - for (int i = 0; i < 8; ++i) - { - if ((mask & 0xff)) - { - loc = s - lcp + i - buf_; - set_current(loc); - if (min >= 4) - { - if (Pattern::predict_match(pmh, &buf_[loc], min)) - return true; - } - else - { - if (loc + 4 > end_ || Pattern::predict_match(pma, &buf_[loc]) == 0) - return true; - } - } - mask >>= 8; - } - } - mask = vgetq_lane_u64(vmask64, 1); 
- if (mask != 0) - { - for (int i = 0; i < 8; ++i) - { - if ((mask & 0xff)) - { - loc = s - lcp + i + 8 - buf_; - set_current(loc); - if (min >= 4) - { - if (Pattern::predict_match(pmh, &buf_[loc], min)) - return true; - } - else - { - if (loc + 4 > end_ || Pattern::predict_match(pma, &buf_[loc]) == 0) - return true; - } - } - mask >>= 8; - } - } - s += 16; - } - s -= lcp; - loc = s - buf_; - set_current_and_peek_more(loc - 1); - loc = cur_ + 1; - if (loc + min > end_) - return false; - if (loc + min + 15 > end_) - break; - } - } - else if (pat_->pin_ == 8) - { - size_t lcp = pat_->lcp_; - size_t lcs = pat_->lcs_; - const char *chr = pat_->chr_; - uint8x16_t vlcp0 = vdupq_n_u8(chr[0]); - uint8x16_t vlcp1 = vdupq_n_u8(chr[1]); - uint8x16_t vlcp2 = vdupq_n_u8(chr[2]); - uint8x16_t vlcp3 = vdupq_n_u8(chr[3]); - uint8x16_t vlcp4 = vdupq_n_u8(chr[4]); - uint8x16_t vlcp5 = vdupq_n_u8(chr[5]); - uint8x16_t vlcp6 = vdupq_n_u8(chr[6]); - uint8x16_t vlcp7 = vdupq_n_u8(chr[7]); - uint8x16_t vlcs0 = vdupq_n_u8(chr[8]); - uint8x16_t vlcs1 = vdupq_n_u8(chr[9]); - uint8x16_t vlcs2 = vdupq_n_u8(chr[10]); - uint8x16_t vlcs3 = vdupq_n_u8(chr[11]); - uint8x16_t vlcs4 = vdupq_n_u8(chr[12]); - uint8x16_t vlcs5 = vdupq_n_u8(chr[13]); - uint8x16_t vlcs6 = vdupq_n_u8(chr[14]); - uint8x16_t vlcs7 = vdupq_n_u8(chr[15]); - while (true) - { - const char *s = buf_ + loc + lcp; - const char *e = buf_ + end_ + lcp - min + 1; - while (s <= e - 16) - { - uint8x16_t vstrlcp = vld1q_u8(reinterpret_cast(s)); - uint8x16_t vstrlcs = vld1q_u8(reinterpret_cast(s + lcs - lcp)); - uint8x16_t vmasklcp8 = - vorrq_u8( - vorrq_u8( - vorrq_u8( - vorrq_u8( - vorrq_u8( - vorrq_u8( - vorrq_u8( - vceqq_u8(vlcp0, vstrlcp), - vceqq_u8(vlcp1, vstrlcp)), - vceqq_u8(vlcp2, vstrlcp)), - vceqq_u8(vlcp3, vstrlcp)), - vceqq_u8(vlcp4, vstrlcp)), - vceqq_u8(vlcp5, vstrlcp)), - vceqq_u8(vlcp6, vstrlcp)), - vceqq_u8(vlcp7, vstrlcp)); - uint8x16_t vmasklcs8 = - vorrq_u8( - vorrq_u8( - vorrq_u8( - vorrq_u8( - vorrq_u8( - 
vorrq_u8( - vorrq_u8( - vceqq_u8(vlcs0, vstrlcs), - vceqq_u8(vlcs1, vstrlcs)), - vceqq_u8(vlcs2, vstrlcs)), - vceqq_u8(vlcs3, vstrlcs)), - vceqq_u8(vlcs4, vstrlcs)), - vceqq_u8(vlcs5, vstrlcs)), - vceqq_u8(vlcs6, vstrlcs)), - vceqq_u8(vlcs7, vstrlcs)); - uint64x2_t vmask64 = vreinterpretq_u64_u8(vandq_u8(vmasklcp8, vmasklcs8)); - uint64_t mask = vgetq_lane_u64(vmask64, 0); - if (mask != 0) - { - for (int i = 0; i < 8; ++i) - { - if ((mask & 0xff)) - { - loc = s - lcp + i - buf_; - set_current(loc); - if (min >= 4) - { - if (Pattern::predict_match(pmh, &buf_[loc], min)) - return true; - } - else - { - if (loc + 4 > end_ || Pattern::predict_match(pma, &buf_[loc]) == 0) - return true; - } - } - mask >>= 8; - } - } - mask = vgetq_lane_u64(vmask64, 1); - if (mask != 0) - { - for (int i = 0; i < 8; ++i) - { - if ((mask & 0xff)) - { - loc = s - lcp + i + 8 - buf_; - set_current(loc); - if (min >= 4) - { - if (Pattern::predict_match(pmh, &buf_[loc], min)) - return true; - } - else - { - if (loc + 4 > end_ || Pattern::predict_match(pma, &buf_[loc]) == 0) - return true; - } - } - mask >>= 8; - } - } - s += 16; - } - s -= lcp; - loc = s - buf_; - set_current_and_peek_more(loc - 1); - loc = cur_ + 1; - if (loc + min > end_) - return false; - if (loc + min + 15 > end_) - break; - } - } -#endif - if (min >= 4 || pat_->npy_ < 16 || (min >= 2 && pat_->npy_ >= 56)) - { - if (min >= 4) - { - const Pattern::Pred *bit = pat_->bit_; - Pattern::Pred state1 = ~0; - Pattern::Pred state2 = ~0; - Pattern::Pred mask = (1 << (min - 1)); - while (true) - { - const char *s = buf_ + loc; - const char *e = buf_ + end_; - while (s < e - 1) - { - state2 = (state1 << 1) | bit[static_cast(*s)]; - ++s; - state1 = (state2 << 1) | bit[static_cast(*s)]; - if ((state1 & state2 & mask) == 0) - break; - ++s; - } - if ((state2 & mask) == 0) - { - state1 = state2; - state2 = ~0; - --s; - } - else if ((state1 & mask) != 0 && s == e - 1) - { - state1 = (state1 << 1) | bit[static_cast(*s)]; - if ((state1 & mask) 
!= 0) - ++s; - } - if (s < e) - { - s -= min - 1; - loc = s - buf_; - if (Pattern::predict_match(pmh, s, min)) - { - set_current(loc); - return true; - } - loc += min; - } - else - { - loc = s - buf_; - set_current_and_peek_more(loc - min); - loc = cur_ + min; - if (loc >= end_) - return false; - } - } - } - if (min == 3) - { - const Pattern::Pred *bit = pat_->bit_; - Pattern::Pred state = ~0; - while (true) - { - const char *s = buf_ + loc; - const char *e = buf_ + end_; - while (s < e) - { - state = (state << 1) | bit[static_cast(*s)]; - if ((state & 4) == 0) - break; - ++s; - } - if (s < e) - { - s -= 2; - loc = s - buf_; - if (s > e - 4 || Pattern::predict_match(pma, s) == 0) - { - set_current(loc); - return true; - } - loc += 3; - } - else - { - loc = s - buf_; - set_current_and_peek_more(loc - 3); - loc = cur_ + 3; - if (loc >= end_) - return false; - } - } - } - if (min == 2) - { - const Pattern::Pred *bit = pat_->bit_; - Pattern::Pred state = ~0; - while (true) - { - const char *s = buf_ + loc; - const char *e = buf_ + end_; - while (s < e) - { - state = (state << 1) | bit[static_cast(*s)]; - if ((state & 2) == 0) - break; - ++s; - } - if (s < e) - { - s -= 1; - loc = s - buf_; - if (s > e - 4 || Pattern::predict_match(pma, s) == 0) - { - set_current(loc); - return true; - } - loc += 2; - } - else - { - loc = s - buf_; - set_current_and_peek_more(loc - 2); - loc = cur_ + 2; - if (loc >= end_) - return false; - } - } - } - const Pattern::Pred *bit = pat_->bit_; - while (true) - { - const char *s = buf_ + loc; - const char *e = buf_ + end_ - 3; - bool f = true; - while (s < e && - (f = ((bit[static_cast(*s)] & 1) && - (bit[static_cast(*++s)] & 1) && - (bit[static_cast(*++s)] & 1) && - (bit[static_cast(*++s)] & 1)))) - { - ++s; - } - loc = s - buf_; - if (!f) - { - if (s < e && Pattern::predict_match(pma, s)) - { - ++loc; - continue; - } set_current(loc); return true; } - set_current_and_peek_more(loc - 1); - loc = cur_ + 1; - if (loc + 3 >= end_) + mask &= 
mask - 1; + } + s += 16; + } + s -= lcp; + loc = s - buf_; + set_current_and_peek_more(loc - 1); + loc = cur_ + 1; + if (loc + min > end_) + return false; + if (loc + min + 15 > end_) + break; + } +#elif defined(HAVE_NEON) + uint8x16_t vlcp = vdupq_n_u8(chr[0]); + uint8x16_t vlcs = vdupq_n_u8(chr[1]); + while (true) + { + const char *s = buf_ + loc + lcp; + const char *e = buf_ + end_ + lcp - min + 1; + while (s <= e - 16) + { + uint8x16_t vstrlcp = vld1q_u8(reinterpret_cast(s)); + uint8x16_t vstrlcs = vld1q_u8(reinterpret_cast(s + lcs - lcp)); + uint8x16_t vmasklcp8 = vceqq_u8(vlcp, vstrlcp); + uint8x16_t vmasklcs8 = vceqq_u8(vlcs, vstrlcs); + uint64x2_t vmask64 = vreinterpretq_u64_u8(vandq_u8(vmasklcp8, vmasklcs8)); + uint64_t mask = vgetq_lane_u64(vmask64, 0); + if (mask != 0) + { + for (uint16_t i = 0; i < 8; ++i) { - set_current(loc); - return loc + min <= end_; + if ((mask & 0xff)) + { + loc = s - lcp + i - buf_; + if (Pattern::predict_match(pmh, &buf_[loc], min)) + { + set_current(loc); + return true; + } + } + mask >>= 8; } } - } - while (true) - { - const char *s = buf_ + loc; - const char *e = buf_ + end_ - 6; - bool f = true; - while (s < e && - (f = (Pattern::predict_match(pma, s) && - Pattern::predict_match(pma, ++s) && - Pattern::predict_match(pma, ++s) && - Pattern::predict_match(pma, ++s)))) + mask = vgetq_lane_u64(vmask64, 1); + if (mask != 0) { - ++s; + for (uint16_t i = 8; i < 16; ++i) + { + if ((mask & 0xff)) + { + loc = s - lcp + i - buf_; + if (Pattern::predict_match(pmh, &buf_[loc], min)) + { + set_current(loc); + return true; + } + } + mask >>= 8; + } } + s += 16; + } + s -= lcp; + loc = s - buf_; + set_current_and_peek_more(loc - 1); + loc = cur_ + 1; + if (loc + min > end_) + return false; + if (loc + min + 15 > end_) + break; + } +#endif + int chr0 = chr[0]; + int chr1 = chr[1]; + while (true) + { + const char *s = buf_ + loc + lcp; + const char *e = buf_ + end_; + if (s < e && (s = static_cast(std::memchr(s, chr0, e - s))) != NULL) + { + 
s -= lcp; loc = s - buf_; - if (!f) + if (s + min > e || (s[lcs] == chr1 && Pattern::predict_match(pmh, s, min))) { set_current(loc); return true; } - set_current_and_peek_more(loc - 1); - loc = cur_ + 1; - if (loc + 6 >= end_) - { - set_current(loc); - return loc + min <= end_; - } - } - } - const char *chr = pat_->chr_; - size_t len = pat_->len_; // actually never more than 255 - if (len == 1) - { - while (true) - { - const char *s = buf_ + loc; - const char *e = buf_ + end_; - s = static_cast(std::memchr(s, *chr, e - s)); - if (s != NULL) - { - loc = s - buf_; - set_current(loc); - if (min >= 4) - { - if (s + 1 + min > e || Pattern::predict_match(pmh, s + 1, min)) - return true; - } - else - { - if (min == 0 || s > e - 5 || Pattern::predict_match(pma, s + 1) == 0) - return true; - } - ++loc; - } - else - { - loc = e - buf_; - set_current_and_peek_more(loc - 1); - loc = cur_ + 1; - if (loc + len > end_) - return false; - } - } - } - size_t lcp = pat_->lcp_; - size_t lcs = pat_->lcs_; - while (true) - { - if (pat_->bmd_ == 0) - { - const char *s = buf_ + loc + lcp; - const char *e = buf_ + end_ + lcp - len + 1; -#if defined(COMPILE_AVX512BW) - // implements AVX512BW string search scheme based on http://0x80.pl/articles/simd-friendly-karp-rabin.html - // enhanced with least frequent character matching - __m512i vlcp = _mm512_set1_epi8(chr[lcp]); - __m512i vlcs = _mm512_set1_epi8(chr[lcs]); - while (s <= e - 64) - { - __m512i vlcpm = _mm512_loadu_si512(reinterpret_cast(s)); - __m512i vlcsm = _mm512_loadu_si512(reinterpret_cast(s + lcs - lcp)); - uint64_t mask = _mm512_cmpeq_epi8_mask(vlcp, vlcpm) & _mm512_cmpeq_epi8_mask(vlcs, vlcsm); - while (mask != 0) - { - uint32_t offset = ctzl(mask); - if (std::memcmp(s - lcp + offset, chr, len) == 0) - { - loc = s - lcp + offset - buf_; - set_current(loc); - if (min >= 4) - { - if (loc + len + min > end_ || Pattern::predict_match(pmh, &buf_[loc + len], min)) - return true; - } - else - { - if (min == 0 || loc + len + 4 > end_ 
|| Pattern::predict_match(pma, &buf_[loc + len]) == 0) - return true; - } - } - mask &= mask - 1; - } - s += 64; - } -#elif defined(COMPILE_AVX2) - // implements AVX2 string search scheme based on http://0x80.pl/articles/simd-friendly-karp-rabin.html - // enhanced with least frequent character matching - __m256i vlcp = _mm256_set1_epi8(chr[lcp]); - __m256i vlcs = _mm256_set1_epi8(chr[lcs]); - while (s <= e - 32) - { - __m256i vlcpm = _mm256_loadu_si256(reinterpret_cast(s)); - __m256i vlcsm = _mm256_loadu_si256(reinterpret_cast(s + lcs - lcp)); - __m256i vlcpeq = _mm256_cmpeq_epi8(vlcp, vlcpm); - __m256i vlcseq = _mm256_cmpeq_epi8(vlcs, vlcsm); - uint32_t mask = _mm256_movemask_epi8(_mm256_and_si256(vlcpeq, vlcseq)); - while (mask != 0) - { - uint32_t offset = ctz(mask); - if (std::memcmp(s - lcp + offset, chr, len) == 0) - { - loc = s - lcp + offset - buf_; - set_current(loc); - if (min >= 4) - { - if (loc + len + min > end_ || Pattern::predict_match(pmh, &buf_[loc + len], min)) - return true; - } - else - { - if (min == 0 || loc + len + 4 > end_ || Pattern::predict_match(pma, &buf_[loc + len]) == 0) - return true; - } - } - mask &= mask - 1; - } - s += 32; - } -#elif defined(HAVE_AVX512BW) || defined(HAVE_AVX2) || defined(HAVE_SSE2) - // implements SSE2 string search scheme based on http://0x80.pl/articles/simd-friendly-karp-rabin.html - // enhanced with least frequent character matching - __m128i vlcp = _mm_set1_epi8(chr[lcp]); - __m128i vlcs = _mm_set1_epi8(chr[lcs]); - while (s <= e - 16) - { - __m128i vlcpm = _mm_loadu_si128(reinterpret_cast(s)); - __m128i vlcsm = _mm_loadu_si128(reinterpret_cast(s + lcs - lcp)); - __m128i vlcpeq = _mm_cmpeq_epi8(vlcp, vlcpm); - __m128i vlcseq = _mm_cmpeq_epi8(vlcs, vlcsm); - uint32_t mask = _mm_movemask_epi8(_mm_and_si128(vlcpeq, vlcseq)); - while (mask != 0) - { - uint32_t offset = ctz(mask); - if (std::memcmp(s - lcp + offset, chr, len) == 0) - { - loc = s - lcp + offset - buf_; - set_current(loc); - if (min >= 4) - { - if 
(loc + len + min > end_ || Pattern::predict_match(pmh, &buf_[loc + len], min)) - return true; - } - else - { - if (min == 0 || loc + len + 4 > end_ || Pattern::predict_match(pma, &buf_[loc + len]) == 0) - return true; - } - } - mask &= mask - 1; - } - s += 16; - } -#elif defined(HAVE_NEON) - // implements NEON/AArch64 string search scheme based on http://0x80.pl/articles/simd-friendly-karp-rabin.html 64 bit optimized - // enhanced with least frequent character matching - uint8x16_t vlcp = vdupq_n_u8(chr[lcp]); - uint8x16_t vlcs = vdupq_n_u8(chr[lcs]); - if (min >= 4) - { - while (s <= e - 16) - { - uint8x16_t vlcpm = vld1q_u8(reinterpret_cast(s)); - uint8x16_t vlcsm = vld1q_u8(reinterpret_cast(s) + lcs - lcp); - uint8x16_t vlcpeq = vceqq_u8(vlcp, vlcpm); - uint8x16_t vlcseq = vceqq_u8(vlcs, vlcsm); - uint8x16_t vmask8 = vandq_u8(vlcpeq, vlcseq); - uint64x2_t vmask64 = vreinterpretq_u64_u8(vmask8); - uint64_t mask = vgetq_lane_u64(vmask64, 0); - if (mask != 0) - { - for (int i = 0; i < 8; ++i) - { - if ((mask & 0xff) && std::memcmp(s - lcp + i, chr, len) == 0) - { - loc = s - lcp + i - buf_; - set_current(loc); - if (loc + len + min > end_ || Pattern::predict_match(pmh, &buf_[loc + len], min)) - return true; - } - mask >>= 8; - } - } - mask = vgetq_lane_u64(vmask64, 1); - if (mask != 0) - { - for (int i = 0; i < 8; ++i) - { - if ((mask & 0xff) && std::memcmp(s - lcp + i + 8, chr, len) == 0) - { - loc = s - lcp + i + 8 - buf_; - set_current(loc); - if (loc + len + min > end_ || Pattern::predict_match(pmh, &buf_[loc + len], min)) - return true; - } - mask >>= 8; - } - } - s += 16; - } - } - else - { - while (s <= e - 16) - { - uint8x16_t vlcpm = vld1q_u8(reinterpret_cast(s)); - uint8x16_t vlcsm = vld1q_u8(reinterpret_cast(s) + lcs - lcp); - uint8x16_t vlcpeq = vceqq_u8(vlcp, vlcpm); - uint8x16_t vlcseq = vceqq_u8(vlcs, vlcsm); - uint8x16_t vmask8 = vandq_u8(vlcpeq, vlcseq); - uint64x2_t vmask64 = vreinterpretq_u64_u8(vmask8); - uint64_t mask = vgetq_lane_u64(vmask64, 
0); - if (mask != 0) - { - for (int i = 0; i < 8; ++i) - { - if ((mask & 0xff) && std::memcmp(s - lcp + i, chr, len) == 0) - { - loc = s - lcp + i - buf_; - set_current(loc); - if (min == 0 || loc + len + 4 > end_ || Pattern::predict_match(pma, &buf_[loc + len]) == 0) - return true; - } - mask >>= 8; - } - } - mask = vgetq_lane_u64(vmask64, 1); - if (mask != 0) - { - for (int i = 0; i < 8; ++i) - { - if ((mask & 0xff) && std::memcmp(s - lcp + i + 8, chr, len) == 0) - { - loc = s - lcp + i + 8 - buf_; - set_current(loc); - if (min == 0 || loc + len + 4 > end_ || Pattern::predict_match(pma, &buf_[loc + len]) == 0) - return true; - } - mask >>= 8; - } - } - s += 16; - } - } -#endif - while (s < e) - { - do - s = static_cast(std::memchr(s, chr[lcp], e - s)); - while (s != NULL && s[lcs - lcp] != chr[lcs] && ++s < e); - if (s == NULL || s >= e) - { - s = e; - break; - } - if (len <= 2 || memcmp(s - lcp, chr, len) == 0) - { - loc = s - lcp - buf_; - set_current(loc); - if (min == 0) - return true; - if (min >= 4) - { - if (loc + len + min > end_ || Pattern::predict_match(pmh, &buf_[loc + len], min)) - return true; - } - else - { - if (loc + len + 4 > end_ || Pattern::predict_match(pma, &buf_[loc + len]) == 0) - return true; - } - } - ++s; - } - loc = s - lcp - buf_; - set_current_and_peek_more(loc - 1); - loc = cur_ + 1; - if (loc + len > end_) - return false; + ++loc; } else { - // apply our improved Boyer-Moore scheme as a fallback - const char *s = buf_ + loc + len - 1; - const char *e = buf_ + end_; - const char *t = chr + len - 1; - size_t bmd = pat_->bmd_; - const uint8_t *bms = pat_->bms_; - while (s < e) - { - size_t k = 0; - do - s += k = bms[static_cast(*s)]; - while (k > 0 ? 
s < e : s[lcp - len + 1] != chr[lcp] && (s += bmd) < e); - if (s >= e) - break; - const char *p = t - 1; - const char *q = s - 1; - while (p >= chr && *p == *q) - { - --p; - --q; - } - if (p < chr) - { - loc = q - buf_ + 1; - set_current(loc); - if (min >= 4) - { - if (loc + len + min > end_ || Pattern::predict_match(pmh, &buf_[loc + len], min)) - return true; - } - else - { - if (min == 0 || loc + len + 4 > end_ || Pattern::predict_match(pma, &buf_[loc + len]) == 0) - return true; - } - } - if (chr + bmd >= p) - { - s += bmd; - } - else - { - size_t k = bms[static_cast(*q)]; - if (p + k > t + bmd) - s += k - (t - p); - else - s += bmd; - } - } - s -= len - 1; - loc = s - buf_; + loc = e - buf_; set_current_and_peek_more(loc - 1); loc = cur_ + 1; - if (loc + len > end_) + if (loc + min > end_) return false; } } } -} // namespace reflex +#if defined(HAVE_AVX512BW) || defined(HAVE_AVX2) || defined(HAVE_SSE2) + +/// My "needle search" methods +#define ADV_PAT_PIN_ONE(N, INIT, COMP) \ +bool Matcher::advance_pattern_pin##N##_one(size_t loc) \ +{ \ + const Pattern::Pred *pma = pat_->pma_; \ + const char *chr = pat_->chr_; \ + INIT \ + while (true) \ + { \ + const char *s = buf_ + loc; \ + const char *e = buf_ + end_; \ + while (s <= e - 16) \ + { \ + __m128i vstr = _mm_loadu_si128(reinterpret_cast(s)); \ + __m128i veq = _mm_cmpeq_epi8(v0, vstr); \ + COMP \ + uint32_t mask = _mm_movemask_epi8(veq); \ + while (mask != 0) \ + { \ + uint32_t offset = ctz(mask); \ + loc = s + offset - buf_; \ + if (loc + 4 > end_ || Pattern::predict_match(pma, &buf_[loc]) == 0) \ + { \ + set_current(loc); \ + return true; \ + } \ + mask &= mask - 1; \ + } \ + s += 16; \ + } \ + loc = s - buf_; \ + set_current_and_peek_more(loc - 1); \ + loc = cur_ + 1; \ + if (loc + 1 > end_) \ + return false; \ + if (loc + 16 > end_) \ + break; \ + } \ + return advance_pattern(loc); \ +} + +ADV_PAT_PIN_ONE(2, \ + __m128i v0 = _mm_set1_epi8(chr[0]); \ + __m128i v1 = _mm_set1_epi8(chr[1]); \ + , \ + veq = 
_mm_or_si128(veq, _mm_cmpeq_epi8(v1, vstr)); \ + ) + +ADV_PAT_PIN_ONE(3, \ + __m128i v0 = _mm_set1_epi8(chr[0]); \ + __m128i v1 = _mm_set1_epi8(chr[1]); \ + __m128i v2 = _mm_set1_epi8(chr[2]); \ + , \ + veq = _mm_or_si128(veq, _mm_cmpeq_epi8(v1, vstr)); \ + veq = _mm_or_si128(veq, _mm_cmpeq_epi8(v2, vstr)); \ + ) + +ADV_PAT_PIN_ONE(4, \ + __m128i v0 = _mm_set1_epi8(chr[0]); \ + __m128i v1 = _mm_set1_epi8(chr[1]); \ + __m128i v2 = _mm_set1_epi8(chr[2]); \ + __m128i v3 = _mm_set1_epi8(chr[3]); \ + , \ + veq = _mm_or_si128(veq, _mm_cmpeq_epi8(v1, vstr)); \ + veq = _mm_or_si128(veq, _mm_cmpeq_epi8(v2, vstr)); \ + veq = _mm_or_si128(veq, _mm_cmpeq_epi8(v3, vstr)); \ + ) + +ADV_PAT_PIN_ONE(5, \ + __m128i v0 = _mm_set1_epi8(chr[0]); \ + __m128i v1 = _mm_set1_epi8(chr[1]); \ + __m128i v2 = _mm_set1_epi8(chr[2]); \ + __m128i v3 = _mm_set1_epi8(chr[3]); \ + __m128i v4 = _mm_set1_epi8(chr[4]); \ + , \ + veq = _mm_or_si128(veq, _mm_cmpeq_epi8(v1, vstr)); \ + veq = _mm_or_si128(veq, _mm_cmpeq_epi8(v2, vstr)); \ + veq = _mm_or_si128(veq, _mm_cmpeq_epi8(v3, vstr)); \ + veq = _mm_or_si128(veq, _mm_cmpeq_epi8(v4, vstr)); \ + ) + +ADV_PAT_PIN_ONE(6, \ + __m128i v0 = _mm_set1_epi8(chr[0]); \ + __m128i v1 = _mm_set1_epi8(chr[1]); \ + __m128i v2 = _mm_set1_epi8(chr[2]); \ + __m128i v3 = _mm_set1_epi8(chr[3]); \ + __m128i v4 = _mm_set1_epi8(chr[4]); \ + __m128i v5 = _mm_set1_epi8(chr[5]); \ + , \ + veq = _mm_or_si128(veq, _mm_cmpeq_epi8(v1, vstr)); \ + veq = _mm_or_si128(veq, _mm_cmpeq_epi8(v2, vstr)); \ + veq = _mm_or_si128(veq, _mm_cmpeq_epi8(v3, vstr)); \ + veq = _mm_or_si128(veq, _mm_cmpeq_epi8(v4, vstr)); \ + veq = _mm_or_si128(veq, _mm_cmpeq_epi8(v5, vstr)); \ + ) + +ADV_PAT_PIN_ONE(7, \ + __m128i v0 = _mm_set1_epi8(chr[0]); \ + __m128i v1 = _mm_set1_epi8(chr[1]); \ + __m128i v2 = _mm_set1_epi8(chr[2]); \ + __m128i v3 = _mm_set1_epi8(chr[3]); \ + __m128i v4 = _mm_set1_epi8(chr[4]); \ + __m128i v5 = _mm_set1_epi8(chr[5]); \ + __m128i v6 = _mm_set1_epi8(chr[6]); \ + , \ + veq = 
_mm_or_si128(veq, _mm_cmpeq_epi8(v1, vstr)); \ + veq = _mm_or_si128(veq, _mm_cmpeq_epi8(v2, vstr)); \ + veq = _mm_or_si128(veq, _mm_cmpeq_epi8(v3, vstr)); \ + veq = _mm_or_si128(veq, _mm_cmpeq_epi8(v4, vstr)); \ + veq = _mm_or_si128(veq, _mm_cmpeq_epi8(v5, vstr)); \ + veq = _mm_or_si128(veq, _mm_cmpeq_epi8(v6, vstr)); \ + ) + +ADV_PAT_PIN_ONE(8, \ + __m128i v0 = _mm_set1_epi8(chr[0]); \ + __m128i v1 = _mm_set1_epi8(chr[1]); \ + __m128i v2 = _mm_set1_epi8(chr[2]); \ + __m128i v3 = _mm_set1_epi8(chr[3]); \ + __m128i v4 = _mm_set1_epi8(chr[4]); \ + __m128i v5 = _mm_set1_epi8(chr[5]); \ + __m128i v6 = _mm_set1_epi8(chr[6]); \ + __m128i v7 = _mm_set1_epi8(chr[7]); \ + , \ + veq = _mm_or_si128(veq, _mm_cmpeq_epi8(v1, vstr)); \ + veq = _mm_or_si128(veq, _mm_cmpeq_epi8(v2, vstr)); \ + veq = _mm_or_si128(veq, _mm_cmpeq_epi8(v3, vstr)); \ + veq = _mm_or_si128(veq, _mm_cmpeq_epi8(v4, vstr)); \ + veq = _mm_or_si128(veq, _mm_cmpeq_epi8(v5, vstr)); \ + veq = _mm_or_si128(veq, _mm_cmpeq_epi8(v6, vstr)); \ + veq = _mm_or_si128(veq, _mm_cmpeq_epi8(v7, vstr)); \ + ) + +/// My "needle search" methods +#define ADV_PAT_PIN(N, INIT, COMP) \ +bool Matcher::advance_pattern_pin##N##_pma(size_t loc) \ +{ \ + const Pattern::Pred *pma = pat_->pma_; \ + const char *chr = pat_->chr_; \ + size_t min = pat_->min_; \ + uint16_t lcp = pat_->lcp_; \ + uint16_t lcs = pat_->lcs_; \ + INIT \ + while (true) \ + { \ + const char *s = buf_ + loc + lcp; \ + const char *e = buf_ + end_ + lcp - min + 1; \ + while (s <= e - 16) \ + { \ + __m128i vstrlcp = _mm_loadu_si128(reinterpret_cast(s)); \ + __m128i vstrlcs = _mm_loadu_si128(reinterpret_cast(s + lcs - lcp)); \ + __m128i veqlcp = _mm_cmpeq_epi8(vlcp0, vstrlcp); \ + __m128i veqlcs = _mm_cmpeq_epi8(vlcs0, vstrlcs); \ + COMP \ + uint32_t mask = _mm_movemask_epi8(_mm_and_si128(veqlcp, veqlcs)); \ + while (mask != 0) \ + { \ + uint32_t offset = ctz(mask); \ + loc = s - lcp + offset - buf_; \ + if (loc + 4 > end_ || Pattern::predict_match(pma, &buf_[loc]) == 0) 
\ + { \ + set_current(loc); \ + return true; \ + } \ + mask &= mask - 1; \ + } \ + s += 16; \ + } \ + s -= lcp; \ + loc = s - buf_; \ + set_current_and_peek_more(loc - 1); \ + loc = cur_ + 1; \ + if (loc + min > end_) \ + return false; \ + if (loc + min + 15 > end_) \ + break; \ + } \ + return advance_pattern(loc); \ +} \ +\ +bool Matcher::advance_pattern_pin##N##_pmh(size_t loc) \ +{ \ + const Pattern::Pred *pmh = pat_->pmh_; \ + const char *chr = pat_->chr_; \ + size_t min = pat_->min_; \ + uint16_t lcp = pat_->lcp_; \ + uint16_t lcs = pat_->lcs_; \ + INIT \ + while (true) \ + { \ + const char *s = buf_ + loc + lcp; \ + const char *e = buf_ + end_ + lcp - min + 1; \ + while (s <= e - 16) \ + { \ + __m128i vstrlcp = _mm_loadu_si128(reinterpret_cast(s)); \ + __m128i vstrlcs = _mm_loadu_si128(reinterpret_cast(s + lcs - lcp)); \ + __m128i veqlcp = _mm_cmpeq_epi8(vlcp0, vstrlcp); \ + __m128i veqlcs = _mm_cmpeq_epi8(vlcs0, vstrlcs); \ + COMP \ + uint32_t mask = _mm_movemask_epi8(_mm_and_si128(veqlcp, veqlcs)); \ + while (mask != 0) \ + { \ + uint32_t offset = ctz(mask); \ + loc = s - lcp + offset - buf_; \ + if (Pattern::predict_match(pmh, &buf_[loc], min)) \ + { \ + set_current(loc); \ + return true; \ + } \ + mask &= mask - 1; \ + } \ + s += 16; \ + } \ + s -= lcp; \ + loc = s - buf_; \ + set_current_and_peek_more(loc - 1); \ + loc = cur_ + 1; \ + if (loc + min > end_) \ + return false; \ + if (loc + min + 15 > end_) \ + break; \ + } \ + return advance_pattern_min4(loc); \ +} + +ADV_PAT_PIN(2, \ + __m128i vlcp0 = _mm_set1_epi8(chr[0]); \ + __m128i vlcp1 = _mm_set1_epi8(chr[1]); \ + __m128i vlcs0 = _mm_set1_epi8(chr[2]); \ + __m128i vlcs1 = _mm_set1_epi8(chr[3]); \ + , \ + veqlcp = _mm_or_si128(veqlcp, _mm_cmpeq_epi8(vlcp1, vstrlcp)); \ + veqlcs = _mm_or_si128(veqlcs, _mm_cmpeq_epi8(vlcs1, vstrlcs)); \ + ) + +ADV_PAT_PIN(3, \ + __m128i vlcp0 = _mm_set1_epi8(chr[0]); \ + __m128i vlcp1 = _mm_set1_epi8(chr[1]); \ + __m128i vlcp2 = _mm_set1_epi8(chr[2]); \ + __m128i vlcs0 
= _mm_set1_epi8(chr[3]); \ + __m128i vlcs1 = _mm_set1_epi8(chr[4]); \ + __m128i vlcs2 = _mm_set1_epi8(chr[5]); \ + , \ + veqlcp = _mm_or_si128(veqlcp, _mm_cmpeq_epi8(vlcp1, vstrlcp)); \ + veqlcp = _mm_or_si128(veqlcp, _mm_cmpeq_epi8(vlcp2, vstrlcp)); \ + veqlcs = _mm_or_si128(veqlcs, _mm_cmpeq_epi8(vlcs1, vstrlcs)); \ + veqlcs = _mm_or_si128(veqlcs, _mm_cmpeq_epi8(vlcs2, vstrlcs)); \ + ) + +ADV_PAT_PIN(4, \ + __m128i vlcp0 = _mm_set1_epi8(chr[0]); \ + __m128i vlcp1 = _mm_set1_epi8(chr[1]); \ + __m128i vlcp2 = _mm_set1_epi8(chr[2]); \ + __m128i vlcp3 = _mm_set1_epi8(chr[3]); \ + __m128i vlcs0 = _mm_set1_epi8(chr[4]); \ + __m128i vlcs1 = _mm_set1_epi8(chr[5]); \ + __m128i vlcs2 = _mm_set1_epi8(chr[6]); \ + __m128i vlcs3 = _mm_set1_epi8(chr[7]); \ + , \ + veqlcp = _mm_or_si128(veqlcp, _mm_cmpeq_epi8(vlcp1, vstrlcp)); \ + veqlcp = _mm_or_si128(veqlcp, _mm_cmpeq_epi8(vlcp2, vstrlcp)); \ + veqlcp = _mm_or_si128(veqlcp, _mm_cmpeq_epi8(vlcp3, vstrlcp)); \ + veqlcs = _mm_or_si128(veqlcs, _mm_cmpeq_epi8(vlcs1, vstrlcs)); \ + veqlcs = _mm_or_si128(veqlcs, _mm_cmpeq_epi8(vlcs2, vstrlcs)); \ + veqlcs = _mm_or_si128(veqlcs, _mm_cmpeq_epi8(vlcs3, vstrlcs)); \ + ) + +ADV_PAT_PIN(5, \ + __m128i vlcp0 = _mm_set1_epi8(chr[0]); \ + __m128i vlcp1 = _mm_set1_epi8(chr[1]); \ + __m128i vlcp2 = _mm_set1_epi8(chr[2]); \ + __m128i vlcp3 = _mm_set1_epi8(chr[3]); \ + __m128i vlcp4 = _mm_set1_epi8(chr[4]); \ + __m128i vlcs0 = _mm_set1_epi8(chr[5]); \ + __m128i vlcs1 = _mm_set1_epi8(chr[6]); \ + __m128i vlcs2 = _mm_set1_epi8(chr[7]); \ + __m128i vlcs3 = _mm_set1_epi8(chr[8]); \ + __m128i vlcs4 = _mm_set1_epi8(chr[9]); \ + , \ + veqlcp = _mm_or_si128(veqlcp, _mm_cmpeq_epi8(vlcp1, vstrlcp)); \ + veqlcp = _mm_or_si128(veqlcp, _mm_cmpeq_epi8(vlcp2, vstrlcp)); \ + veqlcp = _mm_or_si128(veqlcp, _mm_cmpeq_epi8(vlcp3, vstrlcp)); \ + veqlcp = _mm_or_si128(veqlcp, _mm_cmpeq_epi8(vlcp4, vstrlcp)); \ + veqlcs = _mm_or_si128(veqlcs, _mm_cmpeq_epi8(vlcs1, vstrlcs)); \ + veqlcs = _mm_or_si128(veqlcs, 
_mm_cmpeq_epi8(vlcs2, vstrlcs)); \ + veqlcs = _mm_or_si128(veqlcs, _mm_cmpeq_epi8(vlcs3, vstrlcs)); \ + veqlcs = _mm_or_si128(veqlcs, _mm_cmpeq_epi8(vlcs4, vstrlcs)); \ + ) + +ADV_PAT_PIN(6, \ + __m128i vlcp0 = _mm_set1_epi8(chr[0]); \ + __m128i vlcp1 = _mm_set1_epi8(chr[1]); \ + __m128i vlcp2 = _mm_set1_epi8(chr[2]); \ + __m128i vlcp3 = _mm_set1_epi8(chr[3]); \ + __m128i vlcp4 = _mm_set1_epi8(chr[4]); \ + __m128i vlcp5 = _mm_set1_epi8(chr[5]); \ + __m128i vlcs0 = _mm_set1_epi8(chr[6]); \ + __m128i vlcs1 = _mm_set1_epi8(chr[7]); \ + __m128i vlcs2 = _mm_set1_epi8(chr[8]); \ + __m128i vlcs3 = _mm_set1_epi8(chr[9]); \ + __m128i vlcs4 = _mm_set1_epi8(chr[10]); \ + __m128i vlcs5 = _mm_set1_epi8(chr[11]); \ + , \ + veqlcp = _mm_or_si128(veqlcp, _mm_cmpeq_epi8(vlcp1, vstrlcp)); \ + veqlcp = _mm_or_si128(veqlcp, _mm_cmpeq_epi8(vlcp2, vstrlcp)); \ + veqlcp = _mm_or_si128(veqlcp, _mm_cmpeq_epi8(vlcp3, vstrlcp)); \ + veqlcp = _mm_or_si128(veqlcp, _mm_cmpeq_epi8(vlcp4, vstrlcp)); \ + veqlcp = _mm_or_si128(veqlcp, _mm_cmpeq_epi8(vlcp5, vstrlcp)); \ + veqlcs = _mm_or_si128(veqlcs, _mm_cmpeq_epi8(vlcs1, vstrlcs)); \ + veqlcs = _mm_or_si128(veqlcs, _mm_cmpeq_epi8(vlcs2, vstrlcs)); \ + veqlcs = _mm_or_si128(veqlcs, _mm_cmpeq_epi8(vlcs3, vstrlcs)); \ + veqlcs = _mm_or_si128(veqlcs, _mm_cmpeq_epi8(vlcs4, vstrlcs)); \ + veqlcs = _mm_or_si128(veqlcs, _mm_cmpeq_epi8(vlcs5, vstrlcs)); \ + ) + +ADV_PAT_PIN(7, \ + __m128i vlcp0 = _mm_set1_epi8(chr[0]); \ + __m128i vlcp1 = _mm_set1_epi8(chr[1]); \ + __m128i vlcp2 = _mm_set1_epi8(chr[2]); \ + __m128i vlcp3 = _mm_set1_epi8(chr[3]); \ + __m128i vlcp4 = _mm_set1_epi8(chr[4]); \ + __m128i vlcp5 = _mm_set1_epi8(chr[5]); \ + __m128i vlcp6 = _mm_set1_epi8(chr[6]); \ + __m128i vlcs0 = _mm_set1_epi8(chr[7]); \ + __m128i vlcs1 = _mm_set1_epi8(chr[8]); \ + __m128i vlcs2 = _mm_set1_epi8(chr[9]); \ + __m128i vlcs3 = _mm_set1_epi8(chr[10]); \ + __m128i vlcs4 = _mm_set1_epi8(chr[11]); \ + __m128i vlcs5 = _mm_set1_epi8(chr[12]); \ + __m128i vlcs6 = 
_mm_set1_epi8(chr[13]); \ + , \ + veqlcp = _mm_or_si128(veqlcp, _mm_cmpeq_epi8(vlcp1, vstrlcp)); \ + veqlcp = _mm_or_si128(veqlcp, _mm_cmpeq_epi8(vlcp2, vstrlcp)); \ + veqlcp = _mm_or_si128(veqlcp, _mm_cmpeq_epi8(vlcp3, vstrlcp)); \ + veqlcp = _mm_or_si128(veqlcp, _mm_cmpeq_epi8(vlcp4, vstrlcp)); \ + veqlcp = _mm_or_si128(veqlcp, _mm_cmpeq_epi8(vlcp5, vstrlcp)); \ + veqlcp = _mm_or_si128(veqlcp, _mm_cmpeq_epi8(vlcp6, vstrlcp)); \ + veqlcs = _mm_or_si128(veqlcs, _mm_cmpeq_epi8(vlcs1, vstrlcs)); \ + veqlcs = _mm_or_si128(veqlcs, _mm_cmpeq_epi8(vlcs2, vstrlcs)); \ + veqlcs = _mm_or_si128(veqlcs, _mm_cmpeq_epi8(vlcs3, vstrlcs)); \ + veqlcs = _mm_or_si128(veqlcs, _mm_cmpeq_epi8(vlcs4, vstrlcs)); \ + veqlcs = _mm_or_si128(veqlcs, _mm_cmpeq_epi8(vlcs5, vstrlcs)); \ + veqlcs = _mm_or_si128(veqlcs, _mm_cmpeq_epi8(vlcs6, vstrlcs)); \ + ) + +ADV_PAT_PIN(8, \ + __m128i vlcp0 = _mm_set1_epi8(chr[0]); \ + __m128i vlcp1 = _mm_set1_epi8(chr[1]); \ + __m128i vlcp2 = _mm_set1_epi8(chr[2]); \ + __m128i vlcp3 = _mm_set1_epi8(chr[3]); \ + __m128i vlcp4 = _mm_set1_epi8(chr[4]); \ + __m128i vlcp5 = _mm_set1_epi8(chr[5]); \ + __m128i vlcp6 = _mm_set1_epi8(chr[6]); \ + __m128i vlcp7 = _mm_set1_epi8(chr[7]); \ + __m128i vlcs0 = _mm_set1_epi8(chr[8]); \ + __m128i vlcs1 = _mm_set1_epi8(chr[9]); \ + __m128i vlcs2 = _mm_set1_epi8(chr[10]); \ + __m128i vlcs3 = _mm_set1_epi8(chr[11]); \ + __m128i vlcs4 = _mm_set1_epi8(chr[12]); \ + __m128i vlcs5 = _mm_set1_epi8(chr[13]); \ + __m128i vlcs6 = _mm_set1_epi8(chr[14]); \ + __m128i vlcs7 = _mm_set1_epi8(chr[15]); \ + , \ + veqlcp = _mm_or_si128(veqlcp, _mm_cmpeq_epi8(vlcp1, vstrlcp)); \ + veqlcp = _mm_or_si128(veqlcp, _mm_cmpeq_epi8(vlcp2, vstrlcp)); \ + veqlcp = _mm_or_si128(veqlcp, _mm_cmpeq_epi8(vlcp3, vstrlcp)); \ + veqlcp = _mm_or_si128(veqlcp, _mm_cmpeq_epi8(vlcp4, vstrlcp)); \ + veqlcp = _mm_or_si128(veqlcp, _mm_cmpeq_epi8(vlcp5, vstrlcp)); \ + veqlcp = _mm_or_si128(veqlcp, _mm_cmpeq_epi8(vlcp6, vstrlcp)); \ + veqlcp = _mm_or_si128(veqlcp, 
_mm_cmpeq_epi8(vlcp7, vstrlcp)); \ + veqlcs = _mm_or_si128(veqlcs, _mm_cmpeq_epi8(vlcs1, vstrlcs)); \ + veqlcs = _mm_or_si128(veqlcs, _mm_cmpeq_epi8(vlcs2, vstrlcs)); \ + veqlcs = _mm_or_si128(veqlcs, _mm_cmpeq_epi8(vlcs3, vstrlcs)); \ + veqlcs = _mm_or_si128(veqlcs, _mm_cmpeq_epi8(vlcs4, vstrlcs)); \ + veqlcs = _mm_or_si128(veqlcs, _mm_cmpeq_epi8(vlcs5, vstrlcs)); \ + veqlcs = _mm_or_si128(veqlcs, _mm_cmpeq_epi8(vlcs6, vstrlcs)); \ + veqlcs = _mm_or_si128(veqlcs, _mm_cmpeq_epi8(vlcs7, vstrlcs)); \ + ) + +#elif defined(HAVE_NEON) + +/// My "needle search" methods +#define ADV_PAT_PIN_ONE(N, INIT, COMP) \ +bool Matcher::advance_pattern_pin##N##_one(size_t loc) \ +{ \ + const Pattern::Pred *pma = pat_->pma_; \ + const char *chr = pat_->chr_; \ + INIT \ + while (true) \ + { \ + const char *s = buf_ + loc; \ + const char *e = buf_ + end_; \ + while (s <= e - 16) \ + { \ + uint8x16_t vstr = vld1q_u8(reinterpret_cast(s)); \ + COMP \ + uint64x2_t vmask64 = vreinterpretq_u64_u8(vmask8); \ + uint64_t mask = vgetq_lane_u64(vmask64, 0); \ + if (mask != 0) \ + { \ + for (uint16_t i = 0; i < 8; ++i) \ + { \ + if ((mask & 0xff)) \ + { \ + loc = s + i - buf_; \ + if (loc + 4 > end_ || Pattern::predict_match(pma, &buf_[loc]) == 0) \ + { \ + set_current(loc); \ + return true; \ + } \ + } \ + mask >>= 8; \ + } \ + } \ + mask = vgetq_lane_u64(vmask64, 1); \ + if (mask != 0) \ + { \ + for (uint16_t i = 8; i < 16; ++i) \ + { \ + if ((mask & 0xff)) \ + { \ + loc = s + i - buf_; \ + if (loc + 4 > end_ || Pattern::predict_match(pma, &buf_[loc]) == 0) \ + { \ + set_current(loc); \ + return true; \ + } \ + } \ + mask >>= 8; \ + } \ + } \ + s += 16; \ + } \ + loc = s - buf_; \ + set_current_and_peek_more(loc - 1); \ + loc = cur_ + 1; \ + if (loc + 1 > end_) \ + return false; \ + if (loc + 16 > end_) \ + break; \ + } \ + return advance_pattern(loc); \ +} + +ADV_PAT_PIN_ONE(2, \ + uint8x16_t v0 = vdupq_n_u8(chr[0]); \ + uint8x16_t v1 = vdupq_n_u8(chr[1]); \ + , \ + uint8x16_t vmask8 = 
vorrq_u8(vceqq_u8(v0, vstr), vceqq_u8(v1, vstr)); \ + ) + +ADV_PAT_PIN_ONE(3, \ + uint8x16_t v0 = vdupq_n_u8(chr[0]); \ + uint8x16_t v1 = vdupq_n_u8(chr[1]); \ + uint8x16_t v2 = vdupq_n_u8(chr[2]); \ + , \ + uint8x16_t vmask8 = \ + vorrq_u8( \ + vorrq_u8( \ + vceqq_u8(v0, vstr), \ + vceqq_u8(v1, vstr)), \ + vceqq_u8(v2, vstr)); \ + ) + +ADV_PAT_PIN_ONE(4, \ + uint8x16_t v0 = vdupq_n_u8(chr[0]); \ + uint8x16_t v1 = vdupq_n_u8(chr[1]); \ + uint8x16_t v2 = vdupq_n_u8(chr[2]); \ + uint8x16_t v3 = vdupq_n_u8(chr[3]); \ + , \ + uint8x16_t vmask8 = \ + vorrq_u8( \ + vorrq_u8( \ + vorrq_u8( \ + vceqq_u8(v0, vstr), \ + vceqq_u8(v1, vstr)), \ + vceqq_u8(v2, vstr)), \ + vceqq_u8(v3, vstr)); \ + ) + +ADV_PAT_PIN_ONE(5, \ + uint8x16_t v0 = vdupq_n_u8(chr[0]); \ + uint8x16_t v1 = vdupq_n_u8(chr[1]); \ + uint8x16_t v2 = vdupq_n_u8(chr[2]); \ + uint8x16_t v3 = vdupq_n_u8(chr[3]); \ + uint8x16_t v4 = vdupq_n_u8(chr[4]); \ + , \ + uint8x16_t vmask8 = \ + vorrq_u8( \ + vorrq_u8( \ + vorrq_u8( \ + vorrq_u8( \ + vceqq_u8(v0, vstr), \ + vceqq_u8(v1, vstr)), \ + vceqq_u8(v2, vstr)), \ + vceqq_u8(v3, vstr)), \ + vceqq_u8(v4, vstr)); \ + ) + +ADV_PAT_PIN_ONE(6, \ + uint8x16_t v0 = vdupq_n_u8(chr[0]); \ + uint8x16_t v1 = vdupq_n_u8(chr[1]); \ + uint8x16_t v2 = vdupq_n_u8(chr[2]); \ + uint8x16_t v3 = vdupq_n_u8(chr[3]); \ + uint8x16_t v4 = vdupq_n_u8(chr[4]); \ + uint8x16_t v5 = vdupq_n_u8(chr[5]); \ + , \ + uint8x16_t vmask8 = \ + vorrq_u8( \ + vorrq_u8( \ + vorrq_u8( \ + vorrq_u8( \ + vorrq_u8( \ + vceqq_u8(v0, vstr), \ + vceqq_u8(v1, vstr)), \ + vceqq_u8(v2, vstr)), \ + vceqq_u8(v3, vstr)), \ + vceqq_u8(v4, vstr)), \ + vceqq_u8(v5, vstr)); \ + ) + +ADV_PAT_PIN_ONE(7, \ + uint8x16_t v0 = vdupq_n_u8(chr[0]); \ + uint8x16_t v1 = vdupq_n_u8(chr[1]); \ + uint8x16_t v2 = vdupq_n_u8(chr[2]); \ + uint8x16_t v3 = vdupq_n_u8(chr[3]); \ + uint8x16_t v4 = vdupq_n_u8(chr[4]); \ + uint8x16_t v5 = vdupq_n_u8(chr[5]); \ + uint8x16_t v6 = vdupq_n_u8(chr[6]); \ + , \ + uint8x16_t vmask8 = \ + vorrq_u8( \ + 
vorrq_u8( \ + vorrq_u8( \ + vorrq_u8( \ + vorrq_u8( \ + vorrq_u8( \ + vceqq_u8(v0, vstr), \ + vceqq_u8(v1, vstr)), \ + vceqq_u8(v2, vstr)), \ + vceqq_u8(v3, vstr)), \ + vceqq_u8(v4, vstr)), \ + vceqq_u8(v5, vstr)), \ + vceqq_u8(v6, vstr)); \ + ) + +ADV_PAT_PIN_ONE(8, \ + uint8x16_t v0 = vdupq_n_u8(chr[0]); \ + uint8x16_t v1 = vdupq_n_u8(chr[1]); \ + uint8x16_t v2 = vdupq_n_u8(chr[2]); \ + uint8x16_t v3 = vdupq_n_u8(chr[3]); \ + uint8x16_t v4 = vdupq_n_u8(chr[4]); \ + uint8x16_t v5 = vdupq_n_u8(chr[5]); \ + uint8x16_t v6 = vdupq_n_u8(chr[6]); \ + uint8x16_t v7 = vdupq_n_u8(chr[7]); \ + , \ + uint8x16_t vmask8 = \ + vorrq_u8( \ + vorrq_u8( \ + vorrq_u8( \ + vorrq_u8( \ + vorrq_u8( \ + vorrq_u8( \ + vorrq_u8( \ + vceqq_u8(v0, vstr), \ + vceqq_u8(v1, vstr)), \ + vceqq_u8(v2, vstr)), \ + vceqq_u8(v3, vstr)), \ + vceqq_u8(v4, vstr)), \ + vceqq_u8(v5, vstr)), \ + vceqq_u8(v6, vstr)), \ + vceqq_u8(v7, vstr)); \ + ) + +/// My "needle search" methods +#define ADV_PAT_PIN(N, INIT, COMP) \ +bool Matcher::advance_pattern_pin##N##_pma(size_t loc) \ +{ \ + const Pattern::Pred *pma = pat_->pma_; \ + const char *chr = pat_->chr_; \ + size_t min = pat_->min_; \ + uint16_t lcp = pat_->lcp_; \ + uint16_t lcs = pat_->lcs_; \ + INIT \ + while (true) \ + { \ + const char *s = buf_ + loc + lcp; \ + const char *e = buf_ + end_ + lcp - min + 1; \ + while (s <= e - 16) \ + { \ + uint8x16_t vstrlcp = vld1q_u8(reinterpret_cast(s)); \ + uint8x16_t vstrlcs = vld1q_u8(reinterpret_cast(s + lcs - lcp)); \ + COMP \ + uint64x2_t vmask64 = vreinterpretq_u64_u8(vandq_u8(vmasklcp8, vmasklcs8)); \ + uint64_t mask = vgetq_lane_u64(vmask64, 0); \ + if (mask != 0) \ + { \ + for (uint16_t i = 0; i < 8; ++i) \ + { \ + if ((mask & 0xff)) \ + { \ + loc = s - lcp + i - buf_; \ + if (loc + 4 > end_ || Pattern::predict_match(pma, &buf_[loc]) == 0) \ + { \ + set_current(loc); \ + return true; \ + } \ + } \ + mask >>= 8; \ + } \ + } \ + mask = vgetq_lane_u64(vmask64, 1); \ + if (mask != 0) \ + { \ + for (uint16_t i 
= 8; i < 16; ++i) \ + { \ + if ((mask & 0xff)) \ + { \ + loc = s - lcp + i - buf_; \ + if (loc + 4 > end_ || Pattern::predict_match(pma, &buf_[loc]) == 0) \ + { \ + set_current(loc); \ + return true; \ + } \ + } \ + mask >>= 8; \ + } \ + } \ + s += 16; \ + } \ + s -= lcp; \ + loc = s - buf_; \ + set_current_and_peek_more(loc - 1); \ + loc = cur_ + 1; \ + if (loc + min > end_) \ + return false; \ + if (loc + min + 15 > end_) \ + break; \ + } \ + return advance_pattern(loc); \ +} \ +\ +bool Matcher::advance_pattern_pin##N##_pmh(size_t loc) \ +{ \ + const Pattern::Pred *pmh = pat_->pmh_; \ + const char *chr = pat_->chr_; \ + size_t min = pat_->min_; \ + uint16_t lcp = pat_->lcp_; \ + uint16_t lcs = pat_->lcs_; \ + INIT \ + while (true) \ + { \ + const char *s = buf_ + loc + lcp; \ + const char *e = buf_ + end_ + lcp - min + 1; \ + while (s <= e - 16) \ + { \ + uint8x16_t vstrlcp = vld1q_u8(reinterpret_cast(s)); \ + uint8x16_t vstrlcs = vld1q_u8(reinterpret_cast(s + lcs - lcp)); \ + COMP \ + uint64x2_t vmask64 = vreinterpretq_u64_u8(vandq_u8(vmasklcp8, vmasklcs8)); \ + uint64_t mask = vgetq_lane_u64(vmask64, 0); \ + if (mask != 0) \ + { \ + for (uint16_t i = 0; i < 8; ++i) \ + { \ + if ((mask & 0xff)) \ + { \ + loc = s - lcp + i - buf_; \ + if (Pattern::predict_match(pmh, &buf_[loc], min)) \ + { \ + set_current(loc); \ + return true; \ + } \ + } \ + mask >>= 8; \ + } \ + } \ + mask = vgetq_lane_u64(vmask64, 1); \ + if (mask != 0) \ + { \ + for (uint16_t i = 8; i < 16; ++i) \ + { \ + if ((mask & 0xff)) \ + { \ + loc = s - lcp + i - buf_; \ + if (Pattern::predict_match(pmh, &buf_[loc], min)) \ + { \ + set_current(loc); \ + return true; \ + } \ + } \ + mask >>= 8; \ + } \ + } \ + s += 16; \ + } \ + s -= lcp; \ + loc = s - buf_; \ + set_current_and_peek_more(loc - 1); \ + loc = cur_ + 1; \ + if (loc + min > end_) \ + return false; \ + if (loc + min + 15 > end_) \ + break; \ + } \ + return advance_pattern_min4(loc); \ +} + +ADV_PAT_PIN(2, \ + uint8x16_t vlcp0 = 
vdupq_n_u8(chr[0]); \ + uint8x16_t vlcp1 = vdupq_n_u8(chr[1]); \ + uint8x16_t vlcs0 = vdupq_n_u8(chr[2]); \ + uint8x16_t vlcs1 = vdupq_n_u8(chr[3]); \ + , \ + uint8x16_t vmasklcp8 = vorrq_u8(vceqq_u8(vlcp0, vstrlcp), vceqq_u8(vlcp1, vstrlcp)); \ + uint8x16_t vmasklcs8 = vorrq_u8(vceqq_u8(vlcs0, vstrlcs), vceqq_u8(vlcs1, vstrlcs)); \ + ) + +ADV_PAT_PIN(3, \ + uint8x16_t vlcp0 = vdupq_n_u8(chr[0]); \ + uint8x16_t vlcp1 = vdupq_n_u8(chr[1]); \ + uint8x16_t vlcp2 = vdupq_n_u8(chr[2]); \ + uint8x16_t vlcs0 = vdupq_n_u8(chr[3]); \ + uint8x16_t vlcs1 = vdupq_n_u8(chr[4]); \ + uint8x16_t vlcs2 = vdupq_n_u8(chr[5]); \ + , \ + uint8x16_t vmasklcp8 = \ + vorrq_u8( \ + vorrq_u8( \ + vceqq_u8(vlcp0, vstrlcp), \ + vceqq_u8(vlcp1, vstrlcp)), \ + vceqq_u8(vlcp2, vstrlcp)); \ + uint8x16_t vmasklcs8 = \ + vorrq_u8( \ + vorrq_u8( \ + vceqq_u8(vlcs0, vstrlcs), \ + vceqq_u8(vlcs1, vstrlcs)), \ + vceqq_u8(vlcs2, vstrlcs)); \ + ) + +ADV_PAT_PIN(4, \ + uint8x16_t vlcp0 = vdupq_n_u8(chr[0]); \ + uint8x16_t vlcp1 = vdupq_n_u8(chr[1]); \ + uint8x16_t vlcp2 = vdupq_n_u8(chr[2]); \ + uint8x16_t vlcp3 = vdupq_n_u8(chr[3]); \ + uint8x16_t vlcs0 = vdupq_n_u8(chr[4]); \ + uint8x16_t vlcs1 = vdupq_n_u8(chr[5]); \ + uint8x16_t vlcs2 = vdupq_n_u8(chr[6]); \ + uint8x16_t vlcs3 = vdupq_n_u8(chr[7]); \ + , \ + uint8x16_t vmasklcp8 = \ + vorrq_u8( \ + vorrq_u8( \ + vorrq_u8( \ + vceqq_u8(vlcp0, vstrlcp), \ + vceqq_u8(vlcp1, vstrlcp)), \ + vceqq_u8(vlcp2, vstrlcp)), \ + vceqq_u8(vlcp3, vstrlcp)); \ + uint8x16_t vmasklcs8 = \ + vorrq_u8( \ + vorrq_u8( \ + vorrq_u8( \ + vceqq_u8(vlcs0, vstrlcs), \ + vceqq_u8(vlcs1, vstrlcs)), \ + vceqq_u8(vlcs2, vstrlcs)), \ + vceqq_u8(vlcs3, vstrlcs)); \ + ) + +ADV_PAT_PIN(5, \ + uint8x16_t vlcp0 = vdupq_n_u8(chr[0]); \ + uint8x16_t vlcp1 = vdupq_n_u8(chr[1]); \ + uint8x16_t vlcp2 = vdupq_n_u8(chr[2]); \ + uint8x16_t vlcp3 = vdupq_n_u8(chr[3]); \ + uint8x16_t vlcp4 = vdupq_n_u8(chr[4]); \ + uint8x16_t vlcs0 = vdupq_n_u8(chr[5]); \ + uint8x16_t vlcs1 = vdupq_n_u8(chr[6]); \ 
+ uint8x16_t vlcs2 = vdupq_n_u8(chr[7]); \ + uint8x16_t vlcs3 = vdupq_n_u8(chr[8]); \ + uint8x16_t vlcs4 = vdupq_n_u8(chr[9]); \ + , \ + uint8x16_t vmasklcp8 = \ + vorrq_u8( \ + vorrq_u8( \ + vorrq_u8( \ + vorrq_u8( \ + vceqq_u8(vlcp0, vstrlcp), \ + vceqq_u8(vlcp1, vstrlcp)), \ + vceqq_u8(vlcp2, vstrlcp)), \ + vceqq_u8(vlcp3, vstrlcp)), \ + vceqq_u8(vlcp4, vstrlcp)); \ + uint8x16_t vmasklcs8 = \ + vorrq_u8( \ + vorrq_u8( \ + vorrq_u8( \ + vorrq_u8( \ + vceqq_u8(vlcs0, vstrlcs), \ + vceqq_u8(vlcs1, vstrlcs)), \ + vceqq_u8(vlcs2, vstrlcs)), \ + vceqq_u8(vlcs3, vstrlcs)), \ + vceqq_u8(vlcs4, vstrlcs)); \ + ) + +ADV_PAT_PIN(6, \ + uint8x16_t vlcp0 = vdupq_n_u8(chr[0]); \ + uint8x16_t vlcp1 = vdupq_n_u8(chr[1]); \ + uint8x16_t vlcp2 = vdupq_n_u8(chr[2]); \ + uint8x16_t vlcp3 = vdupq_n_u8(chr[3]); \ + uint8x16_t vlcp4 = vdupq_n_u8(chr[4]); \ + uint8x16_t vlcp5 = vdupq_n_u8(chr[5]); \ + uint8x16_t vlcs0 = vdupq_n_u8(chr[6]); \ + uint8x16_t vlcs1 = vdupq_n_u8(chr[7]); \ + uint8x16_t vlcs2 = vdupq_n_u8(chr[8]); \ + uint8x16_t vlcs3 = vdupq_n_u8(chr[9]); \ + uint8x16_t vlcs4 = vdupq_n_u8(chr[10]); \ + uint8x16_t vlcs5 = vdupq_n_u8(chr[11]); \ + , \ + uint8x16_t vmasklcp8 = \ + vorrq_u8( \ + vorrq_u8( \ + vorrq_u8( \ + vorrq_u8( \ + vorrq_u8( \ + vceqq_u8(vlcp0, vstrlcp), \ + vceqq_u8(vlcp1, vstrlcp)), \ + vceqq_u8(vlcp2, vstrlcp)), \ + vceqq_u8(vlcp3, vstrlcp)), \ + vceqq_u8(vlcp4, vstrlcp)), \ + vceqq_u8(vlcp5, vstrlcp)); \ + uint8x16_t vmasklcs8 = \ + vorrq_u8( \ + vorrq_u8( \ + vorrq_u8( \ + vorrq_u8( \ + vorrq_u8( \ + vceqq_u8(vlcs0, vstrlcs), \ + vceqq_u8(vlcs1, vstrlcs)), \ + vceqq_u8(vlcs2, vstrlcs)), \ + vceqq_u8(vlcs3, vstrlcs)), \ + vceqq_u8(vlcs4, vstrlcs)), \ + vceqq_u8(vlcs5, vstrlcs)); \ + ) + +ADV_PAT_PIN(7, \ + uint8x16_t vlcp0 = vdupq_n_u8(chr[0]); \ + uint8x16_t vlcp1 = vdupq_n_u8(chr[1]); \ + uint8x16_t vlcp2 = vdupq_n_u8(chr[2]); \ + uint8x16_t vlcp3 = vdupq_n_u8(chr[3]); \ + uint8x16_t vlcp4 = vdupq_n_u8(chr[4]); \ + uint8x16_t vlcp5 = 
vdupq_n_u8(chr[5]); \ + uint8x16_t vlcp6 = vdupq_n_u8(chr[6]); \ + uint8x16_t vlcs0 = vdupq_n_u8(chr[7]); \ + uint8x16_t vlcs1 = vdupq_n_u8(chr[8]); \ + uint8x16_t vlcs2 = vdupq_n_u8(chr[9]); \ + uint8x16_t vlcs3 = vdupq_n_u8(chr[10]); \ + uint8x16_t vlcs4 = vdupq_n_u8(chr[11]); \ + uint8x16_t vlcs5 = vdupq_n_u8(chr[12]); \ + uint8x16_t vlcs6 = vdupq_n_u8(chr[13]); \ + , \ + uint8x16_t vmasklcp8 = \ + vorrq_u8( \ + vorrq_u8( \ + vorrq_u8( \ + vorrq_u8( \ + vorrq_u8( \ + vorrq_u8( \ + vceqq_u8(vlcp0, vstrlcp), \ + vceqq_u8(vlcp1, vstrlcp)), \ + vceqq_u8(vlcp2, vstrlcp)), \ + vceqq_u8(vlcp3, vstrlcp)), \ + vceqq_u8(vlcp4, vstrlcp)), \ + vceqq_u8(vlcp5, vstrlcp)), \ + vceqq_u8(vlcp6, vstrlcp)); \ + uint8x16_t vmasklcs8 = \ + vorrq_u8( \ + vorrq_u8( \ + vorrq_u8( \ + vorrq_u8( \ + vorrq_u8( \ + vorrq_u8( \ + vceqq_u8(vlcs0, vstrlcs), \ + vceqq_u8(vlcs1, vstrlcs)), \ + vceqq_u8(vlcs2, vstrlcs)), \ + vceqq_u8(vlcs3, vstrlcs)), \ + vceqq_u8(vlcs4, vstrlcs)), \ + vceqq_u8(vlcs5, vstrlcs)), \ + vceqq_u8(vlcs6, vstrlcs)); \ + ) + +ADV_PAT_PIN(8, \ + uint8x16_t vlcp0 = vdupq_n_u8(chr[0]); \ + uint8x16_t vlcp1 = vdupq_n_u8(chr[1]); \ + uint8x16_t vlcp2 = vdupq_n_u8(chr[2]); \ + uint8x16_t vlcp3 = vdupq_n_u8(chr[3]); \ + uint8x16_t vlcp4 = vdupq_n_u8(chr[4]); \ + uint8x16_t vlcp5 = vdupq_n_u8(chr[5]); \ + uint8x16_t vlcp6 = vdupq_n_u8(chr[6]); \ + uint8x16_t vlcp7 = vdupq_n_u8(chr[7]); \ + uint8x16_t vlcs0 = vdupq_n_u8(chr[8]); \ + uint8x16_t vlcs1 = vdupq_n_u8(chr[9]); \ + uint8x16_t vlcs2 = vdupq_n_u8(chr[10]); \ + uint8x16_t vlcs3 = vdupq_n_u8(chr[11]); \ + uint8x16_t vlcs4 = vdupq_n_u8(chr[12]); \ + uint8x16_t vlcs5 = vdupq_n_u8(chr[13]); \ + uint8x16_t vlcs6 = vdupq_n_u8(chr[14]); \ + uint8x16_t vlcs7 = vdupq_n_u8(chr[15]); \ + , \ + uint8x16_t vmasklcp8 = \ + vorrq_u8( \ + vorrq_u8( \ + vorrq_u8( \ + vorrq_u8( \ + vorrq_u8( \ + vorrq_u8( \ + vorrq_u8( \ + vceqq_u8(vlcp0, vstrlcp), \ + vceqq_u8(vlcp1, vstrlcp)), \ + vceqq_u8(vlcp2, vstrlcp)), \ + vceqq_u8(vlcp3, vstrlcp)), 
\ + vceqq_u8(vlcp4, vstrlcp)), \ + vceqq_u8(vlcp5, vstrlcp)), \ + vceqq_u8(vlcp6, vstrlcp)), \ + vceqq_u8(vlcp7, vstrlcp)); \ + uint8x16_t vmasklcs8 = \ + vorrq_u8( \ + vorrq_u8( \ + vorrq_u8( \ + vorrq_u8( \ + vorrq_u8( \ + vorrq_u8( \ + vorrq_u8( \ + vceqq_u8(vlcs0, vstrlcs), \ + vceqq_u8(vlcs1, vstrlcs)), \ + vceqq_u8(vlcs2, vstrlcs)), \ + vceqq_u8(vlcs3, vstrlcs)), \ + vceqq_u8(vlcs4, vstrlcs)), \ + vceqq_u8(vlcs5, vstrlcs)), \ + vceqq_u8(vlcs6, vstrlcs)), \ + vceqq_u8(vlcs7, vstrlcs)); \ + ) #endif + +/// Minimal 1 char pattern using bitap and PM4 +bool Matcher::advance_pattern_min1(size_t loc) +{ + const Pattern::Pred *pma = pat_->pma_; + const Pattern::Pred *bit = pat_->bit_; + while (true) + { + const char *s = buf_ + loc; + const char *e = buf_ + end_ - 3; + bool f = true; + while (s < e && + (f = ((bit[static_cast(*s)] & 1) && + (bit[static_cast(*++s)] & 1) && + (bit[static_cast(*++s)] & 1) && + (bit[static_cast(*++s)] & 1)))) + { + ++s; + } + loc = s - buf_; + if (!f) + { + if (s < e && Pattern::predict_match(pma, s) != 0) + { + ++loc; + continue; + } + set_current(loc); + return true; + } + set_current_and_peek_more(loc - 1); + loc = cur_ + 1; + if (loc + 3 >= end_) + { + set_current(loc); + return loc + 1 <= end_; + } + } +} + +/// Minimal 2 char pattern using bitam and PM4 +bool Matcher::advance_pattern_min2(size_t loc) +{ + const Pattern::Pred *bit = pat_->bit_; + const Pattern::Pred *pma = pat_->pma_; + Pattern::Pred state = ~0; + while (true) + { + const char *s = buf_ + loc; + const char *e = buf_ + end_; + while (s < e) + { + state = (state << 1) | bit[static_cast(*s)]; + if ((state & 2) == 0) + break; + ++s; + } + if (s < e) + { + s -= 1; + loc = s - buf_; + if (s > e - 4 || Pattern::predict_match(pma, s) == 0) + { + set_current(loc); + return true; + } + loc += 2; + } + else + { + loc = s - buf_; + set_current_and_peek_more(loc - 1); + loc = cur_ + 1; + if (loc + 2 > end_) + return false; + } + } +} + +/// Minimal 3 char pattern using bitam and 
PM4 +bool Matcher::advance_pattern_min3(size_t loc) +{ + const Pattern::Pred *bit = pat_->bit_; + const Pattern::Pred *pma = pat_->pma_; + Pattern::Pred state = ~0; + while (true) + { + const char *s = buf_ + loc; + const char *e = buf_ + end_; + while (s < e) + { + state = (state << 1) | bit[static_cast(*s)]; + if ((state & 4) == 0) + break; + ++s; + } + if (s < e) + { + s -= 2; + loc = s - buf_; + if (s > e - 4 || Pattern::predict_match(pma, s) == 0) + { + set_current(loc); + return true; + } + loc += 3; + } + else + { + loc = s - buf_; + set_current_and_peek_more(loc - 1); + loc = cur_ + 1; + if (loc + 3 > end_) + return false; + } + } +} + +/// Minimal 4 char pattern using bitam and PM hashing +bool Matcher::advance_pattern_min4(size_t loc) +{ + const Pattern::Pred *bit = pat_->bit_; + const Pattern::Pred *pmh = pat_->pmh_; + size_t min = pat_->min_; + Pattern::Pred state1 = ~0; + Pattern::Pred state2 = ~0; + Pattern::Pred mask = (1 << (min - 1)); + while (true) + { + const char *s = buf_ + loc; + const char *e = buf_ + end_; + while (s < e - 1) + { + state2 = (state1 << 1) | bit[static_cast(*s)]; + ++s; + state1 = (state2 << 1) | bit[static_cast(*s)]; + if ((state1 & state2 & mask) == 0) + break; + ++s; + } + if ((state2 & mask) == 0) + { + state1 = state2; + state2 = ~0; + --s; + } + else if ((state1 & mask) != 0 && s == e - 1) + { + state1 = (state1 << 1) | bit[static_cast(*s)]; + if ((state1 & mask) != 0) + ++s; + } + if (s < e) + { + s -= min - 1; + loc = s - buf_; + if (Pattern::predict_match(pmh, s, min)) + { + set_current(loc); + return true; + } + loc += min; + } + else + { + loc = s - buf_; + set_current_and_peek_more(loc - 1); + loc = cur_ + 1; + if (loc + min > end_) + return false; + } + } +} + +/// Minimal 1 char pattern using PM4 +bool Matcher::advance_pattern(size_t loc) +{ + const Pattern::Pred *pma = pat_->pma_; + size_t min = pat_->min_; + while (true) + { + const char *s = buf_ + loc; + const char *e = buf_ + end_ - 6; + bool f = true; + 
while (s < e && + (f = (Pattern::predict_match(pma, s) != 0 && + Pattern::predict_match(pma, ++s) != 0 && + Pattern::predict_match(pma, ++s) != 0 && + Pattern::predict_match(pma, ++s) != 0))) + { + ++s; + } + loc = s - buf_; + if (!f) + { + set_current(loc); + return true; + } + set_current_and_peek_more(loc - 1); + loc = cur_ + 1; + if (loc + 6 >= end_) + { + set_current(loc); + return loc + min <= end_; + } + } +} + +/// One char +bool Matcher::advance_char(size_t loc) +{ + char chr0 = pat_->chr_[0]; + while (true) + { + const char *s = buf_ + loc; + const char *e = buf_ + end_; + s = static_cast(std::memchr(s, chr0, e - s)); + if (s != NULL) + { + loc = s - buf_; + set_current(loc); + return true; + } + loc = e - buf_; + set_current_and_peek_more(loc - 1); + loc = cur_ + 1; + if (loc + 1 > end_) + return false; + } +} + +/// One char followed by 1 to 3 minimal char pattern +bool Matcher::advance_char_pma(size_t loc) +{ + const Pattern::Pred *pma = pat_->pma_; + char chr0 = pat_->chr_[0]; + while (true) + { + const char *s = buf_ + loc; + const char *e = buf_ + end_; + s = static_cast(std::memchr(s, chr0, e - s)); + if (s != NULL) + { + loc = s - buf_; + set_current(loc); + if (s > e - 5 || Pattern::predict_match(pma, s + 1) == 0) + return true; + ++loc; + } + else + { + loc = e - buf_; + set_current_and_peek_more(loc - 1); + loc = cur_ + 1; + if (loc + 1 > end_) + return false; + } + } +} + +/// One char followed by 4 minimal char pattern +bool Matcher::advance_char_pmh(size_t loc) +{ + const Pattern::Pred *pmh = pat_->pmh_; + char chr0 = pat_->chr_[0]; + size_t min = pat_->min_; + while (true) + { + const char *s = buf_ + loc; + const char *e = buf_ + end_; + s = static_cast(std::memchr(s, chr0, e - s)); + if (s != NULL) + { + loc = s - buf_; + if (s + 1 + min > e || Pattern::predict_match(pmh, s + 1, min)) + { + set_current(loc); + return true; + } + ++loc; + } + else + { + loc = e - buf_; + set_current_and_peek_more(loc - 1); + loc = cur_ + 1; + if (loc + 1 > 
end_) + return false; + } + } +} + +/// Few chars +template +bool Matcher::advance_chars(size_t loc) +{ + static const uint16_t lcp = 0; + static const uint16_t lcs = LEN - 1; + const char *chr = pat_->chr_; +#if defined(HAVE_AVX512BW) || defined(HAVE_AVX2) || defined(HAVE_SSE2) + while (true) + { + const char *s = buf_ + loc + lcp; + const char *e = buf_ + end_ + lcp - LEN + 1; + __m128i vlcp = _mm_set1_epi8(chr[lcp]); + __m128i vlcs = _mm_set1_epi8(chr[lcs]); + while (s <= e - 16) + { + __m128i vlcpm = _mm_loadu_si128(reinterpret_cast(s)); + __m128i vlcsm = _mm_loadu_si128(reinterpret_cast(s + lcs - lcp)); + __m128i vlcpeq = _mm_cmpeq_epi8(vlcp, vlcpm); + __m128i vlcseq = _mm_cmpeq_epi8(vlcs, vlcsm); + uint32_t mask = _mm_movemask_epi8(_mm_and_si128(vlcpeq, vlcseq)); + while (mask != 0) + { + uint32_t offset = ctz(mask); + loc = s - lcp + offset - buf_; + if (LEN == 2 || + (LEN == 3 ? s[offset + 1 - lcp] == chr[1] : std::memcmp(s + 1 - lcp + offset, chr + 1, LEN - 2) == 0)) + { + set_current(loc); + return true; + } + mask &= mask - 1; + } + s += 16; + } + s -= lcp; + loc = s - buf_; + set_current_and_peek_more(loc - 1); + loc = cur_ + 1; + if (loc + LEN > end_) + return false; + if (loc + LEN + 15 > end_) + break; + } +#elif defined(HAVE_NEON) + while (true) + { + const char *s = buf_ + loc + lcp; + const char *e = buf_ + end_ + lcp - LEN + 1; + uint8x16_t vlcp = vdupq_n_u8(chr[lcp]); + uint8x16_t vlcs = vdupq_n_u8(chr[lcs]); + while (s <= e - 16) + { + uint8x16_t vlcpm = vld1q_u8(reinterpret_cast(s)); + uint8x16_t vlcsm = vld1q_u8(reinterpret_cast(s) + lcs - lcp); + uint8x16_t vlcpeq = vceqq_u8(vlcp, vlcpm); + uint8x16_t vlcseq = vceqq_u8(vlcs, vlcsm); + uint8x16_t vmask8 = vandq_u8(vlcpeq, vlcseq); + uint64x2_t vmask64 = vreinterpretq_u64_u8(vmask8); + uint64_t mask = vgetq_lane_u64(vmask64, 0); + if (mask != 0) + { + for (uint16_t i = 0; i < 8; ++i) + { + if ((mask & 0xff) && + (LEN == 2 || + (LEN == 3 ? 
s[i + 1 - lcp] == chr[1] : std::memcmp(s + 1 - lcp + i, chr + 1, LEN - 2) == 0))) + { + loc = s - lcp + i - buf_; + set_current(loc); + return true; + } + mask >>= 8; + } + } + mask = vgetq_lane_u64(vmask64, 1); + if (mask != 0) + { + for (uint16_t i = 8; i < 16; ++i) + { + if ((mask & 0xff) && + (LEN == 2 || + (LEN == 3 ? s[i + 1 - lcp] == chr[1] : std::memcmp(s + 1 - lcp + i, chr + 1, LEN - 2) == 0))) + { + loc = s - lcp + i - buf_; + set_current(loc); + return true; + } + mask >>= 8; + } + } + s += 16; + } + s -= lcp; + loc = s - buf_; + set_current_and_peek_more(loc - 1); + loc = cur_ + 1; + if (loc + LEN > end_) + return false; + if (loc + LEN + 15 > end_) + break; + } +#endif + while (true) + { + const char *s = buf_ + loc + lcp; + const char *e = buf_ + end_ + lcp - LEN + 1; + while (s < e) + { + do + s = static_cast(std::memchr(s, chr[lcp], e - s)); + while (s != NULL && s[lcs - lcp] != chr[lcs] && ++s < e); + if (s == NULL || s >= e) + { + s = e; + break; + } + if (LEN == 2 || + (LEN == 3 ? 
s[1 - lcp] == chr[1] : std::memcmp(s + 1 - lcp, chr + 1, LEN - 2) == 0)) + { + loc = s - lcp - buf_; + set_current(loc); + return true; + } + ++s; + } + loc = s - lcp - buf_; + set_current_and_peek_more(loc - 1); + loc = cur_ + 1; + if (loc + LEN > end_) + return false; + } +} + +/// Few chars followed by 2 to 3 minimal char pattern +template +bool Matcher::advance_chars_pma(size_t loc) +{ + static const uint16_t lcp = 0; + static const uint16_t lcs = LEN - 1; + const Pattern::Pred *pma = pat_->pma_; + const char *chr = pat_->chr_; + size_t min = pat_->min_; +#if defined(HAVE_AVX512BW) || defined(HAVE_AVX2) || defined(HAVE_SSE2) + while (true) + { + const char *s = buf_ + loc + lcp; + const char *e = buf_ + end_ + lcp - LEN + 1; + __m128i vlcp = _mm_set1_epi8(chr[lcp]); + __m128i vlcs = _mm_set1_epi8(chr[lcs]); + while (s <= e - 16) + { + __m128i vlcpm = _mm_loadu_si128(reinterpret_cast(s)); + __m128i vlcsm = _mm_loadu_si128(reinterpret_cast(s + lcs - lcp)); + __m128i vlcpeq = _mm_cmpeq_epi8(vlcp, vlcpm); + __m128i vlcseq = _mm_cmpeq_epi8(vlcs, vlcsm); + uint32_t mask = _mm_movemask_epi8(_mm_and_si128(vlcpeq, vlcseq)); + while (mask != 0) + { + uint32_t offset = ctz(mask); + if (LEN == 2 || + (LEN == 3 ? 
s[offset + 1 - lcp] == chr[1] : std::memcmp(s + 1 - lcp + offset, chr + 1, LEN - 2) == 0)) + { + loc = s - lcp + offset - buf_; + if (loc + LEN + 4 > end_ || Pattern::predict_match(pma, &buf_[loc + LEN]) == 0) + { + set_current(loc); + return true; + } + } + mask &= mask - 1; + } + s += 16; + } + s -= lcp; + loc = s - buf_; + set_current_and_peek_more(loc - 1); + loc = cur_ + 1; + if (loc + LEN + min > end_) + return false; + if (loc + LEN + min + 15 > end_) + break; + } +#elif defined(HAVE_NEON) + while (true) + { + const char *s = buf_ + loc + lcp; + const char *e = buf_ + end_ + lcp - LEN + 1; + uint8x16_t vlcp = vdupq_n_u8(chr[lcp]); + uint8x16_t vlcs = vdupq_n_u8(chr[lcs]); + while (s <= e - 16) + { + uint8x16_t vlcpm = vld1q_u8(reinterpret_cast(s)); + uint8x16_t vlcsm = vld1q_u8(reinterpret_cast(s) + lcs - lcp); + uint8x16_t vlcpeq = vceqq_u8(vlcp, vlcpm); + uint8x16_t vlcseq = vceqq_u8(vlcs, vlcsm); + uint8x16_t vmask8 = vandq_u8(vlcpeq, vlcseq); + uint64x2_t vmask64 = vreinterpretq_u64_u8(vmask8); + uint64_t mask = vgetq_lane_u64(vmask64, 0); + if (mask != 0) + { + for (uint16_t i = 0; i < 8; ++i) + { + if ((mask & 0xff) && + (LEN == 2 || + (LEN == 3 ? s[i + 1 - lcp] == chr[1] : std::memcmp(s + 1 - lcp + i, chr + 1, LEN - 2) == 0))) + { + loc = s - lcp + i - buf_; + if (loc + LEN + 4 > end_ || Pattern::predict_match(pat_->pma_, &buf_[loc + LEN]) == 0) + { + set_current(loc); + return true; + } + } + mask >>= 8; + } + } + mask = vgetq_lane_u64(vmask64, 1); + if (mask != 0) + { + for (uint16_t i = 8; i < 16; ++i) + { + if ((mask & 0xff) && + (LEN == 2 || + (LEN == 3 ? 
s[i + 1 - lcp] == chr[1] : std::memcmp(s + 1 - lcp + i, chr + 1, LEN - 2) == 0))) + { + loc = s - lcp + i - buf_; + if (loc + LEN + 4 > end_ || Pattern::predict_match(pat_->pma_, &buf_[loc + LEN]) == 0) + { + set_current(loc); + return true; + } + } + mask >>= 8; + } + } + s += 16; + } + s -= lcp; + loc = s - buf_; + set_current_and_peek_more(loc - 1); + loc = cur_ + 1; + if (loc + LEN + min > end_) + return false; + if (loc + LEN + min + 15 > end_) + break; + } +#endif + while (true) + { + const char *s = buf_ + loc + lcp; + const char *e = buf_ + end_ + lcp - LEN + 1; + while (s < e) + { + do + s = static_cast(std::memchr(s, chr[lcp], e - s)); + while (s != NULL && s[lcs - lcp] != chr[lcs] && ++s < e); + if (s == NULL || s >= e) + { + s = e; + break; + } + if (LEN == 2 || + (LEN == 3 ? s[1 - lcp] == chr[1] : std::memcmp(s + 1 - lcp, chr + 1, LEN - 2) == 0)) + { + loc = s - lcp - buf_; + if (loc + LEN + 4 > end_ || Pattern::predict_match(pma, &buf_[loc + LEN]) == 0) + { + set_current(loc); + return true; + } + } + ++s; + } + loc = s - lcp - buf_; + set_current_and_peek_more(loc - 1); + loc = cur_ + 1; + if (loc + LEN + min > end_) + return false; + } +} + +/// Few chars followed by 4 minimal char pattern +template +bool Matcher::advance_chars_pmh(size_t loc) +{ + static const uint16_t lcp = 0; + static const uint16_t lcs = LEN - 1; + const Pattern::Pred *pmh = pat_->pmh_; + const char *chr = pat_->chr_; + size_t min = pat_->min_; +#if defined(HAVE_AVX512BW) || defined(HAVE_AVX2) || defined(HAVE_SSE2) + while (true) + { + const char *s = buf_ + loc + lcp; + const char *e = buf_ + end_ + lcp - LEN + 1; + __m128i vlcp = _mm_set1_epi8(chr[lcp]); + __m128i vlcs = _mm_set1_epi8(chr[lcs]); + while (s <= e - 16) + { + __m128i vlcpm = _mm_loadu_si128(reinterpret_cast(s)); + __m128i vlcsm = _mm_loadu_si128(reinterpret_cast(s + lcs - lcp)); + __m128i vlcpeq = _mm_cmpeq_epi8(vlcp, vlcpm); + __m128i vlcseq = _mm_cmpeq_epi8(vlcs, vlcsm); + uint32_t mask = 
_mm_movemask_epi8(_mm_and_si128(vlcpeq, vlcseq)); + while (mask != 0) + { + uint32_t offset = ctz(mask); + if (LEN == 2 || + (LEN == 3 ? s[offset + 1 - lcp] == chr[1] : std::memcmp(s + 1 - lcp + offset, chr + 1, LEN - 2) == 0)) + { + loc = s - lcp + offset - buf_; + if (loc + LEN + min > end_ || Pattern::predict_match(pmh, &buf_[loc + LEN], min)) + { + set_current(loc); + return true; + } + } + mask &= mask - 1; + } + s += 16; + } + s -= lcp; + loc = s - buf_; + set_current_and_peek_more(loc - 1); + loc = cur_ + 1; + if (loc + LEN + min > end_) + return false; + if (loc + LEN + min + 15 > end_) + break; + } +#elif defined(HAVE_NEON) + while (true) + { + const char *s = buf_ + loc + lcp; + const char *e = buf_ + end_ + lcp - LEN + 1; + uint8x16_t vlcp = vdupq_n_u8(chr[lcp]); + uint8x16_t vlcs = vdupq_n_u8(chr[lcs]); + while (s <= e - 16) + { + uint8x16_t vlcpm = vld1q_u8(reinterpret_cast(s)); + uint8x16_t vlcsm = vld1q_u8(reinterpret_cast(s) + lcs - lcp); + uint8x16_t vlcpeq = vceqq_u8(vlcp, vlcpm); + uint8x16_t vlcseq = vceqq_u8(vlcs, vlcsm); + uint8x16_t vmask8 = vandq_u8(vlcpeq, vlcseq); + uint64x2_t vmask64 = vreinterpretq_u64_u8(vmask8); + uint64_t mask = vgetq_lane_u64(vmask64, 0); + if (mask != 0) + { + for (uint16_t i = 0; i < 8; ++i) + { + if ((mask & 0xff) && + (LEN == 2 || + (LEN == 3 ? s[i + 1 - lcp] == chr[1] : std::memcmp(s + 1 - lcp + i, chr + 1, LEN - 2) == 0))) + { + size_t loc = s - lcp + i - buf_; + if (loc + LEN + min > end_ || Pattern::predict_match(pat_->pmh_, &buf_[loc + LEN], min)) + { + set_current(loc); + return true; + } + } + mask >>= 8; + } + } + mask = vgetq_lane_u64(vmask64, 1); + if (mask != 0) + { + for (uint16_t i = 8; i < 16; ++i) + { + if ((mask & 0xff) && + (LEN == 2 || + (LEN == 3 ? 
s[i + 1 - lcp] == chr[1] : std::memcmp(s + 1 - lcp + i, chr + 1, LEN - 2) == 0))) + { + size_t loc = s - lcp + i - buf_; + if (loc + LEN + min > end_ || Pattern::predict_match(pat_->pmh_, &buf_[loc + LEN], min)) + { + set_current(loc); + return true; + } + } + mask >>= 8; + } + } + s += 16; + } + s -= lcp; + loc = s - buf_; + set_current_and_peek_more(loc - 1); + loc = cur_ + 1; + if (loc + LEN + min > end_) + return false; + if (loc + LEN + min + 15 > end_) + break; + } +#endif + while (true) + { + const char *s = buf_ + loc + lcp; + const char *e = buf_ + end_ + lcp - LEN + 1; + while (s < e) + { + do + s = static_cast(std::memchr(s, chr[lcp], e - s)); + while (s != NULL && s[lcs - lcp] != chr[lcs] && ++s < e); + if (s == NULL || s >= e) + { + s = e; + break; + } + if (LEN == 2 || + (LEN == 3 ? s[1 - lcp] == chr[1] : std::memcmp(s + 1 - lcp, chr + 1, LEN - 2) == 0)) + { + loc = s - lcp - buf_; + if (loc + LEN + min > end_ || Pattern::predict_match(pmh, &buf_[loc + LEN], min)) + { + set_current(loc); + return true; + } + } + ++s; + } + loc = s - lcp - buf_; + set_current_and_peek_more(loc - 1); + loc = cur_ + 1; + if (loc + LEN + min > end_) + return false; + } +} + +/// String +bool Matcher::advance_string(size_t loc) +{ + const char *chr = pat_->chr_; + size_t len = pat_->len_; + uint16_t lcp = pat_->lcp_; + uint16_t lcs = pat_->lcs_; +#if defined(HAVE_AVX512BW) || defined(HAVE_AVX2) || defined(HAVE_SSE2) + // implements SSE2 string search scheme based on http://0x80.pl/articles/simd-friendly-karp-rabin.html + while (true) + { + const char *s = buf_ + loc + lcp; + const char *e = buf_ + end_ + lcp - len + 1; + __m128i vlcp = _mm_set1_epi8(chr[lcp]); + __m128i vlcs = _mm_set1_epi8(chr[lcs]); + while (s <= e - 16) + { + __m128i vlcpm = _mm_loadu_si128(reinterpret_cast(s)); + __m128i vlcsm = _mm_loadu_si128(reinterpret_cast(s + lcs - lcp)); + __m128i vlcpeq = _mm_cmpeq_epi8(vlcp, vlcpm); + __m128i vlcseq = _mm_cmpeq_epi8(vlcs, vlcsm); + uint32_t mask = 
_mm_movemask_epi8(_mm_and_si128(vlcpeq, vlcseq)); + while (mask != 0) + { + uint32_t offset = ctz(mask); + if (std::memcmp(s - lcp + offset, chr, len) == 0) + { + loc = s - lcp + offset - buf_; + set_current(loc); + return true; + } + mask &= mask - 1; + } + s += 16; + } + s -= lcp; + loc = s - buf_; + set_current_and_peek_more(loc - 1); + loc = cur_ + 1; + if (loc + len > end_) + return false; + if (loc + len + 15 > end_) + break; + } +#elif defined(HAVE_NEON) + while (true) + { + const char *s = buf_ + loc + lcp; + const char *e = buf_ + end_ + lcp - len + 1; + if (simd_advance_string_neon(s, e)) + return true; + s -= lcp; + loc = s - buf_; + set_current_and_peek_more(loc - 1); + loc = cur_ + 1; + if (loc + len > end_) + return false; + if (loc + len + 15 > end_) + break; + } +#endif + while (true) + { + const char *s = buf_ + loc + lcp; + const char *e = buf_ + end_ + lcp - len + 1; + while (s < e) + { + do + s = static_cast(std::memchr(s, chr[lcp], e - s)); + while (s != NULL && s[lcs - lcp] != chr[lcs] && ++s < e); + if (s == NULL || s >= e) + { + s = e; + break; + } + if (std::memcmp(s - lcp, chr, len) == 0) + { + loc = s - lcp - buf_; + set_current(loc); + return true; + } + ++s; + } + loc = s - lcp - buf_; + set_current_and_peek_more(loc - 1); + loc = cur_ + 1; + if (loc + len > end_) + return false; + } +} + +#if defined(WITH_STRING_PM) + +/// String followed by 1 to 3 minimal char pattern +bool Matcher::advance_string_pma(size_t loc) +{ + const Pattern::Pred *pma = pat_->pma_; + const char *chr = pat_->chr_; + size_t len = pat_->len_; + size_t min = pat_->min_; + uint16_t lcp = pat_->lcp_; + uint16_t lcs = pat_->lcs_; +#if defined(HAVE_AVX512BW) || defined(HAVE_AVX2) || defined(HAVE_SSE2) + // implements SSE2 string search scheme based on http://0x80.pl/articles/simd-friendly-karp-rabin.html + while (true) + { + const char *s = buf_ + loc + lcp; + const char *e = buf_ + end_ + lcp - len + 1; + __m128i vlcp = _mm_set1_epi8(chr[lcp]); + __m128i vlcs = 
_mm_set1_epi8(chr[lcs]); + while (s <= e - 16) + { + __m128i vlcpm = _mm_loadu_si128(reinterpret_cast(s)); + __m128i vlcsm = _mm_loadu_si128(reinterpret_cast(s + lcs - lcp)); + __m128i vlcpeq = _mm_cmpeq_epi8(vlcp, vlcpm); + __m128i vlcseq = _mm_cmpeq_epi8(vlcs, vlcsm); + uint32_t mask = _mm_movemask_epi8(_mm_and_si128(vlcpeq, vlcseq)); + while (mask != 0) + { + uint32_t offset = ctz(mask); + if (std::memcmp(s - lcp + offset, chr, len) == 0) + { + loc = s - lcp + offset - buf_; + if (loc + len + 4 > end_ || Pattern::predict_match(pma, &buf_[loc + len]) == 0) + { + set_current(loc); + return true; + } + } + mask &= mask - 1; + } + s += 16; + } + s -= lcp; + loc = s - buf_; + set_current_and_peek_more(loc - 1); + loc = cur_ + 1; + if (loc + len + min > end_) + return false; + if (loc + len + min + 15 > end_) + break; + } +#elif defined(HAVE_NEON) + while (true) + { + const char *s = buf_ + loc + lcp; + const char *e = buf_ + end_ + lcp - len + 1; + if (simd_advance_string_pma_neon(s, e)) + return true; + s -= lcp; + loc = s - buf_; + set_current_and_peek_more(loc - 1); + loc = cur_ + 1; + if (loc + len + min > end_) + return false; + if (loc + len + min + 15 > end_) + break; + } +#endif + while (true) + { + const char *s = buf_ + loc + lcp; + const char *e = buf_ + end_ + lcp - len + 1; + while (s < e) + { + do + s = static_cast(std::memchr(s, chr[lcp], e - s)); + while (s != NULL && s[lcs - lcp] != chr[lcs] && ++s < e); + if (s == NULL || s >= e) + { + s = e; + break; + } + if (std::memcmp(s - lcp, chr, len) == 0) + { + loc = s - lcp - buf_; + if (loc + len + 4 > end_ || Pattern::predict_match(pma, &buf_[loc + len]) == 0) + { + set_current(loc); + return true; + } + } + ++s; + } + loc = s - lcp - buf_; + set_current_and_peek_more(loc - 1); + loc = cur_ + 1; + if (loc + len + min > end_) + return false; + } +} + +/// String followed by 4 minimal char pattern +bool Matcher::advance_string_pmh(size_t loc) +{ + const Pattern::Pred *pmh = pat_->pmh_; + const char *chr = 
pat_->chr_; + size_t len = pat_->len_; + size_t min = pat_->min_; + uint16_t lcp = pat_->lcp_; + uint16_t lcs = pat_->lcs_; +#if defined(HAVE_AVX512BW) || defined(HAVE_AVX2) || defined(HAVE_SSE2) + // implements SSE2 string search scheme based on http://0x80.pl/articles/simd-friendly-karp-rabin.html + while (true) + { + const char *s = buf_ + loc + lcp; + const char *e = buf_ + end_ + lcp - len + 1; + __m128i vlcp = _mm_set1_epi8(chr[lcp]); + __m128i vlcs = _mm_set1_epi8(chr[lcs]); + while (s <= e - 16) + { + __m128i vlcpm = _mm_loadu_si128(reinterpret_cast(s)); + __m128i vlcsm = _mm_loadu_si128(reinterpret_cast(s + lcs - lcp)); + __m128i vlcpeq = _mm_cmpeq_epi8(vlcp, vlcpm); + __m128i vlcseq = _mm_cmpeq_epi8(vlcs, vlcsm); + uint32_t mask = _mm_movemask_epi8(_mm_and_si128(vlcpeq, vlcseq)); + while (mask != 0) + { + uint32_t offset = ctz(mask); + if (std::memcmp(s - lcp + offset, chr, len) == 0) + { + loc = s - lcp + offset - buf_; + if (loc + len + min > end_ || Pattern::predict_match(pmh, &buf_[loc + len], min)) + { + set_current(loc); + return true; + } + } + mask &= mask - 1; + } + s += 16; + } + s -= lcp; + loc = s - buf_; + set_current_and_peek_more(loc - 1); + loc = cur_ + 1; + if (loc + len + min > end_) + return false; + if (loc + len + min + 15 > end_) + break; + } +#elif defined(HAVE_NEON) + while (true) + { + const char *s = buf_ + loc + lcp; + const char *e = buf_ + end_ + lcp - len + 1; + if (simd_advance_string_pmh_neon(s, e)) + return true; + s -= lcp; + loc = s - buf_; + set_current_and_peek_more(loc - 1); + loc = cur_ + 1; + if (loc + len + min > end_) + return false; + if (loc + len + min + 15 > end_) + break; + } +#endif + while (true) + { + const char *s = buf_ + loc + lcp; + const char *e = buf_ + end_ + lcp - len + 1; + while (s < e) + { + do + s = static_cast(std::memchr(s, chr[lcp], e - s)); + while (s != NULL && s[lcs - lcp] != chr[lcs] && ++s < e); + if (s == NULL || s >= e) + { + s = e; + break; + } + if (std::memcmp(s - lcp, chr, len) == 
0) + { + loc = s - lcp - buf_; + if (loc + len + min > end_ || Pattern::predict_match(pmh, &buf_[loc + len], min)) + { + set_current(loc); + return true; + } + } + ++s; + } + loc = s - lcp - buf_; + set_current_and_peek_more(loc - 1); + loc = cur_ + 1; + if (loc + len + min > end_) + return false; + } +} + +#endif // WITH_STRING_PM + +#if defined(HAVE_NEON) + +// Implements NEON/AArch64 string search scheme based on http://0x80.pl/articles/simd-friendly-karp-rabin.html 64 bit optimized +bool Matcher::simd_advance_string_neon(const char *&s, const char *e) +{ + uint16_t lcp = pat_->lcp_; + uint16_t lcs = pat_->lcs_; + size_t len = pat_->len_; + const char *chr = pat_->chr_; + uint8x16_t vlcp = vdupq_n_u8(chr[lcp]); + uint8x16_t vlcs = vdupq_n_u8(chr[lcs]); + while (s <= e - 16) + { + uint8x16_t vlcpm = vld1q_u8(reinterpret_cast(s)); + uint8x16_t vlcsm = vld1q_u8(reinterpret_cast(s) + lcs - lcp); + uint8x16_t vlcpeq = vceqq_u8(vlcp, vlcpm); + uint8x16_t vlcseq = vceqq_u8(vlcs, vlcsm); + uint8x16_t vmask8 = vandq_u8(vlcpeq, vlcseq); + uint64x2_t vmask64 = vreinterpretq_u64_u8(vmask8); + uint64_t mask = vgetq_lane_u64(vmask64, 0); + if (mask != 0) + { + for (uint16_t i = 0; i < 8; ++i) + { + if ((mask & 0xff) && std::memcmp(s - lcp + i, chr, len) == 0) + { + size_t loc = s - lcp + i - buf_; + set_current(loc); + return true; + } + mask >>= 8; + } + } + mask = vgetq_lane_u64(vmask64, 1); + if (mask != 0) + { + for (uint16_t i = 8; i < 16; ++i) + { + if ((mask & 0xff) && std::memcmp(s - lcp + i, chr, len) == 0) + { + size_t loc = s - lcp + i - buf_; + set_current(loc); + return true; + } + mask >>= 8; + } + } + s += 16; + } + return false; +} + +#if defined(WITH_STRING_PM) + +// Implements NEON/AArch64 string search scheme based on http://0x80.pl/articles/simd-friendly-karp-rabin.html 64 bit optimized +bool Matcher::simd_advance_string_pma_neon(const char *&s, const char *e) +{ + uint16_t lcp = pat_->lcp_; + uint16_t lcs = pat_->lcs_; + size_t len = pat_->len_; + const 
char *chr = pat_->chr_; + uint8x16_t vlcp = vdupq_n_u8(chr[lcp]); + uint8x16_t vlcs = vdupq_n_u8(chr[lcs]); + while (s <= e - 16) + { + uint8x16_t vlcpm = vld1q_u8(reinterpret_cast(s)); + uint8x16_t vlcsm = vld1q_u8(reinterpret_cast(s) + lcs - lcp); + uint8x16_t vlcpeq = vceqq_u8(vlcp, vlcpm); + uint8x16_t vlcseq = vceqq_u8(vlcs, vlcsm); + uint8x16_t vmask8 = vandq_u8(vlcpeq, vlcseq); + uint64x2_t vmask64 = vreinterpretq_u64_u8(vmask8); + uint64_t mask = vgetq_lane_u64(vmask64, 0); + if (mask != 0) + { + for (uint16_t i = 0; i < 8; ++i) + { + if ((mask & 0xff) && std::memcmp(s - lcp + i, chr, len) == 0) + { + size_t loc = s - lcp + i - buf_; + if (loc + len + 4 > end_ || Pattern::predict_match(pat_->pma_, &buf_[loc + len]) == 0) + { + set_current(loc); + return true; + } + } + mask >>= 8; + } + } + mask = vgetq_lane_u64(vmask64, 1); + if (mask != 0) + { + for (uint16_t i = 8; i < 16; ++i) + { + if ((mask & 0xff) && std::memcmp(s - lcp + i, chr, len) == 0) + { + size_t loc = s - lcp + i - buf_; + if (loc + len + 4 > end_ || Pattern::predict_match(pat_->pma_, &buf_[loc + len]) == 0) + { + set_current(loc); + return true; + } + } + mask >>= 8; + } + } + s += 16; + } + return false; +} + +// Implements NEON/AArch64 string search scheme based on http://0x80.pl/articles/simd-friendly-karp-rabin.html 64 bit optimized +bool Matcher::simd_advance_string_pmh_neon(const char *&s, const char *e) +{ + uint16_t lcp = pat_->lcp_; + uint16_t lcs = pat_->lcs_; + size_t len = pat_->len_; + size_t min = pat_->min_; // min >= 4 + const char *chr = pat_->chr_; + uint8x16_t vlcp = vdupq_n_u8(chr[lcp]); + uint8x16_t vlcs = vdupq_n_u8(chr[lcs]); + while (s <= e - 16) + { + uint8x16_t vlcpm = vld1q_u8(reinterpret_cast(s)); + uint8x16_t vlcsm = vld1q_u8(reinterpret_cast(s) + lcs - lcp); + uint8x16_t vlcpeq = vceqq_u8(vlcp, vlcpm); + uint8x16_t vlcseq = vceqq_u8(vlcs, vlcsm); + uint8x16_t vmask8 = vandq_u8(vlcpeq, vlcseq); + uint64x2_t vmask64 = vreinterpretq_u64_u8(vmask8); + uint64_t mask 
= vgetq_lane_u64(vmask64, 0); + if (mask != 0) + { + for (uint16_t i = 0; i < 8; ++i) + { + if ((mask & 0xff) && std::memcmp(s - lcp + i, chr, len) == 0) + { + size_t loc = s - lcp + i - buf_; + if (loc + len + min > end_ || Pattern::predict_match(pat_->pmh_, &buf_[loc + len], min)) + { + set_current(loc); + return true; + } + } + mask >>= 8; + } + } + mask = vgetq_lane_u64(vmask64, 1); + if (mask != 0) + { + for (uint16_t i = 8; i < 16; ++i) + { + if ((mask & 0xff) && std::memcmp(s - lcp + i, chr, len) == 0) + { + size_t loc = s - lcp + i - buf_; + if (loc + len + min > end_ || Pattern::predict_match(pat_->pmh_, &buf_[loc + len], min)) + { + set_current(loc); + return true; + } + } + mask >>= 8; + } + } + s += 16; + } + return false; +} + +#endif // WITH_STRING_PM + +#endif // HAVE_NEON + +/// My improved Boyer-Moore string search +bool Matcher::advance_string_bm(size_t loc) +{ + const char *chr = pat_->chr_; + const uint8_t *bms = pat_->bms_; + size_t len = pat_->len_; + size_t bmd = pat_->bmd_; + uint16_t lcp = pat_->lcp_; + while (true) + { + const char *s = buf_ + loc + len - 1; + const char *e = buf_ + end_; + const char *t = chr + len - 1; + while (s < e) + { + size_t k = 0; + do + s += k = bms[static_cast(*s)]; + while (k > 0 ? 
s < e : s[lcp - len + 1] != chr[lcp] && (s += bmd) < e); + if (s >= e) + break; + const char *p = t - 1; + const char *q = s - 1; + while (p >= chr && *p == *q) + { + --p; + --q; + } + if (p < chr) + { + loc = q - buf_ + 1; + set_current(loc); + return true; + } + if (chr + bmd >= p) + { + s += bmd; + } + else + { + size_t k = bms[static_cast(*q)]; + if (p + k > t + bmd) + s += k - (t - p); + else + s += bmd; + } + } + s -= len - 1; + loc = s - buf_; + set_current_and_peek_more(loc - 1); + loc = cur_ + 1; + if (loc + len > end_) + return false; + } +} + +#if defined(WITH_STRING_PM) + +/// My improved Boyer-Moore string search followed by a 1 to 3 minimal char pattern, using PM4 +bool Matcher::advance_string_bm_pma(size_t loc) +{ + const char *chr = pat_->chr_; + const Pattern::Pred *pma = pat_->pma_; + const uint8_t *bms = pat_->bms_; + size_t len = pat_->len_; + size_t bmd = pat_->bmd_; + uint16_t lcp = pat_->lcp_; + while (true) + { + const char *s = buf_ + loc + len - 1; + const char *e = buf_ + end_; + const char *t = chr + len - 1; + while (s < e) + { + size_t k = 0; + do + s += k = bms[static_cast(*s)]; + while (k > 0 ? 
s < e : s[lcp - len + 1] != chr[lcp] && (s += bmd) < e); + if (s >= e) + break; + const char *p = t - 1; + const char *q = s - 1; + while (p >= chr && *p == *q) + { + --p; + --q; + } + if (p < chr) + { + loc = q - buf_ + 1; + if (loc + len + 4 > end_ || Pattern::predict_match(pma, &buf_[loc + len]) == 0) + { + set_current(loc); + return true; + } + } + if (chr + bmd >= p) + { + s += bmd; + } + else + { + size_t k = bms[static_cast(*q)]; + if (p + k > t + bmd) + s += k - (t - p); + else + s += bmd; + } + } + s -= len - 1; + loc = s - buf_; + set_current_and_peek_more(loc - 1); + loc = cur_ + 1; + if (loc + len > end_) + return false; + } +} + +/// My improved Boyer-Moore string search followed by a 4 minimal char pattern, using PM4 +bool Matcher::advance_string_bm_pmh(size_t loc) +{ + const char *chr = pat_->chr_; + const Pattern::Pred *pmh = pat_->pmh_; + const uint8_t *bms = pat_->bms_; + size_t bmd = pat_->bmd_; + size_t len = pat_->len_; + size_t min = pat_->min_; + uint16_t lcp = pat_->lcp_; + while (true) + { + const char *s = buf_ + loc + len - 1; + const char *e = buf_ + end_; + const char *t = chr + len - 1; + while (s < e) + { + size_t k = 0; + do + s += k = bms[static_cast(*s)]; + while (k > 0 ? 
s < e : s[lcp - len + 1] != chr[lcp] && (s += bmd) < e); + if (s >= e) + break; + const char *p = t - 1; + const char *q = s - 1; + while (p >= chr && *p == *q) + { + --p; + --q; + } + if (p < chr) + { + loc = q - buf_ + 1; + if (loc + len + min > end_ || Pattern::predict_match(pmh, &buf_[loc + len], min)) + { + set_current(loc); + return true; + } + } + if (chr + bmd >= p) + { + s += bmd; + } + else + { + size_t k = bms[static_cast(*q)]; + if (p + k > t + bmd) + s += k - (t - p); + else + s += bmd; + } + } + s -= len - 1; + loc = s - buf_; + set_current_and_peek_more(loc - 1); + loc = cur_ + 1; + if (loc + len > end_) + return false; + } +} + +#endif // WITH_STRING_PM + +} // namespace reflex diff --git a/ccl/rslang/import/reflex/lib/matcher_avx2.cpp b/ccl/rslang/import/reflex/lib/matcher_avx2.cpp index f560af6..304b168 100644 --- a/ccl/rslang/import/reflex/lib/matcher_avx2.cpp +++ b/ccl/rslang/import/reflex/lib/matcher_avx2.cpp @@ -27,18 +27,1004 @@ \******************************************************************************/ /** -@file matcher.cpp, matcher_avx2.cpp, matcher_avx512bw.cpp +@file matcher_avx2.cpp @brief RE/flex matcher engine @author Robert van Engelen - engelen@genivia.com -@copyright (c) 2016-2022, Robert van Engelen, Genivia Inc. All rights reserved. +@copyright (c) 2016-2024, Robert van Engelen, Genivia Inc. All rights reserved. @copyright (c) BSD-3 License - see LICENSE.txt */ -#if defined(HAVE_AVX2) -# if !defined(__AVX2__) -# error matcher_avx2.cpp must be compiled with -mavx2 or /arch:avx2. +#if defined(HAVE_AVX2) || defined(HAVE_AVX512BW) +# if !defined(__AVX2__) && !defined(__AVX512BW__) +# error simd_avx2.cpp must be compiled with -mavx2 or /arch:avx2. 
# endif #endif -#define COMPILE_AVX2 -#include "matcher.cpp" +#include + +namespace reflex { + +#if defined(HAVE_AVX2) || defined(HAVE_AVX512BW) + +// AVX2 runtime optimized function callback overrides +void Matcher::simd_init_advance_avx2() +{ + if (pat_->len_ == 0) + { + switch (pat_->pin_) + { + case 1: + if (pat_->min_ < 4) + adv_ = &Matcher::simd_advance_pattern_pin1_pma_avx2; + else + adv_ = &Matcher::simd_advance_pattern_pin1_pmh_avx2; + break; + case 2: + if (pat_->min_ == 1) + adv_ = &Matcher::simd_advance_pattern_pin2_one_avx2; + else if (pat_->min_ < 4) + adv_ = &Matcher::simd_advance_pattern_pin2_pma_avx2; + else + adv_ = &Matcher::simd_advance_pattern_pin2_pmh_avx2; + break; + case 3: + if (pat_->min_ == 1) + adv_ = &Matcher::simd_advance_pattern_pin3_one_avx2; + else if (pat_->min_ < 4) + adv_ = &Matcher::simd_advance_pattern_pin3_pma_avx2; + else + adv_ = &Matcher::simd_advance_pattern_pin3_pmh_avx2; + break; + case 4: + if (pat_->min_ == 1) + adv_ = &Matcher::simd_advance_pattern_pin4_one_avx2; + else if (pat_->min_ < 4) + adv_ = &Matcher::simd_advance_pattern_pin4_pma_avx2; + else + adv_ = &Matcher::simd_advance_pattern_pin4_pmh_avx2; + break; + case 5: + if (pat_->min_ == 1) + adv_ = &Matcher::simd_advance_pattern_pin5_one_avx2; + else if (pat_->min_ < 4) + adv_ = &Matcher::simd_advance_pattern_pin5_pma_avx2; + else + adv_ = &Matcher::simd_advance_pattern_pin5_pmh_avx2; + break; + case 6: + if (pat_->min_ == 1) + adv_ = &Matcher::simd_advance_pattern_pin6_one_avx2; + else if (pat_->min_ < 4) + adv_ = &Matcher::simd_advance_pattern_pin6_pma_avx2; + else + adv_ = &Matcher::simd_advance_pattern_pin6_pmh_avx2; + break; + case 7: + if (pat_->min_ == 1) + adv_ = &Matcher::simd_advance_pattern_pin7_one_avx2; + else if (pat_->min_ < 4) + adv_ = &Matcher::simd_advance_pattern_pin7_pma_avx2; + else + adv_ = &Matcher::simd_advance_pattern_pin7_pmh_avx2; + break; + case 8: + if (pat_->min_ == 1) + adv_ = &Matcher::simd_advance_pattern_pin8_one_avx2; + else if 
(pat_->min_ < 4) + adv_ = &Matcher::simd_advance_pattern_pin8_pma_avx2; + else + adv_ = &Matcher::simd_advance_pattern_pin8_pmh_avx2; + break; + case 16: + if (pat_->min_ == 1) + adv_ = &Matcher::simd_advance_pattern_pin16_one_avx2; + else if (pat_->min_ < 4) + adv_ = &Matcher::simd_advance_pattern_pin16_pma_avx2; + else + adv_ = &Matcher::simd_advance_pattern_pin16_pmh_avx2; + break; + } + } + else if (pat_->len_ == 1) + { + // no specialization + } + else if (pat_->len_ == 2) + { + if (pat_->min_ == 0) + adv_ = &Matcher::simd_advance_chars_avx2<2>; + else if (pat_->min_ < 4) + adv_ = &Matcher::simd_advance_chars_pma_avx2<2>; + else + adv_ = &Matcher::simd_advance_chars_pmh_avx2<2>; + } + else if (pat_->len_ == 3) + { + if (pat_->min_ == 0) + adv_ = &Matcher::simd_advance_chars_avx2<3>; + else if (pat_->min_ < 4) + adv_ = &Matcher::simd_advance_chars_pma_avx2<3>; + else + adv_ = &Matcher::simd_advance_chars_pmh_avx2<3>; + } + else if (pat_->bmd_ == 0) + { +#if defined(WITH_STRING_PM) + if (pat_->min_ >= 4) + adv_ = &Matcher::simd_advance_string_pmh_avx2; + else if (pat_->min_ > 0) + adv_ = &Matcher::simd_advance_string_pma_avx2; + else +#endif + adv_ = &Matcher::simd_advance_string_avx2; + } +} + +// My "needle search" method when pin=1 +bool Matcher::simd_advance_pattern_pin1_pma_avx2(size_t loc) +{ + const Pattern::Pred *pma = pat_->pma_; + const char *chr = pat_->chr_; + size_t min = pat_->min_; + uint16_t lcp = pat_->lcp_; + uint16_t lcs = pat_->lcs_; + __m256i vlcp = _mm256_set1_epi8(chr[0]); + __m256i vlcs = _mm256_set1_epi8(chr[1]); + while (true) + { + const char *s = buf_ + loc + lcp; + const char *e = buf_ + end_ + lcp - min + 1; + while (s <= e - 32) + { + __m256i vstrlcp = _mm256_loadu_si256(reinterpret_cast(s)); + __m256i vstrlcs = _mm256_loadu_si256(reinterpret_cast(s + lcs - lcp)); + __m256i veqlcp = _mm256_cmpeq_epi8(vlcp, vstrlcp); + __m256i veqlcs = _mm256_cmpeq_epi8(vlcs, vstrlcs); + uint32_t mask = _mm256_movemask_epi8(_mm256_and_si256(veqlcp, 
veqlcs)); + while (mask != 0) + { + uint32_t offset = ctz(mask); + loc = s - lcp + offset - buf_; + set_current(loc); + if (loc + 4 > end_ || Pattern::predict_match(pma, &buf_[loc]) == 0) + return true; + mask &= mask - 1; + } + s += 32; + } + s -= lcp; + loc = s - buf_; + set_current_and_peek_more(loc - 1); + loc = cur_ + 1; + if (loc + min > end_) + return false; + if (loc + min + 31 > end_) + break; + } + return advance_pattern_pin1_pma(loc); +} + +// My "needle search" method when pin=1 +bool Matcher::simd_advance_pattern_pin1_pmh_avx2(size_t loc) +{ + const Pattern::Pred *pmh = pat_->pmh_; + const char *chr = pat_->chr_; + size_t min = pat_->min_; + uint16_t lcp = pat_->lcp_; + uint16_t lcs = pat_->lcs_; + __m256i vlcp = _mm256_set1_epi8(chr[0]); + __m256i vlcs = _mm256_set1_epi8(chr[1]); + while (true) + { + const char *s = buf_ + loc + lcp; + const char *e = buf_ + end_ + lcp - min + 1; + while (s <= e - 32) + { + __m256i vstrlcp = _mm256_loadu_si256(reinterpret_cast(s)); + __m256i vstrlcs = _mm256_loadu_si256(reinterpret_cast(s + lcs - lcp)); + __m256i veqlcp = _mm256_cmpeq_epi8(vlcp, vstrlcp); + __m256i veqlcs = _mm256_cmpeq_epi8(vlcs, vstrlcs); + uint32_t mask = _mm256_movemask_epi8(_mm256_and_si256(veqlcp, veqlcs)); + while (mask != 0) + { + uint32_t offset = ctz(mask); + loc = s - lcp + offset - buf_; + set_current(loc); + if (Pattern::predict_match(pmh, &buf_[loc], min)) + return true; + mask &= mask - 1; + } + s += 32; + } + s -= lcp; + loc = s - buf_; + set_current_and_peek_more(loc - 1); + loc = cur_ + 1; + if (loc + min > end_) + return false; + if (loc + min + 31 > end_) + break; + } + return advance_pattern_pin1_pmh(loc); +} + +// My "needle search" methods +#define ADV_PAT_PIN_ONE(N, INIT, COMP) \ +bool Matcher::simd_advance_pattern_pin##N##_one_avx2(size_t loc) \ +{ \ + const Pattern::Pred *pma = pat_->pma_; \ + const char *chr = pat_->chr_; \ + INIT \ + while (true) \ + { \ + const char *s = buf_ + loc; \ + const char *e = buf_ + end_; \ + 
while (s <= e - 32) \ + { \ + __m256i vstr = _mm256_loadu_si256(reinterpret_cast(s)); \ + __m256i veq = _mm256_cmpeq_epi8(v0, vstr); \ + COMP \ + uint32_t mask = _mm256_movemask_epi8(veq); \ + while (mask != 0) \ + { \ + uint32_t offset = ctz(mask); \ + loc = s + offset - buf_; \ + if (loc + 4 > end_ || Pattern::predict_match(pma, &buf_[loc]) == 0) \ + { \ + set_current(loc); \ + return true; \ + } \ + mask &= mask - 1; \ + } \ + s += 32; \ + } \ + loc = s - buf_; \ + set_current_and_peek_more(loc - 1); \ + loc = cur_ + 1; \ + if (loc + 1 > end_) \ + return false; \ + if (loc + 32 > end_) \ + break; \ + } \ + return advance_pattern(loc); \ +} + +ADV_PAT_PIN_ONE(2, \ + __m256i v0 = _mm256_set1_epi8(chr[0]); \ + __m256i v1 = _mm256_set1_epi8(chr[1]); \ + , \ + veq = _mm256_or_si256(veq, _mm256_cmpeq_epi8(v1, vstr)); \ + ) + +ADV_PAT_PIN_ONE(3, \ + __m256i v0 = _mm256_set1_epi8(chr[0]); \ + __m256i v1 = _mm256_set1_epi8(chr[1]); \ + __m256i v2 = _mm256_set1_epi8(chr[2]); \ + , \ + veq = _mm256_or_si256(veq, _mm256_cmpeq_epi8(v1, vstr)); \ + veq = _mm256_or_si256(veq, _mm256_cmpeq_epi8(v2, vstr)); \ + ) + +ADV_PAT_PIN_ONE(4, \ + __m256i v0 = _mm256_set1_epi8(chr[0]); \ + __m256i v1 = _mm256_set1_epi8(chr[1]); \ + __m256i v2 = _mm256_set1_epi8(chr[2]); \ + __m256i v3 = _mm256_set1_epi8(chr[3]); \ + , \ + veq = _mm256_or_si256(veq, _mm256_cmpeq_epi8(v1, vstr)); \ + veq = _mm256_or_si256(veq, _mm256_cmpeq_epi8(v2, vstr)); \ + veq = _mm256_or_si256(veq, _mm256_cmpeq_epi8(v3, vstr)); \ + ) + +ADV_PAT_PIN_ONE(5, \ + __m256i v0 = _mm256_set1_epi8(chr[0]); \ + __m256i v1 = _mm256_set1_epi8(chr[1]); \ + __m256i v2 = _mm256_set1_epi8(chr[2]); \ + __m256i v3 = _mm256_set1_epi8(chr[3]); \ + __m256i v4 = _mm256_set1_epi8(chr[4]); \ + , \ + veq = _mm256_or_si256(veq, _mm256_cmpeq_epi8(v1, vstr)); \ + veq = _mm256_or_si256(veq, _mm256_cmpeq_epi8(v2, vstr)); \ + veq = _mm256_or_si256(veq, _mm256_cmpeq_epi8(v3, vstr)); \ + veq = _mm256_or_si256(veq, _mm256_cmpeq_epi8(v4, vstr)); \ + ) 
+ +ADV_PAT_PIN_ONE(6, \ + __m256i v0 = _mm256_set1_epi8(chr[0]); \ + __m256i v1 = _mm256_set1_epi8(chr[1]); \ + __m256i v2 = _mm256_set1_epi8(chr[2]); \ + __m256i v3 = _mm256_set1_epi8(chr[3]); \ + __m256i v4 = _mm256_set1_epi8(chr[4]); \ + __m256i v5 = _mm256_set1_epi8(chr[5]); \ + , \ + veq = _mm256_or_si256(veq, _mm256_cmpeq_epi8(v1, vstr)); \ + veq = _mm256_or_si256(veq, _mm256_cmpeq_epi8(v2, vstr)); \ + veq = _mm256_or_si256(veq, _mm256_cmpeq_epi8(v3, vstr)); \ + veq = _mm256_or_si256(veq, _mm256_cmpeq_epi8(v4, vstr)); \ + veq = _mm256_or_si256(veq, _mm256_cmpeq_epi8(v5, vstr)); \ + ) + +ADV_PAT_PIN_ONE(7, \ + __m256i v0 = _mm256_set1_epi8(chr[0]); \ + __m256i v1 = _mm256_set1_epi8(chr[1]); \ + __m256i v2 = _mm256_set1_epi8(chr[2]); \ + __m256i v3 = _mm256_set1_epi8(chr[3]); \ + __m256i v4 = _mm256_set1_epi8(chr[4]); \ + __m256i v5 = _mm256_set1_epi8(chr[5]); \ + __m256i v6 = _mm256_set1_epi8(chr[6]); \ + , \ + veq = _mm256_or_si256(veq, _mm256_cmpeq_epi8(v1, vstr)); \ + veq = _mm256_or_si256(veq, _mm256_cmpeq_epi8(v2, vstr)); \ + veq = _mm256_or_si256(veq, _mm256_cmpeq_epi8(v3, vstr)); \ + veq = _mm256_or_si256(veq, _mm256_cmpeq_epi8(v4, vstr)); \ + veq = _mm256_or_si256(veq, _mm256_cmpeq_epi8(v5, vstr)); \ + veq = _mm256_or_si256(veq, _mm256_cmpeq_epi8(v6, vstr)); \ + ) + +ADV_PAT_PIN_ONE(8, \ + __m256i v0 = _mm256_set1_epi8(chr[0]); \ + __m256i v1 = _mm256_set1_epi8(chr[1]); \ + __m256i v2 = _mm256_set1_epi8(chr[2]); \ + __m256i v3 = _mm256_set1_epi8(chr[3]); \ + __m256i v4 = _mm256_set1_epi8(chr[4]); \ + __m256i v5 = _mm256_set1_epi8(chr[5]); \ + __m256i v6 = _mm256_set1_epi8(chr[6]); \ + __m256i v7 = _mm256_set1_epi8(chr[7]); \ + , \ + veq = _mm256_or_si256(veq, _mm256_cmpeq_epi8(v1, vstr)); \ + veq = _mm256_or_si256(veq, _mm256_cmpeq_epi8(v2, vstr)); \ + veq = _mm256_or_si256(veq, _mm256_cmpeq_epi8(v3, vstr)); \ + veq = _mm256_or_si256(veq, _mm256_cmpeq_epi8(v4, vstr)); \ + veq = _mm256_or_si256(veq, _mm256_cmpeq_epi8(v5, vstr)); \ + veq = 
_mm256_or_si256(veq, _mm256_cmpeq_epi8(v6, vstr)); \ + veq = _mm256_or_si256(veq, _mm256_cmpeq_epi8(v7, vstr)); \ + ) + +ADV_PAT_PIN_ONE(16, \ + __m256i v0 = _mm256_set1_epi8(chr[0]); \ + __m256i v1 = _mm256_set1_epi8(chr[1]); \ + __m256i v2 = _mm256_set1_epi8(chr[2]); \ + __m256i v3 = _mm256_set1_epi8(chr[3]); \ + __m256i v4 = _mm256_set1_epi8(chr[4]); \ + __m256i v5 = _mm256_set1_epi8(chr[5]); \ + __m256i v6 = _mm256_set1_epi8(chr[6]); \ + __m256i v7 = _mm256_set1_epi8(chr[7]); \ + __m256i v8 = _mm256_set1_epi8(chr[8]); \ + __m256i v9 = _mm256_set1_epi8(chr[9]); \ + __m256i va = _mm256_set1_epi8(chr[10]); \ + __m256i vb = _mm256_set1_epi8(chr[11]); \ + __m256i vc = _mm256_set1_epi8(chr[12]); \ + __m256i vd = _mm256_set1_epi8(chr[13]); \ + __m256i ve = _mm256_set1_epi8(chr[14]); \ + __m256i vf = _mm256_set1_epi8(chr[15]); \ + , \ + veq = _mm256_or_si256(veq, _mm256_cmpeq_epi8(v1, vstr)); \ + veq = _mm256_or_si256(veq, _mm256_cmpeq_epi8(v2, vstr)); \ + veq = _mm256_or_si256(veq, _mm256_cmpeq_epi8(v3, vstr)); \ + veq = _mm256_or_si256(veq, _mm256_cmpeq_epi8(v4, vstr)); \ + veq = _mm256_or_si256(veq, _mm256_cmpeq_epi8(v5, vstr)); \ + veq = _mm256_or_si256(veq, _mm256_cmpeq_epi8(v6, vstr)); \ + veq = _mm256_or_si256(veq, _mm256_cmpeq_epi8(v7, vstr)); \ + veq = _mm256_or_si256(veq, _mm256_cmpeq_epi8(v8, vstr)); \ + veq = _mm256_or_si256(veq, _mm256_cmpeq_epi8(v9, vstr)); \ + veq = _mm256_or_si256(veq, _mm256_cmpeq_epi8(va, vstr)); \ + veq = _mm256_or_si256(veq, _mm256_cmpeq_epi8(vb, vstr)); \ + veq = _mm256_or_si256(veq, _mm256_cmpeq_epi8(vc, vstr)); \ + veq = _mm256_or_si256(veq, _mm256_cmpeq_epi8(vd, vstr)); \ + veq = _mm256_or_si256(veq, _mm256_cmpeq_epi8(ve, vstr)); \ + veq = _mm256_or_si256(veq, _mm256_cmpeq_epi8(vf, vstr)); \ + ) + +// My "needle search" methods +#define ADV_PAT_PIN(N, INIT, COMP) \ +bool Matcher::simd_advance_pattern_pin##N##_pma_avx2(size_t loc) \ +{ \ + const Pattern::Pred *pma = pat_->pma_; \ + const char *chr = pat_->chr_; \ + size_t min = 
pat_->min_; \ + uint16_t lcp = pat_->lcp_; \ + uint16_t lcs = pat_->lcs_; \ + INIT \ + while (true) \ + { \ + const char *s = buf_ + loc + lcp; \ + const char *e = buf_ + end_ + lcp - min + 1; \ + while (s <= e - 32) \ + { \ + __m256i vstrlcp = _mm256_loadu_si256(reinterpret_cast(s)); \ + __m256i vstrlcs = _mm256_loadu_si256(reinterpret_cast(s + lcs - lcp)); \ + __m256i veqlcp = _mm256_cmpeq_epi8(vlcp0, vstrlcp); \ + __m256i veqlcs = _mm256_cmpeq_epi8(vlcs0, vstrlcs); \ + COMP \ + uint32_t mask = _mm256_movemask_epi8(_mm256_and_si256(veqlcp, veqlcs)); \ + while (mask != 0) \ + { \ + uint32_t offset = ctz(mask); \ + loc = s - lcp + offset - buf_; \ + if (loc + 4 > end_ || Pattern::predict_match(pma, &buf_[loc]) == 0) \ + { \ + set_current(loc); \ + return true; \ + } \ + mask &= mask - 1; \ + } \ + s += 32; \ + } \ + s -= lcp; \ + loc = s - buf_; \ + set_current_and_peek_more(loc - 1); \ + loc = cur_ + 1; \ + if (loc + min > end_) \ + return false; \ + if (loc + min + 31 > end_) \ + break; \ + } \ + return advance_pattern(loc); \ +} \ +\ +bool Matcher::simd_advance_pattern_pin##N##_pmh_avx2(size_t loc) \ +{ \ + const Pattern::Pred *pmh = pat_->pmh_; \ + const char *chr = pat_->chr_; \ + size_t min = pat_->min_; \ + uint16_t lcp = pat_->lcp_; \ + uint16_t lcs = pat_->lcs_; \ + INIT \ + while (true) \ + { \ + const char *s = buf_ + loc + lcp; \ + const char *e = buf_ + end_ + lcp - min + 1; \ + while (s <= e - 32) \ + { \ + __m256i vstrlcp = _mm256_loadu_si256(reinterpret_cast(s)); \ + __m256i vstrlcs = _mm256_loadu_si256(reinterpret_cast(s + lcs - lcp)); \ + __m256i veqlcp = _mm256_cmpeq_epi8(vlcp0, vstrlcp); \ + __m256i veqlcs = _mm256_cmpeq_epi8(vlcs0, vstrlcs); \ + COMP \ + uint32_t mask = _mm256_movemask_epi8(_mm256_and_si256(veqlcp, veqlcs)); \ + while (mask != 0) \ + { \ + uint32_t offset = ctz(mask); \ + loc = s - lcp + offset - buf_; \ + if (Pattern::predict_match(pmh, &buf_[loc], min)) \ + { \ + set_current(loc); \ + return true; \ + } \ + mask &= mask - 1; 
\ + } \ + s += 32; \ + } \ + s -= lcp; \ + loc = s - buf_; \ + set_current_and_peek_more(loc - 1); \ + loc = cur_ + 1; \ + if (loc + min > end_) \ + return false; \ + if (loc + min + 31 > end_) \ + break; \ + } \ + return advance_pattern_min4(loc); \ +} + +ADV_PAT_PIN(2, \ + __m256i vlcp0 = _mm256_set1_epi8(chr[0]); \ + __m256i vlcp1 = _mm256_set1_epi8(chr[1]); \ + __m256i vlcs0 = _mm256_set1_epi8(chr[2]); \ + __m256i vlcs1 = _mm256_set1_epi8(chr[3]); \ + , \ + veqlcp = _mm256_or_si256(veqlcp, _mm256_cmpeq_epi8(vlcp1, vstrlcp)); \ + veqlcs = _mm256_or_si256(veqlcs, _mm256_cmpeq_epi8(vlcs1, vstrlcs)); \ + ) + +ADV_PAT_PIN(3, \ + __m256i vlcp0 = _mm256_set1_epi8(chr[0]); \ + __m256i vlcp1 = _mm256_set1_epi8(chr[1]); \ + __m256i vlcp2 = _mm256_set1_epi8(chr[2]); \ + __m256i vlcs0 = _mm256_set1_epi8(chr[3]); \ + __m256i vlcs1 = _mm256_set1_epi8(chr[4]); \ + __m256i vlcs2 = _mm256_set1_epi8(chr[5]); \ + , \ + veqlcp = _mm256_or_si256(veqlcp, _mm256_cmpeq_epi8(vlcp1, vstrlcp)); \ + veqlcp = _mm256_or_si256(veqlcp, _mm256_cmpeq_epi8(vlcp2, vstrlcp)); \ + veqlcs = _mm256_or_si256(veqlcs, _mm256_cmpeq_epi8(vlcs1, vstrlcs)); \ + veqlcs = _mm256_or_si256(veqlcs, _mm256_cmpeq_epi8(vlcs2, vstrlcs)); \ + ) + +ADV_PAT_PIN(4, \ + __m256i vlcp0 = _mm256_set1_epi8(chr[0]); \ + __m256i vlcp1 = _mm256_set1_epi8(chr[1]); \ + __m256i vlcp2 = _mm256_set1_epi8(chr[2]); \ + __m256i vlcp3 = _mm256_set1_epi8(chr[3]); \ + __m256i vlcs0 = _mm256_set1_epi8(chr[4]); \ + __m256i vlcs1 = _mm256_set1_epi8(chr[5]); \ + __m256i vlcs2 = _mm256_set1_epi8(chr[6]); \ + __m256i vlcs3 = _mm256_set1_epi8(chr[7]); \ + , \ + veqlcp = _mm256_or_si256(veqlcp, _mm256_cmpeq_epi8(vlcp1, vstrlcp)); \ + veqlcp = _mm256_or_si256(veqlcp, _mm256_cmpeq_epi8(vlcp2, vstrlcp)); \ + veqlcp = _mm256_or_si256(veqlcp, _mm256_cmpeq_epi8(vlcp3, vstrlcp)); \ + veqlcs = _mm256_or_si256(veqlcs, _mm256_cmpeq_epi8(vlcs1, vstrlcs)); \ + veqlcs = _mm256_or_si256(veqlcs, _mm256_cmpeq_epi8(vlcs2, vstrlcs)); \ + veqlcs = 
_mm256_or_si256(veqlcs, _mm256_cmpeq_epi8(vlcs3, vstrlcs)); \ + ) + +ADV_PAT_PIN(5, \ + __m256i vlcp0 = _mm256_set1_epi8(chr[0]); \ + __m256i vlcp1 = _mm256_set1_epi8(chr[1]); \ + __m256i vlcp2 = _mm256_set1_epi8(chr[2]); \ + __m256i vlcp3 = _mm256_set1_epi8(chr[3]); \ + __m256i vlcp4 = _mm256_set1_epi8(chr[4]); \ + __m256i vlcs0 = _mm256_set1_epi8(chr[5]); \ + __m256i vlcs1 = _mm256_set1_epi8(chr[6]); \ + __m256i vlcs2 = _mm256_set1_epi8(chr[7]); \ + __m256i vlcs3 = _mm256_set1_epi8(chr[8]); \ + __m256i vlcs4 = _mm256_set1_epi8(chr[9]); \ + , \ + veqlcp = _mm256_or_si256(veqlcp, _mm256_cmpeq_epi8(vlcp1, vstrlcp)); \ + veqlcp = _mm256_or_si256(veqlcp, _mm256_cmpeq_epi8(vlcp2, vstrlcp)); \ + veqlcp = _mm256_or_si256(veqlcp, _mm256_cmpeq_epi8(vlcp3, vstrlcp)); \ + veqlcp = _mm256_or_si256(veqlcp, _mm256_cmpeq_epi8(vlcp4, vstrlcp)); \ + veqlcs = _mm256_or_si256(veqlcs, _mm256_cmpeq_epi8(vlcs1, vstrlcs)); \ + veqlcs = _mm256_or_si256(veqlcs, _mm256_cmpeq_epi8(vlcs2, vstrlcs)); \ + veqlcs = _mm256_or_si256(veqlcs, _mm256_cmpeq_epi8(vlcs3, vstrlcs)); \ + veqlcs = _mm256_or_si256(veqlcs, _mm256_cmpeq_epi8(vlcs4, vstrlcs)); \ + ) + +ADV_PAT_PIN(6, \ + __m256i vlcp0 = _mm256_set1_epi8(chr[0]); \ + __m256i vlcp1 = _mm256_set1_epi8(chr[1]); \ + __m256i vlcp2 = _mm256_set1_epi8(chr[2]); \ + __m256i vlcp3 = _mm256_set1_epi8(chr[3]); \ + __m256i vlcp4 = _mm256_set1_epi8(chr[4]); \ + __m256i vlcp5 = _mm256_set1_epi8(chr[5]); \ + __m256i vlcs0 = _mm256_set1_epi8(chr[6]); \ + __m256i vlcs1 = _mm256_set1_epi8(chr[7]); \ + __m256i vlcs2 = _mm256_set1_epi8(chr[8]); \ + __m256i vlcs3 = _mm256_set1_epi8(chr[9]); \ + __m256i vlcs4 = _mm256_set1_epi8(chr[10]); \ + __m256i vlcs5 = _mm256_set1_epi8(chr[11]); \ + , \ + veqlcp = _mm256_or_si256(veqlcp, _mm256_cmpeq_epi8(vlcp1, vstrlcp)); \ + veqlcp = _mm256_or_si256(veqlcp, _mm256_cmpeq_epi8(vlcp2, vstrlcp)); \ + veqlcp = _mm256_or_si256(veqlcp, _mm256_cmpeq_epi8(vlcp3, vstrlcp)); \ + veqlcp = _mm256_or_si256(veqlcp, _mm256_cmpeq_epi8(vlcp4, 
vstrlcp)); \ + veqlcp = _mm256_or_si256(veqlcp, _mm256_cmpeq_epi8(vlcp5, vstrlcp)); \ + veqlcs = _mm256_or_si256(veqlcs, _mm256_cmpeq_epi8(vlcs1, vstrlcs)); \ + veqlcs = _mm256_or_si256(veqlcs, _mm256_cmpeq_epi8(vlcs2, vstrlcs)); \ + veqlcs = _mm256_or_si256(veqlcs, _mm256_cmpeq_epi8(vlcs3, vstrlcs)); \ + veqlcs = _mm256_or_si256(veqlcs, _mm256_cmpeq_epi8(vlcs4, vstrlcs)); \ + veqlcs = _mm256_or_si256(veqlcs, _mm256_cmpeq_epi8(vlcs5, vstrlcs)); \ + ) + +ADV_PAT_PIN(7, \ + __m256i vlcp0 = _mm256_set1_epi8(chr[0]); \ + __m256i vlcp1 = _mm256_set1_epi8(chr[1]); \ + __m256i vlcp2 = _mm256_set1_epi8(chr[2]); \ + __m256i vlcp3 = _mm256_set1_epi8(chr[3]); \ + __m256i vlcp4 = _mm256_set1_epi8(chr[4]); \ + __m256i vlcp5 = _mm256_set1_epi8(chr[5]); \ + __m256i vlcp6 = _mm256_set1_epi8(chr[6]); \ + __m256i vlcs0 = _mm256_set1_epi8(chr[7]); \ + __m256i vlcs1 = _mm256_set1_epi8(chr[8]); \ + __m256i vlcs2 = _mm256_set1_epi8(chr[9]); \ + __m256i vlcs3 = _mm256_set1_epi8(chr[10]); \ + __m256i vlcs4 = _mm256_set1_epi8(chr[11]); \ + __m256i vlcs5 = _mm256_set1_epi8(chr[12]); \ + __m256i vlcs6 = _mm256_set1_epi8(chr[13]); \ + , \ + veqlcp = _mm256_or_si256(veqlcp, _mm256_cmpeq_epi8(vlcp1, vstrlcp)); \ + veqlcp = _mm256_or_si256(veqlcp, _mm256_cmpeq_epi8(vlcp2, vstrlcp)); \ + veqlcp = _mm256_or_si256(veqlcp, _mm256_cmpeq_epi8(vlcp3, vstrlcp)); \ + veqlcp = _mm256_or_si256(veqlcp, _mm256_cmpeq_epi8(vlcp4, vstrlcp)); \ + veqlcp = _mm256_or_si256(veqlcp, _mm256_cmpeq_epi8(vlcp5, vstrlcp)); \ + veqlcp = _mm256_or_si256(veqlcp, _mm256_cmpeq_epi8(vlcp6, vstrlcp)); \ + veqlcs = _mm256_or_si256(veqlcs, _mm256_cmpeq_epi8(vlcs1, vstrlcs)); \ + veqlcs = _mm256_or_si256(veqlcs, _mm256_cmpeq_epi8(vlcs2, vstrlcs)); \ + veqlcs = _mm256_or_si256(veqlcs, _mm256_cmpeq_epi8(vlcs3, vstrlcs)); \ + veqlcs = _mm256_or_si256(veqlcs, _mm256_cmpeq_epi8(vlcs4, vstrlcs)); \ + veqlcs = _mm256_or_si256(veqlcs, _mm256_cmpeq_epi8(vlcs5, vstrlcs)); \ + veqlcs = _mm256_or_si256(veqlcs, _mm256_cmpeq_epi8(vlcs6, 
vstrlcs)); \ + ) + +ADV_PAT_PIN(8, \ + __m256i vlcp0 = _mm256_set1_epi8(chr[0]); \ + __m256i vlcp1 = _mm256_set1_epi8(chr[1]); \ + __m256i vlcp2 = _mm256_set1_epi8(chr[2]); \ + __m256i vlcp3 = _mm256_set1_epi8(chr[3]); \ + __m256i vlcp4 = _mm256_set1_epi8(chr[4]); \ + __m256i vlcp5 = _mm256_set1_epi8(chr[5]); \ + __m256i vlcp6 = _mm256_set1_epi8(chr[6]); \ + __m256i vlcp7 = _mm256_set1_epi8(chr[7]); \ + __m256i vlcs0 = _mm256_set1_epi8(chr[8]); \ + __m256i vlcs1 = _mm256_set1_epi8(chr[9]); \ + __m256i vlcs2 = _mm256_set1_epi8(chr[10]); \ + __m256i vlcs3 = _mm256_set1_epi8(chr[11]); \ + __m256i vlcs4 = _mm256_set1_epi8(chr[12]); \ + __m256i vlcs5 = _mm256_set1_epi8(chr[13]); \ + __m256i vlcs6 = _mm256_set1_epi8(chr[14]); \ + __m256i vlcs7 = _mm256_set1_epi8(chr[15]); \ + , \ + veqlcp = _mm256_or_si256(veqlcp, _mm256_cmpeq_epi8(vlcp1, vstrlcp)); \ + veqlcp = _mm256_or_si256(veqlcp, _mm256_cmpeq_epi8(vlcp2, vstrlcp)); \ + veqlcp = _mm256_or_si256(veqlcp, _mm256_cmpeq_epi8(vlcp3, vstrlcp)); \ + veqlcp = _mm256_or_si256(veqlcp, _mm256_cmpeq_epi8(vlcp4, vstrlcp)); \ + veqlcp = _mm256_or_si256(veqlcp, _mm256_cmpeq_epi8(vlcp5, vstrlcp)); \ + veqlcp = _mm256_or_si256(veqlcp, _mm256_cmpeq_epi8(vlcp6, vstrlcp)); \ + veqlcp = _mm256_or_si256(veqlcp, _mm256_cmpeq_epi8(vlcp7, vstrlcp)); \ + veqlcs = _mm256_or_si256(veqlcs, _mm256_cmpeq_epi8(vlcs1, vstrlcs)); \ + veqlcs = _mm256_or_si256(veqlcs, _mm256_cmpeq_epi8(vlcs2, vstrlcs)); \ + veqlcs = _mm256_or_si256(veqlcs, _mm256_cmpeq_epi8(vlcs3, vstrlcs)); \ + veqlcs = _mm256_or_si256(veqlcs, _mm256_cmpeq_epi8(vlcs4, vstrlcs)); \ + veqlcs = _mm256_or_si256(veqlcs, _mm256_cmpeq_epi8(vlcs5, vstrlcs)); \ + veqlcs = _mm256_or_si256(veqlcs, _mm256_cmpeq_epi8(vlcs6, vstrlcs)); \ + veqlcs = _mm256_or_si256(veqlcs, _mm256_cmpeq_epi8(vlcs7, vstrlcs)); \ + ) + +ADV_PAT_PIN(16, \ + __m256i vlcp0 = _mm256_set1_epi8(chr[0]); \ + __m256i vlcp1 = _mm256_set1_epi8(chr[1]); \ + __m256i vlcp2 = _mm256_set1_epi8(chr[2]); \ + __m256i vlcp3 = 
_mm256_set1_epi8(chr[3]); \ + __m256i vlcp4 = _mm256_set1_epi8(chr[4]); \ + __m256i vlcp5 = _mm256_set1_epi8(chr[5]); \ + __m256i vlcp6 = _mm256_set1_epi8(chr[6]); \ + __m256i vlcp7 = _mm256_set1_epi8(chr[7]); \ + __m256i vlcp8 = _mm256_set1_epi8(chr[8]); \ + __m256i vlcp9 = _mm256_set1_epi8(chr[9]); \ + __m256i vlcpa = _mm256_set1_epi8(chr[10]); \ + __m256i vlcpb = _mm256_set1_epi8(chr[11]); \ + __m256i vlcpc = _mm256_set1_epi8(chr[12]); \ + __m256i vlcpd = _mm256_set1_epi8(chr[13]); \ + __m256i vlcpe = _mm256_set1_epi8(chr[14]); \ + __m256i vlcpf = _mm256_set1_epi8(chr[15]); \ + __m256i vlcs0 = _mm256_set1_epi8(chr[16]); \ + __m256i vlcs1 = _mm256_set1_epi8(chr[17]); \ + __m256i vlcs2 = _mm256_set1_epi8(chr[18]); \ + __m256i vlcs3 = _mm256_set1_epi8(chr[19]); \ + __m256i vlcs4 = _mm256_set1_epi8(chr[20]); \ + __m256i vlcs5 = _mm256_set1_epi8(chr[21]); \ + __m256i vlcs6 = _mm256_set1_epi8(chr[22]); \ + __m256i vlcs7 = _mm256_set1_epi8(chr[23]); \ + __m256i vlcs8 = _mm256_set1_epi8(chr[24]); \ + __m256i vlcs9 = _mm256_set1_epi8(chr[25]); \ + __m256i vlcsa = _mm256_set1_epi8(chr[26]); \ + __m256i vlcsb = _mm256_set1_epi8(chr[27]); \ + __m256i vlcsc = _mm256_set1_epi8(chr[28]); \ + __m256i vlcsd = _mm256_set1_epi8(chr[29]); \ + __m256i vlcse = _mm256_set1_epi8(chr[30]); \ + __m256i vlcsf = _mm256_set1_epi8(chr[31]); \ + , \ + veqlcp = _mm256_or_si256(veqlcp, _mm256_cmpeq_epi8(vlcp1, vstrlcp)); \ + veqlcp = _mm256_or_si256(veqlcp, _mm256_cmpeq_epi8(vlcp2, vstrlcp)); \ + veqlcp = _mm256_or_si256(veqlcp, _mm256_cmpeq_epi8(vlcp3, vstrlcp)); \ + veqlcp = _mm256_or_si256(veqlcp, _mm256_cmpeq_epi8(vlcp4, vstrlcp)); \ + veqlcp = _mm256_or_si256(veqlcp, _mm256_cmpeq_epi8(vlcp5, vstrlcp)); \ + veqlcp = _mm256_or_si256(veqlcp, _mm256_cmpeq_epi8(vlcp6, vstrlcp)); \ + veqlcp = _mm256_or_si256(veqlcp, _mm256_cmpeq_epi8(vlcp7, vstrlcp)); \ + veqlcp = _mm256_or_si256(veqlcp, _mm256_cmpeq_epi8(vlcp8, vstrlcp)); \ + veqlcp = _mm256_or_si256(veqlcp, _mm256_cmpeq_epi8(vlcp9, vstrlcp)); 
\ + veqlcp = _mm256_or_si256(veqlcp, _mm256_cmpeq_epi8(vlcpa, vstrlcp)); \ + veqlcp = _mm256_or_si256(veqlcp, _mm256_cmpeq_epi8(vlcpb, vstrlcp)); \ + veqlcp = _mm256_or_si256(veqlcp, _mm256_cmpeq_epi8(vlcpc, vstrlcp)); \ + veqlcp = _mm256_or_si256(veqlcp, _mm256_cmpeq_epi8(vlcpd, vstrlcp)); \ + veqlcp = _mm256_or_si256(veqlcp, _mm256_cmpeq_epi8(vlcpe, vstrlcp)); \ + veqlcp = _mm256_or_si256(veqlcp, _mm256_cmpeq_epi8(vlcpf, vstrlcp)); \ + veqlcs = _mm256_or_si256(veqlcs, _mm256_cmpeq_epi8(vlcs1, vstrlcs)); \ + veqlcs = _mm256_or_si256(veqlcs, _mm256_cmpeq_epi8(vlcs2, vstrlcs)); \ + veqlcs = _mm256_or_si256(veqlcs, _mm256_cmpeq_epi8(vlcs3, vstrlcs)); \ + veqlcs = _mm256_or_si256(veqlcs, _mm256_cmpeq_epi8(vlcs4, vstrlcs)); \ + veqlcs = _mm256_or_si256(veqlcs, _mm256_cmpeq_epi8(vlcs5, vstrlcs)); \ + veqlcs = _mm256_or_si256(veqlcs, _mm256_cmpeq_epi8(vlcs6, vstrlcs)); \ + veqlcs = _mm256_or_si256(veqlcs, _mm256_cmpeq_epi8(vlcs7, vstrlcs)); \ + veqlcs = _mm256_or_si256(veqlcs, _mm256_cmpeq_epi8(vlcs8, vstrlcs)); \ + veqlcs = _mm256_or_si256(veqlcs, _mm256_cmpeq_epi8(vlcs9, vstrlcs)); \ + veqlcs = _mm256_or_si256(veqlcs, _mm256_cmpeq_epi8(vlcsa, vstrlcs)); \ + veqlcs = _mm256_or_si256(veqlcs, _mm256_cmpeq_epi8(vlcsb, vstrlcs)); \ + veqlcs = _mm256_or_si256(veqlcs, _mm256_cmpeq_epi8(vlcsc, vstrlcs)); \ + veqlcs = _mm256_or_si256(veqlcs, _mm256_cmpeq_epi8(vlcsd, vstrlcs)); \ + veqlcs = _mm256_or_si256(veqlcs, _mm256_cmpeq_epi8(vlcse, vstrlcs)); \ + veqlcs = _mm256_or_si256(veqlcs, _mm256_cmpeq_epi8(vlcsf, vstrlcs)); \ + ) + +/// Few chars +template +bool Matcher::simd_advance_chars_avx2(size_t loc) +{ + static const uint16_t lcp = 0; + static const uint16_t lcs = LEN - 1; + const char *chr = pat_->chr_; + while (true) + { + const char *s = buf_ + loc + lcp; + const char *e = buf_ + end_ + lcp - LEN + 1; + __m256i vlcp = _mm256_set1_epi8(chr[lcp]); + __m256i vlcs = _mm256_set1_epi8(chr[lcs]); + while (s <= e - 32) + { + __m256i vlcpm = 
_mm256_loadu_si256(reinterpret_cast(s)); + __m256i vlcsm = _mm256_loadu_si256(reinterpret_cast(s + lcs - lcp)); + __m256i vlcpeq = _mm256_cmpeq_epi8(vlcp, vlcpm); + __m256i vlcseq = _mm256_cmpeq_epi8(vlcs, vlcsm); + uint32_t mask = _mm256_movemask_epi8(_mm256_and_si256(vlcpeq, vlcseq)); + while (mask != 0) + { + uint32_t offset = ctz(mask); + if (LEN == 2 || + (LEN == 3 ? s[offset + 1 - lcp] == chr[1] : std::memcmp(s + 1 - lcp + offset, chr + 1, LEN - 2) == 0)) + { + loc = s - lcp + offset - buf_; + set_current(loc); + return true; + } + mask &= mask - 1; + } + s += 32; + } + s -= lcp; + loc = s - buf_; + set_current_and_peek_more(loc - 1); + loc = cur_ + 1; + if (loc + LEN > end_) + return false; + if (loc + LEN + 31 > end_) + break; + } + return advance_chars(loc); +} + +/// Few chars followed by 2 to 3 minimal char pattern +template +bool Matcher::simd_advance_chars_pma_avx2(size_t loc) +{ + static const uint16_t lcp = 0; + static const uint16_t lcs = LEN - 1; + const Pattern::Pred *pma = pat_->pma_; + const char *chr = pat_->chr_; + size_t min = pat_->min_; + while (true) + { + const char *s = buf_ + loc + lcp; + const char *e = buf_ + end_ + lcp - LEN + 1; + __m256i vlcp = _mm256_set1_epi8(chr[lcp]); + __m256i vlcs = _mm256_set1_epi8(chr[lcs]); + while (s <= e - 32) + { + __m256i vlcpm = _mm256_loadu_si256(reinterpret_cast(s)); + __m256i vlcsm = _mm256_loadu_si256(reinterpret_cast(s + lcs - lcp)); + __m256i vlcpeq = _mm256_cmpeq_epi8(vlcp, vlcpm); + __m256i vlcseq = _mm256_cmpeq_epi8(vlcs, vlcsm); + uint32_t mask = _mm256_movemask_epi8(_mm256_and_si256(vlcpeq, vlcseq)); + while (mask != 0) + { + uint32_t offset = ctz(mask); + if (LEN == 2 || + (LEN == 3 ? 
s[offset + 1 - lcp] == chr[1] : std::memcmp(s + 1 - lcp + offset, chr + 1, LEN - 2) == 0)) + { + loc = s - lcp + offset - buf_; + if (loc + LEN + 4 > end_ || Pattern::predict_match(pma, &buf_[loc + LEN]) == 0) + { + set_current(loc); + return true; + } + } + mask &= mask - 1; + } + s += 32; + } + s -= lcp; + loc = s - buf_; + set_current_and_peek_more(loc - 1); + loc = cur_ + 1; + if (loc + LEN + min > end_) + return false; + if (loc + LEN + min + 31 > end_) + break; + } + return advance_chars_pma(loc); +} + +/// Few chars followed by 4 minimal char pattern +template +bool Matcher::simd_advance_chars_pmh_avx2(size_t loc) +{ + static const uint16_t lcp = 0; + static const uint16_t lcs = LEN - 1; + const Pattern::Pred *pmh = pat_->pmh_; + const char *chr = pat_->chr_; + size_t min = pat_->min_; + while (true) + { + const char *s = buf_ + loc + lcp; + const char *e = buf_ + end_ + lcp - LEN + 1; + __m256i vlcp = _mm256_set1_epi8(chr[lcp]); + __m256i vlcs = _mm256_set1_epi8(chr[lcs]); + while (s <= e - 32) + { + __m256i vlcpm = _mm256_loadu_si256(reinterpret_cast(s)); + __m256i vlcsm = _mm256_loadu_si256(reinterpret_cast(s + lcs - lcp)); + __m256i vlcpeq = _mm256_cmpeq_epi8(vlcp, vlcpm); + __m256i vlcseq = _mm256_cmpeq_epi8(vlcs, vlcsm); + uint32_t mask = _mm256_movemask_epi8(_mm256_and_si256(vlcpeq, vlcseq)); + while (mask != 0) + { + uint32_t offset = ctz(mask); + if (LEN == 2 || + (LEN == 3 ? 
s[offset + 1 - lcp] == chr[1] : std::memcmp(s + 1 - lcp + offset, chr + 1, LEN - 2) == 0)) + { + loc = s - lcp + offset - buf_; + set_current(loc); + if (loc + LEN + min > end_ || Pattern::predict_match(pmh, &buf_[loc + LEN], min)) + return true; + } + mask &= mask - 1; + } + s += 32; + } + s -= lcp; + loc = s - buf_; + set_current_and_peek_more(loc - 1); + loc = cur_ + 1; + if (loc + LEN + min > end_) + return false; + if (loc + LEN + min + 31 > end_) + break; + } + return advance_chars_pmh(loc); +} + +/// Implements AVX2 string search scheme based on http://0x80.pl/articles/simd-friendly-karp-rabin.html +bool Matcher::simd_advance_string_avx2(size_t loc) +{ + const char *chr = pat_->chr_; + size_t len = pat_->len_; + uint16_t lcp = pat_->lcp_; + uint16_t lcs = pat_->lcs_; + while (true) + { + const char *s = buf_ + loc + lcp; + const char *e = buf_ + end_ + lcp - len + 1; + __m256i vlcp = _mm256_set1_epi8(chr[lcp]); + __m256i vlcs = _mm256_set1_epi8(chr[lcs]); + while (s <= e - 32) + { + __m256i vlcpm = _mm256_loadu_si256(reinterpret_cast(s)); + __m256i vlcsm = _mm256_loadu_si256(reinterpret_cast(s + lcs - lcp)); + __m256i vlcpeq = _mm256_cmpeq_epi8(vlcp, vlcpm); + __m256i vlcseq = _mm256_cmpeq_epi8(vlcs, vlcsm); + uint32_t mask = _mm256_movemask_epi8(_mm256_and_si256(vlcpeq, vlcseq)); + while (mask != 0) + { + uint32_t offset = ctz(mask); + if (std::memcmp(s - lcp + offset, chr, len) == 0) + { + loc = s - lcp + offset - buf_; + set_current(loc); + return true; + } + mask &= mask - 1; + } + s += 32; + } + s -= lcp; + loc = s - buf_; + set_current_and_peek_more(loc - 1); + loc = cur_ + 1; + if (loc + len > end_) + return false; + if (loc + len + 31 > end_) + break; + } + return advance_string(loc); +} + +#if defined(WITH_STRING_PM) + +/// Implements AVX2 string search scheme based on http://0x80.pl/articles/simd-friendly-karp-rabin.html +bool Matcher::simd_advance_string_pma_avx2(size_t loc) +{ + const Pattern::Pred *pma = pat_->pma_; + const char *chr = 
pat_->chr_; + size_t len = pat_->len_; + size_t min = pat_->min_; + uint16_t lcp = pat_->lcp_; + uint16_t lcs = pat_->lcs_; + while (true) + { + const char *s = buf_ + loc + lcp; + const char *e = buf_ + end_ + lcp - len + 1; + __m256i vlcp = _mm256_set1_epi8(chr[lcp]); + __m256i vlcs = _mm256_set1_epi8(chr[lcs]); + while (s <= e - 32) + { + __m256i vlcpm = _mm256_loadu_si256(reinterpret_cast(s)); + __m256i vlcsm = _mm256_loadu_si256(reinterpret_cast(s + lcs - lcp)); + __m256i vlcpeq = _mm256_cmpeq_epi8(vlcp, vlcpm); + __m256i vlcseq = _mm256_cmpeq_epi8(vlcs, vlcsm); + uint32_t mask = _mm256_movemask_epi8(_mm256_and_si256(vlcpeq, vlcseq)); + while (mask != 0) + { + uint32_t offset = ctz(mask); + if (std::memcmp(s - lcp + offset, chr, len) == 0) + { + loc = s - lcp + offset - buf_; + if (loc + len + 4 > end_ || Pattern::predict_match(pma, &buf_[loc + len]) == 0) + { + set_current(loc); + return true; + } + } + mask &= mask - 1; + } + s += 32; + } + s -= lcp; + loc = s - buf_; + set_current_and_peek_more(loc - 1); + loc = cur_ + 1; + if (loc + len + min > end_) + return false; + if (loc + len + min + 31 > end_) + break; + } + return advance_string_pma(loc); +} + +/// Implements AVX2 string search scheme based on http://0x80.pl/articles/simd-friendly-karp-rabin.html +bool Matcher::simd_advance_string_pmh_avx2(size_t loc) +{ + const Pattern::Pred *pmh = pat_->pmh_; + const char *chr = pat_->chr_; + size_t len = pat_->len_; + size_t min = pat_->min_; + uint16_t lcp = pat_->lcp_; + uint16_t lcs = pat_->lcs_; + while (true) + { + const char *s = buf_ + loc + lcp; + const char *e = buf_ + end_ + lcp - len + 1; + __m256i vlcp = _mm256_set1_epi8(chr[lcp]); + __m256i vlcs = _mm256_set1_epi8(chr[lcs]); + while (s <= e - 32) + { + __m256i vlcpm = _mm256_loadu_si256(reinterpret_cast(s)); + __m256i vlcsm = _mm256_loadu_si256(reinterpret_cast(s + lcs - lcp)); + __m256i vlcpeq = _mm256_cmpeq_epi8(vlcp, vlcpm); + __m256i vlcseq = _mm256_cmpeq_epi8(vlcs, vlcsm); + uint32_t mask = 
_mm256_movemask_epi8(_mm256_and_si256(vlcpeq, vlcseq)); + while (mask != 0) + { + uint32_t offset = ctz(mask); + if (std::memcmp(s - lcp + offset, chr, len) == 0) + { + loc = s - lcp + offset - buf_; + set_current(loc); + if (loc + len + min > end_ || Pattern::predict_match(pmh, &buf_[loc + len], min)) + return true; + } + mask &= mask - 1; + } + s += 32; + } + s -= lcp; + loc = s - buf_; + set_current_and_peek_more(loc - 1); + loc = cur_ + 1; + if (loc + len + min > end_) + return false; + if (loc + len + min + 31 > end_) + break; + } + return advance_string_pmh(loc); +} + +#endif // WITH_STRING_PM + +#else + +// appease ranlib "has no symbols" +void matcher_not_compiled_with_avx2() { } + +#endif + +} // namespace reflex diff --git a/ccl/rslang/import/reflex/lib/matcher_avx512bw.cpp b/ccl/rslang/import/reflex/lib/matcher_avx512bw.cpp index 35e46a7..4af50af 100644 --- a/ccl/rslang/import/reflex/lib/matcher_avx512bw.cpp +++ b/ccl/rslang/import/reflex/lib/matcher_avx512bw.cpp @@ -27,10 +27,10 @@ \******************************************************************************/ /** -@file matcher.cpp, matcher_avx2.cpp, matcher_avx512bw.cpp +@file matcher_avx512bw.cpp @brief RE/flex matcher engine @author Robert van Engelen - engelen@genivia.com -@copyright (c) 2016-2022, Robert van Engelen, Genivia Inc. All rights reserved. +@copyright (c) 2016-2024, Robert van Engelen, Genivia Inc. All rights reserved. 
@copyright (c) BSD-3 License - see LICENSE.txt */ @@ -40,5 +40,344 @@ # endif #endif -#define COMPILE_AVX512BW -#include "matcher.cpp" +#include + +namespace reflex { + +#if defined(HAVE_AVX512BW) && (!defined(_MSC_VER) || defined(_WIN64)) + +// AVX512BW runtime optimized function callback overrides +void Matcher::simd_init_advance_avx512bw() +{ + if (pat_->len_ == 0) + { + // no specialization + } + else if (pat_->len_ == 1) + { + // no specialization + } + else if (pat_->len_ == 2) + { + if (pat_->min_ == 0) + adv_ = &Matcher::simd_advance_chars_avx512bw<2>; + else if (pat_->min_ < 4) + adv_ = &Matcher::simd_advance_chars_pma_avx512bw<2>; + else + adv_ = &Matcher::simd_advance_chars_pmh_avx512bw<2>; + } + else if (pat_->len_ == 3) + { + if (pat_->min_ == 0) + adv_ = &Matcher::simd_advance_chars_avx512bw<3>; + else if (pat_->min_ < 4) + adv_ = &Matcher::simd_advance_chars_pma_avx512bw<3>; + else + adv_ = &Matcher::simd_advance_chars_pmh_avx512bw<3>; + } + else if (pat_->bmd_ == 0) + { +#if defined(WITH_STRING_PM) + if (pat_->min_ >= 4) + adv_ = &Matcher::simd_advance_string_pmh_avx512bw; + else if (pat_->min_ > 0) + adv_ = &Matcher::simd_advance_string_pma_avx512bw; + else +#endif + adv_ = &Matcher::simd_advance_string_avx512bw; + } +} + +/// Few chars +template +bool Matcher::simd_advance_chars_avx512bw(size_t loc) +{ + static const uint16_t lcp = 0; + static const uint16_t lcs = LEN - 1; + const char *chr = pat_->chr_; + while (true) + { + const char *s = buf_ + loc + lcp; + const char *e = buf_ + end_ + lcp - LEN + 1; + __m512i vlcp = _mm512_set1_epi8(chr[lcp]); + __m512i vlcs = _mm512_set1_epi8(chr[lcs]); + while (s <= e - 64) + { + __m512i vlcpm = _mm512_loadu_si512(reinterpret_cast(s)); + __m512i vlcsm = _mm512_loadu_si512(reinterpret_cast(s + lcs - lcp)); + uint64_t mask = _mm512_cmpeq_epi8_mask(vlcp, vlcpm) & _mm512_cmpeq_epi8_mask(vlcs, vlcsm); + while (mask != 0) + { + uint32_t offset = ctzl(mask); + if (LEN == 2 || + (LEN == 3 ? 
s[offset + 1 - lcp] == chr[1] : std::memcmp(s + 1 - lcp + offset, chr + 1, LEN - 2) == 0)) + { + loc = s - lcp + offset - buf_; + set_current(loc); + return true; + } + mask &= mask - 1; + } + s += 64; + } + s -= lcp; + loc = s - buf_; + set_current_and_peek_more(loc - 1); + loc = cur_ + 1; + if (loc + LEN > end_) + return false; + if (loc + LEN + 63 > end_) + break; + } + return advance_chars(loc); +} + +/// Few chars followed by 2 to 3 minimal char pattern +template +bool Matcher::simd_advance_chars_pma_avx512bw(size_t loc) +{ + static const uint16_t lcp = 0; + static const uint16_t lcs = LEN - 1; + const Pattern::Pred *pma = pat_->pma_; + const char *chr = pat_->chr_; + size_t min = pat_->min_; + while (true) + { + const char *s = buf_ + loc + lcp; + const char *e = buf_ + end_ + lcp - LEN + 1; + __m512i vlcp = _mm512_set1_epi8(chr[lcp]); + __m512i vlcs = _mm512_set1_epi8(chr[lcs]); + while (s <= e - 64) + { + __m512i vlcpm = _mm512_loadu_si512(reinterpret_cast(s)); + __m512i vlcsm = _mm512_loadu_si512(reinterpret_cast(s + lcs - lcp)); + uint64_t mask = _mm512_cmpeq_epi8_mask(vlcp, vlcpm) & _mm512_cmpeq_epi8_mask(vlcs, vlcsm); + while (mask != 0) + { + uint32_t offset = ctzl(mask); + if (LEN == 2 || + (LEN == 3 ? 
s[offset + 1 - lcp] == chr[1] : std::memcmp(s + 1 - lcp + offset, chr + 1, LEN - 2) == 0)) + { + loc = s - lcp + offset - buf_; + if (loc + LEN + 4 > end_ || Pattern::predict_match(pma, &buf_[loc + LEN]) == 0) + { + set_current(loc); + return true; + } + } + mask &= mask - 1; + } + s += 64; + } + s -= lcp; + loc = s - buf_; + set_current_and_peek_more(loc - 1); + loc = cur_ + 1; + if (loc + LEN + min > end_) + return false; + if (loc + LEN + min + 63 > end_) + break; + } + return advance_chars_pma(loc); +} + +/// Few chars followed by 4 minimal char pattern +template +bool Matcher::simd_advance_chars_pmh_avx512bw(size_t loc) +{ + static const uint16_t lcp = 0; + static const uint16_t lcs = LEN - 1; + const Pattern::Pred *pmh = pat_->pmh_; + const char *chr = pat_->chr_; + size_t min = pat_->min_; + while (true) + { + const char *s = buf_ + loc + lcp; + const char *e = buf_ + end_ + lcp - LEN + 1; + __m512i vlcp = _mm512_set1_epi8(chr[lcp]); + __m512i vlcs = _mm512_set1_epi8(chr[lcs]); + while (s <= e - 64) + { + __m512i vlcpm = _mm512_loadu_si512(reinterpret_cast(s)); + __m512i vlcsm = _mm512_loadu_si512(reinterpret_cast(s + lcs - lcp)); + uint64_t mask = _mm512_cmpeq_epi8_mask(vlcp, vlcpm) & _mm512_cmpeq_epi8_mask(vlcs, vlcsm); + while (mask != 0) + { + uint32_t offset = ctzl(mask); + if (LEN == 2 || + (LEN == 3 ? 
s[offset + 1 - lcp] == chr[1] : std::memcmp(s + 1 - lcp + offset, chr + 1, LEN - 2) == 0)) + { + loc = s - lcp + offset - buf_; + if (loc + LEN + min > end_ || Pattern::predict_match(pmh, &buf_[loc + LEN], min)) + { + set_current(loc); + return true; + } + } + mask &= mask - 1; + } + s += 64; + } + s -= lcp; + loc = s - buf_; + set_current_and_peek_more(loc - 1); + loc = cur_ + 1; + if (loc + LEN + min > end_) + return false; + if (loc + LEN + min + 63 > end_) + break; + } + return advance_chars_pmh(loc); +} + +/// Implements AVX512BW string search scheme based on http://0x80.pl/articles/simd-friendly-karp-rabin.html +bool Matcher::simd_advance_string_avx512bw(size_t loc) +{ + const char *chr = pat_->chr_; + size_t len = pat_->len_; + uint16_t lcp = pat_->lcp_; + uint16_t lcs = pat_->lcs_; + while (true) + { + const char *s = buf_ + loc + lcp; + const char *e = buf_ + end_ + lcp - len + 1; + __m512i vlcp = _mm512_set1_epi8(chr[lcp]); + __m512i vlcs = _mm512_set1_epi8(chr[lcs]); + while (s <= e - 64) + { + __m512i vlcpm = _mm512_loadu_si512(reinterpret_cast(s)); + __m512i vlcsm = _mm512_loadu_si512(reinterpret_cast(s + lcs - lcp)); + uint64_t mask = _mm512_cmpeq_epi8_mask(vlcp, vlcpm) & _mm512_cmpeq_epi8_mask(vlcs, vlcsm); + while (mask != 0) + { + uint32_t offset = ctzl(mask); + if (std::memcmp(s - lcp + offset, chr, len) == 0) + { + loc = s - lcp + offset - buf_; + set_current(loc); + return true; + } + mask &= mask - 1; + } + s += 64; + } + s -= lcp; + loc = s - buf_; + set_current_and_peek_more(loc - 1); + loc = cur_ + 1; + if (loc + len > end_) + return false; + if (loc + len + 63 > end_) + break; + } + return advance_string(loc); +} + +#if defined(WITH_STRING_PM) + +/// Implements AVX512BW string search scheme based on http://0x80.pl/articles/simd-friendly-karp-rabin.html +bool Matcher::simd_advance_string_pma_avx512bw(size_t loc) +{ + const Pattern::Pred *pma = pat_->pma_; + const char *chr = pat_->chr_; + size_t len = pat_->len_; + size_t min = pat_->min_; + 
uint16_t lcp = pat_->lcp_; + uint16_t lcs = pat_->lcs_; + while (true) + { + const char *s = buf_ + loc + lcp; + const char *e = buf_ + end_ + lcp - len + 1; + __m512i vlcp = _mm512_set1_epi8(chr[lcp]); + __m512i vlcs = _mm512_set1_epi8(chr[lcs]); + while (s <= e - 64) + { + __m512i vlcpm = _mm512_loadu_si512(reinterpret_cast(s)); + __m512i vlcsm = _mm512_loadu_si512(reinterpret_cast(s + lcs - lcp)); + uint64_t mask = _mm512_cmpeq_epi8_mask(vlcp, vlcpm) & _mm512_cmpeq_epi8_mask(vlcs, vlcsm); + while (mask != 0) + { + uint32_t offset = ctzl(mask); + if (std::memcmp(s - lcp + offset, chr, len) == 0) + { + loc = s - lcp + offset - buf_; + if (loc + len + 4 > end_ || Pattern::predict_match(pma, &buf_[loc + len]) == 0) + { + set_current(loc); + return true; + } + } + mask &= mask - 1; + } + s += 64; + } + s -= lcp; + loc = s - buf_; + set_current_and_peek_more(loc - 1); + loc = cur_ + 1; + if (loc + len + min > end_) + return false; + if (loc + len + min + 63 > end_) + break; + } + return advance_string_pma(loc); +} + +/// Implements AVX512BW string search scheme based on http://0x80.pl/articles/simd-friendly-karp-rabin.html +bool Matcher::simd_advance_string_pmh_avx512bw(size_t loc) +{ + const Pattern::Pred *pmh = pat_->pmh_; + const char *chr = pat_->chr_; + size_t len = pat_->len_; + size_t min = pat_->min_; + uint16_t lcp = pat_->lcp_; + uint16_t lcs = pat_->lcs_; + while (true) + { + const char *s = buf_ + loc + lcp; + const char *e = buf_ + end_ + lcp - len + 1; + __m512i vlcp = _mm512_set1_epi8(chr[lcp]); + __m512i vlcs = _mm512_set1_epi8(chr[lcs]); + while (s <= e - 64) + { + __m512i vlcpm = _mm512_loadu_si512(reinterpret_cast(s)); + __m512i vlcsm = _mm512_loadu_si512(reinterpret_cast(s + lcs - lcp)); + uint64_t mask = _mm512_cmpeq_epi8_mask(vlcp, vlcpm) & _mm512_cmpeq_epi8_mask(vlcs, vlcsm); + while (mask != 0) + { + uint32_t offset = ctzl(mask); + if (std::memcmp(s - lcp + offset, chr, len) == 0) + { + loc = s - lcp + offset - buf_; + if (loc + len + min > 
end_ || Pattern::predict_match(pmh, &buf_[loc + len], min)) + { + set_current(loc); + return true; + } + } + mask &= mask - 1; + } + s += 64; + } + s -= lcp; + loc = s - buf_; + set_current_and_peek_more(loc - 1); + loc = cur_ + 1; + if (loc + len + min > end_) + return false; + if (loc + len + min + 63 > end_) + break; + } + return advance_string_pmh(loc); +} + +#endif // WITH_STRING_PM + +#else + +// appease ranlib "has no symbols" +void matcher_not_compiled_with_avx512bw() { } + +#endif + +} // namespace reflex diff --git a/ccl/rslang/import/reflex/lib/pattern.cpp b/ccl/rslang/import/reflex/lib/pattern.cpp index c2f7fc6..100e1f5 100644 --- a/ccl/rslang/import/reflex/lib/pattern.cpp +++ b/ccl/rslang/import/reflex/lib/pattern.cpp @@ -45,10 +45,10 @@ /// DFA compaction: -1 == reverse order edge compression (best); 1 == edge compression; 0 == no edge compression. /** Edge compression reorders edges to produce fewer tests when executed in the compacted order. For example ([a-cg-ik]|d|[e-g]|j|y|[x-z]) after reverse edge compression has only 2 edges: - c1 = m.FSM_CHAR(); - if ('x' <= c1 && c1 <= 'z') goto S3; - if ('a' <= c1 && c1 <= 'k') goto S3; - return m.FSM_HALT(c1); + c = m.FSM_CHAR(); + if ('x' <= c && c <= 'z') goto S3; + if ('a' <= c && c <= 'k') goto S3; + return m.FSM_HALT(c); */ #define WITH_COMPACT_DFA -1 @@ -177,6 +177,7 @@ void Pattern::init(const char *options, const uint8_t *pred) bmd_ = 0; npy_ = 0; one_ = false; + bol_ = false; vno_ = 0; eno_ = 0; hno_ = 0; @@ -197,36 +198,43 @@ void Pattern::init(const char *options, const uint8_t *pred) len_ = pred[0]; min_ = pred[1] & 0x0f; one_ = pred[1] & 0x10; + bol_ = pred[1] & 0x40; memcpy(chr_, pred + 2, len_); - size_t n = len_ + 2; + size_t n = 2 + len_; if (len_ == 0) { + // get bitap bit_[] parameters for (size_t i = 0; i < 256; ++i) bit_[i] = ~pred[i + n]; n += 256; } - if (min_ >= 4) - { - for (size_t i = 0; i < Const::HASH; ++i) - pmh_[i] = ~pred[i + n]; - } - else + if (min_ < 4) { + // get predict 
match PM4 pma_[] parameters for (size_t i = 0; i < Const::HASH; ++i) pma_[i] = ~pred[i + n]; } + else + { + // get predict match hash pmh_[] parameters + for (size_t i = 0; i < Const::HASH; ++i) + pmh_[i] = ~pred[i + n]; + } + n += Const::HASH; if ((pred[1] & 0x20) != 0) { - n += Const::HASH; + // get lookback parameters lbk_ lbm_ and cbk_[] after s-t cut and first s-t cut pattern characters fst_[] lbk_ = pred[n + 0] | (pred[n + 1] << 8); lbm_ = pred[n + 2] | (pred[n + 3] << 8); for (size_t i = 0; i < 256; ++i) cbk_.set(i, pred[n + 4 + (i >> 3)] & (1 << (i & 7))); for (size_t i = 0; i < 256; ++i) - fst_.set(i, pred[n + 32 + 4 + (i >> 3)] & (1 << (i & 7))); + fst_.set(i, pred[n + 4 + 32 + (i >> 3)] & (1 << (i & 7))); + n += 4 + 32 + 32; } else { + // get first pattern characters fst_[] from bitap for (size_t i = 0; i < 256; ++i) fst_.set(i, (bit_[i] & 1) == 0); } @@ -311,8 +319,7 @@ void Pattern::init(const char *options, const uint8_t *pred) } // needle count and frequency thresholds to enable needle-based search uint16_t pinmax = 8; - uint8_t freqmax1 = 91; // one position - uint8_t freqmax2 = 251; // two positions + uint8_t freqmax = 251; #if defined(HAVE_AVX512BW) || defined(HAVE_AVX2) || defined(HAVE_SSE2) if (have_HW_AVX512BW() || have_HW_AVX2()) pinmax = 16; @@ -331,7 +338,6 @@ void Pattern::init(const char *options, const uint8_t *pred) lcs_ = 0; uint16_t nlcp = 65535; // max and undefined uint16_t nlcs = 65535; // max and undefined - uint16_t freqsum = 0; uint8_t freqlcp = 255; // max uint8_t freqlcs = 255; // max size_t min = (min_ == 0 ? 
1 : min_); @@ -339,7 +345,6 @@ void Pattern::init(const char *options, const uint8_t *pred) { Pred mask = 1 << k; uint16_t n = 0; - uint16_t sum = 0; uint8_t max = 0; // at position k count the matching characters and find the max character frequency for (uint16_t i = 0; i < 256; ++i) @@ -348,14 +353,13 @@ void Pattern::init(const char *options, const uint8_t *pred) { ++n; uint8_t freq = frequency(static_cast(i)); - sum += freq; if (freq > max) max = freq; } } if (n <= pinmax) { - // pick the fewest and rarest (least frequently occurring) needles to search + // pick the fewest and rarest (less frequently occurring) needles to search if (max < freqlcp || (n < nlcp && max == freqlcp)) { lcs_ = lcp_; @@ -363,7 +367,6 @@ void Pattern::init(const char *options, const uint8_t *pred) freqlcs = freqlcp; lcp_ = static_cast(k); nlcp = n; - freqsum = sum; freqlcp = max; } else if (n < nlcs || @@ -377,8 +380,8 @@ void Pattern::init(const char *options, const uint8_t *pred) } } } - // one position to pin: make lcp and lcs equal (compared and optimized later) - if (min == 1 || ((freqsum <= freqlcp || nlcs == 65535) && freqsum <= freqmax1)) + // one position to pin: make lcp and lcs equal to 0 (only one position at 0) + if (min == 1 || nlcs == 65535) { nlcs = nlcp; lcs_ = lcp_; @@ -387,7 +390,7 @@ void Pattern::init(const char *options, const uint8_t *pred) uint16_t n = nlcp > nlcs ? 
nlcp : nlcs; DBGLOG("min=%zu lcp=%hu(%hu) pin=%hu nlcp=%hu(%hu) freq=%hu(%hu) freqsum=%hu npy=%zu", min, lcp_, lcs_, n, nlcp, nlcs, freqlcp, freqlcs, freqsum, npy_); // determine if a needle-based search is worthwhile, below or meeting the thresholds - if (n <= pinmax && freqlcp <= freqmax2) + if (n <= pinmax && freqlcp <= freqmax) { // bridge the gap from 9 to 16 to handle 9 to 16 combined if (n > 8) @@ -412,7 +415,7 @@ void Pattern::init(const char *options, const uint8_t *pred) } else if (len_ > 1) { - // Boyer-Moore preprocessing of the given string pattern pat of length len, generates bmd_ > 0 and bms_[] shifts + // produce lcp and lcs positions and Boyer-Moore bms_[] shifts when bmd_ > 0 uint8_t n = static_cast(len_); // okay to cast: actually never more than 255 uint16_t i; for (i = 0; i < 256; ++i) @@ -433,13 +436,14 @@ void Pattern::init(const char *options, const uint8_t *pred) lcs_ = lcp_; lcp_ = i; } - else if (lcpch != pch && frequency(lcsch) > freqpch) + else if (frequency(lcsch) > freqpch || + (frequency(lcsch) == freqpch && + abs(static_cast(lcp_) - static_cast(lcs_)) < abs(static_cast(lcp_) - static_cast(i)))) { lcs_ = i; } } } - DBGLOG("len=%zu lcp=%hu(%hu)", len_, lcp_, lcs_); uint16_t j; for (i = n - 1, j = i; j > 0; --j) if (chr_[j - 1] == chr_[i]) @@ -469,7 +473,34 @@ void Pattern::init(const char *options, const uint8_t *pred) #endif #endif if (lcs_ < 0xffff) - bmd_ = 0; // do not use B-M + { + // do not use B-M + bmd_ = 0; + // spread lcp and lcs apart if lcp and lcs are adjacent (chars are possibly correlated) + if (len_ == 3 && (lcp_ == 1 || lcs_ == 1)) + { + lcp_ = 0; + lcs_ = 2; + } + else if (len_ > 3 && (lcp_ + 1 == lcs_ || lcs_ + 1 == lcp_)) + { + uint8_t freqlcs = 255; + for (i = 0; i < n; ++i) + { + if (i > lcp_ + 1 || i + 1 < lcp_) + { + uint8_t pch = static_cast(chr_[i]); + uint8_t freqpch = frequency(pch); + if (freqlcs > freqpch) + { + lcs_ = i; + freqlcs = freqpch; + } + } + } + } + } + DBGLOG("len=%zu bmd=%zu lcp=%hu(%hu)", 
len_, bmd_, lcp_, lcs_); } } @@ -629,6 +660,7 @@ void Pattern::parse( loc = 0; } } + bol_ = at(loc) == '^'; do { Location end = loc; @@ -733,6 +765,8 @@ void Pattern::parse( } else { + if (at(loc) != '^') + bol_ = false; parse2( true, loc, @@ -2961,8 +2995,8 @@ void Pattern::gencode_dfa(const DFA::State *start) const ::fprintf(file, "void reflex_code_%s(reflex::Matcher& m)\n" "{\n" - " int c0 = 0, c1 = 0;\n" - " m.FSM_INIT(c1);\n", opt_.n.empty() ? "FSM" : opt_.n.c_str()); + " int c = 0;\n" + " m.FSM_INIT(c);\n", opt_.n.empty() ? "FSM" : opt_.n.c_str()); for (const DFA::State *state = start; state != NULL; state = state->next) { ::fprintf(file, "\nS%u:\n", state->index); @@ -2978,8 +3012,7 @@ void Pattern::gencode_dfa(const DFA::State *start) const ::fprintf(file, " m.FSM_HEAD(%u);\n", *i); if (state->edges.rbegin() != state->edges.rend() && state->edges.rbegin()->first == META_DED) ::fprintf(file, " if (m.FSM_DENT()) goto S%u;\n", state->edges.rbegin()->second.second->index); - bool peek = false; // if we need to read a character into c1 - bool prev = false; // if we need to keep the previous character in c0 + bool peek = false; // if we need to read a character into c for (DFA::State::Edges::const_reverse_iterator i = state->edges.rbegin(); i != state->edges.rend(); ++i) { #if WITH_COMPACT_DFA == -1 @@ -2993,13 +3026,12 @@ void Pattern::gencode_dfa(const DFA::State *start) const { do { - if (lo == META_EOB || lo == META_EOL) + if (lo == META_EOB || lo == META_EOL || lo == META_EWE || lo == META_BWE || lo == META_NWE || lo == META_WBE) + { peek = true; - else if (lo == META_EWE || lo == META_BWE || lo == META_NWE || lo == META_WBE) - prev = peek = true; - if (prev && peek) break; - check_dfa_closure(i->second.second, 1, peek, prev); + } + check_dfa_closure(i->second.second, 1, peek); } while (++lo <= hi); } else @@ -3025,10 +3057,7 @@ void Pattern::gencode_dfa(const DFA::State *start) const target_index = i->second.second->index; if (read) { - if (prev) - 
::fprintf(file, " c0 = c1, c1 = m.FSM_CHAR();\n"); - else - ::fprintf(file, " c1 = m.FSM_CHAR();\n"); + ::fprintf(file, " c = m.FSM_CHAR();\n"); read = false; } if (is_meta(lo)) @@ -3039,14 +3068,6 @@ void Pattern::gencode_dfa(const DFA::State *start) const { case META_EOB: case META_EOL: - ::fprintf(file, " "); - if (elif) - ::fprintf(file, "else "); - ::fprintf(file, "if (m.FSM_META_%s(c1)) {\n", meta_label[lo - META_MIN]); - gencode_dfa_closure(file, i->second.second, 2, peek); - ::fprintf(file, " }\n"); - elif = true; - break; case META_EWE: case META_BWE: case META_NWE: @@ -3054,7 +3075,7 @@ void Pattern::gencode_dfa(const DFA::State *start) const ::fprintf(file, " "); if (elif) ::fprintf(file, "else "); - ::fprintf(file, "if (m.FSM_META_%s(c0, c1)) {\n", meta_label[lo - META_MIN]); + ::fprintf(file, "if (m.FSM_META_%s(c)) {\n", meta_label[lo - META_MIN]); gencode_dfa_closure(file, i->second.second, 2, peek); ::fprintf(file, " }\n"); elif = true; @@ -3077,7 +3098,7 @@ void Pattern::gencode_dfa(const DFA::State *start) const break; if (lo == hi) { - ::fprintf(file, " if (c1 == "); + ::fprintf(file, " if (c == "); print_char(file, lo); ::fprintf(file, ")"); } @@ -3085,20 +3106,20 @@ void Pattern::gencode_dfa(const DFA::State *start) const { ::fprintf(file, " if ("); print_char(file, lo); - ::fprintf(file, " <= c1)"); + ::fprintf(file, " <= c)"); } else { ::fprintf(file, " if ("); print_char(file, lo); - ::fprintf(file, " <= c1 && c1 <= "); + ::fprintf(file, " <= c && c <= "); print_char(file, hi); ::fprintf(file, ")"); } if (target_index == Const::IMAX) { if (peek) - ::fprintf(file, " return m.FSM_HALT(c1);\n"); + ::fprintf(file, " return m.FSM_HALT(c);\n"); else ::fprintf(file, " return m.FSM_HALT();\n"); } @@ -3117,10 +3138,7 @@ void Pattern::gencode_dfa(const DFA::State *start) const { if (read) { - if (prev) - ::fprintf(file, " c0 = c1, c1 = m.FSM_CHAR();\n"); - else - ::fprintf(file, " c1 = m.FSM_CHAR();\n"); + ::fprintf(file, " c = m.FSM_CHAR();\n"); read 
= false; } do @@ -3129,14 +3147,6 @@ void Pattern::gencode_dfa(const DFA::State *start) const { case META_EOB: case META_EOL: - ::fprintf(file, " "); - if (elif) - ::fprintf(file, "else "); - ::fprintf(file, "if (m.FSM_META_%s(c1)) {\n", meta_label[lo - META_MIN]); - gencode_dfa_closure(file, i->second.second, 2, peek); - ::fprintf(file, " }\n"); - elif = true; - break; case META_EWE: case META_BWE: case META_NWE: @@ -3144,7 +3154,7 @@ void Pattern::gencode_dfa(const DFA::State *start) const ::fprintf(file, " "); if (elif) ::fprintf(file, "else "); - ::fprintf(file, "if (m.FSM_META_%s(c0, c1)) {\n", meta_label[lo - META_MIN]); + ::fprintf(file, "if (m.FSM_META_%s(c)) {\n", meta_label[lo - META_MIN]); gencode_dfa_closure(file, i->second.second, 2, peek); ::fprintf(file, " }\n"); elif = true; @@ -3170,10 +3180,7 @@ void Pattern::gencode_dfa(const DFA::State *start) const target_index = i->second.second->index; if (read) { - if (prev) - ::fprintf(file, " c0 = c1, c1 = m.FSM_CHAR();\n"); - else - ::fprintf(file, " c1 = m.FSM_CHAR();\n"); + ::fprintf(file, " c = m.FSM_CHAR();\n"); read = false; } if (!is_meta(lo)) @@ -3183,7 +3190,7 @@ void Pattern::gencode_dfa(const DFA::State *start) const break; if (lo == hi) { - ::fprintf(file, " if (c1 == "); + ::fprintf(file, " if (c == "); print_char(file, lo); ::fprintf(file, ")"); } @@ -3191,20 +3198,20 @@ void Pattern::gencode_dfa(const DFA::State *start) const { ::fprintf(file, " if ("); print_char(file, lo); - ::fprintf(file, " <= c1)"); + ::fprintf(file, " <= c)"); } else { ::fprintf(file, " if ("); print_char(file, lo); - ::fprintf(file, " <= c1 && c1 <= "); + ::fprintf(file, " <= c && c <= "); print_char(file, hi); ::fprintf(file, ")"); } if (target_index == Const::IMAX) { if (peek) - ::fprintf(file, " return m.FSM_HALT(c1);\n"); + ::fprintf(file, " return m.FSM_HALT(c);\n"); else ::fprintf(file, " return m.FSM_HALT();\n"); } @@ -3216,7 +3223,7 @@ void Pattern::gencode_dfa(const DFA::State *start) const } #endif if (peek) 
- ::fprintf(file, " return m.FSM_HALT(c1);\n"); + ::fprintf(file, " return m.FSM_HALT(c);\n"); else ::fprintf(file, " return m.FSM_HALT();\n"); } @@ -3234,7 +3241,7 @@ void Pattern::gencode_dfa(const DFA::State *start) const } #ifndef WITH_NO_CODEGEN -void Pattern::check_dfa_closure(const DFA::State *state, int nest, bool& peek, bool& prev) const +void Pattern::check_dfa_closure(const DFA::State *state, int nest, bool& peek) const { if (nest > 5) return; @@ -3251,13 +3258,12 @@ void Pattern::check_dfa_closure(const DFA::State *state, int nest, bool& peek, b { do { - if (lo == META_EOB || lo == META_EOL) + if (lo == META_EOB || lo == META_EOL || lo == META_EWE || lo == META_BWE || lo == META_NWE || lo == META_WBE) + { peek = true; - else if (lo == META_EWE || lo == META_BWE || lo == META_NWE || lo == META_WBE) - prev = peek = true; - if (prev && peek) break; - check_dfa_closure(i->second.second, nest + 1, peek, prev); + } + check_dfa_closure(i->second.second, nest + 1, peek); } while (++lo <= hi); } } @@ -3271,14 +3277,14 @@ void Pattern::gencode_dfa_closure(FILE *file, const DFA::State *state, int nest, if (state->redo) { if (peek) - ::fprintf(file, "%*sm.FSM_REDO(c1);\n", 2*nest, ""); + ::fprintf(file, "%*sm.FSM_REDO(c);\n", 2*nest, ""); else ::fprintf(file, "%*sm.FSM_REDO();\n", 2*nest, ""); } else if (state->accept > 0) { if (peek) - ::fprintf(file, "%*sm.FSM_TAKE(%u, c1);\n", 2*nest, "", state->accept); + ::fprintf(file, "%*sm.FSM_TAKE(%u, c);\n", 2*nest, "", state->accept); else ::fprintf(file, "%*sm.FSM_TAKE(%u);\n", 2*nest, "", state->accept); } @@ -3303,14 +3309,6 @@ void Pattern::gencode_dfa_closure(FILE *file, const DFA::State *state, int nest, { case META_EOB: case META_EOL: - ::fprintf(file, "%*s", 2*nest, ""); - if (elif) - ::fprintf(file, "else "); - ::fprintf(file, "if (m.FSM_META_%s(c1)) {\n", meta_label[lo - META_MIN]); - gencode_dfa_closure(file, i->second.second, nest + 1, peek); - ::fprintf(file, "%*s}\n", 2*nest, ""); - elif = true; - break; 
case META_EWE: case META_BWE: case META_NWE: @@ -3318,7 +3316,7 @@ void Pattern::gencode_dfa_closure(FILE *file, const DFA::State *state, int nest, ::fprintf(file, "%*s", 2*nest, ""); if (elif) ::fprintf(file, "else "); - ::fprintf(file, "if (m.FSM_META_%s(c0, c1)) {\n", meta_label[lo - META_MIN]); + ::fprintf(file, "if (m.FSM_META_%s(c)) {\n", meta_label[lo - META_MIN]); gencode_dfa_closure(file, i->second.second, nest + 1, peek); ::fprintf(file, "%*s}\n", 2*nest, ""); elif = true; @@ -3346,7 +3344,7 @@ void Pattern::gencode_dfa_closure(FILE *file, const DFA::State *state, int nest, ::fprintf(file, "%*s", 2*nest, ""); if (lo == hi) { - ::fprintf(file, "if (c1 == "); + ::fprintf(file, "if (c == "); print_char(file, lo); ::fprintf(file, ")"); } @@ -3354,20 +3352,20 @@ void Pattern::gencode_dfa_closure(FILE *file, const DFA::State *state, int nest, { ::fprintf(file, "if ("); print_char(file, lo); - ::fprintf(file, " <= c1)"); + ::fprintf(file, " <= c)"); } else { ::fprintf(file, "if ("); print_char(file, lo); - ::fprintf(file, " <= c1 && c1 <= "); + ::fprintf(file, " <= c && c <= "); print_char(file, hi); ::fprintf(file, ")"); } if (target_index == Const::IMAX) { if (peek) - ::fprintf(file, " return m.FSM_HALT(c1);\n"); + ::fprintf(file, " return m.FSM_HALT(c);\n"); else ::fprintf(file, " return m.FSM_HALT();\n"); } @@ -3394,7 +3392,7 @@ void Pattern::gencode_dfa_closure(FILE *file, const DFA::State *state, int nest, ::fprintf(file, "%*s", 2*nest, ""); if (lo == hi) { - ::fprintf(file, "if (c1 == "); + ::fprintf(file, "if (c == "); print_char(file, lo); ::fprintf(file, ")"); } @@ -3402,20 +3400,20 @@ void Pattern::gencode_dfa_closure(FILE *file, const DFA::State *state, int nest, { ::fprintf(file, "if ("); print_char(file, lo); - ::fprintf(file, " <= c1)"); + ::fprintf(file, " <= c)"); } else { ::fprintf(file, "if ("); print_char(file, lo); - ::fprintf(file, " <= c1 && c1 <= "); + ::fprintf(file, " <= c && c <= "); print_char(file, hi); ::fprintf(file, ")"); } if 
(target_index == Const::IMAX) { if (peek) - ::fprintf(file, " return m.FSM_HALT(c1);\n"); + ::fprintf(file, " return m.FSM_HALT(c);\n"); else ::fprintf(file, " return m.FSM_HALT();\n"); } @@ -4560,26 +4558,31 @@ bool Pattern::match_hfa_transitions(size_t level, const HFA::Hashes& hashes, con void Pattern::write_predictor(FILE *file) const { ::fprintf(file, "extern const reflex::Pattern::Pred reflex_pred_%s[%zu] = {", opt_.n.empty() ? "FSM" : opt_.n.c_str(), 2 + len_ + (len_ == 0) * 256 + Const::HASH + (lbk_ > 0) * 68); - ::fprintf(file, "\n %3hhu,%3hhu,", static_cast(len_), (static_cast(min_ | (one_ << 4) | ((lbk_ > 0) << 5)))); + ::fprintf(file, "\n %3hhu,%3hhu,", static_cast(len_), (static_cast(min_ | (one_ << 4) | ((lbk_ > 0) << 5) | (bol_ << 6)))); + // save match characters chr_[0..len_-1] for (size_t i = 0; i < len_; ++i) ::fprintf(file, "%s%3hhu,", ((i + 2) & 0xF) ? "" : "\n ", static_cast(chr_[i])); if (len_ == 0) { + // save bitap bit_[] parameters for (Char i = 0; i < 256; ++i) ::fprintf(file, "%s%3hhu,", (i & 0xF) ? "" : "\n ", static_cast(~bit_[i])); } - if (min_ >= 4) - { - for (Hash i = 0; i < Const::HASH; ++i) - ::fprintf(file, "%s%3hhu,", (i & 0xF) ? "" : "\n ", static_cast(~pmh_[i])); - } - else + if (min_ < 4) { + // save predict match PM4 pma_[] parameters for (Hash i = 0; i < Const::HASH; ++i) ::fprintf(file, "%s%3hhu,", (i & 0xF) ? "" : "\n ", static_cast(~pma_[i])); } + else + { + // save predict match hash pmh_[] parameters + for (Hash i = 0; i < Const::HASH; ++i) + ::fprintf(file, "%s%3hhu,", (i & 0xF) ? 
"" : "\n ", static_cast(~pmh_[i])); + } if (lbk_ > 0) { + // save lookback parameters lbk_ lbm_ cbk_[] after s-t cut and first s-t cut pattern characters fst_[] ::fprintf(file, "\n %3hhu,%3hhu,%3hhu,%3hhu,", static_cast(lbk_ & 0xff), static_cast(lbk_ >> 8), static_cast(lbm_ & 0xff), static_cast(lbm_ >> 8)); for (size_t i = 0; i < 256; i += 8) { diff --git a/ccl/rslang/import/reflex/lib/simd.cpp b/ccl/rslang/import/reflex/lib/simd.cpp new file mode 100644 index 0000000..766776a --- /dev/null +++ b/ccl/rslang/import/reflex/lib/simd.cpp @@ -0,0 +1,157 @@ +/******************************************************************************\ +* Copyright (c) 2016, Robert van Engelen, Genivia Inc. All rights reserved. * +* * +* Redistribution and use in source and binary forms, with or without * +* modification, are permitted provided that the following conditions are met: * +* * +* (1) Redistributions of source code must retain the above copyright notice, * +* this list of conditions and the following disclaimer. * +* * +* (2) Redistributions in binary form must reproduce the above copyright * +* notice, this list of conditions and the following disclaimer in the * +* documentation and/or other materials provided with the distribution. * +* * +* (3) The name of the author may not be used to endorse or promote products * +* derived from this software without specific prior written permission. * +* * +* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED * +* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF * +* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO * +* EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * +* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * +* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * +* OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * +* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * +* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * +* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * +\******************************************************************************/ + +/** +@file simd.cpp +@brief RE/flex SIMD primitives +@author Robert van Engelen - engelen@genivia.com +@copyright (c) 2016-2024, Robert van Engelen, Genivia Inc. All rights reserved. +@copyright (c) BSD-3 License - see LICENSE.txt +*/ + +#include + +namespace reflex { + +#if defined(HAVE_AVX512BW) || defined(HAVE_AVX2) || defined(HAVE_SSE2) + +// simd.h get_HW() +static uint64_t get_HW() +{ + int CPUInfo1[4] = { 0, 0, 0, 0 }; + int CPUInfo7[4] = { 0, 0, 0, 0 }; + cpuidex(CPUInfo1, 0, 0); + int n = CPUInfo1[0]; + if (n <= 0) + return 0ULL; + cpuidex(CPUInfo1, 1, 0); // cpuid EAX=1 + if (n >= 7) + cpuidex(CPUInfo7, 7, 0); // cpuid EAX=7, ECX=0 + return static_cast(CPUInfo1[2]) | (static_cast(static_cast(CPUInfo7[1])) << 32); +} + +uint64_t HW = get_HW(); + +#endif + +size_t nlcount(const char *s, const char *t) +{ + size_t n = 0; + if (s <= t - 256) + { +#if defined(HAVE_AVX512BW) && (!defined(_MSC_VER) || defined(_WIN64)) + if (have_HW_AVX512BW()) + n = simd_nlcount_avx512bw(s, t); + else if (have_HW_AVX2()) + n = simd_nlcount_avx2(s, t); + else +#elif defined(HAVE_AVX512BW) || defined(HAVE_AVX2) + if (have_HW_AVX2()) + n = simd_nlcount_avx2(s, t); + else +#endif +#if defined(HAVE_AVX512BW) || defined(HAVE_AVX2) || defined(HAVE_SSE2) + { + const char *e = t - 64; + // align on 16 bytes + while ((reinterpret_cast(s) & 0x0f) != 0) + n += (*s++ == '\n'); + __m128i vlcn 
= _mm_set1_epi8('\n'); + while (s <= e) + { + __m128i vlcm1 = _mm_loadu_si128(reinterpret_cast(s)); + __m128i vlcm2 = _mm_loadu_si128(reinterpret_cast(s + 16)); + __m128i vlcm3 = _mm_loadu_si128(reinterpret_cast(s + 32)); + __m128i vlcm4 = _mm_loadu_si128(reinterpret_cast(s + 48)); + __m128i vlceq1 = _mm_cmpeq_epi8(vlcm1, vlcn); + __m128i vlceq2 = _mm_cmpeq_epi8(vlcm2, vlcn); + __m128i vlceq3 = _mm_cmpeq_epi8(vlcm3, vlcn); + __m128i vlceq4 = _mm_cmpeq_epi8(vlcm4, vlcn); + n += popcount(_mm_movemask_epi8(vlceq1)) + + popcount(_mm_movemask_epi8(vlceq2)) + + popcount(_mm_movemask_epi8(vlceq3)) + + popcount(_mm_movemask_epi8(vlceq4)); + s += 64; + } + } +#elif defined(HAVE_NEON) + const char *e = t - 64; + uint8x16_t vlcn = vdupq_n_u8('\n'); + while (s <= e) + { + uint8x16_t vlcm0 = vld1q_u8(reinterpret_cast(s)); + uint8x16_t vleq0 = vceqq_u8(vlcm0, vlcn); + s += 16; + uint8x16_t vlcm1 = vld1q_u8(reinterpret_cast(s)); + uint8x16_t vleq1 = vceqq_u8(vlcm1, vlcn); + s += 16; + uint8x16_t vlcm2 = vld1q_u8(reinterpret_cast(s)); + uint8x16_t vleq2 = vceqq_u8(vlcm2, vlcn); + s += 16; + uint8x16_t vlcm3 = vld1q_u8(reinterpret_cast(s)); + uint8x16_t vleq3 = vceqq_u8(vlcm3, vlcn); + s += 16; +#if defined(__aarch64__) + n += vaddvq_s8(vqabsq_s8(vreinterpretq_s8_u8(vaddq_u8(vleq0, vaddq_u8(vleq1, vaddq_u8(vleq2, vleq3)))))); +#else + // my homebrew horizontal sum (we have a very limited range 0..4 to sum to a total max 4x16=64 < 256) + uint64x2_t vsum = vreinterpretq_u64_s8(vqabsq_s8(vreinterpretq_s8_u8(vaddq_u8(vleq0, vaddq_u8(vleq1, vaddq_u8(vleq2, vleq3)))))); + uint64_t sum0 = vgetq_lane_u64(vsum, 0) + vgetq_lane_u64(vsum, 1); + uint32_t sum1 = static_cast(sum0) + (sum0 >> 32); + uint16_t sum2 = static_cast(sum1) + (sum1 >> 16); + n += static_cast(sum2) + (sum2 >> 8); +#endif + } +#endif + } + // 4-way auto-vectorizable loop + uint32_t n0 = 0, n1 = 0, n2 = 0, n3 = 0; + while (s < t - 3) + { + n0 += s[0] == '\n'; + n1 += s[1] == '\n'; + n2 += s[2] == '\n'; + n3 += s[3] == '\n'; 
+ s += 4; + } + n += n0 + n1 + n2 + n3; + // epilogue + if (s < t) + { + n += *s == '\n'; + if (++s < t) + { + n += *s == '\n'; + if (++s < t) + n += *s == '\n'; + } + } + return n; +} + +} // namespace reflex diff --git a/ccl/rslang/import/reflex/lib/simd_avx2.cpp b/ccl/rslang/import/reflex/lib/simd_avx2.cpp index d128099..47c6751 100644 --- a/ccl/rslang/import/reflex/lib/simd_avx2.cpp +++ b/ccl/rslang/import/reflex/lib/simd_avx2.cpp @@ -34,15 +34,20 @@ @copyright (c) BSD-3 License - see LICENSE.txt */ -#include -#include +#if defined(HAVE_AVX2) || defined(HAVE_AVX512BW) +# if !defined(__AVX2__) && !defined(__AVX512BW__) +# error simd_avx2.cpp must be compiled with -mavx2 or /arch:avx2. +# endif +#endif + +#include namespace reflex { -// Partially count newlines in string b up to and including position e in b, updates b close to e with uncounted part +// Partially count newlines in string b up to e, updates b close to e with uncounted part size_t simd_nlcount_avx2(const char*& b, const char *e) { -#if defined(HAVE_AVX2) +#if defined(HAVE_AVX2) || defined(HAVE_AVX512BW) const char *s = b; e -= 128; if (s > e) @@ -73,42 +78,4 @@ size_t simd_nlcount_avx2(const char*& b, const char *e) #endif } -// Partially count newlines in string b up to and including position e in b, updates b close to e with uncounted part -size_t simd_nlcount_sse2(const char*& b, const char *e) -{ -#if defined(HAVE_SSE2) - const char *s = b; - e -= 64; - if (s > e) - return 0; - size_t n = 0; - // align on 16 bytes - while ((reinterpret_cast(s) & 0x0f) != 0) - n += (*s++ == '\n'); - __m128i vlcn = _mm_set1_epi8('\n'); - while (s <= e) - { - __m128i vlcm1 = _mm_loadu_si128(reinterpret_cast(s)); - __m128i vlcm2 = _mm_loadu_si128(reinterpret_cast(s + 16)); - __m128i vlcm3 = _mm_loadu_si128(reinterpret_cast(s + 32)); - __m128i vlcm4 = _mm_loadu_si128(reinterpret_cast(s + 48)); - __m128i vlceq1 = _mm_cmpeq_epi8(vlcm1, vlcn); - __m128i vlceq2 = _mm_cmpeq_epi8(vlcm2, vlcn); - __m128i vlceq3 = 
_mm_cmpeq_epi8(vlcm3, vlcn); - __m128i vlceq4 = _mm_cmpeq_epi8(vlcm4, vlcn); - n += popcount(_mm_movemask_epi8(vlceq1)) - + popcount(_mm_movemask_epi8(vlceq2)) - + popcount(_mm_movemask_epi8(vlceq3)) - + popcount(_mm_movemask_epi8(vlceq4)); - s += 64; - } - b = s; - return n; -#else - (void)b; - (void)e; - return 0; -#endif -} - } // namespace reflex diff --git a/ccl/rslang/import/reflex/lib/simd_avx512bw.cpp b/ccl/rslang/import/reflex/lib/simd_avx512bw.cpp index 1a762d8..af9a47a 100644 --- a/ccl/rslang/import/reflex/lib/simd_avx512bw.cpp +++ b/ccl/rslang/import/reflex/lib/simd_avx512bw.cpp @@ -34,12 +34,17 @@ @copyright (c) BSD-3 License - see LICENSE.txt */ -#include -#include +#if defined(HAVE_AVX512BW) +# if !defined(__AVX512BW__) +# error simd_avx512bw.cpp must be compiled with -mavx512bw or /arch:avx512. +# endif +#endif + +#include namespace reflex { -// Partially count newlines in string b up to and including position e in b, updates b close to e with uncounted part +// Partially count newlines in string b up to e, updates b close to e with uncounted part size_t simd_nlcount_avx512bw(const char*& b, const char *e) { #if defined(HAVE_AVX512BW) && (!defined(_MSC_VER) || defined(_WIN64)) diff --git a/ccl/rslang/import/reflex/lib/unicode.cpp b/ccl/rslang/import/reflex/lib/unicode.cpp index a63badf..4dd782e 100644 --- a/ccl/rslang/import/reflex/lib/unicode.cpp +++ b/ccl/rslang/import/reflex/lib/unicode.cpp @@ -83,9 +83,15 @@ Tables::Tables() range["Control"] = range["Cc"]; range["Format"] = range["Cf"]; - range["d"] = range["Decimal_Digit_Number"]; - range["l"] = range["Lowercase_Letter"]; - range["u"] = range["Uppercase_Letter"]; + range["Cntrl"] = range["C"]; + range["Digit"] = range["Nd"]; + range["Lower"] = range["Ll"]; + range["Punct"] = range["P"]; + range["Upper"] = range["Lu"]; + + range["d"] = range["Digit"]; + range["l"] = range["Lower"]; + range["u"] = range["Upper"]; range["s"] = range["Space"]; range["w"] = range["Word"]; } diff --git 
a/ccl/rslang/import/reflex/unicode/language_scripts.cpp b/ccl/rslang/import/reflex/unicode/language_scripts.cpp index 89f57e5..496a4a4 100644 --- a/ccl/rslang/import/reflex/unicode/language_scripts.cpp +++ b/ccl/rslang/import/reflex/unicode/language_scripts.cpp @@ -2107,6 +2107,36 @@ void reflex::Unicode::Tables::language_scripts(void) 0, 0 }; range["Grantha"] = Grantha; + static const int Graph[] = { + 33, 126, + 161, 172, + 174, 1535, + 1542, 1563, + 1565, 1756, + 1758, 1806, + 1808, 2191, + 2194, 2273, + 2275, 5759, + 5761, 6157, + 6159, 8191, + 8208, 8231, + 8240, 8286, + 8293, 8293, + 8304, 12287, + 12289, 55295, + 57344, 65278, + 65280, 65528, + 65532, 69820, + 69822, 69836, + 69838, 78895, + 78912, 113823, + 113828, 119154, + 119163, 917504, + 917506, 917535, + 917632, 1114111, + 0, 0 + }; + range["Graph"] = Graph; static const int Greek[] = { 880, 883, 885, 887, @@ -7430,6 +7460,34 @@ void reflex::Unicode::Tables::language_scripts(void) 0, 0 }; range["Po"] = Po; + static const int Print[] = { + 32, 126, + 160, 172, + 174, 1535, + 1542, 1563, + 1565, 1756, + 1758, 1806, + 1808, 2191, + 2194, 2273, + 2275, 6157, + 6159, 8202, + 8208, 8233, + 8239, 8287, + 8293, 8293, + 8304, 55295, + 57344, 65278, + 65280, 65528, + 65532, 69820, + 69822, 69836, + 69838, 78895, + 78912, 113823, + 113828, 119154, + 119163, 917504, + 917506, 917535, + 917632, 1114111, + 0, 0 + }; + range["Print"] = Print; static const int Ps[] = { 40, 40, 91, 91, diff --git a/ccl/rslang/import/reflex/unicode/letter_scripts.cpp b/ccl/rslang/import/reflex/unicode/letter_scripts.cpp index 8416669..3716251 100644 --- a/ccl/rslang/import/reflex/unicode/letter_scripts.cpp +++ b/ccl/rslang/import/reflex/unicode/letter_scripts.cpp @@ -2,6 +2,377 @@ #include void reflex::Unicode::Tables::letter_scripts(void) { + static const int Alnum[] = { + 48, 57, + 65, 90, + 97, 122, + 181, 181, + 192, 214, + 216, 246, + 248, 442, + 444, 447, + 452, 452, + 454, 455, + 457, 458, + 460, 497, + 499, 659, + 661, 687, + 
880, 883, + 886, 887, + 891, 893, + 895, 895, + 902, 902, + 904, 906, + 908, 908, + 910, 929, + 931, 1013, + 1015, 1153, + 1162, 1327, + 1329, 1366, + 1376, 1416, + 1632, 1641, + 1776, 1785, + 1984, 1993, + 2406, 2415, + 2534, 2543, + 2662, 2671, + 2790, 2799, + 2918, 2927, + 3046, 3055, + 3174, 3183, + 3302, 3311, + 3430, 3439, + 3558, 3567, + 3664, 3673, + 3792, 3801, + 3872, 3881, + 4160, 4169, + 4240, 4249, + 4256, 4293, + 4295, 4295, + 4301, 4301, + 4304, 4346, + 4349, 4351, + 5024, 5109, + 5112, 5117, + 6112, 6121, + 6160, 6169, + 6470, 6479, + 6608, 6617, + 6784, 6793, + 6800, 6809, + 6992, 7001, + 7088, 7097, + 7232, 7241, + 7248, 7257, + 7296, 7304, + 7312, 7354, + 7357, 7359, + 7424, 7467, + 7531, 7543, + 7545, 7578, + 7680, 7957, + 7960, 7965, + 7968, 8005, + 8008, 8013, + 8016, 8023, + 8025, 8025, + 8027, 8027, + 8029, 8029, + 8031, 8061, + 8064, 8071, + 8080, 8087, + 8096, 8103, + 8112, 8116, + 8118, 8123, + 8126, 8126, + 8130, 8132, + 8134, 8139, + 8144, 8147, + 8150, 8155, + 8160, 8172, + 8178, 8180, + 8182, 8187, + 8450, 8450, + 8455, 8455, + 8458, 8467, + 8469, 8469, + 8473, 8477, + 8484, 8484, + 8486, 8486, + 8488, 8488, + 8490, 8493, + 8495, 8500, + 8505, 8505, + 8508, 8511, + 8517, 8521, + 8526, 8526, + 8579, 8580, + 11264, 11387, + 11390, 11492, + 11499, 11502, + 11506, 11507, + 11520, 11557, + 11559, 11559, + 11565, 11565, + 42528, 42537, + 42560, 42605, + 42624, 42651, + 42786, 42863, + 42865, 42887, + 42891, 42894, + 42896, 42954, + 42960, 42961, + 42963, 42963, + 42965, 42969, + 42997, 42998, + 43002, 43002, + 43216, 43225, + 43264, 43273, + 43472, 43481, + 43504, 43513, + 43600, 43609, + 43824, 43866, + 43872, 43880, + 43888, 43967, + 44016, 44025, + 64256, 64262, + 64275, 64279, + 65296, 65305, + 65313, 65338, + 65345, 65370, + 66560, 66639, + 66720, 66729, + 66736, 66771, + 66776, 66811, + 66928, 66938, + 66940, 66954, + 66956, 66962, + 66964, 66965, + 66967, 66977, + 66979, 66993, + 66995, 67001, + 67003, 67004, + 68736, 68786, + 68800, 
68850, + 68912, 68921, + 69734, 69743, + 69872, 69881, + 69942, 69951, + 70096, 70105, + 70384, 70393, + 70736, 70745, + 70864, 70873, + 71248, 71257, + 71360, 71369, + 71472, 71481, + 71840, 71913, + 72016, 72025, + 72784, 72793, + 73040, 73049, + 73120, 73129, + 73552, 73561, + 92768, 92777, + 92864, 92873, + 93008, 93017, + 93760, 93823, + 119808, 119892, + 119894, 119964, + 119966, 119967, + 119970, 119970, + 119973, 119974, + 119977, 119980, + 119982, 119993, + 119995, 119995, + 119997, 120003, + 120005, 120069, + 120071, 120074, + 120077, 120084, + 120086, 120092, + 120094, 120121, + 120123, 120126, + 120128, 120132, + 120134, 120134, + 120138, 120144, + 120146, 120485, + 120488, 120512, + 120514, 120538, + 120540, 120570, + 120572, 120596, + 120598, 120628, + 120630, 120654, + 120656, 120686, + 120688, 120712, + 120714, 120744, + 120746, 120770, + 120772, 120779, + 120782, 120831, + 122624, 122633, + 122635, 122654, + 122661, 122666, + 123200, 123209, + 123632, 123641, + 124144, 124153, + 125184, 125251, + 125264, 125273, + 130032, 130041, + 0, 0 + }; + range["Alnum"] = Alnum; + static const int Alpha[] = { + 65, 90, + 97, 122, + 181, 181, + 192, 214, + 216, 246, + 248, 442, + 444, 447, + 452, 452, + 454, 455, + 457, 458, + 460, 497, + 499, 659, + 661, 687, + 880, 883, + 886, 887, + 891, 893, + 895, 895, + 902, 902, + 904, 906, + 908, 908, + 910, 929, + 931, 1013, + 1015, 1153, + 1162, 1327, + 1329, 1366, + 1376, 1416, + 4256, 4293, + 4295, 4295, + 4301, 4301, + 4304, 4346, + 4349, 4351, + 5024, 5109, + 5112, 5117, + 7296, 7304, + 7312, 7354, + 7357, 7359, + 7424, 7467, + 7531, 7543, + 7545, 7578, + 7680, 7957, + 7960, 7965, + 7968, 8005, + 8008, 8013, + 8016, 8023, + 8025, 8025, + 8027, 8027, + 8029, 8029, + 8031, 8061, + 8064, 8071, + 8080, 8087, + 8096, 8103, + 8112, 8116, + 8118, 8123, + 8126, 8126, + 8130, 8132, + 8134, 8139, + 8144, 8147, + 8150, 8155, + 8160, 8172, + 8178, 8180, + 8182, 8187, + 8450, 8450, + 8455, 8455, + 8458, 8467, + 8469, 8469, + 
8473, 8477, + 8484, 8484, + 8486, 8486, + 8488, 8488, + 8490, 8493, + 8495, 8500, + 8505, 8505, + 8508, 8511, + 8517, 8521, + 8526, 8526, + 8579, 8580, + 11264, 11387, + 11390, 11492, + 11499, 11502, + 11506, 11507, + 11520, 11557, + 11559, 11559, + 11565, 11565, + 42560, 42605, + 42624, 42651, + 42786, 42863, + 42865, 42887, + 42891, 42894, + 42896, 42954, + 42960, 42961, + 42963, 42963, + 42965, 42969, + 42997, 42998, + 43002, 43002, + 43824, 43866, + 43872, 43880, + 43888, 43967, + 64256, 64262, + 64275, 64279, + 65313, 65338, + 65345, 65370, + 66560, 66639, + 66736, 66771, + 66776, 66811, + 66928, 66938, + 66940, 66954, + 66956, 66962, + 66964, 66965, + 66967, 66977, + 66979, 66993, + 66995, 67001, + 67003, 67004, + 68736, 68786, + 68800, 68850, + 71840, 71903, + 93760, 93823, + 119808, 119892, + 119894, 119964, + 119966, 119967, + 119970, 119970, + 119973, 119974, + 119977, 119980, + 119982, 119993, + 119995, 119995, + 119997, 120003, + 120005, 120069, + 120071, 120074, + 120077, 120084, + 120086, 120092, + 120094, 120121, + 120123, 120126, + 120128, 120132, + 120134, 120134, + 120138, 120144, + 120146, 120485, + 120488, 120512, + 120514, 120538, + 120540, 120570, + 120572, 120596, + 120598, 120628, + 120630, 120654, + 120656, 120686, + 120688, 120712, + 120714, 120744, + 120746, 120770, + 120772, 120779, + 122624, 122633, + 122635, 122654, + 122661, 122666, + 125184, 125251, + 0, 0 + }; + range["Alpha"] = Alpha; static const int Ll[] = { 97, 122, 181, 181, diff --git a/ccl/rslang/unity/reflex_unity2.cpp b/ccl/rslang/unity/reflex_unity2.cpp index 66bdf31..c497dc9 100644 --- a/ccl/rslang/unity/reflex_unity2.cpp +++ b/ccl/rslang/unity/reflex_unity2.cpp @@ -31,6 +31,7 @@ #include "../lib/error.cpp" #include "../lib/input.cpp" #include "../lib/matcher.cpp" +#include "../lib/simd.cpp" #undef min #undef max diff --git a/scripts/BuildLexers.ps1 b/scripts/BuildLexers.ps1 index ca36017..f4ea456 100644 --- a/scripts/BuildLexers.ps1 +++ b/scripts/BuildLexers.ps1 @@ -2,6 
+2,11 @@ $workDir = Resolve-Path -Path "${PSScriptRoot}\..\ccl\rslang\src" +# Change default relative path according to your work directory setup +# Re-flex repository: https://github.com/Genivia/RE-flex +$reflexRelative = Resolve-Path -Path "${PSScriptRoot}\..\..\GH-RE-flex\bin\win64" +$Env:PATH += ";${reflexRelative}" + function BuildLexers { Set-Location -Path ${workDir} BuildSyntax('AsciiLexer') @@ -21,4 +26,4 @@ function BuildSyntax([string] $lexerName) { Move-Item -Path "${lexer}.hpp" -Destination "..\header\${lexer}.hpp" -Force } -BuildLexers +BuildLexers \ No newline at end of file