Skip to content

Commit

Permalink
Add teragrep regexextract command (#82)
Browse files Browse the repository at this point in the history
  • Loading branch information
51-code authored Oct 8, 2024
1 parent 1269004 commit 840f155
Show file tree
Hide file tree
Showing 5 changed files with 142 additions and 4 deletions.
2 changes: 2 additions & 0 deletions src/main/antlr4/imports/COMMAND_TERAGREP_MODE.g4
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ COMMAND_TERAGREP_MODE_HOST: 'host' -> pushMode(COMMAND_TERAGREP_IP_MODE);
COMMAND_TERAGREP_MODE_PORT: 'port' -> pushMode(COMMAND_TERAGREP_IP_MODE);
COMMAND_TERAGREP_MODE_DOT: '.';
COMMAND_TERAGREP_MODE_TOKENIZER: 'tokenizer';
COMMAND_TERAGREP_MODE_REGEXEXTRACT: 'regexextract';
COMMAND_TERAGREP_MODE_SYSLOG: 'syslog';
COMMAND_TERAGREP_MODE_STREAM: 'stream';
COMMAND_TERAGREP_MODE_LOAD: 'load';
Expand All @@ -96,6 +97,7 @@ COMMAND_TERAGREP_MODE_DEFAULT_FORMAT: ('default'|'DEFAULT'|'avro'|'AVRO');
COMMAND_TERAGREP_MODE_FORMAT: 'format' -> pushMode(GET_STRING);
COMMAND_TERAGREP_MODE_INPUT: 'input' -> pushMode(GET_FIELD);
COMMAND_TERAGREP_MODE_OUTPUT: 'output' -> pushMode(GET_FIELD);
COMMAND_TERAGREP_MODE_REGEX: 'regex' -> pushMode(GET_STRING);
COMMAND_TERAGREP_MODE_ESTIMATES: 'estimates' -> pushMode(GET_FIELD);
COMMAND_TERAGREP_MODE_HEADER: 'header=' -> pushMode(GET_BOOLEAN);
COMMAND_TERAGREP_MODE_SCHEMA: 'schema=' -> pushMode(GET_STRING);
Expand Down
15 changes: 12 additions & 3 deletions src/main/antlr4/imports/DPLParserTransform_teragrep.g4
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ t_execParameter
| t_kafkaSaveModeParameter
| t_bloomModeParameter
| t_tokenizerParameter
| t_regexextractParameter
| t_dynatraceParameter)
;

Expand All @@ -67,14 +68,22 @@ t_dynatraceParameter
;

t_tokenizerParameter
: COMMAND_TERAGREP_MODE_TOKENIZER t_formatParameter? t_inputParamater? t_outputParameter?
: COMMAND_TERAGREP_MODE_TOKENIZER t_formatParameter? t_inputParameter? t_outputParameter?
;

// regexextract alternative of t_execParameter: the COMMAND_TERAGREP_MODE_REGEXEXTRACT
// keyword followed by the regex, input and output parameters. Each parameter is
// optional, but those that are present must appear in exactly this order.
t_regexextractParameter
: COMMAND_TERAGREP_MODE_REGEXEXTRACT t_regexParameter? t_inputParameter? t_outputParameter?
;

// format parameter: the COMMAND_TERAGREP_MODE_FORMAT keyword followed by a stringType value.
t_formatParameter
: COMMAND_TERAGREP_MODE_FORMAT stringType
;

t_inputParamater
// regex parameter: the COMMAND_TERAGREP_MODE_REGEX keyword followed by a stringType value.
// Referenced by t_regexextractParameter.
t_regexParameter
: COMMAND_TERAGREP_MODE_REGEX stringType
;

// input parameter: the COMMAND_TERAGREP_MODE_INPUT keyword followed by a fieldType value
// (a field, not a string). Shared by the tokenizer, regexextract and bloom option rules.
t_inputParameter
: COMMAND_TERAGREP_MODE_INPUT fieldType
;

Expand Down Expand Up @@ -131,7 +140,7 @@ t_getArchiveSummaryParameter
;

t_bloomOptionParameter
: COMMAND_TERAGREP_MODE_UPDATE t_estimatesParameter? t_inputParamater? | COMMAND_TERAGREP_MODE_CREATE t_estimatesParameter? t_inputParamater? | COMMAND_TERAGREP_MODE_ESTIMATE t_inputParamater? t_outputParameter?
: COMMAND_TERAGREP_MODE_UPDATE t_estimatesParameter? t_inputParameter? | COMMAND_TERAGREP_MODE_CREATE t_estimatesParameter? t_inputParameter? | COMMAND_TERAGREP_MODE_ESTIMATE t_inputParameter? t_outputParameter?
;

t_hostParameter
Expand Down
37 changes: 36 additions & 1 deletion src/test/java/com/teragrep/pth_03/tests/TeragrepSyntaxTests.java
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,6 @@
import com.teragrep.pth_03.ParserStructureTestingUtility;
import com.teragrep.pth_03.ParserSyntaxTestingUtility;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.ValueSource;
import org.w3c.dom.NodeList;
Expand Down Expand Up @@ -358,4 +357,40 @@ void testHdfsSaveAllParameters(String arg) {
assertEquals(1, headerNodes.getLength());
assertEquals(1, pathNodes.getLength());
}

/**
 * Verifies that a bare {@code regexextract} command yields exactly one
 * {@code t_regexextractParameter} node.
 *
 * @param arg base name (without the {@code .txt} extension) of the query file under
 *            {@code src/test/resources/antlr4/commands/teragrep/}
 */
@ParameterizedTest
@ValueSource(strings = {"teragrep_regexextract"})
void testRegexExtract(String arg) {
    final String queryFile = "src/test/resources/antlr4/commands/teragrep/" + arg + ".txt";
    final String ruleXpath = "/root/transformStatement/teragrepTransformation/t_execParameter/t_regexextractParameter";
    final ParserStructureTestingUtility utility = new ParserStructureTestingUtility();

    // The query itself must not throw; its result is cast to a NodeList.
    final NodeList matches = Assertions.assertDoesNotThrow(
            () -> (NodeList) utility.xpathQueryFile(queryFile, ruleXpath, false)
    );

    // Exactly one regexextract node is expected.
    assertEquals(1, matches.getLength());
}

/**
 * Verifies that a {@code regexextract} command given with its regex, input and output
 * parameters yields exactly one {@code t_regexextractParameter} node and exactly one
 * node for each of its three parameter rules.
 *
 * @param arg base name (without the {@code .txt} extension) of the query file under
 *            {@code src/test/resources/antlr4/commands/teragrep/}
 */
@ParameterizedTest
@ValueSource(strings = {"teragrep_regexextract_params"})
void testRegexExtractWithParams(String arg) {
    final String queryFile = "src/test/resources/antlr4/commands/teragrep/" + arg + ".txt";

    // The parameter rules are direct children of the regexextract rule.
    final String parentXpath = "/root/transformStatement/teragrepTransformation/t_execParameter/t_regexextractParameter";
    final String regexXpath = parentXpath + "/t_regexParameter";
    final String inputXpath = parentXpath + "/t_inputParameter";
    final String outputXpath = parentXpath + "/t_outputParameter";

    final ParserStructureTestingUtility utility = new ParserStructureTestingUtility();

    // All four queries run before any count is checked.
    // NOTE(review): only the first query passes true as the third argument; the others
    // pass false. Confirm whether that difference is intentional.
    final NodeList parentMatches = Assertions.assertDoesNotThrow(
            () -> (NodeList) utility.xpathQueryFile(queryFile, parentXpath, true)
    );
    final NodeList regexMatches = Assertions.assertDoesNotThrow(
            () -> (NodeList) utility.xpathQueryFile(queryFile, regexXpath, false)
    );
    final NodeList inputMatches = Assertions.assertDoesNotThrow(
            () -> (NodeList) utility.xpathQueryFile(queryFile, inputXpath, false)
    );
    final NodeList outputMatches = Assertions.assertDoesNotThrow(
            () -> (NodeList) utility.xpathQueryFile(queryFile, outputXpath, false)
    );

    // Each rule must occur exactly once.
    assertEquals(1, parentMatches.getLength());
    assertEquals(1, regexMatches.getLength());
    assertEquals(1, inputMatches.getLength());
    assertEquals(1, outputMatches.getLength());
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
<!-- /*
* Teragrep Data Processing Language Parser Library PTH-03
* Copyright (C) 2019, 2020, 2021, 2022, 2023 Suomen Kanuuna Oy
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://github.com/teragrep/teragrep/blob/main/LICENSE>.
*
*
* Additional permission under GNU Affero General Public License version 3
* section 7
*
* If you modify this Program, or any covered work, by linking or combining it
* with other code, such other code is not for that reason alone subject to any
* of the requirements of the GNU Affero GPL version 3 as long as this Program
* is the same Program as licensed from Suomen Kanuuna Oy without any additional
* modifications.
*
* Supplemented terms under GNU Affero General Public License version 3
* section 7
*
* Origin of the software must be attributed to Suomen Kanuuna Oy. Any modified
* versions must be marked as "Modified version of" The Program.
*
* Names of the licensors and authors may not be used for publicity purposes.
*
* No rights are granted for use of trade names, trademarks, or service marks
* which are in The Program if any.
*
* Licensee must indemnify licensors and authors for any liability that these
* contractual assumptions impose on licensors and authors.
*
* To the extent this program is licensed as part of the Commercial versions of
* Teragrep, the applicable Commercial License may apply to this file if you as
* a licensee so wish it.
*/ -->
| teragrep exec regexextract
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
<!-- /*
* Teragrep Data Processing Language Parser Library PTH-03
* Copyright (C) 2019, 2020, 2021, 2022, 2023 Suomen Kanuuna Oy
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://github.com/teragrep/teragrep/blob/main/LICENSE>.
*
*
* Additional permission under GNU Affero General Public License version 3
* section 7
*
* If you modify this Program, or any covered work, by linking or combining it
* with other code, such other code is not for that reason alone subject to any
* of the requirements of the GNU Affero GPL version 3 as long as this Program
* is the same Program as licensed from Suomen Kanuuna Oy without any additional
* modifications.
*
* Supplemented terms under GNU Affero General Public License version 3
* section 7
*
* Origin of the software must be attributed to Suomen Kanuuna Oy. Any modified
* versions must be marked as "Modified version of" The Program.
*
* Names of the licensors and authors may not be used for publicity purposes.
*
* No rights are granted for use of trade names, trademarks, or service marks
* which are in The Program if any.
*
* Licensee must indemnify licensors and authors for any liability that these
* contractual assumptions impose on licensors and authors.
*
* To the extent this program is licensed as part of the Commercial versions of
* Teragrep, the applicable Commercial License may apply to this file if you as
* a licensee so wish it.
*/ -->
| teragrep exec regexextract regex=regex input=input output=output

0 comments on commit 840f155

Please sign in to comment.