diff --git a/README.md b/README.md
index c737de6f..6930232b 100644
--- a/README.md
+++ b/README.md
@@ -142,6 +142,32 @@ All environment variables and properties begin with the prefix `PRAVEGA_SENSOR_C
 For a list of commonly-used configuration values, see the [sample configuration files](pravega-sensor-collector/src/main/dist/conf).
 
+#### Sample configuration properties
+
+| Configuration Parameter | Value | Description |
+|-------------------------|:-----:|:------------|
+| `CREATE_SCOPE` | `false` | Boolean. Set to `false` when the scope already exists (for example, on SDP). |
+| `ROUTING_KEY` | `routingkey1` | Pravega routing key. |
+| `ENABLE_PRAVEGA` | `true` | Boolean. Default value: `true`. |
+| `pravega_client_auth_method` | `Bearer` | Authentication method used to connect to the Pravega client. |
+| `pravega_client_auth_loadDynamic` | `true` | Boolean. Default value: `true`. |
+| `KEYCLOAK_SERVICE_ACCOUNT_FILE` | `/opt/Pravega-sensor-collector/PSC_Files/keycloak-project1.json` | Path to the Keycloak service account file. |
+| `PRAVEGA_SENSOR_COLLECTOR_ACCEL2_CLASS` | Raw file: `io.pravega.sensor.collector.file.rawfile.RawFileIngestService`<br>CSV file: `io.pravega.sensor.collector.file.csvfile.CsvFileIngestService`<br>Parquet file: `io.pravega.sensor.collector.file.parquet.ParquetFileIngestService` | Fully-qualified class name of the Pravega Sensor Collector ingest service to run. |
+| `PRAVEGA_SENSOR_COLLECTOR_RAW1_FILE_SPEC` | `/opt/Pravega-sensor-collector/files1` | Directory from which the application reads files for processing. |
+| `PRAVEGA_SENSOR_COLLECTOR_RAW1_FILE_EXTENSION` | `parquet` | Extension of the files to ingest. Examples: raw file: `parquet`; CSV file: `csv`; Parquet file: `parquet`. |
+| `PRAVEGA_SENSOR_COLLECTOR_RAW1_DATABASE_FILE` | `/opt/Pravega-sensor-collector/PSC_Files/datafile.db` | Path where the state database file is created, for example `/opt/database/databasefile.db`. |
+| `PRAVEGA_SENSOR_COLLECTOR_RAW1_PRAVEGA_CONTROLLER_URI` | `tls://pravega-controller.foggy-nelson.ns.sdp.hop.lab.emc.com:443` | Pravega controller URI. |
+| `PRAVEGA_SENSOR_COLLECTOR_RAW1_SCOPE` | `scope1` | Scope name for Pravega Sensor Collector. |
+| `PRAVEGA_SENSOR_COLLECTOR_RAW1_STREAM` | `stream1` | Stream name for Pravega Sensor Collector. |
+| `PRAVEGA_SENSOR_COLLECTOR_RAW1_ROUTING_KEY` | `routingkey1` | Routing key for Pravega Sensor Collector. |
+| `PRAVEGA_SENSOR_COLLECTOR_RAW1_DELETE_COMPLETED_FILES` | `false` | If `true`, PSC deletes each file immediately after it has been processed. |
+| `PRAVEGA_SENSOR_COLLECTOR_RAW1_TRANSACTION_TIMEOUT_MINUTES` | `2.0` | Timeout for each transaction. Default value: 2 minutes. |
+| `PRAVEGA_SENSOR_COLLECTOR_RAW1_CREATE_SCOPE` | `false` | Boolean. If Pravega is on SDP, set this to `false`. |
+| `HADOOP_HOME` | `${HOME}/dev` | On Windows, Hadoop requires native libraries to work properly; downloading `winutils.exe` fixes this. See [WindowsProblems](https://cwiki.apache.org/confluence/display/HADOOP2/WindowsProblems) and set `HADOOP_HOME` to the directory containing `bin/winutils.exe`.<br>**Required only for the Parquet file type, not for CSV or raw file ingestion.** |
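+
+For orientation, a minimal raw-file instance built from the values above might look like the following sketch (values are illustrative, not defaults; the `PRAVEGA_SENSOR_COLLECTOR_RAW1_CLASS` key assumes the `<INSTANCE>_CLASS` pattern shown for `ACCEL2` above):
+
+```
+PRAVEGA_SENSOR_COLLECTOR_RAW1_CLASS=io.pravega.sensor.collector.file.rawfile.RawFileIngestService
+PRAVEGA_SENSOR_COLLECTOR_RAW1_FILE_SPEC=/opt/Pravega-sensor-collector/files1
+PRAVEGA_SENSOR_COLLECTOR_RAW1_FILE_EXTENSION=parquet
+PRAVEGA_SENSOR_COLLECTOR_RAW1_DATABASE_FILE=/opt/Pravega-sensor-collector/PSC_Files/datafile.db
+PRAVEGA_SENSOR_COLLECTOR_RAW1_PRAVEGA_CONTROLLER_URI=tls://pravega-controller.example.com:443
+PRAVEGA_SENSOR_COLLECTOR_RAW1_SCOPE=scope1
+PRAVEGA_SENSOR_COLLECTOR_RAW1_STREAM=stream1
+PRAVEGA_SENSOR_COLLECTOR_RAW1_ROUTING_KEY=routingkey1
+PRAVEGA_SENSOR_COLLECTOR_RAW1_DELETE_COMPLETED_FILES=false
+PRAVEGA_SENSOR_COLLECTOR_RAW1_TRANSACTION_TIMEOUT_MINUTES=2.0
+PRAVEGA_SENSOR_COLLECTOR_RAW1_CREATE_SCOPE=false
+```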
+
+
 ### Install the Service
 
 1. The only prerequisite on the target system is Java 11.
@@ -270,11 +296,13 @@ If DNS is not configured throughout your network, you may need to edit the /etc/
 
 ### Running as a Windows Service
 
-1. Download winsw.exe from https://github.com/winsw/winsw/releases and rename it as PravegaSensorCollectorApp.exe.
+1. Download winsw.exe from https://github.com/winsw/winsw/releases and rename it to PravegaSensorCollectorApp.exe. Save it in the same folder as [PravegaSensorCollectorApp.xml](windows-service/PravegaSensorCollectorApp.xml).
+
+2. Modify PravegaSensorCollectorApp.xml. Check PRAVEGA_SENSOR_COLLECTOR_RAW1_PRAVEGA_CONTROLLER_URI.
 
-2. Modify [PravegaSensorCollectorApp.xml](windows-service/PravegaSensorCollectorApp.xml). Check PRAVEGA_SENSOR_COLLECTOR_RAW1_PRAVEGA_CONTROLLER_URI.
+3. Add the path to the jar file `pravega-sensor-collector/build/libs/pravega-sensor-collector-${APP_VERSION}.jar` generated by build-installer.sh in PravegaSensorCollectorApp.xml.
 
-3. Install and run the service using following commands:
+4. Install and run the service using the following commands:
 ```
 PravegaSensorCollectorApp.exe install
 PravegaSensorCollectorApp.exe start
diff --git a/build.gradle b/build.gradle
index 0c778046..916bc9e9 100644
--- a/build.gradle
+++ b/build.gradle
@@ -15,7 +15,7 @@
  * user guide available at https://docs.gradle.org/3.4.1/userguide/tutorial_java_projects.html
  */
 
-configurations.all {
+configurations.configureEach {
     // Check for updates every build
     resolutionStrategy.cacheChangingModulesFor 0, "seconds"
 }
@@ -23,6 +23,7 @@ configurations.all {
 subprojects {
     repositories {
         mavenLocal()
+        mavenCentral()
         maven {
             url "https://oss.jfrog.org/jfrog-dependencies"
         }
diff --git a/config/checkstyle.xml b/config/checkstyle.xml
new file mode 100644
index 00000000..b910873f
--- /dev/null
+++ b/config/checkstyle.xml
@@ -0,0 +1,144 @@
[144 added lines of Checkstyle XML configuration omitted]
diff --git a/config/eclipse.xml b/config/eclipse.xml
new file mode 100644
index 00000000..937f5828
--- /dev/null
+++ b/config/eclipse.xml
@@ -0,0 +1,313 @@
[313 added lines of Eclipse formatter XML omitted]
diff --git a/config/import-control.xml b/config/import-control.xml
new file mode 100644
index 00000000..b9c48b02
--- /dev/null
+++ b/config/import-control.xml
@@ -0,0 +1,56 @@
[56 added lines of Checkstyle import-control XML omitted]
diff --git a/config/intelij.xml b/config/intelij.xml
new file mode 100644
index 00000000..df3b6fa0
--- /dev/null
+++ b/config/intelij.xml
@@ -0,0 +1,39 @@
[39 added lines of IntelliJ code-style XML omitted]
diff --git a/config/spotbugs-exclude.xml b/config/spotbugs-exclude.xml
new file mode 100644
index 00000000..170dff33
--- /dev/null
+++ b/config/spotbugs-exclude.xml
@@ -0,0 +1,49 @@
[49 added lines of SpotBugs exclude-filter XML omitted]
diff --git a/config/spotbugs-include.xml b/config/spotbugs-include.xml
new file mode 100644
index 00000000..b03c9895
--- /dev/null
+++ b/config/spotbugs-include.xml
@@ -0,0 +1,52 @@
[52 added lines of SpotBugs include-filter XML omitted]
diff --git a/config/suppressions.xml b/config/suppressions.xml
new file mode 100644
index 00000000..fdb40328
--- /dev/null
+++ b/config/suppressions.xml
@@ -0,0 +1,15 @@
[15 added lines of Checkstyle suppressions XML omitted]
diff --git a/gradle.properties b/gradle.properties
index 0b6c38b5..fa99c486 100644
--- a/gradle.properties
+++ b/gradle.properties
@@ -13,23 +13,27 @@ commonsCLIVersion=1.4
 commonsCSVVersion=1.8
 commonsCodecVersion=1.14
 commonsMath3Version=3.6.1
-grizzlyVersion=2.25.1
-gsonVersion=2.8.9
+grizzlyVersion=3.1.3
+gsonVersion=2.10.1
 includePravegaCredentials=true
-jacksonVersion=2.9.10.3
-junitVersion=4.12
+jacksonVersion=2.15.2
+junitVersion=5.6.2
 jakartaBindVersion=2.3.2
 jaxbVersion=2.3.2
 javaxServletApiVersion=3.0.1
 miloVersion=0.6.8
 pravegaCredentialsVersion=0.12.0
 pravegaVersion=0.12.0
-qosLogbackVersion=1.2.3
-slf4jApiVersion=1.7.25
-sqliteVersion=3.32.3
-parquetVersion=1.12.1
+qosLogbackVersion=1.4.11
+shadowPluginVersion=7.1.0
+slf4jApiVersion=2.0.9
+sqliteVersion=3.43.0.0
+parquetVersion=1.13.1
 hadoopVersion=3.2.1
-
+mockitoVersion=3.12.4
+spotbugsVersion=4.8.1
+spotbugsPluginVersion=5.1.4
+checkstyleVersion=10.12.5
 # Application version. This will be overridden by APP_VERSION in scripts/env.sh when using scripts/publish.sh.
 version=unknown
diff --git a/gradle/wrapper/gradle-wrapper.properties b/gradle/wrapper/gradle-wrapper.properties
index b45533d3..06abd188 100644
--- a/gradle/wrapper/gradle-wrapper.properties
+++ b/gradle/wrapper/gradle-wrapper.properties
@@ -11,4 +11,4 @@ distributionBase=GRADLE_USER_HOME
 distributionPath=wrapper/dists
 zipStoreBase=GRADLE_USER_HOME
 zipStorePath=wrapper/dists
-distributionUrl=https\://services.gradle.org/distributions/gradle-6.3-all.zip
+distributionUrl=https\://services.gradle.org/distributions/gradle-7.6.3-all.zip
diff --git a/parquet-file-sample-data/sub1.parquet b/parquet-file-sample-data/sub1.parquet
new file mode 100644
index 00000000..05b51be6
Binary files /dev/null and b/parquet-file-sample-data/sub1.parquet differ
diff --git a/parquet-file-sample-data/sub2.parquet b/parquet-file-sample-data/sub2.parquet
new file mode 100644
index 00000000..c4344b12
Binary files /dev/null and b/parquet-file-sample-data/sub2.parquet differ
diff --git a/parquet-file-sample-data/sub3.parquet b/parquet-file-sample-data/sub3.parquet
new file mode 100644
index 00000000..d6568258
Binary files /dev/null and b/parquet-file-sample-data/sub3.parquet differ
diff --git a/parquet-file-sample-data/test_file/f1-f10/sub1.parquet b/parquet-file-sample-data/test_file/f1-f10/sub1.parquet
new file mode 100644
index 00000000..05b51be6
Binary files /dev/null and b/parquet-file-sample-data/test_file/f1-f10/sub1.parquet differ
diff --git a/parquet-file-sample-data/test_file/sub1.parquet b/parquet-file-sample-data/test_file/sub1.parquet
new file mode 100644
index 00000000..05b51be6
Binary files /dev/null and b/parquet-file-sample-data/test_file/sub1.parquet differ
diff --git a/parquet-file-sample-data/test_file/sub2.parquet b/parquet-file-sample-data/test_file/sub2.parquet
new file mode 100644
index 00000000..c4344b12 Binary files /dev/null and b/parquet-file-sample-data/test_file/sub2.parquet differ diff --git a/parquet-file-sample-data/test_file/sub3.parquet b/parquet-file-sample-data/test_file/sub3.parquet new file mode 100644 index 00000000..d6568258 Binary files /dev/null and b/parquet-file-sample-data/test_file/sub3.parquet differ diff --git a/pravega-sensor-collector/build.gradle b/pravega-sensor-collector/build.gradle index 2c63d8b7..4c5031d0 100644 --- a/pravega-sensor-collector/build.gradle +++ b/pravega-sensor-collector/build.gradle @@ -7,9 +7,25 @@ * * http://www.apache.org/licenses/LICENSE-2.0 */ + +buildscript { + repositories { + maven { + url = uri("https://plugins.gradle.org/m2/") + } + } + dependencies { + classpath("com.github.spotbugs.snom:spotbugs-gradle-plugin:${spotbugsPluginVersion}") + classpath("gradle.plugin.com.github.johnrengelman:shadow:${shadowPluginVersion}") + } +} + apply plugin: "java" -apply plugin: "maven" +apply plugin: "maven-publish" +apply plugin: "com.github.spotbugs" +apply plugin: 'checkstyle' apply plugin: "application" +apply plugin: "com.github.johnrengelman.shadow" group = "io.pravega" archivesBaseName = "pravega-sensor-collector" @@ -18,48 +34,58 @@ mainClassName = "io.pravega.sensor.collector.PravegaSensorCollectorApp" sourceCompatibility = 11 targetCompatibility = 11 +test { + useJUnitPlatform() +} + dependencies { - compile "org.slf4j:slf4j-api:${slf4jApiVersion}" - compile "ch.qos.logback:logback-classic:${qosLogbackVersion}" - compile "ch.qos.logback:logback-core:${qosLogbackVersion}" + implementation "org.slf4j:slf4j-api:${slf4jApiVersion}" + implementation "ch.qos.logback:logback-classic:${qosLogbackVersion}" + implementation "ch.qos.logback:logback-core:${qosLogbackVersion}" - compile "io.pravega:pravega-client:${pravegaVersion}", + implementation "io.pravega:pravega-client:${pravegaVersion}", "io.pravega:pravega-common:${pravegaVersion}", "commons-cli:commons-cli:${commonsCLIVersion}" if (includePravegaCredentials.toBoolean()) { - compile "io.pravega:pravega-keycloak-client:${pravegaCredentialsVersion}" + implementation "io.pravega:pravega-keycloak-client:${pravegaCredentialsVersion}" } - compile "com.fasterxml.jackson.core:jackson-databind:${jacksonVersion}" - compile "org.xerial:sqlite-jdbc:${sqliteVersion}" - compile "org.apache.commons:commons-math3:${commonsMath3Version}" - compile "org.apache.commons:commons-csv:${commonsCSVVersion}" - compile "commons-codec:commons-codec:${commonsCodecVersion}" - compile "com.github.vladimir-bukhtoyarov:bucket4j-core:${bucket4jVersion}" - compile "org.eclipse.milo:sdk-client:${miloVersion}" - compile "com.google.code.gson:gson:${gsonVersion}" + implementation "com.fasterxml.jackson.core:jackson-databind:${jacksonVersion}" + implementation "org.xerial:sqlite-jdbc:${sqliteVersion}" + implementation "org.apache.commons:commons-math3:${commonsMath3Version}" + implementation "org.apache.commons:commons-csv:${commonsCSVVersion}" + implementation "commons-codec:commons-codec:${commonsCodecVersion}" + implementation "com.github.vladimir-bukhtoyarov:bucket4j-core:${bucket4jVersion}" + implementation "org.eclipse.milo:sdk-client:${miloVersion}" + implementation "com.google.code.gson:gson:${gsonVersion}" + + implementation "org.apache.parquet:parquet-avro:${parquetVersion}" + implementation "org.apache.parquet:parquet-hadoop:${parquetVersion}" + implementation "org.apache.hadoop:hadoop-client:${hadoopVersion}" - compile "org.apache.parquet:parquet-avro:${parquetVersion}" - compile 
"org.apache.parquet:parquet-hadoop:${parquetVersion}" - compile "org.apache.hadoop:hadoop-client:${hadoopVersion}" + testImplementation "org.junit.jupiter:junit-jupiter-api:${junitVersion}" + testImplementation "org.junit.vintage:junit-vintage-engine:${junitVersion}" + testRuntimeOnly "org.junit.jupiter:junit-jupiter-engine:${junitVersion}" + testImplementation "org.junit.platform:junit-platform-launcher" - testCompile "junit:junit:${junitVersion}" + testImplementation "org.mockito:mockito-core:${mockitoVersion}" - testCompile "org.glassfish.jersey.containers:jersey-container-grizzly2-http:${grizzlyVersion}" - testCompile "jakarta.xml.bind:jakarta.xml.bind-api:${jakartaBindVersion}" - testCompile "org.glassfish.jaxb:jaxb-runtime:${jaxbVersion}" - testCompile "javax.servlet:javax.servlet-api:${javaxServletApiVersion}" + testImplementation "org.glassfish.jersey.containers:jersey-container-grizzly2-http:${grizzlyVersion}" + testImplementation "jakarta.xml.bind:jakarta.xml.bind-api:${jakartaBindVersion}" + testImplementation "org.glassfish.jaxb:jaxb-runtime:${jaxbVersion}" + testImplementation "javax.servlet:javax.servlet-api:${javaxServletApiVersion}" + spotbugsPlugins 'com.h3xstream.findsecbugs:findsecbugs-plugin:1.12.0' } -tasks.withType(JavaCompile) { +tasks.withType(JavaCompile).configureEach { options.encoding = "UTF-8" } distributions { main { - baseName = archivesBaseName + distributionBaseName = archivesBaseName } } @@ -77,9 +103,56 @@ startScripts { } } -task runLeapAPIMockServer(type: JavaExec) { +shadowJar{ + archiveBaseName = 'pravega-sensor-collector' + archiveClassifier = '' +} + +tasks.register('runLeapAPIMockServer', JavaExec) { group = "Execution" description = "Run the mock Leap server" classpath = sourceSets.test.runtimeClasspath main = "io.pravega.sensor.collector.leap.LeapAPIMock" } + +tasks.withType(com.github.spotbugs.snom.SpotBugsTask) { + reports { + xml { + required.set(false) + } + html { + required.set(true) + } + } +} + +spotbugs { + toolVersion = spotbugsVersion + ignoreFailures = true + showProgress = true + effort = 'max' + reportLevel = 'default' + includeFilter = file("$rootDir/config/spotbugs-include.xml") + excludeFilter = file("$rootDir/config/spotbugs-exclude.xml") + //baselineFile = file("baseline.xml") +} +checkstyle { + toolVersion = checkstyleVersion + configFile = file("$rootDir/config/checkstyle.xml") + ignoreFailures = true + configProperties = [importControlFile: "$rootDir/config/import-control.xml", + suppressionsFile: "$rootDir/config/suppressions.xml"] + checkstyleMain { + source = sourceSets.main.allSource + } + configurations { + checkstyle + } + + dependencies{ + assert project.hasProperty("checkstyleVersion") + + checkstyle "com.puppycrawl.tools:checkstyle:${checkstyleVersion}" + } +} + diff --git a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/EventGenerator.java b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/EventGenerator.java index 6f25b591..f917934d 100644 --- a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/EventGenerator.java +++ b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/EventGenerator.java @@ -9,106 +9,25 @@ */ package io.pravega.sensor.collector.file; -import com.fasterxml.jackson.databind.JsonNode; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.fasterxml.jackson.databind.node.ArrayNode; -import com.fasterxml.jackson.databind.node.ObjectNode; import com.google.common.io.CountingInputStream; -import 
org.apache.commons.csv.CSVFormat;
-import org.apache.commons.csv.CSVParser;
-import org.apache.commons.csv.CSVRecord;
-import org.apache.commons.lang3.tuple.ImmutablePair;
+import io.pravega.sensor.collector.util.PravegaWriterEvent;
 import org.apache.commons.lang3.tuple.Pair;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
 
 import java.io.IOException;
-import java.nio.charset.StandardCharsets;
-import java.util.Map;
 import java.util.function.Consumer;
 
-public class EventGenerator {
-    private static final Logger log = LoggerFactory.getLogger(EventGenerator.class);
-
-    private final String routingKey;
-    private final int maxRecordsPerEvent;
-    private final ObjectNode eventTemplate;
-    private final ObjectMapper mapper;
-
-    public EventGenerator(String routingKey, int maxRecordsPerEvent, ObjectNode eventTemplate, ObjectMapper mapper) {
-        this.routingKey = routingKey;
-        this.maxRecordsPerEvent = maxRecordsPerEvent;
-        this.eventTemplate = eventTemplate;
-        this.mapper = mapper;
-    }
-
-    public static EventGenerator create(String routingKey, int maxRecordsPerEvent, String eventTemplateStr, String writerId) {
-        try {
-            final ObjectMapper mapper = new ObjectMapper();
-            final ObjectNode eventTemplate = (ObjectNode) mapper.readTree(eventTemplateStr);
-            eventTemplate.put("WriterId", writerId);
-            return new EventGenerator(routingKey, maxRecordsPerEvent, eventTemplate, mapper);
-        } catch (IOException e) {
-            throw new RuntimeException(e);
-        }
-    }
-
-    public static EventGenerator create(String routingKey, int maxRecordsPerEvent) throws IOException {
-        return create(routingKey, maxRecordsPerEvent, "{}", "MyWriterId");
-    }
-
-    /**
-     * @param inputStream
-     * @param firstSequenceNumber
-     * @return next sequence number, end offset
-     */
-    protected Pair<Long, Long> generateEventsFromInputStream(CountingInputStream inputStream, long firstSequenceNumber, Consumer<PravegaWriterEvent> consumer) throws IOException {
-        final CSVFormat format = CSVFormat.DEFAULT.withFirstRecordAsHeader();
-        final CSVParser parser = CSVParser.parse(inputStream, StandardCharsets.UTF_8, format);
-        long nextSequenceNumber = firstSequenceNumber;
-        int numRecordsInEvent = 0;
-        ObjectNode jsonEvent = null;
-        for (CSVRecord record: parser) {
-            if (numRecordsInEvent >= maxRecordsPerEvent) {
-                consumer.accept(new PravegaWriterEvent(routingKey, nextSequenceNumber, mapper.writeValueAsBytes(jsonEvent)));
-                nextSequenceNumber++;
-                jsonEvent = null;
-                numRecordsInEvent = 0;
-            }
-            if (jsonEvent == null) {
-                jsonEvent = eventTemplate.deepCopy();
-            }
-            for (Map.Entry<String, String> entry: record.toMap().entrySet()) {
-                addValueToArray(jsonEvent, entry.getKey(), entry.getValue());
-            }
-            numRecordsInEvent++;
-        }
-        if (jsonEvent != null) {
-            consumer.accept(new PravegaWriterEvent(routingKey, nextSequenceNumber, mapper.writeValueAsBytes(jsonEvent)));
-            nextSequenceNumber++;
-        }
-        final long endOffset = inputStream.getCount();
-        return new ImmutablePair<>(nextSequenceNumber, endOffset);
-    }
+/**
+ * The EventGenerator is responsible for generating events, depending on the file type.
+ */
+public interface EventGenerator {
 
-    protected JsonNode stringValueToJsonNode(String s) {
-        // TODO: convert timestamp
-        try {
-            return mapper.getNodeFactory().numberNode(Long.parseLong(s));
-        } catch (NumberFormatException ignored) {}
-        try {
-            return mapper.getNodeFactory().numberNode(Double.parseDouble(s));
-        } catch (NumberFormatException ignored) {}
-        return mapper.getNodeFactory().textNode(s);
-    }
+    /**
+     * Generate events from an input stream.
+     * The event generation logic differs depending on the file type.
+     *
+     * @param inputStream
+     * @param firstSequenceNumber
+     * @return next sequence number, end offset
+     */
+    Pair<Long, Long> generateEventsFromInputStream(CountingInputStream inputStream, long firstSequenceNumber, Consumer<PravegaWriterEvent> consumer) throws IOException;
 
-    protected void addValueToArray(ObjectNode objectNode, String key, String value) {
-        final JsonNode node = objectNode.get(key);
-        final JsonNode valueNode = stringValueToJsonNode(value);
-        if (node instanceof ArrayNode ) {
-            ((ArrayNode) node).add(valueNode);
-        } else {
-            objectNode.putArray(key).add(valueNode);
-        }
-    }
 }
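The interface above leaves event construction to per-file-type implementations. As a hedged illustration (not part of this PR), a trivial implementation that emits one event per file could look like the sketch below; the class name `WholeFileEventGenerator` is hypothetical, while `EventGenerator`, `PravegaWriterEvent`, and the method contract are taken from this diff:

```java
package io.pravega.sensor.collector.file;

import com.google.common.io.CountingInputStream;
import io.pravega.sensor.collector.util.PravegaWriterEvent;
import org.apache.commons.lang3.tuple.ImmutablePair;
import org.apache.commons.lang3.tuple.Pair;

import java.io.IOException;
import java.util.function.Consumer;

// Hypothetical class, for illustration only.
public class WholeFileEventGenerator implements EventGenerator {
    private final String routingKey;

    public WholeFileEventGenerator(String routingKey) {
        this.routingKey = routingKey;
    }

    @Override
    public Pair<Long, Long> generateEventsFromInputStream(
            CountingInputStream inputStream, long firstSequenceNumber,
            Consumer<PravegaWriterEvent> consumer) throws IOException {
        // Emit the remainder of the stream as a single event.
        final byte[] payload = inputStream.readAllBytes();
        consumer.accept(new PravegaWriterEvent(routingKey, firstSequenceNumber, payload));
        // Return the next sequence number and the end offset read from the stream.
        return new ImmutablePair<>(firstSequenceNumber + 1, inputStream.getCount());
    }
}
```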
diff --git a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/parquet/ParquetFileConfig.java b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/FileConfig.java
similarity index 72%
rename from pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/parquet/ParquetFileConfig.java
rename to pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/FileConfig.java
index 78111602..d4b96d37 100644
--- a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/parquet/ParquetFileConfig.java
+++ b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/FileConfig.java
@@ -7,19 +7,19 @@
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  */
-package io.pravega.sensor.collector.parquet;
+package io.pravega.sensor.collector.file;
 
-/**
- * Config passed to Pravega Sensor Collector
+/**
+ * Configuration passed to the file ingest services.
  */
-public class ParquetFileConfig {
+public class FileConfig {
     public final String stateDatabaseFileName;
     public final String fileSpec;
     public final String fileExtension;
     public final String routingKey;
     public final String streamName;
     public final String eventTemplateStr;
-
+    public final String fileType;
     /**
      * Also known as samplesPerEvent.
      */
@@ -29,7 +29,9 @@ public class ParquetFileConfig {
     public final boolean exactlyOnce;
     public final double transactionTimeoutMinutes;
 
-    public ParquetFileConfig(String stateDatabaseFileName, String fileSpec, String fileExtension, String routingKey, String streamName, String eventTemplateStr, int maxRecordsPerEvent, boolean enableDeleteCompletedFiles, boolean exactlyOnce, double transactionTimeoutMinutes) {
+    public final long minTimeInMillisToUpdateFile;
+
+    public FileConfig(String stateDatabaseFileName, String fileSpec, String fileExtension, String routingKey, String streamName, String eventTemplateStr, int maxRecordsPerEvent, boolean enableDeleteCompletedFiles, boolean exactlyOnce, double transactionTimeoutMinutes, long minTimeInMillisToUpdateFile, String fileType) {
         this.stateDatabaseFileName = stateDatabaseFileName;
         this.fileSpec = fileSpec;
         this.fileExtension = fileExtension;
@@ -40,14 +42,17 @@ public ParquetFileConfig(String stateDatabaseFileName, String fileSpec, String f
         this.enableDeleteCompletedFiles = enableDeleteCompletedFiles;
         this.exactlyOnce = exactlyOnce;
         this.transactionTimeoutMinutes = transactionTimeoutMinutes;
+        this.minTimeInMillisToUpdateFile = minTimeInMillisToUpdateFile;
+        this.fileType = fileType;
     }
 
     @Override
     public String toString() {
-        return "ParquetFileConfig{" +
+        return "FileConfig{" +
                 "stateDatabaseFileName='" + stateDatabaseFileName + '\'' +
                 ", fileSpec='" + fileSpec + '\'' +
                 ", fileExtension='" + fileExtension + '\'' +
+                ", fileType='" + fileType + '\'' +
                 ", routingKey='" + routingKey + '\'' +
                 ", streamName='" + streamName + '\'' +
                 ", eventTemplateStr='" + eventTemplateStr + '\'' +
@@ -55,8 +60,7 @@ public String toString() {
                 ", enableDeleteCompletedFiles=" + enableDeleteCompletedFiles +
                 ", exactlyOnce=" + exactlyOnce +
                 ", transactionTimeoutMinutes=" + transactionTimeoutMinutes +
+                ", minTimeInMillisToUpdateFile=" + minTimeInMillisToUpdateFile +
                 '}';
     }
-
-
 }
diff --git a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/parquet/ParquetFileIngestService.java b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/FileIngestService.java
similarity index 63%
rename from pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/parquet/ParquetFileIngestService.java
rename to pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/FileIngestService.java
index 97a8cd3e..6926a8c0 100644
--- a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/parquet/ParquetFileIngestService.java
+++ b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/FileIngestService.java
@@ -7,30 +7,27 @@
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  */
-package io.pravega.sensor.collector.parquet;
-
-import java.util.concurrent.Executors;
-import java.util.concurrent.ScheduledExecutorService;
-import java.util.concurrent.ScheduledFuture;
-import java.util.concurrent.ThreadFactory;
-import java.util.concurrent.TimeUnit;
+package io.pravega.sensor.collector.file;
 
 import com.google.common.util.concurrent.ThreadFactoryBuilder;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
 import io.pravega.client.EventStreamClientFactory;
 import io.pravega.sensor.collector.DeviceDriver;
 import io.pravega.sensor.collector.DeviceDriverConfig;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.concurrent.Executors;
+import java.util.concurrent.ScheduledExecutorService;
+import java.util.concurrent.ScheduledFuture;
+import java.util.concurrent.ThreadFactory;
+import
java.util.concurrent.TimeUnit; /** - * Ingestion service for parquet file data. + * Ingestion service with common implementation logic for all files. */ -public class ParquetFileIngestService extends DeviceDriver{ - private static final Logger log = LoggerFactory.getLogger(ParquetFileIngestService.class); - +public abstract class FileIngestService extends DeviceDriver { + private static final Logger log = LoggerFactory.getLogger(FileIngestService.class); + private static final String FILE_SPEC_KEY = "FILE_SPEC"; private static final String FILE_EXT= "FILE_EXTENSION"; private static final String DELETE_COMPLETED_FILES_KEY = "DELETE_COMPLETED_FILES"; @@ -44,14 +41,17 @@ public class ParquetFileIngestService extends DeviceDriver{ private static final String ROUTING_KEY_KEY = "ROUTING_KEY"; private static final String EXACTLY_ONCE_KEY = "EXACTLY_ONCE"; private static final String TRANSACTION_TIMEOUT_MINUTES_KEY = "TRANSACTION_TIMEOUT_MINUTES"; + private static final String MIN_TIME_IN_MILLIS_TO_UPDATE_FILE_KEY = "MIN_TIME_IN_MILLIS_TO_UPDATE_FILE"; - private final ParquetFileProcessor processor; + private final FileProcessor processor; private final ScheduledExecutorService executor; - private ScheduledFuture task; - public ParquetFileIngestService(DeviceDriverConfig config){ + private ScheduledFuture watchFiletask; + private ScheduledFuture processFileTask; + + public FileIngestService(DeviceDriverConfig config) { super(config); - final ParquetFileConfig parquetFileConfig = new ParquetFileConfig( + final FileConfig fileSequenceConfig = new FileConfig( getDatabaseFileName(), getFileSpec(), getFileExtension(), @@ -61,24 +61,23 @@ public ParquetFileIngestService(DeviceDriverConfig config){ getSamplesPerEvent(), getDeleteCompletedFiles(), getExactlyOnce(), - getTransactionTimeoutMinutes()); - log.info("Parquet File Ingest Config: {}", parquetFileConfig); + getTransactionTimeoutMinutes(), + getMinTimeInMillisToUpdateFile(), + config.getClassName()); + log.info("File Ingest Config: {}", fileSequenceConfig); final String scopeName = getScopeName(); log.info("Scope: {}", scopeName); createStream(scopeName, getStreamName()); - final EventStreamClientFactory clientFactory = getEventStreamClientFactory(scopeName); - processor = ParquetFileProcessor.create(parquetFileConfig, clientFactory); + processor =FileProcessor.create(fileSequenceConfig, clientFactory); ThreadFactory namedThreadFactory = new ThreadFactoryBuilder().setNameFormat( - ParquetFileIngestService.class.getSimpleName() + "-" + config.getInstanceName() + "-%d").build(); + FileIngestService.class.getSimpleName() + "-" + config.getInstanceName() + "-%d").build(); executor = Executors.newScheduledThreadPool(1, namedThreadFactory); - } String getFileSpec() { return getProperty(FILE_SPEC_KEY); } - String getFileExtension() { return getProperty(FILE_EXT, ""); } @@ -126,30 +125,55 @@ boolean getExactlyOnce() { return Double.parseDouble(getProperty(TRANSACTION_TIMEOUT_MINUTES_KEY, Double.toString(18.0 * 60.0))); } - protected void ingestParquetFiles() { - log.trace("ingestParquetFiles: BEGIN"); + long getMinTimeInMillisToUpdateFile() { + return Long.parseLong(getProperty(MIN_TIME_IN_MILLIS_TO_UPDATE_FILE_KEY, "5000")); + } + + protected void watchFiles() { + log.trace("watchFiles: BEGIN"); try { - processor.ingestParquetFiles(); + processor.watchFiles(); } catch (Exception e) { - log.error("Error", e); + log.error("watchFiles: watch file error", e); // Continue on any errors. We will retry on the next iteration. 
         }
-        log.trace("ingestParquetFiles: END");
+        log.trace("watchFiles: END");
+    }
+
+    protected void processFiles() {
+        log.trace("processFiles: BEGIN");
+        try {
+            processor.processFiles();
+        } catch (Exception e) {
+            log.error("processFiles: Process file error", e);
+            // Continue on any errors. We will retry on the next iteration.
+        }
+        log.trace("processFiles: END");
+    }
 
     @Override
     protected void doStart() {
-        task = executor.scheduleAtFixedRate(
-            this::ingestParquetFiles,
+        watchFiletask = executor.scheduleAtFixedRate(
+            this::watchFiles,
             0,
             getIntervalMs(),
             TimeUnit.MILLISECONDS);
-        notifyStarted();
+        /*
+         * Submits a periodic action that becomes enabled immediately for the first time,
+         * and subsequently with a delay of 1 millisecond between the termination of one
+         * execution and the commencement of the next, i.e. immediately after the previous
+         * execution completes.
+         */
+        processFileTask = executor.scheduleWithFixedDelay(
+            this::processFiles,
+            0,
+            1,
+            TimeUnit.MILLISECONDS);
+        notifyStarted();
     }
 
     @Override
     protected void doStop() {
-        task.cancel(false);
+        log.info("doStop: Cancelling ingestion task and process file task");
+        watchFiletask.cancel(false);
+        processFileTask.cancel(false);
     }
-
 }
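The `doStart` comment above distinguishes the two scheduler modes. As a hedged, self-contained illustration (not project code; `SchedulingDemo` is a hypothetical name): `scheduleAtFixedRate` measures the period from the start of one run to the start of the next, while `scheduleWithFixedDelay` measures the delay from the end of one run to the start of the next, which is why `processFiles` effectively loops continuously without overlapping itself.

```java
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;

public class SchedulingDemo {
    public static void main(String[] args) {
        ScheduledExecutorService executor = Executors.newScheduledThreadPool(1);
        // Fixed rate: a run starts every 10 s, measured from the start of the previous run.
        executor.scheduleAtFixedRate(
                () -> System.out.println("watch for new files"),
                0, 10_000, TimeUnit.MILLISECONDS);
        // Fixed delay: the next run starts 1 ms after the previous run finishes,
        // so long-running work delays the next iteration instead of overlapping it.
        executor.scheduleWithFixedDelay(
                () -> System.out.println("process pending files"),
                0, 1, TimeUnit.MILLISECONDS);
        // The demo runs until the process is interrupted.
    }
}
```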
diff --git a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/FileNameWithOffset.java b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/FileNameWithOffset.java
deleted file mode 100644
index 43f71456..00000000
--- a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/FileNameWithOffset.java
+++ /dev/null
@@ -1,55 +0,0 @@
-/**
- * Copyright (c) Dell Inc., or its subsidiaries. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- */
-package io.pravega.sensor.collector.file;
-
-import java.util.Objects;
-
-public class FileNameWithOffset implements Comparable<FileNameWithOffset> {
-    public final String fileName;
-    /**
-     * In some contexts, this is the size of the file.
-     * In the future, this will represent the offset in the file for incrementally ingesting growing log files.
-     * This is partially implemented today.
-     * TODO: Clarify usage of offset.
-     */
-    public final long offset;
-
-    public FileNameWithOffset(String fileName, long offset) {
-        this.fileName = fileName;
-        this.offset = offset;
-    }
-
-    @Override
-    public String toString() {
-        return "FileNameWithOffset{" +
-                "fileName='" + fileName + '\'' +
-                ", offset=" + offset +
-                '}';
-    }
-
-    @Override
-    public boolean equals(Object o) {
-        if (this == o) return true;
-        if (o == null || getClass() != o.getClass()) return false;
-        FileNameWithOffset that = (FileNameWithOffset) o;
-        return offset == that.offset &&
-                Objects.equals(fileName, that.fileName);
-    }
-
-    @Override
-    public int hashCode() {
-        return Objects.hash(fileName, offset);
-    }
-
-    @Override
-    public int compareTo(FileNameWithOffset o) {
-        return this.fileName.compareTo(o.fileName);
-    }
-}
diff --git a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/FileProcessor.java b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/FileProcessor.java
new file mode 100644
index 00000000..4f5e6d58
--- /dev/null
+++ b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/FileProcessor.java
@@ -0,0 +1,284 @@
+/**
+ * Copyright (c) Dell Inc., or its subsidiaries. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ */
+package io.pravega.sensor.collector.file;
+
+import com.google.common.io.CountingInputStream;
+import io.pravega.client.EventStreamClientFactory;
+import io.pravega.client.stream.EventWriterConfig;
+import io.pravega.client.stream.Transaction;
+import io.pravega.client.stream.TxnFailedException;
+import io.pravega.client.stream.impl.ByteArraySerializer;
+import io.pravega.sensor.collector.util.*;
+import org.apache.commons.lang3.tuple.Pair;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.sql.Connection;
+import java.sql.SQLException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Optional;
+import java.util.Set;
+import java.util.UUID;
+import java.util.concurrent.atomic.AtomicLong;
+
+/**
+ * Gets the list of files from the configured directory and processes each file for ingestion.
+ * Keeps track of new files, and deletes ingested files when "DELETE_COMPLETED_FILES" is true.
+ */
+public abstract class FileProcessor {
+    private static final Logger log = LoggerFactory.getLogger(FileProcessor.class);
+
+    private final FileConfig config;
+    private final TransactionStateDB state;
+    private final EventWriter<byte[]> writer;
+    private final TransactionCoordinator transactionCoordinator;
+    private final EventGenerator eventGenerator;
+    private final Path movedFilesDirectory;
+
+    public FileProcessor(FileConfig config, TransactionStateDB state, EventWriter<byte[]> writer, TransactionCoordinator transactionCoordinator) {
+        this.config = config;
+        this.state = state;
+        this.writer = writer;
+        this.transactionCoordinator = transactionCoordinator;
+        this.eventGenerator = getEventGenerator(config);
+        this.movedFilesDirectory = Paths.get(config.stateDatabaseFileName).getParent();
+    }
+
+    public static FileProcessor create(FileConfig config, EventStreamClientFactory clientFactory) {
+        final Connection connection = SQliteDBUtility.createDatabase(config.stateDatabaseFileName);
+
+        final String writerId = new PersistentId(connection).getPersistentId().toString();
+        log.info("Writer ID: {}", writerId);
+
+        final EventWriter<byte[]> writer = EventWriter.create(
+                clientFactory,
+                writerId,
+                config.streamName,
+                new ByteArraySerializer(),
+                EventWriterConfig.builder()
+                        .enableConnectionPooling(true)
+                        .transactionTimeoutTime((long) (config.transactionTimeoutMinutes * 60.0 * 1000.0))
+                        .build(),
+                config.exactlyOnce);
+
+        final TransactionCoordinator transactionCoordinator = new TransactionCoordinator(connection, writer);
+        transactionCoordinator.performRecovery();
+
+        final TransactionStateDB state = new TransactionStateSQLiteImpl(connection, transactionCoordinator);
+        return FileProcessorFactory.createFileSequenceProcessor(config, state, writer, transactionCoordinator, writerId);
+    }
+
+    /**
+     * Returns the EventGenerator responsible for generating events.
+     * The generation logic is tailored to each file type, so it is implemented in the respective subclasses.
+     * @param config configuration parameters
+     * @return the EventGenerator for this file type
+     */
+    public abstract EventGenerator getEventGenerator(FileConfig config);
+
+    public void watchFiles() throws Exception {
+        findAndRecordNewFiles();
+    }
+
+    public void processFiles() throws Exception {
+        log.debug("processFiles: BEGIN");
+        if (config.enableDeleteCompletedFiles) {
+            log.debug("processFiles: Deleting completed files");
+            deleteCompletedFiles();
+        }
+        processNewFiles();
+        log.debug("processFiles: END");
+    }
+
+    public void processNewFiles() throws Exception {
+        for (;;) {
+            // If nextFile is null there is nothing left to process; discovery of new files
+            // is handled by the separately scheduled watchFiles task.
+            final Pair<FileNameWithOffset, Long> nextFile = state.getNextPendingFileRecord();
+            if (nextFile == null) {
+                log.debug("processNewFiles: No more files to process");
+                break;
+            } else {
+                processFile(nextFile.getLeft(), nextFile.getRight());
+            }
+        }
+    }
+
+    protected void findAndRecordNewFiles() throws Exception {
+        final List<FileNameWithOffset> directoryListing = getDirectoryListing();
+        final List<FileNameWithOffset> completedFiles = state.getCompletedFileRecords();
+        final List<FileNameWithOffset> newFiles = getNewFiles(directoryListing, completedFiles);
+        state.addPendingFileRecords(newFiles);
+    }
+
+    /**
+     * @return list of file name and file size in bytes
+     */
+    protected List<FileNameWithOffset> getDirectoryListing() throws IOException {
+        log.debug("getDirectoryListing: fileSpec={}", config.fileSpec);
+        // Invalid files are moved to a separate Failed_Files folder next to the database file.
+        log.debug("movedFilesDirectory: {}", movedFilesDirectory);
+        final List<FileNameWithOffset> directoryListing = FileUtils.getDirectoryListing(config.fileSpec, config.fileExtension, movedFilesDirectory, config.minTimeInMillisToUpdateFile);
+        log.debug("getDirectoryListing: directoryListing={}", directoryListing);
+        return directoryListing;
+    }
+
+    /**
+     * @return sorted list of file name and file size in bytes
+     */
+    protected List<FileNameWithOffset> getNewFiles(List<FileNameWithOffset> directoryListing, List<FileNameWithOffset> completedFiles) {
+        final ArrayList<FileNameWithOffset> sortedDirectoryListing = new ArrayList<>(directoryListing);
+        Collections.sort(sortedDirectoryListing);
+        final List<FileNameWithOffset> newFiles = new ArrayList<>();
+        final Set<FileNameWithOffset> setCompletedFiles = new HashSet<>(completedFiles);
+        log.trace("setCompletedFiles={}", setCompletedFiles);
+        sortedDirectoryListing.forEach(dirFile -> {
+            if (!setCompletedFiles.contains(dirFile)) {
+                newFiles.add(new FileNameWithOffset(dirFile.fileName, 0));
+            } else {
+                try {
+                    FileUtils.moveCompletedFile(dirFile, movedFilesDirectory);
+                    log.warn("File: {} already marked as completed, moving now", dirFile.fileName);
+                } catch (IOException e) {
+                    log.error("File: {} already marked as completed, but failed to move, error: {}", dirFile.fileName, e.getMessage());
+                }
+            }
+        });
+        log.info("getNewFiles: new file list = {}", newFiles);
+        return newFiles;
+    }
+
+    void processFile(FileNameWithOffset fileNameWithBeginOffset, long firstSequenceNumber) throws Exception {
+        log.info("processFile: Ingesting file {}; beginOffset={}, firstSequenceNumber={}",
+                fileNameWithBeginOffset.fileName, fileNameWithBeginOffset.offset, firstSequenceNumber);
+
+        AtomicLong numOfBytes = new AtomicLong(0);
+        long timestamp = System.nanoTime();
+        // In case a previous iteration encountered an error, we need to ensure that
+        // previously flushed transactions are committed and any unflushed transactions are aborted.
+        transactionCoordinator.performRecovery();
+        /*
+         * Check whether the transaction can be aborted.
+         * Will fail with {@link TxnFailedException} if the transaction has already been committed or aborted.
+         */
+        log.debug("processFile: Transaction status {}", writer.getTransactionStatus());
+        if (writer.getTransactionStatus() == Transaction.Status.OPEN) {
+            writer.abort();
+        }
+
+        File pendingFile = new File(fileNameWithBeginOffset.fileName);
+        if (!pendingFile.exists()) {
+            log.warn("File {} does not exist. It was deleted before processing", fileNameWithBeginOffset.fileName);
+            state.deletePendingFile(fileNameWithBeginOffset.fileName, fileNameWithBeginOffset.offset);
+            return;
+        }
+
+        try (final InputStream inputStream = new FileInputStream(fileNameWithBeginOffset.fileName)) {
+            final CountingInputStream countingInputStream = new CountingInputStream(inputStream);
+            countingInputStream.skip(fileNameWithBeginOffset.offset);
+            final Pair<Long, Long> result = eventGenerator.generateEventsFromInputStream(countingInputStream, firstSequenceNumber,
+                    e -> {
+                        log.trace("processFile: event={}", e);
+                        try {
+                            writer.writeEvent(e.routingKey, e.bytes);
+                            numOfBytes.addAndGet(e.bytes.length);
+                        } catch (TxnFailedException ex) {
+                            log.error("processFile: Write event to transaction failed with exception {} while processing file: {}, event: {}", ex, fileNameWithBeginOffset.fileName, e);
+                            /* TODO If writing an event fails with TxnFailedException, should we abort the transaction and process the file again?
+                               This can occur only if the transaction state is not open. */
+                            throw new RuntimeException(ex);
+                        }
+                    });
+            final Optional<UUID> txnId = writer.flush();
+            final long nextSequenceNumber = result.getLeft();
+            final long endOffset = result.getRight();
+
+            // injectCommitFailure();
+            try {
+                // Commit fails only if the transaction is not in the open state.
+                log.info("processFile: Commit transaction for Id: {}; file: {}", txnId.orElse(null), fileNameWithBeginOffset.fileName);
+                writer.commit();
+            } catch (TxnFailedException ex) {
+                log.error("processFile: Commit transaction for id: {}, file: {}, failed with exception: {}", txnId, fileNameWithBeginOffset.fileName, ex);
+                throw new RuntimeException(ex);
+            }
+            log.debug("processFile: Adding completed file: {}", fileNameWithBeginOffset.fileName);
+            state.addCompletedFileRecord(fileNameWithBeginOffset.fileName, fileNameWithBeginOffset.offset, endOffset, nextSequenceNumber, txnId);
+            // The file is added to the completed-file list only if the commit succeeded; otherwise it is handled during recovery.
+            if (txnId.isPresent()) {
+                Transaction.Status status = writer.getTransactionStatus(txnId.get());
+                if (status == Transaction.Status.COMMITTED || status == Transaction.Status.ABORTED) {
+                    state.deleteTransactionToCommit(txnId);
+                }
+            }
+
+            double elapsedSec = (System.nanoTime() - timestamp) / 1_000_000_000.0;
+            double megabyteCount = numOfBytes.getAndSet(0) / 1_000_000.0;
+            double megabytesPerSec = megabyteCount / elapsedSec;
+            log.info("Sent {} MB in {} sec. Transfer rate: {} MB/sec", megabyteCount, elapsedSec, megabytesPerSec);
+            log.info("processFile: Finished ingesting file {}; endOffset={}, nextSequenceNumber={}",
+                    fileNameWithBeginOffset.fileName, endOffset, nextSequenceNumber);
+        }
+        FileUtils.moveCompletedFile(fileNameWithBeginOffset, movedFilesDirectory);
+        // Delete the file right after ingesting it, if configured to do so.
+        if (config.enableDeleteCompletedFiles) {
+            deleteCompletedFiles();
+        }
+    }
+
+    void deleteCompletedFiles() throws Exception {
+        final List<FileNameWithOffset> completedFiles = state.getCompletedFileRecords();
+        completedFiles.forEach(file -> {
+            // Resolve the file's name inside the completed-files directory.
+            Path completedFilesPath = movedFilesDirectory.resolve(FileUtils.COMPLETED_FILES);
+            String completedFileName = FileUtils.createCompletedFileName(completedFilesPath, file.fileName);
+            Path filePath = completedFilesPath.resolve(completedFileName);
+            log.debug("deleteCompletedFiles: Deleting file with default name: {} and completed file name: {}.", file.fileName, filePath);
+            try {
+                /*
+                 * Remove the record from the database only if the file was deleted from the
+                 * completed-files directory, or it no longer exists in the default ingestion directory.
+                 */
+                if (Files.deleteIfExists(filePath) || Files.notExists(Paths.get(file.fileName))) {
+                    state.deleteCompletedFileRecord(file.fileName);
+                    log.debug("deleteCompletedFiles: Deleted file with default name: {} and completed file name: {}.", file.fileName, filePath);
+                } else {
+                    /*
+                     * This occurs when the first attempt to move the file to the completed directory failed,
+                     * but the file still exists in the default ingestion directory. The move is retried on the
+                     * next iteration, after which the deletion is handled as well.
+                     */
+                    log.warn("deleteCompletedFiles: File {} does not exist in the completed directory but still exists in the default ingestion directory.", filePath);
+                }
+            } catch (Exception e) {
+                log.warn("Unable to delete ingested file with default name: {} and completed file name: {}, error: {}.", file.fileName, filePath, e.getMessage());
+                log.warn("Deletion will be retried on the next iteration.");
+                // We can continue on this error. Deletion will be retried on the next iteration.
+            }
+        });
+    }
+
+    /**
+     * Inject a failure before commit for testing.
+     */
+    protected void injectCommitFailure() {
+        if (Math.random() < 0.3) {
+            throw new RuntimeException("injectCommitFailure: Commit failure test exception");
+        }
+    }
+}
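The exactly-once contract that `processFile` relies on is easiest to see in isolation. The following hedged sketch (not project code; `ExactlyOnceSketch` and `ingestOnce` are hypothetical names) condenses the ordering, using the `EventWriter`, `TransactionStateDB`, and `TransactionCoordinator` types and method signatures from this diff:

```java
package io.pravega.sensor.collector.file;

import io.pravega.sensor.collector.util.EventWriter;
import io.pravega.sensor.collector.util.TransactionCoordinator;
import io.pravega.sensor.collector.util.TransactionStateDB;

import java.util.Optional;
import java.util.UUID;

// Hypothetical helper, shown only to make the ordering explicit.
public class ExactlyOnceSketch {
    void ingestOnce(EventWriter<byte[]> writer, TransactionStateDB state,
                    TransactionCoordinator transactionCoordinator,
                    String fileName, long beginOffset, long endOffset,
                    long nextSequenceNumber, String routingKey, byte[] payload) throws Exception {
        transactionCoordinator.performRecovery();    // 1. finish any interrupted commit first
        writer.writeEvent(routingKey, payload);      // 2. write events into an open transaction
        Optional<UUID> txnId = writer.flush();       // 3. flush and remember the transaction id
        writer.commit();                             // 4. commit; throws TxnFailedException if not open
        state.addCompletedFileRecord(fileName, beginOffset, endOffset,
                nextSequenceNumber, txnId);          // 5. record completion in the local SQLite state
        state.deleteTransactionToCommit(txnId);      // 6. drop the committed transaction id
    }
}
```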
diff --git a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/FileProcessorFactory.java b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/FileProcessorFactory.java
new file mode 100644
index 00000000..6897ec96
--- /dev/null
+++ b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/FileProcessorFactory.java
@@ -0,0 +1,39 @@
+package io.pravega.sensor.collector.file;
+
+import io.pravega.sensor.collector.file.csvfile.CsvFileSequenceProcessor;
+import io.pravega.sensor.collector.file.parquet.ParquetFileProcessor;
+import io.pravega.sensor.collector.file.rawfile.RawFileProcessor;
+import io.pravega.sensor.collector.util.EventWriter;
+import io.pravega.sensor.collector.util.TransactionCoordinator;
+import io.pravega.sensor.collector.util.TransactionStateDB;
+import io.pravega.sensor.collector.util.TransactionStateSQLiteImpl;
+
+/*
+ * The FileProcessorFactory class is responsible for creating file processor instances based on the type of the input file.
+ */
+public class FileProcessorFactory {
+
+    public static FileProcessor createFileSequenceProcessor(final FileConfig config, TransactionStateDB state,
+                                                            EventWriter<byte[]> writer,
+                                                            TransactionCoordinator transactionCoordinator,
+                                                            String writerId) {
+        final String className = config.fileType.substring(config.fileType.lastIndexOf(".") + 1);
+
+        switch (className) {
+            case "ParquetFileIngestService":
+                return new ParquetFileProcessor(config, state, writer, transactionCoordinator, writerId);
+
+            case "CsvFileIngestService":
+                return new CsvFileSequenceProcessor(config, state, writer, transactionCoordinator, writerId);
+
+            case "RawFileIngestService":
+                return new RawFileProcessor(config, state, writer, transactionCoordinator, writerId);
+
+            default:
+                throw new RuntimeException("Unsupported className: " + className);
+        }
+    }
+}
diff --git a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/LogFileIngestService.java b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/LogFileIngestService.java
deleted file mode 100644
index 5bf5b70f..00000000
--- a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/LogFileIngestService.java
+++ /dev/null
@@ -1,141 +0,0 @@
-/**
- * Copyright (c) Dell Inc., or its subsidiaries. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- */
-package io.pravega.sensor.collector.file;
-
-import com.google.common.util.concurrent.ThreadFactoryBuilder;
-import io.pravega.client.EventStreamClientFactory;
-import io.pravega.sensor.collector.DeviceDriver;
-import io.pravega.sensor.collector.DeviceDriverConfig;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.util.concurrent.Executors;
-import java.util.concurrent.ScheduledExecutorService;
-import java.util.concurrent.ScheduledFuture;
-import java.util.concurrent.ThreadFactory;
-import java.util.concurrent.TimeUnit;
-
-public class LogFileIngestService extends DeviceDriver {
-    private static final Logger log = LoggerFactory.getLogger(LogFileIngestService.class);
-
-    private static final String FILE_SPEC_KEY = "FILE_SPEC";
-    private static final String DELETE_COMPLETED_FILES_KEY = "DELETE_COMPLETED_FILES";
-    private static final String DATABASE_FILE_KEY = "DATABASE_FILE";
-    private static final String EVENT_TEMPLATE_KEY = "EVENT_TEMPLATE";
-    private static final String SAMPLES_PER_EVENT_KEY = "SAMPLES_PER_EVENT";
-    private static final String INTERVAL_MS_KEY = "INTERVAL_MS";
-
-    private static final String SCOPE_KEY = "SCOPE";
-    private static final String STREAM_KEY = "STREAM";
-    private static final String ROUTING_KEY_KEY = "ROUTING_KEY";
-    private static final String EXACTLY_ONCE_KEY = "EXACTLY_ONCE";
-    private static final String TRANSACTION_TIMEOUT_MINUTES_KEY = "TRANSACTION_TIMEOUT_MINUTES";
-
-    private final LogFileSequenceProcessor processor;
-    private final ScheduledExecutorService executor;
-
-    private ScheduledFuture<?> task;
-
-    public LogFileIngestService(DeviceDriverConfig config) {
-        super(config);
-        final LogFileSequenceConfig logFileSequenceConfig = new LogFileSequenceConfig(
-                getDatabaseFileName(),
-                getFileSpec(),
-                getRoutingKey(),
-                getStreamName(),
-                getEventTemplate(),
-                getSamplesPerEvent(),
-                getDeleteCompletedFiles(),
-                getExactlyOnce(),
-                getTransactionTimeoutMinutes());
-        log.info("Log File Ingest Config: {}",
logFileSequenceConfig); - final String scopeName = getScopeName(); - log.info("Scope: {}", scopeName); - createStream(scopeName, getStreamName()); - final EventStreamClientFactory clientFactory = getEventStreamClientFactory(scopeName); - processor = LogFileSequenceProcessor.create(logFileSequenceConfig, clientFactory); - ThreadFactory namedThreadFactory = new ThreadFactoryBuilder().setNameFormat( - LogFileIngestService.class.getSimpleName() + "-" + config.getInstanceName() + "-%d").build(); - executor = Executors.newScheduledThreadPool(1, namedThreadFactory); - } - - String getFileSpec() { - return getProperty(FILE_SPEC_KEY); - } - - boolean getDeleteCompletedFiles() { - return Boolean.parseBoolean(getProperty(DELETE_COMPLETED_FILES_KEY, Boolean.toString(true))); - } - - String getDatabaseFileName() { - return getProperty(DATABASE_FILE_KEY); - } - - String getEventTemplate() { - return getProperty(EVENT_TEMPLATE_KEY, "{}"); - } - - int getSamplesPerEvent() { - return Integer.parseInt(getProperty(SAMPLES_PER_EVENT_KEY, Integer.toString(100))); - } - - long getIntervalMs() { - return Long.parseLong(getProperty(INTERVAL_MS_KEY, Long.toString(10000))); - } - - String getScopeName() { - return getProperty(SCOPE_KEY); - } - - String getStreamName() { - return getProperty(STREAM_KEY); - } - - protected String getRoutingKey() { - return getProperty(ROUTING_KEY_KEY, ""); - } - - boolean getExactlyOnce() { - return Boolean.parseBoolean(getProperty(EXACTLY_ONCE_KEY, Boolean.toString(true))); - } - - /** - * This time duration must not exceed the controller property controller.transaction.maxLeaseValue (milliseconds). - */ - double getTransactionTimeoutMinutes() { - return Double.parseDouble(getProperty(TRANSACTION_TIMEOUT_MINUTES_KEY, Double.toString(18.0 * 60.0))); - } - - protected void ingestLogFiles() { - log.info("ingestLogFiles: BEGIN"); - try { - processor.ingestLogFiles(); - } catch (Exception e) { - log.error("Error", e); - // Continue on any errors. We will retry on the next iteration. - } - log.info("ingestLogFiles: END"); - } - - @Override - protected void doStart() { - task = executor.scheduleAtFixedRate( - this::ingestLogFiles, - 0, - getIntervalMs(), - TimeUnit.MILLISECONDS); - notifyStarted(); - } - - @Override - protected void doStop() { - task.cancel(false); - } -} diff --git a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/LogFileSequenceConfig.java b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/LogFileSequenceConfig.java deleted file mode 100644 index d9693d0b..00000000 --- a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/LogFileSequenceConfig.java +++ /dev/null @@ -1,54 +0,0 @@ -/** - * Copyright (c) Dell Inc., or its subsidiaries. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - */ -package io.pravega.sensor.collector.file; - -public class LogFileSequenceConfig { - public final String stateDatabaseFileName; - public final String fileSpec; - public final String routingKey; - public final String streamName; - public final String eventTemplateStr; - - /** - * Also known as samplesPerEvent. 
- */ - public final int maxRecordsPerEvent; - - public final boolean enableDeleteCompletedFiles; - public final boolean exactlyOnce; - public final double transactionTimeoutMinutes; - - public LogFileSequenceConfig(String stateDatabaseFileName, String fileSpec, String routingKey, String streamName, String eventTemplateStr, int maxRecordsPerEvent, boolean enableDeleteCompletedFiles, boolean exactlyOnce, double transactionTimeoutMinutes) { - this.stateDatabaseFileName = stateDatabaseFileName; - this.fileSpec = fileSpec; - this.routingKey = routingKey; - this.streamName = streamName; - this.eventTemplateStr = eventTemplateStr; - this.maxRecordsPerEvent = maxRecordsPerEvent; - this.enableDeleteCompletedFiles = enableDeleteCompletedFiles; - this.exactlyOnce = exactlyOnce; - this.transactionTimeoutMinutes = transactionTimeoutMinutes; - } - - @Override - public String toString() { - return "LogFileSequenceConfig{" + - "stateDatabaseFileName='" + stateDatabaseFileName + '\'' + - ", fileSpec='" + fileSpec + '\'' + - ", routingKey='" + routingKey + '\'' + - ", streamName='" + streamName + '\'' + - ", eventTemplateStr='" + eventTemplateStr + '\'' + - ", maxRecordsPerEvent=" + maxRecordsPerEvent + - ", enableDeleteCompletedFiles=" + enableDeleteCompletedFiles + - ", exactlyOnce=" + exactlyOnce + - ", transactionTimeoutMinutes=" + transactionTimeoutMinutes + - '}'; - } -} diff --git a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/LogFileSequenceProcessor.java b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/LogFileSequenceProcessor.java deleted file mode 100644 index d99db339..00000000 --- a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/LogFileSequenceProcessor.java +++ /dev/null @@ -1,218 +0,0 @@ -/** - * Copyright (c) Dell Inc., or its subsidiaries. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - */ -package io.pravega.sensor.collector.file; - -import com.google.common.io.CountingInputStream; -import io.pravega.client.ClientConfig; -import io.pravega.client.EventStreamClientFactory; -import io.pravega.client.admin.StreamManager; -import io.pravega.client.stream.EventWriterConfig; -import io.pravega.client.stream.StreamConfiguration; -import io.pravega.client.stream.TxnFailedException; -import io.pravega.client.stream.impl.ByteArraySerializer; -import io.pravega.sensor.collector.util.EventWriter; -import io.pravega.sensor.collector.util.PersistentId; -import io.pravega.sensor.collector.util.TransactionCoordinator; -import org.apache.commons.lang3.tuple.Pair; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.FileInputStream; -import java.io.IOException; -import java.io.InputStream; -import java.nio.file.DirectoryStream; -import java.nio.file.Files; -import java.nio.file.Path; -import java.nio.file.Paths; -import java.sql.Connection; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashSet; -import java.util.List; -import java.util.Optional; -import java.util.Set; -import java.util.UUID; -import java.util.stream.Collectors; -import java.util.stream.StreamSupport; - -public class LogFileSequenceProcessor { - private static final Logger log = LoggerFactory.getLogger(LogFileSequenceProcessorState.class); - - private final LogFileSequenceConfig config; - private final LogFileSequenceProcessorState state; - private final EventWriter writer; - private final TransactionCoordinator transactionCoordinator; - private final EventGenerator eventGenerator; - - public LogFileSequenceProcessor(LogFileSequenceConfig config, LogFileSequenceProcessorState state, EventWriter writer, TransactionCoordinator transactionCoordinator, EventGenerator eventGenerator) { - this.config = config; - this.state = state; - this.writer = writer; - this.transactionCoordinator = transactionCoordinator; - this.eventGenerator = eventGenerator; - } - - public static LogFileSequenceProcessor create( - LogFileSequenceConfig config, EventStreamClientFactory clientFactory){ - - final Connection connection = LogFileSequenceProcessorState.createDatabase(config.stateDatabaseFileName); - - final String writerId = new PersistentId(connection).getPersistentId().toString(); - log.info("Writer ID: {}", writerId); - - final EventWriter writer = EventWriter.create( - clientFactory, - writerId, - config.streamName, - new ByteArraySerializer(), - EventWriterConfig.builder() - .enableConnectionPooling(true) - .transactionTimeoutTime((long) (config.transactionTimeoutMinutes * 60.0 * 1000.0)) - .build(), - config.exactlyOnce); - - final TransactionCoordinator transactionCoordinator = new TransactionCoordinator(connection, writer); - transactionCoordinator.performRecovery(); - - final EventGenerator eventGenerator = EventGenerator.create( - config.routingKey, - config.maxRecordsPerEvent, - config.eventTemplateStr, - writerId); - final LogFileSequenceProcessorState state = new LogFileSequenceProcessorState(connection, transactionCoordinator); - return new LogFileSequenceProcessor(config, state, writer, transactionCoordinator, eventGenerator); - } - - public void ingestLogFiles() throws Exception { - log.info("ingestLogFiles: BEGIN"); - findAndRecordNewFiles(); - processNewFiles(); - if (config.enableDeleteCompletedFiles) { - deleteCompletedFiles(); - } - log.info("ingestLogFiles: END"); - } - - 
-    public void processNewFiles() throws Exception {
-        for (;;) {
-            final Pair<FileNameWithOffset, Long> nextFile = state.getNextPendingFile();
-            if (nextFile == null) {
-                log.info("No more files to ingest");
-                break;
-            } else {
-                processFile(nextFile.getLeft(), nextFile.getRight());
-            }
-        }
-    }
-
-    protected void findAndRecordNewFiles() throws Exception {
-        final List<FileNameWithOffset> directoryListing = getDirectoryListing();
-        final List<FileNameWithOffset> completedFiles = state.getCompletedFiles();
-        final List<FileNameWithOffset> newFiles = getNewFiles(directoryListing, completedFiles);
-        state.addPendingFiles(newFiles);
-    }
-
-    /**
-     * @return list of file name and file size in bytes
-     */
-    protected List<FileNameWithOffset> getDirectoryListing() throws IOException {
-        log.info("getDirectoryListing: fileSpec={}", config.fileSpec);
-        final List<FileNameWithOffset> directoryListing = getDirectoryListing(config.fileSpec);
-        log.trace("getDirectoryListing: directoryListing={}", directoryListing);
-        return directoryListing;
-    }
-
-    /**
-     * @return list of file name and file size in bytes
-     */
-    static protected List<FileNameWithOffset> getDirectoryListing(String fileSpec) throws IOException {
-        final Path pathSpec = Paths.get(fileSpec);
-        try (DirectoryStream<Path> dirStream = Files.newDirectoryStream(pathSpec.getParent(), pathSpec.getFileName().toString())) {
-            return StreamSupport.stream(dirStream.spliterator(), false)
-                    .map(f -> new FileNameWithOffset(f.toAbsolutePath().toString(), f.toFile().length()))
-                    .collect(Collectors.toList());
-        }
-    }
-
-    /**
-     * @return sorted list of file name and file size in bytes
-     */
-    static protected List<FileNameWithOffset> getNewFiles(List<FileNameWithOffset> directoryListing, List<FileNameWithOffset> completedFiles) {
-        final ArrayList<FileNameWithOffset> sortedDirectoryListing = new ArrayList<>(directoryListing);
-        Collections.sort(sortedDirectoryListing);
-        final List<FileNameWithOffset> newFiles = new ArrayList<>();
-        final Set<FileNameWithOffset> setCompletedFiles = new HashSet<>(completedFiles);
-        log.trace("setCompletedFiles={}", setCompletedFiles);
-        sortedDirectoryListing.forEach(dirFile -> {
-            if (!setCompletedFiles.contains(dirFile)) {
-                newFiles.add(new FileNameWithOffset(dirFile.fileName, 0));
-            }
-        });
-        log.info("getNewFiles={}", newFiles);
-        return newFiles;
-    }
-
-    void processFile(FileNameWithOffset fileNameWithBeginOffset, long firstSequenceNumber) throws Exception {
-        log.info("processFile: Ingesting file {}; beginOffset={}, firstSequenceNumber={}",
-                fileNameWithBeginOffset.fileName, fileNameWithBeginOffset.offset, firstSequenceNumber);
-
-        // In case a previous iteration encountered an error, we need to ensure that
-        // previously flushed transactions are committed and any unflushed transactions are aborted.
-        transactionCoordinator.performRecovery();
-        writer.abort();
-
-        try (final InputStream inputStream = new FileInputStream(fileNameWithBeginOffset.fileName)) {
-            final CountingInputStream countingInputStream = new CountingInputStream(inputStream);
-            countingInputStream.skip(fileNameWithBeginOffset.offset);
-            final Pair<Long, Long> result = eventGenerator.generateEventsFromInputStream(countingInputStream, firstSequenceNumber,
-                    e -> {
-                        log.trace("processFile: event={}", e);
-                        try {
-                            writer.writeEvent(e.routingKey, e.bytes);
-                        } catch (TxnFailedException ex) {
-                            throw new RuntimeException(ex);
-                        }
-                    });
-            final Optional<UUID> txnId = writer.flush();
-            final long nextSequenceNumber = result.getLeft();
-            final long endOffset = result.getRight();
-            state.addCompletedFile(fileNameWithBeginOffset.fileName, fileNameWithBeginOffset.offset, endOffset, nextSequenceNumber, txnId);
-            // injectCommitFailure();
-            writer.commit();
-            state.deleteTransactionToCommit(txnId);
-            log.info("processFile: Finished ingesting file {}; endOffset={}, nextSequenceNumber={}",
-                    fileNameWithBeginOffset.fileName, endOffset, nextSequenceNumber);
-        }
-    }
-
-    void deleteCompletedFiles() throws Exception {
-        final List<FileNameWithOffset> completedFiles = state.getCompletedFiles();
-        completedFiles.forEach(file -> {
-            try {
-                Files.deleteIfExists(Paths.get(file.fileName));
-                log.info("deleteCompletedFiles: Deleted file {}", file.fileName);
-                // Only remove from database if we could delete file.
-                state.deleteCompletedFile(file.fileName);
-            } catch (Exception e) {
-                log.warn("Unable to delete ingested file", e);
-                // We can continue on this error. It will be retried on the next iteration.
-            }
-        });
-    }
-
-    /**
-     * Inject a failure before commit for testing.
-     */
-    protected void injectCommitFailure() {
-        if (Math.random() < 0.3) {
-            throw new RuntimeException("injectCommitFailure: Commit failure test exception");
-        }
-    }
-}
diff --git a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/LogFileSequenceProcessorState.java b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/LogFileSequenceProcessorState.java
deleted file mode 100644
index 53629cf1..00000000
--- a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/LogFileSequenceProcessorState.java
+++ /dev/null
@@ -1,179 +0,0 @@
-/**
- * Copyright (c) Dell Inc., or its subsidiaries. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- */
-package io.pravega.sensor.collector.file;
-
-import com.google.common.annotations.VisibleForTesting;
-import io.pravega.sensor.collector.util.AutoRollback;
-import io.pravega.sensor.collector.util.TransactionCoordinator;
-import org.apache.commons.lang3.tuple.ImmutablePair;
-import org.apache.commons.lang3.tuple.Pair;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.sql.Connection;
-import java.sql.DriverManager;
-import java.sql.PreparedStatement;
-import java.sql.ResultSet;
-import java.sql.SQLException;
-import java.sql.Statement;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Optional;
-import java.util.UUID;
-
-import static java.sql.Connection.TRANSACTION_SERIALIZABLE;
-
-public class LogFileSequenceProcessorState implements AutoCloseable {
-    private static final Logger log = LoggerFactory.getLogger(LogFileSequenceProcessorState.class);
-
-    private final Connection connection;
-    private final TransactionCoordinator transactionCoordinator;
-
-    public LogFileSequenceProcessorState(Connection connection, TransactionCoordinator transactionCoordinator) {
-        this.connection = connection;
-        this.transactionCoordinator = transactionCoordinator;
-    }
-
-    public static Connection createDatabase(String fileName) {
-        try {
-            final Connection connection = DriverManager.getConnection("jdbc:sqlite:" + fileName);
-            try (final Statement statement = connection.createStatement()) {
-                // Use SQLite exclusive locking mode to ensure that another process or device driver instance is not using this database.
-                //statement.execute("PRAGMA locking_mode = EXCLUSIVE");
-                statement.execute(
-                        "create table if not exists PendingFiles (" +
-                                "id integer primary key autoincrement, " +
-                                "fileName string unique not null, " +
-                                "offset bigint not null)");
-                statement.execute(
-                        "create table if not exists CompletedFiles (" +
-                                "fileName string primary key not null, " +
-                                "offset bigint not null)");
-                statement.execute(
-                        "create table if not exists SequenceNumber (" +
-                                "id integer primary key check (id = 0), " +
-                                "nextSequenceNumber bigint not null)");
-                statement.execute(
-                        "insert or ignore into SequenceNumber (id, nextSequenceNumber) values (0, 0)");
-            }
-            connection.setAutoCommit(false);
-            connection.setTransactionIsolation(TRANSACTION_SERIALIZABLE);
-            return connection;
-        } catch (SQLException e) {
-            throw new RuntimeException(e);
-        }
-    }
-
-    @VisibleForTesting
-    public static LogFileSequenceProcessorState create(String fileName) {
-        final Connection connection = createDatabase(fileName);
-        final TransactionCoordinator transactionCoordinator = new TransactionCoordinator(connection, null);
-        return new LogFileSequenceProcessorState(connection, transactionCoordinator);
-    }
-
-    @Override
-    public void close() throws SQLException {
-        connection.close();
-    }
-
-    public void addPendingFiles(List<FileNameWithOffset> files) throws SQLException {
-        try (final PreparedStatement insertStatement = connection.prepareStatement(
-                "insert or ignore into PendingFiles (fileName, offset) values (?, ?)");
-             final AutoRollback autoRollback = new AutoRollback(connection)) {
-            for (FileNameWithOffset file : files) {
-                insertStatement.setString(1, file.fileName);
-                insertStatement.setLong(2, file.offset);
-                insertStatement.execute();
-            }
-            autoRollback.commit();
-        }
-    }
-
-    /**
-     * @return ((file name, begin offset), sequence number) or null if there is no pending file
-     */
-    public Pair<FileNameWithOffset, Long> getNextPendingFile() throws SQLException {
-        try (final Statement statement = connection.createStatement();
-             final ResultSet rs = statement.executeQuery("select fileName, offset from PendingFiles order by id limit 1")) {
-            if (rs.next()) {
-                final FileNameWithOffset fileNameWithOffset = new FileNameWithOffset(rs.getString("fileName"), rs.getLong("offset"));
-                try (final ResultSet rsSequenceNumber = statement.executeQuery("select nextSequenceNumber from SequenceNumber")) {
-                    rsSequenceNumber.next();
-                    final long nextSequenceNumber = rsSequenceNumber.getLong(1);
-                    return new ImmutablePair<>(fileNameWithOffset, nextSequenceNumber);
-                }
-            } else {
-                return null;
-            }
-        } finally {
-            connection.commit();
-        }
-    }
-
-    public void addCompletedFile(String fileName, long beginOffset, long endOffset, long newNextSequenceNumber, Optional<UUID> txnId) throws SQLException {
-        try (final PreparedStatement updateSequenceNumberStatement = connection.prepareStatement(
-                "update SequenceNumber set nextSequenceNumber = ?");
-             final PreparedStatement insertCompletedFileStatement = connection.prepareStatement(
-                "insert or ignore into CompletedFiles (fileName, offset) values (?, ?)");
-             final PreparedStatement deletePendingFileStatement = connection.prepareStatement(
-                "delete from PendingFiles where fileName = ? and offset <= ?");
-             final AutoRollback autoRollback = new AutoRollback(connection)) {
-            // Update sequence number.
-            updateSequenceNumberStatement.setLong(1, newNextSequenceNumber);
-            updateSequenceNumberStatement.execute();
-            // Add completed file.
-            insertCompletedFileStatement.setString(1, fileName);
-            insertCompletedFileStatement.setLong(2, endOffset);
-            insertCompletedFileStatement.execute();
-            // Remove pending file.
-            deletePendingFileStatement.setString(1, fileName);
-            deletePendingFileStatement.setLong(2, beginOffset);
-            deletePendingFileStatement.execute();
-            transactionCoordinator.addTransactionToCommit(txnId);
-            autoRollback.commit();
-        }
-    }
-
-    @VisibleForTesting
-    public void addCompletedFile(String fileName, long beginOffset, long endOffset, long newNextSequenceNumber) throws SQLException {
-        addCompletedFile(fileName, beginOffset, endOffset, newNextSequenceNumber, Optional.empty());
-    }
-
-    public void deleteTransactionToCommit(Optional<UUID> txnId) {
-        transactionCoordinator.deleteTransactionToCommit(txnId);
-    }
-
-    /**
-     * @return list of file name and end offset (file size)
-     */
-    public List<FileNameWithOffset> getCompletedFiles() throws SQLException {
-        try (final Statement statement = connection.createStatement();
-             final ResultSet rs = statement.executeQuery("select fileName, offset from completedFiles")) {
-            final List<FileNameWithOffset> files = new ArrayList<>();
-            while (rs.next()) {
-                final FileNameWithOffset fileNameWithOffset = new FileNameWithOffset(rs.getString("fileName"), rs.getLong("offset"));
-                files.add(fileNameWithOffset);
-            }
-            return files;
-        } finally {
-            connection.commit();
-        }
-    }
-
-    public void deleteCompletedFile(String fileName) throws SQLException {
-        try (final PreparedStatement deleteStatement = connection.prepareStatement(
-                "delete from CompletedFiles where fileName = ?");
-             final AutoRollback autoRollback = new AutoRollback(connection)) {
-            deleteStatement.setString(1, fileName);
-            deleteStatement.execute();
-            autoRollback.commit();
-        }
-    }
-}
diff --git a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/PravegaWriterEvent.java b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/PravegaWriterEvent.java
deleted file mode 100644
index 377766db..00000000
--- a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/PravegaWriterEvent.java
+++ /dev/null
@@ -1,31 +0,0 @@
-/**
- * Copyright (c) Dell Inc., or its subsidiaries. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- */
-package io.pravega.sensor.collector.file;
-
-public class PravegaWriterEvent {
-    public final String routingKey;
-    public final long sequenceNumber;
-    public final byte[] bytes;
-
-    public PravegaWriterEvent(String routingKey, long sequenceNumber, byte[] bytes) {
-        this.routingKey = routingKey;
-        this.sequenceNumber = sequenceNumber;
-        this.bytes = bytes;
-    }
-
-    @Override
-    public String toString() {
-        return "PravegaWriterEvent{" +
-                "routingKey='" + routingKey + '\'' +
-                ", sequenceNumber=" + sequenceNumber +
-                ", bytes=" + new String(bytes) +
-                '}';
-    }
-}
diff --git a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/csvfile/CsvFileEventGenerator.java b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/csvfile/CsvFileEventGenerator.java
new file mode 100644
index 00000000..42e11502
--- /dev/null
+++ b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/csvfile/CsvFileEventGenerator.java
@@ -0,0 +1,110 @@
+/**
+ * Copyright (c) Dell Inc., or its subsidiaries. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ */
+package io.pravega.sensor.collector.file.csvfile;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.databind.node.ObjectNode;
+import com.google.common.io.CountingInputStream;
+import io.pravega.sensor.collector.file.EventGenerator;
+import io.pravega.sensor.collector.util.PravegaWriterEvent;
+import org.apache.commons.csv.CSVFormat;
+import org.apache.commons.csv.CSVParser;
+import org.apache.commons.csv.CSVRecord;
+import org.apache.commons.lang3.tuple.ImmutablePair;
+import org.apache.commons.lang3.tuple.Pair;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.nio.charset.StandardCharsets;
+import java.util.function.Consumer;
+
+/**
+ * Generate Event from CSV file
+ */
+public class CsvFileEventGenerator implements EventGenerator {
+    private static final Logger log = LoggerFactory.getLogger(CsvFileEventGenerator.class);
+
+    private final String routingKey;
+    private final int maxRecordsPerEvent;
+    private final ObjectNode eventTemplate;
+    private final ObjectMapper mapper;
+
+    public CsvFileEventGenerator(String routingKey, int maxRecordsPerEvent, ObjectNode eventTemplate, ObjectMapper mapper) {
+        this.routingKey = routingKey;
+        this.maxRecordsPerEvent = maxRecordsPerEvent;
+        this.eventTemplate = eventTemplate;
+        this.mapper = mapper;
+    }
+
+    public static CsvFileEventGenerator create(String routingKey, int maxRecordsPerEvent, String eventTemplateStr, String writerId) {
+        try {
+            final ObjectMapper mapper = new ObjectMapper();
+            final ObjectNode eventTemplate = (ObjectNode) mapper.readTree(eventTemplateStr);
+            eventTemplate.put("WriterId", writerId);
+            return new CsvFileEventGenerator(routingKey, maxRecordsPerEvent, eventTemplate, mapper);
+        } catch (IOException e) {
+            throw new RuntimeException(e);
+        }
+    }
+
+    public static CsvFileEventGenerator create(String routingKey, int maxRecordsPerEvent) throws IOException {
+        return create(routingKey, maxRecordsPerEvent, "{}", "MyWriterId");
+    }
+
+    /**
+     * Generate events from an input stream. The number of records per event is defined in the input config file.
+     *
+     * @param inputStream
+     * @param firstSequenceNumber
+     * @return next sequence number, end offset
+     */
+    public Pair<Long, Long> generateEventsFromInputStream(CountingInputStream inputStream, long firstSequenceNumber, Consumer<PravegaWriterEvent> consumer) throws IOException {
+        final CSVFormat format = CSVFormat.DEFAULT.withFirstRecordAsHeader();
+        final CSVParser parser = CSVParser.parse(inputStream, StandardCharsets.UTF_8, format);
+        long nextSequenceNumber = firstSequenceNumber;
+        int numRecordsInEvent = 0;
+        List<HashMap<String, Object>> eventBatch = new ArrayList<>();
+        for (CSVRecord record : parser) {
+            HashMap<String, Object> recordDataMap = new HashMap<String, Object>();
+            for (int i = 0; i < record.size(); i++) {
+                recordDataMap.put(parser.getHeaderNames().get(i), convertValue(record.get(i)));
+            }
+            eventBatch.add(recordDataMap);
+            numRecordsInEvent++;
+            if (numRecordsInEvent >= maxRecordsPerEvent) {
+                consumer.accept(new PravegaWriterEvent(routingKey, nextSequenceNumber, mapper.writeValueAsBytes(eventBatch)));
+                nextSequenceNumber++;
+                eventBatch.clear();
+                numRecordsInEvent = 0;
+            }
+        }
+        if (!eventBatch.isEmpty()) {
+            consumer.accept(new PravegaWriterEvent(routingKey, nextSequenceNumber, mapper.writeValueAsBytes(eventBatch)));
+            nextSequenceNumber++;
+            eventBatch.clear();
+        }
+        final long endOffset = inputStream.getCount();
+        return new ImmutablePair<>(nextSequenceNumber, endOffset);
+    }
+
+    public Object convertValue(String s) {
+        // TODO: convert timestamp
+        try {
+            return Long.parseLong(s);
+        } catch (NumberFormatException ignored) {}
+        try {
+            return Double.parseDouble(s);
+        } catch (NumberFormatException ignored) {}
+        return s;
+    }
+}
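[Editor's note] A minimal usage sketch (not part of this change set) of how the CSV generator above batches data rows into JSON-encoded events. The sample CSV, routing key, and batch size are invented for illustration; the class and method names come from the diff above.

import com.google.common.io.CountingInputStream;
import io.pravega.sensor.collector.file.csvfile.CsvFileEventGenerator;

import java.io.ByteArrayInputStream;
import java.nio.charset.StandardCharsets;

public class CsvGeneratorSketch {
    public static void main(String[] args) throws Exception {
        // Header row plus 3 data rows; with maxRecordsPerEvent = 2 this yields 2 events
        // (a full batch of 2 records, then a final flush of the remaining 1 record).
        final String csv = "sensor,value\naccel,1.5\naccel,2.5\naccel,3.5\n";
        final CsvFileEventGenerator generator = CsvFileEventGenerator.create("routingkey1", 2);
        final CountingInputStream in = new CountingInputStream(
                new ByteArrayInputStream(csv.getBytes(StandardCharsets.UTF_8)));
        generator.generateEventsFromInputStream(in, 0,
                e -> System.out.println(e.sequenceNumber + ": " + new String(e.bytes, StandardCharsets.UTF_8)));
    }
}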
diff --git a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/csvfile/CsvFileIngestService.java b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/csvfile/CsvFileIngestService.java
new file mode 100644
index 00000000..85d25e00
--- /dev/null
+++ b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/csvfile/CsvFileIngestService.java
@@ -0,0 +1,29 @@
+/**
+ * Copyright (c) Dell Inc., or its subsidiaries. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ */
+package io.pravega.sensor.collector.file.csvfile;
+
+import io.pravega.sensor.collector.DeviceDriverConfig;
+import io.pravega.sensor.collector.file.FileIngestService;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Ingestion service for csv files.
+ */
+public class CsvFileIngestService extends FileIngestService {
+    private static final Logger log = LoggerFactory.getLogger(CsvFileIngestService.class);
+
+    public CsvFileIngestService(DeviceDriverConfig config) {
+        super(config);
+    }
+}
diff --git a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/csvfile/CsvFileSequenceProcessor.java b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/csvfile/CsvFileSequenceProcessor.java
new file mode 100644
index 00000000..58f27e63
--- /dev/null
+++ b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/csvfile/CsvFileSequenceProcessor.java
@@ -0,0 +1,46 @@
+/**
+ * Copyright (c) Dell Inc., or its subsidiaries. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ */
+package io.pravega.sensor.collector.file.csvfile;
+
+import io.pravega.sensor.collector.file.EventGenerator;
+import io.pravega.sensor.collector.file.FileConfig;
+import io.pravega.sensor.collector.file.FileProcessor;
+import io.pravega.sensor.collector.util.TransactionStateDB;
+import io.pravega.sensor.collector.util.EventWriter;
+import io.pravega.sensor.collector.util.TransactionCoordinator;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class CsvFileSequenceProcessor extends FileProcessor {
+    private static final Logger log = LoggerFactory.getLogger(CsvFileSequenceProcessor.class);
+
+    private final FileConfig config;
+    private final String writerId;
+
+    public CsvFileSequenceProcessor(FileConfig config, TransactionStateDB state, EventWriter writer, TransactionCoordinator transactionCoordinator, String writerId) {
+        super(config, state, writer, transactionCoordinator);
+        this.config = config;
+        this.writerId = writerId;
+    }
+
+    /**
+     * Event generator for CSV file.
+     *
+     * @param config configuration parameters
+     * @return eventGenerator
+     */
+    @Override
+    public EventGenerator getEventGenerator(FileConfig config) {
+        return CsvFileEventGenerator.create(
+                config.routingKey,
+                config.maxRecordsPerEvent,
+                config.eventTemplateStr,
+                writerId);
+    }
+}
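[Editor's note] The refactor above reduces adding a new file format to one FileProcessor subclass plus one EventGenerator. The hypothetical sketch below (not part of this change set) illustrates the pattern for a JSON-lines format; "JsonlFileProcessor", its package name, and the one-event-per-line logic are invented, and it assumes EventGenerator's single method is generateEventsFromInputStream as implemented by the generators in this diff.

package io.pravega.sensor.collector.file.jsonl;

import com.google.common.io.CountingInputStream;
import io.pravega.sensor.collector.file.EventGenerator;
import io.pravega.sensor.collector.file.FileConfig;
import io.pravega.sensor.collector.file.FileProcessor;
import io.pravega.sensor.collector.util.EventWriter;
import io.pravega.sensor.collector.util.PravegaWriterEvent;
import io.pravega.sensor.collector.util.TransactionCoordinator;
import io.pravega.sensor.collector.util.TransactionStateDB;
import org.apache.commons.lang3.tuple.ImmutablePair;
import org.apache.commons.lang3.tuple.Pair;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.function.Consumer;

// Hypothetical processor: each line of a .jsonl file becomes one Pravega event.
public class JsonlFileProcessor extends FileProcessor {

    public JsonlFileProcessor(FileConfig config, TransactionStateDB state, EventWriter writer, TransactionCoordinator transactionCoordinator, String writerId) {
        super(config, state, writer, transactionCoordinator);
    }

    @Override
    public EventGenerator getEventGenerator(FileConfig config) {
        return new EventGenerator() {
            @Override
            public Pair<Long, Long> generateEventsFromInputStream(CountingInputStream inputStream, long firstSequenceNumber, Consumer<PravegaWriterEvent> consumer) throws IOException {
                long nextSequenceNumber = firstSequenceNumber;
                final BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream, StandardCharsets.UTF_8));
                String line;
                while ((line = reader.readLine()) != null) {
                    // One event per line; the CountingInputStream supplies the end offset.
                    consumer.accept(new PravegaWriterEvent(config.routingKey, nextSequenceNumber, line.getBytes(StandardCharsets.UTF_8)));
                    nextSequenceNumber++;
                }
                return new ImmutablePair<>(nextSequenceNumber, inputStream.getCount());
            }
        };
    }
}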
diff --git a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/parquet/EventGenerator.java b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/parquet/ParquetEventGenerator.java
similarity index 83%
rename from pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/parquet/EventGenerator.java
rename to pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/parquet/ParquetEventGenerator.java
index 8b2a3c84..5a7311d1 100644
--- a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/parquet/EventGenerator.java
+++ b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/parquet/ParquetEventGenerator.java
@@ -8,21 +8,13 @@
  *
  * http://www.apache.org/licenses/LICENSE-2.0
  */
-package io.pravega.sensor.collector.parquet;
-
-import java.io.File;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.function.Consumer;
-import java.util.stream.Collectors;
+package io.pravega.sensor.collector.file.parquet;
 
 import com.fasterxml.jackson.databind.ObjectMapper;
 import com.fasterxml.jackson.databind.node.ObjectNode;
 import com.google.common.io.CountingInputStream;
-
+import io.pravega.sensor.collector.file.EventGenerator;
+import io.pravega.sensor.collector.util.PravegaWriterEvent;
 import org.apache.avro.Schema;
 import org.apache.avro.generic.GenericData;
 import org.apache.avro.generic.GenericRecord;
@@ -44,47 +36,58 @@
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.function.Consumer;
+import java.util.stream.Collectors;
+
 /**
- * Generate Event from file
+ * Generate Event from Parquet file
  */
-public class EventGenerator {
-    private static final Logger log = LoggerFactory.getLogger(EventGenerator.class);
+public class ParquetEventGenerator implements EventGenerator {
+    private static final Logger log = LoggerFactory.getLogger(ParquetEventGenerator.class);
 
     private final String routingKey;
     private final int maxRecordsPerEvent;
     private final ObjectNode eventTemplate;
     private final ObjectMapper mapper;
 
-    public EventGenerator(String routingKey, int maxRecordsPerEvent, ObjectNode eventTemplate, ObjectMapper mapper) {
+    public ParquetEventGenerator(String routingKey, int maxRecordsPerEvent, ObjectNode eventTemplate, ObjectMapper mapper) {
         this.routingKey = routingKey;
         this.maxRecordsPerEvent = maxRecordsPerEvent;
         this.eventTemplate = eventTemplate;
         this.mapper = mapper;
     }
 
-    public static EventGenerator create(String routingKey, int maxRecordsPerEvent, String eventTemplateStr, String writerId) {
+    public static ParquetEventGenerator create(String routingKey, int maxRecordsPerEvent, String eventTemplateStr, String writerId) {
         try {
             final ObjectMapper mapper = new ObjectMapper();
             final ObjectNode eventTemplate = (ObjectNode) mapper.readTree(eventTemplateStr);
             eventTemplate.put("WriterId", writerId);
-            return new EventGenerator(routingKey, maxRecordsPerEvent, eventTemplate, mapper);
+            return new ParquetEventGenerator(routingKey, maxRecordsPerEvent, eventTemplate, mapper);
         } catch (IOException e) {
             throw new RuntimeException(e);
         }
     }
 
-    public static EventGenerator create(String routingKey, int maxRecordsPerEvent) throws IOException {
+    public static ParquetEventGenerator create(String routingKey, int maxRecordsPerEvent) throws IOException {
         return create(routingKey, maxRecordsPerEvent, "{}", "MyWriterId");
     }
 
     /**
-     * Convert Parquet to Json
+     * Generate events from an input stream. The number of records per event is defined in the input config file.
+     * Converts Parquet to JSON.
+     *
      * @param inputStream
      * @param firstSequenceNumber
     * @return next sequence number, end offset
      */
-    protected Pair<Long, Long> generateEventsFromInputStream(CountingInputStream inputStream, long firstSequenceNumber, Consumer<PravegaWriterEvent> consumer) throws IOException {
+    public Pair<Long, Long> generateEventsFromInputStream(CountingInputStream inputStream, long firstSequenceNumber, Consumer<PravegaWriterEvent> consumer) throws IOException {
         File tempFile = File.createTempFile("temp", ".parquet");
         FileOutputStream outputStream = new FileOutputStream(tempFile);
         IOUtils.copy(inputStream,outputStream);
diff --git a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/parquet/ParquetFileIngestService.java b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/parquet/ParquetFileIngestService.java
new file mode 100644
index 00000000..57a45403
--- /dev/null
+++ b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/parquet/ParquetFileIngestService.java
@@ -0,0 +1,30 @@
+/**
+ * Copyright (c) Dell Inc., or its subsidiaries. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ */
+package io.pravega.sensor.collector.file.parquet;
+
+import io.pravega.sensor.collector.DeviceDriverConfig;
+import io.pravega.sensor.collector.file.FileIngestService;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Ingestion service for parquet file data.
+ */
+public class ParquetFileIngestService extends FileIngestService {
+    private static final Logger log = LoggerFactory.getLogger(ParquetFileIngestService.class);
+
+    public ParquetFileIngestService(DeviceDriverConfig config) {
+        super(config);
+    }
+}
diff --git a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/parquet/ParquetFileProcessor.java b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/parquet/ParquetFileProcessor.java
new file mode 100644
index 00000000..0474a1e4
--- /dev/null
+++ b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/parquet/ParquetFileProcessor.java
@@ -0,0 +1,47 @@
+/**
+ * Copyright (c) Dell Inc., or its subsidiaries. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ */
+package io.pravega.sensor.collector.file.parquet;
+
+import io.pravega.sensor.collector.file.EventGenerator;
+import io.pravega.sensor.collector.file.FileConfig;
+import io.pravega.sensor.collector.file.FileProcessor;
+import io.pravega.sensor.collector.util.TransactionStateDB;
+import io.pravega.sensor.collector.util.EventWriter;
+import io.pravega.sensor.collector.util.TransactionCoordinator;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class ParquetFileProcessor extends FileProcessor {
+    private static final Logger log = LoggerFactory.getLogger(ParquetFileProcessor.class);
+
+    private final FileConfig config;
+    private final String writerId;
+
+    public ParquetFileProcessor(FileConfig config, TransactionStateDB state, EventWriter writer, TransactionCoordinator transactionCoordinator, String writerId) {
+        super(config, state, writer, transactionCoordinator);
+        this.config = config;
+        this.writerId = writerId;
+    }
+
+    /**
+     * Event generator for Parquet file.
+     *
+     * @param config configuration parameters
+     * @return eventGenerator
+     */
+    @Override
+    public EventGenerator getEventGenerator(FileConfig config) {
+        return ParquetEventGenerator.create(
+                config.routingKey,
+                config.maxRecordsPerEvent,
+                config.eventTemplateStr,
+                writerId);
+    }
+}
diff --git a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/rawfile/EventGenerator.java b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/rawfile/RawEventGenerator.java
similarity index 57%
rename from pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/rawfile/EventGenerator.java
rename to pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/rawfile/RawEventGenerator.java
index 52882e18..9ce9ec44 100644
--- a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/rawfile/EventGenerator.java
+++ b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/rawfile/RawEventGenerator.java
@@ -8,48 +8,52 @@
  *
  * http://www.apache.org/licenses/LICENSE-2.0
  */
-package io.pravega.sensor.collector.rawfile;
-
-import java.io.IOException;
-import java.util.function.Consumer;
+package io.pravega.sensor.collector.file.rawfile;
 
 import com.fasterxml.jackson.databind.ObjectMapper;
 import com.fasterxml.jackson.databind.node.ObjectNode;
 import com.google.common.io.CountingInputStream;
+import io.pravega.sensor.collector.file.EventGenerator;
+import io.pravega.sensor.collector.util.PravegaWriterEvent;
+import org.apache.commons.compress.utils.IOUtils;
 import org.apache.commons.lang3.tuple.ImmutablePair;
 import org.apache.commons.lang3.tuple.Pair;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import java.io.BufferedInputStream;
+import java.io.IOException;
+import java.util.function.Consumer;
+
 /**
- * Generate Event from file
+ * Generate Event from RAW file
 */
-public class EventGenerator {
-    private static final Logger log = LoggerFactory.getLogger(EventGenerator.class);
+public class RawEventGenerator implements EventGenerator {
+    private static final Logger log = LoggerFactory.getLogger(RawEventGenerator.class);
 
     private final String routingKey;
     private final ObjectNode eventTemplate;
     private final ObjectMapper mapper;
 
-    public EventGenerator(String routingKey, ObjectNode eventTemplate, ObjectMapper mapper) {
+    public RawEventGenerator(String routingKey, ObjectNode eventTemplate, ObjectMapper mapper) {
         this.routingKey = routingKey;
         this.eventTemplate = eventTemplate;
         this.mapper = mapper;
     }
 
-    public static EventGenerator create(String routingKey, String eventTemplateStr, String writerId) {
+    public static RawEventGenerator create(String routingKey, String eventTemplateStr, String writerId) {
         try {
             final ObjectMapper mapper = new ObjectMapper();
             final ObjectNode eventTemplate = (ObjectNode) mapper.readTree(eventTemplateStr);
             eventTemplate.put("WriterId", writerId);
-            return new EventGenerator(routingKey, eventTemplate, mapper);
+            return new RawEventGenerator(routingKey, eventTemplate, mapper);
         } catch (IOException e) {
             throw new RuntimeException(e);
         }
     }
 
-    public static EventGenerator create(String routingKey) throws IOException {
+    public static RawEventGenerator create(String routingKey) throws IOException {
         return create(routingKey, "{}", "MyWriterId");
     }
 
@@ -60,17 +64,17 @@ public static EventGenerator create(String routingKey) throws IOException {
      * @param firstSequenceNumber
      * @return next sequence number, end offset
      */
-    protected Pair<Long, Long> generateEventsFromInputStream(CountingInputStream inputStream, long firstSequenceNumber, Consumer<RawFileWriterEvent> consumer) throws IOException {
-
+    public Pair<Long, Long> generateEventsFromInputStream(CountingInputStream inputStream, long firstSequenceNumber, Consumer<PravegaWriterEvent> consumer) throws IOException {
         long nextSequenceNumber = firstSequenceNumber;
         try {
-            byte[] byteArray = inputStream.readAllBytes();
-            //TODO: Batching
+            BufferedInputStream bis = new BufferedInputStream(inputStream);
+            byte[] byteArray = IOUtils.toByteArray(bis);
 
-            consumer.accept(new RawFileWriterEvent(routingKey, nextSequenceNumber, byteArray));
-            nextSequenceNumber++;
+            if (byteArray.length > 0) { // non-empty file
+                consumer.accept(new PravegaWriterEvent(routingKey, nextSequenceNumber, byteArray));
+                nextSequenceNumber++;
+            }
             final long endOffset = inputStream.getCount();
-
             return new ImmutablePair<>(nextSequenceNumber, endOffset);
         } catch (Exception e) {
             log.error("Exception = {}", e);
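[Editor's note] A minimal usage sketch (not part of this change set) of the raw generator above: a non-empty input produces exactly one event containing the whole file, and an empty input produces none. The payload, routing key, and starting sequence number are invented for illustration.

import com.google.common.io.CountingInputStream;
import io.pravega.sensor.collector.file.rawfile.RawEventGenerator;

import java.io.ByteArrayInputStream;
import java.nio.charset.StandardCharsets;

public class RawGeneratorSketch {
    public static void main(String[] args) throws Exception {
        final RawEventGenerator generator = RawEventGenerator.create("routingkey1");
        final byte[] payload = "entire file contents".getBytes(StandardCharsets.UTF_8);
        final CountingInputStream in = new CountingInputStream(new ByteArrayInputStream(payload));
        // Prints one line; the returned pair would carry the next sequence number (11)
        // and the end offset (payload length).
        generator.generateEventsFromInputStream(in, 10,
                e -> System.out.println(e.sequenceNumber + ": " + e.bytes.length + " bytes"));
    }
}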
diff --git a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/rawfile/RawFileIngestService.java b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/rawfile/RawFileIngestService.java
new file mode 100644
index 00000000..176600d4
--- /dev/null
+++ b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/rawfile/RawFileIngestService.java
@@ -0,0 +1,26 @@
+/**
+ * Copyright (c) Dell Inc., or its subsidiaries. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ */
+package io.pravega.sensor.collector.file.rawfile;
+
+import io.pravega.sensor.collector.DeviceDriverConfig;
+import io.pravega.sensor.collector.file.FileIngestService;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Ingestion service for raw file data.
+ */
+public class RawFileIngestService extends FileIngestService {
+    private static final Logger log = LoggerFactory.getLogger(RawFileIngestService.class);
+
+    public RawFileIngestService(DeviceDriverConfig config) {
+        super(config);
+    }
+}
diff --git a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/rawfile/RawFileProcessor.java b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/rawfile/RawFileProcessor.java
new file mode 100644
index 00000000..07b349f8
--- /dev/null
+++ b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/rawfile/RawFileProcessor.java
@@ -0,0 +1,46 @@
+/**
+ * Copyright (c) Dell Inc., or its subsidiaries. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ */
+package io.pravega.sensor.collector.file.rawfile;
+
+import io.pravega.sensor.collector.file.EventGenerator;
+import io.pravega.sensor.collector.file.FileConfig;
+import io.pravega.sensor.collector.file.FileProcessor;
+import io.pravega.sensor.collector.util.TransactionStateDB;
+import io.pravega.sensor.collector.util.EventWriter;
+import io.pravega.sensor.collector.util.TransactionCoordinator;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+
+public class RawFileProcessor extends FileProcessor {
+    private static final Logger log = LoggerFactory.getLogger(RawFileProcessor.class);
+
+    private final FileConfig config;
+    private final String writerId;
+
+    public RawFileProcessor(FileConfig config, TransactionStateDB state, EventWriter writer, TransactionCoordinator transactionCoordinator, String writerId) {
+        super(config, state, writer, transactionCoordinator);
+        this.config = config;
+        this.writerId = writerId;
+    }
+
+    /**
+     * Event generator for Raw file.
+     *
+     * @param config configuration parameters
+     * @return eventGenerator
+     */
+    @Override
+    public EventGenerator getEventGenerator(FileConfig config) {
+        return RawEventGenerator.create(
+                config.routingKey,
+                config.eventTemplateStr,
+                writerId);
+    }
+}
diff --git a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/parquet/FileNameWithOffset.java b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/parquet/FileNameWithOffset.java
deleted file mode 100644
index 1d84f533..00000000
--- a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/parquet/FileNameWithOffset.java
+++ /dev/null
@@ -1,58 +0,0 @@
-/**
- * Copyright (c) Dell Inc., or its subsidiaries. All Rights Reserved.
-
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- */
-package io.pravega.sensor.collector.parquet;
-
-import java.util.Objects;
-
-/**
- * File name and file size
- */
-public class FileNameWithOffset implements Comparable<FileNameWithOffset> {
-    public final String fileName;
-
-    /**
-     * In some contexts, this is the size of the file.
-     * In the future, this will represent the offset in the file for incrementally ingesting growing log files.
-     * This is partially implemented today.
-     * TODO: Clarify usage of offset.
-     */
-    public final long offset;
-
-    public FileNameWithOffset(String fileName, long offset) {
-        this.fileName = fileName;
-        this.offset = offset;
-    }
-
-    @Override
-    public String toString() {
-        return "FileNameWithOffset{" +
-                "fileName='" + fileName + '\'' +
-                ", offset=" + offset +
-                '}';
-    }
-
-    @Override
-    public boolean equals(Object o) {
-        if (this == o) return true;
-        if (o == null || getClass() != o.getClass()) return false;
-        FileNameWithOffset that = (FileNameWithOffset) o;
-        return offset == that.offset &&
-                Objects.equals(fileName, that.fileName);
-    }
-
-    @Override
-    public int hashCode() {
-        return Objects.hash(fileName, offset);
-    }
-
-    @Override
-    public int compareTo(FileNameWithOffset o) {
-        return this.fileName.compareTo(o.fileName);
-    }
-}
diff --git a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/parquet/ParquetFileProcessor.java b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/parquet/ParquetFileProcessor.java
deleted file mode 100644
index fa9a68c6..00000000
--- a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/parquet/ParquetFileProcessor.java
+++ /dev/null
@@ -1,264 +0,0 @@
-/**
- * Copyright (c) Dell Inc., or its subsidiaries. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- */
-package io.pravega.sensor.collector.parquet;
-
-import java.io.FileInputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.nio.channels.FileChannel;
-import java.nio.channels.FileLock;
-import java.nio.file.DirectoryStream;
-import java.nio.file.Files;
-import java.nio.file.Path;
-import java.nio.file.Paths;
-import java.nio.file.StandardOpenOption;
-import java.sql.Connection;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Optional;
-import java.util.Set;
-import java.util.UUID;
-import java.util.concurrent.atomic.AtomicLong;
-
-import com.google.common.io.CountingInputStream;
-
-import org.apache.commons.lang3.tuple.Pair;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import io.pravega.client.EventStreamClientFactory;
-import io.pravega.client.stream.EventWriterConfig;
-import io.pravega.client.stream.TxnFailedException;
-import io.pravega.client.stream.impl.ByteArraySerializer;
-import io.pravega.sensor.collector.util.EventWriter;
-import io.pravega.sensor.collector.util.PersistentId;
-import io.pravega.sensor.collector.util.TransactionCoordinator;
-
-/**
- * Get list of files obtained from config. Process each file for ingestion.
- * Keep track of new files and delete ingested files if "DELETE_COMPLETED_FILES"=true.
- */
-public class ParquetFileProcessor {
-    private static final Logger log = LoggerFactory.getLogger(ParquetFileIngestService.class);
-
-    private final ParquetFileConfig config;
-    private final ParquetFileState state;
-    private final EventWriter<byte[]> writer;
-    private final TransactionCoordinator transactionCoordinator;
-    private final EventGenerator eventGenerator;
-
-    public ParquetFileProcessor(ParquetFileConfig config, ParquetFileState state, EventWriter<byte[]> writer, TransactionCoordinator transactionCoordinator, EventGenerator eventGenerator) {
-        this.config = config;
-        this.state = state;
-        this.writer = writer;
-        this.transactionCoordinator = transactionCoordinator;
-        this.eventGenerator = eventGenerator;
-    }
-
-    public static ParquetFileProcessor create(ParquetFileConfig config, EventStreamClientFactory clientFactory) {
-        final Connection connection = ParquetFileState.createDatabase(config.stateDatabaseFileName);
-
-        final String writerId = new PersistentId(connection).getPersistentId().toString();
-        log.info("Writer ID: {}", writerId);
-
-        final EventWriter<byte[]> writer = EventWriter.create(
-                clientFactory,
-                writerId,
-                config.streamName,
-                new ByteArraySerializer(),
-                EventWriterConfig.builder()
-                        .enableConnectionPooling(false)
-                        .transactionTimeoutTime((long) (config.transactionTimeoutMinutes * 60.0 * 1000.0))
-                        .build(),
-                config.exactlyOnce);
-
-        final TransactionCoordinator transactionCoordinator = new TransactionCoordinator(connection, writer);
-        transactionCoordinator.performRecovery();
-
-        final EventGenerator eventGenerator = EventGenerator.create(
-                config.routingKey,
-                config.maxRecordsPerEvent,
-                config.eventTemplateStr,
-                writerId);
-        final ParquetFileState state = new ParquetFileState(connection, transactionCoordinator);
-        return new ParquetFileProcessor(config, state, writer, transactionCoordinator, eventGenerator);
-    }
-
-    public void ingestParquetFiles() throws Exception {
-        log.trace("ingestParquetFiles: BEGIN");
-        // delete leftover completed files
-        if (config.enableDeleteCompletedFiles) {
-            deleteCompletedFiles();
-        }
-        findAndRecordNewFiles();
-        processNewFiles();
-        log.trace("ingestParquetFiles: END");
-    }
-
-    public void processNewFiles() throws Exception {
-        for (;;) {
-            final Pair<FileNameWithOffset, Long> nextFile = state.getNextPendingFile();
-            if (nextFile == null) {
-                log.trace("No more files to ingest");
-                break;
-            } else {
-                processFile(nextFile.getLeft(), nextFile.getRight());
-            }
-        }
-    }
-
-    protected void findAndRecordNewFiles() throws Exception {
-        final List<FileNameWithOffset> directoryListing = getDirectoryListing();
-        final List<FileNameWithOffset> completedFiles = state.getCompletedFiles();
-        final List<FileNameWithOffset> newFiles = getNewFiles(directoryListing, completedFiles);
-        state.addPendingFiles(newFiles);
-    }
-
-    /**
-     * @return list of file name and file size in bytes
-     */
-    protected List<FileNameWithOffset> getDirectoryListing() throws IOException {
-        log.trace("getDirectoryListing: fileSpec={}", config.fileSpec);
-        final List<FileNameWithOffset> directoryListing = getDirectoryListing(config.fileSpec, config.fileExtension);
-        log.trace("getDirectoryListing: directoryListing={}", directoryListing);
-        return directoryListing;
-    }
-
-    /**
-     * @return list of file name and file size in bytes
-     */
-    static protected List<FileNameWithOffset> getDirectoryListing(String fileSpec, String fileExtension) throws IOException {
-        final Path pathSpec = Paths.get(fileSpec);
-        List<FileNameWithOffset> directoryListing = new ArrayList<>();
-        try (DirectoryStream<Path> dirStream = Files.newDirectoryStream(pathSpec)) {
-            for (Path path : dirStream) {
-                if (Files.isDirectory(path))    // traverse subdirectories
-                    directoryListing.addAll(getDirectoryListing(path.toString(), fileExtension));
-                else {
-                    FileNameWithOffset fileEntry = new FileNameWithOffset(path.toAbsolutePath().toString(), path.toFile().length());
-                    // If extension is null, ingest all files
-                    if (fileExtension.isEmpty() || fileExtension.equals(fileEntry.fileName.substring(fileEntry.fileName.lastIndexOf(".") + 1)))
-                        directoryListing.add(fileEntry);
-                }
-            }
-        }
-        return directoryListing;
-    }
-
-    /**
-     * @return sorted list of file name and file size in bytes
-     */
-    static protected List<FileNameWithOffset> getNewFiles(List<FileNameWithOffset> directoryListing, List<FileNameWithOffset> completedFiles) {
-        final ArrayList<FileNameWithOffset> sortedDirectoryListing = new ArrayList<>(directoryListing);
-        Collections.sort(sortedDirectoryListing);
-        final List<FileNameWithOffset> newFiles = new ArrayList<>();
-        final Set<FileNameWithOffset> setCompletedFiles = new HashSet<>(completedFiles);
-        log.trace("setCompletedFiles={}", setCompletedFiles);
-        sortedDirectoryListing.forEach(dirFile -> {
-            if (!setCompletedFiles.contains(dirFile)) {
-                newFiles.add(new FileNameWithOffset(dirFile.fileName, 0));
-            }
-        });
-        if (!newFiles.isEmpty())
-            log.info("{} New file(s) = {}", newFiles.size(), newFiles);
-        return newFiles;
-    }
-
-    // PROCESS FILE
-
-    void processFile(FileNameWithOffset fileNameWithBeginOffset, long firstSequenceNumber) throws Exception {
-        log.info("processFile: Ingesting file {}; beginOffset={}, firstSequenceNumber={}",
-                fileNameWithBeginOffset.fileName, fileNameWithBeginOffset.offset, firstSequenceNumber);
-
-        AtomicLong numofbytes = new AtomicLong(0);
-        long timestamp = System.nanoTime();
-
-        // In case a previous iteration encountered an error, we need to ensure that
-        // previously flushed transactions are committed and any unflushed transactions are aborted.
-        transactionCoordinator.performRecovery();
-        writer.abort();
-
-        try (final InputStream inputStream = new FileInputStream(fileNameWithBeginOffset.fileName)) {
-            try (final CountingInputStream countingInputStream = new CountingInputStream(inputStream)) {
-                countingInputStream.skip(fileNameWithBeginOffset.offset);
-                final Pair<Long, Long> result = eventGenerator.generateEventsFromInputStream(countingInputStream, firstSequenceNumber,
-                        e -> {
-                            log.trace("processFile: event={}", e);
-                            try {
-                                writer.writeEvent(e.routingKey, e.bytes);
-                                numofbytes.addAndGet(e.bytes.length);
-                            } catch (TxnFailedException ex) {
-                                throw new RuntimeException(ex);
-                            }
-                        });
-                final Optional<UUID> txnId = writer.flush();
-                final long nextSequenceNumber = result.getLeft();
-                final long endOffset = result.getRight();
-                state.addCompletedFile(fileNameWithBeginOffset.fileName, fileNameWithBeginOffset.offset, endOffset, nextSequenceNumber, txnId);
-                // injectCommitFailure();
-                writer.commit();
-                state.deleteTransactionToCommit(txnId);
-
-                double elapsedSec = (System.nanoTime() - timestamp) / 1_000_000_000.0;
-                double megabyteCount = numofbytes.getAndSet(0) / 1_000_000.0;
-                double megabytesPerSec = megabyteCount / elapsedSec;
-                log.info("processFile: Finished ingesting file {}; endOffset={}, nextSequenceNumber={}",
-                        fileNameWithBeginOffset.fileName, endOffset, nextSequenceNumber);
-                log.info("Sent {} MB in {} sec", megabyteCount, elapsedSec);
-                log.info("Transfer rate: {} MB/sec", megabytesPerSec);
-            }
-        }
-
-        // Delete file right after ingesting
-        if (config.enableDeleteCompletedFiles) {
-            deleteCompletedFiles();
-        }
-    }
-
-    void deleteCompletedFiles() throws Exception {
-        final List<FileNameWithOffset> completedFiles = state.getCompletedFiles();
-        completedFiles.forEach(file -> {
-            // Obtain a lock on file
-            try (FileChannel channel = FileChannel.open(Paths.get(file.fileName), StandardOpenOption.WRITE)) {
-                try (FileLock lock = channel.tryLock()) {
-                    if (lock != null) {
-                        Files.deleteIfExists(Paths.get(file.fileName));
-                        log.info("deleteCompletedFiles: Deleted file {}", file.fileName);
-                        lock.release();
-                        // Only remove from database if we could delete file.
-                        state.deleteCompletedFile(file.fileName);
-                    } else {
-                        log.warn("Unable to obtain lock on file {}. File is locked by another process.", file.fileName);
-                        throw new Exception();
-                    }
-                }
-            } catch (Exception e) {
-                log.warn("Unable to delete ingested file {}", e.getMessage());
-                log.warn("Deletion will be retried on the next iteration.");
-                // We can continue on this error. Deletion will be retried on the next iteration.
-            }
-        });
-    }
-
-    /**
-     * Inject a failure before commit for testing.
-     */
-    protected void injectCommitFailure() {
-        if (Math.random() < 0.3) {
-            throw new RuntimeException("injectCommitFailure: Commit failure test exception");
-        }
-    }
-}
diff --git a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/parquet/ParquetFileState.java b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/parquet/ParquetFileState.java
deleted file mode 100644
index 712b3a72..00000000
--- a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/parquet/ParquetFileState.java
+++ /dev/null
@@ -1,186 +0,0 @@
-/**
- * Copyright (c) Dell Inc., or its subsidiaries. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- */
-package io.pravega.sensor.collector.parquet;
-
-import java.sql.Connection;
-import java.sql.DriverManager;
-import java.sql.PreparedStatement;
-import java.sql.ResultSet;
-import java.sql.SQLException;
-import java.sql.Statement;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Optional;
-import java.util.UUID;
-
-import com.google.common.annotations.VisibleForTesting;
-
-import org.apache.commons.lang3.tuple.ImmutablePair;
-import org.apache.commons.lang3.tuple.Pair;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import io.pravega.sensor.collector.parquet.FileNameWithOffset;
-import io.pravega.sensor.collector.util.AutoRollback;
-import io.pravega.sensor.collector.util.TransactionCoordinator;
-
-import static java.sql.Connection.TRANSACTION_SERIALIZABLE;
-
-/**
- * Maintain state of pending and completed files in SQLite database.
- */
-public class ParquetFileState implements AutoCloseable {
-    private static final Logger log = LoggerFactory.getLogger(ParquetFileState.class);
-
-    private final Connection connection;
-    private final TransactionCoordinator transactionCoordinator;
-
-    public ParquetFileState(Connection connection, TransactionCoordinator transactionCoordinator) {
-        this.connection = connection;
-        this.transactionCoordinator = transactionCoordinator;
-    }
-
-    public static Connection createDatabase(String fileName) {
-        try {
-            final Connection connection = DriverManager.getConnection("jdbc:sqlite:" + fileName);
-            try (final Statement statement = connection.createStatement()) {
-                // Use SQLite exclusive locking mode to ensure that another process or device driver instance is not using this database.
-                //statement.execute("PRAGMA locking_mode = EXCLUSIVE");
-                statement.execute(
-                        "create table if not exists PendingFiles (" +
-                                "id integer primary key autoincrement, " +
-                                "fileName string unique not null, " +
-                                "offset bigint not null)");
-                statement.execute(
-                        "create table if not exists CompletedFiles (" +
-                                "fileName string primary key not null, " +
-                                "offset bigint not null)");
-                statement.execute(
-                        "create table if not exists SequenceNumber (" +
-                                "id integer primary key check (id = 0), " +
-                                "nextSequenceNumber bigint not null)");
-                statement.execute(
-                        "insert or ignore into SequenceNumber (id, nextSequenceNumber) values (0, 0)");
-            }
-            connection.setAutoCommit(false);
-            connection.setTransactionIsolation(TRANSACTION_SERIALIZABLE);
-            return connection;
-        } catch (SQLException e) {
-            throw new RuntimeException(e);
-        }
-    }
-
-    @VisibleForTesting
-    public static ParquetFileState create(String fileName) {
-        final Connection connection = createDatabase(fileName);
-        final TransactionCoordinator transactionCoordinator = new TransactionCoordinator(connection, null);
-        return new ParquetFileState(connection, transactionCoordinator);
-    }
-
-    @Override
-    public void close() throws SQLException {
-        connection.close();
-    }
-
-    public void addPendingFiles(List<FileNameWithOffset> files) throws SQLException {
-        try (final PreparedStatement insertStatement = connection.prepareStatement(
-                "insert or ignore into PendingFiles (fileName, offset) values (?, ?)");
-             final AutoRollback autoRollback = new AutoRollback(connection)) {
-            for (FileNameWithOffset file : files) {
-                insertStatement.setString(1, file.fileName);
-                insertStatement.setLong(2, file.offset);
-                insertStatement.execute();
-            }
-            autoRollback.commit();
-        }
-    }
-
-    /**
-     * @return ((file name, begin offset), sequence number) or null if there is no pending file
-     */
-    public Pair<FileNameWithOffset, Long> getNextPendingFile() throws SQLException {
-        try (final Statement statement = connection.createStatement();
-             final ResultSet rs = statement.executeQuery("select fileName, offset from PendingFiles order by id limit 1")) {
-            if (rs.next()) {
-                final FileNameWithOffset fileNameWithOffset = new FileNameWithOffset(rs.getString("fileName"), rs.getLong("offset"));
-                try (final ResultSet rsSequenceNumber = statement.executeQuery("select nextSequenceNumber from SequenceNumber")) {
-                    rsSequenceNumber.next();
-                    final long nextSequenceNumber = rsSequenceNumber.getLong(1);
-                    return new ImmutablePair<>(fileNameWithOffset, nextSequenceNumber);
-                }
-            } else {
-                return null;
-            }
-        } finally {
-            connection.commit();
-        }
-    }
-
-    public void addCompletedFile(String fileName, long beginOffset, long endOffset, long newNextSequenceNumber, Optional<UUID> txnId) throws SQLException {
-        try (final PreparedStatement updateSequenceNumberStatement = connection.prepareStatement(
-                "update SequenceNumber set nextSequenceNumber = ?");
-             final PreparedStatement insertCompletedFileStatement = connection.prepareStatement(
-                "insert or ignore into CompletedFiles (fileName, offset) values (?, ?)");
-             final PreparedStatement deletePendingFileStatement = connection.prepareStatement(
-                "delete from PendingFiles where fileName = ? and offset <= ?");
-             final AutoRollback autoRollback = new AutoRollback(connection)) {
-            // Update sequence number.
-            updateSequenceNumberStatement.setLong(1, newNextSequenceNumber);
-            updateSequenceNumberStatement.execute();
-            // Add completed file.
-            insertCompletedFileStatement.setString(1, fileName);
-            insertCompletedFileStatement.setLong(2, endOffset);
-            insertCompletedFileStatement.execute();
-            // Remove pending file.
-            deletePendingFileStatement.setString(1, fileName);
-            deletePendingFileStatement.setLong(2, beginOffset);
-            deletePendingFileStatement.execute();
-            transactionCoordinator.addTransactionToCommit(txnId);
-            autoRollback.commit();
-        }
-    }
-
-    @VisibleForTesting
-    public void addCompletedFile(String fileName, long beginOffset, long endOffset, long newNextSequenceNumber) throws SQLException {
-        addCompletedFile(fileName, beginOffset, endOffset, newNextSequenceNumber, Optional.empty());
-    }
-
-    public void deleteTransactionToCommit(Optional<UUID> txnId) {
-        transactionCoordinator.deleteTransactionToCommit(txnId);
-    }
-
-    /**
-     * @return list of file name and end offset (file size)
-     */
-    public List<FileNameWithOffset> getCompletedFiles() throws SQLException {
-        try (final Statement statement = connection.createStatement();
-             final ResultSet rs = statement.executeQuery("select fileName, offset from completedFiles")) {
-            final List<FileNameWithOffset> files = new ArrayList<>();
-            while (rs.next()) {
-                final FileNameWithOffset fileNameWithOffset = new FileNameWithOffset(rs.getString("fileName"), rs.getLong("offset"));
-                files.add(fileNameWithOffset);
-            }
-            return files;
-        } finally {
-            connection.commit();
-        }
-    }
-
-    public void deleteCompletedFile(String fileName) throws SQLException {
-        try (final PreparedStatement deleteStatement = connection.prepareStatement(
-                "delete from CompletedFiles where fileName = ?");
-             final AutoRollback autoRollback = new AutoRollback(connection)) {
-            deleteStatement.setString(1, fileName);
-            deleteStatement.execute();
-            autoRollback.commit();
-        }
-    }
-}
diff --git a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/rawfile/RawFileConfig.java b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/rawfile/RawFileConfig.java
deleted file mode 100644
index aea1adb2..00000000
--- a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/rawfile/RawFileConfig.java
+++ /dev/null
@@ -1,55 +0,0 @@
-/**
- * Copyright (c) Dell Inc., or its subsidiaries. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- */
-package io.pravega.sensor.collector.rawfile;
-
-/**
- * Config passed to Pravega Sensor Collector
- */
-public class RawFileConfig {
-    public final String stateDatabaseFileName;
-    public final String fileSpec;
-    public final String fileExtension;
-    public final String routingKey;
-    public final String streamName;
-    public final String eventTemplateStr;
-
-    public final boolean enableDeleteCompletedFiles;
-    public final boolean exactlyOnce;
-    public final double transactionTimeoutMinutes;
-
-    public RawFileConfig(String stateDatabaseFileName, String fileSpec, String fileExtension, String routingKey, String streamName, String eventTemplateStr, boolean enableDeleteCompletedFiles, boolean exactlyOnce, double transactionTimeoutMinutes) {
-        this.stateDatabaseFileName = stateDatabaseFileName;
-        this.fileSpec = fileSpec;
-        this.fileExtension = fileExtension;
-        this.routingKey = routingKey;
-        this.streamName = streamName;
-        this.eventTemplateStr = eventTemplateStr;
-        this.enableDeleteCompletedFiles = enableDeleteCompletedFiles;
-        this.exactlyOnce = exactlyOnce;
-        this.transactionTimeoutMinutes = transactionTimeoutMinutes;
-    }
-
-    @Override
-    public String toString() {
-        return "RawFileConfig{" +
-                "stateDatabaseFileName='" + stateDatabaseFileName + '\'' +
-                ", fileSpec='" + fileSpec + '\'' +
-                ", fileExtension='" + fileExtension + '\'' +
-                ", routingKey='" + routingKey + '\'' +
-                ", streamName='" + streamName + '\'' +
-                ", eventTemplateStr='" + eventTemplateStr + '\'' +
-                ", enableDeleteCompletedFiles=" + enableDeleteCompletedFiles +
-                ", exactlyOnce=" + exactlyOnce +
-                ", transactionTimeoutMinutes=" + transactionTimeoutMinutes +
-                '}';
-    }
-}
diff --git a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/rawfile/RawFileIngestService.java b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/rawfile/RawFileIngestService.java
deleted file mode 100644
index df2941e0..00000000
--- a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/rawfile/RawFileIngestService.java
+++ /dev/null
@@ -1,148 +0,0 @@
-/**
- * Copyright (c) Dell Inc., or its subsidiaries. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- */
-package io.pravega.sensor.collector.rawfile;
-
-import java.util.concurrent.Executors;
-import java.util.concurrent.ScheduledExecutorService;
-import java.util.concurrent.ScheduledFuture;
-import java.util.concurrent.ThreadFactory;
-import java.util.concurrent.TimeUnit;
-
-import com.google.common.util.concurrent.ThreadFactoryBuilder;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import io.pravega.client.EventStreamClientFactory;
-import io.pravega.sensor.collector.DeviceDriver;
-import io.pravega.sensor.collector.DeviceDriverConfig;
-
-/**
- * Ingestion service for raw file data.
- */ -public class RawFileIngestService extends DeviceDriver{ - private static final Logger log = LoggerFactory.getLogger(RawFileIngestService.class); - - private static final String FILE_SPEC_KEY = "FILE_SPEC"; - private static final String FILE_EXT= "FILE_EXTENSION"; - private static final String DELETE_COMPLETED_FILES_KEY = "DELETE_COMPLETED_FILES"; - private static final String DATABASE_FILE_KEY = "DATABASE_FILE"; - private static final String EVENT_TEMPLATE_KEY = "EVENT_TEMPLATE"; - private static final String INTERVAL_MS_KEY = "INTERVAL_MS"; - - private static final String SCOPE_KEY = "SCOPE"; - private static final String STREAM_KEY = "STREAM"; - private static final String ROUTING_KEY_KEY = "ROUTING_KEY"; - private static final String EXACTLY_ONCE_KEY = "EXACTLY_ONCE"; - private static final String TRANSACTION_TIMEOUT_MINUTES_KEY = "TRANSACTION_TIMEOUT_MINUTES"; - - private final RawFileProcessor processor; - private final ScheduledExecutorService executor; - private ScheduledFuture task; - - public RawFileIngestService(DeviceDriverConfig config){ - super(config); - final RawFileConfig rawFileConfig = new RawFileConfig( - getDatabaseFileName(), - getFileSpec(), - getFileExtension(), - getRoutingKey(), - getStreamName(), - getEventTemplate(), - getDeleteCompletedFiles(), - getExactlyOnce(), - getTransactionTimeoutMinutes()); - log.info("Raw File Ingest Config: {}", rawFileConfig); - final String scopeName = getScopeName(); - log.info("Scope: {}", scopeName); - createStream(scopeName, getStreamName()); - - final EventStreamClientFactory clientFactory = getEventStreamClientFactory(scopeName); - processor = RawFileProcessor.create(rawFileConfig, clientFactory); - ThreadFactory namedThreadFactory = new ThreadFactoryBuilder().setNameFormat( - RawFileIngestService.class.getSimpleName() + "-" + config.getInstanceName() + "-%d").build(); - executor = Executors.newScheduledThreadPool(1, namedThreadFactory); - - } - - String getFileSpec() { - return getProperty(FILE_SPEC_KEY); - } - - String getFileExtension() { - return getProperty(FILE_EXT, ""); - } - - boolean getDeleteCompletedFiles() { - return Boolean.parseBoolean(getProperty(DELETE_COMPLETED_FILES_KEY, Boolean.toString(true))); - } - - String getDatabaseFileName() { - return getProperty(DATABASE_FILE_KEY); - } - - String getEventTemplate() { - return getProperty(EVENT_TEMPLATE_KEY, "{}"); - } - - long getIntervalMs() { - return Long.parseLong(getProperty(INTERVAL_MS_KEY, Long.toString(10000))); - } - - String getScopeName() { - return getProperty(SCOPE_KEY); - } - - String getStreamName() { - return getProperty(STREAM_KEY); - } - - protected String getRoutingKey() { - return getProperty(ROUTING_KEY_KEY, ""); - } - - boolean getExactlyOnce() { - return Boolean.parseBoolean(getProperty(EXACTLY_ONCE_KEY, Boolean.toString(true))); - } - - /** - * This time duration must not exceed the controller property controller.transaction.maxLeaseValue (milliseconds). - */ - double getTransactionTimeoutMinutes() { - return Double.parseDouble(getProperty(TRANSACTION_TIMEOUT_MINUTES_KEY, Double.toString(18.0 * 60.0))); - } - - protected void ingestRawFiles() { - log.trace("ingestRawFiles: BEGIN"); - try { - processor.ingestRawFiles(); - } catch (Exception e) { - log.error("Error", e); - // Continue on any errors. We will retry on the next iteration. 
- } - log.trace("ingestRawFiles: END"); - } - - @Override - protected void doStart() { - task = executor.scheduleAtFixedRate( - this::ingestRawFiles, - 0, - getIntervalMs(), - TimeUnit.MILLISECONDS); - notifyStarted(); - } - - @Override - protected void doStop() { - task.cancel(false); - } - -} diff --git a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/rawfile/RawFileProcessor.java b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/rawfile/RawFileProcessor.java deleted file mode 100644 index e420493d..00000000 --- a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/rawfile/RawFileProcessor.java +++ /dev/null @@ -1,260 +0,0 @@ -/** - * Copyright (c) Dell Inc., or its subsidiaries. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - */ -package io.pravega.sensor.collector.rawfile; - -import java.io.FileInputStream; -import java.io.IOException; -import java.io.InputStream; -import java.nio.channels.FileChannel; -import java.nio.channels.FileLock; -import java.nio.file.DirectoryStream; -import java.nio.file.Files; -import java.nio.file.Path; -import java.nio.file.Paths; -import java.nio.file.StandardOpenOption; -import java.sql.Connection; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashSet; -import java.util.List; -import java.util.Optional; -import java.util.Set; -import java.util.UUID; -import java.util.concurrent.atomic.AtomicLong; - -import com.google.common.io.CountingInputStream; - -import org.apache.commons.lang3.tuple.Pair; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import io.pravega.client.EventStreamClientFactory; -import io.pravega.client.stream.EventWriterConfig; -import io.pravega.client.stream.TxnFailedException; -import io.pravega.client.stream.impl.ByteArraySerializer; -import io.pravega.sensor.collector.util.EventWriter; -import io.pravega.sensor.collector.util.PersistentId; -import io.pravega.sensor.collector.util.TransactionCoordinator; - -/** - * Get list of files obtained from config. Process each file for ingestion. - * Keep track of new files and delete ingested files if "DELETE_COMPLETED_FILES"=true. 
- */ -public class RawFileProcessor { - private static final Logger log = LoggerFactory.getLogger(RawFileIngestService.class); - - private final RawFileConfig config; - private final RawFileState state; - private final EventWriter writer; - private final TransactionCoordinator transactionCoordinator; - private final EventGenerator eventGenerator; - - public RawFileProcessor(RawFileConfig config, RawFileState state, EventWriter writer, TransactionCoordinator transactionCoordinator, EventGenerator eventGenerator) { - this.config = config; - this.state = state; - this.writer = writer; - this.transactionCoordinator = transactionCoordinator; - this.eventGenerator = eventGenerator; - } - - public static RawFileProcessor create(RawFileConfig config, EventStreamClientFactory clientFactory){ - final Connection connection = RawFileState.createDatabase(config.stateDatabaseFileName); - - final String writerId = new PersistentId(connection).getPersistentId().toString(); - log.info("Writer ID: {}", writerId); - - final EventWriter writer = EventWriter.create( - clientFactory, - writerId, - config.streamName, - new ByteArraySerializer(), - EventWriterConfig.builder() - .enableConnectionPooling(false) - .transactionTimeoutTime((long) (config.transactionTimeoutMinutes * 60.0 * 1000.0)) - .build(), - config.exactlyOnce); - - final TransactionCoordinator transactionCoordinator = new TransactionCoordinator(connection, writer); - transactionCoordinator.performRecovery(); - - final EventGenerator eventGenerator = EventGenerator.create( - config.routingKey, - config.eventTemplateStr, - writerId); - final RawFileState state = new RawFileState(connection, transactionCoordinator); - return new RawFileProcessor(config, state, writer, transactionCoordinator, eventGenerator); - } - - public void ingestRawFiles() throws Exception { - log.trace("ingestRawFiles: BEGIN"); - // delete leftover completed files - if (config.enableDeleteCompletedFiles) { - deleteCompletedFiles(); - } - findAndRecordNewFiles(); - processNewFiles(); - log.trace("ingestRawFiles: END"); - } - - public void processNewFiles() throws Exception { - for (;;) { - final Pair nextFile = state.getNextPendingFile(); - if (nextFile == null) { - log.trace("No more files to ingest"); - break; - } else { - processFile(nextFile.getLeft(), nextFile.getRight()); - } - } - } - - protected void findAndRecordNewFiles() throws Exception { - final List directoryListing = getDirectoryListing(); - final List completedFiles = state.getCompletedFiles(); - final List newFiles = getNewFiles(directoryListing, completedFiles); - state.addPendingFiles(newFiles); - } - - /** - * @return list of file name and file size in bytes - */ - protected List getDirectoryListing() throws IOException { - log.trace("getDirectoryListing: fileSpec={}", config.fileSpec); - final List directoryListing = getDirectoryListing(config.fileSpec, config.fileExtension); - log.trace("getDirectoryListing: directoryListing={}", directoryListing); - return directoryListing; - } - - /** - * @return list of file name and file size in bytes - */ - static protected List getDirectoryListing(String fileSpec, String fileExtension) throws IOException { - final Path pathSpec = Paths.get(fileSpec); - List directoryListing = new ArrayList<>(); - try(DirectoryStream dirStream=Files.newDirectoryStream(pathSpec)){ - for(Path path: dirStream){ - if(Files.isDirectory(path)) - directoryListing.addAll(getDirectoryListing(path.toString(), fileExtension)); - else { - FileNameWithOffset fileEntry = new 
FileNameWithOffset(path.toAbsolutePath().toString(), path.toFile().length()); - // If extension is null, ingest all files - if(fileExtension.isEmpty() || fileExtension.equals(fileEntry.fileName.substring(fileEntry.fileName.lastIndexOf(".")+1))) - directoryListing.add(fileEntry); - } - } - } - return directoryListing; - } - - /** - * @return sorted list of file name and file size in bytes - */ - static protected List getNewFiles(List directoryListing, List completedFiles) { - final ArrayList sortedDirectoryListing = new ArrayList<>(directoryListing); - Collections.sort(sortedDirectoryListing); - final List newFiles = new ArrayList<>(); - final Set setCompletedFiles = new HashSet<>(completedFiles); - log.trace("setCompletedFiles={}", setCompletedFiles); - sortedDirectoryListing.forEach(dirFile -> { - if (!setCompletedFiles.contains(dirFile)) { - newFiles.add(new FileNameWithOffset(dirFile.fileName, 0)); - } - }); - if(!newFiles.isEmpty()) - log.info("{} New file(s) = {}", newFiles.size(), newFiles); - return newFiles; - } - - - void processFile(FileNameWithOffset fileNameWithBeginOffset, long firstSequenceNumber) throws Exception { - log.info("processFile: Ingesting file {}; beginOffset={}, firstSequenceNumber={}", - fileNameWithBeginOffset.fileName, fileNameWithBeginOffset.offset, firstSequenceNumber); - - AtomicLong numofbytes = new AtomicLong(0); - long timestamp = System.nanoTime(); - - // In case a previous iteration encountered an error, we need to ensure that - // previous flushed transactions are committed and any unflushed transactions as aborted. - transactionCoordinator.performRecovery(); - writer.abort(); - - try (final InputStream inputStream = new FileInputStream(fileNameWithBeginOffset.fileName)) { - try(final CountingInputStream countingInputStream = new CountingInputStream(inputStream)) { - countingInputStream.skip(fileNameWithBeginOffset.offset); - final Pair result = eventGenerator.generateEventsFromInputStream(countingInputStream, firstSequenceNumber, - e -> { - log.trace("processFile: event={}", e); - try { - writer.writeEvent(e.routingKey, e.bytes); - numofbytes.addAndGet(e.bytes.length); - - } catch (TxnFailedException ex) { - throw new RuntimeException(ex); - } - }); - final Optional txnId = writer.flush(); - final long nextSequenceNumber = result.getLeft(); - final long endOffset = result.getRight(); - state.addCompletedFile(fileNameWithBeginOffset.fileName, fileNameWithBeginOffset.offset, endOffset, nextSequenceNumber, txnId); - // injectCommitFailure(); - writer.commit(); - state.deleteTransactionToCommit(txnId); - - double elapsedSec = (System.nanoTime() - timestamp) / 1_000_000_000.0; - double megabyteCount = numofbytes.getAndSet(0) / 1_000_000.0; - double megabytesPerSec = megabyteCount / elapsedSec; - log.info("processFile: Finished ingesting file {}; endOffset={}, nextSequenceNumber={}", - fileNameWithBeginOffset.fileName, endOffset, nextSequenceNumber); - log.info("Sent {} MB in {} sec", megabyteCount, elapsedSec ); - log.info("Transfer rate: {} MB/sec", megabytesPerSec); - } - } - - // Delete file right after ingesting - if (config.enableDeleteCompletedFiles) { - deleteCompletedFiles(); - } - } - - void deleteCompletedFiles() throws Exception { - final List completedFiles = state.getCompletedFiles(); - completedFiles.forEach(file -> { - //Obtain a lock on file - try(FileChannel channel = FileChannel.open(Paths.get(file.fileName),StandardOpenOption.WRITE)){ - try(FileLock lock = channel.tryLock()) { - if(lock!=null){ - 
Files.deleteIfExists(Paths.get(file.fileName)); - log.info("deleteCompletedFiles: Deleted file {}", file.fileName); - lock.release(); - // Only remove from database if we could delete file. - state.deleteCompletedFile(file.fileName); - } - else{ - log.warn("Unable to obtain lock on file {}. File is locked by another process.", file.fileName); - throw new Exception(); - } - } - } catch (Exception e) { - log.warn("Unable to delete ingested file {}", e.getMessage()); - log.warn("Deletion will be retried on the next iteration."); - // We can continue on this error. Deletion will be retried on the next iteration. - } - }); - } - - /** - * Inject a failure before commit for testing. - */ - protected void injectCommitFailure() { - if (Math.random() < 0.3) { - throw new RuntimeException("injectCommitFailure: Commit failure test exception"); - } - } -} diff --git a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/rawfile/RawFileState.java b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/rawfile/RawFileState.java deleted file mode 100644 index 97079b4a..00000000 --- a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/rawfile/RawFileState.java +++ /dev/null @@ -1,185 +0,0 @@ -/** - * Copyright (c) Dell Inc., or its subsidiaries. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - */ -package io.pravega.sensor.collector.rawfile; - -import java.sql.Connection; -import java.sql.DriverManager; -import java.sql.PreparedStatement; -import java.sql.ResultSet; -import java.sql.SQLException; -import java.sql.Statement; -import java.util.ArrayList; -import java.util.List; -import java.util.Optional; -import java.util.UUID; - -import com.google.common.annotations.VisibleForTesting; - -import org.apache.commons.lang3.tuple.ImmutablePair; -import org.apache.commons.lang3.tuple.Pair; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import io.pravega.sensor.collector.util.AutoRollback; -import io.pravega.sensor.collector.util.TransactionCoordinator; - -import static java.sql.Connection.TRANSACTION_SERIALIZABLE; - -/** - * Maintain state of pending and completed files in SQLite database. - */ -public class RawFileState implements AutoCloseable{ - private static final Logger log = LoggerFactory.getLogger(RawFileState.class); - - private final Connection connection; - private final TransactionCoordinator transactionCoordinator; - - public RawFileState(Connection connection, TransactionCoordinator transactionCoordinator) { - this.connection = connection; - this.transactionCoordinator = transactionCoordinator; - } - - public static Connection createDatabase(String fileName) { - try { - final Connection connection = DriverManager.getConnection("jdbc:sqlite:" + fileName); - try (final Statement statement = connection.createStatement()) { - // Use SQLite exclusive locking mode to ensure that another process or device driver instance is not using this database. 
- //statement.execute("PRAGMA locking_mode = EXCLUSIVE"); - statement.execute( - "create table if not exists PendingFiles (" + - "id integer primary key autoincrement, " + - "fileName string unique not null, " + - "offset bigint not null)"); - statement.execute( - "create table if not exists CompletedFiles (" + - "fileName string primary key not null, " + - "offset bigint not null)"); - statement.execute( - "create table if not exists SequenceNumber (" + - "id integer primary key check (id = 0), " + - "nextSequenceNumber bigint not null)"); - statement.execute( - "insert or ignore into SequenceNumber (id, nextSequenceNumber) values (0, 0)"); - } - connection.setAutoCommit(false); - connection.setTransactionIsolation(TRANSACTION_SERIALIZABLE); - return connection; - } catch (SQLException e) { - throw new RuntimeException(e); - } - } - - @VisibleForTesting - public static RawFileState create(String fileName) { - final Connection connection = createDatabase(fileName); - final TransactionCoordinator transactionCoordinator = new TransactionCoordinator(connection, null); - return new RawFileState(connection, transactionCoordinator); - } - - @Override - public void close() throws SQLException { - connection.close(); - } - - public void addPendingFiles(List files) throws SQLException { - try (final PreparedStatement insertStatement = connection.prepareStatement( - "insert or ignore into PendingFiles (fileName, offset) values (?, ?)"); - final AutoRollback autoRollback = new AutoRollback(connection)) { - for (FileNameWithOffset file: files) { - insertStatement.setString(1, file.fileName); - insertStatement.setLong(2, file.offset); - insertStatement.execute(); - } - autoRollback.commit(); - } - } - - /** - * @return ((file name, begin offset), sequence number) or null if there is no pending file - */ - public Pair getNextPendingFile() throws SQLException { - try (final Statement statement = connection.createStatement(); - final ResultSet rs = statement.executeQuery("select fileName, offset from PendingFiles order by id limit 1")) { - if (rs.next()) { - final FileNameWithOffset fileNameWithOffset = new FileNameWithOffset(rs.getString("fileName"), rs.getLong("offset")); - try (final ResultSet rsSequenceNumber = statement.executeQuery("select nextSequenceNumber from SequenceNumber")) { - rsSequenceNumber.next(); - final long nextSequenceNumber = rsSequenceNumber.getLong(1); - return new ImmutablePair<>(fileNameWithOffset, nextSequenceNumber); - } - } else { - return null; - } - } finally { - connection.commit(); - } - } - - public void addCompletedFile(String fileName, long beginOffset, long endOffset, long newNextSequenceNumber, Optional txnId) throws SQLException { - try (final PreparedStatement updateSequenceNumberStatement = connection.prepareStatement( - "update SequenceNumber set nextSequenceNumber = ?"); - final PreparedStatement insertCompletedFileStatement = connection.prepareStatement( - "insert or ignore into CompletedFiles (fileName, offset) values (?, ?)"); - final PreparedStatement deletePendingFileStatement = connection.prepareStatement( - "delete from PendingFiles where fileName = ? and offset <= ?"); - final AutoRollback autoRollback = new AutoRollback(connection)) { - // Update sequence number. - updateSequenceNumberStatement.setLong(1, newNextSequenceNumber); - updateSequenceNumberStatement.execute(); - // Add completed file. 
- insertCompletedFileStatement.setString(1, fileName); - insertCompletedFileStatement.setLong(2, endOffset); - insertCompletedFileStatement.execute(); - // Remove pending file. - deletePendingFileStatement.setString(1, fileName); - deletePendingFileStatement.setLong(2, beginOffset); - deletePendingFileStatement.execute(); - transactionCoordinator.addTransactionToCommit(txnId); - autoRollback.commit(); - } - } - - @VisibleForTesting - public void addCompletedFile(String fileName, long beginOffset, long endOffset, long newNextSequenceNumber) throws SQLException { - addCompletedFile(fileName, beginOffset, endOffset, newNextSequenceNumber, Optional.empty()); - } - - public void deleteTransactionToCommit(Optional txnId) { - transactionCoordinator.deleteTransactionToCommit(txnId); - } - - /** - * @return list of file name and end offset (file size) - */ - public List getCompletedFiles() throws SQLException { - try (final Statement statement = connection.createStatement(); - final ResultSet rs = statement.executeQuery("select fileName, offset from completedFiles")) { - final List files = new ArrayList<>(); - while (rs.next()) { - final FileNameWithOffset fileNameWithOffset = new FileNameWithOffset(rs.getString("fileName"), rs.getLong("offset")); - files.add(fileNameWithOffset); - } - return files; - } finally { - connection.commit(); - } - } - - public void deleteCompletedFile(String fileName) throws SQLException { - try (final PreparedStatement deleteStatement = connection.prepareStatement( - "delete from CompletedFiles where fileName = ?"); - final AutoRollback autoRollback = new AutoRollback(connection)) { - deleteStatement.setString(1, fileName); - deleteStatement.execute(); - autoRollback.commit(); - } - } - -} diff --git a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/rawfile/RawFileWriterEvent.java b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/rawfile/RawFileWriterEvent.java deleted file mode 100644 index 4057fd0b..00000000 --- a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/rawfile/RawFileWriterEvent.java +++ /dev/null @@ -1,34 +0,0 @@ -/** - * Copyright (c) Dell Inc., or its subsidiaries. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- */
-package io.pravega.sensor.collector.rawfile;
-
-/**
- * Event generated from file and its sequence number
- */
-public class RawFileWriterEvent {
-    public final String routingKey;
-    public final long sequenceNumber;
-    public final byte[] bytes;
-
-    public RawFileWriterEvent(String routingKey, long sequenceNumber, byte[] bytes) {
-        this.routingKey = routingKey;
-        this.sequenceNumber = sequenceNumber;
-        this.bytes = bytes;
-    }
-
-    @Override
-    public String toString() {
-        return "PravegaWriterEvent{" +
-                "routingKey='" + routingKey + '\'' +
-                ", sequenceNumber=" + sequenceNumber +
-                ", bytes=" + new String(bytes) +
-                '}';
-    }
-}
diff --git a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/util/EventWriter.java b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/util/EventWriter.java
index 17cf5d65..742f5d42 100644
--- a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/util/EventWriter.java
+++ b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/util/EventWriter.java
@@ -12,6 +12,7 @@
 import io.pravega.client.EventStreamClientFactory;
 import io.pravega.client.stream.EventWriterConfig;
 import io.pravega.client.stream.Serializer;
+import io.pravega.client.stream.Transaction;
 import io.pravega.client.stream.TxnFailedException;
 
 import java.util.Optional;
@@ -71,5 +72,13 @@ static EventWriter create(
      */
     void abort();
 
+    /**
+     * This should be called prior to aborting any transaction, to make sure the transaction is not open.
+     */
+    public Transaction.Status getTransactionStatus(UUID txnId);
+
+    public Transaction.Status getTransactionStatus();
+
     void close();
+
 }
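The two new `getTransactionStatus` methods let a caller inspect a transaction's state before acting on it. A minimal sketch of the intended call pattern (the `AbortIfStillOpen` helper below is hypothetical, not part of this change):

```java
import io.pravega.client.stream.Transaction;
import io.pravega.sensor.collector.util.EventWriter;
import java.util.UUID;

public class AbortIfStillOpen {
    // Only abort if the recovered transaction is still open;
    // committed or aborted transactions must not be touched.
    static void abortIfStillOpen(EventWriter<byte[]> writer, UUID txnId) {
        Transaction.Status status = writer.getTransactionStatus(txnId);
        if (status == Transaction.Status.OPEN) {
            writer.abort();
        }
    }
}
```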
diff --git a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/rawfile/FileNameWithOffset.java b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/util/FileNameWithOffset.java
similarity index 97%
rename from pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/rawfile/FileNameWithOffset.java
rename to pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/util/FileNameWithOffset.java
index a0f70e98..d161f2d3 100644
--- a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/rawfile/FileNameWithOffset.java
+++ b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/util/FileNameWithOffset.java
@@ -7,7 +7,7 @@
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  */
-package io.pravega.sensor.collector.rawfile;
+package io.pravega.sensor.collector.util;
 
 import java.util.Objects;
diff --git a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/util/FileUtils.java b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/util/FileUtils.java
new file mode 100644
index 00000000..408ee41e
--- /dev/null
+++ b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/util/FileUtils.java
@@ -0,0 +1,169 @@
+package io.pravega.sensor.collector.util;
+
+import java.io.File;
+import java.io.IOException;
+import java.nio.channels.FileChannel;
+import java.nio.channels.FileLock;
+import java.nio.file.DirectoryStream;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.nio.file.StandardCopyOption;
+import java.nio.file.StandardOpenOption;
+import java.nio.file.attribute.BasicFileAttributes;
+import java.nio.file.attribute.FileTime;
+import java.util.List;
+import java.util.ArrayList;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class FileUtils {
+
+    private static final Logger log = LoggerFactory.getLogger(FileUtils.class);
+    final static String separator = ",";
+    public static final String FAILED_FILES = "Failed_Files";
+    public static final String COMPLETED_FILES = "Completed_Files";
+
+    /**
+     * @return list of file name and file size in bytes
+     * Handles the cases below:
+     * 1. If the given file path does not exist, log a message and continue.
+     * 2. If the directory does not exist, or there is no file with the given extension (e.g. .csv), log a message and continue.
+     * 3. If a file is empty, log a message and continue with the valid files.
+     */
+    static public List<FileNameWithOffset> getDirectoryListing(String fileSpec, String fileExtension, Path movedFilesDirectory, long minTimeInMillisToUpdateFile) throws IOException {
+        String[] directories = fileSpec.split(separator);
+        List<FileNameWithOffset> directoryListing = new ArrayList<>();
+        for (String directory : directories) {
+            final Path pathSpec = Paths.get(directory);
+            if (!Files.isDirectory(pathSpec.toAbsolutePath())) {
+                log.error("getDirectoryListing: Directory does not exist or spec is not valid : {}", pathSpec.toAbsolutePath());
+                throw new IOException("Directory does not exist or spec is not valid");
+            }
+            getDirectoryFiles(pathSpec, fileExtension, directoryListing, movedFilesDirectory, minTimeInMillisToUpdateFile);
+        }
+        return directoryListing;
+    }
+
+    /**
+     * Get all files in the directory (including subdirectories) and their respective file sizes in bytes.
+     */
+    static protected void getDirectoryFiles(Path pathSpec, String fileExtension, List<FileNameWithOffset> directoryListing, Path movedFilesDirectory, long minTimeInMillisToUpdateFile) throws IOException {
+        DirectoryStream.Filter<Path> lastModifiedTimeFilter = getLastModifiedTimeFilter(minTimeInMillisToUpdateFile);
+        try (DirectoryStream<Path> dirStream = Files.newDirectoryStream(pathSpec, lastModifiedTimeFilter)) {
+            for (Path path : dirStream) {
+                if (Files.isDirectory(path))    // traverse subdirectories
+                    getDirectoryFiles(path, fileExtension, directoryListing, movedFilesDirectory, minTimeInMillisToUpdateFile);
+                else {
+                    FileNameWithOffset fileEntry = new FileNameWithOffset(path.toAbsolutePath().toString(), path.toFile().length());
+                    if (isValidFile(fileEntry, fileExtension))
+                        directoryListing.add(fileEntry);
+                    else    // move invalid file to a different folder
+                        moveFailedFile(fileEntry, movedFilesDirectory);
+                }
+            }
+        } catch (Exception ex) {
+            if (ex instanceof IOException) {
+                log.error("getDirectoryListing: Directory does not exist or spec is not valid : {}", pathSpec.toAbsolutePath());
+                throw new IOException("Directory does not exist or spec is not valid");
+            } else {
+                log.error("getDirectoryListing: Exception while listing files: {}", pathSpec.toAbsolutePath());
+                throw new IOException(ex);
+            }
+        }
+    }
+
+    /**
+     * A last-modified-time filter that accepts only files older than minTimeInMillisToUpdateFile milliseconds.
+     * This filter helps to eliminate files that are still being written into the lookup directory by external services.
+     */
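`FileUtils.getDirectoryListing` accepts a comma-separated list of watch directories plus a quiescence window in milliseconds. A minimal usage sketch, assuming the paths below exist (they are hypothetical examples, not part of this change):

```java
import io.pravega.sensor.collector.util.FileNameWithOffset;
import io.pravega.sensor.collector.util.FileUtils;
import java.nio.file.Paths;
import java.util.List;

public class DirectoryListingExample {
    public static void main(String[] args) throws Exception {
        List<FileNameWithOffset> files = FileUtils.getDirectoryListing(
                "/opt/watched/A,/opt/watched/B",  // comma-separated directory spec; each directory must exist
                "csv",                            // only list *.csv files
                Paths.get("/opt/moved-files"),    // invalid files are moved to Failed_Files under here
                5000);                            // skip files modified within the last 5 seconds
        files.forEach(f -> System.out.println(f.fileName + " size=" + f.offset));
    }
}
```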
+    private static DirectoryStream.Filter<Path> getLastModifiedTimeFilter(long minTimeInMillisToUpdateFile) {
+        log.debug("getLastModifiedTimeFilter: minTimeInMillisToUpdateFile: {}", minTimeInMillisToUpdateFile);
+        return entry -> {
+            BasicFileAttributes attr = Files.readAttributes(entry, BasicFileAttributes.class);
+            if (attr.isDirectory()) {
+                return true;
+            }
+            FileTime fileTime = attr.lastModifiedTime();
+            return (fileTime.toMillis() <= (System.currentTimeMillis() - minTimeInMillisToUpdateFile));
+        };
+    }
+
+    /*
+     * File validation checks:
+     * 1. Is the file empty?
+     * 2. If the extension is empty, or the extension matches, the file can be ingested.
+     */
+    public static boolean isValidFile(FileNameWithOffset fileEntry, String fileExtension) {
+
+        if (fileEntry.offset <= 0) {
+            log.warn("isValidFile: Empty file {} can not be processed", fileEntry.fileName);
+        }
+        // If extension is empty, ingest all files
+        else if (fileExtension.isEmpty() || fileExtension.equals(fileEntry.fileName.substring(fileEntry.fileName.lastIndexOf(".") + 1)))
+            return true;
+        else
+            log.warn("isValidFile: File format {} is not supported ", fileEntry.fileName);
+
+        return false;
+    }
+
+    static void moveFailedFile(FileNameWithOffset fileEntry, Path filesDirectory) throws IOException {
+        Path sourcePath = Paths.get(fileEntry.fileName);
+        Path targetPath = filesDirectory.resolve(FAILED_FILES).resolve(sourcePath.getFileName());
+        moveFile(sourcePath, targetPath);
+    }
+
+    public static void moveCompletedFile(FileNameWithOffset fileEntry, Path filesDirectory) throws IOException {
+        Path sourcePath = Paths.get(fileEntry.fileName);
+        Path completedFilesPath = filesDirectory.resolve(COMPLETED_FILES);
+        String completedFileName = FileUtils.createCompletedFileName(filesDirectory, fileEntry.fileName);
+        Path targetPath = completedFilesPath.resolve(completedFileName);
+        moveFile(sourcePath, targetPath);
+    }
+
+    /**
+     * To keep files with the same name from different directories distinct in the completed-files directory,
+     * the completed file name is built with _ instead of /, so that it encodes all subdirectories.
+     * If the full file name is longer than 255 characters, it will be truncated to 255 characters.
+     */
+    public static String createCompletedFileName(Path completedFilesDir, String fileName) {
+        if (fileName == null || fileName.isEmpty() || completedFilesDir == null) {
+            return fileName;
+        }
+
+        int validFileNameLength = 255 - completedFilesDir.toString().length();
+
+        if (fileName.length() > validFileNameLength) {
+            fileName = fileName.substring(fileName.indexOf(File.separator, fileName.length() - validFileNameLength - 1));
+        }
+        return fileName.replace(File.separator, "_");
+    }
+
+    /*
+     * Move a file to a different directory (used for both failed and completed files).
+     */
+    static void moveFile(Path sourcePath, Path targetPath) throws IOException {
+        Files.createDirectories(targetPath.getParent());
+        // Obtain a lock on the file before moving
+        try (FileChannel channel = FileChannel.open(sourcePath, StandardOpenOption.WRITE)) {
+            try (FileLock lock = channel.tryLock()) {
+                if (lock != null) {
+                    Files.move(sourcePath, targetPath, StandardCopyOption.REPLACE_EXISTING);
+                    log.debug("moveFile: Moved file from {} to {}", sourcePath, targetPath);
+                    lock.release();
+                } else {
+                    log.warn("Unable to obtain lock on file {} for moving. File is locked by another process.", sourcePath);
+                    throw new Exception();
+                }
+            }
+        } catch (Exception e) {
+            log.warn("Unable to move file {}", e.getMessage());
+            log.warn("File will be moved on the next iteration.");
+            // We can continue on this error. Moving will be retried on the next iteration.
+        }
+    }
+}
diff --git a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/util/NonTransactionalEventWriter.java b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/util/NonTransactionalEventWriter.java
index 6117193c..8c4c2682 100644
--- a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/util/NonTransactionalEventWriter.java
+++ b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/util/NonTransactionalEventWriter.java
@@ -10,6 +10,7 @@
 package io.pravega.sensor.collector.util;
 
 import io.pravega.client.stream.EventStreamWriter;
+import io.pravega.client.stream.Transaction;
 import io.pravega.client.stream.TxnFailedException;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -53,6 +54,16 @@ public void commit(UUID txnId) throws TxnFailedException {
     public void abort() {
     }
 
+    @Override
+    public Transaction.Status getTransactionStatus(UUID txnId) {
+        throw new UnsupportedOperationException("Non-transactional writers cannot check transaction status");
+    }
+
+    @Override
+    public Transaction.Status getTransactionStatus() {
+        throw new UnsupportedOperationException("Non-transactional writers do not have transaction status");
+    }
+
     public void close() {
         writer.close();
     }
diff --git a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/parquet/PravegaWriterEvent.java b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/util/PravegaWriterEvent.java
similarity index 95%
rename from pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/parquet/PravegaWriterEvent.java
rename to pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/util/PravegaWriterEvent.java
index 9bff0a0b..e5231fe3 100644
--- a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/parquet/PravegaWriterEvent.java
+++ b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/util/PravegaWriterEvent.java
@@ -7,7 +7,7 @@
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  */
-package io.pravega.sensor.collector.parquet;
+package io.pravega.sensor.collector.util;
 
 /**
  * Event generated from file and its sequence number
diff --git a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/util/SQliteDBUtility.java b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/util/SQliteDBUtility.java
new file mode 100644
index 00000000..c0249ac4
--- /dev/null
+++ b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/util/SQliteDBUtility.java
@@ -0,0 +1,41 @@
+package io.pravega.sensor.collector.util;
+
+import java.sql.Connection;
+import java.sql.DriverManager;
+import java.sql.SQLException;
+import java.sql.Statement;
+
+import static java.sql.Connection.TRANSACTION_SERIALIZABLE;
+
+public class SQliteDBUtility {
+
+    public static Connection createDatabase(String fileName) {
+        try {
+            final Connection connection = DriverManager.getConnection("jdbc:sqlite:" + fileName);
+            try (final Statement statement = connection.createStatement()) {
+                // Use SQLite exclusive locking mode to ensure that another process or device driver instance is not using this database.
+                //statement.execute("PRAGMA locking_mode = EXCLUSIVE");
+                statement.execute(
+                        "create table if not exists PendingFiles (" +
+                                "id integer primary key autoincrement, " +
+                                "fileName string unique not null, " +
+                                "offset bigint not null)");
+                statement.execute(
+                        "create table if not exists CompletedFiles (" +
+                                "fileName string primary key not null, " +
+                                "offset bigint not null)");
+                statement.execute(
+                        "create table if not exists SequenceNumber (" +
+                                "id integer primary key check (id = 0), " +
+                                "nextSequenceNumber bigint not null)");
+                statement.execute(
+                        "insert or ignore into SequenceNumber (id, nextSequenceNumber) values (0, 0)");
+            }
+            connection.setAutoCommit(false);
+            connection.setTransactionIsolation(TRANSACTION_SERIALIZABLE);
+            return connection;
+        } catch (SQLException e) {
+            throw new RuntimeException(e);
+        }
+    }
+}
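`SQliteDBUtility.createDatabase` returns a JDBC connection with auto-commit disabled and the three state tables already created. A small sketch of opening the state database and inspecting the PendingFiles table (the database path here is hypothetical):

```java
import io.pravega.sensor.collector.util.SQliteDBUtility;
import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.Statement;

public class StateDbExample {
    public static void main(String[] args) throws Exception {
        try (Connection connection = SQliteDBUtility.createDatabase("/tmp/psc-state.db");
             Statement statement = connection.createStatement();
             ResultSet rs = statement.executeQuery("select fileName, offset from PendingFiles order by id")) {
            while (rs.next()) {
                System.out.println(rs.getString("fileName") + " @ " + rs.getLong("offset"));
            }
            connection.commit();   // the connection is opened with auto-commit disabled
        }
    }
}
```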
diff --git a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/util/TransactionCoordinator.java b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/util/TransactionCoordinator.java
index e06a610d..f5c7dfb9 100644
--- a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/util/TransactionCoordinator.java
+++ b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/util/TransactionCoordinator.java
@@ -9,6 +9,7 @@
  */
 package io.pravega.sensor.collector.util;
 
+import io.pravega.client.stream.Transaction;
 import io.pravega.client.stream.TxnFailedException;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -139,9 +140,9 @@ protected List<UUID> getTransactionsToCommit() {
     public void performRecovery() {
         final List<UUID> transactionsToCommit = getTransactionsToCommit();
         if (transactionsToCommit.isEmpty()) {
-            log.debug("Transaction recovery not needed");
+            log.info("performRecovery: No transactions to be recovered");
         } else {
-            log.warn("Transaction recovery needed on {} transactions", transactionsToCommit.size());
+            log.info("Transaction recovery needed on {} transactions", transactionsToCommit.size());
             transactionsToCommit.forEach((txnId) -> {
                 log.info("Committing transaction {} from a previous process", txnId);
                 try {
@@ -160,6 +161,9 @@ public void performRecovery() {
                             txnId, e);
                     // Continue recovery and run as normal.
                 } else {
+                    log.error(
+                            "Unable to commit transaction {} from a previous process. Events may have been lost. " +
+                                    "Try increasing the transaction timeout.", txnId, e);
                     throw e;
                 }
             }
diff --git a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/util/TransactionStateDB.java b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/util/TransactionStateDB.java
new file mode 100644
index 00000000..61580c15
--- /dev/null
+++ b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/util/TransactionStateDB.java
@@ -0,0 +1,87 @@
+package io.pravega.sensor.collector.util;
+
+import org.apache.commons.lang3.tuple.Pair;
+
+import java.sql.SQLException;
+import java.util.List;
+import java.util.Optional;
+import java.util.UUID;
+
+public interface TransactionStateDB {
+
+    /**
+     * Add file name and begin offset to the PendingFiles table.
+     *
+     * @param files list of file names with offsets
+     */
+    public void addPendingFileRecords(List<FileNameWithOffset> files) throws SQLException;
+
+    /**
+     * Get the next file to process. Reads the file name with begin offset from the PendingFiles table
+     * and the sequence number from the SequenceNumber table.
+     *
+     * @return ((file name, begin offset), sequence number) or null if there is no pending file
+     */
+    public Pair<FileNameWithOffset, Long> getNextPendingFileRecord() throws SQLException;
+
+    /**
+     * Updates the following details:
+     * 1. Update the sequence number in the SequenceNumber table.
+     * 2. Add an entry to the CompletedFiles table for the given file name and end offset.
+     * 3. Delete all entries from PendingFiles for the given file name with offset less than or equal to the given begin offset.
+     * 4. Add the transaction id to the TransactionsToCommit table, if provided.
+     *
+     * @param fileName              file name of the processed file
+     * @param beginOffset           begin offset from which the file read started
+     * @param endOffset             end offset at which reading ended
+     * @param newNextSequenceNumber next sequence number
+     * @param txnId                 transaction id (optional value) from Pravega
+     */
+    public void addCompletedFileRecord(String fileName, long beginOffset, long endOffset, long newNextSequenceNumber, Optional<UUID> txnId) throws SQLException;
+
+    /**
+     * Delete a record from the PendingFiles table.
+     *
+     * @param fileName    file name of the pending file
+     * @param beginOffset begin offset from which the file read started
+     */
+    void deletePendingFile(String fileName, long beginOffset) throws SQLException;
+
+    /**
+     * Updates the following details:
+     * 1. Update the sequence number in the SequenceNumber table.
+     * 2. Add an entry to the CompletedFiles table for the given file name and end offset.
+     * 3. Delete all entries from PendingFiles for the given file name with offset less than or equal to the given begin offset.
+     *
+     * @param fileName              file name of the processed file
+     * @param beginOffset           begin offset from which the file read started
+     * @param endOffset             end offset at which reading ended
+     * @param newNextSequenceNumber next sequence number
+     */
+    public void addCompletedFileRecord(String fileName, long beginOffset, long endOffset, long newNextSequenceNumber) throws SQLException;
+
+    /**
+     * Delete a record from the TransactionsToCommit table.
+     *
+     * @param txnId transaction id
+     */
+    public void deleteTransactionToCommit(Optional<UUID> txnId);
+
+    /**
+     * Get a list of files from the CompletedFiles table.
+     *
+     * @return list of file name and end offset (file size)
+     */
+    public List<FileNameWithOffset> getCompletedFileRecords() throws SQLException;
+
+    /**
+     * Delete the completed file record from the CompletedFiles table for the given file name.
+     *
+     * @param fileName file name
+     */
+    public void deleteCompletedFileRecord(String fileName) throws SQLException;
+
+}
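Taken together, the interface describes a simple pick-process-record loop over the pending files. The sketch below shows one plausible way a caller could drive it; the `ingestOneFile` helper is hypothetical and stands in for the real event-writing logic:

```java
import io.pravega.sensor.collector.util.FileNameWithOffset;
import io.pravega.sensor.collector.util.TransactionStateDB;
import org.apache.commons.lang3.tuple.Pair;
import java.util.Optional;
import java.util.UUID;

public class PendingFileLoop {
    void processAll(TransactionStateDB state) throws Exception {
        for (;;) {
            Pair<FileNameWithOffset, Long> next = state.getNextPendingFileRecord();
            if (next == null) {
                break;   // nothing left in PendingFiles
            }
            FileNameWithOffset file = next.getLeft();
            long sequenceNumber = next.getRight();
            // Hypothetical ingestion step returning the end offset reached.
            long endOffset = ingestOneFile(file, sequenceNumber);
            Optional<UUID> txnId = Optional.empty();   // would come from the event writer's flush
            state.addCompletedFileRecord(file.fileName, file.offset, endOffset, sequenceNumber + 1, txnId);
            state.deleteTransactionToCommit(txnId);
        }
    }

    private long ingestOneFile(FileNameWithOffset file, long sequenceNumber) {
        return 0;   // placeholder for the real event-writing logic
    }
}
```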
diff --git a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/util/TransactionStateInMemoryImpl.java b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/util/TransactionStateInMemoryImpl.java
new file mode 100644
index 00000000..1d26665a
--- /dev/null
+++ b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/util/TransactionStateInMemoryImpl.java
@@ -0,0 +1,33 @@
+package io.pravega.sensor.collector.util;
+
+import com.google.common.annotations.VisibleForTesting;
+import org.apache.commons.lang3.tuple.ImmutablePair;
+import org.apache.commons.lang3.tuple.Pair;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.sql.*;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Optional;
+import java.util.UUID;
+
+/**
+ * Maintains the state of pending and completed files in an in-memory database.
+ */
+public class TransactionStateInMemoryImpl extends TransactionStateSQLiteImpl {
+
+    private static final Logger log = LoggerFactory.getLogger(TransactionStateInMemoryImpl.class);
+
+    public TransactionStateInMemoryImpl(Connection connection, TransactionCoordinator transactionCoordinator) {
+        super(connection, transactionCoordinator);
+    }
+
+    @VisibleForTesting
+    public static TransactionStateInMemoryImpl create(String fileName) {
+        final Connection connection = SQliteDBUtility.createDatabase(fileName);
+        final TransactionCoordinator transactionCoordinator = new TransactionCoordinator(connection, null);
+        return new TransactionStateInMemoryImpl(connection, transactionCoordinator);
+    }
+}
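Because the implementation is plain JDBC over SQLite, pointing it at the special file name `:memory:` yields a throwaway in-memory state store, which is how the test classes later in this change use it. A minimal sketch:

```java
import io.pravega.sensor.collector.util.TransactionStateInMemoryImpl;

public class InMemoryStateExample {
    public static void main(String[] args) throws Exception {
        try (TransactionStateInMemoryImpl state = TransactionStateInMemoryImpl.create(":memory:")) {
            // Fresh database: no completed files yet.
            System.out.println(state.getCompletedFileRecords());
        }
    }
}
```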
diff --git a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/util/TransactionStateSQLiteImpl.java b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/util/TransactionStateSQLiteImpl.java
new file mode 100644
index 00000000..e5d857fa
--- /dev/null
+++ b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/util/TransactionStateSQLiteImpl.java
@@ -0,0 +1,209 @@
+/**
+ * Copyright (c) Dell Inc., or its subsidiaries. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ */
+package io.pravega.sensor.collector.util;
+
+import com.google.common.annotations.VisibleForTesting;
+import org.apache.commons.lang3.tuple.ImmutablePair;
+import org.apache.commons.lang3.tuple.Pair;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.sql.*;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Optional;
+import java.util.UUID;
+
+/**
+ * Maintains the state of pending and completed files in a SQLite database.
+ */
+public class TransactionStateSQLiteImpl implements AutoCloseable, TransactionStateDB {
+    private static final Logger log = LoggerFactory.getLogger(TransactionStateSQLiteImpl.class);
+
+    private final Connection connection;
+    private final TransactionCoordinator transactionCoordinator;
+
+    public TransactionStateSQLiteImpl(Connection connection, TransactionCoordinator transactionCoordinator) {
+        this.connection = connection;
+        this.transactionCoordinator = transactionCoordinator;
+    }
+
+    @Override
+    public void close() throws SQLException {
+        connection.close();
+    }
+
+    /**
+     * Add file name and begin offset to the PendingFiles table.
+     *
+     * @param files list of file names with offsets
+     */
+    @Override
+    public void addPendingFileRecords(List<FileNameWithOffset> files) throws SQLException {
+        try (final PreparedStatement insertStatement = connection.prepareStatement(
+                "insert or ignore into PendingFiles (fileName, offset) values (?, ?)");
+             final AutoRollback autoRollback = new AutoRollback(connection)) {
+            for (FileNameWithOffset file : files) {
+                insertStatement.setString(1, file.fileName);
+                insertStatement.setLong(2, file.offset);
+                insertStatement.execute();
+            }
+            autoRollback.commit();
+        }
+    }
+
+    /**
+     * Get the next file to process. Reads the file name with begin offset from the PendingFiles table
+     * and the sequence number from the SequenceNumber table.
+     *
+     * @return ((file name, begin offset), sequence number) or null if there is no pending file
+     */
+    @Override
+    public Pair<FileNameWithOffset, Long> getNextPendingFileRecord() throws SQLException {
+        try (final Statement statement = connection.createStatement();
+             final ResultSet rs = statement.executeQuery("select fileName, offset from PendingFiles order by id limit 1")) {
+            if (rs.next()) {
+                final FileNameWithOffset fileNameWithOffset = new FileNameWithOffset(rs.getString("fileName"), rs.getLong("offset"));
+                try (final ResultSet rsSequenceNumber = statement.executeQuery("select nextSequenceNumber from SequenceNumber")) {
+                    rsSequenceNumber.next();
+                    final long nextSequenceNumber = rsSequenceNumber.getLong(1);
+                    return new ImmutablePair<>(fileNameWithOffset, nextSequenceNumber);
+                }
+            } else {
+                return null;
+            }
+        } finally {
+            connection.commit();
+        }
+    }
+
+    /**
+     * Updates the following details:
+     * 1. Update the sequence number in the SequenceNumber table.
+     * 2. Add an entry to the CompletedFiles table for the given file name and end offset.
+     * 3. Delete all entries from PendingFiles for the given file name with offset less than or equal to the given begin offset.
+     * 4. Add the transaction id to the TransactionsToCommit table, if provided.
+     *
+     * @param fileName              file name of the processed file
+     * @param beginOffset           begin offset from which the file read started
+     * @param endOffset             end offset at which reading ended
+     * @param newNextSequenceNumber next sequence number
+     * @param txnId                 transaction id (optional value) from Pravega
+     */
+    @Override
+    public void addCompletedFileRecord(String fileName, long beginOffset, long endOffset, long newNextSequenceNumber, Optional<UUID> txnId) throws SQLException {
+        try (final PreparedStatement updateSequenceNumberStatement = connection.prepareStatement(
+                "update SequenceNumber set nextSequenceNumber = ?");
+             final PreparedStatement insertCompletedFileStatement = connection.prepareStatement(
+                "insert or ignore into CompletedFiles (fileName, offset) values (?, ?)");
+             final PreparedStatement deletePendingFileStatement = connection.prepareStatement(
+                "delete from PendingFiles where fileName = ? and offset <= ?");
+             final AutoRollback autoRollback = new AutoRollback(connection)) {
+            // Update sequence number.
+            updateSequenceNumberStatement.setLong(1, newNextSequenceNumber);
+            updateSequenceNumberStatement.execute();
+            // Add completed file.
+            insertCompletedFileStatement.setString(1, fileName);
+            insertCompletedFileStatement.setLong(2, endOffset);
+            insertCompletedFileStatement.execute();
+            // Remove pending file.
+            deletePendingFileStatement.setString(1, fileName);
+            deletePendingFileStatement.setLong(2, beginOffset);
+            deletePendingFileStatement.execute();
+            transactionCoordinator.addTransactionToCommit(txnId);
+            autoRollback.commit();
+        }
+    }
+
+    /**
+     * Delete a record from the PendingFiles table.
+     *
+     * @param fileName    file name of the pending file
+     * @param beginOffset begin offset from which the file read started
+     */
+    @Override
+    public void deletePendingFile(String fileName, long beginOffset) throws SQLException {
+        try (final PreparedStatement deletePendingFileStatement = connection.prepareStatement(
+                "delete from PendingFiles where fileName = ? and offset <= ?")) {
+            // Remove pending file.
+            deletePendingFileStatement.setString(1, fileName);
+            deletePendingFileStatement.setLong(2, beginOffset);
+            deletePendingFileStatement.execute();
+        }
+    }
+
+    /**
+     * Updates the following details:
+     * 1. Update the sequence number in the SequenceNumber table.
+     * 2. Add an entry to the CompletedFiles table for the given file name and end offset.
+     * 3. Delete all entries from PendingFiles for the given file name with offset less than or equal to the given begin offset.
+     *
+     * @param fileName              file name of the processed file
+     * @param beginOffset           begin offset from which the file read started
+     * @param endOffset             end offset at which reading ended
+     * @param newNextSequenceNumber next sequence number
+     */
+    @Override
+    @VisibleForTesting
+    public void addCompletedFileRecord(String fileName, long beginOffset, long endOffset, long newNextSequenceNumber) throws SQLException {
+        addCompletedFileRecord(fileName, beginOffset, endOffset, newNextSequenceNumber, Optional.empty());
+    }
+
+    /**
+     * Delete a record from the TransactionsToCommit table.
+     *
+     * @param txnId transaction id
+     */
+    @Override
+    public void deleteTransactionToCommit(Optional<UUID> txnId) {
+        transactionCoordinator.deleteTransactionToCommit(txnId);
+    }
+
+    /**
+     * Get a list of files from the CompletedFiles table.
+     *
+     * @return list of file name and end offset (file size)
+     */
+    @Override
+    public List<FileNameWithOffset> getCompletedFileRecords() throws SQLException {
+        try (final Statement statement = connection.createStatement();
+             final ResultSet rs = statement.executeQuery("select fileName, offset from completedFiles")) {
+            final List<FileNameWithOffset> files = new ArrayList<>();
+            while (rs.next()) {
+                final FileNameWithOffset fileNameWithOffset = new FileNameWithOffset(rs.getString("fileName"), rs.getLong("offset"));
+                files.add(fileNameWithOffset);
+            }
+            return files;
+        } finally {
+            connection.commit();
+        }
+    }
+
+    /**
+     * Delete the completed file record from the CompletedFiles table for the given file name.
+     *
+     * @param fileName file name
+     */
+    @Override
+    public void deleteCompletedFileRecord(String fileName) throws SQLException {
+        try (final PreparedStatement deleteStatement = connection.prepareStatement(
+                "delete from CompletedFiles where fileName = ?");
+             final AutoRollback autoRollback = new AutoRollback(connection)) {
+            deleteStatement.setString(1, fileName);
+            deleteStatement.execute();
+            autoRollback.commit();
+        }
+    }
+}
diff --git a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/util/TransactionalEventWriter.java b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/util/TransactionalEventWriter.java
index 7bae945a..d53c0c87 100644
--- a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/util/TransactionalEventWriter.java
+++ b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/util/TransactionalEventWriter.java
@@ -9,6 +9,7 @@
  */
 package io.pravega.sensor.collector.util;
 
+import com.google.common.base.Preconditions;
 import io.pravega.client.stream.Transaction;
 import io.pravega.client.stream.TransactionalEventStreamWriter;
 import io.pravega.client.stream.TxnFailedException;
@@ -17,6 +18,7 @@
 
 import java.util.Optional;
 import java.util.UUID;
+import java.util.concurrent.CompletableFuture;
 
 public class TransactionalEventWriter<T> implements EventWriter<T> {
     private static final Logger log = LoggerFactory.getLogger(TransactionalEventWriter.class);
@@ -65,10 +67,19 @@ public void commit(long timestamp) throws TxnFailedException {
             currentTxn = null;
         }
     }
+
+    private boolean canCommitTransaction(UUID txnId) {
+        Transaction.Status transactionStatus = writer.getTxn(txnId).checkStatus();
+        log.info("canCommitTransaction: Status of Transaction id {} is {}", txnId, transactionStatus);
+        return transactionStatus == Transaction.Status.OPEN;
+    }
 
     public void commit(UUID txnId) throws TxnFailedException {
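The status check makes the recovery path idempotent: `commit(UUID)` can be replayed for a transaction that has already been committed or aborted without tripping `TxnFailedException`. A sketch of the resulting caller-side pattern (the `RecoveryExample` helper is hypothetical):

```java
import io.pravega.client.stream.Transaction;
import io.pravega.sensor.collector.util.TransactionalEventWriter;
import java.util.UUID;

public class RecoveryExample {
    static void recoverTransaction(TransactionalEventWriter<byte[]> writer, UUID txnId) throws Exception {
        Transaction.Status status = writer.getTransactionStatus(txnId);
        if (status == Transaction.Status.OPEN) {
            writer.commit(txnId);   // still open: safe to commit
        } else {
            System.out.println("Skipping " + txnId + " in state " + status);
        }
    }
}
```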
+        /* Check the transaction status before committing the transaction.
+           Only transactions that are in OPEN status can be committed. */
+        if (canCommitTransaction(txnId)) {
+            log.info("commit: committing transaction {}", txnId);
+            writer.getTxn(txnId).commit();
+        }
-        log.info("commit: committing transaction {}", txnId);
-        writer.getTxn(txnId).commit();
     }
 
     public void abort() {
@@ -79,6 +90,17 @@ public void abort() {
         }
     }
 
+    public Transaction.Status getTransactionStatus() {
+        if (currentTxn != null) {
+            return currentTxn.checkStatus();
+        }
+        return null;
+    }
+
+    public Transaction.Status getTransactionStatus(UUID txnId) {
+        return writer.getTxn(txnId).checkStatus();
+    }
+
+
     public void close() {
         try {
             abort();
diff --git a/pravega-sensor-collector/src/test/java/io/pravega/sensor/collector/PravegaClientConfigTests.java b/pravega-sensor-collector/src/test/java/io/pravega/sensor/collector/PravegaClientConfigTests.java
new file mode 100644
index 00000000..2f59332a
--- /dev/null
+++ b/pravega-sensor-collector/src/test/java/io/pravega/sensor/collector/PravegaClientConfigTests.java
@@ -0,0 +1,62 @@
+package io.pravega.sensor.collector;
+
+import org.junit.jupiter.api.Test;
+
+import java.net.URI;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+
+import static org.junit.Assert.assertEquals;
+
+public class PravegaClientConfigTests {
+
+    @Test
+    public void testConstructorWithValues() {
+        URI uri = URI.create("tcp://localhost:9090");
+        String scopeName = "testScope";
+        PravegaClientConfig conf = new PravegaClientConfig(uri, scopeName);
+        assertEquals(scopeName, conf.getScopeName());
+        assertEquals(uri, conf.toClientConfig().getControllerURI());
+    }
+
+    @Test
+    public void testConstructorWithProperties() {
+        URI uri = URI.create("tcp://example.com:9090");
+        String scopeName = "testScope";
+
+        Map<String, String> properties = new HashMap<>();
+        properties.put("PRAVEGA_CONTROLLER_URI", uri.toString());
+        PravegaClientConfig configFile = new PravegaClientConfig(properties, scopeName);
+        assertEquals(uri, configFile.toClientConfig().getControllerURI());
+        assertEquals(scopeName, configFile.getScopeName());
+    }
+
+    @Test
+    public void testConstructorWithPropertiesDefaultURI() {
+        String scopeName = "testScope";
+
+        Map<String, String> properties = Collections.emptyMap();
+
+        PravegaClientConfig configFile = new PravegaClientConfig(properties, scopeName);
+
+        assertEquals(URI.create("tcp://localhost:9090"), configFile.toClientConfig().getControllerURI());
+        assertEquals(scopeName, configFile.getScopeName());
+    }
+
+    @Test
+    public void testEqualsAndHashCode() {
+        URI uri1 = URI.create("tcp://localhost:9090");
+        String scopeName1 = "testScope1";
+        PravegaClientConfig configFile1 = new PravegaClientConfig(uri1, scopeName1);
+
+        URI uri2 = URI.create("tcp://localhost:9090");
+        String scopeName2 = "testScope1";
+        PravegaClientConfig configFile2 = new PravegaClientConfig(uri2, scopeName2);
+
+        assertEquals(configFile1, configFile2);
+        assertEquals(configFile1.hashCode(), configFile2.hashCode());
+    }
+
+}
diff --git a/pravega-sensor-collector/src/test/java/io/pravega/sensor/collector/file/FileIngestServiceTest.java b/pravega-sensor-collector/src/test/java/io/pravega/sensor/collector/file/FileIngestServiceTest.java
new file mode 100644
index 00000000..508a3a84
--- /dev/null
+++ b/pravega-sensor-collector/src/test/java/io/pravega/sensor/collector/file/FileIngestServiceTest.java
@@ -0,0 +1,20 @@
+package io.pravega.sensor.collector.file;
+
+import io.pravega.sensor.collector.DeviceDriver;
+import io.pravega.sensor.collector.DeviceDriverConfig;
+import io.pravega.sensor.collector.DeviceDriverManager;
+import io.pravega.sensor.collector.file.rawfile.RawFileIngestService;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+import org.mockito.Mock;
+
+import java.sql.SQLException;
+import java.util.Map;
+import java.util.concurrent.ScheduledExecutorService;
+import java.util.concurrent.ScheduledFuture;
+
+import static org.mockito.ArgumentMatchers.anyString;
+import static org.mockito.Mockito.*;
+
+public class FileIngestServiceTest {
+}
diff --git a/pravega-sensor-collector/src/test/java/io/pravega/sensor/collector/file/FileProcessorFactoryTest.java b/pravega-sensor-collector/src/test/java/io/pravega/sensor/collector/file/FileProcessorFactoryTest.java
new file mode 100644
index 00000000..d1d63ee5
--- /dev/null
+++ b/pravega-sensor-collector/src/test/java/io/pravega/sensor/collector/file/FileProcessorFactoryTest.java
@@ -0,0 +1,76 @@
+package io.pravega.sensor.collector.file;
+
+import io.pravega.sensor.collector.file.csvfile.CsvFileSequenceProcessor;
+import io.pravega.sensor.collector.file.parquet.ParquetFileProcessor;
+import io.pravega.sensor.collector.file.rawfile.RawFileProcessor;
+import io.pravega.sensor.collector.util.EventWriter;
+import io.pravega.sensor.collector.util.TransactionCoordinator;
+import io.pravega.sensor.collector.util.TransactionStateInMemoryImpl;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+import org.mockito.Mock;
+import org.mockito.MockitoAnnotations;
+
+public class FileProcessorFactoryTest {
+
+    private FileConfig config;
+    @Mock
+    private EventWriter<byte[]> writer;
+    @Mock
+    private TransactionCoordinator transactionCoordinator;
+    @Mock
+    private TransactionStateInMemoryImpl state;
+
+    @BeforeEach
+    public void setUp() {
+        MockitoAnnotations.initMocks(this);
+    }
+
+    /*
+     * Test for creating a raw file processor.
+     */
+    @Test
+    public void createRAWFileProcessorTest() throws Exception {
+        String stateDatabaseFileName = ":memory:";
+        config = new FileConfig(stateDatabaseFileName, "/opt/pravega-sensor-collector/Files/A", "parquet", "key12",
+                "stream1", "{}", 10, false,
+                true, 20.0, 5000L, "RawFileIngestService");
+        FileProcessor rawFileProcessor = FileProcessorFactory.createFileSequenceProcessor(config, state, writer, transactionCoordinator, "writerId");
+
+        Assertions.assertTrue(rawFileProcessor instanceof RawFileProcessor);
+    }
+
+    /*
+     * Test for creating a CSV file processor.
+     */
+    @Test
+    public void createCSVFileProcessorTest() throws Exception {
+        String stateDatabaseFileName = ":memory:";
+        config = new FileConfig(stateDatabaseFileName, "/opt/pravega-sensor-collector/Files/A", "parquet", "key12",
+                "stream1", "{}", 10, false,
+                true, 20.0, 5000L, "CsvFileIngestService");
+        FileProcessor csvFileProcessor = FileProcessorFactory.createFileSequenceProcessor(config, state, writer, transactionCoordinator, "writerId");
+
+        Assertions.assertTrue(csvFileProcessor instanceof CsvFileSequenceProcessor);
+    }
+
+    /*
+     * Test for creating a PARQUET file processor.
+     */
+    @Test
+    public void createParquetFileProcessorTest() throws Exception {
+        String stateDatabaseFileName = ":memory:";
+        config = new FileConfig(stateDatabaseFileName, "/opt/pravega-sensor-collector/Files/A", "parquet", "key12",
+                "stream1", "{}", 10, false,
+                true, 20.0, 5000L, "ParquetFileIngestService");
+        FileProcessor parquetFileProcessor = FileProcessorFactory.createFileSequenceProcessor(config, state, writer, transactionCoordinator, "writerId");
+
+        Assertions.assertTrue(parquetFileProcessor instanceof ParquetFileProcessor);
+    }
+}
--git a/pravega-sensor-collector/src/test/java/io/pravega/sensor/collector/file/FileProcessorTests.java b/pravega-sensor-collector/src/test/java/io/pravega/sensor/collector/file/FileProcessorTests.java new file mode 100644 index 00000000..83e6f247 --- /dev/null +++ b/pravega-sensor-collector/src/test/java/io/pravega/sensor/collector/file/FileProcessorTests.java @@ -0,0 +1,175 @@ +package io.pravega.sensor.collector.file; + +import com.google.common.collect.ImmutableList; +import io.pravega.client.EventStreamClientFactory; +import io.pravega.client.stream.TxnFailedException; +import io.pravega.sensor.collector.file.rawfile.RawFileProcessor; +import io.pravega.sensor.collector.util.*; +import org.apache.commons.lang3.tuple.ImmutablePair; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.runner.RunWith; +import org.mockito.Mock; +import org.mockito.Mockito; +import org.mockito.MockitoAnnotations; +import org.mockito.junit.MockitoJUnitRunner; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.nio.file.StandardCopyOption; +import java.util.List; + +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyString; +import static org.mockito.Mockito.doNothing; +import static org.mockito.Mockito.verify; + + +public class FileProcessorTests { + private static final Logger log = LoggerFactory.getLogger(FileProcessorTests.class); + + protected FileConfig config; + @Mock + protected TransactionStateSQLiteImpl state; + + @Mock + private EventWriter<byte[]> writer; + + @Mock + protected TransactionalEventWriter<byte[]> transactionalEventWriter; + + @Mock + protected TransactionCoordinator transactionCoordinator; + @Mock + private EventGenerator eventGenerator; + @Mock + private EventStreamClientFactory clientFactory; + + + @BeforeEach + public void setup() { + MockitoAnnotations.initMocks(this); + String stateDatabaseFileName = ":memory:"; + config = new FileConfig("./psc.db","/opt/pravega-sensor-collector/Files/A","parquet","key12", + "stream1","{}",10, false, + true,20.0, 5000,"RawFileIngestService"); + } + + @Test + public void getNewFilesTest() { + final List<FileNameWithOffset> directoryListing = ImmutableList.of( + new FileNameWithOffset("file2", 10), + new FileNameWithOffset("file4", 10), + new FileNameWithOffset("file3", 10)); + final List<FileNameWithOffset> completedFiles = ImmutableList.of( + new FileNameWithOffset("file1", 10), + new FileNameWithOffset("file2", 10)); + final List<FileNameWithOffset> expected = ImmutableList.of( + new FileNameWithOffset("file3", 0), + new FileNameWithOffset("file4", 0)); + RawFileProcessor fileProcessor = new RawFileProcessor(config,state, writer, transactionCoordinator, "writerId"); + final List<FileNameWithOffset> actual = fileProcessor.getNewFiles(directoryListing, completedFiles); + Assertions.assertEquals(expected, actual); + } + + @Test + public void getDirectoryListingTest() throws IOException { + final List<FileNameWithOffset> actual = FileUtils.getDirectoryListing( + "../log-file-sample-data/","csv", Paths.get("."), 5000); + log.info("actual={}", actual); + } + + /* + * When there are no new files to process in the SQLite DB, the nextFiles() call returns an empty file set.
+ */ + @Test + public void getEmptyNextFileSet() throws Exception { + FileProcessor fileProcessor = FileProcessor.create(config, clientFactory); + fileProcessor.processFiles(); + } + + /* + * Process a single file with the Raw file processor. + */ + @Test + public void processNextFile() throws Exception { + copyFile(); + FileProcessor fileProcessor = new RawFileProcessor(config, state, transactionalEventWriter,transactionCoordinator, "test"); + doNothing().when(transactionalEventWriter).writeEvent(anyString(), any()); + fileProcessor.processFile(new FileNameWithOffset("../../pravega-sensor-collector/parquet-file-sample-data/sub1.parquet", 0), 1L); + verify(transactionalEventWriter).writeEvent(anyString(), any()); + } + + /* + * Process 3 files in a loop + */ + @Test + public void processNextFewFiles() throws Exception { + copyFile(); + // Return a pending file record for the first three invocations and null from the fourth invocation onwards + Mockito.when(state.getNextPendingFileRecord()) + .thenReturn(new ImmutablePair<>(new FileNameWithOffset("../../pravega-sensor-collector/parquet-file-sample-data/sub1.parquet", 0), 1L)) + .thenReturn(new ImmutablePair<>(new FileNameWithOffset("../../pravega-sensor-collector/parquet-file-sample-data/sub2.parquet", 0), 2L)) + .thenReturn(new ImmutablePair<>(new FileNameWithOffset("../../pravega-sensor-collector/parquet-file-sample-data/sub3.parquet", 0), 3L)) + .thenAnswer(invocation -> null); + + FileProcessor fileProcessor = new RawFileProcessor(config, state, transactionalEventWriter,transactionCoordinator, "test"); + doNothing().when(transactionalEventWriter).writeEvent(anyString(), any()); + fileProcessor.processNewFiles(); + + // Verify that writeEvent was called exactly three times + Mockito.verify(transactionalEventWriter, Mockito.times(3)).writeEvent(anyString(), any()); + + } + + /* + * Process a single file. + * Throw TxnFailedException while writing events + */ + @Test + public void processNextFile_WriteEventException() throws Exception { + copyFile(); + FileProcessor fileProcessor = new RawFileProcessor(config, state, transactionalEventWriter,transactionCoordinator, "test"); + Mockito.doThrow(TxnFailedException.class).when(transactionalEventWriter).writeEvent(anyString(), any()); + assertThrows(RuntimeException.class, () -> fileProcessor.processFile(new FileNameWithOffset("../../pravega-sensor-collector/parquet-file-sample-data/sub1.parquet", 0), 1L)); + // Verify that writeEvent was called exactly once + Mockito.verify(transactionalEventWriter, Mockito.times(1)).writeEvent(anyString(), any()); + + } + /* + * Process a single file. + * Throw TxnFailedException while committing the transaction + */ + @Test + public void processNextFile_CommitException() throws Exception { + copyFile(); + FileProcessor fileProcessor = new RawFileProcessor(config, state, transactionalEventWriter,transactionCoordinator, "test"); + Mockito.doThrow(TxnFailedException.class).when(transactionalEventWriter).commit(); + assertThrows(RuntimeException.class, () -> fileProcessor.processFile(new FileNameWithOffset("../../pravega-sensor-collector/parquet-file-sample-data/sub1.parquet", 0), 1L)); + // Verify that commit was called exactly once + Mockito.verify(transactionalEventWriter, Mockito.times(1)).commit(); + } + + /* + * Before each test we need to copy the files to the parquet file directory so that files are available for processing.
+ * After processing, these files are moved to a different directory, so it is important to copy them back to the current directory path. + */ + public void copyFile() throws IOException { + Path sourcePath = Paths.get("../../pravega-sensor-collector/parquet-file-sample-data/test_file/sub1.parquet"); + Path targetPath = Paths.get("../../pravega-sensor-collector/parquet-file-sample-data/sub1.parquet"); + Files.copy(sourcePath, targetPath, StandardCopyOption.REPLACE_EXISTING); + sourcePath = Paths.get("../../pravega-sensor-collector/parquet-file-sample-data/test_file/sub2.parquet"); + targetPath = Paths.get("../../pravega-sensor-collector/parquet-file-sample-data/sub2.parquet"); + Files.copy(sourcePath, targetPath, StandardCopyOption.REPLACE_EXISTING); + sourcePath = Paths.get("../../pravega-sensor-collector/parquet-file-sample-data/test_file/sub3.parquet"); + targetPath = Paths.get("../../pravega-sensor-collector/parquet-file-sample-data/sub3.parquet"); + Files.copy(sourcePath, targetPath, StandardCopyOption.REPLACE_EXISTING); + } + +} diff --git a/pravega-sensor-collector/src/test/java/io/pravega/sensor/collector/file/LogFileSequenceProcessorStateTests.java b/pravega-sensor-collector/src/test/java/io/pravega/sensor/collector/file/LogFileSequenceProcessorStateTests.java deleted file mode 100644 index 4b3f7aed..00000000 --- a/pravega-sensor-collector/src/test/java/io/pravega/sensor/collector/file/LogFileSequenceProcessorStateTests.java +++ /dev/null @@ -1,108 +0,0 @@ -/** - * Copyright (c) Dell Inc., or its subsidiaries. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - */ -package io.pravega.sensor.collector.file; - -import com.google.common.collect.ImmutableList; -import com.google.common.collect.ImmutableSet; -import org.apache.commons.lang3.tuple.ImmutablePair; -import org.junit.Assert; -import org.junit.Test; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.sql.SQLException; -import java.util.HashSet; -import java.util.List; - -public class LogFileSequenceProcessorStateTests { - private static final Logger log = LoggerFactory.getLogger(LogFileSequenceProcessorStateTests.class); - - @Test - public void pendingFilesTest() throws SQLException { - final String stateDatabaseFileName = ":memory:"; - final LogFileSequenceProcessorState state = LogFileSequenceProcessorState.create(stateDatabaseFileName); - Assert.assertNull(state.getNextPendingFile()); - state.addPendingFiles(ImmutableList.of(new FileNameWithOffset("file1.csv", 0L))); - Assert.assertEquals(new ImmutablePair<>(new FileNameWithOffset("file1.csv", 0L), 0L), state.getNextPendingFile()); - state.addPendingFiles(ImmutableList.of(new FileNameWithOffset("file2.csv", 0L))); - Assert.assertEquals(new ImmutablePair<>(new FileNameWithOffset("file1.csv", 0L), 0L), state.getNextPendingFile()); - state.addPendingFiles(ImmutableList.of(new FileNameWithOffset("file0.csv", 0L))); - Assert.assertEquals(new ImmutablePair<>(new FileNameWithOffset("file1.csv", 0L), 0L), state.getNextPendingFile()); - } - - @Test - public void completedFilesTest() throws SQLException { - final String stateDatabaseFileName = ":memory:"; - final LogFileSequenceProcessorState state = LogFileSequenceProcessorState.create(stateDatabaseFileName); - Assert.assertNull(state.getNextPendingFile()); - state.addPendingFiles(ImmutableList.of(new
FileNameWithOffset("file1.csv", 0L))); - Assert.assertEquals(new ImmutablePair<>(new FileNameWithOffset("file1.csv", 0L), 0L), state.getNextPendingFile()); - state.addCompletedFile("file1.csv", 0L, 1000L, 10L); - final List<FileNameWithOffset> completedFiles = state.getCompletedFiles(); - log.info("completedFiles={}", completedFiles); - Assert.assertEquals(ImmutableSet.of(new FileNameWithOffset("file1.csv", 1000L)), new HashSet<>(completedFiles)); - Assert.assertNull(state.getNextPendingFile()); - // Make sure this is idempotent. - state.addCompletedFile("file1.csv", 0L, 1000L, 10L); - Assert.assertEquals(ImmutableSet.of(new FileNameWithOffset("file1.csv", 1000L)), new HashSet<>(completedFiles)); - Assert.assertNull(state.getNextPendingFile()); - } - - @Test - public void processFilesTest() throws SQLException { - final String stateDatabaseFileName = ":memory:"; - final LogFileSequenceProcessorState state = LogFileSequenceProcessorState.create(stateDatabaseFileName); - Assert.assertNull(state.getNextPendingFile()); - // Find 3 new files. - state.addPendingFiles(ImmutableList.of(new FileNameWithOffset("file2.csv", 0L))); - state.addPendingFiles(ImmutableList.of(new FileNameWithOffset("file1.csv", 0L))); - state.addPendingFiles(ImmutableList.of(new FileNameWithOffset("file3.csv", 0L))); - // Re-add a pending file. This should be ignored. - state.addPendingFiles(ImmutableList.of(new FileNameWithOffset("file1.csv", 0L))); - // Get next pending file. - Assert.assertEquals(new ImmutablePair<>(new FileNameWithOffset("file2.csv", 0L), 0L), state.getNextPendingFile()); - // Complete file. - state.addCompletedFile("file2.csv", 0L, 1000L, 10L); - Assert.assertEquals(ImmutableSet.of(new FileNameWithOffset("file2.csv", 1000L)), new HashSet<>(state.getCompletedFiles())); - // Get next pending file. - Assert.assertEquals(new ImmutablePair<>(new FileNameWithOffset("file1.csv", 0L), 10L), state.getNextPendingFile()); - // Complete file. - state.addCompletedFile("file1.csv", 0L, 2000L, 20L); - Assert.assertEquals(ImmutableSet.of( - new FileNameWithOffset("file2.csv", 1000L), - new FileNameWithOffset("file1.csv", 2000L)), - new HashSet<>(state.getCompletedFiles())); - // Get next pending file. - Assert.assertEquals(new ImmutablePair<>(new FileNameWithOffset("file3.csv", 0L), 20L), state.getNextPendingFile()); - // Complete file. - state.addCompletedFile("file3.csv", 0L, 1500L, 30L); - Assert.assertEquals(ImmutableSet.of( - new FileNameWithOffset("file2.csv", 1000L), - new FileNameWithOffset("file1.csv", 2000L), - new FileNameWithOffset("file3.csv", 1500L)), - new HashSet<>(state.getCompletedFiles())); - // No more pending files. - Assert.assertNull(state.getNextPendingFile()); - // Delete completed file. - state.deleteCompletedFile("file1.csv"); - Assert.assertEquals(ImmutableSet.of( - new FileNameWithOffset("file2.csv", 1000L), - new FileNameWithOffset("file3.csv", 1500L)), - new HashSet<>(state.getCompletedFiles())); - // Delete completed file. - state.deleteCompletedFile("file2.csv"); - Assert.assertEquals(ImmutableSet.of( - new FileNameWithOffset("file3.csv", 1500L)), - new HashSet<>(state.getCompletedFiles())); - // Delete completed file.
- state.deleteCompletedFile("file3.csv"); - Assert.assertTrue(state.getCompletedFiles().isEmpty()); - } -} diff --git a/pravega-sensor-collector/src/test/java/io/pravega/sensor/collector/file/LogFileSequenceProcessorTests.java b/pravega-sensor-collector/src/test/java/io/pravega/sensor/collector/file/LogFileSequenceProcessorTests.java deleted file mode 100644 index 87c92354..00000000 --- a/pravega-sensor-collector/src/test/java/io/pravega/sensor/collector/file/LogFileSequenceProcessorTests.java +++ /dev/null @@ -1,46 +0,0 @@ -/** - * Copyright (c) Dell Inc., or its subsidiaries. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - */ -package io.pravega.sensor.collector.file; - -import com.google.common.collect.ImmutableList; -import org.junit.Assert; -import org.junit.Test; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.IOException; -import java.util.List; - -public class LogFileSequenceProcessorTests { - private static final Logger log = LoggerFactory.getLogger(LogFileSequenceProcessorTests.class); - - @Test - public void getNewFilesTest() { - final List<FileNameWithOffset> directoryListing = ImmutableList.of( - new FileNameWithOffset("file2", 10), - new FileNameWithOffset("file4", 10), - new FileNameWithOffset("file3", 10)); - final List<FileNameWithOffset> completedFiles = ImmutableList.of( - new FileNameWithOffset("file1", 10), - new FileNameWithOffset("file2", 10)); - final List<FileNameWithOffset> expected = ImmutableList.of( - new FileNameWithOffset("file3", 0), - new FileNameWithOffset("file4", 0)); - final List<FileNameWithOffset> actual = LogFileSequenceProcessor.getNewFiles(directoryListing, completedFiles); - Assert.assertEquals(expected, actual); - } - - @Test - public void getDirectoryListingTest() throws IOException { - final List<FileNameWithOffset> actual = LogFileSequenceProcessor.getDirectoryListing( - "../log-file-sample-data/*.csv"); - log.info("actual={}", actual); - } -} diff --git a/pravega-sensor-collector/src/test/java/io/pravega/sensor/collector/file/EventGeneratorTests.java b/pravega-sensor-collector/src/test/java/io/pravega/sensor/collector/file/csvfile/CSVFileEventGeneratorTests.java similarity index 71% rename from pravega-sensor-collector/src/test/java/io/pravega/sensor/collector/file/EventGeneratorTests.java rename to pravega-sensor-collector/src/test/java/io/pravega/sensor/collector/file/csvfile/CSVFileEventGeneratorTests.java index 6c8b4362..fdae2efd 100644 --- a/pravega-sensor-collector/src/test/java/io/pravega/sensor/collector/file/EventGeneratorTests.java +++ b/pravega-sensor-collector/src/test/java/io/pravega/sensor/collector/file/csvfile/CSVFileEventGeneratorTests.java @@ -7,12 +7,14 @@ * * http://www.apache.org/licenses/LICENSE-2.0 */ -package io.pravega.sensor.collector.file; +package io.pravega.sensor.collector.file.csvfile; import com.google.common.io.CountingInputStream; +import io.pravega.sensor.collector.file.EventGenerator; +import io.pravega.sensor.collector.util.PravegaWriterEvent; import org.apache.commons.lang3.tuple.Pair; -import org.junit.Assert; -import org.junit.Test; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -22,12 +24,12 @@ import java.util.ArrayList; import java.util.List; -public class EventGeneratorTests { - private static final Logger log = LoggerFactory.getLogger(EventGeneratorTests.class); +public
class CSVFileEventGeneratorTests { + private static final Logger log = LoggerFactory.getLogger(CSVFileEventGeneratorTests.class); @Test public void Test3by2() throws IOException { - final EventGenerator eventGenerator = EventGenerator.create("routingKey1", 2); + final EventGenerator eventGenerator = CsvFileEventGenerator.create("routingKey1", 2); final String csvStr = "\"Time\",\"X\",\"Y\",\"Z\",\"IN_PROGRESS\"\n" + "\"2020-07-15 23:59:50.352\",\"0.305966\",\"0.0\",\"9.331963\",\"0\"\n" + @@ -37,13 +39,13 @@ public void Test3by2() throws IOException { final List<PravegaWriterEvent> events = new ArrayList<>(); Pair<Long, Long> nextSequenceNumberAndOffset = eventGenerator.generateEventsFromInputStream(inputStream, 100, events::add); log.info("events={}", events); - Assert.assertEquals(102L, (long) nextSequenceNumberAndOffset.getLeft()); - Assert.assertEquals(csvStr.length(), (long) nextSequenceNumberAndOffset.getRight()); + Assertions.assertEquals(102L, (long) nextSequenceNumberAndOffset.getLeft()); + Assertions.assertEquals(csvStr.length(), (long) nextSequenceNumberAndOffset.getRight()); } @Test public void Test3by3() throws IOException { - final EventGenerator eventGenerator = EventGenerator.create("routingKey1", 3); + final EventGenerator eventGenerator = CsvFileEventGenerator.create("routingKey1", 3); final String csvStr = "\"Time\",\"X\",\"Y\",\"Z\",\"IN_PROGRESS\"\n" + "\"2020-07-15 23:59:50.352\",\"0.305966\",\"0.0\",\"9.331963\",\"0\"\n" + @@ -53,13 +55,13 @@ public void Test3by3() throws IOException { final List<PravegaWriterEvent> events = new ArrayList<>(); Pair<Long, Long> nextSequenceNumberAndOffset = eventGenerator.generateEventsFromInputStream(inputStream, 100, events::add); log.info("events={}", events); - Assert.assertEquals(101L, (long) nextSequenceNumberAndOffset.getLeft()); - Assert.assertEquals(csvStr.length(), (long) nextSequenceNumberAndOffset.getRight()); + Assertions.assertEquals(101L, (long) nextSequenceNumberAndOffset.getLeft()); + Assertions.assertEquals(csvStr.length(), (long) nextSequenceNumberAndOffset.getRight()); } @Test public void Test1by3() throws IOException { - final EventGenerator eventGenerator = EventGenerator.create("routingKey1", 3); + final EventGenerator eventGenerator = CsvFileEventGenerator.create("routingKey1", 3); final String csvStr = "\"Time\",\"X\",\"Y\",\"Z\",\"IN_PROGRESS\"\n" + "\"2020-07-15 23:59:50.352\",\"0.305966\",\"0.0\",\"9.331963\",\"0\"\n"; @@ -67,38 +69,38 @@ public void Test1by3() throws IOException { final List<PravegaWriterEvent> events = new ArrayList<>(); Pair<Long, Long> nextSequenceNumberAndOffset = eventGenerator.generateEventsFromInputStream(inputStream, 100, events::add); log.info("events={}", events); - Assert.assertEquals(101L, (long) nextSequenceNumberAndOffset.getLeft()); - Assert.assertEquals(csvStr.length(), (long) nextSequenceNumberAndOffset.getRight()); + Assertions.assertEquals(101L, (long) nextSequenceNumberAndOffset.getLeft()); + Assertions.assertEquals(csvStr.length(), (long) nextSequenceNumberAndOffset.getRight()); } @Test public void Test0by3() throws IOException { - final EventGenerator eventGenerator = EventGenerator.create("routingKey1", 3); + final EventGenerator eventGenerator = CsvFileEventGenerator.create("routingKey1", 3); final String csvStr = "\"Time\",\"X\",\"Y\",\"Z\",\"IN_PROGRESS\"\n"; final CountingInputStream inputStream = new CountingInputStream(new ByteArrayInputStream(csvStr.getBytes(StandardCharsets.UTF_8))); final List<PravegaWriterEvent> events = new ArrayList<>(); Pair<Long, Long> nextSequenceNumberAndOffset = eventGenerator.generateEventsFromInputStream(inputStream, 100, events::add); log.info("events={}",
events); - Assert.assertEquals(100L, (long) nextSequenceNumberAndOffset.getLeft()); - Assert.assertEquals(csvStr.length(), (long) nextSequenceNumberAndOffset.getRight()); + Assertions.assertEquals(100L, (long) nextSequenceNumberAndOffset.getLeft()); + Assertions.assertEquals(csvStr.length(), (long) nextSequenceNumberAndOffset.getRight()); } @Test public void TestEmptyFile() throws IOException { - final EventGenerator eventGenerator = EventGenerator.create("routingKey1", 3); + final EventGenerator eventGenerator = CsvFileEventGenerator.create("routingKey1", 3); final String csvStr = ""; final CountingInputStream inputStream = new CountingInputStream(new ByteArrayInputStream(csvStr.getBytes(StandardCharsets.UTF_8))); final List<PravegaWriterEvent> events = new ArrayList<>(); Pair<Long, Long> nextSequenceNumberAndOffset = eventGenerator.generateEventsFromInputStream(inputStream, 100, events::add); log.info("events={}", events); - Assert.assertEquals(100L, (long) nextSequenceNumberAndOffset.getLeft()); - Assert.assertEquals(csvStr.length(), (long) nextSequenceNumberAndOffset.getRight()); + Assertions.assertEquals(100L, (long) nextSequenceNumberAndOffset.getLeft()); + Assertions.assertEquals(csvStr.length(), (long) nextSequenceNumberAndOffset.getRight()); } @Test public void test7by3() throws IOException { - final EventGenerator eventGenerator = EventGenerator.create("routingKey1", 3); + final EventGenerator eventGenerator = CsvFileEventGenerator.create("routingKey1", 3); final String csvStr = "\"Time\",\"X\",\"Y\",\"Z\",\"IN_PROGRESS\"\n" + "\"2020-07-15 23:59:50.352\",\"0.305966\",\"0.0\",\"9.331963\",\"0\"\n" + @@ -112,7 +114,7 @@ public void test7by3() throws IOException { final List<PravegaWriterEvent> events = new ArrayList<>(); Pair<Long, Long> nextSequenceNumberAndOffset = eventGenerator.generateEventsFromInputStream(inputStream, 100, events::add); log.info("events={}", events); - Assert.assertEquals(103L, (long) nextSequenceNumberAndOffset.getLeft()); - Assert.assertEquals(csvStr.length(), (long) nextSequenceNumberAndOffset.getRight()); + Assertions.assertEquals(103L, (long) nextSequenceNumberAndOffset.getLeft()); + Assertions.assertEquals(csvStr.length(), (long) nextSequenceNumberAndOffset.getRight()); } } diff --git a/pravega-sensor-collector/src/test/java/io/pravega/sensor/collector/file/csvfile/CsvFileSequenceProcessorTests.java b/pravega-sensor-collector/src/test/java/io/pravega/sensor/collector/file/csvfile/CsvFileSequenceProcessorTests.java new file mode 100644 index 00000000..8aa3097f --- /dev/null +++ b/pravega-sensor-collector/src/test/java/io/pravega/sensor/collector/file/csvfile/CsvFileSequenceProcessorTests.java @@ -0,0 +1,29 @@ +package io.pravega.sensor.collector.file.csvfile; + +import io.pravega.sensor.collector.file.FileProcessor; +import io.pravega.sensor.collector.file.FileProcessorTests; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.mockito.Mockito; + +import static org.mockito.ArgumentMatchers.any; + +public class CsvFileSequenceProcessorTests extends FileProcessorTests { + + @BeforeEach + public void before() throws Exception { + super.setup(); + + } + + /* + * Generate an event for a CSV file and process new files when there are no pending files.
+ */ + @Test + public void generateEventForCSVFileTests() throws Exception { + FileProcessor fileProcessor = new CsvFileSequenceProcessor(config, state, transactionalEventWriter,transactionCoordinator, "test"); + fileProcessor.processNewFiles(); + Mockito.verify(state, Mockito.times(1)).getNextPendingFileRecord(); + } +} diff --git a/pravega-sensor-collector/src/test/java/io/pravega/sensor/collector/file/parquet/ParquetEventGeneratorTests.java b/pravega-sensor-collector/src/test/java/io/pravega/sensor/collector/file/parquet/ParquetEventGeneratorTests.java new file mode 100644 index 00000000..4f1cecb7 --- /dev/null +++ b/pravega-sensor-collector/src/test/java/io/pravega/sensor/collector/file/parquet/ParquetEventGeneratorTests.java @@ -0,0 +1,38 @@ +package io.pravega.sensor.collector.file.parquet; + +import com.google.common.io.CountingInputStream; +import io.pravega.sensor.collector.file.EventGenerator; +import io.pravega.sensor.collector.util.FileNameWithOffset; +import io.pravega.sensor.collector.util.FileUtils; +import io.pravega.sensor.collector.util.PravegaWriterEvent; +import org.apache.commons.lang3.tuple.Pair; +import org.junit.Assert; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.List; + +public class ParquetEventGeneratorTests { + private static final Logger log = LoggerFactory.getLogger(ParquetEventGeneratorTests.class); + + @Test + public void TestFile() throws IOException { + final EventGenerator eventGenerator = ParquetEventGenerator.create("routingKey1",100); + final List<FileNameWithOffset> files = FileUtils.getDirectoryListing("../parquet-file-sample-data","parquet", Paths.get("."), 5000); + File parquetData = new File(files.get(0).fileName); + + final CountingInputStream inputStream = new CountingInputStream(new FileInputStream(parquetData)); + final List<PravegaWriterEvent> events = new ArrayList<>(); + Pair<Long, Long> nextSequenceNumberAndOffset = eventGenerator.generateEventsFromInputStream(inputStream, 1, events::add); + Assert.assertEquals(501L, (long) nextSequenceNumberAndOffset.getLeft()); + Assert.assertEquals(parquetData.length(), (long) nextSequenceNumberAndOffset.getRight()); + } + +} diff --git a/pravega-sensor-collector/src/test/java/io/pravega/sensor/collector/file/parquet/ParquetFileProcessorTests.java b/pravega-sensor-collector/src/test/java/io/pravega/sensor/collector/file/parquet/ParquetFileProcessorTests.java new file mode 100644 index 00000000..92dbed57 --- /dev/null +++ b/pravega-sensor-collector/src/test/java/io/pravega/sensor/collector/file/parquet/ParquetFileProcessorTests.java @@ -0,0 +1,27 @@ +package io.pravega.sensor.collector.file.parquet; + +import io.pravega.sensor.collector.file.FileProcessor; +import io.pravega.sensor.collector.file.FileProcessorTests; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.mockito.Mockito; + +public class ParquetFileProcessorTests extends FileProcessorTests { + + + @BeforeEach + public void before() throws Exception { + super.setup(); + + } + + /* + * Generate an event for a Parquet file and check processing of new files when there are no pending files.
+ */ + @Test + public void generateEventForParquetTests() throws Exception { + FileProcessor fileProcessor = new ParquetFileProcessor(config, state, transactionalEventWriter,transactionCoordinator, "test"); + fileProcessor.processNewFiles(); + Mockito.verify(state, Mockito.times(1)).getNextPendingFileRecord(); + } +} diff --git a/pravega-sensor-collector/src/test/java/io/pravega/sensor/collector/file/rawfile/RawEventGeneratorTests.java b/pravega-sensor-collector/src/test/java/io/pravega/sensor/collector/file/rawfile/RawEventGeneratorTests.java new file mode 100644 index 00000000..d883caf6 --- /dev/null +++ b/pravega-sensor-collector/src/test/java/io/pravega/sensor/collector/file/rawfile/RawEventGeneratorTests.java @@ -0,0 +1,49 @@ +package io.pravega.sensor.collector.file.rawfile; + +import com.google.common.io.CountingInputStream; +import io.pravega.sensor.collector.file.EventGenerator; +import io.pravega.sensor.collector.util.PravegaWriterEvent; +import org.apache.commons.lang3.tuple.Pair; +import org.junit.Assert; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.List; + +public class RawEventGeneratorTests { + private static final Logger log = LoggerFactory.getLogger(RawEventGeneratorTests.class); + + @Test + public void TestFile() throws IOException { + final EventGenerator eventGenerator = RawEventGenerator.create("routingKey1"); + final String rawfileStr = + "\"Time\",\"X\",\"Y\",\"Z\",\"IN_PROGRESS\"\n" + + "\"2020-07-15 23:59:50.352\",\"0.305966\",\"0.0\",\"9.331963\",\"0\"\n" + + "\"2020-07-15 23:59:50.362\",\"1.305966\",\"0.1\",\"1.331963\",\"0\"\n" + + "\"2020-07-15 23:59:50.415\",\"0.305966\",\"0.0\",\"9.331963\",\"0\"\n"; + final CountingInputStream inputStream = new CountingInputStream(new ByteArrayInputStream(rawfileStr.getBytes(StandardCharsets.UTF_8))); + final List<PravegaWriterEvent> events = new ArrayList<>(); + Pair<Long, Long> nextSequenceNumberAndOffset = eventGenerator.generateEventsFromInputStream(inputStream, 100, events::add); + log.info("events={}", events); + Assert.assertEquals(101L, (long) nextSequenceNumberAndOffset.getLeft()); + Assert.assertEquals(rawfileStr.length(), (long) nextSequenceNumberAndOffset.getRight()); + } + + @Test + public void TestEmptyFile() throws IOException { + final EventGenerator eventGenerator = RawEventGenerator.create("routingKey1"); + final String rawfileStr = ""; + final CountingInputStream inputStream = new CountingInputStream(new ByteArrayInputStream(rawfileStr.getBytes(StandardCharsets.UTF_8))); + final List<PravegaWriterEvent> events = new ArrayList<>(); + Pair<Long, Long> nextSequenceNumberAndOffset = eventGenerator.generateEventsFromInputStream(inputStream, 100, events::add); + log.info("events={}", events); + Assert.assertEquals(100L, (long) nextSequenceNumberAndOffset.getLeft()); + Assert.assertEquals(rawfileStr.length(), (long) nextSequenceNumberAndOffset.getRight()); + } + +} diff --git a/pravega-sensor-collector/src/test/java/io/pravega/sensor/collector/file/rawfile/RawFileProcessorTests.java b/pravega-sensor-collector/src/test/java/io/pravega/sensor/collector/file/rawfile/RawFileProcessorTests.java new file mode 100644 index 00000000..d8d68c9c --- /dev/null +++ b/pravega-sensor-collector/src/test/java/io/pravega/sensor/collector/file/rawfile/RawFileProcessorTests.java @@ -0,0 +1,26 @@ +package io.pravega.sensor.collector.file.rawfile; + +import io.pravega.sensor.collector.file.FileProcessor;
+import io.pravega.sensor.collector.file.FileProcessorTests; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.mockito.Mockito; + +public class RawFileProcessorTests extends FileProcessorTests { + @BeforeEach + public void before() throws Exception { + super.setup(); + + } + + /* + * Generate an event for a Raw file and check processing of new files when there are no pending files. + */ + @Test + public void generateEventForRawFileTests() throws Exception { + FileProcessor fileProcessor = new RawFileProcessor(config, state, transactionalEventWriter,transactionCoordinator, "test"); + fileProcessor.processNewFiles(); + Mockito.verify(state, Mockito.times(1)).getNextPendingFileRecord(); + } + +} diff --git a/pravega-sensor-collector/src/test/java/io/pravega/sensor/collector/util/PersistentIdTests.java b/pravega-sensor-collector/src/test/java/io/pravega/sensor/collector/util/PersistentIdTests.java index 349414d7..0209bf1d 100644 --- a/pravega-sensor-collector/src/test/java/io/pravega/sensor/collector/util/PersistentIdTests.java +++ b/pravega-sensor-collector/src/test/java/io/pravega/sensor/collector/util/PersistentIdTests.java @@ -9,8 +9,8 @@ */ package io.pravega.sensor.collector.util; -import org.junit.Assert; -import org.junit.Test; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -25,7 +25,7 @@ public class PersistentIdTests { @Test public void persistentIdTest() throws SQLException { - final String fileName = "/tmp/persistent-id-test-" + UUID.randomUUID() + ".db"; + final String fileName = "persistent-id-test-" + UUID.randomUUID() + ".db"; log.info("fileName={}", fileName); try { @@ -39,7 +39,7 @@ public void persistentIdTest() throws SQLException { writerId2 = new PersistentId(connection).getPersistentId().toString(); log.info("writerId2={}", writerId2); } - Assert.assertEquals(writerId1, writerId2); + Assertions.assertEquals(writerId1, writerId2); } finally { new File(fileName).delete(); } diff --git a/pravega-sensor-collector/src/test/java/io/pravega/sensor/collector/util/TransactionCoordinatorTests.java b/pravega-sensor-collector/src/test/java/io/pravega/sensor/collector/util/TransactionCoordinatorTests.java new file mode 100644 index 00000000..84733240 --- /dev/null +++ b/pravega-sensor-collector/src/test/java/io/pravega/sensor/collector/util/TransactionCoordinatorTests.java @@ -0,0 +1,262 @@ +package io.pravega.sensor.collector.util; + +import io.pravega.client.stream.TxnFailedException; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.mockito.Mock; +import org.mockito.Mockito; +import org.mockito.MockitoAnnotations; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.sql.*; +import java.util.List; +import java.util.Optional; +import java.util.UUID; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.mockito.ArgumentMatchers.anyString; +import static org.mockito.Mockito.*; + +public class TransactionCoordinatorTests { + + @Mock + private Connection mockConnection; + + @Mock + private PreparedStatement mockPrepareStatement; + @Mock + private Statement mockStatement; + + @Mock + private ResultSet mockResultSet; + + @Mock + private EventWriter<byte[]> eventWriter; + + @Mock + TransactionalEventWriter<byte[]> transactionalEventWriter; + + private TransactionCoordinator transactionProcessor; + private static final Logger log =
LoggerFactory.getLogger(TransactionCoordinatorTests.class); + + @BeforeEach + public void setUp() throws SQLException { + MockitoAnnotations.initMocks(this); + + // Mock behavior for the connection and statement + when(mockConnection.createStatement()).thenReturn(mockStatement); + when(mockStatement.execute(anyString())).thenReturn(true); + /*when(mockConnection.prepareStatement(anyString())).thenReturn(mockPrepareStatement); + when(mockPrepareStatement.execute()).thenReturn(true);*/ + transactionProcessor = new TransactionCoordinator(mockConnection,transactionalEventWriter); + } + + @Test + public void testAddTransactionToCommit() throws SQLException { + + UUID mockTransactionId = UUID.randomUUID(); + Optional<UUID> optionalTransactionId = Optional.of(mockTransactionId); + when(mockConnection.prepareStatement(anyString())).thenReturn(mockPrepareStatement); + when(mockPrepareStatement.execute()).thenReturn(true); + transactionProcessor.addTransactionToCommit(optionalTransactionId); + // Assert + // Verify that prepareStatement was called with the correct SQL query + verify(mockConnection).prepareStatement("insert into TransactionsToCommit (txnId) values (?)"); + verify(mockStatement).execute(anyString()); + + } + + /* + * SQLException while adding a transaction id to the TransactionsToCommit table + */ + @Test + public void testAddTransactionToCommitThrowSQLException() throws SQLException { + + UUID mockTransactionId = UUID.randomUUID(); + Optional<UUID> optionalTransactionId = Optional.of(mockTransactionId); + when(mockConnection.prepareStatement(anyString())).thenReturn(mockPrepareStatement); + // Mock behavior: when preparedStatement.execute is called, throw a SQLException + doThrow(new SQLException("Test exception")).when(mockPrepareStatement).execute(); + + // Use assertThrows to verify that the SQLException is wrapped in a RuntimeException + assertThrows(RuntimeException.class, () -> transactionProcessor.addTransactionToCommit(optionalTransactionId)); + + // Verify that prepareStatement was called with the correct SQL query + verify(mockConnection).prepareStatement("insert into TransactionsToCommit (txnId) values (?)"); + verify(mockStatement).execute(anyString()); + + } + + @Test + public void testDeleteTransactionToCommit() throws SQLException { + + UUID mockTransactionId = UUID.randomUUID(); + Optional<UUID> optionalTransactionId = Optional.of(mockTransactionId); + when(mockConnection.prepareStatement(anyString())).thenReturn(mockPrepareStatement); + when(mockPrepareStatement.execute()).thenReturn(true); + transactionProcessor.deleteTransactionToCommit(optionalTransactionId); + // Assert + // Verify that prepareStatement was called with the correct SQL query + verify(mockConnection).prepareStatement("delete from TransactionsToCommit where txnId = ?"); + verify(mockStatement).execute(anyString()); + + } + + /* + * SQLException while deleting a transaction id from the TransactionsToCommit table + */ + @Test + public void testDeleteTransactionToCommitThrowSQLException() throws SQLException { + + UUID mockTransactionId = UUID.randomUUID(); + Optional<UUID> optionalTransactionId = Optional.of(mockTransactionId); + when(mockConnection.prepareStatement(anyString())).thenReturn(mockPrepareStatement); + // Mock behavior: when preparedStatement.execute is called, throw a SQLException + doThrow(new SQLException("Test exception")).when(mockPrepareStatement).execute(); + + // Use assertThrows to verify that the SQLException is wrapped in a RuntimeException + assertThrows(RuntimeException.class, () -> transactionProcessor.deleteTransactionToCommit(optionalTransactionId)); + + // Verify
that prepareStatement was called with the correct SQL query + verify(mockConnection).prepareStatement("delete from TransactionsToCommit where txnId = ?"); + verify(mockStatement).execute(anyString()); + } + + /* + * Test to verify the getTransactionsToCommit method. + * Verify that the number of transaction ids matches the result set. + * + */ + @Test + public void testGetTransactionToCommit() throws SQLException { + // Mock behavior: when statement.executeQuery is called, return the mock result set + when(mockStatement.executeQuery("select txnId from TransactionsToCommit")).thenReturn(mockResultSet); + // Mock behavior: simulate the result set having two rows with different UUIDs + when(mockResultSet.next()).thenReturn(true, true, false); + when(mockResultSet.getString("txnId")).thenReturn(UUID.randomUUID().toString(), UUID.randomUUID().toString()); + + // Get the list of transaction ids from the TransactionsToCommit table + List<UUID> uuidList = transactionProcessor.getTransactionsToCommit(); + + // Assert + verify(mockResultSet, times(3)).next(); + verify(mockResultSet, times(2)).getString("txnId"); + // Verify the result contains 2 UUIDs + assertEquals(2, uuidList.size()); + } + + /* + * Test to verify the performRecovery method. + */ + @Test + public void testPerformRecovery() throws SQLException, TxnFailedException { + // Mock behavior: when statement.executeQuery is called, return the mock result set + when(mockStatement.executeQuery("select txnId from TransactionsToCommit")).thenReturn(mockResultSet); + // Mock behavior: simulate the result set having two rows with different UUIDs + when(mockResultSet.next()).thenReturn(true, true, false); + when(mockResultSet.getString("txnId")).thenReturn(UUID.randomUUID().toString(), UUID.randomUUID().toString()); + // Mock for the delete transaction call + when(mockConnection.prepareStatement(anyString())).thenReturn(mockPrepareStatement); + when(mockPrepareStatement.execute()).thenReturn(true); + + doNothing().when(transactionalEventWriter).commit(any()); + + // Perform recovery + transactionProcessor.performRecovery(); + + // Assert + verify(mockResultSet, times(3)).next(); + verify(mockResultSet, times(2)).getString("txnId"); + verify(mockConnection, times(2)).prepareStatement("delete from TransactionsToCommit where txnId = ?"); + + } + + + /* + * Test to verify the performRecovery method.
+ * Verify the scenario where the transaction commit throws TxnFailedException + */ + @Test + public void testPerformRecoveryWithCommitFail() throws SQLException, TxnFailedException { + // Mock behavior: when statement.executeQuery is called, return the mock result set + when(mockStatement.executeQuery("select txnId from TransactionsToCommit")).thenReturn(mockResultSet); + // Mock behavior: simulate the result set having two rows with different UUIDs + when(mockResultSet.next()).thenReturn(true, true, false); + when(mockResultSet.getString("txnId")).thenReturn(UUID.randomUUID().toString(), UUID.randomUUID().toString()); + // Mock for the delete transaction call + when(mockConnection.prepareStatement(anyString())).thenReturn(mockPrepareStatement); + when(mockPrepareStatement.execute()).thenReturn(true); + Mockito.doAnswer(invocation -> { + throw new TxnFailedException("Simulated transaction failure"); + }).when(transactionalEventWriter).commit(Mockito.any()); + //doNothing().when(transactionalEventWriter).commit(any()); + + // Perform recovery + transactionProcessor.performRecovery(); + + // Assert + verify(mockResultSet, times(3)).next(); + verify(mockResultSet, times(2)).getString("txnId"); + //verify(mockConnection, times(2)).prepareStatement("delete from TransactionsToCommit where txnId = ?"); + + } + + /* + * Test to verify the performRecovery method. + * Verify the scenario where the transaction commit throws a RuntimeException with "Unknown transaction" as the message + */ + @Test + public void testPerformRecoveryCommitWithUnknownTransactionFail() throws SQLException, TxnFailedException { + // Mock behavior: when statement.executeQuery is called, return the mock result set + when(mockStatement.executeQuery("select txnId from TransactionsToCommit")).thenReturn(mockResultSet); + // Mock behavior: simulate the result set having two rows with different UUIDs + when(mockResultSet.next()).thenReturn(true, true, false); + when(mockResultSet.getString("txnId")).thenReturn(UUID.randomUUID().toString(), UUID.randomUUID().toString()); + // Mock for the delete transaction call + when(mockConnection.prepareStatement(anyString())).thenReturn(mockPrepareStatement); + when(mockPrepareStatement.execute()).thenReturn(true); + Mockito.doAnswer(invocation -> { + throw new RuntimeException("Unknown transaction"); + }).when(transactionalEventWriter).commit(Mockito.any()); + + // Perform recovery + transactionProcessor.performRecovery(); + + // Assert + verify(mockResultSet, times(3)).next(); + verify(mockResultSet, times(2)).getString("txnId"); + + } + + /* + * Test to verify the performRecovery method.
+ * Verify the scenario where the transaction commit throws any other runtime exception + */ + @Test + public void testPerformRecoveryCommitWithOtherException() throws SQLException, TxnFailedException { + // Mock behavior: when statement.executeQuery is called, return the mock result set + when(mockStatement.executeQuery("select txnId from TransactionsToCommit")).thenReturn(mockResultSet); + // Mock behavior: simulate the result set having one row + when(mockResultSet.next()).thenReturn(true, false); + when(mockResultSet.getString("txnId")).thenReturn(UUID.randomUUID().toString(), UUID.randomUUID().toString()); + // Mock for the delete transaction call + when(mockConnection.prepareStatement(anyString())).thenReturn(mockPrepareStatement); + when(mockPrepareStatement.execute()).thenReturn(true); + Mockito.doAnswer(invocation -> { + throw new RuntimeException("Other Runtime Exception"); + }).when(transactionalEventWriter).commit(Mockito.any()); + + // Perform recovery + RuntimeException exception = assertThrows(RuntimeException.class, () -> { + transactionProcessor.performRecovery(); + }); + + // Assert + String expectedMessage = "Other Runtime Exception"; + assertEquals(expectedMessage, exception.getMessage(), "Exception message mismatch"); + + + } +} diff --git a/pravega-sensor-collector/src/test/java/io/pravega/sensor/collector/util/TransactionStateSQLiteImplTests.java b/pravega-sensor-collector/src/test/java/io/pravega/sensor/collector/util/TransactionStateSQLiteImplTests.java new file mode 100644 index 00000000..d6f98d43 --- /dev/null +++ b/pravega-sensor-collector/src/test/java/io/pravega/sensor/collector/util/TransactionStateSQLiteImplTests.java @@ -0,0 +1,108 @@ +/** + * Copyright (c) Dell Inc., or its subsidiaries. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + */ +package io.pravega.sensor.collector.util; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableSet; +import org.apache.commons.lang3.tuple.ImmutablePair; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.sql.SQLException; +import java.util.HashSet; +import java.util.List; + +public class TransactionStateSQLiteImplTests { + + private static final Logger log = LoggerFactory.getLogger(TransactionStateSQLiteImplTests.class); + + @Test + public void pendingFilesTest() throws SQLException { + final String stateDatabaseFileName = ":memory:"; + final TransactionStateDB state = TransactionStateInMemoryImpl.create(stateDatabaseFileName); + Assertions.assertNull(state.getNextPendingFileRecord()); + state.addPendingFileRecords(ImmutableList.of(new FileNameWithOffset("file1.csv", 0L))); + Assertions.assertEquals(new ImmutablePair<>(new FileNameWithOffset("file1.csv", 0L), 0L), state.getNextPendingFileRecord()); + state.addPendingFileRecords(ImmutableList.of(new FileNameWithOffset("file2.csv", 0L))); + Assertions.assertEquals(new ImmutablePair<>(new FileNameWithOffset("file1.csv", 0L), 0L), state.getNextPendingFileRecord()); + state.addPendingFileRecords(ImmutableList.of(new FileNameWithOffset("file0.csv", 0L))); + Assertions.assertEquals(new ImmutablePair<>(new FileNameWithOffset("file1.csv", 0L), 0L), state.getNextPendingFileRecord()); + } + + @Test + public void completedFilesTest() throws SQLException { + final String stateDatabaseFileName = ":memory:"; + final TransactionStateInMemoryImpl state = TransactionStateInMemoryImpl.create(stateDatabaseFileName); + Assertions.assertNull(state.getNextPendingFileRecord()); + state.addPendingFileRecords(ImmutableList.of(new FileNameWithOffset("file1.csv", 0L))); + Assertions.assertEquals(new ImmutablePair<>(new FileNameWithOffset("file1.csv", 0L), 0L), state.getNextPendingFileRecord()); + state.addCompletedFileRecord("file1.csv", 0L, 1000L, 10L); + final List<FileNameWithOffset> completedFiles = state.getCompletedFileRecords(); + log.info("completedFiles={}", completedFiles); + Assertions.assertEquals(ImmutableSet.of(new FileNameWithOffset("file1.csv", 1000L)), new HashSet<>(completedFiles)); + Assertions.assertNull(state.getNextPendingFileRecord()); + // Make sure this is idempotent. + state.addCompletedFileRecord("file1.csv", 0L, 1000L, 10L); + Assertions.assertEquals(ImmutableSet.of(new FileNameWithOffset("file1.csv", 1000L)), new HashSet<>(completedFiles)); + Assertions.assertNull(state.getNextPendingFileRecord()); + } + + @Test + public void processFilesTest() throws SQLException { + final String stateDatabaseFileName = ":memory:"; + final TransactionStateInMemoryImpl state = TransactionStateInMemoryImpl.create(stateDatabaseFileName); + Assertions.assertNull(state.getNextPendingFileRecord()); + // Find 3 new files. + state.addPendingFileRecords(ImmutableList.of(new FileNameWithOffset("file2.csv", 0L))); + state.addPendingFileRecords(ImmutableList.of(new FileNameWithOffset("file1.csv", 0L))); + state.addPendingFileRecords(ImmutableList.of(new FileNameWithOffset("file3.csv", 0L))); + // Re-add a pending file. This should be ignored. + state.addPendingFileRecords(ImmutableList.of(new FileNameWithOffset("file1.csv", 0L))); + // Get next pending file.
+ Assertions.assertEquals(new ImmutablePair<>(new FileNameWithOffset("file2.csv", 0L), 0L), state.getNextPendingFileRecord()); + // Complete file. + state.addCompletedFileRecord("file2.csv", 0L, 1000L, 10L); + Assertions.assertEquals(ImmutableSet.of(new FileNameWithOffset("file2.csv", 1000L)), new HashSet<>(state.getCompletedFileRecords())); + // Get next pending file. + Assertions.assertEquals(new ImmutablePair<>(new FileNameWithOffset("file1.csv", 0L), 10L), state.getNextPendingFileRecord()); + // Complete file. + state.addCompletedFileRecord("file1.csv", 0L, 2000L, 20L); + Assertions.assertEquals(ImmutableSet.of( + new FileNameWithOffset("file2.csv", 1000L), + new FileNameWithOffset("file1.csv", 2000L)), + new HashSet<>(state.getCompletedFileRecords())); + // Get next pending file. + Assertions.assertEquals(new ImmutablePair<>(new FileNameWithOffset("file3.csv", 0L), 20L), state.getNextPendingFileRecord()); + // Complete file. + state.addCompletedFileRecord("file3.csv", 0L, 1500L, 30L); + Assertions.assertEquals(ImmutableSet.of( + new FileNameWithOffset("file2.csv", 1000L), + new FileNameWithOffset("file1.csv", 2000L), + new FileNameWithOffset("file3.csv", 1500L)), new HashSet<>(state.getCompletedFileRecords())); + // No more pending files. + Assertions.assertNull(state.getNextPendingFileRecord()); + // Delete completed file. + state.deleteCompletedFileRecord("file1.csv"); + Assertions.assertEquals(ImmutableSet.of( + new FileNameWithOffset("file2.csv", 1000L), + new FileNameWithOffset("file3.csv", 1500L)), + new HashSet<>(state.getCompletedFileRecords())); + // Delete completed file. + state.deleteCompletedFileRecord("file2.csv"); + Assertions.assertEquals(ImmutableSet.of( + new FileNameWithOffset("file3.csv", 1500L)), + new HashSet<>(state.getCompletedFileRecords())); + // Delete completed file. + state.deleteCompletedFileRecord("file3.csv"); + Assertions.assertTrue(state.getCompletedFileRecords().isEmpty()); + } +} diff --git a/pravega-sensor-collector/src/test/resources/LogFileIngestTest.properties b/pravega-sensor-collector/src/test/resources/LogFileIngestTest.properties index b6f644fb..c672fa5e 100644 --- a/pravega-sensor-collector/src/test/resources/LogFileIngestTest.properties +++ b/pravega-sensor-collector/src/test/resources/LogFileIngestTest.properties @@ -10,8 +10,9 @@ # This file can be used to manually test LogFileIngestService. # Run scripts/simulate-logs-accel.sh concurrently. 
-PRAVEGA_SENSOR_COLLECTOR_ACCEL2_CLASS=io.pravega.sensor.collector.file.LogFileIngestService -PRAVEGA_SENSOR_COLLECTOR_ACCEL2_FILE_SPEC=/tmp/watch/Accelerometer.*.csv +PRAVEGA_SENSOR_COLLECTOR_ACCEL2_CLASS=io.pravega.sensor.collector.file.csvfile.CsvFileIngestService +PRAVEGA_SENSOR_COLLECTOR_ACCEL2_FILE_SPEC=/tmp/watch/files +PRAVEGA_SENSOR_COLLECTOR_ACCEL2_FILE_EXTENSION=csv PRAVEGA_SENSOR_COLLECTOR_ACCEL2_DELETE_COMPLETED_FILES=true PRAVEGA_SENSOR_COLLECTOR_ACCEL2_DATABASE_FILE=/tmp/accelerometer.db PRAVEGA_SENSOR_COLLECTOR_ACCEL2_EVENT_TEMPLATE={"RemoteAddr":"myaddr1","SensorType":"Accelerometer"} @@ -20,3 +21,4 @@ PRAVEGA_SENSOR_COLLECTOR_ACCEL2_SCOPE=examples PRAVEGA_SENSOR_COLLECTOR_ACCEL2_CREATE_SCOPE=true PRAVEGA_SENSOR_COLLECTOR_ACCEL2_STREAM=sensors-accelerometer PRAVEGA_SENSOR_COLLECTOR_ACCEL2_ROUTING_KEY=routingkey1 +PRAVEGA_SENSOR_COLLECTOR_ACCEL2_MIN_TIME_IN_MILLIS_TO_UPDATE_FILE=5000 diff --git a/pravega-sensor-collector/src/test/resources/ParquetFileIngest.properties b/pravega-sensor-collector/src/test/resources/ParquetFileIngest.properties index 26fdd224..57eaa626 100644 --- a/pravega-sensor-collector/src/test/resources/ParquetFileIngest.properties +++ b/pravega-sensor-collector/src/test/resources/ParquetFileIngest.properties @@ -8,8 +8,8 @@ # http://www.apache.org/licenses/LICENSE-2.0 # # This file can be used to manually test ParquetFileIngestService. -PRAVEGA_SENSOR_COLLECTOR_PARQ2_CLASS=io.pravega.sensor.collector.parquet.ParquetFileIngestService -PRAVEGA_SENSOR_COLLECTOR_PARQ2_FILE_SPEC=/opt/pravega-sensor-collector/ParquetNew +PRAVEGA_SENSOR_COLLECTOR_PARQ2_CLASS=io.pravega.sensor.collector.file.parquet.ParquetFileIngestService +PRAVEGA_SENSOR_COLLECTOR_PARQ2_FILE_SPEC="/opt/pravega-sensor-collector/Parquet_Files/A,/opt/pravega-sensor-collector/Parquet_Files/B" PRAVEGA_SENSOR_COLLECTOR_PARQ2_FILE_EXTENSION=parquet PRAVEGA_SENSOR_COLLECTOR_PARQ2_DELETE_COMPLETED_FILES=false PRAVEGA_SENSOR_COLLECTOR_PARQ2_DATABASE_FILE=/opt/pravega-sensor-collector/datafile.db @@ -20,4 +20,9 @@ PRAVEGA_SENSOR_COLLECTOR_PARQ2_CREATE_SCOPE=false PRAVEGA_SENSOR_COLLECTOR_PARQ2_STREAM=stream-p PRAVEGA_SENSOR_COLLECTOR_PARQ2_ROUTING_KEY=$(hostname) PRAVEGA_SENSOR_COLLECTOR_PARQ2_TRANSACTION_TIMEOUT_MINUTES=2.0 +PRAVEGA_SENSOR_COLLECTOR_PARQ2_MIN_TIME_IN_MILLIS_TO_UPDATE_FILE=5000 +HADOOP_HOME=${HOME}/dev + +# Windows: point HADOOP_HOME at the directory that contains bin/winutils.exe, e.g. +#HADOOP_HOME=/opt/dev diff --git a/pravega-sensor-collector/src/test/resources/RawFileIngest.properties b/pravega-sensor-collector/src/test/resources/RawFileIngest.properties index e7b439c2..b548ff07 100644 --- a/pravega-sensor-collector/src/test/resources/RawFileIngest.properties +++ b/pravega-sensor-collector/src/test/resources/RawFileIngest.properties @@ -8,8 +8,8 @@ # http://www.apache.org/licenses/LICENSE-2.0 # # This file can be used to manually test RawFileIngestService.
-PRAVEGA_SENSOR_COLLECTOR_RAW1_CLASS=io.pravega.sensor.collector.rawfile.RawFileIngestService -PRAVEGA_SENSOR_COLLECTOR_RAW1_FILE_SPEC=/opt/pravega-sensor-collector/Files +PRAVEGA_SENSOR_COLLECTOR_RAW1_CLASS=io.pravega.sensor.collector.file.rawfile.RawFileIngestService +PRAVEGA_SENSOR_COLLECTOR_RAW1_FILE_SPEC="/opt/pravega-sensor-collector/Files/A,/opt/pravega-sensor-collector/Files/B" PRAVEGA_SENSOR_COLLECTOR_RAW1_FILE_EXTENSION=parquet PRAVEGA_SENSOR_COLLECTOR_RAW1_DELETE_COMPLETED_FILES=false PRAVEGA_SENSOR_COLLECTOR_RAW1_DATABASE_FILE=/opt/pravega-sensor-collector/datafile.db @@ -19,4 +19,5 @@ PRAVEGA_SENSOR_COLLECTOR_RAW1_CREATE_SCOPE=false PRAVEGA_SENSOR_COLLECTOR_RAW1_STREAM=stream1 PRAVEGA_SENSOR_COLLECTOR_RAW1_ROUTING_KEY=$(hostname) PRAVEGA_SENSOR_COLLECTOR_RAW1_TRANSACTION_TIMEOUT_MINUTES=2.0 +PRAVEGA_SENSOR_COLLECTOR_RAW1_MIN_TIME_IN_MILLIS_TO_UPDATE_FILE=5000 diff --git a/scripts/build-installer.sh b/scripts/build-installer.sh index 33bacfcb..d91540e9 100755 --- a/scripts/build-installer.sh +++ b/scripts/build-installer.sh @@ -12,7 +12,11 @@ set -ex ROOT_DIR=$(readlink -f $(dirname $0)/..) source ${ROOT_DIR}/scripts/env.sh pushd ${ROOT_DIR} + GZIP="--rsyncable" ./gradlew distTar ${GRADLE_OPTIONS} -popd ls -lh ${ROOT_DIR}/pravega-sensor-collector/build/distributions/pravega-sensor-collector-${APP_VERSION}.tgz + +./gradlew shadowJar ${GRADLE_OPTIONS} +ls -lh ${ROOT_DIR}/pravega-sensor-collector/build/libs/pravega-sensor-collector-${APP_VERSION}.jar +popd diff --git a/scripts/env.sh b/scripts/env.sh index e53cb974..83712a92 100755 --- a/scripts/env.sh +++ b/scripts/env.sh @@ -11,5 +11,5 @@ export ENV_LOCAL_SCRIPT=$(dirname $0)/env-local.sh if [[ -f ${ENV_LOCAL_SCRIPT} ]]; then source ${ENV_LOCAL_SCRIPT} fi -export APP_VERSION=${APP_VERSION:-0.2.17} +export APP_VERSION=${APP_VERSION:-0.2.18} export GRADLE_OPTIONS="${GRADLE_OPTIONS:-"-Pversion=${APP_VERSION}"}" diff --git a/scripts/run-with-gradle-csv-file.sh b/scripts/run-with-gradle-csv-file.sh new file mode 100644 index 00000000..a76a6439 --- /dev/null +++ b/scripts/run-with-gradle-csv-file.sh @@ -0,0 +1,35 @@ +#!/usr/bin/env bash +# +# Copyright (c) Dell Inc., or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +set -ex + +export CREATE_SCOPE=false +export ROUTING_KEY=${HOSTNAME} +export ENABLE_PRAVEGA=true +export pravega_client_auth_method=Bearer +export pravega_client_auth_loadDynamic=true +export KEYCLOAK_SERVICE_ACCOUNT_FILE=/opt/pravega-sensor-collector/conf/keycloak.json +export JAVA_OPTS="-Xmx512m" + +export PRAVEGA_SENSOR_COLLECTOR_ACCEL2_CLASS=io.pravega.sensor.collector.file.csvfile.CsvFileIngestService +export PRAVEGA_SENSOR_COLLECTOR_ACCEL2_FILE_SPEC="/opt/pravega-sensor-collector/Files/A" +export PRAVEGA_SENSOR_COLLECTOR_ACCEL2_FILE_EXTENSION=csv +export PRAVEGA_SENSOR_COLLECTOR_ACCEL2_DATABASE_FILE=/opt/pravega-sensor-collector/datafile.db +export PRAVEGA_SENSOR_COLLECTOR_ACCEL2_SAMPLES_PER_EVENT=200 +export PRAVEGA_SENSOR_COLLECTOR_ACCEL2_PRAVEGA_CONTROLLER_URI=tls://pravega-controller.sdp.cluster1.sdp-demo.org:443 +export PRAVEGA_SENSOR_COLLECTOR_ACCEL2_SCOPE=project1 +export PRAVEGA_SENSOR_COLLECTOR_ACCEL2_STREAM=stream2 +export PRAVEGA_SENSOR_COLLECTOR_ACCEL2_ROUTING_KEY=$(hostname) +export PRAVEGA_SENSOR_COLLECTOR_ACCEL2_DELETE_COMPLETED_FILES=false +export PRAVEGA_SENSOR_COLLECTOR_ACCEL2_TRANSACTION_TIMEOUT_MINUTES=2.0 +export PRAVEGA_SENSOR_COLLECTOR_ACCEL2_CREATE_SCOPE=false + + +./gradlew --no-daemon run diff --git a/scripts/run-with-gradle-parquet-file-ingest.sh b/scripts/run-with-gradle-parquet-file-ingest.sh index c241b5f1..5914e70c 100644 --- a/scripts/run-with-gradle-parquet-file-ingest.sh +++ b/scripts/run-with-gradle-parquet-file-ingest.sh @@ -18,8 +18,8 @@ export pravega_client_auth_loadDynamic=true export KEYCLOAK_SERVICE_ACCOUNT_FILE=/opt/pravega-sensor-collector/conf/keycloak.json export JAVA_OPTS="-Xmx512m" -export PRAVEGA_SENSOR_COLLECTOR_PARQ2_CLASS=io.pravega.sensor.collector.parquet.ParquetFileIngestService -export PRAVEGA_SENSOR_COLLECTOR_PARQ2_FILE_SPEC=/opt/pravega-sensor-collector/Parquet_Files +export PRAVEGA_SENSOR_COLLECTOR_PARQ2_CLASS=io.pravega.sensor.collector.file.parquet.ParquetFileIngestService +export PRAVEGA_SENSOR_COLLECTOR_PARQ2_FILE_SPEC="/opt/pravega-sensor-collector/Parquet_Files/A,/opt/pravega-sensor-collector/Parquet_Files/B" export PRAVEGA_SENSOR_COLLECTOR_PARQ2_FILE_EXTENSION=parquet export PRAVEGA_SENSOR_COLLECTOR_PARQ2_DATABASE_FILE=/opt/pravega-sensor-collector/datafile.db export PRAVEGA_SENSOR_COLLECTOR_PARQ2_SAMPLES_PER_EVENT=200 diff --git a/scripts/run-with-gradle-raw-file.sh b/scripts/run-with-gradle-raw-file.sh index aa84cb93..feaffdfd 100644 --- a/scripts/run-with-gradle-raw-file.sh +++ b/scripts/run-with-gradle-raw-file.sh @@ -18,8 +18,8 @@ export pravega_client_auth_loadDynamic=true export KEYCLOAK_SERVICE_ACCOUNT_FILE=/opt/pravega-sensor-collector/conf/keycloak.json export JAVA_OPTS="-Xmx512m" -export PRAVEGA_SENSOR_COLLECTOR_RAW1_CLASS=io.pravega.sensor.collector.rawfile.RawFileIngestService -export PRAVEGA_SENSOR_COLLECTOR_RAW1_FILE_SPEC=/opt/pravega-sensor-collector/Files +export PRAVEGA_SENSOR_COLLECTOR_RAW1_CLASS=io.pravega.sensor.collector.file.rawfile.RawFileIngestService +export PRAVEGA_SENSOR_COLLECTOR_RAW1_FILE_SPEC="/opt/pravega-sensor-collector/Files/A,/opt/pravega-sensor-collector/Files/B" export PRAVEGA_SENSOR_COLLECTOR_RAW1_FILE_EXTENSION=parquet export PRAVEGA_SENSOR_COLLECTOR_RAW1_DATABASE_FILE=/opt/pravega-sensor-collector/datafile.db export PRAVEGA_SENSOR_COLLECTOR_RAW1_PRAVEGA_CONTROLLER_URI=tls://pravega-controller.sdp.cluster1.sdp-demo.org:443 @@ -29,5 +29,6 @@ export 
PRAVEGA_SENSOR_COLLECTOR_RAW1_ROUTING_KEY=$(hostname) export PRAVEGA_SENSOR_COLLECTOR_RAW1_DELETE_COMPLETED_FILES=false export PRAVEGA_SENSOR_COLLECTOR_RAW1_TRANSACTION_TIMEOUT_MINUTES=2.0 export PRAVEGA_SENSOR_COLLECTOR_RAW1_CREATE_SCOPE=false +export PRAVEGA_SENSOR_COLLECTOR_RAW1_MIN_TIME_IN_MILLIS_TO_UPDATE_FILE=5000 ./gradlew --no-daemon run diff --git a/windows-service/PravegaSensorCollectorApp.xml b/windows-service/PravegaSensorCollectorApp.xml index 465d243b..05d75604 100644 --- a/windows-service/PravegaSensorCollectorApp.xml +++ b/windows-service/PravegaSensorCollectorApp.xml (XML element changes in the hunks at lines 15 and 26 were stripped during extraction and are not recoverable.)
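
Two of the contracts exercised by the tests above are worth illustrating. First, `FileProcessorFactoryTest` pins down how `FileProcessorFactory.createFileSequenceProcessor` maps the ingest-service class name carried in the last `FileConfig` constructor argument to a concrete processor. The following is a minimal sketch of that dispatch, not the production implementation: the `fileType` field name and the `TransactionStateDB` parameter type are assumptions; only the three service names, the processor classes, and the five-argument call shape come from the tests.

```java
package io.pravega.sensor.collector.file;

import io.pravega.sensor.collector.file.csvfile.CsvFileSequenceProcessor;
import io.pravega.sensor.collector.file.parquet.ParquetFileProcessor;
import io.pravega.sensor.collector.file.rawfile.RawFileProcessor;
import io.pravega.sensor.collector.util.EventWriter;
import io.pravega.sensor.collector.util.TransactionCoordinator;
import io.pravega.sensor.collector.util.TransactionStateDB;

// Sketch only: mirrors the contract exercised by FileProcessorFactoryTest.
public final class FileProcessorFactorySketch {
    private FileProcessorFactorySketch() {
    }

    public static FileProcessor createFileSequenceProcessor(
            FileConfig config, TransactionStateDB state, EventWriter<byte[]> writer,
            TransactionCoordinator transactionCoordinator, String writerId) {
        // "fileType" stands in for the last FileConfig constructor argument
        // ("RawFileIngestService", "CsvFileIngestService", "ParquetFileIngestService").
        switch (config.fileType) {
            case "RawFileIngestService":
                return new RawFileProcessor(config, state, writer, transactionCoordinator, writerId);
            case "CsvFileIngestService":
                return new CsvFileSequenceProcessor(config, state, writer, transactionCoordinator, writerId);
            case "ParquetFileIngestService":
                return new ParquetFileProcessor(config, state, writer, transactionCoordinator, writerId);
            default:
                throw new UnsupportedOperationException("Unknown ingest service type: " + config.fileType);
        }
    }
}
```

The tests assert only `instanceof` relationships, so the real factory is free to match on simple or fully qualified class names, as long as each ingest service name maps to its processor type.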
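Second, `TransactionCoordinatorTests` pin down the recovery behaviour: every transaction id persisted in the `TransactionsToCommit` table is committed and then deleted; a `TxnFailedException`, or a `RuntimeException` whose message contains "Unknown transaction", is tolerated (such a transaction can never be committed, so recovery moves on), while any other exception propagates to the caller. A sketch under those assumptions follows; `commit(UUID)` throwing `TxnFailedException` matches how the mocks are stubbed, and the unconditional delete is an assumption, since the failure-path tests leave it unverified.

```java
package io.pravega.sensor.collector.util;

import io.pravega.client.stream.TxnFailedException;

import java.util.List;
import java.util.Optional;
import java.util.UUID;

// Sketch of the performRecovery() behaviour pinned down by TransactionCoordinatorTests;
// the SQLite-backed helpers are left abstract here.
public abstract class TransactionCoordinatorRecoverySketch {
    protected TransactionalEventWriter<byte[]> transactionalEventWriter;

    protected abstract List<UUID> getTransactionsToCommit();

    protected abstract void deleteTransactionToCommit(Optional<UUID> txnId);

    public void performRecovery() {
        for (UUID txnId : getTransactionsToCommit()) {
            try {
                transactionalEventWriter.commit(txnId);
            } catch (TxnFailedException e) {
                // The transaction expired or was aborted; it can never commit, so move on.
            } catch (RuntimeException e) {
                // "Unknown transaction" means Pravega no longer knows this transaction; tolerate it.
                if (e.getMessage() == null || !e.getMessage().contains("Unknown transaction")) {
                    throw e; // any other failure must surface to the caller
                }
            }
            // The success-path test verifies one delete per committed id; deleting after a
            // tolerated failure is assumed, not verified, by the tests.
            deleteTransactionToCommit(Optional.of(txnId));
        }
    }
}
```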