diff --git a/README.md b/README.md
index c737de6f..6930232b 100644
--- a/README.md
+++ b/README.md
@@ -142,6 +142,32 @@ All environment variables and properties begin with the prefix `PRAVEGA_SENSOR_C
For a list of commonly-used configuration values, see the
[sample configuration files](pravega-sensor-collector/src/main/dist/conf).
+#### Sample configuration properties
+
+
+| Configuration Parameter | Value | Description |
+|---|---|---|
+| `CREATE_SCOPE` | `false` | Boolean value indicating whether PSC should create the Pravega scope. |
+| `ROUTING_KEY` | `routingkey1` | Pravega routing key. |
+| `ENABLE_PRAVEGA` | `true` | Boolean value. Default: `true`. |
+| `pravega_client_auth_method` | `Bearer` | Authentication method used to connect to the Pravega client. |
+| `pravega_client_auth_loadDynamic` | `true` | Boolean value. Default: `true`. |
+| `KEYCLOAK_SERVICE_ACCOUNT_FILE` | `/opt/Pravega-sensor-collector/PSC_Files/keycloak-project1.json` | Path to the Keycloak service account file. |
+| `PRAVEGA_SENSOR_COLLECTOR_ACCEL2_CLASS` | Raw file: `io.pravega.sensor.collector.file.rawfile.RawFileIngestService` <br> CSV file: `io.pravega.sensor.collector.file.csvfile.CsvFileIngestService` <br> Parquet file: `io.pravega.sensor.collector.file.parquet.ParquetFileIngestService` | Fully qualified class name of the Pravega Sensor Collector ingest service. |
+| `PRAVEGA_SENSOR_COLLECTOR_RAW1_FILE_SPEC` | `/opt/Pravega-sensor-collector/files1` | Directory from which the application reads files for processing. |
+| `PRAVEGA_SENSOR_COLLECTOR_RAW1_FILE_EXTENSION` | `parquet` | Extension of the files to ingest. Examples: raw file: `parquet`; CSV file: `csv`; Parquet file: `parquet`. |
+| `PRAVEGA_SENSOR_COLLECTOR_RAW1_DATABASE_FILE` | `/opt/Pravega-sensor-collector/PSC_Files/datafile.db` | Path where the state database file is created, for example `/opt/database/databasefile.db`. |
+| `PRAVEGA_SENSOR_COLLECTOR_RAW1_PRAVEGA_CONTROLLER_URI` | `tls://pravega-controller.foggy-nelson.ns.sdp.hop.lab.emc.com:443` | Pravega controller URI. |
+| `PRAVEGA_SENSOR_COLLECTOR_RAW1_SCOPE` | `scope1` | Pravega scope name. |
+| `PRAVEGA_SENSOR_COLLECTOR_RAW1_STREAM` | `stream1` | Pravega stream name. |
+| `PRAVEGA_SENSOR_COLLECTOR_RAW1_ROUTING_KEY` | `routingkey1` | Routing key used by Pravega Sensor Collector. |
+| `PRAVEGA_SENSOR_COLLECTOR_RAW1_DELETE_COMPLETED_FILES` | `false` | If `true`, PSC deletes each file immediately after it is processed. |
+| `PRAVEGA_SENSOR_COLLECTOR_RAW1_TRANSACTION_TIMEOUT_MINUTES` | `2.0` | Timeout for each transaction, in minutes. Default value is 2 minutes. |
+| `PRAVEGA_SENSOR_COLLECTOR_RAW1_CREATE_SCOPE` | `false` | Boolean value. If Pravega is on SDP, set this to `false`. |
+| `HADOOP_HOME` | `${HOME}/dev` | On Windows, Hadoop requires native libraries; download `winutils.exe` to fix this (see [WindowsProblems](https://cwiki.apache.org/confluence/display/HADOOP2/WindowsProblems)) and set `HADOOP_HOME` to the directory containing `bin/winutils.exe`. **Required only for the Parquet file type, not for CSV or raw file ingestion.** |
+
+
+
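+For illustration, a minimal raw-file ingestion configuration might look like the sketch below. The values are placeholders drawn from the table above, and the `PRAVEGA_SENSOR_COLLECTOR_RAW1_CLASS` key is assumed to follow the same naming pattern as `PRAVEGA_SENSOR_COLLECTOR_ACCEL2_CLASS`; refer to the [sample configuration files](pravega-sensor-collector/src/main/dist/conf) for authoritative examples.
+
+```
+PRAVEGA_SENSOR_COLLECTOR_RAW1_CLASS=io.pravega.sensor.collector.file.rawfile.RawFileIngestService
+PRAVEGA_SENSOR_COLLECTOR_RAW1_FILE_SPEC=/opt/Pravega-sensor-collector/files1
+PRAVEGA_SENSOR_COLLECTOR_RAW1_FILE_EXTENSION=parquet
+PRAVEGA_SENSOR_COLLECTOR_RAW1_DATABASE_FILE=/opt/Pravega-sensor-collector/PSC_Files/datafile.db
+PRAVEGA_SENSOR_COLLECTOR_RAW1_PRAVEGA_CONTROLLER_URI=tls://<controller-host>:443
+PRAVEGA_SENSOR_COLLECTOR_RAW1_SCOPE=scope1
+PRAVEGA_SENSOR_COLLECTOR_RAW1_STREAM=stream1
+PRAVEGA_SENSOR_COLLECTOR_RAW1_ROUTING_KEY=routingkey1
+PRAVEGA_SENSOR_COLLECTOR_RAW1_DELETE_COMPLETED_FILES=false
+PRAVEGA_SENSOR_COLLECTOR_RAW1_CREATE_SCOPE=false
+PRAVEGA_SENSOR_COLLECTOR_RAW1_TRANSACTION_TIMEOUT_MINUTES=2.0
+```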
### Install the Service
1. The only prerequisite on the target system is Java 11.
@@ -270,11 +296,13 @@ If DNS is not configured throughout your network, you may need to edit the /etc/
### Running as a Windows Service
-1. Download winsw.exe from https://github.com/winsw/winsw/releases and rename it as PravegaSensorCollectorApp.exe.
+1. Download winsw.exe from https://github.com/winsw/winsw/releases, rename it to PravegaSensorCollectorApp.exe, and save it in the same folder as [PravegaSensorCollectorApp.xml](windows-service/PravegaSensorCollectorApp.xml).
+
+2. Modify PravegaSensorCollectorApp.xml. Verify that `PRAVEGA_SENSOR_COLLECTOR_RAW1_PRAVEGA_CONTROLLER_URI` is set correctly for your environment.
-2. Modify [PravegaSensorCollectorApp.xml](windows-service/PravegaSensorCollectorApp.xml). Check PRAVEGA_SENSOR_COLLECTOR_RAW1_PRAVEGA_CONTROLLER_URI.
+3. Add the path of the jar file `pravega-sensor-collector/build/libs/pravega-sensor-collector-${APP_VERSION}.jar` generated by build-installer.sh to PravegaSensorCollectorApp.xml.
-3. Install and run the service using following commands:
+4. Install and run the service using the following commands:
```
PravegaSensorCollectorApp.exe install
PravegaSensorCollectorApp.exe start
diff --git a/build.gradle b/build.gradle
index 0c778046..916bc9e9 100644
--- a/build.gradle
+++ b/build.gradle
@@ -15,7 +15,7 @@
* user guide available at https://docs.gradle.org/3.4.1/userguide/tutorial_java_projects.html
*/
-configurations.all {
+configurations.configureEach {
// Check for updates every build
resolutionStrategy.cacheChangingModulesFor 0, "seconds"
}
@@ -23,6 +23,7 @@ configurations.all {
subprojects {
repositories {
mavenLocal()
+ mavenCentral()
maven {
url "https://oss.jfrog.org/jfrog-dependencies"
}
diff --git a/config/checkstyle.xml b/config/checkstyle.xml
new file mode 100644
index 00000000..b910873f
--- /dev/null
+++ b/config/checkstyle.xml
@@ -0,0 +1,144 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/config/eclipse.xml b/config/eclipse.xml
new file mode 100644
index 00000000..937f5828
--- /dev/null
+++ b/config/eclipse.xml
@@ -0,0 +1,313 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/config/import-control.xml b/config/import-control.xml
new file mode 100644
index 00000000..b9c48b02
--- /dev/null
+++ b/config/import-control.xml
@@ -0,0 +1,56 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/config/intelij.xml b/config/intelij.xml
new file mode 100644
index 00000000..df3b6fa0
--- /dev/null
+++ b/config/intelij.xml
@@ -0,0 +1,39 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/config/spotbugs-exclude.xml b/config/spotbugs-exclude.xml
new file mode 100644
index 00000000..170dff33
--- /dev/null
+++ b/config/spotbugs-exclude.xml
@@ -0,0 +1,49 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/config/spotbugs-include.xml b/config/spotbugs-include.xml
new file mode 100644
index 00000000..b03c9895
--- /dev/null
+++ b/config/spotbugs-include.xml
@@ -0,0 +1,52 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/config/suppressions.xml b/config/suppressions.xml
new file mode 100644
index 00000000..fdb40328
--- /dev/null
+++ b/config/suppressions.xml
@@ -0,0 +1,15 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/gradle.properties b/gradle.properties
index 0b6c38b5..fa99c486 100644
--- a/gradle.properties
+++ b/gradle.properties
@@ -13,23 +13,27 @@ commonsCLIVersion=1.4
commonsCSVVersion=1.8
commonsCodecVersion=1.14
commonsMath3Version=3.6.1
-grizzlyVersion=2.25.1
-gsonVersion=2.8.9
+grizzlyVersion=3.1.3
+gsonVersion=2.10.1
includePravegaCredentials=true
-jacksonVersion=2.9.10.3
-junitVersion=4.12
+jacksonVersion=2.15.2
+junitVersion=5.6.2
jakartaBindVersion=2.3.2
jaxbVersion=2.3.2
javaxServletApiVersion=3.0.1
miloVersion=0.6.8
pravegaCredentialsVersion=0.12.0
pravegaVersion=0.12.0
-qosLogbackVersion=1.2.3
-slf4jApiVersion=1.7.25
-sqliteVersion=3.32.3
-parquetVersion=1.12.1
+qosLogbackVersion=1.4.11
+shadowPluginVersion=7.1.0
+slf4jApiVersion=2.0.9
+sqliteVersion=3.43.0.0
+parquetVersion=1.13.1
hadoopVersion=3.2.1
-
+mockitoVersion=3.12.4
+spotbugsVersion=4.8.1
+spotbugsPluginVersion=5.1.4
+checkstyleVersion=10.12.5
# Application version. This will be overridden by APP_VERSION in scripts/env.sh when using scripts/publish.sh.
version=unknown
diff --git a/gradle/wrapper/gradle-wrapper.properties b/gradle/wrapper/gradle-wrapper.properties
index b45533d3..06abd188 100644
--- a/gradle/wrapper/gradle-wrapper.properties
+++ b/gradle/wrapper/gradle-wrapper.properties
@@ -11,4 +11,4 @@ distributionBase=GRADLE_USER_HOME
distributionPath=wrapper/dists
zipStoreBase=GRADLE_USER_HOME
zipStorePath=wrapper/dists
-distributionUrl=https\://services.gradle.org/distributions/gradle-6.3-all.zip
+distributionUrl=https\://services.gradle.org/distributions/gradle-7.6.3-all.zip
diff --git a/parquet-file-sample-data/sub1.parquet b/parquet-file-sample-data/sub1.parquet
new file mode 100644
index 00000000..05b51be6
Binary files /dev/null and b/parquet-file-sample-data/sub1.parquet differ
diff --git a/parquet-file-sample-data/sub2.parquet b/parquet-file-sample-data/sub2.parquet
new file mode 100644
index 00000000..c4344b12
Binary files /dev/null and b/parquet-file-sample-data/sub2.parquet differ
diff --git a/parquet-file-sample-data/sub3.parquet b/parquet-file-sample-data/sub3.parquet
new file mode 100644
index 00000000..d6568258
Binary files /dev/null and b/parquet-file-sample-data/sub3.parquet differ
diff --git a/parquet-file-sample-data/test_file/f1-f10/sub1.parquet b/parquet-file-sample-data/test_file/f1-f10/sub1.parquet
new file mode 100644
index 00000000..05b51be6
Binary files /dev/null and b/parquet-file-sample-data/test_file/f1-f10/sub1.parquet differ
diff --git a/parquet-file-sample-data/test_file/sub1.parquet b/parquet-file-sample-data/test_file/sub1.parquet
new file mode 100644
index 00000000..05b51be6
Binary files /dev/null and b/parquet-file-sample-data/test_file/sub1.parquet differ
diff --git a/parquet-file-sample-data/test_file/sub2.parquet b/parquet-file-sample-data/test_file/sub2.parquet
new file mode 100644
index 00000000..c4344b12
Binary files /dev/null and b/parquet-file-sample-data/test_file/sub2.parquet differ
diff --git a/parquet-file-sample-data/test_file/sub3.parquet b/parquet-file-sample-data/test_file/sub3.parquet
new file mode 100644
index 00000000..d6568258
Binary files /dev/null and b/parquet-file-sample-data/test_file/sub3.parquet differ
diff --git a/pravega-sensor-collector/build.gradle b/pravega-sensor-collector/build.gradle
index 2c63d8b7..4c5031d0 100644
--- a/pravega-sensor-collector/build.gradle
+++ b/pravega-sensor-collector/build.gradle
@@ -7,9 +7,25 @@
*
* http://www.apache.org/licenses/LICENSE-2.0
*/
+
+buildscript {
+ repositories {
+ maven {
+ url = uri("https://plugins.gradle.org/m2/")
+ }
+ }
+ dependencies {
+ classpath("com.github.spotbugs.snom:spotbugs-gradle-plugin:${spotbugsPluginVersion}")
+ classpath("gradle.plugin.com.github.johnrengelman:shadow:${shadowPluginVersion}")
+ }
+}
+
apply plugin: "java"
-apply plugin: "maven"
+apply plugin: "maven-publish"
+apply plugin: "com.github.spotbugs"
+apply plugin: 'checkstyle'
apply plugin: "application"
+apply plugin: "com.github.johnrengelman.shadow"
group = "io.pravega"
archivesBaseName = "pravega-sensor-collector"
@@ -18,48 +34,58 @@ mainClassName = "io.pravega.sensor.collector.PravegaSensorCollectorApp"
sourceCompatibility = 11
targetCompatibility = 11
+test {
+ useJUnitPlatform()
+}
+
dependencies {
- compile "org.slf4j:slf4j-api:${slf4jApiVersion}"
- compile "ch.qos.logback:logback-classic:${qosLogbackVersion}"
- compile "ch.qos.logback:logback-core:${qosLogbackVersion}"
+ implementation "org.slf4j:slf4j-api:${slf4jApiVersion}"
+ implementation "ch.qos.logback:logback-classic:${qosLogbackVersion}"
+ implementation "ch.qos.logback:logback-core:${qosLogbackVersion}"
- compile "io.pravega:pravega-client:${pravegaVersion}",
+ implementation "io.pravega:pravega-client:${pravegaVersion}",
"io.pravega:pravega-common:${pravegaVersion}",
"commons-cli:commons-cli:${commonsCLIVersion}"
if (includePravegaCredentials.toBoolean()) {
- compile "io.pravega:pravega-keycloak-client:${pravegaCredentialsVersion}"
+ implementation "io.pravega:pravega-keycloak-client:${pravegaCredentialsVersion}"
}
- compile "com.fasterxml.jackson.core:jackson-databind:${jacksonVersion}"
- compile "org.xerial:sqlite-jdbc:${sqliteVersion}"
- compile "org.apache.commons:commons-math3:${commonsMath3Version}"
- compile "org.apache.commons:commons-csv:${commonsCSVVersion}"
- compile "commons-codec:commons-codec:${commonsCodecVersion}"
- compile "com.github.vladimir-bukhtoyarov:bucket4j-core:${bucket4jVersion}"
- compile "org.eclipse.milo:sdk-client:${miloVersion}"
- compile "com.google.code.gson:gson:${gsonVersion}"
+ implementation "com.fasterxml.jackson.core:jackson-databind:${jacksonVersion}"
+ implementation "org.xerial:sqlite-jdbc:${sqliteVersion}"
+ implementation "org.apache.commons:commons-math3:${commonsMath3Version}"
+ implementation "org.apache.commons:commons-csv:${commonsCSVVersion}"
+ implementation "commons-codec:commons-codec:${commonsCodecVersion}"
+ implementation "com.github.vladimir-bukhtoyarov:bucket4j-core:${bucket4jVersion}"
+ implementation "org.eclipse.milo:sdk-client:${miloVersion}"
+ implementation "com.google.code.gson:gson:${gsonVersion}"
+
+ implementation "org.apache.parquet:parquet-avro:${parquetVersion}"
+ implementation "org.apache.parquet:parquet-hadoop:${parquetVersion}"
+ implementation "org.apache.hadoop:hadoop-client:${hadoopVersion}"
- compile "org.apache.parquet:parquet-avro:${parquetVersion}"
- compile "org.apache.parquet:parquet-hadoop:${parquetVersion}"
- compile "org.apache.hadoop:hadoop-client:${hadoopVersion}"
+ testImplementation "org.junit.jupiter:junit-jupiter-api:${junitVersion}"
+ testImplementation "org.junit.vintage:junit-vintage-engine:${junitVersion}"
+ testRuntimeOnly "org.junit.jupiter:junit-jupiter-engine:${junitVersion}"
+ testImplementation "org.junit.platform:junit-platform-launcher"
- testCompile "junit:junit:${junitVersion}"
+ testImplementation "org.mockito:mockito-core:${mockitoVersion}"
- testCompile "org.glassfish.jersey.containers:jersey-container-grizzly2-http:${grizzlyVersion}"
- testCompile "jakarta.xml.bind:jakarta.xml.bind-api:${jakartaBindVersion}"
- testCompile "org.glassfish.jaxb:jaxb-runtime:${jaxbVersion}"
- testCompile "javax.servlet:javax.servlet-api:${javaxServletApiVersion}"
+ testImplementation "org.glassfish.jersey.containers:jersey-container-grizzly2-http:${grizzlyVersion}"
+ testImplementation "jakarta.xml.bind:jakarta.xml.bind-api:${jakartaBindVersion}"
+ testImplementation "org.glassfish.jaxb:jaxb-runtime:${jaxbVersion}"
+ testImplementation "javax.servlet:javax.servlet-api:${javaxServletApiVersion}"
+ spotbugsPlugins 'com.h3xstream.findsecbugs:findsecbugs-plugin:1.12.0'
}
-tasks.withType(JavaCompile) {
+tasks.withType(JavaCompile).configureEach {
options.encoding = "UTF-8"
}
distributions {
main {
- baseName = archivesBaseName
+ distributionBaseName = archivesBaseName
}
}
@@ -77,9 +103,56 @@ startScripts {
}
}
-task runLeapAPIMockServer(type: JavaExec) {
+shadowJar{
+ archiveBaseName = 'pravega-sensor-collector'
+ archiveClassifier = ''
+}
+
+tasks.register('runLeapAPIMockServer', JavaExec) {
group = "Execution"
description = "Run the mock Leap server"
classpath = sourceSets.test.runtimeClasspath
main = "io.pravega.sensor.collector.leap.LeapAPIMock"
}
+
+tasks.withType(com.github.spotbugs.snom.SpotBugsTask) {
+ reports {
+ xml {
+ required.set(false)
+ }
+ html {
+ required.set(true)
+ }
+ }
+}
+
+spotbugs {
+ toolVersion = spotbugsVersion
+ ignoreFailures = true
+ showProgress = true
+ effort = 'max'
+ reportLevel = 'default'
+ includeFilter = file("$rootDir/config/spotbugs-include.xml")
+ excludeFilter = file("$rootDir/config/spotbugs-exclude.xml")
+ //baselineFile = file("baseline.xml")
+}
+checkstyle {
+ toolVersion = checkstyleVersion
+ configFile = file("$rootDir/config/checkstyle.xml")
+ ignoreFailures = true
+ configProperties = [importControlFile: "$rootDir/config/import-control.xml",
+ suppressionsFile: "$rootDir/config/suppressions.xml"]
+ checkstyleMain {
+ source = sourceSets.main.allSource
+ }
+ configurations {
+ checkstyle
+ }
+
+ dependencies{
+ assert project.hasProperty("checkstyleVersion")
+
+ checkstyle "com.puppycrawl.tools:checkstyle:${checkstyleVersion}"
+ }
+}
+
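+// Note: the SpotBugs and Checkstyle plugins above register their standard verification tasks
+// (assumed default task names), so the checks can be run locally with, for example:
+//   ./gradlew spotbugsMain checkstyleMain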
diff --git a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/EventGenerator.java b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/EventGenerator.java
index 6f25b591..f917934d 100644
--- a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/EventGenerator.java
+++ b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/EventGenerator.java
@@ -9,106 +9,25 @@
*/
package io.pravega.sensor.collector.file;
-import com.fasterxml.jackson.databind.JsonNode;
-import com.fasterxml.jackson.databind.ObjectMapper;
-import com.fasterxml.jackson.databind.node.ArrayNode;
-import com.fasterxml.jackson.databind.node.ObjectNode;
import com.google.common.io.CountingInputStream;
-import org.apache.commons.csv.CSVFormat;
-import org.apache.commons.csv.CSVParser;
-import org.apache.commons.csv.CSVRecord;
-import org.apache.commons.lang3.tuple.ImmutablePair;
+import io.pravega.sensor.collector.util.PravegaWriterEvent;
import org.apache.commons.lang3.tuple.Pair;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
import java.io.IOException;
-import java.nio.charset.StandardCharsets;
-import java.util.Map;
import java.util.function.Consumer;
-public class EventGenerator {
- private static final Logger log = LoggerFactory.getLogger(EventGenerator.class);
-
- private final String routingKey;
- private final int maxRecordsPerEvent;
- private final ObjectNode eventTemplate;
- private final ObjectMapper mapper;
-
- public EventGenerator(String routingKey, int maxRecordsPerEvent, ObjectNode eventTemplate, ObjectMapper mapper) {
- this.routingKey = routingKey;
- this.maxRecordsPerEvent = maxRecordsPerEvent;
- this.eventTemplate = eventTemplate;
- this.mapper = mapper;
- }
-
- public static EventGenerator create(String routingKey, int maxRecordsPerEvent, String eventTemplateStr, String writerId) {
- try {
- final ObjectMapper mapper = new ObjectMapper();
- final ObjectNode eventTemplate = (ObjectNode) mapper.readTree(eventTemplateStr);
- eventTemplate.put("WriterId", writerId);
- return new EventGenerator(routingKey, maxRecordsPerEvent, eventTemplate, mapper);
- } catch (IOException e) {
- throw new RuntimeException(e);
- }
- }
-
- public static EventGenerator create(String routingKey, int maxRecordsPerEvent) throws IOException {
- return create(routingKey, maxRecordsPerEvent, "{}", "MyWriterId");
- }
-
- /**
- * @param inputStream
- * @param firstSequenceNumber
- * @return next sequence number, end offset
- */
- protected Pair generateEventsFromInputStream(CountingInputStream inputStream, long firstSequenceNumber, Consumer consumer) throws IOException {
- final CSVFormat format = CSVFormat.DEFAULT.withFirstRecordAsHeader();
- final CSVParser parser = CSVParser.parse(inputStream, StandardCharsets.UTF_8, format);
- long nextSequenceNumber = firstSequenceNumber;
- int numRecordsInEvent = 0;
- ObjectNode jsonEvent = null;
- for (CSVRecord record: parser) {
- if (numRecordsInEvent >= maxRecordsPerEvent) {
- consumer.accept(new PravegaWriterEvent(routingKey, nextSequenceNumber, mapper.writeValueAsBytes(jsonEvent)));
- nextSequenceNumber++;
- jsonEvent = null;
- numRecordsInEvent = 0;
- }
- if (jsonEvent == null) {
- jsonEvent = eventTemplate.deepCopy();
- }
- for (Map.Entry entry: record.toMap().entrySet()) {
- addValueToArray(jsonEvent, entry.getKey(), entry.getValue());
- }
- numRecordsInEvent++;
- }
- if (jsonEvent != null) {
- consumer.accept(new PravegaWriterEvent(routingKey, nextSequenceNumber, mapper.writeValueAsBytes(jsonEvent)));
- nextSequenceNumber++;
- }
- final long endOffset = inputStream.getCount();
- return new ImmutablePair<>(nextSequenceNumber, endOffset);
- }
+/**
+ * The EventGenerator is responsible for generating events depending on the file type.
+ */
+public interface EventGenerator {
- protected JsonNode stringValueToJsonNode(String s) {
- // TODO: convert timestamp
- try {
- return mapper.getNodeFactory().numberNode(Long.parseLong(s));
- } catch (NumberFormatException ignored) {}
- try {
- return mapper.getNodeFactory().numberNode(Double.parseDouble(s));
- } catch (NumberFormatException ignored) {}
- return mapper.getNodeFactory().textNode(s);
- }
+ /**
+ * Generate events from an input stream.
+ * The event generation logic differs depending on the file type.
+ * @param inputStream
+ * @param firstSequenceNumber
+ * @return next sequence number, end offset
+ */
+ Pair<Long, Long> generateEventsFromInputStream(CountingInputStream inputStream, long firstSequenceNumber, Consumer<PravegaWriterEvent> consumer) throws IOException;
- protected void addValueToArray(ObjectNode objectNode, String key, String value) {
- final JsonNode node = objectNode.get(key);
- final JsonNode valueNode = stringValueToJsonNode(value);
- if (node instanceof ArrayNode ) {
- ((ArrayNode) node).add(valueNode);
- } else {
- objectNode.putArray(key).add(valueNode);
- }
- }
}
diff --git a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/parquet/ParquetFileConfig.java b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/FileConfig.java
similarity index 72%
rename from pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/parquet/ParquetFileConfig.java
rename to pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/FileConfig.java
index 78111602..d4b96d37 100644
--- a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/parquet/ParquetFileConfig.java
+++ b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/FileConfig.java
@@ -7,19 +7,19 @@
*
* http://www.apache.org/licenses/LICENSE-2.0
*/
-package io.pravega.sensor.collector.parquet;
+package io.pravega.sensor.collector.file;
-/**
- * Config passed to Pravega Sensor Collector
+/**
+ * Configuration for file ingestion.
*/
-public class ParquetFileConfig {
+public class FileConfig {
public final String stateDatabaseFileName;
public final String fileSpec;
public final String fileExtension;
public final String routingKey;
public final String streamName;
public final String eventTemplateStr;
-
+ public final String fileType;
/**
* Also known as samplesPerEvent.
*/
@@ -29,7 +29,9 @@ public class ParquetFileConfig {
public final boolean exactlyOnce;
public final double transactionTimeoutMinutes;
- public ParquetFileConfig(String stateDatabaseFileName, String fileSpec, String fileExtension, String routingKey, String streamName, String eventTemplateStr, int maxRecordsPerEvent, boolean enableDeleteCompletedFiles, boolean exactlyOnce, double transactionTimeoutMinutes) {
+ public final long minTimeInMillisToUpdateFile;
+
+ public FileConfig(String stateDatabaseFileName, String fileSpec, String fileExtension, String routingKey, String streamName, String eventTemplateStr, int maxRecordsPerEvent, boolean enableDeleteCompletedFiles, boolean exactlyOnce, double transactionTimeoutMinutes, long minTimeInMillisToUpdateFile, String fileType) {
this.stateDatabaseFileName = stateDatabaseFileName;
this.fileSpec = fileSpec;
this.fileExtension = fileExtension;
@@ -40,14 +42,17 @@ public ParquetFileConfig(String stateDatabaseFileName, String fileSpec, String f
this.enableDeleteCompletedFiles = enableDeleteCompletedFiles;
this.exactlyOnce = exactlyOnce;
this.transactionTimeoutMinutes = transactionTimeoutMinutes;
+ this.minTimeInMillisToUpdateFile = minTimeInMillisToUpdateFile;
+ this.fileType = fileType;
}
@Override
public String toString() {
- return "ParquetFileConfig{" +
+ return "FileConfig{" +
"stateDatabaseFileName='" + stateDatabaseFileName + '\'' +
", fileSpec='" + fileSpec + '\'' +
", fileExtension='" + fileExtension + '\'' +
+ ", fileType='" + fileType + '\'' +
", routingKey='" + routingKey + '\'' +
", streamName='" + streamName + '\'' +
", eventTemplateStr='" + eventTemplateStr + '\'' +
@@ -55,8 +60,7 @@ public String toString() {
", enableDeleteCompletedFiles=" + enableDeleteCompletedFiles +
", exactlyOnce=" + exactlyOnce +
", transactionTimeoutMinutes=" + transactionTimeoutMinutes +
+ ", minTimeInMillisToUpdateFile=" + minTimeInMillisToUpdateFile +
'}';
}
-
-
}
diff --git a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/parquet/ParquetFileIngestService.java b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/FileIngestService.java
similarity index 63%
rename from pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/parquet/ParquetFileIngestService.java
rename to pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/FileIngestService.java
index 97a8cd3e..6926a8c0 100644
--- a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/parquet/ParquetFileIngestService.java
+++ b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/FileIngestService.java
@@ -7,30 +7,27 @@
*
* http://www.apache.org/licenses/LICENSE-2.0
*/
-package io.pravega.sensor.collector.parquet;
-
-import java.util.concurrent.Executors;
-import java.util.concurrent.ScheduledExecutorService;
-import java.util.concurrent.ScheduledFuture;
-import java.util.concurrent.ThreadFactory;
-import java.util.concurrent.TimeUnit;
+package io.pravega.sensor.collector.file;
import com.google.common.util.concurrent.ThreadFactoryBuilder;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
import io.pravega.client.EventStreamClientFactory;
import io.pravega.sensor.collector.DeviceDriver;
import io.pravega.sensor.collector.DeviceDriverConfig;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import java.util.concurrent.Executors;
+import java.util.concurrent.ScheduledExecutorService;
+import java.util.concurrent.ScheduledFuture;
+import java.util.concurrent.ThreadFactory;
+import java.util.concurrent.TimeUnit;
/**
- * Ingestion service for parquet file data.
+ * Ingestion service with common implementation logic for all files.
*/
-public class ParquetFileIngestService extends DeviceDriver{
- private static final Logger log = LoggerFactory.getLogger(ParquetFileIngestService.class);
-
+public abstract class FileIngestService extends DeviceDriver {
+ private static final Logger log = LoggerFactory.getLogger(FileIngestService.class);
+
private static final String FILE_SPEC_KEY = "FILE_SPEC";
private static final String FILE_EXT= "FILE_EXTENSION";
private static final String DELETE_COMPLETED_FILES_KEY = "DELETE_COMPLETED_FILES";
@@ -44,14 +41,17 @@ public class ParquetFileIngestService extends DeviceDriver{
private static final String ROUTING_KEY_KEY = "ROUTING_KEY";
private static final String EXACTLY_ONCE_KEY = "EXACTLY_ONCE";
private static final String TRANSACTION_TIMEOUT_MINUTES_KEY = "TRANSACTION_TIMEOUT_MINUTES";
+ private static final String MIN_TIME_IN_MILLIS_TO_UPDATE_FILE_KEY = "MIN_TIME_IN_MILLIS_TO_UPDATE_FILE";
- private final ParquetFileProcessor processor;
+ private final FileProcessor processor;
private final ScheduledExecutorService executor;
- private ScheduledFuture<?> task;
- public ParquetFileIngestService(DeviceDriverConfig config){
+ private ScheduledFuture<?> watchFiletask;
+ private ScheduledFuture<?> processFileTask;
+
+ public FileIngestService(DeviceDriverConfig config) {
super(config);
- final ParquetFileConfig parquetFileConfig = new ParquetFileConfig(
+ final FileConfig fileSequenceConfig = new FileConfig(
getDatabaseFileName(),
getFileSpec(),
getFileExtension(),
@@ -61,24 +61,23 @@ public ParquetFileIngestService(DeviceDriverConfig config){
getSamplesPerEvent(),
getDeleteCompletedFiles(),
getExactlyOnce(),
- getTransactionTimeoutMinutes());
- log.info("Parquet File Ingest Config: {}", parquetFileConfig);
+ getTransactionTimeoutMinutes(),
+ getMinTimeInMillisToUpdateFile(),
+ config.getClassName());
+ log.info("File Ingest Config: {}", fileSequenceConfig);
final String scopeName = getScopeName();
log.info("Scope: {}", scopeName);
createStream(scopeName, getStreamName());
-
final EventStreamClientFactory clientFactory = getEventStreamClientFactory(scopeName);
- processor = ParquetFileProcessor.create(parquetFileConfig, clientFactory);
+ processor =FileProcessor.create(fileSequenceConfig, clientFactory);
ThreadFactory namedThreadFactory = new ThreadFactoryBuilder().setNameFormat(
- ParquetFileIngestService.class.getSimpleName() + "-" + config.getInstanceName() + "-%d").build();
+ FileIngestService.class.getSimpleName() + "-" + config.getInstanceName() + "-%d").build();
executor = Executors.newScheduledThreadPool(1, namedThreadFactory);
-
}
String getFileSpec() {
return getProperty(FILE_SPEC_KEY);
}
-
String getFileExtension() {
return getProperty(FILE_EXT, "");
}
@@ -126,30 +125,55 @@ boolean getExactlyOnce() {
return Double.parseDouble(getProperty(TRANSACTION_TIMEOUT_MINUTES_KEY, Double.toString(18.0 * 60.0)));
}
- protected void ingestParquetFiles() {
- log.trace("ingestParquetFiles: BEGIN");
+ long getMinTimeInMillisToUpdateFile() {
+ return Long.parseLong(getProperty(MIN_TIME_IN_MILLIS_TO_UPDATE_FILE_KEY, "5000"));
+ }
+
+ protected void watchFiles() {
+ log.trace("watchFiles: BEGIN");
try {
- processor.ingestParquetFiles();
+ processor.watchFiles();
} catch (Exception e) {
- log.error("Error", e);
+ log.error("watchFiles: watch file error", e);
// Continue on any errors. We will retry on the next iteration.
}
- log.trace("ingestParquetFiles: END");
+ log.trace("watchFiles: END");
+ }
+ protected void processFiles() {
+ log.trace("processFiles: BEGIN");
+ try {
+ processor.processFiles();
+ } catch (Exception e) {
+ log.error("processFiles: Process file error", e);
+ // Continue on any errors. We will retry on the next iteration.
+ }
+ log.trace("processFiles: END");
}
@Override
protected void doStart() {
- task = executor.scheduleAtFixedRate(
- this::ingestParquetFiles,
+ watchFiletask = executor.scheduleAtFixedRate(
+ this::watchFiles,
0,
getIntervalMs(),
TimeUnit.MILLISECONDS);
- notifyStarted();
+ /*
+ Submits a periodic action that becomes enabled immediately for the first time,
+ and subsequently with a delay of 1 millisecond between the termination of one execution
+ and the commencement of the next, i.e. immediately after the previous run completes.
+ */
+ processFileTask = executor.scheduleWithFixedDelay(
+ this::processFiles,
+ 0,
+ 1,
+ TimeUnit.MILLISECONDS);
+ notifyStarted();
}
@Override
protected void doStop() {
- task.cancel(false);
+ log.info("doStop: Cancelling ingestion task and process file task");
+ watchFiletask.cancel(false);
+ processFileTask.cancel(false);
}
-
}
diff --git a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/FileNameWithOffset.java b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/FileNameWithOffset.java
deleted file mode 100644
index 43f71456..00000000
--- a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/FileNameWithOffset.java
+++ /dev/null
@@ -1,55 +0,0 @@
-/**
- * Copyright (c) Dell Inc., or its subsidiaries. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- */
-package io.pravega.sensor.collector.file;
-
-import java.util.Objects;
-
-public class FileNameWithOffset implements Comparable {
- public final String fileName;
- /**
- * In some contexts, this is the size of the file.
- * In the future, this will represent the offset in the file for incrementally ingesting growing log files.
- * This is partially implemented today.
- * TODO: Clarify usage of offset.
- */
- public final long offset;
-
- public FileNameWithOffset(String fileName, long offset) {
- this.fileName = fileName;
- this.offset = offset;
- }
-
- @Override
- public String toString() {
- return "FileNameWithOffset{" +
- "fileName='" + fileName + '\'' +
- ", offset=" + offset +
- '}';
- }
-
- @Override
- public boolean equals(Object o) {
- if (this == o) return true;
- if (o == null || getClass() != o.getClass()) return false;
- FileNameWithOffset that = (FileNameWithOffset) o;
- return offset == that.offset &&
- Objects.equals(fileName, that.fileName);
- }
-
- @Override
- public int hashCode() {
- return Objects.hash(fileName, offset);
- }
-
- @Override
- public int compareTo(FileNameWithOffset o) {
- return this.fileName.compareTo(o.fileName);
- }
-}
diff --git a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/FileProcessor.java b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/FileProcessor.java
new file mode 100644
index 00000000..4f5e6d58
--- /dev/null
+++ b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/FileProcessor.java
@@ -0,0 +1,284 @@
+/**
+ * Copyright (c) Dell Inc., or its subsidiaries. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ */
+package io.pravega.sensor.collector.file;
+
+import com.google.common.io.CountingInputStream;
+import io.pravega.client.EventStreamClientFactory;
+import io.pravega.client.stream.EventWriterConfig;
+import io.pravega.client.stream.Transaction;
+import io.pravega.client.stream.TxnFailedException;
+import io.pravega.client.stream.impl.ByteArraySerializer;
+import io.pravega.sensor.collector.util.*;
+import org.apache.commons.lang3.tuple.Pair;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.file.Files;
+import java.nio.file.Paths;
+import java.nio.file.Path;
+import java.sql.Connection;
+import java.sql.SQLException;
+import java.util.List;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Set;
+import java.util.HashSet;
+import java.util.UUID;
+import java.util.Optional;
+import java.util.concurrent.atomic.AtomicLong;
+
+/**
+ * Get list of files obtained from config. Process each file for ingestion.
+ * Keep track of new files and delete ingested files if "DELETE_COMPLETED_FILES"=true.
+ */
+public abstract class FileProcessor {
+ private static final Logger log = LoggerFactory.getLogger(FileProcessor.class);
+
+ private final FileConfig config;
+ private final TransactionStateDB state;
+ private final EventWriter writer;
+ private final TransactionCoordinator transactionCoordinator;
+ private final EventGenerator eventGenerator;
+ private final Path movedFilesDirectory;
+
+ public FileProcessor(FileConfig config, TransactionStateDB state, EventWriter writer, TransactionCoordinator transactionCoordinator) {
+ this.config = config;
+ this.state = state;
+ this.writer = writer;
+ this.transactionCoordinator = transactionCoordinator;
+ this.eventGenerator = getEventGenerator(config);
+ this.movedFilesDirectory = Paths.get(config.stateDatabaseFileName).getParent();
+ }
+
+ public static FileProcessor create(
+ FileConfig config, EventStreamClientFactory clientFactory){
+
+ final Connection connection = SQliteDBUtility.createDatabase(config.stateDatabaseFileName);
+
+ final String writerId = new PersistentId(connection).getPersistentId().toString();
+ log.info("Writer ID: {}", writerId);
+
+ final EventWriter writer = EventWriter.create(
+ clientFactory,
+ writerId,
+ config.streamName,
+ new ByteArraySerializer(),
+ EventWriterConfig.builder()
+ .enableConnectionPooling(true)
+ .transactionTimeoutTime((long) (config.transactionTimeoutMinutes * 60.0 * 1000.0))
+ .build(),
+ config.exactlyOnce);
+
+ final TransactionCoordinator transactionCoordinator = new TransactionCoordinator(connection, writer);
+ transactionCoordinator.performRecovery();
+
+ final TransactionStateDB state = new TransactionStateSQLiteImpl(connection, transactionCoordinator);
+ return FileProcessorFactory.createFileSequenceProcessor(config, state, writer, transactionCoordinator,writerId);
+
+ }
+
+ /**
+ * Returns the event generator responsible for generating events.
+ * The generation logic is tailored to each specific file type, so it is implemented in the respective subclasses.
+ * @param config configuration parameters
+ * @return eventGenerator
+ */
+ public abstract EventGenerator getEventGenerator(FileConfig config);
+ public void watchFiles() throws Exception {
+ findAndRecordNewFiles();
+ }
+ public void processFiles() throws Exception {
+ log.debug("processFiles: BEGIN");
+ if (config.enableDeleteCompletedFiles) {
+ log.debug("processFiles: Deleting completed files");
+ deleteCompletedFiles();
+ }
+ processNewFiles();
+ log.debug("processFiles: END");
+ }
+
+ public void processNewFiles() throws Exception {
+ for (;;) {
+ // If nextFile is null, there are no pending files; checking for new files is handled by the scheduled watchFiles task.
+ final Pair<FileNameWithOffset, Long> nextFile = state.getNextPendingFileRecord();
+ if (nextFile == null) {
+ log.debug("processNewFiles: No more files to watch");
+ break;
+ } else {
+ processFile(nextFile.getLeft(), nextFile.getRight());
+ }
+ }
+ }
+
+ protected void findAndRecordNewFiles() throws Exception {
+ final List<FileNameWithOffset> directoryListing = getDirectoryListing();
+ final List<FileNameWithOffset> completedFiles = state.getCompletedFileRecords();
+ final List<FileNameWithOffset> newFiles = getNewFiles(directoryListing, completedFiles);
+ state.addPendingFileRecords(newFiles);
+ }
+
+ /**
+ * @return list of file name and file size in bytes
+ */
+ protected List<FileNameWithOffset> getDirectoryListing() throws IOException {
+ log.debug("getDirectoryListing: fileSpec={}", config.fileSpec);
+ //Invalid files will be moved to a separate folder Failed_Files parallel to the database file
+ log.debug("movedFilesDirectory: {}", movedFilesDirectory);
+ final List<FileNameWithOffset> directoryListing = FileUtils.getDirectoryListing(config.fileSpec, config.fileExtension, movedFilesDirectory, config.minTimeInMillisToUpdateFile);
+ log.debug("getDirectoryListing: directoryListing={}", directoryListing);
+ return directoryListing;
+ }
+
+ /**
+ * @return sorted list of file name and file size in bytes
+ */
+ protected List<FileNameWithOffset> getNewFiles(List<FileNameWithOffset> directoryListing, List<FileNameWithOffset> completedFiles) {
+ final ArrayList<FileNameWithOffset> sortedDirectoryListing = new ArrayList<>(directoryListing);
+ Collections.sort(sortedDirectoryListing);
+ final List<FileNameWithOffset> newFiles = new ArrayList<>();
+ final Set<FileNameWithOffset> setCompletedFiles = new HashSet<>(completedFiles);
+ log.trace("setCompletedFiles={}", setCompletedFiles);
+ sortedDirectoryListing.forEach(dirFile -> {
+ if (!setCompletedFiles.contains(dirFile)) {
+ newFiles.add(new FileNameWithOffset(dirFile.fileName, 0));
+ } else {
+ try {
+ FileUtils.moveCompletedFile(dirFile, movedFilesDirectory);
+ log.warn("File: {} already marked as completed, moving now", dirFile.fileName);
+ } catch (IOException e) {
+ log.error("File: {} already marked as completed, but failed to move, error:{}", dirFile.fileName,e.getMessage());
+ }
+ }
+ });
+ log.info("getNewFiles: new file lists = {}", newFiles);
+ return newFiles;
+ }
+
+ void processFile(FileNameWithOffset fileNameWithBeginOffset, long firstSequenceNumber) throws Exception {
+ log.info("processFile: Ingesting file {}; beginOffset={}, firstSequenceNumber={}",
+ fileNameWithBeginOffset.fileName, fileNameWithBeginOffset.offset, firstSequenceNumber);
+
+ AtomicLong numOfBytes = new AtomicLong(0);
+ long timestamp = System.nanoTime();
+ // In case a previous iteration encountered an error, we need to ensure that
+ // previously flushed transactions are committed and any unflushed transactions are aborted.
+ transactionCoordinator.performRecovery();
+ /* Check if transactions can be aborted.
+ * Will fail with {@link TxnFailedException} if the transaction has already been committed or aborted.
+ */
+ log.debug("processFile: Transaction status {} ", writer.getTransactionStatus());
+ if(writer.getTransactionStatus() == Transaction.Status.OPEN){
+ writer.abort();
+ }
+
+ File pendingFile = new File(fileNameWithBeginOffset.fileName);
+ if(!pendingFile.exists()){
+ log.warn("File {} does not exist. It was deleted before processing", fileNameWithBeginOffset.fileName);
+ state.deletePendingFile(fileNameWithBeginOffset.fileName, fileNameWithBeginOffset.offset);
+ return;
+ }
+
+ try (final InputStream inputStream = new FileInputStream(fileNameWithBeginOffset.fileName)) {
+ final CountingInputStream countingInputStream = new CountingInputStream(inputStream);
+ countingInputStream.skip(fileNameWithBeginOffset.offset);
+ final Pair<Long, Long> result = eventGenerator.generateEventsFromInputStream(countingInputStream, firstSequenceNumber,
+ e -> {
+ log.trace("processFile: event={}", e);
+ try {
+ writer.writeEvent(e.routingKey, e.bytes);
+ numOfBytes.addAndGet(e.bytes.length);
+ } catch (TxnFailedException ex) {
+ log.error("processFile: Write event to transaction failed with exception {} while processing file: {}, event: {}", ex, fileNameWithBeginOffset.fileName, e);
+
+ /* TODO: if writing an event fails with TxnFailedException, should we abort the transaction and process the file again?
+ This will occur only if the transaction state is not open. */
+
+ throw new RuntimeException(ex);
+ }
+ });
+ final Optional<UUID> txnId = writer.flush();
+ final long nextSequenceNumber = result.getLeft();
+ final long endOffset = result.getRight();
+
+ // injectCommitFailure();
+ try {
+ // commit fails only if Transaction is not in open state.
+ log.info("processFile: Commit transaction for Id: {}; file: {}", txnId.orElse(null), fileNameWithBeginOffset.fileName);
+ writer.commit();
+ } catch (TxnFailedException ex) {
+ log.error("processFile: Commit transaction for id: {}, file : {}, failed with exception: {}", txnId, fileNameWithBeginOffset.fileName, ex);
+ throw new RuntimeException(ex);
+ }
+ log.debug("processFile: Adding completed file: {}", fileNameWithBeginOffset.fileName);
+ state.addCompletedFileRecord(fileNameWithBeginOffset.fileName, fileNameWithBeginOffset.offset, endOffset, nextSequenceNumber, txnId);
+ // Add to the completed file list only if the commit is successful; otherwise it is handled as part of recovery
+ if(txnId.isPresent()){
+ Transaction.Status status = writer.getTransactionStatus(txnId.get());
+ if(status == Transaction.Status.COMMITTED || status == Transaction.Status.ABORTED)
+ state.deleteTransactionToCommit(txnId);
+ }
+
+ double elapsedSec = (System.nanoTime() - timestamp) / 1_000_000_000.0;
+ double megabyteCount = numOfBytes.getAndSet(0) / 1_000_000.0;
+ double megabytesPerSec = megabyteCount / elapsedSec;
+ log.info("Sent {} MB in {} sec. Transfer rate: {} MB/sec ", megabyteCount, elapsedSec, megabytesPerSec );
+ log.info("processFile: Finished ingesting file {}; endOffset={}, nextSequenceNumber={}",
+ fileNameWithBeginOffset.fileName, endOffset, nextSequenceNumber);
+ }
+ FileUtils.moveCompletedFile(fileNameWithBeginOffset, movedFilesDirectory);
+ // Delete file right after ingesting
+ if (config.enableDeleteCompletedFiles) {
+ deleteCompletedFiles();
+ }
+ }
+
+ void deleteCompletedFiles() throws Exception {
+ final List<FileNameWithOffset> completedFiles = state.getCompletedFileRecords();
+ completedFiles.forEach(file -> {
+ //Obtain a lock on file
+ Path completedFilesPath = movedFilesDirectory.resolve(FileUtils.COMPLETED_FILES);
+ String completedFileName = FileUtils.createCompletedFileName(completedFilesPath, file.fileName);
+ Path filePath = completedFilesPath.resolve(completedFileName);
+ log.debug("deleteCompletedFiles: Deleting File default name:{}, and it's completed file name:{}.", file.fileName, filePath);
+ try {
+ /*
+ * Remove the record from the DB only if the file has been deleted from the completed files directory
+ * or it no longer exists in the default ingestion directory.
+ */
+ if(Files.deleteIfExists(filePath) || Files.notExists(Paths.get(file.fileName))) {
+ state.deleteCompletedFileRecord(file.fileName);
+ log.debug("deleteCompletedFiles: Deleted File default name:{}, and it's completed file name:{}.", file.fileName, filePath);
+ } else {
+ /*
+ * This situation occurs when the first attempt to move the file to the completed directory fails,
+ * but the file still exists in the default ingestion directory. The move is retried on the next
+ * iteration, after which the delete is handled.
+ */
+ log.warn("deleteCompletedFiles: File {} doesn't exists in completed directory but still exist in default ingestion directory.", filePath);
+ }
+ } catch (Exception e) {
+ log.warn("Unable to delete ingested file default name:{}, and it's completed file name:{}, error: {}.", file.fileName, filePath, e.getMessage());
+ log.warn("Deletion will be retried on the next iteration.");
+ // We can continue on this error. Deletion will be retried on the next iteration.
+ }
+ });
+ }
+
+ /**
+ * Inject a failure before commit for testing.
+ */
+ protected void injectCommitFailure() {
+ if (Math.random() < 0.3) {
+ throw new RuntimeException("injectCommitFailure: Commit failure test exception");
+ }
+ }
+}
diff --git a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/FileProcessorFactory.java b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/FileProcessorFactory.java
new file mode 100644
index 00000000..6897ec96
--- /dev/null
+++ b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/FileProcessorFactory.java
@@ -0,0 +1,39 @@
+/**
+ * Copyright (c) Dell Inc., or its subsidiaries. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ */
+package io.pravega.sensor.collector.file;
+
+import io.pravega.sensor.collector.file.csvfile.CsvFileSequenceProcessor;
+import io.pravega.sensor.collector.file.parquet.ParquetFileProcessor;
+import io.pravega.sensor.collector.file.rawfile.RawFileProcessor;
+import io.pravega.sensor.collector.util.EventWriter;
+import io.pravega.sensor.collector.util.TransactionCoordinator;
+import io.pravega.sensor.collector.util.TransactionStateDB;
+import io.pravega.sensor.collector.util.TransactionStateSQLiteImpl;
+
+/*
+ * The FileProcessorFactory class is responsible for creating instances of file processors based on the type of the input file.
+ *
+ */
+public class FileProcessorFactory {
+
+ public static FileProcessor createFileSequenceProcessor(final FileConfig config, TransactionStateDB state,
+ EventWriter writer,
+ TransactionCoordinator transactionCoordinator,
+ String writerId){
+
+ final String className = config.fileType.substring(config.fileType.lastIndexOf(".")+1);
+
+ switch(className){
+ case "ParquetFileIngestService":
+ return new ParquetFileProcessor(config, state, writer, transactionCoordinator, writerId);
+
+ case "CsvFileIngestService":
+ return new CsvFileSequenceProcessor(config, state, writer, transactionCoordinator, writerId);
+
+ case "RawFileIngestService":
+ return new RawFileProcessor(config, state, writer, transactionCoordinator, writerId);
+
+ default :
+ throw new RuntimeException("Unsupported className: "+ className);
+ }
+
+ }
+}
diff --git a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/LogFileIngestService.java b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/LogFileIngestService.java
deleted file mode 100644
index 5bf5b70f..00000000
--- a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/LogFileIngestService.java
+++ /dev/null
@@ -1,141 +0,0 @@
-/**
- * Copyright (c) Dell Inc., or its subsidiaries. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- */
-package io.pravega.sensor.collector.file;
-
-import com.google.common.util.concurrent.ThreadFactoryBuilder;
-import io.pravega.client.EventStreamClientFactory;
-import io.pravega.sensor.collector.DeviceDriver;
-import io.pravega.sensor.collector.DeviceDriverConfig;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.util.concurrent.Executors;
-import java.util.concurrent.ScheduledExecutorService;
-import java.util.concurrent.ScheduledFuture;
-import java.util.concurrent.ThreadFactory;
-import java.util.concurrent.TimeUnit;
-
-public class LogFileIngestService extends DeviceDriver {
- private static final Logger log = LoggerFactory.getLogger(LogFileIngestService.class);
-
- private static final String FILE_SPEC_KEY = "FILE_SPEC";
- private static final String DELETE_COMPLETED_FILES_KEY = "DELETE_COMPLETED_FILES";
- private static final String DATABASE_FILE_KEY = "DATABASE_FILE";
- private static final String EVENT_TEMPLATE_KEY = "EVENT_TEMPLATE";
- private static final String SAMPLES_PER_EVENT_KEY = "SAMPLES_PER_EVENT";
- private static final String INTERVAL_MS_KEY = "INTERVAL_MS";
-
- private static final String SCOPE_KEY = "SCOPE";
- private static final String STREAM_KEY = "STREAM";
- private static final String ROUTING_KEY_KEY = "ROUTING_KEY";
- private static final String EXACTLY_ONCE_KEY = "EXACTLY_ONCE";
- private static final String TRANSACTION_TIMEOUT_MINUTES_KEY = "TRANSACTION_TIMEOUT_MINUTES";
-
- private final LogFileSequenceProcessor processor;
- private final ScheduledExecutorService executor;
-
- private ScheduledFuture> task;
-
- public LogFileIngestService(DeviceDriverConfig config) {
- super(config);
- final LogFileSequenceConfig logFileSequenceConfig = new LogFileSequenceConfig(
- getDatabaseFileName(),
- getFileSpec(),
- getRoutingKey(),
- getStreamName(),
- getEventTemplate(),
- getSamplesPerEvent(),
- getDeleteCompletedFiles(),
- getExactlyOnce(),
- getTransactionTimeoutMinutes());
- log.info("Log File Ingest Config: {}", logFileSequenceConfig);
- final String scopeName = getScopeName();
- log.info("Scope: {}", scopeName);
- createStream(scopeName, getStreamName());
- final EventStreamClientFactory clientFactory = getEventStreamClientFactory(scopeName);
- processor = LogFileSequenceProcessor.create(logFileSequenceConfig, clientFactory);
- ThreadFactory namedThreadFactory = new ThreadFactoryBuilder().setNameFormat(
- LogFileIngestService.class.getSimpleName() + "-" + config.getInstanceName() + "-%d").build();
- executor = Executors.newScheduledThreadPool(1, namedThreadFactory);
- }
-
- String getFileSpec() {
- return getProperty(FILE_SPEC_KEY);
- }
-
- boolean getDeleteCompletedFiles() {
- return Boolean.parseBoolean(getProperty(DELETE_COMPLETED_FILES_KEY, Boolean.toString(true)));
- }
-
- String getDatabaseFileName() {
- return getProperty(DATABASE_FILE_KEY);
- }
-
- String getEventTemplate() {
- return getProperty(EVENT_TEMPLATE_KEY, "{}");
- }
-
- int getSamplesPerEvent() {
- return Integer.parseInt(getProperty(SAMPLES_PER_EVENT_KEY, Integer.toString(100)));
- }
-
- long getIntervalMs() {
- return Long.parseLong(getProperty(INTERVAL_MS_KEY, Long.toString(10000)));
- }
-
- String getScopeName() {
- return getProperty(SCOPE_KEY);
- }
-
- String getStreamName() {
- return getProperty(STREAM_KEY);
- }
-
- protected String getRoutingKey() {
- return getProperty(ROUTING_KEY_KEY, "");
- }
-
- boolean getExactlyOnce() {
- return Boolean.parseBoolean(getProperty(EXACTLY_ONCE_KEY, Boolean.toString(true)));
- }
-
- /**
- * This time duration must not exceed the controller property controller.transaction.maxLeaseValue (milliseconds).
- */
- double getTransactionTimeoutMinutes() {
- return Double.parseDouble(getProperty(TRANSACTION_TIMEOUT_MINUTES_KEY, Double.toString(18.0 * 60.0)));
- }
-
- protected void ingestLogFiles() {
- log.info("ingestLogFiles: BEGIN");
- try {
- processor.ingestLogFiles();
- } catch (Exception e) {
- log.error("Error", e);
- // Continue on any errors. We will retry on the next iteration.
- }
- log.info("ingestLogFiles: END");
- }
-
- @Override
- protected void doStart() {
- task = executor.scheduleAtFixedRate(
- this::ingestLogFiles,
- 0,
- getIntervalMs(),
- TimeUnit.MILLISECONDS);
- notifyStarted();
- }
-
- @Override
- protected void doStop() {
- task.cancel(false);
- }
-}
diff --git a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/LogFileSequenceConfig.java b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/LogFileSequenceConfig.java
deleted file mode 100644
index d9693d0b..00000000
--- a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/LogFileSequenceConfig.java
+++ /dev/null
@@ -1,54 +0,0 @@
-/**
- * Copyright (c) Dell Inc., or its subsidiaries. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- */
-package io.pravega.sensor.collector.file;
-
-public class LogFileSequenceConfig {
- public final String stateDatabaseFileName;
- public final String fileSpec;
- public final String routingKey;
- public final String streamName;
- public final String eventTemplateStr;
-
- /**
- * Also known as samplesPerEvent.
- */
- public final int maxRecordsPerEvent;
-
- public final boolean enableDeleteCompletedFiles;
- public final boolean exactlyOnce;
- public final double transactionTimeoutMinutes;
-
- public LogFileSequenceConfig(String stateDatabaseFileName, String fileSpec, String routingKey, String streamName, String eventTemplateStr, int maxRecordsPerEvent, boolean enableDeleteCompletedFiles, boolean exactlyOnce, double transactionTimeoutMinutes) {
- this.stateDatabaseFileName = stateDatabaseFileName;
- this.fileSpec = fileSpec;
- this.routingKey = routingKey;
- this.streamName = streamName;
- this.eventTemplateStr = eventTemplateStr;
- this.maxRecordsPerEvent = maxRecordsPerEvent;
- this.enableDeleteCompletedFiles = enableDeleteCompletedFiles;
- this.exactlyOnce = exactlyOnce;
- this.transactionTimeoutMinutes = transactionTimeoutMinutes;
- }
-
- @Override
- public String toString() {
- return "LogFileSequenceConfig{" +
- "stateDatabaseFileName='" + stateDatabaseFileName + '\'' +
- ", fileSpec='" + fileSpec + '\'' +
- ", routingKey='" + routingKey + '\'' +
- ", streamName='" + streamName + '\'' +
- ", eventTemplateStr='" + eventTemplateStr + '\'' +
- ", maxRecordsPerEvent=" + maxRecordsPerEvent +
- ", enableDeleteCompletedFiles=" + enableDeleteCompletedFiles +
- ", exactlyOnce=" + exactlyOnce +
- ", transactionTimeoutMinutes=" + transactionTimeoutMinutes +
- '}';
- }
-}
diff --git a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/LogFileSequenceProcessor.java b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/LogFileSequenceProcessor.java
deleted file mode 100644
index d99db339..00000000
--- a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/LogFileSequenceProcessor.java
+++ /dev/null
@@ -1,218 +0,0 @@
-/**
- * Copyright (c) Dell Inc., or its subsidiaries. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- */
-package io.pravega.sensor.collector.file;
-
-import com.google.common.io.CountingInputStream;
-import io.pravega.client.ClientConfig;
-import io.pravega.client.EventStreamClientFactory;
-import io.pravega.client.admin.StreamManager;
-import io.pravega.client.stream.EventWriterConfig;
-import io.pravega.client.stream.StreamConfiguration;
-import io.pravega.client.stream.TxnFailedException;
-import io.pravega.client.stream.impl.ByteArraySerializer;
-import io.pravega.sensor.collector.util.EventWriter;
-import io.pravega.sensor.collector.util.PersistentId;
-import io.pravega.sensor.collector.util.TransactionCoordinator;
-import org.apache.commons.lang3.tuple.Pair;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.io.FileInputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.nio.file.DirectoryStream;
-import java.nio.file.Files;
-import java.nio.file.Path;
-import java.nio.file.Paths;
-import java.sql.Connection;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Optional;
-import java.util.Set;
-import java.util.UUID;
-import java.util.stream.Collectors;
-import java.util.stream.StreamSupport;
-
-public class LogFileSequenceProcessor {
- private static final Logger log = LoggerFactory.getLogger(LogFileSequenceProcessorState.class);
-
- private final LogFileSequenceConfig config;
- private final LogFileSequenceProcessorState state;
- private final EventWriter writer;
- private final TransactionCoordinator transactionCoordinator;
- private final EventGenerator eventGenerator;
-
- public LogFileSequenceProcessor(LogFileSequenceConfig config, LogFileSequenceProcessorState state, EventWriter writer, TransactionCoordinator transactionCoordinator, EventGenerator eventGenerator) {
- this.config = config;
- this.state = state;
- this.writer = writer;
- this.transactionCoordinator = transactionCoordinator;
- this.eventGenerator = eventGenerator;
- }
-
- public static LogFileSequenceProcessor create(
- LogFileSequenceConfig config, EventStreamClientFactory clientFactory){
-
- final Connection connection = LogFileSequenceProcessorState.createDatabase(config.stateDatabaseFileName);
-
- final String writerId = new PersistentId(connection).getPersistentId().toString();
- log.info("Writer ID: {}", writerId);
-
- final EventWriter writer = EventWriter.create(
- clientFactory,
- writerId,
- config.streamName,
- new ByteArraySerializer(),
- EventWriterConfig.builder()
- .enableConnectionPooling(true)
- .transactionTimeoutTime((long) (config.transactionTimeoutMinutes * 60.0 * 1000.0))
- .build(),
- config.exactlyOnce);
-
- final TransactionCoordinator transactionCoordinator = new TransactionCoordinator(connection, writer);
- transactionCoordinator.performRecovery();
-
- final EventGenerator eventGenerator = EventGenerator.create(
- config.routingKey,
- config.maxRecordsPerEvent,
- config.eventTemplateStr,
- writerId);
- final LogFileSequenceProcessorState state = new LogFileSequenceProcessorState(connection, transactionCoordinator);
- return new LogFileSequenceProcessor(config, state, writer, transactionCoordinator, eventGenerator);
- }
-
- public void ingestLogFiles() throws Exception {
- log.info("ingestLogFiles: BEGIN");
- findAndRecordNewFiles();
- processNewFiles();
- if (config.enableDeleteCompletedFiles) {
- deleteCompletedFiles();
- }
- log.info("ingestLogFiles: END");
- }
-
- public void processNewFiles() throws Exception {
- for (;;) {
- final Pair<FileNameWithOffset, Long> nextFile = state.getNextPendingFile();
- if (nextFile == null) {
- log.info("No more files to ingest");
- break;
- } else {
- processFile(nextFile.getLeft(), nextFile.getRight());
- }
- }
- }
-
- protected void findAndRecordNewFiles() throws Exception {
- final List<FileNameWithOffset> directoryListing = getDirectoryListing();
- final List<FileNameWithOffset> completedFiles = state.getCompletedFiles();
- final List<FileNameWithOffset> newFiles = getNewFiles(directoryListing, completedFiles);
- state.addPendingFiles(newFiles);
- }
-
- /**
- * @return list of file name and file size in bytes
- */
- protected List<FileNameWithOffset> getDirectoryListing() throws IOException {
- log.info("getDirectoryListing: fileSpec={}", config.fileSpec);
- final List<FileNameWithOffset> directoryListing = getDirectoryListing(config.fileSpec);
- log.trace("getDirectoryListing: directoryListing={}", directoryListing);
- return directoryListing;
- }
-
- /**
- * @return list of file name and file size in bytes
- */
- static protected List<FileNameWithOffset> getDirectoryListing(String fileSpec) throws IOException {
- final Path pathSpec = Paths.get(fileSpec);
- try (DirectoryStream<Path> dirStream = Files.newDirectoryStream(pathSpec.getParent(), pathSpec.getFileName().toString())) {
- return StreamSupport.stream(dirStream.spliterator(), false)
- .map(f -> new FileNameWithOffset(f.toAbsolutePath().toString(), f.toFile().length()))
- .collect(Collectors.toList());
- }
- }
-
- /**
- * @return sorted list of file name and file size in bytes
- */
- static protected List<FileNameWithOffset> getNewFiles(List<FileNameWithOffset> directoryListing, List<FileNameWithOffset> completedFiles) {
- final ArrayList<FileNameWithOffset> sortedDirectoryListing = new ArrayList<>(directoryListing);
- Collections.sort(sortedDirectoryListing);
- final List<FileNameWithOffset> newFiles = new ArrayList<>();
- final Set<FileNameWithOffset> setCompletedFiles = new HashSet<>(completedFiles);
- log.trace("setCompletedFiles={}", setCompletedFiles);
- sortedDirectoryListing.forEach(dirFile -> {
- if (!setCompletedFiles.contains(dirFile)) {
- newFiles.add(new FileNameWithOffset(dirFile.fileName, 0));
- }
- });
- log.info("getNewFiles={}", newFiles);
- return newFiles;
- }
-
- void processFile(FileNameWithOffset fileNameWithBeginOffset, long firstSequenceNumber) throws Exception {
- log.info("processFile: Ingesting file {}; beginOffset={}, firstSequenceNumber={}",
- fileNameWithBeginOffset.fileName, fileNameWithBeginOffset.offset, firstSequenceNumber);
-
- // In case a previous iteration encountered an error, we need to ensure that
- // previous flushed transactions are committed and any unflushed transactions as aborted.
- transactionCoordinator.performRecovery();
- writer.abort();
-
- try (final InputStream inputStream = new FileInputStream(fileNameWithBeginOffset.fileName)) {
- final CountingInputStream countingInputStream = new CountingInputStream(inputStream);
- countingInputStream.skip(fileNameWithBeginOffset.offset);
- final Pair<Long, Long> result = eventGenerator.generateEventsFromInputStream(countingInputStream, firstSequenceNumber,
- e -> {
- log.trace("processFile: event={}", e);
- try {
- writer.writeEvent(e.routingKey, e.bytes);
- } catch (TxnFailedException ex) {
- throw new RuntimeException(ex);
- }
- });
- final Optional<UUID> txnId = writer.flush();
- final long nextSequenceNumber = result.getLeft();
- final long endOffset = result.getRight();
- state.addCompletedFile(fileNameWithBeginOffset.fileName, fileNameWithBeginOffset.offset, endOffset, nextSequenceNumber, txnId);
- // injectCommitFailure();
- writer.commit();
- state.deleteTransactionToCommit(txnId);
- log.info("processFile: Finished ingesting file {}; endOffset={}, nextSequenceNumber={}",
- fileNameWithBeginOffset.fileName, endOffset, nextSequenceNumber);
- }
- }
-
- void deleteCompletedFiles() throws Exception {
- final List<FileNameWithOffset> completedFiles = state.getCompletedFiles();
- completedFiles.forEach(file -> {
- try {
- Files.deleteIfExists(Paths.get(file.fileName));
- log.info("deleteCompletedFiles: Deleted file {}", file.fileName);
- // Only remove from database if we could delete file.
- state.deleteCompletedFile(file.fileName);
- } catch (Exception e) {
- log.warn("Unable to delete ingested file {}", e);
- // We can continue on this error. It will be retried on the next iteration.
- }
- });
- }
-
- /**
- * Inject a failure before commit for testing.
- */
- protected void injectCommitFailure() {
- if (Math.random() < 0.3) {
- throw new RuntimeException("injectCommitFailure: Commit failure test exception");
- }
- }
-}
diff --git a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/LogFileSequenceProcessorState.java b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/LogFileSequenceProcessorState.java
deleted file mode 100644
index 53629cf1..00000000
--- a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/LogFileSequenceProcessorState.java
+++ /dev/null
@@ -1,179 +0,0 @@
-/**
- * Copyright (c) Dell Inc., or its subsidiaries. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- */
-package io.pravega.sensor.collector.file;
-
-import com.google.common.annotations.VisibleForTesting;
-import io.pravega.sensor.collector.util.AutoRollback;
-import io.pravega.sensor.collector.util.TransactionCoordinator;
-import org.apache.commons.lang3.tuple.ImmutablePair;
-import org.apache.commons.lang3.tuple.Pair;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.sql.Connection;
-import java.sql.DriverManager;
-import java.sql.PreparedStatement;
-import java.sql.ResultSet;
-import java.sql.SQLException;
-import java.sql.Statement;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Optional;
-import java.util.UUID;
-
-import static java.sql.Connection.TRANSACTION_SERIALIZABLE;
-
-public class LogFileSequenceProcessorState implements AutoCloseable {
- private static final Logger log = LoggerFactory.getLogger(LogFileSequenceProcessorState.class);
-
- private final Connection connection;
- private final TransactionCoordinator transactionCoordinator;
-
- public LogFileSequenceProcessorState(Connection connection, TransactionCoordinator transactionCoordinator) {
- this.connection = connection;
- this.transactionCoordinator = transactionCoordinator;
- }
-
- public static Connection createDatabase(String fileName) {
- try {
- final Connection connection = DriverManager.getConnection("jdbc:sqlite:" + fileName);
- try (final Statement statement = connection.createStatement()) {
- // Use SQLite exclusive locking mode to ensure that another process or device driver instance is not using this database.
- //statement.execute("PRAGMA locking_mode = EXCLUSIVE");
- statement.execute(
- "create table if not exists PendingFiles (" +
- "id integer primary key autoincrement, " +
- "fileName string unique not null, " +
- "offset bigint not null)");
- statement.execute(
- "create table if not exists CompletedFiles (" +
- "fileName string primary key not null, " +
- "offset bigint not null)");
- statement.execute(
- "create table if not exists SequenceNumber (" +
- "id integer primary key check (id = 0), " +
- "nextSequenceNumber bigint not null)");
- statement.execute(
- "insert or ignore into SequenceNumber (id, nextSequenceNumber) values (0, 0)");
- }
- connection.setAutoCommit(false);
- connection.setTransactionIsolation(TRANSACTION_SERIALIZABLE);
- return connection;
- } catch (SQLException e) {
- throw new RuntimeException(e);
- }
- }
-
- @VisibleForTesting
- public static LogFileSequenceProcessorState create(String fileName) {
- final Connection connection = createDatabase(fileName);
- final TransactionCoordinator transactionCoordinator = new TransactionCoordinator(connection, null);
- return new LogFileSequenceProcessorState(connection, transactionCoordinator);
- }
-
- @Override
- public void close() throws SQLException {
- connection.close();
- }
-
- public void addPendingFiles(List<FileNameWithOffset> files) throws SQLException {
- try (final PreparedStatement insertStatement = connection.prepareStatement(
- "insert or ignore into PendingFiles (fileName, offset) values (?, ?)");
- final AutoRollback autoRollback = new AutoRollback(connection)) {
- for (FileNameWithOffset file: files) {
- insertStatement.setString(1, file.fileName);
- insertStatement.setLong(2, file.offset);
- insertStatement.execute();
- }
- autoRollback.commit();
- }
- }
-
- /**
- * @return ((file name, begin offset), sequence number) or null if there is no pending file
- */
- public Pair<FileNameWithOffset, Long> getNextPendingFile() throws SQLException {
- try (final Statement statement = connection.createStatement();
- final ResultSet rs = statement.executeQuery("select fileName, offset from PendingFiles order by id limit 1")) {
- if (rs.next()) {
- final FileNameWithOffset fileNameWithOffset = new FileNameWithOffset(rs.getString("fileName"), rs.getLong("offset"));
- try (final ResultSet rsSequenceNumber = statement.executeQuery("select nextSequenceNumber from SequenceNumber")) {
- rsSequenceNumber.next();
- final long nextSequenceNumber = rsSequenceNumber.getLong(1);
- return new ImmutablePair<>(fileNameWithOffset, nextSequenceNumber);
- }
- } else {
- return null;
- }
- } finally {
- connection.commit();
- }
- }
-
- public void addCompletedFile(String fileName, long beginOffset, long endOffset, long newNextSequenceNumber, Optional<UUID> txnId) throws SQLException {
- try (final PreparedStatement updateSequenceNumberStatement = connection.prepareStatement(
- "update SequenceNumber set nextSequenceNumber = ?");
- final PreparedStatement insertCompletedFileStatement = connection.prepareStatement(
- "insert or ignore into CompletedFiles (fileName, offset) values (?, ?)");
- final PreparedStatement deletePendingFileStatement = connection.prepareStatement(
- "delete from PendingFiles where fileName = ? and offset <= ?");
- final AutoRollback autoRollback = new AutoRollback(connection)) {
- // Update sequence number.
- updateSequenceNumberStatement.setLong(1, newNextSequenceNumber);
- updateSequenceNumberStatement.execute();
- // Add completed file.
- insertCompletedFileStatement.setString(1, fileName);
- insertCompletedFileStatement.setLong(2, endOffset);
- insertCompletedFileStatement.execute();
- // Remove pending file.
- deletePendingFileStatement.setString(1, fileName);
- deletePendingFileStatement.setLong(2, beginOffset);
- deletePendingFileStatement.execute();
- transactionCoordinator.addTransactionToCommit(txnId);
- autoRollback.commit();
- }
- }
-
- @VisibleForTesting
- public void addCompletedFile(String fileName, long beginOffset, long endOffset, long newNextSequenceNumber) throws SQLException {
- addCompletedFile(fileName, beginOffset, endOffset, newNextSequenceNumber, Optional.empty());
- }
-
- public void deleteTransactionToCommit(Optional<UUID> txnId) {
- transactionCoordinator.deleteTransactionToCommit(txnId);
- }
-
- /**
- * @return list of file name and end offset (file size)
- */
- public List<FileNameWithOffset> getCompletedFiles() throws SQLException {
- try (final Statement statement = connection.createStatement();
- final ResultSet rs = statement.executeQuery("select fileName, offset from completedFiles")) {
- final List<FileNameWithOffset> files = new ArrayList<>();
- while (rs.next()) {
- final FileNameWithOffset fileNameWithOffset = new FileNameWithOffset(rs.getString("fileName"), rs.getLong("offset"));
- files.add(fileNameWithOffset);
- }
- return files;
- } finally {
- connection.commit();
- }
- }
-
- public void deleteCompletedFile(String fileName) throws SQLException {
- try (final PreparedStatement deleteStatement = connection.prepareStatement(
- "delete from CompletedFiles where fileName = ?");
- final AutoRollback autoRollback = new AutoRollback(connection)) {
- deleteStatement.setString(1, fileName);
- deleteStatement.execute();
- autoRollback.commit();
- }
- }
-}
diff --git a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/PravegaWriterEvent.java b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/PravegaWriterEvent.java
deleted file mode 100644
index 377766db..00000000
--- a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/PravegaWriterEvent.java
+++ /dev/null
@@ -1,31 +0,0 @@
-/**
- * Copyright (c) Dell Inc., or its subsidiaries. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- */
-package io.pravega.sensor.collector.file;
-
-public class PravegaWriterEvent {
- public final String routingKey;
- public final long sequenceNumber;
- public final byte[] bytes;
-
- public PravegaWriterEvent(String routingKey, long sequenceNumber, byte[] bytes) {
- this.routingKey = routingKey;
- this.sequenceNumber = sequenceNumber;
- this.bytes = bytes;
- }
-
- @Override
- public String toString() {
- return "PravegaWriterEvent{" +
- "routingKey='" + routingKey + '\'' +
- ", sequenceNumber=" + sequenceNumber +
- ", bytes=" + new String(bytes) +
- '}';
- }
-}
diff --git a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/csvfile/CsvFileEventGenerator.java b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/csvfile/CsvFileEventGenerator.java
new file mode 100644
index 00000000..42e11502
--- /dev/null
+++ b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/csvfile/CsvFileEventGenerator.java
@@ -0,0 +1,110 @@
+/**
+ * Copyright (c) Dell Inc., or its subsidiaries. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ */
+package io.pravega.sensor.collector.file.csvfile;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.databind.node.ObjectNode;
+import com.google.common.io.CountingInputStream;
+import io.pravega.sensor.collector.file.EventGenerator;
+import io.pravega.sensor.collector.util.PravegaWriterEvent;
+import org.apache.commons.csv.CSVFormat;
+import org.apache.commons.csv.CSVParser;
+import org.apache.commons.csv.CSVRecord;
+import org.apache.commons.lang3.tuple.ImmutablePair;
+import org.apache.commons.lang3.tuple.Pair;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.nio.charset.StandardCharsets;
+import java.util.function.Consumer;
+
+/**
+ * Generate Event from CSV file
+ */
+public class CsvFileEventGenerator implements EventGenerator {
+ private static final Logger log = LoggerFactory.getLogger(CsvFileEventGenerator.class);
+
+ private final String routingKey;
+ private final int maxRecordsPerEvent;
+ private final ObjectNode eventTemplate;
+ private final ObjectMapper mapper;
+
+ public CsvFileEventGenerator(String routingKey, int maxRecordsPerEvent, ObjectNode eventTemplate, ObjectMapper mapper) {
+ this.routingKey = routingKey;
+ this.maxRecordsPerEvent = maxRecordsPerEvent;
+ this.eventTemplate = eventTemplate;
+ this.mapper = mapper;
+ }
+
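+ /** Create a generator using the given routing key, batch size, JSON event template string, and writer ID. */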
+ public static CsvFileEventGenerator create(String routingKey, int maxRecordsPerEvent, String eventTemplateStr, String writerId) {
+ try {
+ final ObjectMapper mapper = new ObjectMapper();
+ final ObjectNode eventTemplate = (ObjectNode) mapper.readTree(eventTemplateStr);
+ eventTemplate.put("WriterId", writerId);
+ return new CsvFileEventGenerator(routingKey, maxRecordsPerEvent, eventTemplate, mapper);
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ public static CsvFileEventGenerator create(String routingKey, int maxRecordsPerEvent) throws IOException {
+ return create(routingKey, maxRecordsPerEvent, "{}", "MyWriterId");
+ }
+
+ /** Generate events from the input stream. The number of records per event is defined in the configuration.
+ * @param inputStream counting input stream over the CSV file
+ * @param firstSequenceNumber sequence number assigned to the first generated event
+ * @return next sequence number and end offset
+ */
+ public Pair<Long, Long> generateEventsFromInputStream(CountingInputStream inputStream, long firstSequenceNumber, Consumer<PravegaWriterEvent> consumer) throws IOException {
+ final CSVFormat format = CSVFormat.DEFAULT.withFirstRecordAsHeader();
+ final CSVParser parser = CSVParser.parse(inputStream, StandardCharsets.UTF_8, format);
+ long nextSequenceNumber = firstSequenceNumber;
+ int numRecordsInEvent = 0;
+ List<HashMap<String, Object>> eventBatch = new ArrayList<>();
+ for (CSVRecord record : parser) {
+ HashMap<String, Object> recordDataMap = new HashMap<>();
+ for (int i = 0; i < record.size(); i++) {
+ recordDataMap.put(parser.getHeaderNames().get(i), convertValue(record.get(i)));
+ }
+ eventBatch.add(recordDataMap);
+ numRecordsInEvent++;
+ if (numRecordsInEvent >= maxRecordsPerEvent) {
+ consumer.accept(new PravegaWriterEvent(routingKey, nextSequenceNumber, mapper.writeValueAsBytes(eventBatch)));
+ nextSequenceNumber++;
+ eventBatch.clear();
+ numRecordsInEvent = 0;
+ }
+ }
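+ // Flush any remaining records that did not fill a complete batch.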
+ if (!eventBatch.isEmpty()) {
+ consumer.accept(new PravegaWriterEvent(routingKey, nextSequenceNumber, mapper.writeValueAsBytes(eventBatch)));
+ nextSequenceNumber++;
+ eventBatch.clear();
+ }
+ final long endOffset = inputStream.getCount();
+ return new ImmutablePair<>(nextSequenceNumber, endOffset);
+ }
+
+ public Object convertValue(String s) {
+ // TODO: convert timestamp
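+ // Best-effort conversion: try long, then double, otherwise keep the raw string.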
+ try {
+ return Long.parseLong(s);
+ } catch (NumberFormatException ignored) {}
+ try {
+ return Double.parseDouble(s);
+ } catch (NumberFormatException ignored) {}
+ return s;
+ }
+}
diff --git a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/csvfile/CsvFileIngestService.java b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/csvfile/CsvFileIngestService.java
new file mode 100644
index 00000000..85d25e00
--- /dev/null
+++ b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/csvfile/CsvFileIngestService.java
@@ -0,0 +1,29 @@
+/**
+ * Copyright (c) Dell Inc., or its subsidiaries. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ */
+package io.pravega.sensor.collector.file.csvfile;
+
+import io.pravega.sensor.collector.DeviceDriverConfig;
+import io.pravega.sensor.collector.file.FileIngestService;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Ingestion service for csv files.
+ */
+public class CsvFileIngestService extends FileIngestService {
+ private static final Logger log = LoggerFactory.getLogger(CsvFileIngestService.class);
+
+
+ public CsvFileIngestService(DeviceDriverConfig config) {
+
+ super(config);
+ }
+
+}
diff --git a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/csvfile/CsvFileSequenceProcessor.java b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/csvfile/CsvFileSequenceProcessor.java
new file mode 100644
index 00000000..58f27e63
--- /dev/null
+++ b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/csvfile/CsvFileSequenceProcessor.java
@@ -0,0 +1,46 @@
+/**
+ * Copyright (c) Dell Inc., or its subsidiaries. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ */
+package io.pravega.sensor.collector.file.csvfile;
+
+import io.pravega.sensor.collector.file.EventGenerator;
+import io.pravega.sensor.collector.file.FileConfig;
+import io.pravega.sensor.collector.file.FileProcessor;
+import io.pravega.sensor.collector.util.TransactionStateDB;
+import io.pravega.sensor.collector.util.EventWriter;
+import io.pravega.sensor.collector.util.TransactionCoordinator;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
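+/**
+ * File processor that supplies a {@link CsvFileEventGenerator}, which batches CSV records into JSON events.
+ */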
+public class CsvFileSequenceProcessor extends FileProcessor {
+ private static final Logger log = LoggerFactory.getLogger(CsvFileSequenceProcessor.class);
+ private final FileConfig config;
+ private final String writerId;
+
+
+ public CsvFileSequenceProcessor(FileConfig config, TransactionStateDB state, EventWriter writer, TransactionCoordinator transactionCoordinator, String writerId) {
+ super(config, state, writer, transactionCoordinator);
+ this.config =config;
+ this.writerId = writerId;
+ }
+
+ /**
+ * Event generator for CSV files.
+ * @param config configuration parameters
+ * @return eventGenerator
+ */
+ @Override
+ public EventGenerator getEventGenerator(FileConfig config) {
+ return CsvFileEventGenerator.create(
+ config.routingKey,
+ config.maxRecordsPerEvent,
+ config.eventTemplateStr,
+ writerId);
+ }
+}
diff --git a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/parquet/EventGenerator.java b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/parquet/ParquetEventGenerator.java
similarity index 83%
rename from pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/parquet/EventGenerator.java
rename to pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/parquet/ParquetEventGenerator.java
index 8b2a3c84..5a7311d1 100644
--- a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/parquet/EventGenerator.java
+++ b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/parquet/ParquetEventGenerator.java
@@ -8,21 +8,13 @@
*
* http://www.apache.org/licenses/LICENSE-2.0
*/
-package io.pravega.sensor.collector.parquet;
-
-import java.io.File;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.function.Consumer;
-import java.util.stream.Collectors;
+package io.pravega.sensor.collector.file.parquet;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.google.common.io.CountingInputStream;
-
+import io.pravega.sensor.collector.file.EventGenerator;
+import io.pravega.sensor.collector.util.PravegaWriterEvent;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericRecord;
@@ -44,47 +36,58 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.function.Consumer;
+import java.util.stream.Collectors;
+
/**
- * Generate Event from file
+ * Generate Event from Parquet file
*/
-public class EventGenerator {
- private static final Logger log = LoggerFactory.getLogger(EventGenerator.class);
+public class ParquetEventGenerator implements EventGenerator {
+ private static final Logger log = LoggerFactory.getLogger(ParquetEventGenerator.class);
private final String routingKey;
private final int maxRecordsPerEvent;
private final ObjectNode eventTemplate;
private final ObjectMapper mapper;
- public EventGenerator(String routingKey, int maxRecordsPerEvent, ObjectNode eventTemplate, ObjectMapper mapper) {
+ public ParquetEventGenerator(String routingKey, int maxRecordsPerEvent, ObjectNode eventTemplate, ObjectMapper mapper) {
this.routingKey = routingKey;
this.maxRecordsPerEvent = maxRecordsPerEvent;
this.eventTemplate = eventTemplate;
this.mapper = mapper;
}
- public static EventGenerator create(String routingKey, int maxRecordsPerEvent, String eventTemplateStr, String writerId) {
+ public static ParquetEventGenerator create(String routingKey, int maxRecordsPerEvent, String eventTemplateStr, String writerId) {
try {
final ObjectMapper mapper = new ObjectMapper();
final ObjectNode eventTemplate = (ObjectNode) mapper.readTree(eventTemplateStr);
eventTemplate.put("WriterId", writerId);
- return new EventGenerator(routingKey, maxRecordsPerEvent, eventTemplate, mapper);
+ return new ParquetEventGenerator(routingKey, maxRecordsPerEvent, eventTemplate, mapper);
} catch (IOException e) {
throw new RuntimeException(e);
}
}
- public static EventGenerator create(String routingKey, int maxRecordsPerEvent) throws IOException {
+ public static ParquetEventGenerator create(String routingKey, int maxRecordsPerEvent) throws IOException {
return create(routingKey, maxRecordsPerEvent, "{}", "MyWriterId");
}
/**
- * Convert Parquet to Json
+ * Generate events from the input stream. The number of records per event is defined in the configuration.
+ * Converts Parquet data to JSON.
+ *
* @param inputStream
* @param firstSequenceNumber
* @return next sequence number, end offset
*/
- protected Pair generateEventsFromInputStream(CountingInputStream inputStream, long firstSequenceNumber, Consumer consumer) throws IOException {
+ public Pair<Long, Long> generateEventsFromInputStream(CountingInputStream inputStream, long firstSequenceNumber, Consumer<PravegaWriterEvent> consumer) throws IOException {
File tempFile = File.createTempFile("temp", ".parquet");
FileOutputStream outputStream = new FileOutputStream(tempFile);
IOUtils.copy(inputStream,outputStream);
diff --git a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/parquet/ParquetFileIngestService.java b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/parquet/ParquetFileIngestService.java
new file mode 100644
index 00000000..57a45403
--- /dev/null
+++ b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/parquet/ParquetFileIngestService.java
@@ -0,0 +1,30 @@
+/**
+ * Copyright (c) Dell Inc., or its subsidiaries. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ */
+package io.pravega.sensor.collector.file.parquet;
+
+import io.pravega.sensor.collector.DeviceDriverConfig;
+import io.pravega.sensor.collector.file.FileIngestService;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+/**
+ * Ingestion service for parquet file data.
+ */
+public class ParquetFileIngestService extends FileIngestService {
+ private static final Logger log = LoggerFactory.getLogger(ParquetFileIngestService.class);
+
+
+ public ParquetFileIngestService(DeviceDriverConfig config){
+ super(config);
+
+ }
+
+}
diff --git a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/parquet/ParquetFileProcessor.java b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/parquet/ParquetFileProcessor.java
new file mode 100644
index 00000000..0474a1e4
--- /dev/null
+++ b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/parquet/ParquetFileProcessor.java
@@ -0,0 +1,47 @@
+/**
+ * Copyright (c) Dell Inc., or its subsidiaries. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ */
+package io.pravega.sensor.collector.file.parquet;
+
+import io.pravega.sensor.collector.file.EventGenerator;
+import io.pravega.sensor.collector.file.FileConfig;
+import io.pravega.sensor.collector.file.FileProcessor;
+import io.pravega.sensor.collector.util.TransactionStateDB;
+import io.pravega.sensor.collector.util.EventWriter;
+import io.pravega.sensor.collector.util.TransactionCoordinator;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
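+/**
+ * File processor that supplies a {@link ParquetEventGenerator}, which converts Parquet records to JSON events.
+ */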
+public class ParquetFileProcessor extends FileProcessor {
+ private static final Logger log = LoggerFactory.getLogger(ParquetFileProcessor.class);
+
+ private final FileConfig config;
+ private final String writerId;
+
+ public ParquetFileProcessor(FileConfig config, TransactionStateDB state, EventWriter writer, TransactionCoordinator transactionCoordinator, String writerId) {
+ super(config,state,writer,transactionCoordinator);
+ this.config =config;
+ this.writerId = writerId;
+ }
+
+ /** Event generator for Parquet files.
+ * @param config configuration parameters
+ * @return eventGenerator
+ */
+ @Override
+ public EventGenerator getEventGenerator(FileConfig config) {
+ return ParquetEventGenerator.create(
+ config.routingKey,
+ config.maxRecordsPerEvent,
+ config.eventTemplateStr,
+ writerId);
+ }
+
+}
diff --git a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/rawfile/EventGenerator.java b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/rawfile/RawEventGenerator.java
similarity index 57%
rename from pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/rawfile/EventGenerator.java
rename to pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/rawfile/RawEventGenerator.java
index 52882e18..9ce9ec44 100644
--- a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/rawfile/EventGenerator.java
+++ b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/rawfile/RawEventGenerator.java
@@ -8,48 +8,52 @@
*
* http://www.apache.org/licenses/LICENSE-2.0
*/
-package io.pravega.sensor.collector.rawfile;
-
-import java.io.IOException;
-import java.util.function.Consumer;
+package io.pravega.sensor.collector.file.rawfile;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.google.common.io.CountingInputStream;
+import io.pravega.sensor.collector.file.EventGenerator;
+import io.pravega.sensor.collector.util.PravegaWriterEvent;
+import org.apache.commons.compress.utils.IOUtils;
import org.apache.commons.lang3.tuple.ImmutablePair;
import org.apache.commons.lang3.tuple.Pair;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import java.io.BufferedInputStream;
+import java.io.IOException;
+import java.util.function.Consumer;
+
/**
- * Generate Event from file
+ * Generate Event from RAW file
*/
-public class EventGenerator {
- private static final Logger log = LoggerFactory.getLogger(EventGenerator.class);
+public class RawEventGenerator implements EventGenerator {
+ private static final Logger log = LoggerFactory.getLogger(RawEventGenerator.class);
private final String routingKey;
private final ObjectNode eventTemplate;
private final ObjectMapper mapper;
- public EventGenerator(String routingKey, ObjectNode eventTemplate, ObjectMapper mapper) {
+ public RawEventGenerator(String routingKey, ObjectNode eventTemplate, ObjectMapper mapper) {
this.routingKey = routingKey;
this.eventTemplate = eventTemplate;
this.mapper = mapper;
}
- public static EventGenerator create(String routingKey, String eventTemplateStr, String writerId) {
+ public static RawEventGenerator create(String routingKey, String eventTemplateStr, String writerId) {
try {
final ObjectMapper mapper = new ObjectMapper();
final ObjectNode eventTemplate = (ObjectNode) mapper.readTree(eventTemplateStr);
eventTemplate.put("WriterId", writerId);
- return new EventGenerator(routingKey, eventTemplate, mapper);
+ return new RawEventGenerator(routingKey, eventTemplate, mapper);
} catch (IOException e) {
throw new RuntimeException(e);
}
}
- public static EventGenerator create(String routingKey) throws IOException {
+ public static RawEventGenerator create(String routingKey) throws IOException {
return create(routingKey, "{}", "MyWriterId");
}
@@ -60,17 +64,17 @@ public static EventGenerator create(String routingKey) throws IOException {
* @param firstSequenceNumber
* @return next sequence number, end offset
*/
- protected Pair generateEventsFromInputStream(CountingInputStream inputStream, long firstSequenceNumber, Consumer consumer) throws IOException {
-
+ public Pair<Long, Long> generateEventsFromInputStream(CountingInputStream inputStream, long firstSequenceNumber, Consumer<PravegaWriterEvent> consumer) throws IOException {
long nextSequenceNumber = firstSequenceNumber;
try{
- byte[] byteArray = inputStream.readAllBytes();
- //TODO: Batching
+ BufferedInputStream bis = new BufferedInputStream(inputStream);
+ byte[] byteArray = IOUtils.toByteArray(bis);
- consumer.accept(new RawFileWriterEvent(routingKey, nextSequenceNumber, byteArray));
- nextSequenceNumber++;
+ if (byteArray.length > 0) { //non-empty file
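+ // The entire file content is emitted as a single event.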
+ consumer.accept(new PravegaWriterEvent(routingKey, nextSequenceNumber, byteArray));
+ nextSequenceNumber++;
+ }
final long endOffset = inputStream.getCount();
-
return new ImmutablePair<>(nextSequenceNumber, endOffset);
} catch (Exception e){
log.error("Exception = {}",e);
diff --git a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/rawfile/RawFileIngestService.java b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/rawfile/RawFileIngestService.java
new file mode 100644
index 00000000..176600d4
--- /dev/null
+++ b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/rawfile/RawFileIngestService.java
@@ -0,0 +1,26 @@
+/**
+ * Copyright (c) Dell Inc., or its subsidiaries. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ */
+package io.pravega.sensor.collector.file.rawfile;
+
+import io.pravega.sensor.collector.DeviceDriverConfig;
+import io.pravega.sensor.collector.file.FileIngestService;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Ingestion service for raw file data.
+ */
+public class RawFileIngestService extends FileIngestService {
+ private static final Logger log = LoggerFactory.getLogger(RawFileIngestService.class);
+
+ public RawFileIngestService(DeviceDriverConfig config){
+ super(config);
+ }
+}
diff --git a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/rawfile/RawFileProcessor.java b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/rawfile/RawFileProcessor.java
new file mode 100644
index 00000000..07b349f8
--- /dev/null
+++ b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/file/rawfile/RawFileProcessor.java
@@ -0,0 +1,46 @@
+/**
+ * Copyright (c) Dell Inc., or its subsidiaries. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ */
+package io.pravega.sensor.collector.file.rawfile;
+
+import io.pravega.sensor.collector.file.EventGenerator;
+import io.pravega.sensor.collector.file.FileConfig;
+import io.pravega.sensor.collector.file.FileProcessor;
+import io.pravega.sensor.collector.util.TransactionStateDB;
+import io.pravega.sensor.collector.util.EventWriter;
+import io.pravega.sensor.collector.util.TransactionCoordinator;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+
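+/**
+ * File processor that supplies a {@link RawEventGenerator}, which emits each raw file as a single event.
+ */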
+public class RawFileProcessor extends FileProcessor {
+ private static final Logger log = LoggerFactory.getLogger(RawFileProcessor.class);
+ private final FileConfig config;
+ private final String writerId;
+
+ public RawFileProcessor(FileConfig config, TransactionStateDB state, EventWriter writer, TransactionCoordinator transactionCoordinator, String writerId) {
+ super(config, state, writer, transactionCoordinator);
+ this.config =config;
+ this.writerId = writerId;
+ }
+
+ /** Event generator for raw files.
+ * @param config configuration parameters
+ * @return eventGenerator
+ */
+ @Override
+ public EventGenerator getEventGenerator(FileConfig config) {
+ return RawEventGenerator.create(
+ config.routingKey,
+ config.eventTemplateStr,
+ writerId);
+ }
+
+}
diff --git a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/parquet/FileNameWithOffset.java b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/parquet/FileNameWithOffset.java
deleted file mode 100644
index 1d84f533..00000000
--- a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/parquet/FileNameWithOffset.java
+++ /dev/null
@@ -1,58 +0,0 @@
-/**
- * Copyright (c) Dell Inc., or its subsidiaries. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- */
-package io.pravega.sensor.collector.parquet;
-
-import java.util.Objects;
-
-/**
- * File name and file size
- */
- public class FileNameWithOffset implements Comparable<FileNameWithOffset> {
- public final String fileName;
- /**
- * In some contexts, this is the size of the file.
- * In the future, this will represent the offset in the file for incrementally ingesting growing log files.
- * This is partially implemented today.
- * TODO: Clarify usage of offset.
- */
- public final long offset;
-
- public FileNameWithOffset(String fileName, long offset) {
- this.fileName = fileName;
- this.offset = offset;
- }
-
- @Override
- public String toString() {
- return "FileNameWithOffset{" +
- "fileName='" + fileName + '\'' +
- ", offset=" + offset +
- '}';
- }
-
- @Override
- public boolean equals(Object o) {
- if (this == o) return true;
- if (o == null || getClass() != o.getClass()) return false;
- FileNameWithOffset that = (FileNameWithOffset) o;
- return offset == that.offset &&
- Objects.equals(fileName, that.fileName);
- }
-
- @Override
- public int hashCode() {
- return Objects.hash(fileName, offset);
- }
-
- @Override
- public int compareTo(FileNameWithOffset o) {
- return this.fileName.compareTo(o.fileName);
- }
-}
diff --git a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/parquet/ParquetFileProcessor.java b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/parquet/ParquetFileProcessor.java
deleted file mode 100644
index fa9a68c6..00000000
--- a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/parquet/ParquetFileProcessor.java
+++ /dev/null
@@ -1,264 +0,0 @@
-/**
- * Copyright (c) Dell Inc., or its subsidiaries. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- */
-package io.pravega.sensor.collector.parquet;
-
-import java.io.FileInputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.nio.channels.FileChannel;
-import java.nio.channels.FileLock;
-import java.nio.file.DirectoryStream;
-import java.nio.file.Files;
-import java.nio.file.Path;
-import java.nio.file.Paths;
-import java.nio.file.StandardOpenOption;
-import java.sql.Connection;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Optional;
-import java.util.Set;
-import java.util.UUID;
-import java.util.concurrent.atomic.AtomicLong;
-
-import com.google.common.io.CountingInputStream;
-
-import org.apache.commons.lang3.tuple.Pair;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import io.pravega.client.EventStreamClientFactory;
-import io.pravega.client.stream.EventWriterConfig;
-import io.pravega.client.stream.TxnFailedException;
-import io.pravega.client.stream.impl.ByteArraySerializer;
-import io.pravega.sensor.collector.util.EventWriter;
-import io.pravega.sensor.collector.util.PersistentId;
-import io.pravega.sensor.collector.util.TransactionCoordinator;
-
-/**
- * Get list of files obtained from config. Process each file for ingestion.
- * Keep track of new files and delete ingested files if "DELETE_COMPLETED_FILES"=true.
- */
-public class ParquetFileProcessor {
- private static final Logger log = LoggerFactory.getLogger(ParquetFileIngestService.class);
-
- private final ParquetFileConfig config;
- private final ParquetFileState state;
- private final EventWriter writer;
- private final TransactionCoordinator transactionCoordinator;
- private final EventGenerator eventGenerator;
-
- public ParquetFileProcessor(ParquetFileConfig config, ParquetFileState state, EventWriter writer, TransactionCoordinator transactionCoordinator, EventGenerator eventGenerator) {
- this.config = config;
- this.state = state;
- this.writer = writer;
- this.transactionCoordinator = transactionCoordinator;
- this.eventGenerator = eventGenerator;
- }
-
- public static ParquetFileProcessor create(ParquetFileConfig config, EventStreamClientFactory clientFactory){
- final Connection connection = ParquetFileState.createDatabase(config.stateDatabaseFileName);
-
- final String writerId = new PersistentId(connection).getPersistentId().toString();
- log.info("Writer ID: {}", writerId);
-
- final EventWriter writer = EventWriter.create(
- clientFactory,
- writerId,
- config.streamName,
- new ByteArraySerializer(),
- EventWriterConfig.builder()
- .enableConnectionPooling(false)
- .transactionTimeoutTime((long) (config.transactionTimeoutMinutes * 60.0 * 1000.0))
- .build(),
- config.exactlyOnce);
-
- final TransactionCoordinator transactionCoordinator = new TransactionCoordinator(connection, writer);
- transactionCoordinator.performRecovery();
-
- final EventGenerator eventGenerator = EventGenerator.create(
- config.routingKey,
- config.maxRecordsPerEvent,
- config.eventTemplateStr,
- writerId);
- final ParquetFileState state = new ParquetFileState(connection, transactionCoordinator);
- return new ParquetFileProcessor(config, state, writer, transactionCoordinator, eventGenerator);
- }
-
- public void ingestParquetFiles() throws Exception {
- log.trace("ingestParquetFiles: BEGIN");
- // delete leftover completed files
- if (config.enableDeleteCompletedFiles) {
- deleteCompletedFiles();
- }
- findAndRecordNewFiles();
- processNewFiles();
- log.trace("ingestParquetFiles: END");
- }
-
- public void processNewFiles() throws Exception {
- for (;;) {
- final Pair<FileNameWithOffset, Long> nextFile = state.getNextPendingFile();
- if (nextFile == null) {
- log.trace("No more files to ingest");
- break;
- } else {
- processFile(nextFile.getLeft(), nextFile.getRight());
- }
- }
- }
-
- protected void findAndRecordNewFiles() throws Exception {
- final List<FileNameWithOffset> directoryListing = getDirectoryListing();
- final List<FileNameWithOffset> completedFiles = state.getCompletedFiles();
- final List<FileNameWithOffset> newFiles = getNewFiles(directoryListing, completedFiles);
- state.addPendingFiles(newFiles);
- }
-
- /**
- * @return list of file name and file size in bytes
- */
- protected List<FileNameWithOffset> getDirectoryListing() throws IOException {
- log.trace("getDirectoryListing: fileSpec={}", config.fileSpec);
- final List<FileNameWithOffset> directoryListing = getDirectoryListing(config.fileSpec, config.fileExtension);
- log.trace("getDirectoryListing: directoryListing={}", directoryListing);
- return directoryListing;
- }
-
- /**
- * @return list of file name and file size in bytes
- */
- static protected List<FileNameWithOffset> getDirectoryListing(String fileSpec, String fileExtension) throws IOException {
- final Path pathSpec = Paths.get(fileSpec);
- List<FileNameWithOffset> directoryListing = new ArrayList<>();
- try(DirectoryStream<Path> dirStream=Files.newDirectoryStream(pathSpec)){
- for(Path path: dirStream){
- if(Files.isDirectory(path)) //traverse subdirectories
- directoryListing.addAll(getDirectoryListing(path.toString(), fileExtension));
- else {
- FileNameWithOffset fileEntry = new FileNameWithOffset(path.toAbsolutePath().toString(), path.toFile().length());
- // If extension is null, ingest all files
- if(fileExtension.isEmpty() || fileExtension.equals(fileEntry.fileName.substring(fileEntry.fileName.lastIndexOf(".")+1)))
- directoryListing.add(fileEntry);
- }
- }
- }
- return directoryListing;
- }
-
- /**
- * @return sorted list of file name and file size in bytes
- */
- static protected List<FileNameWithOffset> getNewFiles(List<FileNameWithOffset> directoryListing, List<FileNameWithOffset> completedFiles) {
- final ArrayList<FileNameWithOffset> sortedDirectoryListing = new ArrayList<>(directoryListing);
- Collections.sort(sortedDirectoryListing);
- final List<FileNameWithOffset> newFiles = new ArrayList<>();
- final Set<FileNameWithOffset> setCompletedFiles = new HashSet<>(completedFiles);
- log.trace("setCompletedFiles={}", setCompletedFiles);
- sortedDirectoryListing.forEach(dirFile -> {
- if (!setCompletedFiles.contains(dirFile)) {
- newFiles.add(new FileNameWithOffset(dirFile.fileName, 0));
- }
- });
- if(!newFiles.isEmpty())
- log.info("{} New file(s) = {}", newFiles.size(), newFiles);
- return newFiles;
- }
-
-
- // PROCESS FILE
-
- void processFile(FileNameWithOffset fileNameWithBeginOffset, long firstSequenceNumber) throws Exception {
- log.info("processFile: Ingesting file {}; beginOffset={}, firstSequenceNumber={}",
- fileNameWithBeginOffset.fileName, fileNameWithBeginOffset.offset, firstSequenceNumber);
-
- AtomicLong numofbytes = new AtomicLong(0);
- long timestamp = System.nanoTime();
-
- // In case a previous iteration encountered an error, we need to ensure that
- // previous flushed transactions are committed and any unflushed transactions as aborted.
- transactionCoordinator.performRecovery();
- writer.abort();
-
- try (final InputStream inputStream = new FileInputStream(fileNameWithBeginOffset.fileName)) {
- try(final CountingInputStream countingInputStream = new CountingInputStream(inputStream)) {
- countingInputStream.skip(fileNameWithBeginOffset.offset);
- final Pair<Long, Long> result = eventGenerator.generateEventsFromInputStream(countingInputStream, firstSequenceNumber,
- e -> {
- log.trace("processFile: event={}", e);
- try {
- writer.writeEvent(e.routingKey, e.bytes);
- numofbytes.addAndGet(e.bytes.length);
-
- } catch (TxnFailedException ex) {
- throw new RuntimeException(ex);
- }
- });
- final Optional<UUID> txnId = writer.flush();
- final long nextSequenceNumber = result.getLeft();
- final long endOffset = result.getRight();
- state.addCompletedFile(fileNameWithBeginOffset.fileName, fileNameWithBeginOffset.offset, endOffset, nextSequenceNumber, txnId);
- // injectCommitFailure();
- writer.commit();
- state.deleteTransactionToCommit(txnId);
-
- double elapsedSec = (System.nanoTime() - timestamp) / 1_000_000_000.0;
- double megabyteCount = numofbytes.getAndSet(0) / 1_000_000.0;
- double megabytesPerSec = megabyteCount / elapsedSec;
- log.info("processFile: Finished ingesting file {}; endOffset={}, nextSequenceNumber={}",
- fileNameWithBeginOffset.fileName, endOffset, nextSequenceNumber);
- log.info("Sent {} MB in {} sec", megabyteCount, elapsedSec );
- log.info("Transfer rate: {} MB/sec", megabytesPerSec);
- }
- }
-
- // Delete file right after ingesting
- if (config.enableDeleteCompletedFiles) {
- deleteCompletedFiles();
- }
-
- }
-
- void deleteCompletedFiles() throws Exception {
- final List<FileNameWithOffset> completedFiles = state.getCompletedFiles();
- completedFiles.forEach(file -> {
- //Obtain a lock on file
- try(FileChannel channel = FileChannel.open(Paths.get(file.fileName),StandardOpenOption.WRITE)){
- try(FileLock lock = channel.tryLock()) {
- if(lock!=null){
- Files.deleteIfExists(Paths.get(file.fileName));
- log.info("deleteCompletedFiles: Deleted file {}", file.fileName);
- lock.release();
- // Only remove from database if we could delete file.
- state.deleteCompletedFile(file.fileName);
- }
- else{
- log.warn("Unable to obtain lock on file {}. File is locked by another process.", file.fileName);
- throw new Exception();
- }
- }
- } catch (Exception e) {
- log.warn("Unable to delete ingested file {}", e.getMessage());
- log.warn("Deletion will be retried on the next iteration.");
- // We can continue on this error. Deletion will be retried on the next iteration.
- }
- });
- }
-
- /**
- * Inject a failure before commit for testing.
- */
- protected void injectCommitFailure() {
- if (Math.random() < 0.3) {
- throw new RuntimeException("injectCommitFailure: Commit failure test exception");
- }
- }
-}
diff --git a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/parquet/ParquetFileState.java b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/parquet/ParquetFileState.java
deleted file mode 100644
index 712b3a72..00000000
--- a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/parquet/ParquetFileState.java
+++ /dev/null
@@ -1,186 +0,0 @@
-/**
- * Copyright (c) Dell Inc., or its subsidiaries. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- */
-package io.pravega.sensor.collector.parquet;
-
-import java.sql.Connection;
-import java.sql.DriverManager;
-import java.sql.PreparedStatement;
-import java.sql.ResultSet;
-import java.sql.SQLException;
-import java.sql.Statement;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Optional;
-import java.util.UUID;
-
-import com.google.common.annotations.VisibleForTesting;
-
-import org.apache.commons.lang3.tuple.ImmutablePair;
-import org.apache.commons.lang3.tuple.Pair;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import io.pravega.sensor.collector.parquet.FileNameWithOffset;
-import io.pravega.sensor.collector.util.AutoRollback;
-import io.pravega.sensor.collector.util.TransactionCoordinator;
-
-import static java.sql.Connection.TRANSACTION_SERIALIZABLE;
-
-/**
- * Maintain state of pending and completed files in SQLite database.
- */
-public class ParquetFileState implements AutoCloseable{
- private static final Logger log = LoggerFactory.getLogger(ParquetFileState.class);
-
- private final Connection connection;
- private final TransactionCoordinator transactionCoordinator;
-
- public ParquetFileState(Connection connection, TransactionCoordinator transactionCoordinator) {
- this.connection = connection;
- this.transactionCoordinator = transactionCoordinator;
- }
-
- public static Connection createDatabase(String fileName) {
- try {
- final Connection connection = DriverManager.getConnection("jdbc:sqlite:" + fileName);
- try (final Statement statement = connection.createStatement()) {
- // Use SQLite exclusive locking mode to ensure that another process or device driver instance is not using this database.
- //statement.execute("PRAGMA locking_mode = EXCLUSIVE");
- statement.execute(
- "create table if not exists PendingFiles (" +
- "id integer primary key autoincrement, " +
- "fileName string unique not null, " +
- "offset bigint not null)");
- statement.execute(
- "create table if not exists CompletedFiles (" +
- "fileName string primary key not null, " +
- "offset bigint not null)");
- statement.execute(
- "create table if not exists SequenceNumber (" +
- "id integer primary key check (id = 0), " +
- "nextSequenceNumber bigint not null)");
- statement.execute(
- "insert or ignore into SequenceNumber (id, nextSequenceNumber) values (0, 0)");
- }
- connection.setAutoCommit(false);
- connection.setTransactionIsolation(TRANSACTION_SERIALIZABLE);
- return connection;
- } catch (SQLException e) {
- throw new RuntimeException(e);
- }
- }
-
- @VisibleForTesting
- public static ParquetFileState create(String fileName) {
- final Connection connection = createDatabase(fileName);
- final TransactionCoordinator transactionCoordinator = new TransactionCoordinator(connection, null);
- return new ParquetFileState(connection, transactionCoordinator);
- }
-
- @Override
- public void close() throws SQLException {
- connection.close();
- }
-
- public void addPendingFiles(List<FileNameWithOffset> files) throws SQLException {
- try (final PreparedStatement insertStatement = connection.prepareStatement(
- "insert or ignore into PendingFiles (fileName, offset) values (?, ?)");
- final AutoRollback autoRollback = new AutoRollback(connection)) {
- for (FileNameWithOffset file: files) {
- insertStatement.setString(1, file.fileName);
- insertStatement.setLong(2, file.offset);
- insertStatement.execute();
- }
- autoRollback.commit();
- }
- }
-
- /**
- * @return ((file name, begin offset), sequence number) or null if there is no pending file
- */
- public Pair<FileNameWithOffset, Long> getNextPendingFile() throws SQLException {
- try (final Statement statement = connection.createStatement();
- final ResultSet rs = statement.executeQuery("select fileName, offset from PendingFiles order by id limit 1")) {
- if (rs.next()) {
- final FileNameWithOffset fileNameWithOffset = new FileNameWithOffset(rs.getString("fileName"), rs.getLong("offset"));
- try (final ResultSet rsSequenceNumber = statement.executeQuery("select nextSequenceNumber from SequenceNumber")) {
- rsSequenceNumber.next();
- final long nextSequenceNumber = rsSequenceNumber.getLong(1);
- return new ImmutablePair<>(fileNameWithOffset, nextSequenceNumber);
- }
- } else {
- return null;
- }
- } finally {
- connection.commit();
- }
- }
-
- public void addCompletedFile(String fileName, long beginOffset, long endOffset, long newNextSequenceNumber, Optional<UUID> txnId) throws SQLException {
- try (final PreparedStatement updateSequenceNumberStatement = connection.prepareStatement(
- "update SequenceNumber set nextSequenceNumber = ?");
- final PreparedStatement insertCompletedFileStatement = connection.prepareStatement(
- "insert or ignore into CompletedFiles (fileName, offset) values (?, ?)");
- final PreparedStatement deletePendingFileStatement = connection.prepareStatement(
- "delete from PendingFiles where fileName = ? and offset <= ?");
- final AutoRollback autoRollback = new AutoRollback(connection)) {
- // Update sequence number.
- updateSequenceNumberStatement.setLong(1, newNextSequenceNumber);
- updateSequenceNumberStatement.execute();
- // Add completed file.
- insertCompletedFileStatement.setString(1, fileName);
- insertCompletedFileStatement.setLong(2, endOffset);
- insertCompletedFileStatement.execute();
- // Remove pending file.
- deletePendingFileStatement.setString(1, fileName);
- deletePendingFileStatement.setLong(2, beginOffset);
- deletePendingFileStatement.execute();
- transactionCoordinator.addTransactionToCommit(txnId);
- autoRollback.commit();
- }
- }
-
- @VisibleForTesting
- public void addCompletedFile(String fileName, long beginOffset, long endOffset, long newNextSequenceNumber) throws SQLException {
- addCompletedFile(fileName, beginOffset, endOffset, newNextSequenceNumber, Optional.empty());
- }
-
- public void deleteTransactionToCommit(Optional<UUID> txnId) {
- transactionCoordinator.deleteTransactionToCommit(txnId);
- }
-
- /**
- * @return list of file name and end offset (file size)
- */
- public List<FileNameWithOffset> getCompletedFiles() throws SQLException {
- try (final Statement statement = connection.createStatement();
- final ResultSet rs = statement.executeQuery("select fileName, offset from completedFiles")) {
- final List<FileNameWithOffset> files = new ArrayList<>();
- while (rs.next()) {
- final FileNameWithOffset fileNameWithOffset = new FileNameWithOffset(rs.getString("fileName"), rs.getLong("offset"));
- files.add(fileNameWithOffset);
- }
- return files;
- } finally {
- connection.commit();
- }
- }
-
- public void deleteCompletedFile(String fileName) throws SQLException {
- try (final PreparedStatement deleteStatement = connection.prepareStatement(
- "delete from CompletedFiles where fileName = ?");
- final AutoRollback autoRollback = new AutoRollback(connection)) {
- deleteStatement.setString(1, fileName);
- deleteStatement.execute();
- autoRollback.commit();
- }
- }
-
-}
diff --git a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/rawfile/RawFileConfig.java b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/rawfile/RawFileConfig.java
deleted file mode 100644
index aea1adb2..00000000
--- a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/rawfile/RawFileConfig.java
+++ /dev/null
@@ -1,55 +0,0 @@
-/**
- * Copyright (c) Dell Inc., or its subsidiaries. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- */
-package io.pravega.sensor.collector.rawfile;
-
-/**
- * Config passed to Pravega Sensor Collector
- */
-public class RawFileConfig {
- public final String stateDatabaseFileName;
- public final String fileSpec;
- public final String fileExtension;
- public final String routingKey;
- public final String streamName;
- public final String eventTemplateStr;
-
-
- public final boolean enableDeleteCompletedFiles;
- public final boolean exactlyOnce;
- public final double transactionTimeoutMinutes;
-
- public RawFileConfig(String stateDatabaseFileName, String fileSpec, String fileExtension, String routingKey, String streamName, String eventTemplateStr, boolean enableDeleteCompletedFiles, boolean exactlyOnce, double transactionTimeoutMinutes) {
- this.stateDatabaseFileName = stateDatabaseFileName;
- this.fileSpec = fileSpec;
- this.fileExtension = fileExtension;
- this.routingKey = routingKey;
- this.streamName = streamName;
- this.eventTemplateStr = eventTemplateStr;
- this.enableDeleteCompletedFiles = enableDeleteCompletedFiles;
- this.exactlyOnce = exactlyOnce;
- this.transactionTimeoutMinutes = transactionTimeoutMinutes;
- }
-
- @Override
- public String toString() {
- return "RawFileConfig{" +
- "stateDatabaseFileName='" + stateDatabaseFileName + '\'' +
- ", fileSpec='" + fileSpec + '\'' +
- ", fileExtension='" + fileExtension + '\'' +
- ", routingKey='" + routingKey + '\'' +
- ", streamName='" + streamName + '\'' +
- ", eventTemplateStr='" + eventTemplateStr + '\'' +
- ", enableDeleteCompletedFiles=" + enableDeleteCompletedFiles +
- ", exactlyOnce=" + exactlyOnce +
- ", transactionTimeoutMinutes=" + transactionTimeoutMinutes +
- '}';
- }
-
-}
diff --git a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/rawfile/RawFileIngestService.java b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/rawfile/RawFileIngestService.java
deleted file mode 100644
index df2941e0..00000000
--- a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/rawfile/RawFileIngestService.java
+++ /dev/null
@@ -1,148 +0,0 @@
-/**
- * Copyright (c) Dell Inc., or its subsidiaries. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- */
-package io.pravega.sensor.collector.rawfile;
-
-import java.util.concurrent.Executors;
-import java.util.concurrent.ScheduledExecutorService;
-import java.util.concurrent.ScheduledFuture;
-import java.util.concurrent.ThreadFactory;
-import java.util.concurrent.TimeUnit;
-
-import com.google.common.util.concurrent.ThreadFactoryBuilder;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import io.pravega.client.EventStreamClientFactory;
-import io.pravega.sensor.collector.DeviceDriver;
-import io.pravega.sensor.collector.DeviceDriverConfig;
-
-/**
- * Ingestion service for raw file data.
- */
-public class RawFileIngestService extends DeviceDriver{
- private static final Logger log = LoggerFactory.getLogger(RawFileIngestService.class);
-
- private static final String FILE_SPEC_KEY = "FILE_SPEC";
- private static final String FILE_EXT= "FILE_EXTENSION";
- private static final String DELETE_COMPLETED_FILES_KEY = "DELETE_COMPLETED_FILES";
- private static final String DATABASE_FILE_KEY = "DATABASE_FILE";
- private static final String EVENT_TEMPLATE_KEY = "EVENT_TEMPLATE";
- private static final String INTERVAL_MS_KEY = "INTERVAL_MS";
-
- private static final String SCOPE_KEY = "SCOPE";
- private static final String STREAM_KEY = "STREAM";
- private static final String ROUTING_KEY_KEY = "ROUTING_KEY";
- private static final String EXACTLY_ONCE_KEY = "EXACTLY_ONCE";
- private static final String TRANSACTION_TIMEOUT_MINUTES_KEY = "TRANSACTION_TIMEOUT_MINUTES";
-
- private final RawFileProcessor processor;
- private final ScheduledExecutorService executor;
- private ScheduledFuture<?> task;
-
- public RawFileIngestService(DeviceDriverConfig config){
- super(config);
- final RawFileConfig rawFileConfig = new RawFileConfig(
- getDatabaseFileName(),
- getFileSpec(),
- getFileExtension(),
- getRoutingKey(),
- getStreamName(),
- getEventTemplate(),
- getDeleteCompletedFiles(),
- getExactlyOnce(),
- getTransactionTimeoutMinutes());
- log.info("Raw File Ingest Config: {}", rawFileConfig);
- final String scopeName = getScopeName();
- log.info("Scope: {}", scopeName);
- createStream(scopeName, getStreamName());
-
- final EventStreamClientFactory clientFactory = getEventStreamClientFactory(scopeName);
- processor = RawFileProcessor.create(rawFileConfig, clientFactory);
- ThreadFactory namedThreadFactory = new ThreadFactoryBuilder().setNameFormat(
- RawFileIngestService.class.getSimpleName() + "-" + config.getInstanceName() + "-%d").build();
- executor = Executors.newScheduledThreadPool(1, namedThreadFactory);
-
- }
-
- String getFileSpec() {
- return getProperty(FILE_SPEC_KEY);
- }
-
- String getFileExtension() {
- return getProperty(FILE_EXT, "");
- }
-
- boolean getDeleteCompletedFiles() {
- return Boolean.parseBoolean(getProperty(DELETE_COMPLETED_FILES_KEY, Boolean.toString(true)));
- }
-
- String getDatabaseFileName() {
- return getProperty(DATABASE_FILE_KEY);
- }
-
- String getEventTemplate() {
- return getProperty(EVENT_TEMPLATE_KEY, "{}");
- }
-
- long getIntervalMs() {
- return Long.parseLong(getProperty(INTERVAL_MS_KEY, Long.toString(10000)));
- }
-
- String getScopeName() {
- return getProperty(SCOPE_KEY);
- }
-
- String getStreamName() {
- return getProperty(STREAM_KEY);
- }
-
- protected String getRoutingKey() {
- return getProperty(ROUTING_KEY_KEY, "");
- }
-
- boolean getExactlyOnce() {
- return Boolean.parseBoolean(getProperty(EXACTLY_ONCE_KEY, Boolean.toString(true)));
- }
-
- /**
- * This time duration must not exceed the controller property controller.transaction.maxLeaseValue (milliseconds).
- */
- double getTransactionTimeoutMinutes() {
- return Double.parseDouble(getProperty(TRANSACTION_TIMEOUT_MINUTES_KEY, Double.toString(18.0 * 60.0)));
- }
-
- protected void ingestRawFiles() {
- log.trace("ingestRawFiles: BEGIN");
- try {
- processor.ingestRawFiles();
- } catch (Exception e) {
- log.error("Error", e);
- // Continue on any errors. We will retry on the next iteration.
- }
- log.trace("ingestRawFiles: END");
- }
-
- @Override
- protected void doStart() {
- task = executor.scheduleAtFixedRate(
- this::ingestRawFiles,
- 0,
- getIntervalMs(),
- TimeUnit.MILLISECONDS);
- notifyStarted();
- }
-
- @Override
- protected void doStop() {
- task.cancel(false);
- }
-
-}
diff --git a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/rawfile/RawFileProcessor.java b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/rawfile/RawFileProcessor.java
deleted file mode 100644
index e420493d..00000000
--- a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/rawfile/RawFileProcessor.java
+++ /dev/null
@@ -1,260 +0,0 @@
-/**
- * Copyright (c) Dell Inc., or its subsidiaries. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- */
-package io.pravega.sensor.collector.rawfile;
-
-import java.io.FileInputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.nio.channels.FileChannel;
-import java.nio.channels.FileLock;
-import java.nio.file.DirectoryStream;
-import java.nio.file.Files;
-import java.nio.file.Path;
-import java.nio.file.Paths;
-import java.nio.file.StandardOpenOption;
-import java.sql.Connection;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Optional;
-import java.util.Set;
-import java.util.UUID;
-import java.util.concurrent.atomic.AtomicLong;
-
-import com.google.common.io.CountingInputStream;
-
-import org.apache.commons.lang3.tuple.Pair;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import io.pravega.client.EventStreamClientFactory;
-import io.pravega.client.stream.EventWriterConfig;
-import io.pravega.client.stream.TxnFailedException;
-import io.pravega.client.stream.impl.ByteArraySerializer;
-import io.pravega.sensor.collector.util.EventWriter;
-import io.pravega.sensor.collector.util.PersistentId;
-import io.pravega.sensor.collector.util.TransactionCoordinator;
-
-/**
- * Get list of files obtained from config. Process each file for ingestion.
- * Keep track of new files and delete ingested files if "DELETE_COMPLETED_FILES"=true.
- */
-public class RawFileProcessor {
- private static final Logger log = LoggerFactory.getLogger(RawFileIngestService.class);
-
- private final RawFileConfig config;
- private final RawFileState state;
- private final EventWriter<byte[]> writer;
- private final TransactionCoordinator transactionCoordinator;
- private final EventGenerator eventGenerator;
-
- public RawFileProcessor(RawFileConfig config, RawFileState state, EventWriter<byte[]> writer, TransactionCoordinator transactionCoordinator, EventGenerator eventGenerator) {
- this.config = config;
- this.state = state;
- this.writer = writer;
- this.transactionCoordinator = transactionCoordinator;
- this.eventGenerator = eventGenerator;
- }
-
- public static RawFileProcessor create(RawFileConfig config, EventStreamClientFactory clientFactory){
- final Connection connection = RawFileState.createDatabase(config.stateDatabaseFileName);
-
- final String writerId = new PersistentId(connection).getPersistentId().toString();
- log.info("Writer ID: {}", writerId);
-
- final EventWriter<byte[]> writer = EventWriter.create(
- clientFactory,
- writerId,
- config.streamName,
- new ByteArraySerializer(),
- EventWriterConfig.builder()
- .enableConnectionPooling(false)
- .transactionTimeoutTime((long) (config.transactionTimeoutMinutes * 60.0 * 1000.0))
- .build(),
- config.exactlyOnce);
-
- final TransactionCoordinator transactionCoordinator = new TransactionCoordinator(connection, writer);
- transactionCoordinator.performRecovery();
-
- final EventGenerator eventGenerator = EventGenerator.create(
- config.routingKey,
- config.eventTemplateStr,
- writerId);
- final RawFileState state = new RawFileState(connection, transactionCoordinator);
- return new RawFileProcessor(config, state, writer, transactionCoordinator, eventGenerator);
- }
-
- public void ingestRawFiles() throws Exception {
- log.trace("ingestRawFiles: BEGIN");
- // delete leftover completed files
- if (config.enableDeleteCompletedFiles) {
- deleteCompletedFiles();
- }
- findAndRecordNewFiles();
- processNewFiles();
- log.trace("ingestRawFiles: END");
- }
-
- public void processNewFiles() throws Exception {
- for (;;) {
- final Pair<FileNameWithOffset, Long> nextFile = state.getNextPendingFile();
- if (nextFile == null) {
- log.trace("No more files to ingest");
- break;
- } else {
- processFile(nextFile.getLeft(), nextFile.getRight());
- }
- }
- }
-
- protected void findAndRecordNewFiles() throws Exception {
- final List<FileNameWithOffset> directoryListing = getDirectoryListing();
- final List<FileNameWithOffset> completedFiles = state.getCompletedFiles();
- final List<FileNameWithOffset> newFiles = getNewFiles(directoryListing, completedFiles);
- state.addPendingFiles(newFiles);
- }
-
- /**
- * @return list of file name and file size in bytes
- */
- protected List<FileNameWithOffset> getDirectoryListing() throws IOException {
- log.trace("getDirectoryListing: fileSpec={}", config.fileSpec);
- final List<FileNameWithOffset> directoryListing = getDirectoryListing(config.fileSpec, config.fileExtension);
- log.trace("getDirectoryListing: directoryListing={}", directoryListing);
- return directoryListing;
- }
-
- /**
- * @return list of file name and file size in bytes
- */
- static protected List<FileNameWithOffset> getDirectoryListing(String fileSpec, String fileExtension) throws IOException {
- final Path pathSpec = Paths.get(fileSpec);
- List<FileNameWithOffset> directoryListing = new ArrayList<>();
- try (DirectoryStream<Path> dirStream = Files.newDirectoryStream(pathSpec)) {
- for(Path path: dirStream){
- if(Files.isDirectory(path))
- directoryListing.addAll(getDirectoryListing(path.toString(), fileExtension));
- else {
- FileNameWithOffset fileEntry = new FileNameWithOffset(path.toAbsolutePath().toString(), path.toFile().length());
- // If extension is null, ingest all files
- if(fileExtension.isEmpty() || fileExtension.equals(fileEntry.fileName.substring(fileEntry.fileName.lastIndexOf(".")+1)))
- directoryListing.add(fileEntry);
- }
- }
- }
- return directoryListing;
- }
-
- /**
- * @return sorted list of file name and file size in bytes
- */
- static protected List<FileNameWithOffset> getNewFiles(List<FileNameWithOffset> directoryListing, List<FileNameWithOffset> completedFiles) {
- final ArrayList<FileNameWithOffset> sortedDirectoryListing = new ArrayList<>(directoryListing);
- Collections.sort(sortedDirectoryListing);
- final List<FileNameWithOffset> newFiles = new ArrayList<>();
- final Set<FileNameWithOffset> setCompletedFiles = new HashSet<>(completedFiles);
- log.trace("setCompletedFiles={}", setCompletedFiles);
- sortedDirectoryListing.forEach(dirFile -> {
- if (!setCompletedFiles.contains(dirFile)) {
- newFiles.add(new FileNameWithOffset(dirFile.fileName, 0));
- }
- });
- if(!newFiles.isEmpty())
- log.info("{} New file(s) = {}", newFiles.size(), newFiles);
- return newFiles;
- }
-
-
- void processFile(FileNameWithOffset fileNameWithBeginOffset, long firstSequenceNumber) throws Exception {
- log.info("processFile: Ingesting file {}; beginOffset={}, firstSequenceNumber={}",
- fileNameWithBeginOffset.fileName, fileNameWithBeginOffset.offset, firstSequenceNumber);
-
- AtomicLong numofbytes = new AtomicLong(0);
- long timestamp = System.nanoTime();
-
- // In case a previous iteration encountered an error, we need to ensure that
- // previously flushed transactions are committed and any unflushed transactions are aborted.
- transactionCoordinator.performRecovery();
- writer.abort();
-
- try (final InputStream inputStream = new FileInputStream(fileNameWithBeginOffset.fileName)) {
- try(final CountingInputStream countingInputStream = new CountingInputStream(inputStream)) {
- countingInputStream.skip(fileNameWithBeginOffset.offset);
- final Pair<Long, Long> result = eventGenerator.generateEventsFromInputStream(countingInputStream, firstSequenceNumber,
- e -> {
- log.trace("processFile: event={}", e);
- try {
- writer.writeEvent(e.routingKey, e.bytes);
- numofbytes.addAndGet(e.bytes.length);
-
- } catch (TxnFailedException ex) {
- throw new RuntimeException(ex);
- }
- });
- final Optional<UUID> txnId = writer.flush();
- final long nextSequenceNumber = result.getLeft();
- final long endOffset = result.getRight();
- state.addCompletedFile(fileNameWithBeginOffset.fileName, fileNameWithBeginOffset.offset, endOffset, nextSequenceNumber, txnId);
- // injectCommitFailure();
- writer.commit();
- state.deleteTransactionToCommit(txnId);
-
- double elapsedSec = (System.nanoTime() - timestamp) / 1_000_000_000.0;
- double megabyteCount = numofbytes.getAndSet(0) / 1_000_000.0;
- double megabytesPerSec = megabyteCount / elapsedSec;
- log.info("processFile: Finished ingesting file {}; endOffset={}, nextSequenceNumber={}",
- fileNameWithBeginOffset.fileName, endOffset, nextSequenceNumber);
- log.info("Sent {} MB in {} sec", megabyteCount, elapsedSec );
- log.info("Transfer rate: {} MB/sec", megabytesPerSec);
- }
- }
-
- // Delete file right after ingesting
- if (config.enableDeleteCompletedFiles) {
- deleteCompletedFiles();
- }
- }
-
- void deleteCompletedFiles() throws Exception {
- final List<FileNameWithOffset> completedFiles = state.getCompletedFiles();
- completedFiles.forEach(file -> {
- //Obtain a lock on file
- try(FileChannel channel = FileChannel.open(Paths.get(file.fileName),StandardOpenOption.WRITE)){
- try(FileLock lock = channel.tryLock()) {
- if(lock!=null){
- Files.deleteIfExists(Paths.get(file.fileName));
- log.info("deleteCompletedFiles: Deleted file {}", file.fileName);
- lock.release();
- // Only remove from database if we could delete file.
- state.deleteCompletedFile(file.fileName);
- }
- else{
- log.warn("Unable to obtain lock on file {}. File is locked by another process.", file.fileName);
- throw new Exception();
- }
- }
- } catch (Exception e) {
- log.warn("Unable to delete ingested file {}", e.getMessage());
- log.warn("Deletion will be retried on the next iteration.");
- // We can continue on this error. Deletion will be retried on the next iteration.
- }
- });
- }
-
- /**
- * Inject a failure before commit for testing.
- */
- protected void injectCommitFailure() {
- if (Math.random() < 0.3) {
- throw new RuntimeException("injectCommitFailure: Commit failure test exception");
- }
- }
-}
diff --git a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/rawfile/RawFileState.java b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/rawfile/RawFileState.java
deleted file mode 100644
index 97079b4a..00000000
--- a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/rawfile/RawFileState.java
+++ /dev/null
@@ -1,185 +0,0 @@
-/**
- * Copyright (c) Dell Inc., or its subsidiaries. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- */
-package io.pravega.sensor.collector.rawfile;
-
-import java.sql.Connection;
-import java.sql.DriverManager;
-import java.sql.PreparedStatement;
-import java.sql.ResultSet;
-import java.sql.SQLException;
-import java.sql.Statement;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Optional;
-import java.util.UUID;
-
-import com.google.common.annotations.VisibleForTesting;
-
-import org.apache.commons.lang3.tuple.ImmutablePair;
-import org.apache.commons.lang3.tuple.Pair;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import io.pravega.sensor.collector.util.AutoRollback;
-import io.pravega.sensor.collector.util.TransactionCoordinator;
-
-import static java.sql.Connection.TRANSACTION_SERIALIZABLE;
-
-/**
- * Maintain state of pending and completed files in SQLite database.
- */
-public class RawFileState implements AutoCloseable{
- private static final Logger log = LoggerFactory.getLogger(RawFileState.class);
-
- private final Connection connection;
- private final TransactionCoordinator transactionCoordinator;
-
- public RawFileState(Connection connection, TransactionCoordinator transactionCoordinator) {
- this.connection = connection;
- this.transactionCoordinator = transactionCoordinator;
- }
-
- public static Connection createDatabase(String fileName) {
- try {
- final Connection connection = DriverManager.getConnection("jdbc:sqlite:" + fileName);
- try (final Statement statement = connection.createStatement()) {
- // Use SQLite exclusive locking mode to ensure that another process or device driver instance is not using this database.
- //statement.execute("PRAGMA locking_mode = EXCLUSIVE");
- statement.execute(
- "create table if not exists PendingFiles (" +
- "id integer primary key autoincrement, " +
- "fileName string unique not null, " +
- "offset bigint not null)");
- statement.execute(
- "create table if not exists CompletedFiles (" +
- "fileName string primary key not null, " +
- "offset bigint not null)");
- statement.execute(
- "create table if not exists SequenceNumber (" +
- "id integer primary key check (id = 0), " +
- "nextSequenceNumber bigint not null)");
- statement.execute(
- "insert or ignore into SequenceNumber (id, nextSequenceNumber) values (0, 0)");
- }
- connection.setAutoCommit(false);
- connection.setTransactionIsolation(TRANSACTION_SERIALIZABLE);
- return connection;
- } catch (SQLException e) {
- throw new RuntimeException(e);
- }
- }
-
- @VisibleForTesting
- public static RawFileState create(String fileName) {
- final Connection connection = createDatabase(fileName);
- final TransactionCoordinator transactionCoordinator = new TransactionCoordinator(connection, null);
- return new RawFileState(connection, transactionCoordinator);
- }
-
- @Override
- public void close() throws SQLException {
- connection.close();
- }
-
- public void addPendingFiles(List<FileNameWithOffset> files) throws SQLException {
- try (final PreparedStatement insertStatement = connection.prepareStatement(
- "insert or ignore into PendingFiles (fileName, offset) values (?, ?)");
- final AutoRollback autoRollback = new AutoRollback(connection)) {
- for (FileNameWithOffset file: files) {
- insertStatement.setString(1, file.fileName);
- insertStatement.setLong(2, file.offset);
- insertStatement.execute();
- }
- autoRollback.commit();
- }
- }
-
- /**
- * @return ((file name, begin offset), sequence number) or null if there is no pending file
- */
- public Pair<FileNameWithOffset, Long> getNextPendingFile() throws SQLException {
- try (final Statement statement = connection.createStatement();
- final ResultSet rs = statement.executeQuery("select fileName, offset from PendingFiles order by id limit 1")) {
- if (rs.next()) {
- final FileNameWithOffset fileNameWithOffset = new FileNameWithOffset(rs.getString("fileName"), rs.getLong("offset"));
- try (final ResultSet rsSequenceNumber = statement.executeQuery("select nextSequenceNumber from SequenceNumber")) {
- rsSequenceNumber.next();
- final long nextSequenceNumber = rsSequenceNumber.getLong(1);
- return new ImmutablePair<>(fileNameWithOffset, nextSequenceNumber);
- }
- } else {
- return null;
- }
- } finally {
- connection.commit();
- }
- }
-
- public void addCompletedFile(String fileName, long beginOffset, long endOffset, long newNextSequenceNumber, Optional<UUID> txnId) throws SQLException {
- try (final PreparedStatement updateSequenceNumberStatement = connection.prepareStatement(
- "update SequenceNumber set nextSequenceNumber = ?");
- final PreparedStatement insertCompletedFileStatement = connection.prepareStatement(
- "insert or ignore into CompletedFiles (fileName, offset) values (?, ?)");
- final PreparedStatement deletePendingFileStatement = connection.prepareStatement(
- "delete from PendingFiles where fileName = ? and offset <= ?");
- final AutoRollback autoRollback = new AutoRollback(connection)) {
- // Update sequence number.
- updateSequenceNumberStatement.setLong(1, newNextSequenceNumber);
- updateSequenceNumberStatement.execute();
- // Add completed file.
- insertCompletedFileStatement.setString(1, fileName);
- insertCompletedFileStatement.setLong(2, endOffset);
- insertCompletedFileStatement.execute();
- // Remove pending file.
- deletePendingFileStatement.setString(1, fileName);
- deletePendingFileStatement.setLong(2, beginOffset);
- deletePendingFileStatement.execute();
- transactionCoordinator.addTransactionToCommit(txnId);
- autoRollback.commit();
- }
- }
-
- @VisibleForTesting
- public void addCompletedFile(String fileName, long beginOffset, long endOffset, long newNextSequenceNumber) throws SQLException {
- addCompletedFile(fileName, beginOffset, endOffset, newNextSequenceNumber, Optional.empty());
- }
-
- public void deleteTransactionToCommit(Optional<UUID> txnId) {
- transactionCoordinator.deleteTransactionToCommit(txnId);
- }
-
- /**
- * @return list of file name and end offset (file size)
- */
- public List<FileNameWithOffset> getCompletedFiles() throws SQLException {
- try (final Statement statement = connection.createStatement();
- final ResultSet rs = statement.executeQuery("select fileName, offset from completedFiles")) {
- final List<FileNameWithOffset> files = new ArrayList<>();
- while (rs.next()) {
- final FileNameWithOffset fileNameWithOffset = new FileNameWithOffset(rs.getString("fileName"), rs.getLong("offset"));
- files.add(fileNameWithOffset);
- }
- return files;
- } finally {
- connection.commit();
- }
- }
-
- public void deleteCompletedFile(String fileName) throws SQLException {
- try (final PreparedStatement deleteStatement = connection.prepareStatement(
- "delete from CompletedFiles where fileName = ?");
- final AutoRollback autoRollback = new AutoRollback(connection)) {
- deleteStatement.setString(1, fileName);
- deleteStatement.execute();
- autoRollback.commit();
- }
- }
-
-}
diff --git a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/rawfile/RawFileWriterEvent.java b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/rawfile/RawFileWriterEvent.java
deleted file mode 100644
index 4057fd0b..00000000
--- a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/rawfile/RawFileWriterEvent.java
+++ /dev/null
@@ -1,34 +0,0 @@
-/**
- * Copyright (c) Dell Inc., or its subsidiaries. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- */
-package io.pravega.sensor.collector.rawfile;
-
-/**
- * Event generated from file and its sequence number
- */
-public class RawFileWriterEvent {
- public final String routingKey;
- public final long sequenceNumber;
- public final byte[] bytes;
-
- public RawFileWriterEvent(String routingKey, long sequenceNumber, byte[] bytes) {
- this.routingKey = routingKey;
- this.sequenceNumber = sequenceNumber;
- this.bytes = bytes;
- }
-
- @Override
- public String toString() {
- return "PravegaWriterEvent{" +
- "routingKey='" + routingKey + '\'' +
- ", sequenceNumber=" + sequenceNumber +
- ", bytes=" + new String(bytes) +
- '}';
- }
-}
diff --git a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/util/EventWriter.java b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/util/EventWriter.java
index 17cf5d65..742f5d42 100644
--- a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/util/EventWriter.java
+++ b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/util/EventWriter.java
@@ -12,6 +12,7 @@
import io.pravega.client.EventStreamClientFactory;
import io.pravega.client.stream.EventWriterConfig;
import io.pravega.client.stream.Serializer;
+import io.pravega.client.stream.Transaction;
import io.pravega.client.stream.TxnFailedException;
import java.util.Optional;
@@ -71,5 +72,13 @@ static <T> EventWriter<T> create(
*/
void abort();
+ /**
+ * This should be called prior to aborting any transaction, to make sure the transaction is not open.
+ */
+ public Transaction.Status getTransactionStatus(UUID txnId);
+
+ public Transaction.Status getTransactionStatus();
+
void close();
+
}
diff --git a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/rawfile/FileNameWithOffset.java b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/util/FileNameWithOffset.java
similarity index 97%
rename from pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/rawfile/FileNameWithOffset.java
rename to pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/util/FileNameWithOffset.java
index a0f70e98..d161f2d3 100644
--- a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/rawfile/FileNameWithOffset.java
+++ b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/util/FileNameWithOffset.java
@@ -7,7 +7,7 @@
*
* http://www.apache.org/licenses/LICENSE-2.0
*/
-package io.pravega.sensor.collector.rawfile;
+package io.pravega.sensor.collector.util;
import java.util.Objects;
diff --git a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/util/FileUtils.java b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/util/FileUtils.java
new file mode 100644
index 00000000..408ee41e
--- /dev/null
+++ b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/util/FileUtils.java
@@ -0,0 +1,169 @@
+package io.pravega.sensor.collector.util;
+
+import java.io.File;
+import java.io.IOException;
+import java.nio.channels.FileChannel;
+import java.nio.channels.FileLock;
+import java.nio.file.DirectoryStream;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.nio.file.StandardCopyOption;
+import java.nio.file.StandardOpenOption;
+import java.nio.file.attribute.BasicFileAttributes;
+import java.nio.file.attribute.FileTime;
+import java.util.List;
+import java.util.ArrayList;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class FileUtils {
+
+ private static final Logger log = LoggerFactory.getLogger(FileUtils.class);
+ final static String separator = ",";
+ public static final String FAILED_FILES = "Failed_Files";
+ public static final String COMPLETED_FILES = "Completed_Files";
+
+ /**
+ * @return list of file names and file sizes in bytes.
+ * Handles the following cases:
+ * 1. If the given file path does not exist, log a message and continue.
+ * 2. If the directory does not exist, or there is no file with the given extension (for example .csv), log a message and continue.
+ * 3. Check for empty files; log a message and continue with the valid files.
+ */
+ static public List<FileNameWithOffset> getDirectoryListing(String fileSpec, String fileExtension, Path movedFilesDirectory, long minTimeInMillisToUpdateFile) throws IOException {
+ String[] directories= fileSpec.split(separator);
+ List directoryListing = new ArrayList<>();
+ for (String directory : directories) {
+ final Path pathSpec = Paths.get(directory);
+ if (!Files.isDirectory(pathSpec.toAbsolutePath())) {
+ log.error("getDirectoryListing: Directory does not exist or spec is not valid : {}", pathSpec.toAbsolutePath());
+ throw new IOException("Directory does not exist or spec is not valid");
+ }
+ getDirectoryFiles(pathSpec, fileExtension, directoryListing, movedFilesDirectory, minTimeInMillisToUpdateFile);
+ }
+ return directoryListing;
+ }
+
+ /**
+ * get all files in directory(including subdirectories) and their respective file size in bytes
+ */
+ static protected void getDirectoryFiles(Path pathSpec, String fileExtension, List<FileNameWithOffset> directoryListing, Path movedFilesDirectory, long minTimeInMillisToUpdateFile) throws IOException {
+ DirectoryStream.Filter<Path> lastModifiedTimeFilter = getLastModifiedTimeFilter(minTimeInMillisToUpdateFile);
+ try (DirectoryStream<Path> dirStream = Files.newDirectoryStream(pathSpec, lastModifiedTimeFilter)) {
+ for(Path path: dirStream){
+ if(Files.isDirectory(path)) //traverse subdirectories
+ getDirectoryFiles(path, fileExtension, directoryListing, movedFilesDirectory, minTimeInMillisToUpdateFile);
+ else {
+ FileNameWithOffset fileEntry = new FileNameWithOffset(path.toAbsolutePath().toString(), path.toFile().length());
+ if(isValidFile(fileEntry, fileExtension))
+ directoryListing.add(fileEntry);
+ else //move failed file to different folder
+ moveFailedFile(fileEntry, movedFilesDirectory);
+ }
+ }
+ } catch(Exception ex){
+ if(ex instanceof IOException){
+ log.error("getDirectoryListing: Directory does not exist or spec is not valid : {}", pathSpec.toAbsolutePath());
+ throw new IOException("Directory does not exist or spec is not valid");
+ } else{
+ log.error("getDirectoryListing: Exception while listing files: {}", pathSpec.toAbsolutePath());
+ throw new IOException(ex);
+ }
+ }
+ }
+
+ /**
+ * The last modified time filter for files older than #{timeBefore} milliseconds from current timestamp.
+ * This filter helps to eliminate the files that are partially written in to lookup directory by external services.
+ */
+ private static DirectoryStream.Filter<Path> getLastModifiedTimeFilter(long minTimeInMillisToUpdateFile) {
+ log.debug("getLastModifiedTimeFilter: minTimeInMillisToUpdateFile: {}", minTimeInMillisToUpdateFile);
+ return entry -> {
+ BasicFileAttributes attr = Files.readAttributes(entry, BasicFileAttributes.class);
+ if(attr.isDirectory()) {
+ return true;
+ }
+ FileTime fileTime = attr.lastModifiedTime();
+ return (fileTime.toMillis() <= (System.currentTimeMillis() - minTimeInMillisToUpdateFile));
+ };
+ }
+
+ /*
+ File validation checks:
+ 1. Is the file empty?
+ 2. If the extension is empty, or the file has a matching extension, the file can be ingested.
+ */
+ public static boolean isValidFile(FileNameWithOffset fileEntry, String fileExtension) {
+
+ if(fileEntry.offset<=0){
+ log.warn("isValidFile: Empty file {} can not be processed",fileEntry.fileName);
+ }
+ // If extension is null, ingest all files
+ else if(fileExtension.isEmpty() || fileExtension.equals(fileEntry.fileName.substring(fileEntry.fileName.lastIndexOf(".")+1)))
+ return true;
+ else
+ log.warn("isValidFile: File format {} is not supported ", fileEntry.fileName);
+
+ return false;
+ }
+
+ static void moveFailedFile(FileNameWithOffset fileEntry, Path filesDirectory) throws IOException {
+ Path sourcePath = Paths.get(fileEntry.fileName);
+ Path targetPath = filesDirectory.resolve(FAILED_FILES).resolve(sourcePath.getFileName());
+ moveFile(sourcePath, targetPath);
+ }
+
+ public static void moveCompletedFile(FileNameWithOffset fileEntry, Path filesDirectory) throws IOException {
+ Path sourcePath = Paths.get(fileEntry.fileName);
+ Path completedFilesPath = filesDirectory.resolve(COMPLETED_FILES);
+ String completedFileName = FileUtils.createCompletedFileName(filesDirectory, fileEntry.fileName);
+ Path targetPath = completedFilesPath.resolve(completedFileName);
+ moveFile(sourcePath, targetPath);
+ }
+
+ /**
+ * To keep same file name of different directories in completed file directory.
+ * Creating completed file name with _ instead of /, so that it includes all subdirectories.
+ * If the full file name is greater than 255 characters, it will be truncated to 255 characters.
+ */
+ public static String createCompletedFileName(Path completedFilesDir, String fileName) {
+ if(fileName==null || fileName.isEmpty() || completedFilesDir==null) {
+ return fileName;
+ }
+
+ int validFileNameLength = 255 - completedFilesDir.toString().length();
+
+ if(fileName.length() > validFileNameLength) {
+ fileName = fileName.substring(fileName.indexOf(File.separator, fileName.length() - validFileNameLength-1));
+ }
+ return fileName.replace(File.separator,"_");
+ }
+
+ /*
+ Move a file to the target directory after obtaining a lock on it
+ */
+ static void moveFile(Path sourcePath, Path targetPath) throws IOException {
+ Files.createDirectories(targetPath.getParent());
+ //Obtain a lock on file before moving
+ try(FileChannel channel = FileChannel.open(sourcePath, StandardOpenOption.WRITE)) {
+ try(FileLock lock = channel.tryLock()) {
+ if(lock!=null){
+ Files.move(sourcePath, targetPath, StandardCopyOption.REPLACE_EXISTING);
+ log.debug("movedFile: Moved file from {} to {}", sourcePath, targetPath);
+ lock.release();
+ }
+ else{
+ log.warn("Unable to obtain lock on file {} for moving. File is locked by another process.", sourcePath);
+ throw new Exception();
+ }
+ }
+ } catch (Exception e) {
+ log.warn("Unable to move file {}", e.getMessage());
+ log.warn("File will be moved on the next iteration.");
+ // We can continue on this error. Moving will be retried on the next iteration.
+ }
+ }
+}
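
For orientation, here is a minimal sketch of how the new `FileUtils` helpers might be driven. The directory names, file extension, and settle time below are illustrative assumptions, not values taken from the collector's configuration.

```java
import io.pravega.sensor.collector.util.FileNameWithOffset;
import io.pravega.sensor.collector.util.FileUtils;

import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.List;

public class FileUtilsUsageSketch {
    public static void main(String[] args) throws Exception {
        // Comma-separated lookup directories, as accepted by getDirectoryListing.
        String fileSpec = "/opt/pravega-sensor-collector/files1,/opt/pravega-sensor-collector/files2";
        // Invalid files are moved under this directory's Failed_Files subdirectory.
        Path movedFilesDirectory = Paths.get("/opt/pravega-sensor-collector/moved");
        // Ignore files modified within the last 5 seconds (assumed settle time).
        long minTimeInMillisToUpdateFile = 5000;

        List<FileNameWithOffset> listing =
                FileUtils.getDirectoryListing(fileSpec, "parquet", movedFilesDirectory, minTimeInMillisToUpdateFile);
        for (FileNameWithOffset file : listing) {
            System.out.println(file.fileName + " (" + file.offset + " bytes)");
            // After successful ingestion, a caller could archive the file:
            // FileUtils.moveCompletedFile(file, movedFilesDirectory);
        }
    }
}
```
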
diff --git a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/util/NonTransactionalEventWriter.java b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/util/NonTransactionalEventWriter.java
index 6117193c..8c4c2682 100644
--- a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/util/NonTransactionalEventWriter.java
+++ b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/util/NonTransactionalEventWriter.java
@@ -10,6 +10,7 @@
package io.pravega.sensor.collector.util;
import io.pravega.client.stream.EventStreamWriter;
+import io.pravega.client.stream.Transaction;
import io.pravega.client.stream.TxnFailedException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -53,6 +54,16 @@ public void commit(UUID txnId) throws TxnFailedException {
public void abort() {
}
+ @Override
+ public Transaction.Status getTransactionStatus(UUID txnId) {
+ throw new UnsupportedOperationException("Non-transactional writer does not have a transaction status");
+ }
+
+ @Override
+ public Transaction.Status getTransactionStatus() {
+ throw new UnsupportedOperationException("Non-transactional writer does not have a transaction status");
+ }
+
public void close() {
writer.close();
}
diff --git a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/parquet/PravegaWriterEvent.java b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/util/PravegaWriterEvent.java
similarity index 95%
rename from pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/parquet/PravegaWriterEvent.java
rename to pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/util/PravegaWriterEvent.java
index 9bff0a0b..e5231fe3 100644
--- a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/parquet/PravegaWriterEvent.java
+++ b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/util/PravegaWriterEvent.java
@@ -7,7 +7,7 @@
*
* http://www.apache.org/licenses/LICENSE-2.0
*/
-package io.pravega.sensor.collector.parquet;
+package io.pravega.sensor.collector.util;
/**
* Event generated from file and its sequence number
diff --git a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/util/SQliteDBUtility.java b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/util/SQliteDBUtility.java
new file mode 100644
index 00000000..c0249ac4
--- /dev/null
+++ b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/util/SQliteDBUtility.java
@@ -0,0 +1,41 @@
+package io.pravega.sensor.collector.util;
+
+import java.sql.Connection;
+import java.sql.DriverManager;
+import java.sql.SQLException;
+import java.sql.Statement;
+
+import static java.sql.Connection.TRANSACTION_SERIALIZABLE;
+
+public class SQliteDBUtility {
+
+ public static Connection createDatabase(String fileName) {
+ try {
+ final Connection connection = DriverManager.getConnection("jdbc:sqlite:" + fileName);
+ try (final Statement statement = connection.createStatement()) {
+ // Use SQLite exclusive locking mode to ensure that another process or device driver instance is not using this database.
+ //statement.execute("PRAGMA locking_mode = EXCLUSIVE");
+ statement.execute(
+ "create table if not exists PendingFiles (" +
+ "id integer primary key autoincrement, " +
+ "fileName string unique not null, " +
+ "offset bigint not null)");
+ statement.execute(
+ "create table if not exists CompletedFiles (" +
+ "fileName string primary key not null, " +
+ "offset bigint not null)");
+ statement.execute(
+ "create table if not exists SequenceNumber (" +
+ "id integer primary key check (id = 0), " +
+ "nextSequenceNumber bigint not null)");
+ statement.execute(
+ "insert or ignore into SequenceNumber (id, nextSequenceNumber) values (0, 0)");
+ }
+ connection.setAutoCommit(false);
+ connection.setTransactionIsolation(TRANSACTION_SERIALIZABLE);
+ return connection;
+ } catch (SQLException e) {
+ throw new RuntimeException(e);
+ }
+ }
+}
diff --git a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/util/TransactionCoordinator.java b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/util/TransactionCoordinator.java
index e06a610d..f5c7dfb9 100644
--- a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/util/TransactionCoordinator.java
+++ b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/util/TransactionCoordinator.java
@@ -9,6 +9,7 @@
*/
package io.pravega.sensor.collector.util;
+import io.pravega.client.stream.Transaction;
import io.pravega.client.stream.TxnFailedException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -139,9 +140,9 @@ protected List<UUID> getTransactionsToCommit() {
public void performRecovery() {
 final List<UUID> transactionsToCommit = getTransactionsToCommit();
if (transactionsToCommit.isEmpty()) {
- log.debug("Transaction recovery not needed");
+ log.info("performRecovery: No transactions to be recovered");
} else {
- log.warn("Transaction recovery needed on {} transactions", transactionsToCommit.size());
+ log.info("Transaction recovery needed on {} transactions", transactionsToCommit.size());
transactionsToCommit.forEach((txnId) -> {
log.info("Committing transaction {} from a previous process", txnId);
try {
@@ -160,6 +161,9 @@ public void performRecovery() {
txnId, e);
// Continue recovery and run as normal.
} else {
+ log.error(
+ "Unable to commit transaction {} from a previous process. Events may have been lost. " +
+ "Try increasing the transaction timeout.", txnId, e);
throw e;
}
}
diff --git a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/util/TransactionStateDB.java b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/util/TransactionStateDB.java
new file mode 100644
index 00000000..61580c15
--- /dev/null
+++ b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/util/TransactionStateDB.java
@@ -0,0 +1,87 @@
+package io.pravega.sensor.collector.util;
+
+import org.apache.commons.lang3.tuple.Pair;
+
+import java.sql.SQLException;
+import java.util.List;
+import java.util.Optional;
+import java.util.UUID;
+
+public interface TransactionStateDB {
+
+ /**
+ * Add file names and begin offsets to the PendingFiles table.
+ *
+ * @param files list of file names with offsets.
+ */
+ public void addPendingFileRecords(List<FileNameWithOffset> files) throws SQLException;
+
+ /**
+ * Get next file to process. Read the file name with begin offset from PendingFiles table and sequence number from SequenceNumber table.
+ *
+ * @return ((file name, begin offset), sequence number) or null if there is no pending file
+ */
+ public Pair<FileNameWithOffset, Long> getNextPendingFileRecord() throws SQLException;
+
+ /**
+ * Updates the following details:
+ * 1. Update the sequence number in the SequenceNumber table.
+ * 2. Add an entry to the CompletedFiles table for the given file name and end offset.
+ * 3. Delete all entries from PendingFiles for the given file name with an offset less than or equal to the given begin offset.
+ * 4. Add the transaction id to the TransactionsToCommit table, if provided.
+ *
+ * @param fileName file name of the processed file
+ * @param beginOffset begin offset from where the file read starts
+ * @param endOffset end offset where the read ends
+ * @param newNextSequenceNumber next sequence number
+ * @param txnId transaction id (optional) from Pravega
+ */
+ public void addCompletedFileRecord(String fileName, long beginOffset, long endOffset, long newNextSequenceNumber, Optional<UUID> txnId) throws SQLException;
+
+ /**
+ * Delete record from pendingFiles table
+ *
+ * @param fileName file name of pending file
+ * @param beginOffset begin offset from where file read starts
+ */
+ void deletePendingFile(String fileName, long beginOffset) throws SQLException;
+
+ /**
+ * Updates the following details:
+ * 1. Update the sequence number in the SequenceNumber table.
+ * 2. Add an entry to the CompletedFiles table for the given file name and end offset.
+ * 3. Delete all entries from PendingFiles for the given file name with an offset less than or equal to the given begin offset.
+ *
+ * @param fileName file name of the processed file
+ * @param beginOffset begin offset from where the file read starts
+ * @param endOffset end offset where the read ends
+ * @param newNextSequenceNumber next sequence number
+ */
+ public void addCompletedFileRecord(String fileName, long beginOffset, long endOffset, long newNextSequenceNumber) throws SQLException;
+
+ /**
+ * Delete record from TransactionsToCommit table
+ *
+ * @param txnId transaction id
+ */
+ public void deleteTransactionToCommit(Optional<UUID> txnId);
+
+ /**
+ * Get a list of files from completedFiles table
+ *
+ * @return list of file name and end offset (file size)
+ */
+ public List<FileNameWithOffset> getCompletedFileRecords() throws SQLException;
+
+ /**
+ * Delete completed file record from completedFiles table for given file name
+ *
+ * @param fileName file name
+ */
+ public void deleteCompletedFileRecord(String fileName) throws SQLException;
+
+
+
+}
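
As a rough illustration of how a file processor might drive this interface, the sketch below walks the completed-file records and removes both the files and their records. It is simplified: the processor code elsewhere in this change also obtains a file lock before deleting, and the class and method names here are hypothetical.

```java
import io.pravega.sensor.collector.util.FileNameWithOffset;
import io.pravega.sensor.collector.util.TransactionStateDB;

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.sql.SQLException;
import java.util.List;

public class CompletedFileCleanupSketch {
    // Delete each completed file from disk, then drop its record from the state database.
    static void deleteCompletedFiles(TransactionStateDB state) throws SQLException, IOException {
        List<FileNameWithOffset> completedFiles = state.getCompletedFileRecords();
        for (FileNameWithOffset file : completedFiles) {
            Files.deleteIfExists(Paths.get(file.fileName));
            // Only remove the record once the file itself is gone.
            state.deleteCompletedFileRecord(file.fileName);
        }
    }
}
```
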
diff --git a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/util/TransactionStateInMemoryImpl.java b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/util/TransactionStateInMemoryImpl.java
new file mode 100644
index 00000000..1d26665a
--- /dev/null
+++ b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/util/TransactionStateInMemoryImpl.java
@@ -0,0 +1,33 @@
+package io.pravega.sensor.collector.util;
+
+import com.google.common.annotations.VisibleForTesting;
+import org.apache.commons.lang3.tuple.ImmutablePair;
+import org.apache.commons.lang3.tuple.Pair;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.sql.*;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Optional;
+import java.util.UUID;
+
+/**
+ * Maintain state of pending and completed files in in-memory database.
+ */
+public class TransactionStateInMemoryImpl extends TransactionStateSQLiteImpl{
+
+ private static final Logger log = LoggerFactory.getLogger(TransactionStateInMemoryImpl.class);
+
+
+ public TransactionStateInMemoryImpl(Connection connection, TransactionCoordinator transactionCoordinator) {
+ super(connection, transactionCoordinator);
+ }
+ @VisibleForTesting
+ public static TransactionStateInMemoryImpl create(String fileName) {
+ final Connection connection = SQliteDBUtility.createDatabase(fileName);
+ final TransactionCoordinator transactionCoordinator = new TransactionCoordinator(connection, null);
+ return new TransactionStateInMemoryImpl(connection, transactionCoordinator);
+ }
+
+}
diff --git a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/util/TransactionStateSQLiteImpl.java b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/util/TransactionStateSQLiteImpl.java
new file mode 100644
index 00000000..e5d857fa
--- /dev/null
+++ b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/util/TransactionStateSQLiteImpl.java
@@ -0,0 +1,209 @@
+/**
+ * Copyright (c) Dell Inc., or its subsidiaries. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ */
+package io.pravega.sensor.collector.util;
+
+
+import com.google.common.annotations.VisibleForTesting;
+import org.apache.commons.lang3.tuple.ImmutablePair;
+import org.apache.commons.lang3.tuple.Pair;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.sql.*;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Optional;
+import java.util.UUID;
+
+/**
+ * Maintain state of pending and completed files in SQLite database.
+*/
+public class TransactionStateSQLiteImpl implements AutoCloseable, TransactionStateDB {
+ private static final Logger log = LoggerFactory.getLogger(TransactionStateSQLiteImpl.class);
+
+ private final Connection connection;
+ private final TransactionCoordinator transactionCoordinator;
+
+ public TransactionStateSQLiteImpl(Connection connection, TransactionCoordinator transactionCoordinator) {
+ this.connection = connection;
+ this.transactionCoordinator = transactionCoordinator;
+ }
+
+ @Override
+ public void close() throws SQLException {
+ connection.close();
+ }
+
+ /**
+ * Add file names and begin offsets to the PendingFiles table.
+ *
+ * @param files list of file names with offsets.
+ */
+ @Override
+ public void addPendingFileRecords(List<FileNameWithOffset> files) throws SQLException {
+ try (final PreparedStatement insertStatement = connection.prepareStatement(
+ "insert or ignore into PendingFiles (fileName, offset) values (?, ?)");
+ final AutoRollback autoRollback = new AutoRollback(connection)) {
+ for (FileNameWithOffset file: files) {
+ insertStatement.setString(1, file.fileName);
+ insertStatement.setLong(2, file.offset);
+ insertStatement.execute();
+ }
+ autoRollback.commit();
+ }
+ }
+
+ /**
+ * Get next file to process. Read the file name with begin offset from PendingFiles table and sequence number from SequenceNumber table.
+ *
+ * @return ((file name, begin offset), sequence number) or null if there is no pending file
+ */
+ @Override
+ public Pair<FileNameWithOffset, Long> getNextPendingFileRecord() throws SQLException {
+ try (final Statement statement = connection.createStatement();
+ final ResultSet rs = statement.executeQuery("select fileName, offset from PendingFiles order by id limit 1")) {
+ if (rs.next()) {
+ final FileNameWithOffset fileNameWithOffset = new FileNameWithOffset(rs.getString("fileName"), rs.getLong("offset"));
+ try (final ResultSet rsSequenceNumber = statement.executeQuery("select nextSequenceNumber from SequenceNumber")) {
+ rsSequenceNumber.next();
+ final long nextSequenceNumber = rsSequenceNumber.getLong(1);
+ return new ImmutablePair<>(fileNameWithOffset, nextSequenceNumber);
+ }
+ } else {
+ return null;
+ }
+ } finally {
+ connection.commit();
+ }
+ }
+
+
+ /**
+ * Updates the following details:
+ * 1. Update the sequence number in the SequenceNumber table.
+ * 2. Add an entry to the CompletedFiles table for the given file name and end offset.
+ * 3. Delete all entries from PendingFiles for the given file name with an offset less than or equal to the given begin offset.
+ * 4. Add the transaction id to the TransactionsToCommit table, if provided.
+ *
+ * @param fileName file name of the processed file
+ * @param beginOffset begin offset from where the file read starts
+ * @param endOffset end offset where the read ends
+ * @param newNextSequenceNumber next sequence number
+ * @param txnId transaction id (optional) from Pravega
+ */
+ @Override
+ public void addCompletedFileRecord(String fileName, long beginOffset, long endOffset, long newNextSequenceNumber, Optional<UUID> txnId) throws SQLException {
+ try (final PreparedStatement updateSequenceNumberStatement = connection.prepareStatement(
+ "update SequenceNumber set nextSequenceNumber = ?");
+ final PreparedStatement insertCompletedFileStatement = connection.prepareStatement(
+ "insert or ignore into CompletedFiles (fileName, offset) values (?, ?)");
+ final PreparedStatement deletePendingFileStatement = connection.prepareStatement(
+ "delete from PendingFiles where fileName = ? and offset <= ?");
+ final AutoRollback autoRollback = new AutoRollback(connection)) {
+ // Update sequence number.
+ updateSequenceNumberStatement.setLong(1, newNextSequenceNumber);
+ updateSequenceNumberStatement.execute();
+ // Add completed file.
+ insertCompletedFileStatement.setString(1, fileName);
+ insertCompletedFileStatement.setLong(2, endOffset);
+ insertCompletedFileStatement.execute();
+ // Remove pending file.
+ deletePendingFileStatement.setString(1, fileName);
+ deletePendingFileStatement.setLong(2, beginOffset);
+ deletePendingFileStatement.execute();
+ transactionCoordinator.addTransactionToCommit(txnId);
+ autoRollback.commit();
+ }
+ }
+
+ /**
+ * Delete record from PendingFiles table
+ *
+ * @param fileName file name of pending file
+ * @param beginOffset begin offset from where file read starts
+ */
+ @Override
+ public void deletePendingFile(String fileName, long beginOffset) throws SQLException {
+ try (final PreparedStatement deletePendingFileStatement = connection.prepareStatement(
+ "delete from PendingFiles where fileName = ? and offset <= ?");) {
+ // Remove pending file.
+ deletePendingFileStatement.setString(1, fileName);
+ deletePendingFileStatement.setLong(2, beginOffset);
+ deletePendingFileStatement.execute();
+ }
+ }
+
+
+ /**
+ * Updates the following details:
+ * 1. Update the sequence number in the SequenceNumber table.
+ * 2. Add an entry to the CompletedFiles table for the given file name and end offset.
+ * 3. Delete all entries from PendingFiles for the given file name with an offset less than or equal to the given begin offset.
+ *
+ * @param fileName file name of the processed file
+ * @param beginOffset begin offset from where the file read starts
+ * @param endOffset end offset where the read ends
+ * @param newNextSequenceNumber next sequence number
+ */
+ @Override
+ @VisibleForTesting
+ public void addCompletedFileRecord(String fileName, long beginOffset, long endOffset, long newNextSequenceNumber) throws SQLException {
+ addCompletedFileRecord(fileName, beginOffset, endOffset, newNextSequenceNumber, Optional.empty());
+ }
+
+ /**
+ * Delete record from TransactionsToCommit table
+ *
+ * @param txnId transaction id
+ */
+ @Override
+ public void deleteTransactionToCommit(Optional<UUID> txnId) {
+ transactionCoordinator.deleteTransactionToCommit(txnId);
+ }
+
+ /**
+ * Get a list of files from completedFiles table
+ *
+ * @return list of file name and end offset (file size)
+ */
+ @Override
+ public List<FileNameWithOffset> getCompletedFileRecords() throws SQLException {
+ try (final Statement statement = connection.createStatement();
+ final ResultSet rs = statement.executeQuery("select fileName, offset from completedFiles")) {
+ final List<FileNameWithOffset> files = new ArrayList<>();
+ while (rs.next()) {
+ final FileNameWithOffset fileNameWithOffset = new FileNameWithOffset(rs.getString("fileName"), rs.getLong("offset"));
+ files.add(fileNameWithOffset);
+ }
+ return files;
+ } finally {
+ connection.commit();
+ }
+ }
+
+ /**
+ * Delete completed file record from completedFiles table for given file name
+ *
+ * @param fileName file name
+ */
+ @Override
+ public void deleteCompletedFileRecord(String fileName) throws SQLException {
+ try (final PreparedStatement deleteStatement = connection.prepareStatement(
+ "delete from CompletedFiles where fileName = ?");
+ final AutoRollback autoRollback = new AutoRollback(connection)) {
+ deleteStatement.setString(1, fileName);
+ deleteStatement.execute();
+ autoRollback.commit();
+ }
+ }
+}
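
A minimal sketch of wiring the pieces above together against an on-disk state database. The database path, data file name, offsets, and sequence-number arithmetic are illustrative assumptions only, not the collector's actual flow.

```java
import io.pravega.sensor.collector.util.FileNameWithOffset;
import io.pravega.sensor.collector.util.SQliteDBUtility;
import io.pravega.sensor.collector.util.TransactionCoordinator;
import io.pravega.sensor.collector.util.TransactionStateSQLiteImpl;

import org.apache.commons.lang3.tuple.Pair;

import java.sql.Connection;
import java.util.Collections;
import java.util.Optional;

public class StateDbSketch {
    public static void main(String[] args) throws Exception {
        final Connection connection = SQliteDBUtility.createDatabase("/tmp/psc-state.db");
        // No Pravega writer here, so the coordinator is created without one (as the tests do).
        final TransactionCoordinator transactionCoordinator = new TransactionCoordinator(connection, null);
        try (TransactionStateSQLiteImpl state = new TransactionStateSQLiteImpl(connection, transactionCoordinator)) {
            state.addPendingFileRecords(Collections.singletonList(new FileNameWithOffset("/tmp/data-1.parquet", 0)));
            Pair<FileNameWithOffset, Long> next = state.getNextPendingFileRecord();
            if (next != null) {
                FileNameWithOffset file = next.getLeft();
                long sequenceNumber = next.getRight();
                // After the file is ingested, record completion; 1024 stands in for the bytes read.
                state.addCompletedFileRecord(file.fileName, file.offset, 1024L, sequenceNumber + 1, Optional.empty());
            }
        }
    }
}
```
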
diff --git a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/util/TransactionalEventWriter.java b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/util/TransactionalEventWriter.java
index 7bae945a..d53c0c87 100644
--- a/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/util/TransactionalEventWriter.java
+++ b/pravega-sensor-collector/src/main/java/io/pravega/sensor/collector/util/TransactionalEventWriter.java
@@ -9,6 +9,7 @@
*/
package io.pravega.sensor.collector.util;
+import com.google.common.base.Preconditions;
import io.pravega.client.stream.Transaction;
import io.pravega.client.stream.TransactionalEventStreamWriter;
import io.pravega.client.stream.TxnFailedException;
@@ -17,6 +18,7 @@
import java.util.Optional;
import java.util.UUID;
+import java.util.concurrent.CompletableFuture;
 public class TransactionalEventWriter<T> implements EventWriter<T> {
private static final Logger log = LoggerFactory.getLogger(TransactionalEventWriter.class);
@@ -65,10 +67,19 @@ public void commit(long timestamp) throws TxnFailedException {
currentTxn = null;
}
}
+ private boolean canCommitTransaction(UUID txnId){
+ Transaction.Status transactionStatus = writer.getTxn(txnId).checkStatus();
+ log.info("canCommitTransaction: Status of Transaction id {} is {}", txnId, transactionStatus);
+ return transactionStatus == Transaction.Status.OPEN;
+ }
public void commit(UUID txnId) throws TxnFailedException {
- log.info("commit: committing transaction {}", txnId);
- writer.getTxn(txnId).commit();
+ /* Check the transaction status before committing.
+ Only transactions that are in OPEN status can be committed. */
+ if(canCommitTransaction(txnId)){
+ log.info("commit: committing transaction {}", txnId);
+ writer.getTxn(txnId).commit();
+ }
}
public void abort() {
@@ -79,6 +90,17 @@ public void abort() {
}
}
+ public Transaction.Status getTransactionStatus() {
+ if (currentTxn != null) {
+ return currentTxn.checkStatus();
+ }
+ return null;
+ }
+ public Transaction.Status getTransactionStatus(UUID txnId) {
+ return writer.getTxn(txnId).checkStatus();
+ }
+
+
public void close() {
try {
abort();
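
For reference, the new guard only commits transactions that are still open. Below is a minimal, self-contained sketch of the same check written directly against the Pravega client API; the writer instance and its stream setup are assumed to exist, and the class and method names are illustrative rather than part of this change.

```java
import io.pravega.client.stream.Transaction;
import io.pravega.client.stream.TransactionalEventStreamWriter;
import io.pravega.client.stream.TxnFailedException;

import java.util.UUID;

public class CommitIfOpenSketch {
    /**
     * Commits the transaction only while it is still OPEN. A transaction that is already
     * committing, committed, or aborted is skipped, so commit() is never called on it.
     */
    static void commitIfOpen(TransactionalEventStreamWriter<String> writer, UUID txnId) throws TxnFailedException {
        Transaction<String> txn = writer.getTxn(txnId);
        if (txn.checkStatus() == Transaction.Status.OPEN) {
            txn.commit();
        }
    }
}
```
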
diff --git a/pravega-sensor-collector/src/test/java/io/pravega/sensor/collector/PravegaClientConfigTests.java b/pravega-sensor-collector/src/test/java/io/pravega/sensor/collector/PravegaClientConfigTests.java
new file mode 100644
index 00000000..2f59332a
--- /dev/null
+++ b/pravega-sensor-collector/src/test/java/io/pravega/sensor/collector/PravegaClientConfigTests.java
@@ -0,0 +1,62 @@
+package io.pravega.sensor.collector;
+
+import org.junit.jupiter.api.Test;
+
+import java.net.URI;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+
+import static org.junit.Assert.assertEquals;
+
+public class PravegaClientConfigTests {
+
+ @Test
+ public void testConstructorWithValues(){
+ URI uri = URI.create("tcp://localhost:9090");
+ String scopeName = "testScope";
+ PravegaClientConfig conf = new PravegaClientConfig(uri,scopeName);
+ assertEquals(scopeName, conf.getScopeName());
+ assertEquals(uri, conf.toClientConfig().getControllerURI());
+
+ }
+
+ @Test
+ public void testConstructorWithProperties() {
+ URI uri = URI.create("tcp://example.com:9090");
+ String scopeName = "testScope";
+
+ Map<String, String> properties = new HashMap<>();
+ properties.put("PRAVEGA_CONTROLLER_URI", uri.toString());
+ PravegaClientConfig configFile = new PravegaClientConfig(properties, scopeName);
+ assertEquals(uri, configFile.toClientConfig().getControllerURI());
+ assertEquals(scopeName, configFile.getScopeName());
+ }
+
+ @Test
+ public void testConstructorWithPropertiesDefaultURI() {
+ String scopeName = "testScope";
+
+ Map<String, String> properties = Collections.emptyMap();
+
+ PravegaClientConfig configFile = new PravegaClientConfig(properties, scopeName);
+
+ assertEquals(URI.create("tcp://localhost:9090"), configFile.toClientConfig().getControllerURI());
+ assertEquals(scopeName, configFile.getScopeName());
+ }
+
+ @Test
+ public void testEqualsAndHashCode() {
+ URI uri1 = URI.create("tcp://localhost:9090");
+ String scopeName1 = "testScope1";
+ PravegaClientConfig configFile1 = new PravegaClientConfig(uri1, scopeName1);
+
+ URI uri2 = URI.create("tcp://localhost:9090");
+ String scopeName2 = "testScope1";
+ PravegaClientConfig configFile2 = new PravegaClientConfig(uri2, scopeName2);
+
+ assertEquals(configFile1, configFile2);
+ assertEquals(configFile1.hashCode(), configFile2.hashCode());
+ }
+
+}
diff --git a/pravega-sensor-collector/src/test/java/io/pravega/sensor/collector/file/FileIngestServiceTest.java b/pravega-sensor-collector/src/test/java/io/pravega/sensor/collector/file/FileIngestServiceTest.java
new file mode 100644
index 00000000..508a3a84
--- /dev/null
+++ b/pravega-sensor-collector/src/test/java/io/pravega/sensor/collector/file/FileIngestServiceTest.java
@@ -0,0 +1,20 @@
+package io.pravega.sensor.collector.file;
+
+import io.pravega.sensor.collector.DeviceDriver;
+import io.pravega.sensor.collector.DeviceDriverConfig;
+import io.pravega.sensor.collector.DeviceDriverManager;
+import io.pravega.sensor.collector.file.rawfile.RawFileIngestService;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+import org.mockito.Mock;
+
+import java.sql.SQLException;
+import java.util.Map;
+import java.util.concurrent.ScheduledExecutorService;
+import java.util.concurrent.ScheduledFuture;
+
+import static org.mockito.ArgumentMatchers.anyString;
+import static org.mockito.Mockito.*;
+
+public class FileIngestServiceTest {
+}
diff --git a/pravega-sensor-collector/src/test/java/io/pravega/sensor/collector/file/FileProcessorFactoryTest.java b/pravega-sensor-collector/src/test/java/io/pravega/sensor/collector/file/FileProcessorFactoryTest.java
new file mode 100644
index 00000000..d1d63ee5
--- /dev/null
+++ b/pravega-sensor-collector/src/test/java/io/pravega/sensor/collector/file/FileProcessorFactoryTest.java
@@ -0,0 +1,76 @@
+package io.pravega.sensor.collector.file;
+
+import io.pravega.sensor.collector.file.csvfile.CsvFileSequenceProcessor;
+import io.pravega.sensor.collector.file.parquet.ParquetFileProcessor;
+import io.pravega.sensor.collector.file.rawfile.RawFileProcessor;
+import io.pravega.sensor.collector.util.EventWriter;
+import io.pravega.sensor.collector.util.TransactionCoordinator;
+import io.pravega.sensor.collector.util.TransactionStateInMemoryImpl;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+import org.mockito.Mock;
+import org.mockito.MockitoAnnotations;
+
+public class FileProcessorFactoryTest {
+
+
+ private FileConfig config;
+ @Mock
+ private EventWriter writer;
+ @Mock
+ private TransactionCoordinator transactionCoordinator;
+ @Mock
+ private TransactionStateInMemoryImpl state;
+
+ @BeforeEach
+ public void setUp() {
+ MockitoAnnotations.initMocks(this);
+
+ }
+
+ /*
+ * Test for creating Raw file processor
+ */
+ @Test
+ public void createRAWFileProcessorTest() throws Exception {
+ String stateDatabaseFileName = ":memory:";
+ config = new FileConfig(stateDatabaseFileName,"/opt/pravega-sensor-collector/Files/A","parquet","key12",
+ "stream1","{}",10, false,
+ true,20.0, 5000L,"RawFileIngestService");
+ FileProcessor rawFileProcessor = FileProcessorFactory.createFileSequenceProcessor(config,state,writer,transactionCoordinator,"writerId");
+
+ Assertions.assertTrue(rawFileProcessor instanceof RawFileProcessor);
+
+ }
+
+ /*
+ * Test for creating CSV file processor
+ */
+ @Test
+ public void createCSVFileProcessorTest() throws Exception {
+ String stateDatabaseFileName = ":memory:";
+ config = new FileConfig(stateDatabaseFileName,"/opt/pravega-sensor-collector/Files/A","parquet","key12",
+ "stream1","{}",10, false,
+ true,20.0, 5000L,"CsvFileIngestService");
+ FileProcessor csvFileProcessor = FileProcessorFactory.createFileSequenceProcessor(config,state,writer,transactionCoordinator,"writerId");
+
+ Assertions.assertTrue(csvFileProcessor instanceof CsvFileSequenceProcessor);
+
+ }
+
+ /*
+ * Test for creating PARQUET file processor
+ */
+ @Test
+ public void createParquetFileProcessorTest() throws Exception {
+ String stateDatabaseFileName = ":memory:";
+ config = new FileConfig(stateDatabaseFileName,"/opt/pravega-sensor-collector/Files/A","parquet","key12",
+ "stream1","{}",10, false,
+ true,20.0, 5000L,"ParquetFileIngestService");
+ FileProcessor parquetFileProcessor = FileProcessorFactory.createFileSequenceProcessor(config,state,writer,transactionCoordinator,"writerId");
+
+ Assertions.assertTrue(parquetFileProcessor instanceof ParquetFileProcessor);
+
+ }
+}
diff --git a/pravega-sensor-collector/src/test/java/io/pravega/sensor/collector/file/FileProcessorTests.java b/pravega-sensor-collector/src/test/java/io/pravega/sensor/collector/file/FileProcessorTests.java
new file mode 100644
index 00000000..83e6f247
--- /dev/null
+++ b/pravega-sensor-collector/src/test/java/io/pravega/sensor/collector/file/FileProcessorTests.java
@@ -0,0 +1,175 @@
+package io.pravega.sensor.collector.file;
+
+import com.google.common.collect.ImmutableList;
+import io.pravega.client.EventStreamClientFactory;
+import io.pravega.client.stream.TxnFailedException;
+import io.pravega.sensor.collector.file.rawfile.RawFileProcessor;
+import io.pravega.sensor.collector.util.*;
+import org.apache.commons.lang3.tuple.ImmutablePair;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+import org.junit.runner.RunWith;
+import org.mockito.Mock;
+import org.mockito.Mockito;
+import org.mockito.MockitoAnnotations;
+import org.mockito.junit.MockitoJUnitRunner;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.nio.file.StandardCopyOption;
+import java.util.List;
+
+import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.mockito.ArgumentMatchers.any;
+import static org.mockito.ArgumentMatchers.anyString;
+import static org.mockito.Mockito.doNothing;
+import static org.mockito.Mockito.verify;
+
+
+public class FileProcessorTests {
+ private static final Logger log = LoggerFactory.getLogger(FileProcessorTests.class);
+
+ protected FileConfig config;
+ @Mock
+ protected TransactionStateSQLiteImpl state;
+
+ @Mock
+ private EventWriter writer;
+
+ @Mock
+ protected TransactionalEventWriter transactionalEventWriter;
+
+ @Mock
+ protected TransactionCoordinator transactionCoordinator;
+ @Mock
+ private EventGenerator eventGenerator;
+ @Mock
+ private EventStreamClientFactory clientFactory;
+
+
+ @BeforeEach
+ public void setup(){
+ MockitoAnnotations.initMocks(this);
+ String stateDatabaseFileName = ":memory:";
+ config = new FileConfig("./psc.db","/opt/pravega-sensor-collector/Files/A","parquet","key12",
+ "stream1","{}",10, false,
+ true,20.0, 5000,"RawFileIngestService");
+ }
+
+ @Test
+ public void getNewFilesTest() {
+ final List<FileNameWithOffset> directoryListing = ImmutableList.of(
+ new FileNameWithOffset("file2", 10),
+ new FileNameWithOffset("file4", 10),
+ new FileNameWithOffset("file3", 10));
+ final List<FileNameWithOffset> completedFiles = ImmutableList.of(
+ new FileNameWithOffset("file1", 10),
+ new FileNameWithOffset("file2", 10));
+ final List<FileNameWithOffset> expected = ImmutableList.of(
+ new FileNameWithOffset("file3", 0),
+ new FileNameWithOffset("file4", 0));
+ RawFileProcessor fileProcessor = new RawFileProcessor(config,state, writer, transactionCoordinator, "writerId");
+ final List<FileNameWithOffset> actual = fileProcessor.getNewFiles(directoryListing, completedFiles);
+ Assertions.assertEquals(expected, actual);
+ }
+
+ @Test
+ public void getDirectoryListingTest() throws IOException {
+ final List<FileNameWithOffset> actual = FileUtils.getDirectoryListing(
+ "../log-file-sample-data/","csv", Paths.get("."), 5000);
+ log.info("actual={}", actual);
+ }
+
+ /*
+ * When there are no new files to process in the SQLite DB, nextFiles() returns an empty file set.
+ */
+ @Test
+ public void getEmptyNextFileSet() throws Exception {
+ FileProcessor fileProcessor = FileProcessor.create(config, clientFactory);
+ fileProcessor.processFiles();
+ }
+
+ /*
+ * Process the single file for Raw file processor.
+ */
+ @Test
+ public void processNextFile() throws Exception {
+ copyFile();
+ FileProcessor fileProcessor = new RawFileProcessor(config, state, transactionalEventWriter,transactionCoordinator, "test");
+ doNothing().when(transactionalEventWriter).writeEvent(anyString(), any());
+ fileProcessor.processFile(new FileNameWithOffset("../../pravega-sensor-collector/parquet-file-sample-data/sub1.parquet", 0), 1L);
+ verify(transactionalEventWriter).writeEvent(anyString(), any());
+ }
+
+ /*
+ * Process 3 files in loop
+ */
+ @Test
+ public void processNextFewFiles() throws Exception {
+ copyFile();
+ // Return a different file record for each of the first three invocations and null from the fourth invocation onwards
+ Mockito.when(state.getNextPendingFileRecord())
+ .thenReturn(new ImmutablePair<>(new FileNameWithOffset("../../pravega-sensor-collector/parquet-file-sample-data/sub1.parquet", 0), 1L))
+ .thenReturn(new ImmutablePair<>(new FileNameWithOffset("../../pravega-sensor-collector/parquet-file-sample-data/sub2.parquet", 0), 2L))
+ .thenReturn(new ImmutablePair<>(new FileNameWithOffset("../../pravega-sensor-collector/parquet-file-sample-data/sub3.parquet", 0), 3L))
+ .thenAnswer(invocation -> null);
+
+ FileProcessor fileProcessor = new RawFileProcessor(config, state, transactionalEventWriter,transactionCoordinator, "test");
+ doNothing().when(transactionalEventWriter).writeEvent(anyString(), any());
+ fileProcessor.processNewFiles();
+
+ // Verify that writeEvent was called exactly three times
+ Mockito.verify(transactionalEventWriter, Mockito.times(3)).writeEvent(anyString(), any());
+
+ }
+
+ /*
+ * Process a single file.
+ * Throw a transaction failed exception while writing events.
+ */
+ @Test
+ public void processNextFile_WriteEventException() throws Exception {
+ copyFile();
+ FileProcessor fileProcessor = new RawFileProcessor(config, state, transactionalEventWriter,transactionCoordinator, "test");
+ Mockito.doThrow(TxnFailedException.class).when(transactionalEventWriter).writeEvent(anyString(), any());
+ assertThrows(RuntimeException.class, () -> fileProcessor.processFile(new FileNameWithOffset("../../pravega-sensor-collector/parquet-file-sample-data/sub1.parquet", 0), 1L));
+ // Verify that writeEvent was called exactly once
+ Mockito.verify(transactionalEventWriter, Mockito.times(1)).writeEvent(anyString(), any());
+
+ }
+ /*
+ * Process a single file.
+ * Throw a transaction failed exception while committing the transaction.
+ */
+ @Test
+ public void processNextFile_CommitException() throws Exception {
+ copyFile();
+ FileProcessor fileProcessor = new RawFileProcessor(config, state, transactionalEventWriter,transactionCoordinator, "test");
+ Mockito.doThrow(TxnFailedException.class).when(transactionalEventWriter).commit();
+ assertThrows(RuntimeException.class, () -> fileProcessor.processFile(new FileNameWithOffset("../../pravega-sensor-collector/parquet-file-sample-data/sub1.parquet", 0), 1L));
+ // Verify that commit was called exactly once
+ Mockito.verify(transactionalEventWriter, Mockito.times(1)).commit();
+ }
+
+ /*
+ * Before each test we need to copy the sample files into the parquet-file-sample-data directory so that files are available for processing.
+ * After processing, these files are moved to a different directory, so they must be copied back before the next test run.
+ */
+ public void copyFile() throws IOException {
+ Path sourcePath = Paths.get("../../pravega-sensor-collector/parquet-file-sample-data/test_file/sub1.parquet");
+ Path targetPath = Paths.get("../../pravega-sensor-collector/parquet-file-sample-data/sub1.parquet");
+ Files.copy(sourcePath, targetPath, StandardCopyOption.REPLACE_EXISTING);
+ sourcePath = Paths.get("../../pravega-sensor-collector/parquet-file-sample-data/test_file/sub2.parquet");
+ targetPath = Paths.get("../../pravega-sensor-collector/parquet-file-sample-data/sub2.parquet");
+ Files.copy(sourcePath, targetPath, StandardCopyOption.REPLACE_EXISTING);
+ sourcePath = Paths.get("../../pravega-sensor-collector/parquet-file-sample-data/test_file/sub3.parquet");
+ targetPath = Paths.get("../../pravega-sensor-collector/parquet-file-sample-data/sub3.parquet");
+ Files.copy(sourcePath, targetPath, StandardCopyOption.REPLACE_EXISTING);
+ }
+
+}
diff --git a/pravega-sensor-collector/src/test/java/io/pravega/sensor/collector/file/LogFileSequenceProcessorStateTests.java b/pravega-sensor-collector/src/test/java/io/pravega/sensor/collector/file/LogFileSequenceProcessorStateTests.java
deleted file mode 100644
index 4b3f7aed..00000000
--- a/pravega-sensor-collector/src/test/java/io/pravega/sensor/collector/file/LogFileSequenceProcessorStateTests.java
+++ /dev/null
@@ -1,108 +0,0 @@
-/**
- * Copyright (c) Dell Inc., or its subsidiaries. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- */
-package io.pravega.sensor.collector.file;
-
-import com.google.common.collect.ImmutableList;
-import com.google.common.collect.ImmutableSet;
-import org.apache.commons.lang3.tuple.ImmutablePair;
-import org.junit.Assert;
-import org.junit.Test;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.sql.SQLException;
-import java.util.HashSet;
-import java.util.List;
-
-public class LogFileSequenceProcessorStateTests {
- private static final Logger log = LoggerFactory.getLogger(LogFileSequenceProcessorStateTests.class);
-
- @Test
- public void pendingFilesTest() throws SQLException {
- final String stateDatabaseFileName = ":memory:";
- final LogFileSequenceProcessorState state = LogFileSequenceProcessorState.create(stateDatabaseFileName);
- Assert.assertNull(state.getNextPendingFile());
- state.addPendingFiles(ImmutableList.of(new FileNameWithOffset("file1.csv", 0L)));
- Assert.assertEquals(new ImmutablePair<>(new FileNameWithOffset("file1.csv", 0L), 0L), state.getNextPendingFile());
- state.addPendingFiles(ImmutableList.of(new FileNameWithOffset("file2.csv", 0L)));
- Assert.assertEquals(new ImmutablePair<>(new FileNameWithOffset("file1.csv", 0L), 0L), state.getNextPendingFile());
- state.addPendingFiles(ImmutableList.of(new FileNameWithOffset("file0.csv", 0L)));
- Assert.assertEquals(new ImmutablePair<>(new FileNameWithOffset("file1.csv", 0L), 0L), state.getNextPendingFile());
- }
-
- @Test
- public void completedFilesTest() throws SQLException {
- final String stateDatabaseFileName = ":memory:";
- final LogFileSequenceProcessorState state = LogFileSequenceProcessorState.create(stateDatabaseFileName);
- Assert.assertNull(state.getNextPendingFile());
- state.addPendingFiles(ImmutableList.of(new FileNameWithOffset("file1.csv", 0L)));
- Assert.assertEquals(new ImmutablePair<>(new FileNameWithOffset("file1.csv", 0L), 0L), state.getNextPendingFile());
- state.addCompletedFile("file1.csv", 0L, 1000L, 10L);
- final List completedFiles = state.getCompletedFiles();
- log.info("completedFiles={}", completedFiles);
- Assert.assertEquals(ImmutableSet.of(new FileNameWithOffset("file1.csv", 1000L)), new HashSet<>(completedFiles));
- Assert.assertNull(state.getNextPendingFile());
- // Make sure this is idempotent.
- state.addCompletedFile("file1.csv", 0L, 1000L, 10L);
- Assert.assertEquals(ImmutableSet.of(new FileNameWithOffset("file1.csv", 1000L)), new HashSet<>(completedFiles));
- Assert.assertNull(state.getNextPendingFile());
- }
-
- @Test
- public void processFilesTest() throws SQLException {
- final String stateDatabaseFileName = ":memory:";
- final LogFileSequenceProcessorState state = LogFileSequenceProcessorState.create(stateDatabaseFileName);
- Assert.assertNull(state.getNextPendingFile());
- // Find 3 new files.
- state.addPendingFiles(ImmutableList.of(new FileNameWithOffset("file2.csv", 0L)));
- state.addPendingFiles(ImmutableList.of(new FileNameWithOffset("file1.csv", 0L)));
- state.addPendingFiles(ImmutableList.of(new FileNameWithOffset("file3.csv", 0L)));
- // Re-add a pending file. This should be ignored.
- state.addPendingFiles(ImmutableList.of(new FileNameWithOffset("file1.csv", 0L)));
- // Get next pending file.
- Assert.assertEquals(new ImmutablePair<>(new FileNameWithOffset("file2.csv", 0L), 0L), state.getNextPendingFile());
- // Complete file.
- state.addCompletedFile("file2.csv", 0L, 1000L, 10L);
- Assert.assertEquals(ImmutableSet.of(new FileNameWithOffset("file2.csv", 1000L)), new HashSet<>(state.getCompletedFiles()));
- // Get next pending file.
- Assert.assertEquals(new ImmutablePair<>(new FileNameWithOffset("file1.csv", 0L), 10L), state.getNextPendingFile());
- // Complete file.
- state.addCompletedFile("file1.csv", 0L, 2000L, 20L);
- Assert.assertEquals(ImmutableSet.of(
- new FileNameWithOffset("file2.csv", 1000L),
- new FileNameWithOffset("file1.csv", 2000L)),
- new HashSet<>(state.getCompletedFiles()));
- // Get next pending file.
- Assert.assertEquals(new ImmutablePair<>(new FileNameWithOffset("file3.csv", 0L), 20L), state.getNextPendingFile());
- // Complete file.
- state.addCompletedFile("file3.csv", 0L, 1500L, 30L);
- Assert.assertEquals(ImmutableSet.of(
- new FileNameWithOffset("file2.csv", 1000L),
- new FileNameWithOffset("file1.csv", 2000L),
- new FileNameWithOffset("file3.csv", 1500L)),
- new HashSet<>(state.getCompletedFiles()));
- // No more pending files.
- Assert.assertNull(state.getNextPendingFile());
- // Delete completed file.
- state.deleteCompletedFile("file1.csv");
- Assert.assertEquals(ImmutableSet.of(
- new FileNameWithOffset("file2.csv", 1000L),
- new FileNameWithOffset("file3.csv", 1500L)),
- new HashSet<>(state.getCompletedFiles()));
- // Delete completed file.
- state.deleteCompletedFile("file2.csv");
- Assert.assertEquals(ImmutableSet.of(
- new FileNameWithOffset("file3.csv", 1500L)),
- new HashSet<>(state.getCompletedFiles()));
- // Delete completed file.
- state.deleteCompletedFile("file3.csv");
- Assert.assertTrue(state.getCompletedFiles().isEmpty());
- }
-}
diff --git a/pravega-sensor-collector/src/test/java/io/pravega/sensor/collector/file/LogFileSequenceProcessorTests.java b/pravega-sensor-collector/src/test/java/io/pravega/sensor/collector/file/LogFileSequenceProcessorTests.java
deleted file mode 100644
index 87c92354..00000000
--- a/pravega-sensor-collector/src/test/java/io/pravega/sensor/collector/file/LogFileSequenceProcessorTests.java
+++ /dev/null
@@ -1,46 +0,0 @@
-/**
- * Copyright (c) Dell Inc., or its subsidiaries. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- */
-package io.pravega.sensor.collector.file;
-
-import com.google.common.collect.ImmutableList;
-import org.junit.Assert;
-import org.junit.Test;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.io.IOException;
-import java.util.List;
-
-public class LogFileSequenceProcessorTests {
- private static final Logger log = LoggerFactory.getLogger(LogFileSequenceProcessorTests.class);
-
- @Test
- public void getNewFilesTest() {
- final List directoryListing = ImmutableList.of(
- new FileNameWithOffset("file2", 10),
- new FileNameWithOffset("file4", 10),
- new FileNameWithOffset("file3", 10));
- final List completedFiles = ImmutableList.of(
- new FileNameWithOffset("file1", 10),
- new FileNameWithOffset("file2", 10));
- final List expected = ImmutableList.of(
- new FileNameWithOffset("file3", 0),
- new FileNameWithOffset("file4", 0));
- final List actual = LogFileSequenceProcessor.getNewFiles(directoryListing, completedFiles);
- Assert.assertEquals(expected, actual);
- }
-
- @Test
- public void getDirectoryListingTest() throws IOException {
- final List actual = LogFileSequenceProcessor.getDirectoryListing(
- "../log-file-sample-data/*.csv");
- log.info("actual={}", actual);
- }
-}
diff --git a/pravega-sensor-collector/src/test/java/io/pravega/sensor/collector/file/EventGeneratorTests.java b/pravega-sensor-collector/src/test/java/io/pravega/sensor/collector/file/csvfile/CSVFileEventGeneratorTests.java
similarity index 71%
rename from pravega-sensor-collector/src/test/java/io/pravega/sensor/collector/file/EventGeneratorTests.java
rename to pravega-sensor-collector/src/test/java/io/pravega/sensor/collector/file/csvfile/CSVFileEventGeneratorTests.java
index 6c8b4362..fdae2efd 100644
--- a/pravega-sensor-collector/src/test/java/io/pravega/sensor/collector/file/EventGeneratorTests.java
+++ b/pravega-sensor-collector/src/test/java/io/pravega/sensor/collector/file/csvfile/CSVFileEventGeneratorTests.java
@@ -7,12 +7,14 @@
*
* http://www.apache.org/licenses/LICENSE-2.0
*/
-package io.pravega.sensor.collector.file;
+package io.pravega.sensor.collector.file.csvfile;
import com.google.common.io.CountingInputStream;
+import io.pravega.sensor.collector.file.EventGenerator;
+import io.pravega.sensor.collector.util.PravegaWriterEvent;
import org.apache.commons.lang3.tuple.Pair;
-import org.junit.Assert;
-import org.junit.Test;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -22,12 +24,12 @@
import java.util.ArrayList;
import java.util.List;
-public class EventGeneratorTests {
- private static final Logger log = LoggerFactory.getLogger(EventGeneratorTests.class);
+public class CSVFileEventGeneratorTests {
+ private static final Logger log = LoggerFactory.getLogger(CSVFileEventGeneratorTests.class);
@Test
public void Test3by2() throws IOException {
- final EventGenerator eventGenerator = EventGenerator.create("routingKey1", 2);
+ final EventGenerator eventGenerator = CsvFileEventGenerator.create("routingKey1", 2);
final String csvStr =
"\"Time\",\"X\",\"Y\",\"Z\",\"IN_PROGRESS\"\n" +
"\"2020-07-15 23:59:50.352\",\"0.305966\",\"0.0\",\"9.331963\",\"0\"\n" +
@@ -37,13 +39,13 @@ public void Test3by2() throws IOException {
final List events = new ArrayList<>();
Pair nextSequenceNumberAndOffset = eventGenerator.generateEventsFromInputStream(inputStream, 100, events::add);
log.info("events={}", events);
- Assert.assertEquals(102L, (long) nextSequenceNumberAndOffset.getLeft());
- Assert.assertEquals(csvStr.length(), (long) nextSequenceNumberAndOffset.getRight());
+ Assertions.assertEquals(102L, (long) nextSequenceNumberAndOffset.getLeft());
+ Assertions.assertEquals(csvStr.length(), (long) nextSequenceNumberAndOffset.getRight());
}
@Test
public void Test3by3() throws IOException {
- final EventGenerator eventGenerator = EventGenerator.create("routingKey1", 3);
+ final EventGenerator eventGenerator = CsvFileEventGenerator.create("routingKey1", 3);
final String csvStr =
"\"Time\",\"X\",\"Y\",\"Z\",\"IN_PROGRESS\"\n" +
"\"2020-07-15 23:59:50.352\",\"0.305966\",\"0.0\",\"9.331963\",\"0\"\n" +
@@ -53,13 +55,13 @@ public void Test3by3() throws IOException {
final List events = new ArrayList<>();
Pair nextSequenceNumberAndOffset = eventGenerator.generateEventsFromInputStream(inputStream, 100, events::add);
log.info("events={}", events);
- Assert.assertEquals(101L, (long) nextSequenceNumberAndOffset.getLeft());
- Assert.assertEquals(csvStr.length(), (long) nextSequenceNumberAndOffset.getRight());
+ Assertions.assertEquals(101L, (long) nextSequenceNumberAndOffset.getLeft());
+ Assertions.assertEquals(csvStr.length(), (long) nextSequenceNumberAndOffset.getRight());
}
@Test
public void Test1by3() throws IOException {
- final EventGenerator eventGenerator = EventGenerator.create("routingKey1", 3);
+ final EventGenerator eventGenerator = CsvFileEventGenerator.create("routingKey1", 3);
final String csvStr =
"\"Time\",\"X\",\"Y\",\"Z\",\"IN_PROGRESS\"\n" +
"\"2020-07-15 23:59:50.352\",\"0.305966\",\"0.0\",\"9.331963\",\"0\"\n";
@@ -67,38 +69,38 @@ public void Test1by3() throws IOException {
final List events = new ArrayList<>();
Pair nextSequenceNumberAndOffset = eventGenerator.generateEventsFromInputStream(inputStream, 100, events::add);
log.info("events={}", events);
- Assert.assertEquals(101L, (long) nextSequenceNumberAndOffset.getLeft());
- Assert.assertEquals(csvStr.length(), (long) nextSequenceNumberAndOffset.getRight());
+ Assertions.assertEquals(101L, (long) nextSequenceNumberAndOffset.getLeft());
+ Assertions.assertEquals(csvStr.length(), (long) nextSequenceNumberAndOffset.getRight());
}
@Test
public void Test0by3() throws IOException {
- final EventGenerator eventGenerator = EventGenerator.create("routingKey1", 3);
+ final EventGenerator eventGenerator = CsvFileEventGenerator.create("routingKey1", 3);
final String csvStr =
"\"Time\",\"X\",\"Y\",\"Z\",\"IN_PROGRESS\"\n";
final CountingInputStream inputStream = new CountingInputStream(new ByteArrayInputStream(csvStr.getBytes(StandardCharsets.UTF_8)));
final List events = new ArrayList<>();
Pair nextSequenceNumberAndOffset = eventGenerator.generateEventsFromInputStream(inputStream, 100, events::add);
log.info("events={}", events);
- Assert.assertEquals(100L, (long) nextSequenceNumberAndOffset.getLeft());
- Assert.assertEquals(csvStr.length(), (long) nextSequenceNumberAndOffset.getRight());
+ Assertions.assertEquals(100L, (long) nextSequenceNumberAndOffset.getLeft());
+ Assertions.assertEquals(csvStr.length(), (long) nextSequenceNumberAndOffset.getRight());
}
@Test
public void TestEmptyFile() throws IOException {
- final EventGenerator eventGenerator = EventGenerator.create("routingKey1", 3);
+ final EventGenerator eventGenerator = CsvFileEventGenerator.create("routingKey1", 3);
final String csvStr = "";
final CountingInputStream inputStream = new CountingInputStream(new ByteArrayInputStream(csvStr.getBytes(StandardCharsets.UTF_8)));
final List events = new ArrayList<>();
Pair nextSequenceNumberAndOffset = eventGenerator.generateEventsFromInputStream(inputStream, 100, events::add);
log.info("events={}", events);
- Assert.assertEquals(100L, (long) nextSequenceNumberAndOffset.getLeft());
- Assert.assertEquals(csvStr.length(), (long) nextSequenceNumberAndOffset.getRight());
+ Assertions.assertEquals(100L, (long) nextSequenceNumberAndOffset.getLeft());
+ Assertions.assertEquals(csvStr.length(), (long) nextSequenceNumberAndOffset.getRight());
}
@Test
public void test7by3() throws IOException {
- final EventGenerator eventGenerator = EventGenerator.create("routingKey1", 3);
+ final EventGenerator eventGenerator = CsvFileEventGenerator.create("routingKey1", 3);
final String csvStr =
"\"Time\",\"X\",\"Y\",\"Z\",\"IN_PROGRESS\"\n" +
"\"2020-07-15 23:59:50.352\",\"0.305966\",\"0.0\",\"9.331963\",\"0\"\n" +
@@ -112,7 +114,7 @@ public void test7by3() throws IOException {
final List events = new ArrayList<>();
Pair nextSequenceNumberAndOffset = eventGenerator.generateEventsFromInputStream(inputStream, 100, events::add);
log.info("events={}", events);
- Assert.assertEquals(103L, (long) nextSequenceNumberAndOffset.getLeft());
- Assert.assertEquals(csvStr.length(), (long) nextSequenceNumberAndOffset.getRight());
+ Assertions.assertEquals(103L, (long) nextSequenceNumberAndOffset.getLeft());
+ Assertions.assertEquals(csvStr.length(), (long) nextSequenceNumberAndOffset.getRight());
}
}
diff --git a/pravega-sensor-collector/src/test/java/io/pravega/sensor/collector/file/csvfile/CsvFileSequenceProcessorTests.java b/pravega-sensor-collector/src/test/java/io/pravega/sensor/collector/file/csvfile/CsvFileSequenceProcessorTests.java
new file mode 100644
index 00000000..8aa3097f
--- /dev/null
+++ b/pravega-sensor-collector/src/test/java/io/pravega/sensor/collector/file/csvfile/CsvFileSequenceProcessorTests.java
@@ -0,0 +1,29 @@
+package io.pravega.sensor.collector.file.csvfile;
+
+import io.pravega.sensor.collector.file.FileProcessor;
+import io.pravega.sensor.collector.file.FileProcessorTests;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+import org.mockito.Mockito;
+
+import static org.mockito.ArgumentMatchers.any;
+
+public class CsvFileSequenceProcessorTests extends FileProcessorTests {
+
+ @BeforeEach
+ public void before() throws Exception {
+ super.setup();
+
+ }
+
+ /*
+ * Generate events for a CSV file and process new files when there are no pending files.
+ */
+ @Test
+ public void generateEventForCSVFileTests() throws Exception {
+ FileProcessor fileProcessor = new CsvFileSequenceProcessor(config, state, transactionalEventWriter,transactionCoordinator, "test");
+ fileProcessor.processNewFiles();
+ Mockito.verify(state, Mockito.times(1)).getNextPendingFileRecord();
+ }
+}
diff --git a/pravega-sensor-collector/src/test/java/io/pravega/sensor/collector/file/parquet/ParquetEventGeneratorTests.java b/pravega-sensor-collector/src/test/java/io/pravega/sensor/collector/file/parquet/ParquetEventGeneratorTests.java
new file mode 100644
index 00000000..4f1cecb7
--- /dev/null
+++ b/pravega-sensor-collector/src/test/java/io/pravega/sensor/collector/file/parquet/ParquetEventGeneratorTests.java
@@ -0,0 +1,38 @@
+package io.pravega.sensor.collector.file.parquet;
+
+import com.google.common.io.CountingInputStream;
+import io.pravega.sensor.collector.file.EventGenerator;
+import io.pravega.sensor.collector.util.FileNameWithOffset;
+import io.pravega.sensor.collector.util.FileUtils;
+import io.pravega.sensor.collector.util.PravegaWriterEvent;
+import org.apache.commons.lang3.tuple.Pair;
+import org.junit.Assert;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.ArrayList;
+import java.util.List;
+
+public class ParquetEventGeneratorTests {
+ private static final Logger log = LoggerFactory.getLogger(ParquetEventGeneratorTests.class);
+
+ @Test
+ public void TestFile() throws IOException {
+ final EventGenerator eventGenerator = ParquetEventGenerator.create("routingKey1",100);
+ final List<FileNameWithOffset> files = FileUtils.getDirectoryListing("../parquet-file-sample-data","parquet", Paths.get("."), 5000);
+ File parquetData = new File(files.get(0).fileName);
+
+ final CountingInputStream inputStream = new CountingInputStream(new FileInputStream(parquetData));
+ final List<PravegaWriterEvent> events = new ArrayList<>();
+ Pair<Long, Long> nextSequenceNumberAndOffset = eventGenerator.generateEventsFromInputStream(inputStream, 1, events::add);
+ Assert.assertEquals(501L, (long) nextSequenceNumberAndOffset.getLeft());
+ Assert.assertEquals(parquetData.length(), (long) nextSequenceNumberAndOffset.getRight());
+ }
+
+}
diff --git a/pravega-sensor-collector/src/test/java/io/pravega/sensor/collector/file/parquet/ParquetFileProcessorTests.java b/pravega-sensor-collector/src/test/java/io/pravega/sensor/collector/file/parquet/ParquetFileProcessorTests.java
new file mode 100644
index 00000000..92dbed57
--- /dev/null
+++ b/pravega-sensor-collector/src/test/java/io/pravega/sensor/collector/file/parquet/ParquetFileProcessorTests.java
@@ -0,0 +1,27 @@
+package io.pravega.sensor.collector.file.parquet;
+
+import io.pravega.sensor.collector.file.FileProcessor;
+import io.pravega.sensor.collector.file.FileProcessorTests;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+import org.mockito.Mockito;
+
+public class ParquetFileProcessorTests extends FileProcessorTests {
+
+
+ @BeforeEach
+ public void before() throws Exception {
+ super.setup();
+
+ }
+
+ /*
+ * Generate events for a Parquet file and process new files when there are no pending files.
+ */
+ @Test
+ public void generateEventForParquetTests() throws Exception {
+ FileProcessor fileProcessor = new ParquetFileProcessor(config, state, transactionalEventWriter,transactionCoordinator, "test");
+ fileProcessor.processNewFiles();
+ Mockito.verify(state, Mockito.times(1)).getNextPendingFileRecord();
+ }
+}
diff --git a/pravega-sensor-collector/src/test/java/io/pravega/sensor/collector/file/rawfile/RawEventGeneratorTests.java b/pravega-sensor-collector/src/test/java/io/pravega/sensor/collector/file/rawfile/RawEventGeneratorTests.java
new file mode 100644
index 00000000..d883caf6
--- /dev/null
+++ b/pravega-sensor-collector/src/test/java/io/pravega/sensor/collector/file/rawfile/RawEventGeneratorTests.java
@@ -0,0 +1,49 @@
+package io.pravega.sensor.collector.file.rawfile;
+
+import com.google.common.io.CountingInputStream;
+import io.pravega.sensor.collector.file.EventGenerator;
+import io.pravega.sensor.collector.util.PravegaWriterEvent;
+import org.apache.commons.lang3.tuple.Pair;
+import org.junit.Assert;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.List;
+
+public class RawEventGeneratorTests {
+ private static final Logger log = LoggerFactory.getLogger(RawEventGeneratorTests.class);
+
+ @Test
+ public void TestFile() throws IOException {
+ final EventGenerator eventGenerator = RawEventGenerator.create("routingKey1");
+ final String rawfileStr =
+ "\"Time\",\"X\",\"Y\",\"Z\",\"IN_PROGRESS\"\n" +
+ "\"2020-07-15 23:59:50.352\",\"0.305966\",\"0.0\",\"9.331963\",\"0\"\n" +
+ "\"2020-07-15 23:59:50.362\",\"1.305966\",\"0.1\",\"1.331963\",\"0\"\n" +
+ "\"2020-07-15 23:59:50.415\",\"0.305966\",\"0.0\",\"9.331963\",\"0\"\n";
+ final CountingInputStream inputStream = new CountingInputStream(new ByteArrayInputStream(rawfileStr.getBytes(StandardCharsets.UTF_8)));
+ final List<PravegaWriterEvent> events = new ArrayList<>();
+ Pair<Long, Long> nextSequenceNumberAndOffset = eventGenerator.generateEventsFromInputStream(inputStream, 100, events::add);
+ log.info("events={}", events);
+ Assert.assertEquals(101L, (long) nextSequenceNumberAndOffset.getLeft());
+ Assert.assertEquals(rawfileStr.length(), (long) nextSequenceNumberAndOffset.getRight());
+ }
+
+ @Test
+ public void TestEmptyFile() throws IOException {
+ final EventGenerator eventGenerator = RawEventGenerator.create("routingKey1");
+ final String rawfileStr = "";
+ final CountingInputStream inputStream = new CountingInputStream(new ByteArrayInputStream(rawfileStr.getBytes(StandardCharsets.UTF_8)));
+ final List<PravegaWriterEvent> events = new ArrayList<>();
+ Pair<Long, Long> nextSequenceNumberAndOffset = eventGenerator.generateEventsFromInputStream(inputStream, 100, events::add);
+ log.info("events={}", events);
+ Assert.assertEquals(100L, (long) nextSequenceNumberAndOffset.getLeft());
+ Assert.assertEquals(rawfileStr.length(), (long) nextSequenceNumberAndOffset.getRight());
+ }
+
+}
diff --git a/pravega-sensor-collector/src/test/java/io/pravega/sensor/collector/file/rawfile/RawFileProcessorTests.java b/pravega-sensor-collector/src/test/java/io/pravega/sensor/collector/file/rawfile/RawFileProcessorTests.java
new file mode 100644
index 00000000..d8d68c9c
--- /dev/null
+++ b/pravega-sensor-collector/src/test/java/io/pravega/sensor/collector/file/rawfile/RawFileProcessorTests.java
@@ -0,0 +1,26 @@
+package io.pravega.sensor.collector.file.rawfile;
+
+import io.pravega.sensor.collector.file.FileProcessor;
+import io.pravega.sensor.collector.file.FileProcessorTests;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+import org.mockito.Mockito;
+
+public class RawFileProcessorTests extends FileProcessorTests {
+ @BeforeEach
+ public void before() throws Exception {
+ super.setup();
+
+ }
+
+ /*
+ * Generate events for a Raw file and process new files when there are no pending files.
+ */
+ @Test
+ public void generateEventForRawFileTests() throws Exception {
+ FileProcessor fileProcessor = new RawFileProcessor(config, state, transactionalEventWriter,transactionCoordinator, "test");
+ fileProcessor.processNewFiles();
+ Mockito.verify(state, Mockito.times(1)).getNextPendingFileRecord();
+ }
+
+}
diff --git a/pravega-sensor-collector/src/test/java/io/pravega/sensor/collector/util/PersistentIdTests.java b/pravega-sensor-collector/src/test/java/io/pravega/sensor/collector/util/PersistentIdTests.java
index 349414d7..0209bf1d 100644
--- a/pravega-sensor-collector/src/test/java/io/pravega/sensor/collector/util/PersistentIdTests.java
+++ b/pravega-sensor-collector/src/test/java/io/pravega/sensor/collector/util/PersistentIdTests.java
@@ -9,8 +9,8 @@
*/
package io.pravega.sensor.collector.util;
-import org.junit.Assert;
-import org.junit.Test;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -25,7 +25,7 @@ public class PersistentIdTests {
@Test
public void persistentIdTest() throws SQLException {
- final String fileName = "/tmp/persistent-id-test-" + UUID.randomUUID() + ".db";
+ final String fileName = "persistent-id-test-" + UUID.randomUUID() + ".db";
log.info("fileName={}", fileName);
try {
@@ -39,7 +39,7 @@ public void persistentIdTest() throws SQLException {
writerId2 = new PersistentId(connection).getPersistentId().toString();
log.info("writerId2={}", writerId2);
}
- Assert.assertEquals(writerId1, writerId2);
+ Assertions.assertEquals(writerId1, writerId2);
} finally {
new File(fileName).delete();
}
diff --git a/pravega-sensor-collector/src/test/java/io/pravega/sensor/collector/util/TransactionCoordinatorTests.java b/pravega-sensor-collector/src/test/java/io/pravega/sensor/collector/util/TransactionCoordinatorTests.java
new file mode 100644
index 00000000..84733240
--- /dev/null
+++ b/pravega-sensor-collector/src/test/java/io/pravega/sensor/collector/util/TransactionCoordinatorTests.java
@@ -0,0 +1,262 @@
+package io.pravega.sensor.collector.util;
+
+import io.pravega.client.stream.TxnFailedException;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+import org.mockito.Mock;
+import org.mockito.Mockito;
+import org.mockito.MockitoAnnotations;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.sql.*;
+import java.util.List;
+import java.util.Optional;
+import java.util.UUID;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.mockito.ArgumentMatchers.anyString;
+import static org.mockito.Mockito.*;
+
+public class TransactionCoordinatorTests {
+
+ @Mock
+ private Connection mockConnection;
+
+ @Mock
+ private PreparedStatement mockPrepareStatement;
+ @Mock
+ private Statement mockStatement;
+
+ @Mock
+ private ResultSet mockResultSet;
+
+ @Mock
+ private EventWriter eventWriter;
+
+ @Mock
+ TransactionalEventWriter transactionalEventWriter;
+
+ private TransactionCoordinator transactionProcessor;
+ private static final Logger log = LoggerFactory.getLogger(TransactionCoordinatorTests.class);
+
+ @BeforeEach
+ public void setUp() throws SQLException {
+ MockitoAnnotations.initMocks(this);
+
+ // Mock behavior for the connection and statement
+ when(mockConnection.createStatement()).thenReturn(mockStatement);
+ when(mockStatement.execute(anyString())).thenReturn(true);
+ /*when(mockConnection.prepareStatement(anyString())).thenReturn(mockPrepareStatement);
+ when(mockPrepareStatement.execute()).thenReturn(true);*/
+ transactionProcessor = new TransactionCoordinator(mockConnection,transactionalEventWriter);
+ }
+
+ @Test
+ public void testAddTransactionToCommit() throws SQLException {
+
+ UUID mockTransactionId = UUID.randomUUID();
+ Optional<UUID> optionalTransactionId = Optional.of(mockTransactionId);
+ when(mockConnection.prepareStatement(anyString())).thenReturn(mockPrepareStatement);
+ when(mockPrepareStatement.execute()).thenReturn(true);
+ transactionProcessor.addTransactionToCommit(optionalTransactionId);
+ // Assert
+ // Verify that prepareStatement was called with the correct SQL query
+ verify(mockConnection).prepareStatement("insert into TransactionsToCommit (txnId) values (?)");
+ verify(mockStatement).execute(anyString());
+
+ }
+
+ /*
+ * SQLException while adding a transaction id to the TransactionsToCommit table
+ */
+ @Test
+ public void testAddTransactionToCommitThrowSQLException() throws SQLException {
+
+ UUID mockTransactionId = UUID.randomUUID();
+ Optional<UUID> optionalTransactionId = Optional.of(mockTransactionId);
+ when(mockConnection.prepareStatement(anyString())).thenReturn(mockPrepareStatement);
+ // Mock behavior: when preparedStatement.execute is called, throw a SQLException
+ doThrow(new SQLException("Test exception")).when(mockPrepareStatement).execute();
+
+ // Use assertThrows to verify that the SQLException is surfaced as a RuntimeException
+ assertThrows(RuntimeException.class, () -> transactionProcessor.addTransactionToCommit(optionalTransactionId));
+
+ // Verify that prepareStatement was called with the correct SQL query
+ verify(mockConnection).prepareStatement("insert into TransactionsToCommit (txnId) values (?)");
+ verify(mockStatement).execute(anyString());
+
+ }
+
+ @Test
+ public void testDeleteTransactionToCommit() throws SQLException {
+
+ UUID mockTransactionId = UUID.randomUUID();
+ Optional<UUID> optionalTransactionId = Optional.of(mockTransactionId);
+ when(mockConnection.prepareStatement(anyString())).thenReturn(mockPrepareStatement);
+ when(mockPrepareStatement.execute()).thenReturn(true);
+ transactionProcessor.deleteTransactionToCommit(optionalTransactionId);
+ // Assert
+ // Verify that prepareStatement was called with the correct SQL query
+ verify(mockConnection).prepareStatement("delete from TransactionsToCommit where txnId = ?");
+ verify(mockStatement).execute(anyString());
+
+ }
+
+ /*
+ * SQLException while deleting a transaction id from the TransactionsToCommit table
+ */
+ @Test
+ public void testDeleteTransactionToCommitThrowSQLException() throws SQLException {
+
+ UUID mockTransactionId = UUID.randomUUID();
+ Optional<UUID> optionalTransactionId = Optional.of(mockTransactionId);
+ when(mockConnection.prepareStatement(anyString())).thenReturn(mockPrepareStatement);
+ // Mock behavior: when preparedStatement.execute is called, throw a SQLException
+ doThrow(new SQLException("Test exception")).when(mockPrepareStatement).execute();
+
+ // Use assertThrows to verify that the SQLException is surfaced as a RuntimeException
+ assertThrows(RuntimeException.class, () -> transactionProcessor.deleteTransactionToCommit(optionalTransactionId));
+
+ // Verify that prepareStatement was called with the correct SQL query
+ verify(mockConnection).prepareStatement("delete from TransactionsToCommit where txnId = ?");
+ verify(mockStatement).execute(anyString());
+ }
+
+ /*
+ * Test to verify getTransactionsToCommit method.
+ * Verify that the number of transaction ids matches the result set
+ *
+ */
+ @Test
+ public void testGetTransactionToCommit() throws SQLException {
+ // Mock behavior: when statement.executeQuery is called, return the mock result set
+ when(mockStatement.executeQuery("select txnId from TransactionsToCommit")).thenReturn(mockResultSet);
+ // Mock behavior: simulate the result set having two rows with different UUIDs
+ when(mockResultSet.next()).thenReturn(true, true, false);
+ when(mockResultSet.getString("txnId")).thenReturn(UUID.randomUUID().toString(), UUID.randomUUID().toString());
+
+ // Get the list of transaction ids from the TransactionsToCommit table
+ List<UUID> uuidList = transactionProcessor.getTransactionsToCommit();
+
+ // Assert
+ verify(mockResultSet, times(3)).next();
+ verify(mockResultSet, times(2)).getString("txnId");
+ // verify the result contains 2 UUIDs
+ assertEquals(2, uuidList.size());
+ }
+
+ /*
+ * Test to verify perform recovery method.
+ */
+ @Test
+ public void testPerformRecovery() throws SQLException, TxnFailedException {
+ // Mock behavior: when statement.executeQuery is called, return the mock result set
+ when(mockStatement.executeQuery("select txnId from TransactionsToCommit")).thenReturn(mockResultSet);
+ // Mock behavior: simulate the result set having two rows with different UUIDs
+ when(mockResultSet.next()).thenReturn(true, true, false);
+ when(mockResultSet.getString("txnId")).thenReturn(UUID.randomUUID().toString(), UUID.randomUUID().toString());
+ //mock for delete transaction call
+ when(mockConnection.prepareStatement(anyString())).thenReturn(mockPrepareStatement);
+ when(mockPrepareStatement.execute()).thenReturn(true);
+
+ doNothing().when(transactionalEventWriter).commit(any());
+
+ // Commit pending transactions recorded in the TransactionsToCommit table
+ transactionProcessor.performRecovery();
+
+ // Assert
+ verify(mockResultSet, times(3)).next();
+ verify(mockResultSet, times(2)).getString("txnId");
+ verify(mockConnection, times(2)).prepareStatement("delete from TransactionsToCommit where txnId = ?");
+
+ }
+
+
+ /*
+ * Test to verify the performRecovery method.
+ * Verify the scenario where the transaction commit throws a TxnFailedException.
+ */
+ @Test
+ public void testPerformRecoveryWithCommitFail() throws SQLException, TxnFailedException {
+ // Mock behavior: when statement.executeQuery is called, return the mock result set
+ when(mockStatement.executeQuery("select txnId from TransactionsToCommit")).thenReturn(mockResultSet);
+ // Mock behavior: simulate the result set having two rows with different UUIDs
+ when(mockResultSet.next()).thenReturn(true, true,false);
+ when(mockResultSet.getString("txnId")).thenReturn(UUID.randomUUID().toString(), UUID.randomUUID().toString());
+ //mock for delete transaction call
+ when(mockConnection.prepareStatement(anyString())).thenReturn(mockPrepareStatement);
+ when(mockPrepareStatement.execute()).thenReturn(true);
+ Mockito.doAnswer(invocation -> {
+ throw new TxnFailedException("Simulated transaction failure");
+ }).when(transactionalEventWriter).commit(Mockito.any());
+ //doNothing().when(transactionalEventWriter).commit(any());
+
+ // Perform recovery
+ transactionProcessor.performRecovery();
+
+ // Assert
+ verify(mockResultSet, times(3)).next();
+ verify(mockResultSet, times(2)).getString("txnId");
+ //verify(mockConnection, times(2)).prepareStatement("delete from TransactionsToCommit where txnId = ?");
+
+ }
+
+ /*
+ * Test to verify the performRecovery method.
+ * Verify the scenario where the transaction commit throws a RuntimeException with the message "Unknown transaction".
+ */
+ @Test
+ public void testPerformRecoveryCommitWithUnknownTransactionFail() throws SQLException, TxnFailedException {
+ // Mock behavior: when statement.executeQuery is called, return the mock result set
+ when(mockStatement.executeQuery("select txnId from TransactionsToCommit")).thenReturn(mockResultSet);
+ // Mock behavior: simulate the result set having two rows with different UUIDs
+ when(mockResultSet.next()).thenReturn(true,true,false);
+ when(mockResultSet.getString("txnId")).thenReturn(UUID.randomUUID().toString(), UUID.randomUUID().toString());
+ //mock for delete transaction call
+ when(mockConnection.prepareStatement(anyString())).thenReturn(mockPrepareStatement);
+ when(mockPrepareStatement.execute()).thenReturn(true);
+ Mockito.doAnswer(invocation -> {
+ throw new RuntimeException("Unknown transaction");
+ }).when(transactionalEventWriter).commit(Mockito.any());
+
+ // Perform recovery
+ transactionProcessor.performRecovery();
+
+ // Assert
+ verify(mockResultSet, times(3)).next();
+ verify(mockResultSet, times(2)).getString("txnId");
+
+ }
+
+ /*
+ * Test to verify the performRecovery method.
+ * Verify the scenario where the transaction commit throws some other runtime exception.
+ */
+ @Test
+ public void testPerformRecoveryCommitWithOtherException() throws SQLException, TxnFailedException {
+ // Mock behavior: when statement.executeQuery is called, return the mock result set
+ when(mockStatement.executeQuery("select txnId from TransactionsToCommit")).thenReturn(mockResultSet);
+ // Mock behavior: simulate the result set having two rows with different UUIDs
+ when(mockResultSet.next()).thenReturn(true,false);
+ when(mockResultSet.getString("txnId")).thenReturn(UUID.randomUUID().toString(), UUID.randomUUID().toString());
+ //mock for delete transaction call
+ when(mockConnection.prepareStatement(anyString())).thenReturn(mockPrepareStatement);
+ when(mockPrepareStatement.execute()).thenReturn(true);
+ Mockito.doAnswer(invocation -> {
+ throw new RuntimeException("Other Runtime Exception");
+ }).when(transactionalEventWriter).commit(Mockito.any());
+
+ // Perform recovery
+ RuntimeException exception = assertThrows(RuntimeException.class, () -> {
+ transactionProcessor.performRecovery();
+ });
+
+ // Assert
+ String expectedMessage = "Other Runtime Exception";
+ assertEquals(expectedMessage, exception.getMessage(), "Exception message mismatch");
+
+
+ }
+}
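
Taken together, these recovery tests pin down how commit failures are treated during performRecovery: a TxnFailedException or a RuntimeException whose message is "Unknown transaction" is tolerated, while any other RuntimeException propagates to the caller. The sketch below shows only that classification, assuming the message text is what distinguishes the tolerated RuntimeException; the helper class and method names are hypothetical and do not appear in this patch.

```java
import io.pravega.client.stream.TxnFailedException;

final class RecoveryFailureClassification {
    /**
     * Returns true when a commit failure seen during recovery can be ignored:
     * the transaction can no longer be committed (TxnFailedException) or the
     * controller no longer knows it ("Unknown transaction"). Anything else is fatal.
     */
    static boolean isTolerated(Exception e) {
        if (e instanceof TxnFailedException) {
            return true;
        }
        return e instanceof RuntimeException
                && e.getMessage() != null
                && e.getMessage().contains("Unknown transaction");
    }

    private RecoveryFailureClassification() {
    }
}
```
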
diff --git a/pravega-sensor-collector/src/test/java/io/pravega/sensor/collector/util/TransactionStateSQLiteImplTests.java b/pravega-sensor-collector/src/test/java/io/pravega/sensor/collector/util/TransactionStateSQLiteImplTests.java
new file mode 100644
index 00000000..d6f98d43
--- /dev/null
+++ b/pravega-sensor-collector/src/test/java/io/pravega/sensor/collector/util/TransactionStateSQLiteImplTests.java
@@ -0,0 +1,108 @@
+/**
+ * Copyright (c) Dell Inc., or its subsidiaries. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ */
+package io.pravega.sensor.collector.util;
+
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableSet;
+import org.apache.commons.lang3.tuple.ImmutablePair;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.sql.SQLException;
+import java.util.HashSet;
+import java.util.List;
+
+public class TransactionStateSQLiteImplTests {
+
+ private static final Logger log = LoggerFactory.getLogger(TransactionStateSQLiteImplTests.class);
+
+ @Test
+ public void pendingFilesTest() throws SQLException {
+ final String stateDatabaseFileName = ":memory:";
+ final TransactionStateDB state = TransactionStateInMemoryImpl.create(stateDatabaseFileName);
+ Assertions.assertNull(state.getNextPendingFileRecord());
+ state.addPendingFileRecords(ImmutableList.of(new FileNameWithOffset("file1.csv", 0L)));
+ Assertions.assertEquals(new ImmutablePair<>(new FileNameWithOffset("file1.csv", 0L), 0L), state.getNextPendingFileRecord());
+ state.addPendingFileRecords(ImmutableList.of(new FileNameWithOffset("file2.csv", 0L)));
+ Assertions.assertEquals(new ImmutablePair<>(new FileNameWithOffset("file1.csv", 0L), 0L), state.getNextPendingFileRecord());
+ state.addPendingFileRecords(ImmutableList.of(new FileNameWithOffset("file0.csv", 0L)));
+ Assertions.assertEquals(new ImmutablePair<>(new FileNameWithOffset("file1.csv", 0L), 0L), state.getNextPendingFileRecord());
+ }
+
+ @Test
+ public void completedFilesTest() throws SQLException {
+ final String stateDatabaseFileName = ":memory:";
+ final TransactionStateInMemoryImpl state = TransactionStateInMemoryImpl.create(stateDatabaseFileName);
+ Assertions.assertNull(state.getNextPendingFileRecord());
+ state.addPendingFileRecords(ImmutableList.of(new FileNameWithOffset("file1.csv", 0L)));
+ Assertions.assertEquals(new ImmutablePair<>(new FileNameWithOffset("file1.csv", 0L), 0L), state.getNextPendingFileRecord());
+ state.addCompletedFileRecord("file1.csv", 0L, 1000L, 10L);
+ final List<FileNameWithOffset> completedFiles = state.getCompletedFileRecords();
+ log.info("completedFiles={}", completedFiles);
+ Assertions.assertEquals(ImmutableSet.of(new FileNameWithOffset("file1.csv", 1000L)), new HashSet<>(completedFiles));
+ Assertions.assertNull(state.getNextPendingFileRecord());
+ // Make sure this is idempotent.
+ state.addCompletedFileRecord("file1.csv", 0L, 1000L, 10L);
+ Assertions.assertEquals(ImmutableSet.of(new FileNameWithOffset("file1.csv", 1000L)), new HashSet<>(completedFiles));
+ Assertions.assertNull(state.getNextPendingFileRecord());
+ }
+
+ @Test
+ public void processFilesTest() throws SQLException {
+ final String stateDatabaseFileName = ":memory:";
+ final TransactionStateInMemoryImpl state = TransactionStateInMemoryImpl.create(stateDatabaseFileName);
+ Assertions.assertNull(state.getNextPendingFileRecord());
+ // Find 3 new files.
+ state.addPendingFileRecords(ImmutableList.of(new FileNameWithOffset("file2.csv", 0L)));
+ state.addPendingFileRecords(ImmutableList.of(new FileNameWithOffset("file1.csv", 0L)));
+ state.addPendingFileRecords(ImmutableList.of(new FileNameWithOffset("file3.csv", 0L)));
+ // Re-add a pending file. This should be ignored.
+ state.addPendingFileRecords(ImmutableList.of(new FileNameWithOffset("file1.csv", 0L)));
+ // Get next pending file.
+ Assertions.assertEquals(new ImmutablePair<>(new FileNameWithOffset("file2.csv", 0L), 0L), state.getNextPendingFileRecord());
+ // Complete file.
+ state.addCompletedFileRecord("file2.csv", 0L, 1000L, 10L);
+ Assertions.assertEquals(ImmutableSet.of(new FileNameWithOffset("file2.csv", 1000L)), new HashSet<>(state.getCompletedFileRecords()));
+ // Get next pending file.
+ Assertions.assertEquals(new ImmutablePair<>(new FileNameWithOffset("file1.csv", 0L), 10L), state.getNextPendingFileRecord());
+ // Complete file.
+ state.addCompletedFileRecord("file1.csv", 0L, 2000L, 20L);
+ Assertions.assertEquals(ImmutableSet.of(
+ new FileNameWithOffset("file2.csv", 1000L),
+ new FileNameWithOffset("file1.csv", 2000L)),
+ new HashSet<>(state.getCompletedFileRecords()));
+ // Get next pending file.
+ Assertions.assertEquals(new ImmutablePair<>(new FileNameWithOffset("file3.csv", 0L), 20L), state.getNextPendingFileRecord());
+ // Complete file.
+ state.addCompletedFileRecord("file3.csv", 0L, 1500L, 30L);
+ Assertions.assertEquals(ImmutableSet.of(
+ new FileNameWithOffset("file2.csv", 1000L),
+ new FileNameWithOffset("file1.csv", 2000L),
+ new FileNameWithOffset("file3.csv", 1500L)), new HashSet<>(state.getCompletedFileRecords()));
+ // No more pending files.
+ Assertions.assertNull(state.getNextPendingFileRecord());
+ // Delete completed file.
+ state.deleteCompletedFileRecord("file1.csv");
+ Assertions.assertEquals(ImmutableSet.of(
+ new FileNameWithOffset("file2.csv", 1000L),
+ new FileNameWithOffset("file3.csv", 1500L)),
+ new HashSet<>(state.getCompletedFileRecords()));
+ // Delete completed file.
+ state.deleteCompletedFileRecord("file2.csv");
+ Assertions.assertEquals(ImmutableSet.of(
+ new FileNameWithOffset("file3.csv", 1500L)),
+ new HashSet<>(state.getCompletedFileRecords()));
+ // Delete completed file.
+ state.deleteCompletedFileRecord("file3.csv");
+ Assertions.assertTrue(state.getCompletedFileRecords().isEmpty());
+ }
+}
diff --git a/pravega-sensor-collector/src/test/resources/LogFileIngestTest.properties b/pravega-sensor-collector/src/test/resources/LogFileIngestTest.properties
index b6f644fb..c672fa5e 100644
--- a/pravega-sensor-collector/src/test/resources/LogFileIngestTest.properties
+++ b/pravega-sensor-collector/src/test/resources/LogFileIngestTest.properties
@@ -10,8 +10,9 @@
# This file can be used to manually test LogFileIngestService.
# Run scripts/simulate-logs-accel.sh concurrently.
-PRAVEGA_SENSOR_COLLECTOR_ACCEL2_CLASS=io.pravega.sensor.collector.file.LogFileIngestService
-PRAVEGA_SENSOR_COLLECTOR_ACCEL2_FILE_SPEC=/tmp/watch/Accelerometer.*.csv
+PRAVEGA_SENSOR_COLLECTOR_ACCEL2_CLASS=io.pravega.sensor.collector.file.csvfile.CsvFileIngestService
+PRAVEGA_SENSOR_COLLECTOR_ACCEL2_FILE_SPEC=/tmp/watch/files
+PRAVEGA_SENSOR_COLLECTOR_ACCEL2_FILE_EXTENSION=csv
PRAVEGA_SENSOR_COLLECTOR_ACCEL2_DELETE_COMPLETED_FILES=true
PRAVEGA_SENSOR_COLLECTOR_ACCEL2_DATABASE_FILE=/tmp/accelerometer.db
PRAVEGA_SENSOR_COLLECTOR_ACCEL2_EVENT_TEMPLATE={"RemoteAddr":"myaddr1","SensorType":"Accelerometer"}
@@ -20,3 +21,4 @@ PRAVEGA_SENSOR_COLLECTOR_ACCEL2_SCOPE=examples
PRAVEGA_SENSOR_COLLECTOR_ACCEL2_CREATE_SCOPE=true
PRAVEGA_SENSOR_COLLECTOR_ACCEL2_STREAM=sensors-accelerometer
PRAVEGA_SENSOR_COLLECTOR_ACCEL2_ROUTING_KEY=routingkey1
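+# Minimum time (ms) a file must remain unmodified before it is processed (assumed from the parameter name).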
+PRAVEGA_SENSOR_COLLECTOR_ACCEL2_MIN_TIME_IN_MILLIS_TO_UPDATE_FILE=5000
diff --git a/pravega-sensor-collector/src/test/resources/ParquetFileIngest.properties b/pravega-sensor-collector/src/test/resources/ParquetFileIngest.properties
index 26fdd224..57eaa626 100644
--- a/pravega-sensor-collector/src/test/resources/ParquetFileIngest.properties
+++ b/pravega-sensor-collector/src/test/resources/ParquetFileIngest.properties
@@ -8,8 +8,8 @@
# http://www.apache.org/licenses/LICENSE-2.0
#
# This file can be used to manually test ParquetFileIngestService.
-PRAVEGA_SENSOR_COLLECTOR_PARQ2_CLASS=io.pravega.sensor.collector.parquet.ParquetFileIngestService
-PRAVEGA_SENSOR_COLLECTOR_PARQ2_FILE_SPEC=/opt/pravega-sensor-collector/ParquetNew
+PRAVEGA_SENSOR_COLLECTOR_PARQ2_CLASS=io.pravega.sensor.collector.file.parquet.ParquetFileIngestService
+PRAVEGA_SENSOR_COLLECTOR_PARQ2_FILE_SPEC="/opt/pravega-sensor-collector/Parquet_Files/A,/opt/pravega-sensor-collector/Parquet_Files/B"
PRAVEGA_SENSOR_COLLECTOR_PARQ2_FILE_EXTENSION=parquet
PRAVEGA_SENSOR_COLLECTOR_PARQ2_DELETE_COMPLETED_FILES=false
PRAVEGA_SENSOR_COLLECTOR_PARQ2_DATABASE_FILE=/opt/pravega-sensor-collector/datafile.db
@@ -20,4 +20,9 @@ PRAVEGA_SENSOR_COLLECTOR_PARQ2_CREATE_SCOPE=false
PRAVEGA_SENSOR_COLLECTOR_PARQ2_STREAM=stream-p
PRAVEGA_SENSOR_COLLECTOR_PARQ2_ROUTING_KEY=$(hostname)
PRAVEGA_SENSOR_COLLECTOR_PARQ2_TRANSACTION_TIMEOUT_MINUTES=2.0
+PRAVEGA_SENSOR_COLLECTOR_PARQ2_MIN_TIME_IN_MILLIS_TO_UPDATE_FILE=5000
+
+# Hadoop requires native libraries on Windows; set HADOOP_HOME to the directory
+# that contains bin/winutils.exe.
+HADOOP_HOME=${HOME}/dev
diff --git a/pravega-sensor-collector/src/test/resources/RawFileIngest.properties b/pravega-sensor-collector/src/test/resources/RawFileIngest.properties
index e7b439c2..b548ff07 100644
--- a/pravega-sensor-collector/src/test/resources/RawFileIngest.properties
+++ b/pravega-sensor-collector/src/test/resources/RawFileIngest.properties
@@ -8,8 +8,8 @@
# http://www.apache.org/licenses/LICENSE-2.0
#
# This file can be used to manually test RawFileIngestService.
-PRAVEGA_SENSOR_COLLECTOR_RAW1_CLASS=io.pravega.sensor.collector.rawfile.RawFileIngestService
-PRAVEGA_SENSOR_COLLECTOR_RAW1_FILE_SPEC=/opt/pravega-sensor-collector/Files
+PRAVEGA_SENSOR_COLLECTOR_RAW1_CLASS=io.pravega.sensor.collector.file.rawfile.RawFileIngestService
+PRAVEGA_SENSOR_COLLECTOR_RAW1_FILE_SPEC="/opt/pravega-sensor-collector/Files/A,/opt/pravega-sensor-collector/Files/B"
PRAVEGA_SENSOR_COLLECTOR_RAW1_FILE_EXTENSION=parquet
PRAVEGA_SENSOR_COLLECTOR_RAW1_DELETE_COMPLETED_FILES=false
PRAVEGA_SENSOR_COLLECTOR_RAW1_DATABASE_FILE=/opt/pravega-sensor-collector/datafile.db
@@ -19,4 +19,5 @@ PRAVEGA_SENSOR_COLLECTOR_RAW1_CREATE_SCOPE=false
PRAVEGA_SENSOR_COLLECTOR_RAW1_STREAM=stream1
PRAVEGA_SENSOR_COLLECTOR_RAW1_ROUTING_KEY=$(hostname)
PRAVEGA_SENSOR_COLLECTOR_RAW1_TRANSACTION_TIMEOUT_MINUTES=2.0
+PRAVEGA_SENSOR_COLLECTOR_RAW1_MIN_TIME_IN_MILLIS_TO_UPDATE_FILE=5000
diff --git a/scripts/build-installer.sh b/scripts/build-installer.sh
index 33bacfcb..d91540e9 100755
--- a/scripts/build-installer.sh
+++ b/scripts/build-installer.sh
@@ -12,7 +12,11 @@ set -ex
ROOT_DIR=$(readlink -f $(dirname $0)/..)
source ${ROOT_DIR}/scripts/env.sh
pushd ${ROOT_DIR}
+
GZIP="--rsyncable" ./gradlew distTar ${GRADLE_OPTIONS}
-popd
ls -lh ${ROOT_DIR}/pravega-sensor-collector/build/distributions/pravega-sensor-collector-${APP_VERSION}.tgz
+
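+# Also build the fat (shadow) jar alongside the distribution archive.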
+./gradlew shadowJar ${GRADLE_OPTIONS}
+ls -lh ${ROOT_DIR}/pravega-sensor-collector/build/libs/pravega-sensor-collector-${APP_VERSION}.jar
+popd
diff --git a/scripts/env.sh b/scripts/env.sh
index e53cb974..83712a92 100755
--- a/scripts/env.sh
+++ b/scripts/env.sh
@@ -11,5 +11,5 @@ export ENV_LOCAL_SCRIPT=$(dirname $0)/env-local.sh
if [[ -f ${ENV_LOCAL_SCRIPT} ]]; then
source ${ENV_LOCAL_SCRIPT}
fi
-export APP_VERSION=${APP_VERSION:-0.2.17}
+export APP_VERSION=${APP_VERSION:-0.2.18}
export GRADLE_OPTIONS="${GRADLE_OPTIONS:-"-Pversion=${APP_VERSION}"}"
diff --git a/scripts/run-with-gradle-csv-file.sh b/scripts/run-with-gradle-csv-file.sh
new file mode 100644
index 00000000..a76a6439
--- /dev/null
+++ b/scripts/run-with-gradle-csv-file.sh
@@ -0,0 +1,35 @@
+#!/usr/bin/env bash
+#
+# Copyright (c) Dell Inc., or its subsidiaries. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+set -ex
+
+export CREATE_SCOPE=false
+export ROUTING_KEY=${HOSTNAME}
+export ENABLE_PRAVEGA=true
+export pravega_client_auth_method=Bearer
+export pravega_client_auth_loadDynamic=true
+export KEYCLOAK_SERVICE_ACCOUNT_FILE=/opt/pravega-sensor-collector/conf/keycloak.json
+export JAVA_OPTS="-Xmx512m"
+
+export PRAVEGA_SENSOR_COLLECTOR_ACCEL2_CLASS=io.pravega.sensor.collector.file.csvfile.CsvFileIngestService
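+# FILE_SPEC may list multiple input directories separated by commas, as in run-with-gradle-raw-file.sh.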
+export PRAVEGA_SENSOR_COLLECTOR_ACCEL2_FILE_SPEC="/opt/pravega-sensor-collector/Files/A"
+export PRAVEGA_SENSOR_COLLECTOR_ACCEL2_FILE_EXTENSION=csv
+export PRAVEGA_SENSOR_COLLECTOR_ACCEL2_DATABASE_FILE=/opt/pravega-sensor-collector/datafile.db
+export PRAVEGA_SENSOR_COLLECTOR_ACCEL2_SAMPLES_PER_EVENT=200
+export PRAVEGA_SENSOR_COLLECTOR_ACCEL2_PRAVEGA_CONTROLLER_URI=tls://pravega-controller.sdp.cluster1.sdp-demo.org:443
+export PRAVEGA_SENSOR_COLLECTOR_ACCEL2_SCOPE=project1
+export PRAVEGA_SENSOR_COLLECTOR_ACCEL2_STREAM=stream2
+export PRAVEGA_SENSOR_COLLECTOR_ACCEL2_ROUTING_KEY=$(hostname)
+export PRAVEGA_SENSOR_COLLECTOR_ACCEL2_DELETE_COMPLETED_FILES=false
+export PRAVEGA_SENSOR_COLLECTOR_ACCEL2_TRANSACTION_TIMEOUT_MINUTES=2.0
+export PRAVEGA_SENSOR_COLLECTOR_ACCEL2_CREATE_SCOPE=false
+
+./gradlew --no-daemon run
diff --git a/scripts/run-with-gradle-parquet-file-ingest.sh b/scripts/run-with-gradle-parquet-file-ingest.sh
index c241b5f1..5914e70c 100644
--- a/scripts/run-with-gradle-parquet-file-ingest.sh
+++ b/scripts/run-with-gradle-parquet-file-ingest.sh
@@ -18,8 +18,8 @@ export pravega_client_auth_loadDynamic=true
export KEYCLOAK_SERVICE_ACCOUNT_FILE=/opt/pravega-sensor-collector/conf/keycloak.json
export JAVA_OPTS="-Xmx512m"
-export PRAVEGA_SENSOR_COLLECTOR_PARQ2_CLASS=io.pravega.sensor.collector.parquet.ParquetFileIngestService
-export PRAVEGA_SENSOR_COLLECTOR_PARQ2_FILE_SPEC=/opt/pravega-sensor-collector/Parquet_Files
+export PRAVEGA_SENSOR_COLLECTOR_PARQ2_CLASS=io.pravega.sensor.collector.file.parquet.ParquetFileIngestService
+export PRAVEGA_SENSOR_COLLECTOR_PARQ2_FILE_SPEC="/opt/pravega-sensor-collector/Parquet_Files/A,/opt/pravega-sensor-collector/Parquet_Files/B"
export PRAVEGA_SENSOR_COLLECTOR_PARQ2_FILE_EXTENSION=parquet
export PRAVEGA_SENSOR_COLLECTOR_PARQ2_DATABASE_FILE=/opt/pravega-sensor-collector/datafile.db
export PRAVEGA_SENSOR_COLLECTOR_PARQ2_SAMPLES_PER_EVENT=200
diff --git a/scripts/run-with-gradle-raw-file.sh b/scripts/run-with-gradle-raw-file.sh
index aa84cb93..feaffdfd 100644
--- a/scripts/run-with-gradle-raw-file.sh
+++ b/scripts/run-with-gradle-raw-file.sh
@@ -18,8 +18,8 @@ export pravega_client_auth_loadDynamic=true
export KEYCLOAK_SERVICE_ACCOUNT_FILE=/opt/pravega-sensor-collector/conf/keycloak.json
export JAVA_OPTS="-Xmx512m"
-export PRAVEGA_SENSOR_COLLECTOR_RAW1_CLASS=io.pravega.sensor.collector.rawfile.RawFileIngestService
-export PRAVEGA_SENSOR_COLLECTOR_RAW1_FILE_SPEC=/opt/pravega-sensor-collector/Files
+export PRAVEGA_SENSOR_COLLECTOR_RAW1_CLASS=io.pravega.sensor.collector.file.rawfile.RawFileIngestService
+export PRAVEGA_SENSOR_COLLECTOR_RAW1_FILE_SPEC="/opt/pravega-sensor-collector/Files/A,/opt/pravega-sensor-collector/Files/B"
export PRAVEGA_SENSOR_COLLECTOR_RAW1_FILE_EXTENSION=parquet
export PRAVEGA_SENSOR_COLLECTOR_RAW1_DATABASE_FILE=/opt/pravega-sensor-collector/datafile.db
export PRAVEGA_SENSOR_COLLECTOR_RAW1_PRAVEGA_CONTROLLER_URI=tls://pravega-controller.sdp.cluster1.sdp-demo.org:443
@@ -29,5 +29,6 @@ export PRAVEGA_SENSOR_COLLECTOR_RAW1_ROUTING_KEY=$(hostname)
export PRAVEGA_SENSOR_COLLECTOR_RAW1_DELETE_COMPLETED_FILES=false
export PRAVEGA_SENSOR_COLLECTOR_RAW1_TRANSACTION_TIMEOUT_MINUTES=2.0
export PRAVEGA_SENSOR_COLLECTOR_RAW1_CREATE_SCOPE=false
+export PRAVEGA_SENSOR_COLLECTOR_RAW1_MIN_TIME_IN_MILLIS_TO_UPDATE_FILE=5000
./gradlew --no-daemon run
diff --git a/windows-service/PravegaSensorCollectorApp.xml b/windows-service/PravegaSensorCollectorApp.xml
index 465d243b..05d75604 100644
--- a/windows-service/PravegaSensorCollectorApp.xml
+++ b/windows-service/PravegaSensorCollectorApp.xml
@@ -15,7 +15,7 @@
-
+
@@ -26,6 +26,7 @@
+