Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP: Improve CSV import #77

Open
wants to merge 3 commits into
base: development
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions androidlibrary_lib/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,12 @@ dependencies {
api 'com.android.support:support-annotations:27.1.0'
api 'com.android.support:support-v4:27.1.0'

api group: 'com.fasterxml.jackson.core', name: 'jackson-core', version: '2.9.4'
api group: 'com.fasterxml.jackson.core', name: 'jackson-annotations', version: '2.9.4'
api group: 'com.fasterxml.jackson.core', name: 'jackson-databind', version: '2.9.4'
api group: 'com.fasterxml.jackson.dataformat', name: 'jackson-dataformat-csv', version: '2.9.4'
api group: 'com.fasterxml.jackson.dataformat', name: 'jackson-dataformat-xml', version: '2.9.4'

// Testing dependencies
testImplementation 'junit:junit:4.12'
androidTestImplementation 'com.android.support.test:rules:1.0.1'
Expand Down
Binary file not shown.
Binary file not shown.
Binary file modified androidlibrary_lib/libs/aggregate-rest-interface-2017-06-22.jar
Binary file not shown.
Binary file not shown.
Binary file removed androidlibrary_lib/libs/jackson-core-2.8.8.jar
Binary file not shown.
Binary file removed androidlibrary_lib/libs/jackson-databind-2.8.8.jar
Binary file not shown.
Binary file not shown.
452 changes: 126 additions & 326 deletions androidlibrary_lib/src/main/java/org/opendatakit/builder/CsvUtil.java

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1,209 @@
package org.opendatakit.builder.csvparser;

import com.fasterxml.jackson.databind.DeserializationFeature;
import com.fasterxml.jackson.databind.MapperFeature;
import com.fasterxml.jackson.databind.MappingIterator;
import com.fasterxml.jackson.databind.ObjectReader;
import com.fasterxml.jackson.dataformat.csv.CsvMapper;
import com.fasterxml.jackson.dataformat.csv.CsvParser;
import com.fasterxml.jackson.dataformat.csv.CsvSchema;

import org.apache.commons.lang3.time.DateUtils;
import org.opendatakit.aggregate.odktables.rest.SavepointTypeManipulator;
import org.opendatakit.aggregate.odktables.rest.TableConstants;
import org.opendatakit.aggregate.odktables.rest.entity.Row;
import org.opendatakit.aggregate.odktables.rest.entity.RowFilterScope;
import org.opendatakit.builder.csvparser.jackson.CsvRow;
import org.opendatakit.builder.csvparser.jackson.RowCsvMixin;
import org.opendatakit.builder.csvparser.jackson.RowFilterScopeCsvMixin;
import org.opendatakit.database.utilities.CursorUtils;
import org.opendatakit.utilities.LocalizationUtils;

import java.io.File;
import java.io.IOException;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.List;

/**
 * Reads a CSV file into {@link Row} / {@link CsvRow} objects with Jackson's CSV data format
 * and fills in defaults for metadata fields that are missing or empty.
 *
 * <p>The schema is configured to read a header line and to allow column reordering. Columns
 * are bound to {@link CsvRow} through {@link RowCsvMixin} and {@link RowFilterScopeCsvMixin}.
 */
public class Parser {
  /**
   * Fallback patterns for savepoint timestamps that are not in the TableConstants format.
   * They are tried in array order and the first pattern that matches the whole string wins,
   * so parsing must be strict: with lenient parsing "2018/03/04" would already be accepted
   * by "dd/MM/yyyy" (day 2018 rolled over) and the later "yyyy/MM/dd" would never be reached.
   */
  private static final String[] TIMESTAMP_PATTERNS = {
      "yyyy-MM-dd'T'HH:mm:ss.SSS", // ISO 8601 w/o TZ
      "yyyy-MM-dd'T'HH:mm:ss.SSSZ", // ISO 8601 w/ numeric offset (e.g. +0000)
      "yyyy-MM-dd'T'HH:mm:ss.SSSXXX", // ISO 8601 w/ TZ (e.g. +00:00)
      "EEE, d MMM yyyy HH:mm:ss Z", // RFC1123
      "MMMM dd, yyyy",
      "dd/MM/yyyy",
      "yyyy/MM/dd",
      "dd-MM-yyyy",
      "yyyy-MM-dd"
  };

  /** Reader bound to {@link CsvRow} and the CSV schema; built once in the constructor. */
  private final ObjectReader objectReader;

  /**
   * Configures the CSV mapper, registers the mixins and builds the reader used by
   * {@link #parse(File)} and {@link #parseAsCsvRow(File)}.
   */
  public Parser() {
    // Configure step by step: the enable(DeserializationFeature) / addMixIn overloads are
    // declared on the base mapper type, so chaining them would require a downcast.
    CsvMapper csvMapper = new CsvMapper();
    csvMapper.enable(CsvParser.Feature.FAIL_ON_MISSING_COLUMNS);
    csvMapper.enable(CsvParser.Feature.SKIP_EMPTY_LINES);
    csvMapper.enable(DeserializationFeature.READ_UNKNOWN_ENUM_VALUES_AS_NULL);
    csvMapper.enable(DeserializationFeature.ACCEPT_EMPTY_STRING_AS_NULL_OBJECT);
    csvMapper.addMixIn(CsvRow.class, RowCsvMixin.class);
    csvMapper.addMixIn(RowFilterScope.class, RowFilterScopeCsvMixin.class);

    // NOTE(review): the schema columns come from Row.class while the mixin is registered
    // for CsvRow.class -- confirm the resulting column names are the intended ones.
    CsvSchema csvSchema = CsvSchema.builder()
        .addColumnsFrom(csvMapper.schemaFor(Row.class))
        .build()
        .withHeader()
        .withColumnReordering(true);

    objectReader = csvMapper
        .readerFor(CsvRow.class)
        .with(csvSchema);
  }

  /**
   * Parses the CSV file into rows. For every row the non-metadata columns collected by
   * {@link CsvRow} are copied into the row's values and missing metadata is defaulted.
   *
   * @param file the CSV file to read
   * @return the parsed rows, in file order
   * @throws IOException if the file cannot be read or parsed
   * @throws IllegalArgumentException if a savepoint timestamp cannot be interpreted
   */
  public List<Row> parse(File file) throws IOException {
    // If readValues throws there is nothing to close, so open outside the try.
    MappingIterator<CsvRow> iterator = objectReader.readValues(file);

    try {
      List<Row> rows = new ArrayList<>();
      while (iterator.hasNext()) {
        CsvRow nextRow = iterator.next();
        nextRow.setValues(Row.convertFromMap(nextRow.getColumns()));
        rows.add(populateWithDefault(nextRow));
      }

      return rows;
    } finally {
      iterator.close();
    }
  }

  /**
   * Parses the CSV file into {@link CsvRow}s and defaults missing metadata. Unlike
   * {@link #parse(File)} this does not copy the collected columns into the row's values;
   * callers read them through {@link CsvRow#getColumns()}.
   *
   * @param file the CSV file to read
   * @return the parsed rows, in file order
   * @throws IOException if the file cannot be read or parsed
   * @throws IllegalArgumentException if a savepoint timestamp cannot be interpreted
   */
  public List<CsvRow> parseAsCsvRow(File file) throws IOException {
    MappingIterator<CsvRow> iterator = objectReader.readValues(file);

    try {
      List<CsvRow> rows = iterator.readAll();

      for (CsvRow row : rows) {
        populateWithDefault(row);
      }

      return rows;
    } finally {
      iterator.close();
    }
  }

  /** Returns true when the string is null or has length zero. */
  private static boolean isNullOrEmpty(String value) {
    return value == null || value.isEmpty();
  }

  /**
   * Fills in defaults for metadata that is null or empty and normalizes the savepoint
   * timestamp. The row is modified in place and returned for convenience.
   */
  private <T extends Row> T populateWithDefault(T row) {
    if (isNullOrEmpty(row.getRowId())) {
      row.setRowId(LocalizationUtils.genUUID());
    }

    // an empty ETag is normalized to null
    if (isNullOrEmpty(row.getRowETag())) {
      row.setRowETag(null);
    }

    if (isNullOrEmpty(row.getSavepointCreator())) {
      row.setSavepointCreator(CursorUtils.DEFAULT_CREATOR);
    }

    // an empty form id is normalized to null
    if (isNullOrEmpty(row.getFormId())) {
      row.setFormId(null);
    }

    if (isNullOrEmpty(row.getLocale())) {
      row.setLocale(CursorUtils.DEFAULT_LOCALE);
    }

    // savepointType cannot be null, empty or a value other than complete or incomplete
    String savepointType = row.getSavepointType();
    boolean knownSavepointType = !isNullOrEmpty(savepointType)
        && (savepointType.equals(SavepointTypeManipulator.complete())
        || savepointType.equals(SavepointTypeManipulator.incomplete()));
    if (!knownSavepointType) {
      row.setSavepointType(SavepointTypeManipulator.complete());
    }

    row.setRowFilterScope(populateRowFilterScopeWithDefault(row.getRowFilterScope()));
    row.setSavepointTimestamp(convertInvalidTimestamp(row.getSavepointTimestamp()));

    return row;
  }

  /**
   * Replaces null/empty members of the scope with the corresponding value from
   * {@link RowFilterScope#EMPTY_ROW_FILTER}. The scope is modified in place and returned.
   * NOTE(review): assumes the scope itself is never null after deserialization -- confirm.
   */
  private RowFilterScope populateRowFilterScopeWithDefault(RowFilterScope scope) {
    if (scope.getDefaultAccess() == null) {
      scope.setDefaultAccess(RowFilterScope.EMPTY_ROW_FILTER.getDefaultAccess());
    }

    if (isNullOrEmpty(scope.getRowOwner())) {
      scope.setRowOwner(RowFilterScope.EMPTY_ROW_FILTER.getRowOwner());
    }

    if (isNullOrEmpty(scope.getGroupReadOnly())) {
      scope.setGroupReadOnly(RowFilterScope.EMPTY_ROW_FILTER.getGroupReadOnly());
    }

    if (isNullOrEmpty(scope.getGroupModify())) {
      scope.setGroupModify(RowFilterScope.EMPTY_ROW_FILTER.getGroupModify());
    }

    if (isNullOrEmpty(scope.getGroupPrivileged())) {
      scope.setGroupPrivileged(RowFilterScope.EMPTY_ROW_FILTER.getGroupPrivileged());
    }

    return scope;
  }

  /**
   * Returns a savepoint timestamp in the TableConstants format.
   *
   * <p>Resolution order: a null/empty value becomes the current time; a value that already
   * parses with {@link TableConstants#TIMESTAMP_LOCALE} is returned unchanged; otherwise the
   * value is re-parsed with a null locale, then with {@link #TIMESTAMP_PATTERNS} in
   * {@code TIMESTAMP_LOCALE}, then with the same patterns in the default locale.
   *
   * @param timestamp the raw timestamp read from the CSV, may be null
   * @return the timestamp formatted by {@code TableConstants.nanoSecondsFromMillis}
   * @throws IllegalArgumentException if none of the attempts can parse the value
   */
  private String convertInvalidTimestamp(String timestamp) {
    // timestamp is null/empty, use the current time
    if (isNullOrEmpty(timestamp)) {
      return TableConstants.nanoSecondsFromMillis(
          System.currentTimeMillis(), TableConstants.TIMESTAMP_LOCALE);
    }

    // timestamp is valid
    try {
      TableConstants.milliSecondsFromNanos(timestamp, TableConstants.TIMESTAMP_LOCALE);
      return timestamp;
    } catch (IllegalArgumentException ignored) {
      // IllegalArgumentException is thrown when the pattern doesn't match;
      // fall through to the next attempt
    }

    // try our timestamp format with system default locale
    // NOTE(review): relies on TableConstants treating a null locale as "use the default"
    // -- confirm against TableConstants.milliSecondsFromNanos.
    try {
      Long timeLong = TableConstants.milliSecondsFromNanos(timestamp, null);
      return TableConstants.nanoSecondsFromMillis(timeLong, TableConstants.TIMESTAMP_LOCALE);
    } catch (IllegalArgumentException ignored) {
      // not in our format for the default locale either; fall through
    }

    // try with DateUtils; strict so that an earlier pattern cannot swallow a value that
    // only a later pattern describes correctly (e.g. yyyy/MM/dd vs. dd/MM/yyyy)
    try {
      long timeLong = DateUtils
          .parseDateStrictly(timestamp, TableConstants.TIMESTAMP_LOCALE, TIMESTAMP_PATTERNS)
          .getTime();
      return TableConstants.nanoSecondsFromMillis(timeLong, TableConstants.TIMESTAMP_LOCALE);
    } catch (ParseException ignored) {
      // no pattern matched in TIMESTAMP_LOCALE; fall through
    }

    // try with DateUtils and system default locale
    try {
      long timeLong = DateUtils
          .parseDateStrictly(timestamp, TIMESTAMP_PATTERNS)
          .getTime();
      return TableConstants.nanoSecondsFromMillis(timeLong, TableConstants.TIMESTAMP_LOCALE);
    } catch (ParseException ignored) {
      // no pattern matched in the default locale; give up below
    }

    throw new IllegalArgumentException("Unable to parse timestamp: " + timestamp);
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
package org.opendatakit.builder.csvparser.jackson;

import com.fasterxml.jackson.annotation.JsonAnyGetter;
import com.fasterxml.jackson.annotation.JsonAnySetter;
import com.fasterxml.jackson.annotation.JsonUnwrapped;

import org.apache.commons.lang3.time.DateUtils;
import org.opendatakit.aggregate.odktables.rest.SavepointTypeManipulator;
import org.opendatakit.aggregate.odktables.rest.TableConstants;
import org.opendatakit.aggregate.odktables.rest.entity.Row;
import org.opendatakit.aggregate.odktables.rest.entity.RowFilterScope;
import org.opendatakit.database.utilities.CursorUtils;
import org.opendatakit.utilities.LocalizationUtils;

import java.util.HashMap;
import java.util.Map;

/**
 * A {@link Row} that additionally collects the CSV columns Jackson could not bind to a
 * declared property. Collected columns are exposed through {@link #getColumns()}.
 */
public class CsvRow extends Row {
  /** Column name to cell value for every collected column; empty cells are stored as null. */
  private final Map<String, String> columns;

  /**
   * Returns the collected columns. This is the live backing map, not a copy.
   *
   * @return column name to cell value; values may be null
   */
  @JsonAnyGetter
  @JsonUnwrapped
  public Map<String, String> getColumns() {
    return columns;
  }

  /**
   * Receives every column that has no matching declared property.
   *
   * @param key the column name; names starting with "_" are not stored
   * @param value the cell value; an empty string is stored as null, and a null value is
   *     accepted and stored as null
   */
  @JsonAnySetter
  public void anySetter(String key, String value) {
    // set empty strings to null,
    // because this is how org.opendatakit.aggregate.odktables.rest.RFC4180CsvReader does it
    // (the null check keeps an already-null cell from raising a NullPointerException)
    if (value != null && value.isEmpty()) {
      value = null;
    }

    // columns with a leading underscore are skipped
    // (presumably metadata columns handled elsewhere -- verify against RowCsvMixin)
    if (!key.startsWith("_")) {
      columns.put(key, value);
    }
  }

  /** Creates a row with no collected columns. */
  public CsvRow() {
    this.columns = new HashMap<>();
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
package org.opendatakit.builder.csvparser.jackson;

import com.fasterxml.jackson.annotation.JsonIgnore;
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.annotation.JsonUnwrapped;

import org.opendatakit.aggregate.odktables.rest.TableConstants;
import org.opendatakit.aggregate.odktables.rest.entity.Row;
import org.opendatakit.aggregate.odktables.rest.entity.RowFilterScope;

@JsonIgnoreProperties(ignoreUnknown = true)
public abstract class RowCsvMixin extends Row {
@Override
@JsonProperty(TableConstants.ID)
public abstract String getRowId();

@Override
@JsonProperty(TableConstants.ROW_ETAG)
public abstract String getRowETag();

@Override
@JsonIgnore
public abstract String getDataETagAtModification();

@Override
@JsonIgnore
public abstract boolean isDeleted();

@Override
@JsonIgnore
public abstract String getCreateUser();

@Override
@JsonIgnore
public abstract String getLastUpdateUser();

@Override
@JsonUnwrapped
public abstract RowFilterScope getRowFilterScope();

@Override
@JsonProperty(TableConstants.SAVEPOINT_CREATOR)
public abstract String getSavepointCreator();

@Override
@JsonProperty(TableConstants.FORM_ID)
public abstract String getFormId();

@Override
@JsonProperty(TableConstants.LOCALE)
public abstract String getLocale();

@Override
@JsonProperty(TableConstants.SAVEPOINT_TYPE)
public abstract String getSavepointType();

@Override
@JsonProperty(TableConstants.SAVEPOINT_TIMESTAMP)
public abstract String getSavepointTimestamp();

@Override
@JsonProperty(TableConstants.ID)
public abstract void setRowId(String rowId);

@Override
@JsonProperty(TableConstants.ROW_ETAG)
public abstract void setRowETag(String rowETag);

@Override
@JsonIgnore
public abstract void setDataETagAtModification(String dataETagAtModification);

@Override
@JsonIgnore
public abstract void setDeleted(boolean deleted);

@Override
@JsonIgnore
public abstract void setCreateUser(String createUser);

@Override
@JsonIgnore
public abstract void setLastUpdateUser(String lastUpdateUser);

@Override
@JsonUnwrapped
public abstract void setRowFilterScope(RowFilterScope filterScope);

@Override
@JsonProperty(TableConstants.SAVEPOINT_CREATOR)
public abstract void setSavepointCreator(String savepointCreator);

@Override
@JsonProperty(TableConstants.FORM_ID)
public abstract void setFormId(String formId);

@Override
@JsonProperty(TableConstants.LOCALE)
public abstract void setLocale(String locale);

@Override
@JsonProperty(TableConstants.SAVEPOINT_TYPE)
public abstract void setSavepointType(String savepointType);

@Override
@JsonProperty(TableConstants.SAVEPOINT_TIMESTAMP)
public abstract void setSavepointTimestamp(String savepointTimestamp);
}
Loading