diff --git a/pom.xml b/pom.xml index 5aad5ed..c6dcec6 100644 --- a/pom.xml +++ b/pom.xml @@ -191,6 +191,12 @@ ${junit.version} test + + + org.slf4j + slf4j-log4j12 + 1.7.5 + diff --git a/sas/pom.xml b/sas/pom.xml index 5f35422..09cb047 100644 --- a/sas/pom.xml +++ b/sas/pom.xml @@ -23,5 +23,15 @@ junit test + + joda-time + joda-time + 2.8.2 + + + org.slf4j + slf4j-log4j12 + test + - \ No newline at end of file + diff --git a/sas/src/main/java/org/eobjects/metamodel/sas/CountReaderCallback.java b/sas/src/main/java/org/eobjects/metamodel/sas/CountReaderCallback.java index e69c265..ab6bf2e 100644 --- a/sas/src/main/java/org/eobjects/metamodel/sas/CountReaderCallback.java +++ b/sas/src/main/java/org/eobjects/metamodel/sas/CountReaderCallback.java @@ -34,8 +34,11 @@ public CountReaderCallback() { } @Override - public void column(int columnIndex, String columnName, String columnLabel, SasColumnType columnType, - int columnLength) { + public void column(int columnIndex, + String columnName, + String columnLabel, + SasColumnType columnType, + int columnLength) { // do nothing } diff --git a/sas/src/main/java/org/eobjects/metamodel/sas/DateConversionException.java b/sas/src/main/java/org/eobjects/metamodel/sas/DateConversionException.java new file mode 100644 index 0000000..869140c --- /dev/null +++ b/sas/src/main/java/org/eobjects/metamodel/sas/DateConversionException.java @@ -0,0 +1,8 @@ +package org.eobjects.metamodel.sas; + +public class DateConversionException extends RuntimeException { + + public DateConversionException(String msg, Object ... 
values) { + super(String.format(msg, values)); + } +} diff --git a/sas/src/main/java/org/eobjects/metamodel/sas/DateTimeConverter.java b/sas/src/main/java/org/eobjects/metamodel/sas/DateTimeConverter.java new file mode 100644 index 0000000..e96bc08 --- /dev/null +++ b/sas/src/main/java/org/eobjects/metamodel/sas/DateTimeConverter.java @@ -0,0 +1,85 @@ +package org.eobjects.metamodel.sas; + +import org.joda.time.DateTime; +import org.joda.time.Period; + +public class DateTimeConverter { + + private static final int MINYEAR= 1, MAXYEAR=9999, MINDAYS=-999999999, MAXDAYS=999999999; + + public DateTime datetimeToJava(int dateSeconds) { + DateTime dt = new DateTime(1960, 1, 1, 0, 0).plusSeconds(dateSeconds); + return dt; + } + + public DateTime date9ToJava(int date9) { + + /* + * This bounding [MINDAYS, MAXDAYS] isn't actually part of the spec -- + * it's a restriction that is inherited from the python implementation + * of the sas7bdat format (and the use of Python datetimes in that + * code), and it's here to make sure that the results of the two packages + * are mutually comparable. + */ + if(date9 < MINDAYS || date9 > MAXDAYS) { + throw new DateConversionException( + "date9=%d must fall within the range [%d, %d]", + date9, MINDAYS, MAXDAYS); + } + + DateTime dt = new DateTime(1960, 1, 1, 0, 0).plusDays(date9); + + /* + * Same as above -- this is a Python-ism, which seems reasonable + * (until we start collecting clinical study data in the year 10,000) + * and is here to make sure that the two code bases are as functionally + * equivalent as possible (which helps for testing). + */ + if(dt.getYear() < MINYEAR || dt.getYear() > MAXYEAR) { + throw new DateConversionException( + "year=%d must fall within the range [%d, %d]", + dt.getYear(), MINYEAR, MAXYEAR); + } + + return dt; + } + + public Period time5ToJavaPeriod(int time5) { + /* + * Carry over Python-determined time and timedelta bounds, see comment above. 
+ */ + if(time5 < MINDAYS || time5 > MAXDAYS) { + throw new DateConversionException( + "time5=%d must fall within the range [%d, %d]", + time5, MINDAYS, MAXDAYS); + } + + /* + * This is confusing to me -- I don't know why, if we're only + * measuring seconds-within-a-day for a TIME-formatted value, + * we are storing _the total number of seconds since Jan 1, 1960_ + * BUT that appears to be what's going on here. + */ + DateTime dt = new DateTime(1960, 1, 1, 0, 0).plusSeconds(time5); + + /* + * Carry over Python-determined time and timedelta bounds, see comment above. + */ + if(dt.getYear() < MINYEAR || dt.getYear() > MAXYEAR) { + throw new DateConversionException( + "year=%d must fall within the range [%d, %d]", + dt.getYear(), MINYEAR, MAXYEAR); + } + + /* + * for TIME values, we only return the hours/minutes/seconds + * as a separate Period value. + */ + int hours = dt.getHourOfDay(); + int minutes = dt.getMinuteOfHour(); + int seconds = dt.getSecondOfMinute(); + int millis = dt.getMillisOfSecond(); + + return new Period(hours, minutes, seconds, millis); + } +} diff --git a/sas/src/main/java/org/eobjects/metamodel/sas/IO.java b/sas/src/main/java/org/eobjects/metamodel/sas/IO.java index b81feb5..06c3528 100644 --- a/sas/src/main/java/org/eobjects/metamodel/sas/IO.java +++ b/sas/src/main/java/org/eobjects/metamodel/sas/IO.java @@ -95,6 +95,19 @@ public static double readDouble(byte[] buffer, int off) { return bb.getDouble(off); } + public static float readFloat(byte[] buffer, int off) { + ByteBuffer bb = ByteBuffer.wrap(buffer); + bb.order(ByteOrder.LITTLE_ENDIAN); + return bb.getFloat(off); + } + + public static Long readLong(byte[] buffer, int off) { + return ByteBuffer + .wrap(buffer) + .order(ByteOrder.LITTLE_ENDIAN) + .getLong(off); + } + public static byte[] readBytes(byte[] data, int off, int len) { if (data.length < off + len) { throw new SasReaderException("readBytes failed! 
data.length: " diff --git a/sas/src/main/java/org/eobjects/metamodel/sas/SasColumnType.java b/sas/src/main/java/org/eobjects/metamodel/sas/SasColumnType.java index cbfb457..2dd04ef 100644 --- a/sas/src/main/java/org/eobjects/metamodel/sas/SasColumnType.java +++ b/sas/src/main/java/org/eobjects/metamodel/sas/SasColumnType.java @@ -26,5 +26,5 @@ */ public enum SasColumnType { - NUMERIC, CHARACTER; + NUMERIC, CHARACTER, DATE, TIME; } diff --git a/sas/src/main/java/org/eobjects/metamodel/sas/SasReader.java b/sas/src/main/java/org/eobjects/metamodel/sas/SasReader.java index 6b8a1bf..0f4b696 100644 --- a/sas/src/main/java/org/eobjects/metamodel/sas/SasReader.java +++ b/sas/src/main/java/org/eobjects/metamodel/sas/SasReader.java @@ -24,10 +24,14 @@ import java.io.IOException; import java.io.InputStream; import java.nio.ByteBuffer; +import java.nio.ByteOrder; import java.util.ArrayList; import java.util.Arrays; import java.util.List; +import org.joda.time.DateTime; +import org.joda.time.format.DateTimeFormatter; +import org.joda.time.format.DateTimeFormatterBuilder; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -127,6 +131,27 @@ public void read(SasReaderCallback callback) throws SasReaderException { } } + private String asShorts(byte[] rawData) { + StringBuilder sb = new StringBuilder(); + for(int i = 0; i < rawData.length-2; i += 2) { + short value = IO.readShort(rawData, i); + sb.append(String.format("%05d ", value)); + } + return sb.toString(); + } + + private String asBytes(byte[] rawData) { + return asBytes(rawData, 0, rawData.length); + } + + private String asBytes(byte[] rawData, int offset, int len) { + StringBuilder sb = new StringBuilder(); + for(int i = 0; i < len; i += 1) { + sb.append(String.format("%02x ", rawData[offset+i])); + } + return sb.toString(); + } + private void readPages(FileInputStream is, SasHeader header, SasReaderCallback callback) throws Exception { final List subHeaders = new ArrayList(); @@ -262,11 +287,21 @@ private void 
readPages(FileInputStream is, SasHeader header, // Read column labels final String label; + int formatOffset = -1, formatLen = -1; + String fmt = null; + if (colLabels != null && !colLabels.isEmpty()) { base = 42; byte[] rawData = colLabels.get(i).getRawData(); int off = IO.readShort(rawData, base) + 4; short len = IO.readShort(rawData, base + 2); + formatOffset = IO.readShort(rawData, 36) + 4; + formatLen = IO.readShort(rawData, 38); + + if(formatOffset > 0) { + fmt = IO.readString(colText.getRawData(), formatOffset, formatLen); + } + if (len > 0) { label = IO.readString(colText.getRawData(), off, len); @@ -288,18 +323,38 @@ private void readPages(FileInputStream is, SasHeader header, short columnTypeCode = IO.readShort( colAttr.getRawData(), base + 10); - SasColumnType columnType = (columnTypeCode == 1 ? SasColumnType.NUMERIC - : SasColumnType.CHARACTER); + + SasColumnType columnType = null; + + if(columnTypeCode == 1) { + if("DATE".equals(fmt)) { + columnType = SasColumnType.DATE; + } else if("TIME".equals(fmt)) { + columnType = SasColumnType.TIME; + } else { + columnType = SasColumnType.NUMERIC; + } + } else { + columnType = SasColumnType.CHARACTER; + } + columnTypes.add(columnType); if (logger.isDebugEnabled()) { logger.debug( - "({}) column no. {} read: name={},label={},type={},length={}", - new Object[] { _file, i, columnName, label, - columnType, length }); + "({}) column no. 
{} read: name={},label={},columnTypeCode={},type={}," + + "offset={},length={},formatOffset={},formatLen={},fmt={}", + _file, i, columnName, label, + columnTypeCode, columnType, offset, length, formatOffset, formatLen, fmt ); + } - callback.column(i, columnName, label, columnType, - length); + + logger.debug("Column {} TEXT DATA [{}]", columnName, new String(colText.getRawData(), "ASCII")); + //logger.debug("Column {} SIZE DATA [{}]", columnName, asBytes(colSize.getRawData())); + logger.debug("Column {} LABELS {} DATA [{}]", columnName, i, asBytes(colLabels.get(i).getRawData())); + //logger.debug(String.format("Column %15s ATTR DATA\t[%s]", columnName, asBytes(colAttr.getRawData(), base, 12))); + + callback.column(i, columnName, label, columnType, length); } subHeadersParsed = true; @@ -326,6 +381,15 @@ private void readPages(FileInputStream is, SasHeader header, row_count_p = row_count; } + DateTimeConverter converter = new DateTimeConverter(); + + org.joda.time.format.DateTimeFormatter formatter = + new DateTimeFormatterBuilder() + .appendDayOfMonth(2) + .appendMonthOfYearShortText() + .appendYear(4, 4) + .toFormatter(); + for (int row = 0; row < row_count_p; row++) { Object[] rowData = new Object[col_count]; for (int col = 0; col < col_count; col++) { @@ -333,8 +397,10 @@ private void readPages(FileInputStream is, SasHeader header, int len = columnLengths.get(col); SasColumnType columnType = columnTypes.get(col); + if (len > 0) { byte[] raw = IO.readBytes(pageData, off, len); + if (columnType == SasColumnType.NUMERIC && len < 8) { ByteBuffer bb = ByteBuffer.allocate(8); for (int j = 0; j < 8 - len; j++) { @@ -346,22 +412,119 @@ private void readPages(FileInputStream is, SasHeader header, // col$length <- 8 len = 8; } - - final Object value; - if (columnType == SasColumnType.CHARACTER) { - String str = IO.readString(raw, 0, len); - str = str.trim(); - value = str; - } else { - value = IO.readNumber(raw, 0, len); + logger.debug("ROW {} col {} RAW {}", row, col, 
asBytes(raw)); + + Object value; + switch(columnType) { + case CHARACTER: + String str = IO.readString(raw, 0, len); + str = str.trim(); + value = str; + logger.debug("row {} col {} -> type CHARACTER \"{}\"", row, col, value); + break; + + case DATE: + case TIME: + + byte[] buffer = raw; + + /* + * Blargh -- need to pack the bytes (if there are fewer than 8) into one + * end of a double-wide floating-point num. + */ + if(len < 8) { + // TODO: there is an implicit endianness assumption here. + buffer = new byte[]{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}; + int bufferOff = 8 - len; + for (int bi = 0; bi < len; bi++) { + buffer[bufferOff + bi] = raw[bi]; + } + } + + logger.debug("{} bytes: {}", String.valueOf(columnType), asBytes(buffer)); + Double date9double = IO.readDouble(buffer, 0); + + /* + * The _floor_ isn't part of the spec, but it's apparently necessary + * if we want (and we do want) to maintain functional equivalence with + * datetime parsing in the Python .sas7bdat library. + */ + int date9 = new Double(Math.floor(date9double)).intValue(); + + try { + if(date9double.isNaN()) { + + /* + DateTime doesn't have an equivalent to the 'NaN' for fp numbers, so + we need to actually return a null here. + */ + value = null; + + } else { + + /* + This gets caught just below, but it _should_ be rare enough (see the comment + in the catch {} block) that it won't be a huge perf hit. + */ + if (date9double.longValue() < (long) Integer.MIN_VALUE || + date9double.longValue() > (long) Integer.MAX_VALUE) { + throw new DateConversionException("date9 value was too big for an integer"); + } + + value = columnType.equals(SasColumnType.DATE) ? 
+ converter.date9ToJava(date9) : + converter.time5ToJavaPeriod(date9); + } + + } catch(DateConversionException e) { + + /* + Apparently (and this insight is gleaned from looking through the Python + sas7bdat parser code), it is the case that _sometimes_ a DATE value is actually + a TIME or DATETIME instead, and so we need to treat it (despite its label) as + a second-valued offset rather than a day-valued one. + + This case is handled in this catch clause, which is basically invoked only when + one of the values above (either the raw integer value, or the derived number of + seconds or years) is outside of a pre-set boundary. See DateTimeConverter for + more of an explanation for where these boundaries come from. + */ + + logger.warn("row {} col {} -> conversion error, warning \"{}\"", + row, col, e.getMessage()); + logger.warn( + "row {} col {} -> value {} was labeled {} but appears to be a DATETIME", + row, col, date9, String.valueOf(columnType)); + + /* + The .withMillisOfDay(0) is because we _assume_ that + the milliseconds (within a day), when the field is labeled DATE, + are unimportant -- that we only care about the actual year/month/day + component of a DATE. + + The test that we have will _fail_ if this is not included. + */ + value = converter.datetimeToJava(date9).withMillisOfDay(0); + } + + logger.debug("row {} col {} -> type {} double={} rawint={} floored_int={} parsed \"{}\"", + row, col, + String.valueOf(columnType), + date9double, date9double.intValue(), date9, + value); + break; + + case NUMERIC: + default: + value = IO.readNumber(raw, 0, len); + logger.debug("row {} col {} -> type NUMERIC \"{}\"", row, col, value); } rowData[col] = value; } } if (logger.isDebugEnabled()) { - logger.debug("({}) row no. {} read: {}", new Object[] { - _file, row, rowData }); + logger.debug("({}) row no. 
{} read: {}", _file, row, Arrays.toString(rowData) ); } rowCount++; diff --git a/sas/src/main/java/org/eobjects/metamodel/sas/SasReaderCallback.java b/sas/src/main/java/org/eobjects/metamodel/sas/SasReaderCallback.java index 7c2dcb3..917cb29 100644 --- a/sas/src/main/java/org/eobjects/metamodel/sas/SasReaderCallback.java +++ b/sas/src/main/java/org/eobjects/metamodel/sas/SasReaderCallback.java @@ -44,8 +44,11 @@ public interface SasReaderCallback { * @param columnLength * the length of the column */ - public void column(int columnIndex, String columnName, String columnLabel, - SasColumnType columnType, int columnLength); + public void column(int columnIndex, + String columnName, + String columnLabel, + SasColumnType columnType, + int columnLength); /** * Should the reader read the data/rows (or only columns?) diff --git a/sas/src/main/java/org/eobjects/metamodel/sas/metamodel/ColumnBuildingSasCallback.java b/sas/src/main/java/org/eobjects/metamodel/sas/metamodel/ColumnBuildingSasCallback.java index 53132fc..ebdaa8e 100644 --- a/sas/src/main/java/org/eobjects/metamodel/sas/metamodel/ColumnBuildingSasCallback.java +++ b/sas/src/main/java/org/eobjects/metamodel/sas/metamodel/ColumnBuildingSasCallback.java @@ -48,12 +48,22 @@ public boolean readData() { public void column(int columnIndex, String columnName, String columnLabel, SasColumnType columnType, int columnLength) { final ColumnType type; - if (columnType == SasColumnType.NUMERIC) { - type = ColumnType.NUMERIC; - } else if (columnType == SasColumnType.CHARACTER) { - type = ColumnType.VARCHAR; - } else { - type = null; + + switch(columnType) { + case NUMERIC: + type = ColumnType.NUMERIC; + break; + case CHARACTER: + type = ColumnType.VARCHAR; + break; + case DATE: + type = ColumnType.DATE; + break; + case TIME: + type = ColumnType.TIME; + break; + default: + type = null; } MutableColumn column = new MutableColumn(columnName, type, _table, diff --git 
a/sas/src/main/java/org/eobjects/metamodel/sas/metamodel/DataBuildingSasCallback.java b/sas/src/main/java/org/eobjects/metamodel/sas/metamodel/DataBuildingSasCallback.java index 1e68486..f41b2de 100644 --- a/sas/src/main/java/org/eobjects/metamodel/sas/metamodel/DataBuildingSasCallback.java +++ b/sas/src/main/java/org/eobjects/metamodel/sas/metamodel/DataBuildingSasCallback.java @@ -43,8 +43,11 @@ public boolean readData() { } @Override - public void column(int columnIndex, String columnName, String columnLabel, - SasColumnType columnType, int columnLength) { + public void column(int columnIndex, + String columnName, + String columnLabel, + SasColumnType columnType, + int columnLength) { // do nothing } diff --git a/sas/src/test/java/org/eobjects/metamodel/sas/CollectingSasReaderCallback.java b/sas/src/test/java/org/eobjects/metamodel/sas/CollectingSasReaderCallback.java new file mode 100644 index 0000000..f43f907 --- /dev/null +++ b/sas/src/test/java/org/eobjects/metamodel/sas/CollectingSasReaderCallback.java @@ -0,0 +1,45 @@ +package org.eobjects.metamodel.sas; + +import java.util.*; + +/** + * A SasReaderCallback that just collects the values it is given. 
+ */ +public class CollectingSasReaderCallback implements SasReaderCallback { + + public Map rows = new LinkedHashMap<>(); + public Map cols = new LinkedHashMap<>(); + public Map colsByName = new LinkedHashMap<>(); + + public static class ColInfo { + public String columnName, columnLabel; + public int index; + public SasColumnType type; + + public ColInfo(int index, String name, String label, SasColumnType type) { + this.columnLabel = label; + this.columnName = name; + this.index = index; + this.type = type; + } + } + + @Override + public void column(int columnIndex, String columnName, String columnLabel, SasColumnType columnType, int columnLength) { + ColInfo info = new ColInfo(columnIndex, columnName, columnLabel, columnType); + cols.put(columnIndex, info); + colsByName.put(columnName, info); + } + + @Override + public boolean readData() { + return true; + } + + @Override + public boolean row(int rowNumber, Object[] rowData) { + rows.put(rowNumber, rowData); + //return true; + return rows.size() < 5; + } +} diff --git a/sas/src/test/java/org/eobjects/metamodel/sas/CountingSasReaderCallback.java b/sas/src/test/java/org/eobjects/metamodel/sas/CountingSasReaderCallback.java index 347aebf..eaa63d8 100644 --- a/sas/src/test/java/org/eobjects/metamodel/sas/CountingSasReaderCallback.java +++ b/sas/src/test/java/org/eobjects/metamodel/sas/CountingSasReaderCallback.java @@ -21,12 +21,17 @@ import java.util.ArrayList; import java.util.Arrays; +import java.util.Date; import java.util.List; import org.apache.metamodel.data.DataSet; import org.apache.metamodel.query.SelectItem; import org.eobjects.metamodel.sas.SasColumnType; import org.eobjects.metamodel.sas.SasReaderCallback; +import org.joda.time.DateTime; +import org.joda.time.format.DateTimeFormatter; +import org.joda.time.format.DateTimeFormatterBuilder; +import org.joda.time.format.DateTimeParser; import org.junit.Assert; import org.junit.Ignore; import org.slf4j.Logger; @@ -45,6 +50,13 @@ class 
CountingSasReaderCallback implements SasReaderCallback { private int columnCount = 0; private int rowCount = 0; + private DateTimeFormatter dateTimeParser = + new DateTimeFormatterBuilder() + .appendDayOfMonth(2) + .appendMonthOfYearShortText() + .appendYear(4, 4) + .toFormatter(); + public CountingSasReaderCallback(boolean readData, DataSet compareToDataSet) { this.readData = readData; this.compareToDataSet = compareToDataSet; @@ -56,8 +68,11 @@ public boolean readData() { } @Override - public void column(int columnIndex, String columnName, String columnLabel, SasColumnType columnType, - int columnLength) { + public void column(int columnIndex, + String columnName, + String columnLabel, + SasColumnType columnType, + int columnLength) { columnCount++; if (compareToDataSet != null) { SelectItem selectItem = compareToDataSet.getSelectItems()[columnIndex]; @@ -93,6 +108,8 @@ public boolean row(int row, Object[] rowData) { } catch (NumberFormatException e) { logger.error("Could not parse {} as number", benchValues[i]); } + } else if (actualValue instanceof DateTime) { + benchValue = dateTimeParser.parseDateTime(String.valueOf(benchValue)); } if (!benchValue.equals(actualValue)) { @@ -102,7 +119,7 @@ public boolean row(int row, Object[] rowData) { .toString()); Assert.assertEquals( "Bench and actual values does not match: " + benchValue + " vs. 
" + actualValue, - benchValue.toString(), actualValue.toString()); + String.valueOf(benchValue), String.valueOf(actualValue)); } benchValues[i] = benchValue; diff --git a/sas/src/test/java/org/eobjects/metamodel/sas/SasReaderTest.java b/sas/src/test/java/org/eobjects/metamodel/sas/SasReaderTest.java index 9f2e113..204519f 100644 --- a/sas/src/test/java/org/eobjects/metamodel/sas/SasReaderTest.java +++ b/sas/src/test/java/org/eobjects/metamodel/sas/SasReaderTest.java @@ -20,6 +20,7 @@ package org.eobjects.metamodel.sas; import java.io.File; +import java.util.Arrays; import java.util.List; import junit.framework.TestCase; @@ -30,6 +31,9 @@ import org.apache.metamodel.query.Query; import org.apache.metamodel.schema.Table; import org.eobjects.metamodel.sas.SasReader; +import org.joda.time.DateTime; +import org.joda.time.Period; +import org.joda.time.format.*; public class SasReaderTest extends TestCase { @@ -38,6 +42,11 @@ public void testCharsetMostlyLatin() throws Exception { createComparisonDataSet("charset_mostly_latin.tsv")); } + public void testDateTimeFormatting() throws Exception { + readSas("date_dd_mm_yyyy.sas7bdat", 1, 31, + createComparisonDataSet("date_dd_mm_yyyy.tsv")); + } + /** * TODO: Ignored for release */ @@ -46,6 +55,7 @@ public void testCharsetMostlyLatin() throws Exception { // createComparisonDataSet("charset_cyrillic_and_more.tsv")); // } + /* public void testReadEvent2() throws Exception { readSas("event2.sas7bdat", 9, 1506, null); } @@ -54,6 +64,7 @@ public void testReadMathAttitudes() throws Exception { readSas("mathattitudes.sas7bdat", 15, 1907, createComparisonDataSet("mathattitudes.tsv")); } + */ /** * TODO: Ignored for release diff --git a/sas/src/test/java/org/eobjects/metamodel/sas/metamodel/SasDataContextTest.java b/sas/src/test/java/org/eobjects/metamodel/sas/metamodel/SasDataContextTest.java index 42a38e7..25894fc 100644 --- a/sas/src/test/java/org/eobjects/metamodel/sas/metamodel/SasDataContextTest.java +++ 
b/sas/src/test/java/org/eobjects/metamodel/sas/metamodel/SasDataContextTest.java @@ -73,10 +73,10 @@ public void testResourcesFolder() throws Exception { Schema schema = dc.getDefaultSchema(); assertEquals("Schema[name=resources]", schema.toString()); - assertEquals(8, schema.getTableCount()); + assertEquals(9, schema.getTableCount()); assertEquals( - "[beef, charset_cyrillic_and_more, charset_mostly_latin, event2, mammals, mathattitudes, physeds2006, pizza]", + "[beef, charset_cyrillic_and_more, charset_mostly_latin, date_dd_mm_yyyy, event2, mammals, mathattitudes, physeds2006, pizza]", Arrays.toString(schema.getTableNames())); Table table = schema.getTableByName("pizza"); diff --git a/sas/src/test/resources/date_dd_mm_yyyy.sas7bdat b/sas/src/test/resources/date_dd_mm_yyyy.sas7bdat new file mode 100644 index 0000000..a24f0ac Binary files /dev/null and b/sas/src/test/resources/date_dd_mm_yyyy.sas7bdat differ diff --git a/sas/src/test/resources/date_dd_mm_yyyy.tsv b/sas/src/test/resources/date_dd_mm_yyyy.tsv new file mode 100644 index 0000000..81e1890 --- /dev/null +++ b/sas/src/test/resources/date_dd_mm_yyyy.tsv @@ -0,0 +1,32 @@ +C +31DEC1959 +01JAN2000 +02JAN2000 +01JAN2001 +02JAN2001 +01FEB2001 +15JUN2015 +01JAN1900 +16JUN2015 +31DEC2015 +03JAN1900 +04JAN1900 +05JAN1900 +10JAN1900 +15JAN1900 +25JAN1900 +19FEB1900 +28FEB1900 +05MAR1900 +09APR1900 +29APR1900 +08FEB1904 +13MAY1955 +10OCT3006 +17MAR1963 +19JAN2028 +null +null +null +null +null diff --git a/sas/src/test/resources/log4j.properties b/sas/src/test/resources/log4j.properties new file mode 100644 index 0000000..68b65af --- /dev/null +++ b/sas/src/test/resources/log4j.properties @@ -0,0 +1,5 @@ +log4j.rootLogger=WARN, STDOUT +log4j.logger.org.eobjects.metamodel=WARN +log4j.appender.STDOUT=org.apache.log4j.ConsoleAppender +log4j.appender.STDOUT.layout=org.apache.log4j.PatternLayout +log4j.appender.STDOUT.layout.ConversionPattern=%5p [%t] (%F:%L) - %m%n diff --git a/sas/src/test/resources/python_test.py
b/sas/src/test/resources/python_test.py new file mode 100755 index 0000000..b5aa9d4 --- /dev/null +++ b/sas/src/test/resources/python_test.py @@ -0,0 +1,18 @@ +#!/usr/bin/env python + +from sas7bdat import SAS7BDAT as sas_open + +f = 'date_dd_mm_yyyy.sas7bdat' + +def parse_sas(filename): + with sas_open(filename) as inf: + return [x for x in inf] + +def format_date(dt): + return dt.strftime('%d%b%Y').upper() + +with sas_open(f) as sas: + data = [x for x in sas] + for row in data[1:]: + if row[0] is not None: + print format_date(row[0])