Skip to content

Commit

Permalink
Add Row.equals/Row.hashCode. Fix Table.isDuplicate when row hashes collide (#1267)
Browse files Browse the repository at this point in the history

Co-authored-by: ccleva <[email protected]>
  • Loading branch information
benmccann and ccleva authored Jan 3, 2025
1 parent 5efb947 commit 9cf0365
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 58 deletions.
23 changes: 22 additions & 1 deletion core/src/main/java/tech/tablesaw/api/Row.java
Original file line number Diff line number Diff line change
Expand Up @@ -809,8 +809,29 @@ public Column<?> column(int columnIndex) {
return tableSlice.column(columnIndex);
}

/** Returns true if every value is equal to the corresponding value in the given row */
@Override
public boolean equals(Object obj) {
if (obj == null || obj.getClass() != this.getClass()) {
return false;
}

Row other = (Row) obj;
if (columnCount() != other.columnCount()) {
return false;
}

for (int columnIndex = 0; columnIndex < columnCount(); columnIndex++) {
if (!column(columnIndex).equals(getRowNumber(), other.getRowNumber())) {
return false;
}
}
return true;
}

/** Returns a hash computed on the values in the backing table at this row */
public int rowHash() {
@Override
public int hashCode() {
int[] values = new int[columnCount()];
for (int i = 0; i < columnCount(); i++) {
Column<?> column = tableSlice.column(i);
Expand Down
60 changes: 3 additions & 57 deletions core/src/main/java/tech/tablesaw/api/Table.java
Original file line number Diff line number Diff line change
Expand Up @@ -545,25 +545,6 @@ public static boolean compareRows(int rowNumber, Table table1, Table table2) {
return true;
}

/**
* Returns true if every value in row1 is equal to the same value in row2, where row1 and row2 are
* both rows from this table
*/
private boolean duplicateRows(Row row1, Row row2) {
if (row1.columnCount() != row2.columnCount()) {
return false;
}
boolean result;
for (int columnIndex = 0; columnIndex < row1.columnCount(); columnIndex++) {
Column<?> c = column(columnIndex);
result = c.equals(row1.getRowNumber(), row2.getRowNumber());
if (!result) {
return false;
}
}
return true;
}

public Table[] sampleSplit(double table1Proportion) {
Table[] tables = new Table[2];
int table1Count = (int) Math.round(rowCount() * table1Proportion);
Expand Down Expand Up @@ -931,52 +912,17 @@ public TableSliceGroup splitOn(CategoricalColumn<?>... columns) {
* this table, appears only once in the returned table.
*/
public Table dropDuplicateRows() {

Table temp = emptyCopy();
Int2ObjectMap<IntArrayList> uniqueHashes = new Int2ObjectOpenHashMap<>();
// ListMultimap<Integer, Integer> uniqueHashes = ArrayListMultimap.create();
Set uniqueRows = new HashSet<>();
for (Row row : this) {
if (!isDuplicate(row, uniqueHashes)) {
if (!uniqueRows.contains(row)) {
uniqueRows.add(row);
temp.append(row);
}
}
return temp;
}

/**
* Returns true if all the values in row are identical to those in another row previously seen and
* recorded in the list.
*
* @param row the row to evaluate
* @param uniqueHashes a map of row hashes to the id of an exemplar row that produces that hash.
* If two different rows produce the same hash, then the row number for each is placed in the
* list, so that there are exemplars for both
* @return true if the row's values exactly match a row that was previously seen
*/
private boolean isDuplicate(Row row, Int2ObjectMap<IntArrayList> uniqueHashes) {
int hash = row.rowHash();
if (!uniqueHashes.containsKey(hash)) {
IntArrayList rowNumbers = new IntArrayList();
rowNumbers.add(row.getRowNumber());
uniqueHashes.put(hash, rowNumbers);
return false;
}

// the hashmap contains the hash, make sure the actual row values match
IntArrayList matchingKeys = uniqueHashes.get(hash);

for (int key : matchingKeys) {
Row oldRow = this.row(key);
if (duplicateRows(row, oldRow)) {
return true;
} else {
uniqueHashes.get(hash).add(row.getRowNumber());
return false;
}
}
return true;
}

/** Returns only those records in this table that have no columns with missing values */
public Table dropRowsWithMissingValues() {

Expand Down

0 comments on commit 9cf0365

Please sign in to comment.