43
43
import org .apache .iceberg .DataOperations ;
44
44
import org .apache .iceberg .MetadataUpdate ;
45
45
import org .apache .iceberg .PartitionSpec ;
46
- import org .apache .iceberg .SnapshotRef ;
47
46
import org .apache .iceberg .Schema ;
48
47
import org .apache .iceberg .Snapshot ;
48
+ import org .apache .iceberg .SnapshotRef ;
49
49
import org .apache .iceberg .SortOrder ;
50
50
import org .apache .iceberg .Table ;
51
51
import org .apache .iceberg .TableMetadata ;
@@ -782,23 +782,6 @@ public LoadTableResponse updateTable(
782
782
return updateTableWithRollback (baseCatalog , tableIdentifier , applyUpdateFilters (request ));
783
783
}
784
784
785
- private static TableMetadata create (TableOperations ops , UpdateTableRequest request ) {
786
- request .requirements ().forEach ((requirement ) -> requirement .validate (ops .current ()));
787
- Optional <Integer > formatVersion =
788
- request .updates ().stream ()
789
- .filter ((update ) -> update instanceof MetadataUpdate .UpgradeFormatVersion )
790
- .map ((update ) -> ((MetadataUpdate .UpgradeFormatVersion ) update ).formatVersion ())
791
- .findFirst ();
792
- TableMetadata .Builder builder =
793
- (TableMetadata .Builder )
794
- formatVersion
795
- .map (TableMetadata ::buildFromEmpty )
796
- .orElseGet (TableMetadata ::buildFromEmpty );
797
- request .updates ().forEach ((update ) -> update .applyTo (builder ));
798
- ops .commit ((TableMetadata ) null , builder .build ());
799
- return ops .current ();
800
- }
801
-
802
785
// TODO: Clean this up when CatalogHandler becomes extensible.
803
786
// Copy of CatalogHandler#update
804
787
private static LoadTableResponse updateTableWithRollback (
@@ -828,7 +811,28 @@ private static LoadTableResponse updateTableWithRollback(
828
811
return LoadTableResponse .builder ().withTableMetadata (finalMetadata ).build ();
829
812
}
830
813
814
+ // TODO: Clean this up when CatalogHandler becomes extensible.
815
+ // Copy of CatalogHandler#create
816
+ private static TableMetadata create (TableOperations ops , UpdateTableRequest request ) {
817
+ request .requirements ().forEach ((requirement ) -> requirement .validate (ops .current ()));
818
+ Optional <Integer > formatVersion =
819
+ request .updates ().stream ()
820
+ .filter ((update ) -> update instanceof MetadataUpdate .UpgradeFormatVersion )
821
+ .map ((update ) -> ((MetadataUpdate .UpgradeFormatVersion ) update ).formatVersion ())
822
+ .findFirst ();
823
+ TableMetadata .Builder builder =
824
+ (TableMetadata .Builder )
825
+ formatVersion
826
+ .map (TableMetadata ::buildFromEmpty )
827
+ .orElseGet (TableMetadata ::buildFromEmpty );
828
+ request .updates ().forEach ((update ) -> update .applyTo (builder ));
829
+ ops .commit ((TableMetadata ) null , builder .build ());
830
+ return ops .current ();
831
+ }
832
+
831
833
@ VisibleForTesting
834
+ // TODO: Clean this up when CatalogHandler becomes extensible.
835
+ // Copy of CatalogHandler#commit
832
836
public static TableMetadata commit (TableOperations ops , UpdateTableRequest request ) {
833
837
AtomicBoolean isRetry = new AtomicBoolean (false );
834
838
@@ -840,8 +844,6 @@ public static TableMetadata commit(TableOperations ops, UpdateTableRequest reque
840
844
.run (
841
845
(taskOps ) -> {
842
846
TableMetadata base = isRetry .get () ? taskOps .refresh () : taskOps .current ();
843
- isRetry .set (true );
844
- // Prev PR: https://github.com/apache/iceberg/pull/5888
845
847
boolean rollbackCompaction =
846
848
PropertyUtil .propertyAsBoolean (
847
849
taskOps .current ().properties (), ROLLBACK_REPLACE_ENABLED_PROPERTY , false );
@@ -854,76 +856,61 @@ public static TableMetadata commit(TableOperations ops, UpdateTableRequest reque
854
856
if (!rollbackCompaction ) {
855
857
throw new ValidationFailureException (e );
856
858
}
857
- // Since snapshot has already been created at the client end.
858
- // Nothing much can be done, we can move this
859
- // to writer specific thing, but it would be cool if catalog does this for us.
860
- // Inspect that the requirements states that snapshot
861
- // ref needs to be asserted this usually means in the update section
862
- // it has addSnapshot and setSnapshotRef
863
- UpdateRequirement .AssertRefSnapshotID addSnapshot = null ;
864
- int found = 0 ;
865
- for (UpdateRequirement requirement : request .requirements ()) {
866
- // there should be only add snapshot request
867
- if (requirement instanceof UpdateRequirement .AssertRefSnapshotID ) {
868
- ++found ;
869
- addSnapshot = (UpdateRequirement .AssertRefSnapshotID ) requirement ;
870
- }
871
- }
859
+ UpdateRequirement .AssertRefSnapshotID assertRefSnapshotId =
860
+ findAssertRefSnapshotID (request );
861
+ MetadataUpdate .SetSnapshotRef setSnapshotRef = findSetSnapshotRefUpdate (request );
872
862
873
- if (found != 1 ) {
874
- // TODO: handle this case, find min snapshot id, to rollback to give it creates
875
- // lineage
876
- // lets not complicate things rn
863
+ if (assertRefSnapshotId == null || setSnapshotRef == null ) {
864
+ // This implies the request was not trying to add a snapshot.
877
865
throw new ValidationFailureException (e );
878
866
}
879
867
880
- Long parentSnapshotId = addSnapshot .snapshotId ();
881
- // so we will first check all the snapshots on the top of
882
- // base on which the snapshot we want to commit is of type REPLACE ops.
883
- Long parentToRollbackTo = base .currentSnapshot ().snapshotId ();
884
- List <MetadataUpdate > updateToRemoveSnapshot = new ArrayList <>();
885
- while (!Objects .equals (parentToRollbackTo , parentSnapshotId )) {
886
- Snapshot snap = ops .current ().snapshot (parentToRollbackTo );
887
- if (!DataOperations .REPLACE .equals (snap .operation ())) {
888
- break ;
889
- }
890
- updateToRemoveSnapshot .add (
891
- new MetadataUpdate .RemoveSnapshot (snap .snapshotId ()));
892
- parentToRollbackTo = snap .parentId ();
893
- }
894
-
895
- MetadataUpdate .SetSnapshotRef ref = null ;
896
- found = 0 ;
897
- // find the SetRefName snapshot update
898
- for (MetadataUpdate update : request .updates ()) {
899
- if (update instanceof MetadataUpdate .SetSnapshotRef ) {
900
- ++found ;
901
- ref = (MetadataUpdate .SetSnapshotRef ) update ;
902
- }
868
+ if (!hasJustMainBranch (base )) {
869
+ // There can be cases when the snapshot we want to rollback
870
+ // is being referenced by another branch and just checking
871
+ // the tip of these branches is not sufficient.
872
+ // TODO: handle cases when tables have >1 branches.
873
+ throw new ValidationFailureException (e );
903
874
}
904
875
905
- if (found != 1 || !Objects .equals (parentToRollbackTo , parentSnapshotId )) {
906
- // nothing can be done as this implies there was a non replace
907
- // snapshot in between or there is more than setRef ops, we don't know where
908
- // to go.
876
+ // snapshot-id the client expects the table current_snapshot_id
877
+ long expectedCurrentSnapshotId = assertRefSnapshotId .snapshotId ();
878
+ // table current_snapshot_id.
879
+ long currentSnapshotId = base .currentSnapshot ().snapshotId ();
880
+ List <MetadataUpdate > metadataUpdates =
881
+ generateUpdatesToRemoveNoopSnapshot (
882
+ ops , currentSnapshotId , expectedCurrentSnapshotId );
883
+
884
+ if (metadataUpdates == null || metadataUpdates .isEmpty ()) {
885
+ // Nothing can be done as this implies that there were not all
886
+ // No-op snapshots (REPLACE) between expectedCurrentSnapshotId and
887
+ // currentSnapshotId.
888
+ // hence re-throw the exception caught.
909
889
throw new ValidationFailureException (e );
910
890
}
911
891
912
- // first we should also set back the ref we wanted to set, back to the base
913
- // on which the current update is based on.
914
- metadataBuilder .setBranchSnapshot (parentSnapshotId , ref .name ());
892
+ // Set back the ref we wanted to set, back to the snapshot-id
893
+ // the client is expecting the table to be at.
894
+ metadataBuilder .setBranchSnapshot (
895
+ expectedCurrentSnapshotId , setSnapshotRef .name ());
915
896
916
897
// apply the remove snapshots update in the current metadata.
917
- // NOTE: we need to setRef to parent first and then apply remove as the remove
918
- // will drop. The tags / branch which don't have reference.
898
+ // NOTE: we need to setRef to expectedCurrentSnapshotId first and then apply
899
+ // remove,
900
+ // as otherwise the remove will drop the tags / branches left without a reference.
919
901
// NOTE: we can skip removing the now orphan base. It's not a hard requirement.
920
902
// just something good to do, and not leave for Remove Orphans.
921
- updateToRemoveSnapshot .forEach ((update -> update .applyTo (metadataBuilder )));
903
+ metadataUpdates .forEach ((update -> update .applyTo (metadataBuilder )));
922
904
// Ref rolled back update correctly to snapshot to be committed parent now.
923
905
newBase = metadataBuilder .build ();
924
- // move the lastSequenceNumber back, to apply snapshot properly.
925
- // Seq number are considered increasing monotonically, snapshot over snapshot, so
926
- // this is important.
906
+ // move the lastSequenceNumber back, to apply snapshot properly on the
907
+ // current-metadata
908
+ // Seq number are considered increasing monotonically, snapshot over snapshot, the
909
+ // client
910
+ // generates the manifest list and hence the sequence number can't be changed for
911
+ // a snapshot
912
+ // the only possible option then is to change the sequenceNumber tracked by
913
+ // metadata.json
927
914
Class <?> clazz = newBase .getClass ();
928
915
try {
929
916
Field field = clazz .getDeclaredField ("lastSequenceNumber" );
@@ -935,7 +922,6 @@ public static TableMetadata commit(TableOperations ops, UpdateTableRequest reque
935
922
throw new RuntimeException (ex );
936
923
}
937
924
}
938
-
939
925
// double check if the requirements passes now.
940
926
try {
941
927
TableMetadata baseWithRemovedSnaps = newBase ;
@@ -959,7 +945,57 @@ public static TableMetadata commit(TableOperations ops, UpdateTableRequest reque
959
945
return ops .current ();
960
946
}
961
947
948
+ private static UpdateRequirement .AssertRefSnapshotID findAssertRefSnapshotID (
949
+ UpdateTableRequest request ) {
950
+ UpdateRequirement .AssertRefSnapshotID assertRefSnapshotID = null ;
951
+ int total = 0 ;
952
+ for (UpdateRequirement requirement : request .requirements ()) {
953
+ if (requirement instanceof UpdateRequirement .AssertRefSnapshotID ) {
954
+ ++total ;
955
+ assertRefSnapshotID = (UpdateRequirement .AssertRefSnapshotID ) requirement ;
956
+ }
957
+ }
958
+
959
+ // if > 1 assertion for refs, then it's not safe to rollback, make this Noop.
960
+ return total != 1 ? null : assertRefSnapshotID ;
961
+ }
962
+
963
+ private static List <MetadataUpdate > generateUpdatesToRemoveNoopSnapshot (
964
+ TableOperations ops , long currentSnapshotId , long expectedCurrentSnapshotId ) {
965
+ List <MetadataUpdate > updateToRemoveSnapshot = new ArrayList <>();
966
+ Long snapshotId = currentSnapshotId ;
967
+ while (snapshotId != null && !Objects .equals (snapshotId , expectedCurrentSnapshotId )) {
968
+ Snapshot snap = ops .current ().snapshot (snapshotId );
969
+ if (!DataOperations .REPLACE .equals (snap .operation ())) {
970
+ break ;
971
+ }
972
+ updateToRemoveSnapshot .add (new MetadataUpdate .RemoveSnapshot (snap .snapshotId ()));
973
+ snapshotId = snap .parentId ();
974
+ }
975
+
976
+ boolean wasExpectedSnapshotReached = Objects .equals (snapshotId , expectedCurrentSnapshotId );
977
+ return wasExpectedSnapshotReached ? updateToRemoveSnapshot : null ;
978
+ }
979
+
980
+ private static MetadataUpdate .SetSnapshotRef findSetSnapshotRefUpdate (
981
+ UpdateTableRequest request ) {
982
+ int total = 0 ;
983
+ MetadataUpdate .SetSnapshotRef setSnapshotRefUpdate = null ;
984
+ // find the SetRefName snapshot update
985
+ for (MetadataUpdate update : request .updates ()) {
986
+ if (update instanceof MetadataUpdate .SetSnapshotRef ) {
987
+ total ++;
988
+ setSnapshotRefUpdate = (MetadataUpdate .SetSnapshotRef ) update ;
989
+ }
990
+ }
962
991
992
+ // if > 1 assertion for refs, then it's not safe to rollback, make this Noop.
993
+ return total != 1 ? null : setSnapshotRefUpdate ;
994
+ }
995
+
996
+ private static boolean hasJustMainBranch (TableMetadata tableMetadata ) {
997
+ return tableMetadata .refs ().values ().stream ().filter (SnapshotRef ::isBranch ).count () == 1 ;
998
+ }
963
999
964
1000
public LoadTableResponse updateTableForStagedCreate (
965
1001
TableIdentifier tableIdentifier , UpdateTableRequest request ) {
0 commit comments