7
7
import java .nio .file .Files ;
8
8
import java .nio .file .Path ;
9
9
import java .text .Normalizer ;
10
+ import java .util .ArrayList ;
10
11
import java .util .Collection ;
11
12
import java .util .HashSet ;
13
+ import java .util .List ;
12
14
import java .util .ResourceBundle ;
13
15
import java .util .Set ;
14
16
15
17
import org .slf4j .Logger ;
16
18
import org .slf4j .LoggerFactory ;
17
19
import org .slf4j .helpers .MessageFormatter ;
18
20
21
+ import gov .loc .repository .bagit .domain .Manifest ;
22
+ import gov .loc .repository .bagit .domain .Version ;
19
23
import gov .loc .repository .bagit .exceptions .InvalidBagitFileFormatException ;
24
+ import gov .loc .repository .bagit .exceptions .MaliciousPathException ;
25
+ import gov .loc .repository .bagit .exceptions .UnsupportedAlgorithmException ;
26
+ import gov .loc .repository .bagit .hash .StandardBagitAlgorithmNameToSupportedAlgorithmMapping ;
27
+ import gov .loc .repository .bagit .reader .ManifestReader ;
20
28
import gov .loc .repository .bagit .util .PathUtils ;
21
29
22
30
/**
23
31
* Part of the BagIt conformance suite.
24
32
* This checker checks for various problems related to the manifests in a bag.
25
33
*/
26
- @ SuppressWarnings ({"PMD.UseLocaleWithCaseConversions" })
34
+ //TODO refactor to remove PMD warnings!
35
+ @ SuppressWarnings ({"PMD.UseLocaleWithCaseConversions" , "PMD.TooManyMethods" , "PMD.GodClass" })
27
36
public final class ManifestChecker {
28
37
private static final Logger logger = LoggerFactory .getLogger (ManifestChecker .class );
29
38
private static final ResourceBundle messages = ResourceBundle .getBundle ("MessageBundle" );
@@ -34,46 +43,79 @@ public final class ManifestChecker {
34
43
private static final String TRASHES_FILE = "\\ .(_.)?[Tt][Rr][Aa][Ss][Hh][Ee][Ss]" ;
35
44
private static final String FS_EVENTS_FILE = "\\ .[Ff][Ss][Ee][Vv][Ee][Nn][Tt][Ss][Dd]" ;
36
45
private static final String OS_FILES_REGEX = ".*data/(" + THUMBS_DB_FILE + "|" + DS_STORE_FILE + "|" + SPOTLIGHT_FILE + "|" + TRASHES_FILE + "|" + FS_EVENTS_FILE + ")" ;
46
+ private static final Version VERSION_1_0 = new Version (1 ,0 );
37
47
38
48
private ManifestChecker() {
  //utility class exposing only static methods, so it is never instantiated
}
41
51
42
- /*
52
+ /**
43
53
* Check for all the manifest specific potential problems
54
+ *
55
+ * @param version the version of the bag we are checking
56
+ * @param bagitDir the directory where the manifests are stored
57
+ * @param encoding the encoding of the manifests
58
+ * @param warnings the set of warnings that will be appended to while checking
59
+ * @param warningsToIgnore the set of warnings to ignore
60
+ *
61
+ * @throws IOException if there is a problem reading a file (because it doesn't exist)
62
+ * @throws InvalidBagitFileFormatException if one (or more) of the files does not match the formatting as specified in the specification
63
+ * @throws MaliciousPathException if someone crafted the bag to specifically try and write outside the bag directory
64
+ * @throws UnsupportedAlgorithmException if a manifest uses an algorithm that the computer doesn't know how to use
44
65
*/
45
- public static void checkManifests (final Path bagitDir , final Charset encoding , final Set <BagitWarning > warnings ,
46
- final Collection <BagitWarning > warningsToIgnore ) throws IOException , InvalidBagitFileFormatException {
66
+ //@SuppressWarnings("PMD.CyclomaticComplexity")
67
+ public static void checkManifests (final Version version , final Path bagitDir , final Charset encoding , final Set <BagitWarning > warnings ,
68
+ final Collection <BagitWarning > warningsToIgnore ) throws IOException , InvalidBagitFileFormatException , MaliciousPathException , UnsupportedAlgorithmException {
47
69
48
70
boolean missingTagManifest = true ;
71
+ final List <Path > payloadManifests = new ArrayList <>();
72
+ final List <Path > tagManifests = new ArrayList <>();
49
73
try (final DirectoryStream <Path > files = Files .newDirectoryStream (bagitDir )){
50
74
for (final Path file : files ){
51
- final String filename = PathUtils .getFilename (file );
52
- if (filename .contains ("manifest-" )){
53
- if (filename .startsWith ("manifest-" )){
54
- checkData (file , encoding , warnings , warningsToIgnore , true );
55
- }
56
- else {
57
- checkData (file , encoding , warnings , warningsToIgnore , false );
58
- missingTagManifest = false ;
59
- }
60
-
61
- final String algorithm = filename .split ("[-\\ .]" )[1 ];
62
- checkAlgorthm (algorithm , warnings , warningsToIgnore );
63
- }
75
+ missingTagManifest = missingTagManifest && checkManifest (file , payloadManifests , tagManifests , encoding , warnings , warningsToIgnore );
64
76
}
65
77
}
66
78
79
+ if (!warnings .contains (BagitWarning .MANIFEST_SETS_DIFFER )){
80
+ checkManifestSets (version , tagManifests , payloadManifests , warnings , encoding );
81
+ }
82
+
67
83
if (!warningsToIgnore .contains (BagitWarning .MISSING_TAG_MANIFEST ) && missingTagManifest ){
68
84
logger .warn (messages .getString ("bag_missing_tag_manifest_warning" ), bagitDir );
69
85
warnings .add (BagitWarning .MISSING_TAG_MANIFEST );
70
86
}
71
87
}
72
88
89
+ private static boolean checkManifest (final Path file , final List <Path > payloadManifests , final List <Path > tagManifests ,
90
+ final Charset encoding , final Set <BagitWarning > warnings ,
91
+ final Collection <BagitWarning > warningsToIgnore ) throws IOException , InvalidBagitFileFormatException {
92
+ boolean missingTagManifest = true ;
93
+ final String filename = PathUtils .getFilename (file );
94
+ if (filename .contains ("manifest-" )){
95
+ if (filename .startsWith ("manifest-" )){
96
+ payloadManifests .add (file );
97
+ checkManifestPayload (file , encoding , warnings , warningsToIgnore , true );
98
+ }
99
+ else {
100
+ tagManifests .add (file );
101
+ checkManifestPayload (file , encoding , warnings , warningsToIgnore , false );
102
+ missingTagManifest = false ;
103
+ }
104
+
105
+ final String algorithm = filename .split ("[-\\ .]" )[1 ];
106
+ checkAlgorthm (algorithm , warnings , warningsToIgnore );
107
+ }
108
+
109
+ return missingTagManifest ;
110
+ }
111
+
73
112
/*
74
- * Check for a "bag within a bag" and for relative paths in the manifests
113
+ * Check for a "bag within a bag", relative paths, and OS specific files in the manifests
75
114
*/
76
- private static void checkData (final Path manifestFile , final Charset encoding , final Set <BagitWarning > warnings , final Collection <BagitWarning > warningsToIgnore , final boolean isPayloadManifest ) throws IOException , InvalidBagitFileFormatException {
115
+ private static void checkManifestPayload (final Path manifestFile , final Charset encoding , final Set <BagitWarning > warnings ,
116
+ final Collection <BagitWarning > warningsToIgnore , final boolean isPayloadManifest )
117
+ throws IOException , InvalidBagitFileFormatException {
118
+
77
119
try (final BufferedReader reader = Files .newBufferedReader (manifestFile , encoding )){
78
120
final Set <String > paths = new HashSet <>();
79
121
@@ -82,28 +124,24 @@ private static void checkData(final Path manifestFile, final Charset encoding, f
82
124
String path = parsePath (line );
83
125
84
126
path = checkForManifestCreatedWithMD5SumTools (path , warnings , warningsToIgnore );
85
-
86
- if (!warningsToIgnore .contains (BagitWarning .DIFFERENT_CASE ) && paths .contains (path .toLowerCase ())){
87
- logger .warn (messages .getString ("different_case_warning" ), manifestFile , path );
88
- warnings .add (BagitWarning .DIFFERENT_CASE );
89
- }
90
127
paths .add (path .toLowerCase ());
91
128
129
+ checkForDifferentCase (path , paths , manifestFile , warnings , warningsToIgnore );
92
130
if (encoding .name ().startsWith ("UTF" )){
93
131
checkNormalization (path , manifestFile .getParent (), warnings , warningsToIgnore );
94
132
}
95
-
96
133
checkForBagWithinBag (line , warnings , warningsToIgnore , isPayloadManifest );
97
-
98
134
checkForRelativePaths (line , warnings , warningsToIgnore , manifestFile );
99
-
100
135
checkForOSSpecificFiles (line , warnings , warningsToIgnore , manifestFile );
101
136
102
137
line = reader .readLine ();
103
138
}
104
139
}
105
140
}
106
141
142
+ /*
143
+ * Check to make sure it conforms to <hash> <path>
144
+ */
107
145
static String parsePath (final String line ) throws InvalidBagitFileFormatException {
108
146
final String [] parts = line .split ("\\ s+" , 2 );
109
147
if (parts .length < 2 ){
@@ -114,6 +152,9 @@ static String parsePath(final String line) throws InvalidBagitFileFormatExceptio
114
152
return parts [1 ];
115
153
}
116
154
155
+ /*
156
+ * We allow for MD5sum tools for compatibility but it is not recommended
157
+ */
117
158
private static String checkForManifestCreatedWithMD5SumTools (final String path , final Set <BagitWarning > warnings , final Collection <BagitWarning > warningsToIgnore ){
118
159
String fixedPath = path ;
119
160
final boolean startsWithStar = path .charAt (0 ) == '*' ;
@@ -130,6 +171,17 @@ private static String checkForManifestCreatedWithMD5SumTools(final String path,
130
171
return fixedPath ;
131
172
}
132
173
174
+ /*
175
+ * Check that the same line doesn't already exist in the set of paths
176
+ */
177
+ private static void checkForDifferentCase (final String path , final Set <String > paths , final Path manifestFile ,
178
+ final Set <BagitWarning > warnings , final Collection <BagitWarning > warningsToIgnore ){
179
+ if (!warningsToIgnore .contains (BagitWarning .DIFFERENT_CASE ) && paths .contains (path .toLowerCase ())){
180
+ logger .warn (messages .getString ("different_case_warning" ), manifestFile , path );
181
+ warnings .add (BagitWarning .DIFFERENT_CASE );
182
+ }
183
+ }
184
+
133
185
/*
134
186
* Check that the file specified has not changed its normalization (i.e. have the bytes changed but it still looks the same?)
135
187
*/
@@ -210,6 +262,47 @@ else if(!warningsToIgnore.contains(BagitWarning.NON_STANDARD_ALGORITHM) && !"SHA
210
262
warnings .add (BagitWarning .NON_STANDARD_ALGORITHM );
211
263
}
212
264
}
265
+
266
+ static void checkManifestSets (final Version version , final List <Path > tagManifests , final List <Path > payloadManifests ,
267
+ final Set <BagitWarning > warnings , final Charset encoding )
268
+ throws IOException , MaliciousPathException , UnsupportedAlgorithmException , InvalidBagitFileFormatException {
269
+ //edge case, for version 1.0+ all tag manifests SHOULD list the same set of files
270
+ if (tagManifests .size () > 1 && VERSION_1_0 .isSameOrOlder (version )){
271
+ checkManifestsListSameSetOfFiles (warnings , tagManifests , encoding );
272
+ }
273
+
274
+ //edge case, for version 1.0+ all payload manifests SHOULD list the same set of files
275
+ if (payloadManifests .size () > 1 && VERSION_1_0 .isSameOrOlder (version )){
276
+ checkManifestsListSameSetOfFiles (warnings , payloadManifests , encoding );
277
+ }
278
+ }
279
+
280
+ //starting with version 1.0 all manifest types (tag, payload) should list the same set of files
281
+ @ SuppressWarnings ("PMD.EmptyCatchBlock" )
282
+ static void checkManifestsListSameSetOfFiles (final Set <BagitWarning > warnings , final List <Path > manifestPaths , final Charset charset ) throws IOException , MaliciousPathException , UnsupportedAlgorithmException , InvalidBagitFileFormatException {
283
+ final StandardBagitAlgorithmNameToSupportedAlgorithmMapping nameMapping = new StandardBagitAlgorithmNameToSupportedAlgorithmMapping ();
284
+
285
+ Manifest compareToManifest = null ;
286
+ Path compareToManifestPath = null ;
287
+ for (final Path manifestPath : manifestPaths ) {
288
+ try {
289
+ final Manifest manifest = ManifestReader .readManifest (nameMapping , manifestPath , manifestPath .getParent (), charset );
290
+ if (compareToManifest == null ) {
291
+ compareToManifestPath = manifestPath ;
292
+ compareToManifest = manifest ;
293
+ continue ;
294
+ }
295
+
296
+ if (!compareToManifest .getFileToChecksumMap ().keySet ().equals (manifest .getFileToChecksumMap ().keySet ())) {
297
+ logger .warn (messages .getString ("manifest_fileset_differ" ), compareToManifestPath , manifestPath );
298
+ warnings .add (BagitWarning .MANIFEST_SETS_DIFFER );
299
+ }
300
+ }
301
+ catch (UnsupportedAlgorithmException e ) {
302
+ //ignore an unsupported algorithm as it is caught in checkAlgorthm()
303
+ }
304
+ }
305
+ }
213
306
214
307
//for unit test only
215
308
static String getOsFilesRegex () {
0 commit comments