Skip to content

Commit 5f08ccd

Browse files
Analyze - Documentation and filtering
Refine the code that filters extra files, based on further testing with player builds. Update the documentation to explain more about when Analyse can be used. Add a non-recursive option to reduce the noise if you only want to analyze a player build (e.g. ignore the StreamingAssets contents)
1 parent 26d1aa2 commit 5f08ccd

File tree

4 files changed

+95
-42
lines changed

4 files changed

+95
-42
lines changed

Analyzer/AnalyzerTool.cs

+33-21
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
using System;
2+
using System.Collections.Generic;
23
using System.Diagnostics;
34
using System.IO;
45
using UnityDataTools.Analyzer.SQLite;
@@ -10,7 +11,13 @@ public class AnalyzerTool
1011
{
1112
bool m_Verbose = false;
1213

13-
public int Analyze(string path, string databaseName, string searchPattern, bool skipReferences, bool verbose)
14+
public int Analyze(
15+
string path,
16+
string databaseName,
17+
string searchPattern,
18+
bool skipReferences,
19+
bool verbose,
20+
bool noRecursion)
1421
{
1522
m_Verbose = verbose;
1623

@@ -29,7 +36,11 @@ public int Analyze(string path, string databaseName, string searchPattern, bool
2936
var timer = new Stopwatch();
3037
timer.Start();
3138

32-
var files = Directory.GetFiles(path, searchPattern, SearchOption.AllDirectories);
39+
var files = Directory.GetFiles(
40+
path,
41+
searchPattern,
42+
noRecursion ? SearchOption.TopDirectoryOnly : SearchOption.AllDirectories);
43+
3344
int i = 1;
3445
foreach (var file in files)
3546
{
@@ -65,30 +76,31 @@ public int Analyze(string path, string databaseName, string searchPattern, bool
6576
bool ShouldIgnoreFile(string file)
6677
{
6778
// Unfortunately there is no standard extension for AssetBundles, and SerializedFiles often have no extension at all.
68-
// There is also no distinctive signature at the start of a SerializedFile to immediately recognize it
69-
// (Unity Archives do have this).
70-
// So to reduce noise in UnityDataTool output we filter out files that we have a high confidence are NOT SerializedFiles or Unity Archives.
79+
// There is also no distinctive signature at the start of a SerializedFile to immediately recognize it based on its first bytes.
80+
// This makes it difficult to use the "--search-pattern" argument to only pick those files.
81+
82+
// Hence to reduce noise in UnityDataTool output we filter out files that we have a high confidence are
83+
// NOT SerializedFiles or Unity Archives.
7184

7285
string fileName = Path.GetFileName(file);
7386
string extension = Path.GetExtension(file);
7487

75-
if ((fileName == ".DS_Store") || // Automatically ignore these annoying OS X style files meta files.
76-
(fileName == "archive_dependencies.bin") ||
77-
(fileName == "scene_info.bin") ||
78-
(fileName == "app.info") ||
79-
(extension == ".txt") ||
80-
(extension == ".resS") ||
81-
(extension == ".resource") ||
82-
(extension == ".json") ||
83-
(extension == ".dll") ||
84-
(extension == ".pdb") ||
85-
(extension == ".manifest") ||
86-
(extension == ".entities") ||
87-
(extension == ".entityheader"))
88-
return true;
89-
return false;
88+
return IgnoredFileNames.Contains(fileName) || IgnoredExtensions.Contains(extension);
9089
}
9190

91+
// These lists are based on expected output files in Player, AssetBundle, Addressables and ECS builds.
92+
// However this is by no means exhaustive.
93+
private static readonly HashSet<string> IgnoredFileNames = new()
94+
{
95+
".DS_Store", "boot.config", "archive_dependencies.bin", "scene_info.bin", "app.info", "link.xml",
96+
"catalog.bin", "catalog.hash"
97+
};
98+
99+
private static readonly HashSet<string> IgnoredExtensions = new()
100+
{
101+
".txt", ".resS", ".resource", ".json", ".dll", ".pdb", ".exe", ".manifest", ".entities", ".entityheader"
102+
};
103+
92104
void ProcessFile(string file, string rootDirectory, SQLiteWriter writer, int fileIndex, int cntFiles)
93105
{
94106
try
@@ -148,7 +160,7 @@ void ProcessFile(string file, string rootDirectory, SQLiteWriter writer, int fil
148160
{
149161
EraseProgressLine();
150162
Console.Error.WriteLine();
151-
//Console.Error.WriteLine($"File not supported: {file}"); // This is commented out because another codepath will output "failed to load"
163+
//A "failed to load" error will already be logged by the UnityFileSystem library
152164
}
153165
catch (Exception e)
154166
{

Documentation/unity-content-format.md

+9-1
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,14 @@ To do so, the **ForceAlwaysWriteTypeTrees** Diagnostic Switch must be enabled in
5050

5151
![](./TypeTreeForPlayer.png)
5252

53+
54+
Note: The `Resources\unity default resources` file is shipped with the Unity Editor and is not rebuilt when doing a Player Build. It does not have TypeTrees. Hence it is normal that this file emits errors when analyzing a player build, even after rebuilding with TypeTrees enabled. For example:
55+
56+
```
57+
Error processing file: C:\TestProject\CompressedPlayer\TestProject_Data\Resources\unity default resources
58+
System.ArgumentException: Invalid object id.
59+
```
60+
5361
For more information about TypeTrees see the following section.
5462

5563
## TypeTrees
@@ -61,7 +69,7 @@ definition exactly matches the Type definition used when the object was serializ
6169
Unity will attempt to match up the properties as best as it can, based on the property names and structure
6270
of the data. This process is called a "Safe Binary Read" and is somewhat slower than the regular fast binary read path.
6371

64-
TypeTrees are important in the case of AssetBundles, to avoid rebuilding and redistributing all AssetBundles after each minor upgrade of Unity or after doing minor changes to your MonoBehaviour and ScriptableObject serialization. However there can be a noticable overhead to storing the TypeTrees in each AssetBundle, e.g. the header size of each SerializedFile is bigger.
72+
TypeTrees are important in the case of AssetBundles, to avoid rebuilding and redistributing all AssetBundles after each minor upgrade of Unity or after doing minor changes to your MonoBehaviour and ScriptableObject serialization. However there can be a noticeable overhead to storing the TypeTrees in each AssetBundle, e.g. the header size of each SerializedFile is bigger.
6573

6674
TypeTrees also make it possible to load an AssetBundle in the Editor, when testing game play.
6775

UnityDataTool/Program.cs

+14-5
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ public static async Task<int> Main(string[] args)
2424
var rOpt = new Option<bool>(aliases: new[] { "--extract-references", "-r" }) { IsHidden = true };
2525
var pOpt = new Option<string>(aliases: new[] { "--search-pattern", "-p" }, description: "File search pattern", getDefaultValue: () => "*");
2626
var vOpt = new Option<bool>(aliases: new[] { "--verbose", "-v" }, description: "Verbose output");
27+
var recurseOpt = new Option<bool>(aliases: new[] { "--no-recurse" }, description: "Do not analyze contents of subdirectories inside path");
2728

2829
var analyzeCommand = new Command("analyze", "Analyze AssetBundles or SerializedFiles.")
2930
{
@@ -32,13 +33,14 @@ public static async Task<int> Main(string[] args)
3233
sOpt,
3334
rOpt,
3435
pOpt,
35-
vOpt
36+
vOpt,
37+
recurseOpt
3638
};
3739

3840
analyzeCommand.AddAlias("analyse");
3941
analyzeCommand.SetHandler(
40-
(DirectoryInfo di, string o, bool s, bool r, string p, bool v) => Task.FromResult(HandleAnalyze(di, o, s, r, p, v)),
41-
pathArg, oOpt, sOpt, rOpt, pOpt, vOpt);
42+
(DirectoryInfo di, string o, bool s, bool r, string p, bool v, bool recurseOpt) => Task.FromResult(HandleAnalyze(di, o, s, r, p, v, recurseOpt)),
43+
pathArg, oOpt, sOpt, rOpt, pOpt, vOpt, recurseOpt);
4244

4345
rootCommand.AddCommand(analyzeCommand);
4446
}
@@ -132,7 +134,14 @@ enum DumpFormat
132134
Text,
133135
}
134136

135-
static int HandleAnalyze(DirectoryInfo path, string outputFile, bool skipReferences, bool extractReferences, string searchPattern, bool verbose)
137+
static int HandleAnalyze(
138+
DirectoryInfo path,
139+
string outputFile,
140+
bool skipReferences,
141+
bool extractReferences,
142+
string searchPattern,
143+
bool verbose,
144+
bool noRecurse)
136145
{
137146
var analyzer = new AnalyzerTool();
138147

@@ -141,7 +150,7 @@ static int HandleAnalyze(DirectoryInfo path, string outputFile, bool skipReferen
141150
Console.WriteLine("WARNING: --extract-references, -r option is deprecated (references are now extracted by default)");
142151
}
143152

144-
return analyzer.Analyze(path.FullName, outputFile, searchPattern, skipReferences, verbose);
153+
return analyzer.Analyze(path.FullName, outputFile, searchPattern, skipReferences, verbose, noRecurse);
145154
}
146155

147156
static int HandleFindReferences(FileInfo databasePath, string outputFile, long? objectId, string objectName, string objectType, bool findAll)

UnityDataTool/README.md

+39-15
Original file line numberDiff line numberDiff line change
@@ -33,48 +33,72 @@ The tool is invoked from the command line like this: `UnityDataTool [command] [c
3333

3434
For a list of available commands run it like this: `UnityDataTool --help`
3535

36-
For help on a specific command use `--help` along with the command name, for example: `UnityDataTool analyse --help`
36+
For help on a specific command use `--help` along with the command name, for example: `UnityDataTool analyze --help`
3737

3838

3939
# Commands
4040

4141
## analyze/analyse
4242

43-
This command extracts information from AssetBundles and SerializedFiles and dumps the results
44-
into a SQLite database.
43+
This command extracts information from Unity Archives (e.g. AssetBundles) and SerializedFiles and dumps the results into a SQLite database.
4544

46-
The command will fail if the SerializedFiles were built without TypeTrees, see [this topic](../Documentation/unity-content-format.md) for more information.
4745
The command takes the path of the folder containing the files to analyze as argument.
4846

4947
It also provides the following options:
5048
* -o, --output-file \<database-filename\>: filename of the database that will be created, the
51-
default is database.db.
49+
default is database.db. Any existing file by that name will be replaced by the data from running this command.
5250
* -s, --skip-references: skip CRC and reference (PPtrs) extraction. Faster processing and smaller
5351
database, but inaccurate duplicate asset detection and no references table.
54-
* -p, --search-pattern \<pattern\>: search pattern used to determine which files are AssetBundles. The default is \*. The * and ? characters are supported, but not regular expressions. The search is always recursive.
52+
* -p, --search-pattern \<pattern\>: search pattern used to determine which files are AssetBundles. The default is \*. The * and ? characters are supported. Regular expressions are not supported.
53+
* -v, --verbose: show more information during the analysis process, for example list any files that are ignored.
54+
* --no-recurse: do not recurse into sub-directories.
5555

56-
Example: `UnityDataTool analyze /path/to/asset/bundles -o my_database.db -p *.bundle`
56+
Example: `UnityDataTool analyze /path/to/asset/bundles -o my_database.db -p "*.bundle"`
5757

5858
**Refer to this [documentation](../Analyzer/README.md#How-to-use-the-database) for more information
5959
about the output database structure.**
6060

61-
### Common Warnings during Analysis
61+
Note: If a SerializedFile is built without TypeTrees, then the command will not be able to extract information about the contained objects. It will print an error similar to this example, then skip to the next file:
6262

63-
The analysis search may find files that are not actually Archives or SerializedFiles, for example .manifest files, text dumps etc.
63+
```
64+
Error processing file: C:\Src\TestProject\Build\Player\TestProject_Data\level0
65+
System.ArgumentException: Invalid object id.
66+
```
67+
68+
See [this topic](../Documentation/unity-content-format.md) for more information about TypeTrees.
69+
70+
### Example Input to the Analyze command
71+
72+
Examples of directories that could be analyzed:
6473

65-
This can lead to error messages like this:
74+
* The output path of an AssetBundle build.
75+
* A folder inside the StreamingAssets folder of a Unity Player build. For example:
76+
* The "StreamingAssets/aa" folder, containing AssetBundles from an Addressables build.
77+
* The "StreamingAssets/ContentArchives" folder containing sub-scene content if your project uses [Entities](https://docs.unity3d.com/Packages/com.unity.entities@latest/manual/content-management-intro.html).
78+
* The "Data" folder of a Unity Player build.
79+
* By default, any AssetBundles or ContentArchives in the StreamingAssets folder would also be included. Use the "--no-recurse" option to avoid that.
80+
* Compressed Player Builds are supported. The data.unity3d file will be analyzed the same way AssetBundles are.
81+
* The structure and content of a Player varies based on the platform. In some cases you may have to first extract the content out of a platform-specific container file prior to Analysis (for example .apk files on Android).
82+
83+
### Filtering Other File Types
84+
85+
Analyze works by trying to process all files in the provided path, assuming they are all Unity Archives or SerializedFiles. Because there is no standard file extension for those files it is tricky to reliably distinguish these file types from the other files that may also be found in the build output.
86+
87+
This can lead to error messages in the UnityDataTool output like this:
6688

6789
```
68-
Failed to load 'C:\....\AssetBundles.manifest'. File may be corrupted or was serialized with a newer version of Unity.
90+
Failed to load 'C:\....\MyData.db'. File may be corrupted or was serialized with a newer version of Unity.
6991
```
7092

71-
In that case it is not a serious error, because the analyze process will continue and can still produce a perfectly valid database file.
93+
Typically these are not serious errors. The analyze process will continue, and can still produce a perfectly valid database file. However, if there are many messages like this, they can obscure more important or unexpected failures.
94+
95+
To reduce the number of these warnings, UnityDataTool automatically ignores common filenames and file extensions that are found in Player, AssetBundle, Addressables or Entities builds, for example files ending in `.txt`, `.json` or `.manifest`. When the `--verbose` option is passed, each ignored file will be listed.
7296

73-
If you use an extension of other naming convention for your AssetBundles, for example ".bundle", then you can avoid those warnings using the `-p .bundle` option to ignore .manifest and other files.
97+
If you use an extension or other naming convention for your AssetBundles, for example ".bundle", then you can avoid those warnings using the `-p "*.bundle"` option to ignore other files.
7498

75-
For Player builds there is no single -p option that can catch all SerializedFiles (unless it is a compressed build generating a single data.unity3d file).
99+
For Player builds there is no single -p option that can catch all SerializedFiles (unless it is a compressed build with a single `data.unity3d` file).
76100

77-
Overall it can be a good idea to avoid those errors, as noisy errors may hide more serious errors that would need your attention.
101+
The `--no-recurse` option can reduce the volume of these warnings.
78102

79103
## dump
80104

0 commit comments

Comments
 (0)