-
Notifications
You must be signed in to change notification settings - Fork 1.4k
/
Copy pathBatchingEngine.cs
427 lines (388 loc) · 23.1 KB
/
BatchingEngine.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
using System;
using System.Collections.Generic;
using Microsoft.Build.BackEnd.Logging;
using Microsoft.Build.Collections;
using Microsoft.Build.Evaluation;
using Microsoft.Build.Execution;
using Microsoft.Build.Shared;
using ElementLocation = Microsoft.Build.Construction.ElementLocation;
#nullable disable
namespace Microsoft.Build.BackEnd
{
/// <summary>
/// This class is used by objects in the build engine that have the ability to execute themselves in batches, to partition the
/// items they consume into "buckets", based on the values of select item metadata.
/// </summary>
/// <remarks>
/// What batching does
///
/// Batching partitions the items consumed by the batchable object into buckets, where each bucket
/// contains a set of items that have the same value set on all item metadata consumed by the object.
/// Metadata consumed may be unqualified, for example %(m), or qualified by the item list to which it
/// refers, for example %(a.m).
///
/// If metadata is qualified, for example %(a.m), then this is considered distinct to metadata with the
/// same name on a different item type. For example, %(a.m) is distinct to %(b.m), and items of type ‘b’
/// are considered to always have a blank value for %(a.m). This means items of type ‘b’ will only be
/// placed in buckets where %(a.m) is blank. However %(a.m) is equivalent to %(m) on items of type ‘a’.
///
/// There is an extra ambiguity rule: every items consumed by the object must have an explicit value for
/// every piece of unqualified metadata. For example, if @(a), %(m), and %(a.n) are consumed, every item
/// of type ‘a’ must have a value for the metadata ‘m’ but need not all necessarily have a value for the
/// metadata ‘n’. This rule eliminates ambiguity about whether items that do not define values for an
/// unqualified metadata should go in all buckets, or just into buckets with a blank value for
/// that metadata.
///
/// For example
///
/// <ItemGroup>
/// <a Include='a1;a2'>
/// <n>m0</n>
/// </a>
/// <a Include='a3'>
/// <n>m1</n>
/// </a>
/// <b Include='b1'>
/// <n>n0</n>
/// </b>
/// <b Include='b2;b3'>
/// <n>n1</n>
/// </b>
/// <b Include='b4'/>
/// </ItemGroup>
///
/// <Target Name="t" >
/// <Message Text="a={@(a).%(a.n)} b={@(b).%(b.n)}" />
/// </Target>
///
/// Will produce 5 buckets:
///
/// a={a1;a2.m0} b={.}
/// a={a3.m1} b={.}
/// a={.} b={b1.n0}
/// a={.} b={b2;b3.n1}
/// a={.} b={b4.}
///
/// </remarks>
internal static class BatchingEngine
{
#region Methods
/// <summary>
/// Determines how many times the batchable object needs to be executed (each execution is termed a "batch"), and prepares
/// buckets of items to pass to the object in each batch.
/// </summary>
/// <returns>List containing ItemBucket objects, each one representing an execution batch.</returns>
internal static List<ItemBucket> PrepareBatchingBuckets(
List<string> batchableObjectParameters,
Lookup lookup,
ElementLocation elementLocation,
LoggingContext loggingContext)
{
return PrepareBatchingBuckets(batchableObjectParameters, lookup, null, elementLocation, loggingContext);
}
/// <summary>
/// Determines how many times the batchable object needs to be executed (each execution is termed a "batch"), and prepares
/// buckets of items to pass to the object in each batch.
/// </summary>
/// <param name="elementLocation"></param>
/// <param name="batchableObjectParameters"></param>
/// <param name="lookup"></param>
/// <param name="implicitBatchableItemType">Any item type that can be considered an implicit input to this batchable object.
/// This is useful for items inside targets, where the item name is plainly an item type that's an "input" to the object.</param>
/// <param name="loggingContext"></param>
/// <returns>List containing ItemBucket objects, each one representing an execution batch.</returns>
internal static List<ItemBucket> PrepareBatchingBuckets(
List<string> batchableObjectParameters,
Lookup lookup,
string implicitBatchableItemType,
ElementLocation elementLocation,
LoggingContext loggingContext)
{
if (batchableObjectParameters == null)
{
ErrorUtilities.ThrowInternalError("Need the parameters of the batchable object to determine if it can be batched.");
}
if (lookup == null)
{
ErrorUtilities.ThrowInternalError("Need to specify the lookup.");
}
ItemsAndMetadataPair pair = ExpressionShredder.GetReferencedItemNamesAndMetadata(batchableObjectParameters);
// All the @(itemname) item list references in the tag, including transforms, etc.
HashSet<string> consumedItemReferences = pair.Items;
// All the %(itemname.metadataname) references in the tag (not counting those embedded
// inside item transforms), and note that the itemname portion is optional.
// The keys in the returned hash table are the qualified metadata names (e.g. "EmbeddedResource.Culture"
// or just "Culture"). The values are MetadataReference structs, which simply split out the item
// name (possibly null) and the actual metadata name.
Dictionary<string, MetadataReference> consumedMetadataReferences = pair.Metadata;
List<ItemBucket> buckets = null;
if (consumedMetadataReferences?.Count > 0)
{
// Add any item types that we were explicitly told to assume.
if (implicitBatchableItemType != null)
{
consumedItemReferences ??= new HashSet<string>(MSBuildNameIgnoreCaseComparer.Default);
consumedItemReferences.Add(implicitBatchableItemType);
}
// This method goes through all the item list references and figures out which ones
// will be participating in batching, and which ones won't. We get back a hashtable
// where the key is the item name that will be participating in batching. The values
// are all String.Empty (not used). This method may return additional item names
// that weren't represented in "consumedItemReferences"... this would happen if there
// were qualified metadata references in the consumedMetadataReferences table, such as
// %(EmbeddedResource.Culture).
Dictionary<string, ICollection<ProjectItemInstance>> itemListsToBeBatched = GetItemListsToBeBatched(consumedMetadataReferences, consumedItemReferences, lookup, elementLocation);
// At this point, if there were any metadata references in the tag, but no item
// references to batch on, we've got a problem because we can't figure out which
// item lists the user wants us to batch.
if (itemListsToBeBatched.Count == 0)
{
foreach (string unqualifiedMetadataName in consumedMetadataReferences.Keys)
{
// Of course, since this throws an exception, there's no way we're ever going
// to really loop here... it's just that the foreach is the only way I can
// figure out how to get data out of the hashtable without knowing any of the
// keys!
ProjectErrorUtilities.ThrowInvalidProject(elementLocation, "CannotReferenceItemMetadataWithoutItemName", unqualifiedMetadataName);
}
}
else
{
// If the batchable object consumes item metadata as well as items to be batched,
// we need to partition the items consumed by the object.
buckets = BucketConsumedItems(lookup, itemListsToBeBatched, consumedMetadataReferences, elementLocation, loggingContext);
}
}
// if the batchable object does not consume any item metadata or items, or if the item lists it consumes are all
// empty, then the object does not need to be batched
if ((buckets == null) || (buckets.Count == 0))
{
// create a default bucket that references the project items and properties -- this way we always have a bucket
buckets = new List<ItemBucket>(1);
var bucket = new ItemBucket(null, null, lookup, buckets.Count);
if (loggingContext != null)
{
bucket.Initialize(loggingContext);
}
buckets.Add(bucket);
}
return buckets;
}
/// <summary>
/// Of all the item lists that are referenced in this batchable object, which ones should we
/// batch on, and which ones should we just pass in wholesale to every invocation of the
/// target/task?
///
/// Rule #1. If the user has referenced any *qualified* item metadata such as %(EmbeddedResource.Culture),
/// then that item list "EmbeddedResource" will definitely get batched.
///
/// Rule #2. For all the unqualified item metadata such as %(Culture), we make sure that
/// every single item in every single item list being passed into the task contains a value
/// for that metadata. If not, it's an error. If so, we batch all of those item lists.
///
/// All other item lists will not be batched, and instead will be passed in wholesale to all buckets.
/// </summary>
/// <returns>Dictionary containing the item names that should be batched. If the items match unqualified metadata,
/// the entire list of items will be returned in the Value. Otherwise, the Value will be empty, indicating only the
/// qualified item set (in the Key) should be batched.
/// </returns>
private static Dictionary<string, ICollection<ProjectItemInstance>> GetItemListsToBeBatched(
Dictionary<string, MetadataReference> consumedMetadataReferences, // Key is [string] potentially qualified metadata name
// Value is [struct MetadataReference]
HashSet<string> consumedItemReferenceNames,
Lookup lookup,
ElementLocation elementLocation)
{
// The keys in this hashtable are the names of the items that we will batch on.
// The values are always String.Empty (not used).
var itemListsToBeBatched = new Dictionary<string, ICollection<ProjectItemInstance>>(MSBuildNameIgnoreCaseComparer.Default);
// Loop through all the metadata references and find the ones that are qualified
// with an item name.
foreach (MetadataReference consumedMetadataReference in consumedMetadataReferences.Values)
{
if (consumedMetadataReference.ItemName != null)
{
// Rule #1. Qualified metadata reference.
// For metadata references that are qualified with an item name
// (e.g., %(EmbeddedResource.Culture) ), we add that item name to the list of
// consumed item names, even if the item name wasn't otherwise referenced via
// @(...) syntax, and even if every item in the list doesn't necessary contain
// a value for this metadata. This is the special power that you get by qualifying
// the metadata reference with an item name.
itemListsToBeBatched[consumedMetadataReference.ItemName] = null;
// Also add this qualified item to the consumed item references list, because
// %(EmbeddedResource.Culture) effectively means that @(EmbeddedResource) is
// being consumed, even though we may not see literally "@(EmbeddedResource)"
// in the tag anywhere. Adding it to this list allows us (down below in this
// method) to check that every item in this list has a value for each
// unqualified metadata reference.
consumedItemReferenceNames ??= new HashSet<string>(MSBuildNameIgnoreCaseComparer.Default);
consumedItemReferenceNames.Add(consumedMetadataReference.ItemName);
}
}
// Loop through all the metadata references and find the ones that are unqualified.
foreach (MetadataReference consumedMetadataReference in consumedMetadataReferences.Values)
{
if (consumedMetadataReference.ItemName == null)
{
// Rule #2. Unqualified metadata reference.
// For metadata references that are unqualified, every single consumed item
// must contain a value for that metadata. If any item doesn't, it's an error
// to use unqualified metadata.
if (consumedItemReferenceNames != null)
{
foreach (string consumedItemName in consumedItemReferenceNames)
{
// Loop through all the items in the item list.
ICollection<ProjectItemInstance> items = lookup.GetItems(consumedItemName);
if (items != null)
{
// Loop through all the items in the BuildItemGroup.
foreach (ProjectItemInstance item in items.GetStructEnumerable())
{
ProjectErrorUtilities.VerifyThrowInvalidProject(
item.HasMetadata(consumedMetadataReference.MetadataName),
elementLocation, "ItemDoesNotContainValueForUnqualifiedMetadata",
item.EvaluatedInclude, consumedItemName, consumedMetadataReference.MetadataName);
}
}
// This item list passes the test of having every single item containing
// a value for this metadata. Therefore, add this item list to the batching list.
// Also, to save doing lookup.GetItems again, put the items in the table as the value.
itemListsToBeBatched[consumedItemName] = items;
}
}
}
}
return itemListsToBeBatched;
}
/// <summary>
/// Partitions the items consumed by the batchable object into buckets, where each bucket contains a set of items that
/// have the same value set on all item metadata consumed by the object.
/// </summary>
/// <remarks>
/// PERF NOTE: Given n items and m batching metadata that produce l buckets, it is usually the case that n > l > m,
/// because a batchable object typically uses one or two item metadata to control batching, and only has a handful of
/// buckets. The number of buckets is typically only large if a batchable object is using single-item batching
/// (where l == n). Any algorithm devised for bucketing therefore, should try to minimize n and l in its complexity
/// equation. The algorithm below has a complexity of O(n*lg(l)*m/2) in its comparisons, and is effectively O(n) when
/// l is small, and O(n*lg(n)) in the worst case as l -> n. However, note that the comparison complexity is not the
/// same as the operational complexity for this algorithm. The operational complexity of this algorithm is actually
/// O(n*m + n*lg(l)*m/2 + n*l/2 + n + l), which is effectively O(n^2) in the worst case. The additional complexity comes
/// from the array and metadata operations that are performed. However, those operations are extremely cheap compared
/// to the comparison operations, which dominate the time spent in this method.
/// </remarks>
/// <returns>List containing ItemBucket objects (can be empty), each one representing an execution batch.</returns>
private static List<ItemBucket> BucketConsumedItems(
Lookup lookup,
Dictionary<string, ICollection<ProjectItemInstance>> itemListsToBeBatched,
Dictionary<string, MetadataReference> consumedMetadataReferences,
ElementLocation elementLocation,
LoggingContext loggingContext)
{
ErrorUtilities.VerifyThrow(itemListsToBeBatched.Count > 0, "Need item types consumed by the batchable object.");
ErrorUtilities.VerifyThrow(consumedMetadataReferences.Count > 0, "Need item metadata consumed by the batchable object.");
var buckets = new List<ItemBucket>();
// Get and iterate through the list of item names that we're supposed to batch on.
foreach (KeyValuePair<string, ICollection<ProjectItemInstance>> entry in itemListsToBeBatched)
{
string itemName = entry.Key;
// Use the previously-fetched items, if possible
ICollection<ProjectItemInstance> items = entry.Value ?? lookup.GetItems(itemName);
if (items != null)
{
foreach (ProjectItemInstance item in items.GetStructEnumerable())
{
// Get this item's values for all the metadata consumed by the batchable object.
Dictionary<string, string> itemMetadataValues = GetItemMetadataValues(item, consumedMetadataReferences, elementLocation);
// put the metadata into a dummy bucket we can use for searching
ItemBucket dummyBucket = ItemBucket.GetDummyBucketForComparisons(itemMetadataValues);
// look through all previously created buckets to find a bucket whose items have the same values as
// this item for all metadata consumed by the batchable object
int matchingBucketIndex = buckets.BinarySearch(dummyBucket);
ItemBucket matchingBucket = (matchingBucketIndex >= 0)
? buckets[matchingBucketIndex]
: null;
// If we didn't find a bucket that matches this item, create a new one, adding
// this item to the bucket.
if (matchingBucket == null)
{
matchingBucket = new ItemBucket(itemListsToBeBatched.Keys, itemMetadataValues, lookup, buckets.Count);
if (loggingContext != null)
{
matchingBucket.Initialize(loggingContext);
}
// make sure to put the new bucket into the appropriate location
// in the sorted list as indicated by the binary search
// NOTE: observe the ~ operator (bitwise complement) in front of
// the index -- see MSDN for more information on the return value
// from the List.BinarySearch() method
buckets.Insert(~matchingBucketIndex, matchingBucket);
}
// We already have a bucket for this type of item, so add this item to
// the bucket.
matchingBucket.AddItem(item);
}
}
}
// Put the buckets back in the order in which they were discovered, so that the first
// item declared in the project file ends up in the first batch passed into the target/task.
var orderedBuckets = new List<ItemBucket>(buckets.Count);
for (int i = 0; i < buckets.Count; ++i)
{
orderedBuckets.Add(null);
}
foreach (ItemBucket bucket in buckets)
{
orderedBuckets[bucket.BucketSequenceNumber] = bucket;
}
return orderedBuckets;
}
/// <summary>
/// Gets the values of the specified metadata for the given item.
/// The keys in the dictionary returned may be qualified and/or unqualified, exactly
/// as they are found in the metadata reference.
/// For example if %(x) is found, the key is "x", if %(z.x) is found, the key is "z.x".
/// This dictionary in each bucket is used by Expander to expand exactly the same metadata references, so
/// %(x) is expanded using the key "x", and %(z.x) is expanded using the key "z.x".
/// </summary>
/// <returns>the metadata values</returns>
private static Dictionary<string, string> GetItemMetadataValues(
ProjectItemInstance item,
Dictionary<string, MetadataReference> consumedMetadataReferences,
ElementLocation elementLocation)
{
var itemMetadataValues = new Dictionary<string, string>(consumedMetadataReferences.Count, MSBuildNameIgnoreCaseComparer.Default);
foreach (KeyValuePair<string, MetadataReference> consumedMetadataReference in consumedMetadataReferences)
{
string metadataQualifiedName = consumedMetadataReference.Key;
string metadataItemName = consumedMetadataReference.Value.ItemName;
string metadataName = consumedMetadataReference.Value.MetadataName;
if (
(metadataItemName != null) &&
(!String.Equals(item.ItemType, metadataItemName, StringComparison.OrdinalIgnoreCase)))
{
itemMetadataValues[metadataQualifiedName] = String.Empty;
}
else
{
try
{
// This returns String.Empty for both metadata that is undefined and metadata that has
// an empty value; they are treated the same.
itemMetadataValues[metadataQualifiedName] = ((IItem)item).GetMetadataValueEscaped(metadataName);
}
catch (InvalidOperationException e)
{
ProjectErrorUtilities.ThrowInvalidProject(elementLocation,
"CannotEvaluateItemMetadata", metadataName, e.Message);
}
}
}
return itemMetadataValues;
}
#endregion
}
}