Skip to content

Commit

Permalink
Fix bug related to quoted or unquoted etags held by a CDN or caching …
Browse files Browse the repository at this point in the history
…layer
  • Loading branch information
joelverhagen committed May 1, 2022
1 parent fbffe4f commit 1ab1300
Show file tree
Hide file tree
Showing 8 changed files with 140 additions and 27 deletions.
2 changes: 1 addition & 1 deletion MiniZip.Sandbox/MiniZip.Sandbox.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>net5.0</TargetFramework>
<TargetFramework>net6.0</TargetFramework>
<AssemblyName>Knapcode.MiniZip.Sandbox</AssemblyName>
<RootNamespace>Knapcode.MiniZip</RootNamespace>
</PropertyGroup>
Expand Down
7 changes: 6 additions & 1 deletion MiniZip.Sandbox/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,10 @@ private static async Task LargeAsync()
{
// This provider uses and HTTP client and initializes a ZipDirectoryReader from a URL. This URL
// must support HEAD method, Content-Length response header, and Range request header.
var httpZipProvider = new HttpZipProvider(httpClient);
var httpZipProvider = new HttpZipProvider(httpClient)
{
RequireAcceptRanges = false,
};

foreach (var url in urls)
{
Expand Down Expand Up @@ -119,6 +122,8 @@ private class LoggingHandler : DelegatingHandler
"Accept-Ranges",
"Content-Range",
"Content-Length",
"ETag",
"If-Match",
};

public long TotalResponseBodyBytes => _totalResponseBodyBytes;
Expand Down
13 changes: 9 additions & 4 deletions MiniZip.Test/EndToEndTest.cs
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
using System.Collections;
using System.Collections.Generic;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net.Http;
Expand All @@ -23,7 +22,10 @@ public async Task CanGatherAndRecreateNuGetPackageCentralDirectory(string id, st
string mzipPath;
using (var httpClient = new HttpClient())
{
var httpZipProvider = new HttpZipProvider(httpClient);
var httpZipProvider = new HttpZipProvider(httpClient)
{
RequireAcceptRanges = false,
};
using (var reader = await httpZipProvider.GetReaderAsync(packageUri))
{
// Read the ZIP directory from the .nupkg URL.
Expand Down Expand Up @@ -66,7 +68,10 @@ public async Task CanReadSignatureFile(string id, string version)

using (var httpClient = new HttpClient())
{
var httpZipProvider = new HttpZipProvider(httpClient);
var httpZipProvider = new HttpZipProvider(httpClient)
{
RequireAcceptRanges = false,
};
using (var reader = await httpZipProvider.GetReaderAsync(packageUri))
{
// Read the ZIP directory from the .nupkg URL.
Expand Down
4 changes: 2 additions & 2 deletions MiniZip.Test/MiniZip.Test.csproj
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
<Project Sdk="Microsoft.NET.Sdk">

<PropertyGroup>
<TargetFrameworks Condition="'$(CoreOnly)' != 'true'">net472;net5.0</TargetFrameworks>
<TargetFrameworks Condition="'$(CoreOnly)' == 'true'">net5.0</TargetFrameworks>
<TargetFrameworks Condition="'$(CoreOnly)' != 'true'">net472;net6.0</TargetFrameworks>
<TargetFrameworks Condition="'$(CoreOnly)' == 'true'">net6.0</TargetFrameworks>
<AssemblyName>Knapcode.MiniZip.Test</AssemblyName>
<RootNamespace>Knapcode.MiniZip</RootNamespace>

Expand Down
68 changes: 60 additions & 8 deletions MiniZip/BufferedRangeStream/HttpRangeReader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,12 @@ public class HttpRangeReader : IRangeReader
private readonly HttpClient _httpClient;
private readonly Uri _requestUri;
private readonly long _length;
private readonly EntityTagHeaderValue _etag;
private string _etag;
private readonly string _etagQuotes;
private readonly string _etagNoQuotes;
private readonly bool _requireContentRange;
private readonly bool _sendXMsVersionHeader;
private readonly bool _allowETagVariants;
private readonly IThrottle _httpThrottle;

/// <summary>
Expand All @@ -26,14 +30,33 @@ public class HttpRangeReader : IRangeReader
/// <param name="length">The length of the request URI, a size in bytes.</param>
/// <param name="etag">The optional etag header to be used for follow-up requests.</param>
/// <param name="requireContentRange">Whether or not to require and validate the Content-Range header.</param>
/// <param name="sendXMsVersionHeader">Whether or not to send the <c>x-ms-version: 2013-08-15</c> request header.</param>
/// <param name="allowETagVariants">Whether or not to allow different variants of the etag.</param>
/// <param name="httpThrottle">The throttle to use for HTTP operations.</param>
public HttpRangeReader(HttpClient httpClient, Uri requestUri, long length, EntityTagHeaderValue etag, bool requireContentRange, IThrottle httpThrottle)
public HttpRangeReader(HttpClient httpClient, Uri requestUri, long length, string etag, bool requireContentRange, bool sendXMsVersionHeader, bool allowETagVariants, IThrottle httpThrottle)
{
_httpClient = httpClient;
_requestUri = requestUri;
_length = length;

_etag = etag;
if (etag != null && allowETagVariants)
{
if (etag.StartsWith("\"") && etag.EndsWith("\""))
{
_etagQuotes = etag;
_etagNoQuotes = etag.Substring(1, etag.Length - 2);
}
else
{
_etagQuotes = "\"" + etag + "\"";
_etagNoQuotes = etag;
}
}

_requireContentRange = requireContentRange;
_sendXMsVersionHeader = sendXMsVersionHeader;
_allowETagVariants = allowETagVariants;
_httpThrottle = httpThrottle ?? NullThrottle.Instance;
}

Expand All @@ -45,8 +68,9 @@ public HttpRangeReader(HttpClient httpClient, Uri requestUri, long length, Entit
/// <param name="length">The length of the request URI, a size in bytes.</param>
/// <param name="etag">The optional etag header to be used for follow-up requests.</param>
/// <param name="requireContentRange">Whether or not to require and validate the Content-Range header.</param>
public HttpRangeReader(HttpClient httpClient, Uri requestUri, long length, EntityTagHeaderValue etag, bool requireContentRange)
: this(httpClient, requestUri, length, etag, requireContentRange: true, httpThrottle: null)
/// <param name="httpThrottle">The throttle to use for HTTP operations.</param>
public HttpRangeReader(HttpClient httpClient, Uri requestUri, long length, string etag, bool requireContentRange, IThrottle httpThrottle)
: this(httpClient, requestUri, length, etag, requireContentRange, sendXMsVersionHeader: false, allowETagVariants: false, httpThrottle: httpThrottle)
{
}

Expand All @@ -57,7 +81,20 @@ public HttpRangeReader(HttpClient httpClient, Uri requestUri, long length, Entit
/// <param name="requestUri">The URL to request bytes from.</param>
/// <param name="length">The length of the request URI, a size in bytes.</param>
/// <param name="etag">The optional etag header to be used for follow-up requests.</param>
public HttpRangeReader(HttpClient httpClient, Uri requestUri, long length, EntityTagHeaderValue etag)
/// <param name="requireContentRange">Whether or not to require and validate the Content-Range header.</param>
public HttpRangeReader(HttpClient httpClient, Uri requestUri, long length, string etag, bool requireContentRange)
: this(httpClient, requestUri, length, etag, requireContentRange, httpThrottle: null)
{
}

/// <summary>
/// Initializes the HTTP range reader. The provided <see cref="HttpClient"/> is not disposed by this instance.
/// </summary>
/// <param name="httpClient">The HTTP client used to make the HTTP requests.</param>
/// <param name="requestUri">The URL to request bytes from.</param>
/// <param name="length">The length of the request URI, a size in bytes.</param>
/// <param name="etag">The optional etag header to be used for follow-up requests.</param>
public HttpRangeReader(HttpClient httpClient, Uri requestUri, long length, string etag)
: this(httpClient, requestUri, length, etag, requireContentRange: true)
{
}
Expand All @@ -72,20 +109,35 @@ public HttpRangeReader(HttpClient httpClient, Uri requestUri, long length, Entit
/// <returns>The number of bytes read.</returns>
public async Task<int> ReadAsync(long srcOffset, byte[] dst, int dstOffset, int count)
{
return await RetryHelper.RetryAsync(async () =>
return await RetryHelper.RetryAsync(async lastException =>
{
await _httpThrottle.WaitAsync();
try
{
using (var request = new HttpRequestMessage(HttpMethod.Get, _requestUri))
{
request.Headers.Range = new RangeHeaderValue(srcOffset, (srcOffset + count) - 1);
if (_sendXMsVersionHeader)
{
request.Headers.TryAddWithoutValidation("x-ms-version", "2013-08-15");
}

if (_etag != null)
{
request.Headers.IfMatch.Add(_etag);
if (lastException is MiniZipHttpException ex
&& ex.StatusCode == HttpStatusCode.PreconditionFailed
&& _allowETagVariants == true)
{
// Swap out the etag version (quoted vs. unquoted, related to an old Azure Blob Storage
// bug) if there is an HTTP 412 Precondition Failed. This may be caused by the wrong
// version of the etag being cached by an intermediate layer.
_etag = _etag == _etagNoQuotes ? _etagQuotes : _etagNoQuotes;
}

request.Headers.TryAddWithoutValidation("If-Match", _etag);
}

request.Headers.Range = new RangeHeaderValue(srcOffset, (srcOffset + count) - 1);

using (var response = await _httpClient.SendAsync(request, HttpCompletionOption.ResponseHeadersRead))
{
if (response.StatusCode != HttpStatusCode.PartialContent)
Expand Down
7 changes: 5 additions & 2 deletions MiniZip/BufferedRangeStream/RetryHelper.cs
Original file line number Diff line number Diff line change
Expand Up @@ -7,19 +7,22 @@ namespace Knapcode.MiniZip
{
internal static class RetryHelper
{
public static async Task<T> RetryAsync<T>(Func<Task<T>> actAsync)
public static async Task<T> RetryAsync<T>(Func<Exception, Task<T>> actAsync)
{
const int maxAttempts = 3;
var attempt = 0;
Exception lastException = null;
while (true)
{
try
{
attempt++;
return await actAsync();
return await actAsync(lastException);
}
catch (Exception ex) when (ex is MiniZipHttpException || ex is IOException || ex is HttpRequestException)
{
lastException = ex;

if (attempt >= maxAttempts)
{
throw;
Expand Down
48 changes: 43 additions & 5 deletions MiniZip/HttpZipProvider.cs
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,13 @@ public HttpZipProvider(HttpClient httpClient, IThrottle httpThrottle)
/// </summary>
public bool RequireContentRange { get; set; } = true;

/// <summary>
/// Whether or not to allow etag variants when an If-Match returns a 412 Precondition Failed. This can happen
/// when a CDN is in front of Azure Blob Storage and a unexpected (quoted or unquoted) etag is cached. By
/// default, this behavior only enabled if the server is detected to be backed by Azure Blob Storage.
/// </summary>
public bool? AllowETagVariants { get; set; }

/// <summary>
/// Initialize the buffered range reader stream provided request URL.
/// </summary>
Expand All @@ -89,7 +96,7 @@ public async Task<ZipDirectoryReader> GetReaderAsync(Uri requestUri)
private async Task<Tuple<Stream, ILookup<string, string>>> GetStreamAndHeadersAsync(Uri requestUri)
{
// Determine if the exists endpoint's length and whether it supports range requests.
var info = await RetryHelper.RetryAsync(async () =>
var info = await RetryHelper.RetryAsync(async lastException =>
{
using (var request = new HttpRequestMessage(HttpMethod.Head, requestUri))
{
Expand Down Expand Up @@ -117,6 +124,19 @@ private async Task<Tuple<Stream, ILookup<string, string>>> GetStreamAndHeadersAs
throw await response.ToHttpExceptionAsync(Strings.ContentLengthHeaderNotFound);
}

// If unspecified, only allow etag variants if the server appears to be Azure Blob Storage.
bool allowETagVariants;
if (!AllowETagVariants.HasValue
&& response.Headers.TryGetValues("x-ms-version", out _)
&& response.Headers.TryGetValues("x-ms-blob-type", out _))
{
allowETagVariants = true;
}
else
{
allowETagVariants = AllowETagVariants.GetValueOrDefault(false);
}

if (RequireAcceptRanges
&& (response.Headers.AcceptRanges == null
|| !response.Headers.AcceptRanges.Contains(HttpConstants.BytesUnit)))
Expand All @@ -129,8 +149,17 @@ private async Task<Tuple<Stream, ILookup<string, string>>> GetStreamAndHeadersAs
var length = response.Content.Headers.ContentLength.Value;

var etagBehavior = ETagBehavior;
var etag = response.Headers?.ETag;
if (etag != null && (etag.IsWeak || etagBehavior == ETagBehavior.Ignore))

// Even though we may be sending the x-ms-version header to Azure Blob Storage, it's
// possible a CDN or other intermediate layer has cached the response ETag header without
// quotes. This is why we don't use the parsed response.Headers.ETag value.
string etag = null;
if (response.Headers.TryGetValues("ETag", out var etags) && etags.Any())
{
etag = etags.First();
}

if (etag != null && (etag.StartsWith("W/") || etagBehavior == ETagBehavior.Ignore))
{
etag = null;
}
Expand All @@ -149,7 +178,7 @@ private async Task<Tuple<Stream, ILookup<string, string>>> GetStreamAndHeadersAs
.SelectMany(x => x.Value.Select(y => new { x.Key, Value = y }))
.ToLookup(x => x.Key, x => x.Value, StringComparer.OrdinalIgnoreCase);

return new { Length = length, ETag = etag, Headers = headers };
return new { Length = length, ETag = etag, AllowETagVariants = allowETagVariants, Headers = headers };
}
}
finally
Expand All @@ -159,7 +188,16 @@ private async Task<Tuple<Stream, ILookup<string, string>>> GetStreamAndHeadersAs
}
});

var httpRangeReader = new HttpRangeReader(_httpClient, requestUri, info.Length, info.ETag, RequireContentRange);
var httpRangeReader = new HttpRangeReader(
_httpClient,
requestUri,
info.Length,
info.ETag,
RequireContentRange,
SendXMsVersionHeader,
info.AllowETagVariants,
_httpThrottle);

var bufferSizeProvider = BufferSizeProvider ?? NullBufferSizeProvider.Instance;
var stream = new BufferedRangeStream(httpRangeReader, info.Length, bufferSizeProvider);

Expand Down
18 changes: 14 additions & 4 deletions MiniZip/MiniZip.csproj
Original file line number Diff line number Diff line change
@@ -1,21 +1,24 @@
<Project Sdk="Microsoft.NET.Sdk" DefaultTargets="build">

<PropertyGroup>
<TargetFrameworks Condition="'$(CoreOnly)' != 'true'">netstandard2.0;net45</TargetFrameworks>
<TargetFrameworks Condition="'$(CoreOnly)' != 'true'">netstandard2.0;net451</TargetFrameworks>
<TargetFrameworks Condition="'$(CoreOnly)' == 'true'">netstandard2.0</TargetFrameworks>
<AssemblyName>Knapcode.MiniZip</AssemblyName>
<RootNamespace>Knapcode.MiniZip</RootNamespace>
<DocumentationFile>$(OutputPath)$(AssemblyName).xml</DocumentationFile>

<SignAssembly>true</SignAssembly>
<AssemblyOriginatorKeyFile>..\MiniZip.snk</AssemblyOriginatorKeyFile>
<DelaySign>false</DelaySign>

<Version>0.17.0</Version>
<Version>0.18.0</Version>

<Authors>Joel Verhagen</Authors>
<PackageLicenseExpression>MIT</PackageLicenseExpression>
<PackageProjectUrl>https://github.com/joelverhagen/MiniZip</PackageProjectUrl>
<PackageReleaseNotes>Fix bug related to quoted or unquoted etags held by a CDN or caching layer</PackageReleaseNotes>
<PackageReadmeFile>README.md</PackageReadmeFile>
<PackageTags>zip;http;partial;range;mzip;minizip;fetch;directory</PackageTags>
<Description>Read the file listing of a .zip archive without downloading the whole thing.</Description>

<PublishRepositoryUrl>true</PublishRepositoryUrl>
Expand All @@ -29,6 +32,13 @@
<ContinuousIntegrationBuild>true</ContinuousIntegrationBuild>
</PropertyGroup>

<ItemGroup>
<None Include="..\README.md">
<Pack>True</Pack>
<PackagePath>\</PackagePath>
</None>
</ItemGroup>

<ItemGroup>
<PackageReference Include="Microsoft.SourceLink.GitHub" Version="1.0.0">
<PrivateAssets>all</PrivateAssets>
Expand All @@ -51,7 +61,7 @@
</EmbeddedResource>
</ItemGroup>

<ItemGroup Condition="'$(TargetFramework)' == 'net45'">
<ItemGroup Condition="'$(TargetFramework)' == 'net451'">
<Reference Include="System.Net.Http" />
</ItemGroup>

Expand Down

0 comments on commit 1ab1300

Please sign in to comment.