From ba06dd892175d7c10bdf8169953616e909123e51 Mon Sep 17 00:00:00 2001 From: Adam Reeve Date: Mon, 13 Feb 2023 11:04:07 +1300 Subject: [PATCH] Fix using internal F# types with the row oriented API (#332) --- .github/workflows/ci.yml | 13 +++- CMakeLists.txt | 3 + csharp.test/ParquetSharp.Test.csproj | 2 +- csharp/RowOriented/ParquetFile.cs | 57 +++++++++++++- docs/RowOriented.md | 11 +++ fsharp.test/ParquetSharp.Test.FSharp.fsproj | 28 +++++++ fsharp.test/TestRowOrientedApi.fs | 86 +++++++++++++++++++++ 7 files changed, 194 insertions(+), 6 deletions(-) create mode 100644 fsharp.test/ParquetSharp.Test.FSharp.fsproj create mode 100644 fsharp.test/TestRowOrientedApi.fs diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ba7ffae0..14d74d04 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -171,6 +171,7 @@ jobs: run: | dotnet build csharp.benchmark --configuration=Release -p:OSArchitecture=${{ matrix.arch }} dotnet build csharp.test --configuration=Release -p:OSArchitecture=${{ matrix.arch }} + dotnet build fsharp.test --configuration=Release -p:OSArchitecture=${{ matrix.arch }} - name: Upload native ParquetSharp library uses: actions/upload-artifact@v3 with: @@ -262,17 +263,25 @@ jobs: run: | dotnet remove csharp.test reference csharp/ParquetSharp.csproj dotnet add csharp.test package ParquetSharp -v ${{ steps.get-version.outputs.version }} + dotnet remove fsharp.test reference csharp/ParquetSharp.csproj + dotnet add fsharp.test package ParquetSharp -v ${{ steps.get-version.outputs.version }} - name: Setup QEMU for arm64 if: matrix.arch == 'arm64' uses: docker/setup-qemu-action@v2 with: platforms: arm64 - - name: Build & Run .NET unit tests (x64) + - name: Build & Run C# unit tests (x64) if: matrix.arch == 'x64' run: dotnet test csharp.test --configuration=Release --framework ${{ matrix.dotnet }} - - name: Build & Run .NET unit tests (arm64) + - name: Build & Run F# unit tests (x64) + if: matrix.arch == 'x64' + run: dotnet test fsharp.test --configuration=Release --framework ${{ matrix.dotnet }} + - name: Build & Run C# unit tests (arm64) if: matrix.arch == 'arm64' run: docker run --rm --platform linux/arm64/v8 -v $PWD:$PWD -w $PWD mcr.microsoft.com/dotnet/sdk:6.0 dotnet test csharp.test --configuration=Release --framework ${{ matrix.dotnet }} + - name: Build & Run F# unit tests (arm64) + if: matrix.arch == 'arm64' + run: docker run --rm --platform linux/arm64/v8 -v $PWD:$PWD -w $PWD mcr.microsoft.com/dotnet/sdk:6.0 dotnet test fsharp.test --configuration=Release --framework ${{ matrix.dotnet }} # Virtual job that can be configured as a required check before a PR can be merged. all-required-checks-done: diff --git a/CMakeLists.txt b/CMakeLists.txt index d73989cb..ac2d92d9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -59,6 +59,9 @@ if (MSVC) include_external_msproject(ParquetSharp.Test ${CMAKE_SOURCE_DIR}/csharp.test/ParquetSharp.Test.csproj TYPE 9A19103F-16F7-4668-BE54-9A1E7A4F7556 PLATFORM x64) add_dependencies(ParquetSharp.Test ParquetSharp) + + include_external_msproject(ParquetSharp.Test.FSharp ${CMAKE_SOURCE_DIR}/fsharp.test/ParquetSharp.Test.Fsharp.fsproj TYPE 6EC3EE1D-3C4E-46DD-8F32-0CC8E7565705 PLATFORM x64) + add_dependencies(ParquetSharp.Test.FSharp ParquetSharp) endif () add_subdirectory(cpp) diff --git a/csharp.test/ParquetSharp.Test.csproj b/csharp.test/ParquetSharp.Test.csproj index 9bd2d9fe..d3c91ac1 100644 --- a/csharp.test/ParquetSharp.Test.csproj +++ b/csharp.test/ParquetSharp.Test.csproj @@ -18,7 +18,7 @@ - + diff --git a/csharp/RowOriented/ParquetFile.cs b/csharp/RowOriented/ParquetFile.cs index 9e06dd96..71a74800 100644 --- a/csharp/RowOriented/ParquetFile.cs +++ b/csharp/RowOriented/ParquetFile.cs @@ -210,6 +210,13 @@ private static ParquetRowReader.ReadAction CreateReadDelegate(Ma // Use constructor or the property setters. var ctor = typeof(TTuple).GetConstructor(BindingFlags.Public | BindingFlags.Instance, null, fields.Select(f => f.Type).ToArray(), null); + if (ctor == null && !IsMemberInitializable(typeof(TTuple), fields)) + { + // Try to get a private constructor if we can't use public constructors. + // This is necessary if dealing with internal F# types, as all members on internal types are private. + ctor = typeof(TTuple).GetConstructor(BindingFlags.NonPublic | BindingFlags.Instance, null, + fields.Select(f => f.Type).ToArray(), null); + } // Buffers. var buffers = fields.Select(f => Expression.Variable(f.Type.MakeArrayType(), $"buffer_{f.Name}")).ToArray(); @@ -326,8 +333,12 @@ private static Expression For( private static MappedField[] GetFieldsAndProperties(Type type) { + // Members mapped to Parquet columns are any public fields and properties, + // and private fields and properties annotated with the + // MapToColumn attribute. Allowing private properties is required for mapping + // internal types in F#, in which all members are always private. var list = new List(); - var flags = BindingFlags.Public | BindingFlags.Instance; + var flags = BindingFlags.Public | BindingFlags.NonPublic | BindingFlags.Instance; if (type.IsGenericType && type.GetGenericTypeDefinition() == typeof(ValueTuple<,,,,,,,>)) { @@ -337,13 +348,26 @@ private static MappedField[] GetFieldsAndProperties(Type type) foreach (var field in type.GetFields(flags)) { var mappedColumn = field.GetCustomAttribute()?.ColumnName; - list.Add(new MappedField(field.Name, mappedColumn, field.FieldType, field)); + if (field.IsPublic || mappedColumn != null) + { + list.Add(new MappedField(field.Name, mappedColumn, field.FieldType, field)); + } } foreach (var property in type.GetProperties(flags)) { var mappedColumn = property.GetCustomAttribute()?.ColumnName; - list.Add(new MappedField(property.Name, mappedColumn, property.PropertyType, property)); + if ((property.GetMethod?.IsPublic ?? false) || mappedColumn != null) + { + list.Add(new MappedField(property.Name, mappedColumn, property.PropertyType, property)); + } + } + + if (list.Count == 0) + { + throw new ArgumentException( + $"Type '{type}' does not have any public fields or properties to map to Parquet columns, " + + $"or any private fields or properties annotated with '{nameof(MapToColumnAttribute)}'", nameof(type)); } // The order in which fields are processed is important given that when a tuple type is used in @@ -385,6 +409,33 @@ private static Column GetColumn(MappedField field) return new Column(field.Type, field.MappedColumn ?? field.Name, isDecimal ? LogicalType.Decimal(29, decimalScale!.Scale) : null); } + private static bool IsMemberInitializable(Type type, MappedField[] fields) + { + if (!type.IsValueType && + type.GetConstructor( + BindingFlags.Public | BindingFlags.Instance, null, Array.Empty(), null) == + null) + { + // No default constructor + return false; + } + + foreach (var field in fields) + { + var memberType = field.Info.MemberType; + if (memberType == MemberTypes.Field && !((FieldInfo) field.Info).IsPublic) + { + return false; + } + if (memberType == MemberTypes.Property && !(((PropertyInfo) field.Info).SetMethod?.IsPublic ?? false)) + { + return false; + } + } + + return true; + } + private static readonly ConcurrentDictionary ReadDelegatesCache = new ConcurrentDictionary(); diff --git a/docs/RowOriented.md b/docs/RowOriented.md index e971067d..b9c2cfdb 100644 --- a/docs/RowOriented.md +++ b/docs/RowOriented.md @@ -67,3 +67,14 @@ using (var rowReader = ParquetFile.CreateRowReader("example.parquet")) } } ``` + +## Using the row-oriented API from F# + +The row-oriented API works with F# types, +but one important issue to note is that if you are mapping an internal type, +all fields must have the `MapToColumn` attribute applied to be mapped to Parquet columns. + +This is because ParquetSharp will only map public fields and properties of a type by default, +and all fields of an internal F# type are private. +However, the `MapToColumn` attribute can be applied to private properties to +opt-in to including them in the column mapping. diff --git a/fsharp.test/ParquetSharp.Test.FSharp.fsproj b/fsharp.test/ParquetSharp.Test.FSharp.fsproj new file mode 100644 index 00000000..24a1ef38 --- /dev/null +++ b/fsharp.test/ParquetSharp.Test.FSharp.fsproj @@ -0,0 +1,28 @@ + + + + net6.0 + + + $(TargetFrameworks);netcoreapp3.1;net5.0 + $(TargetFrameworks);net472 + x64 + true + + + + + + + + + + + + + + + + + + diff --git a/fsharp.test/TestRowOrientedApi.fs b/fsharp.test/TestRowOrientedApi.fs new file mode 100644 index 00000000..1e9c902e --- /dev/null +++ b/fsharp.test/TestRowOrientedApi.fs @@ -0,0 +1,86 @@ +namespace ParquetSharp.Test.FSharp + +open System +open System.IO +open NUnit.Framework +open FsUnit +open ParquetSharp.RowOriented + +module TestRowOrientedApi = + + type PublicRecord = + { + Field : int + } + + type PublicRecordWithMappedField = + { + [] + Field : int + } + + type internal InternalRecord = + { + Field : int + } + + type internal InternalRecordWithMappedField = + { + [] + Field : int + } + + [] + type internal InternalStructRecordWithMappedField = + { + [] + Field : int + } + + let TestWritingRecord<'T> record verify = + let dir = Path.Combine(Path.GetTempPath (), Guid.NewGuid().ToString ()) |> Directory.CreateDirectory + try + let path = Path.Combine (dir.FullName, "test.parquet") + + using (ParquetFile.CreateRowWriter<'T> path) ( fun writer -> + writer.WriteRow record + ) + + use reader = ParquetFile.CreateRowReader<'T> path + let got = + seq { + for i in 0 .. reader.FileMetaData.NumRowGroups-1 do + for r in reader.ReadRows i do + yield r + } |> Seq.toList + + got |> Seq.length |> should equal 1 + got |> Seq.head |> verify + finally + dir.Delete true + + [] + let TestErrorWritingInternalRecord () = + let dir = Path.Combine(Path.GetTempPath (), Guid.NewGuid().ToString ()) |> Directory.CreateDirectory + try + let path = Path.Combine (dir.FullName, "test.parquet") + (fun () -> ParquetFile.CreateRowWriter path |> ignore) + |> should throw typeof + finally + dir.Delete true + + [] + let TestWritingPublicRecord () = + TestWritingRecord { Field = 1 } ( fun readField -> readField.Field |> should equal 1) + + [] + let TestWritingPublicRecordWithMappedField () = + TestWritingRecord { Field = 1 } ( fun readField -> readField.Field |> should equal 1) + + [] + let TestWritingInternalRecordWithMappedField () = + TestWritingRecord { Field = 1 } ( fun readField -> readField.Field |> should equal 1) + + [] + let TestWritingInternalStructRecordWithMappedField () = + TestWritingRecord { Field = 1 } ( fun readField -> readField.Field |> should equal 1)