Skip to content

Commit

Permalink
Fix using internal F# types with the row oriented API (#332)
Browse files Browse the repository at this point in the history
  • Loading branch information
adamreeve authored Feb 12, 2023
1 parent 054d5cd commit ba06dd8
Show file tree
Hide file tree
Showing 7 changed files with 194 additions and 6 deletions.
13 changes: 11 additions & 2 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,7 @@ jobs:
run: |
dotnet build csharp.benchmark --configuration=Release -p:OSArchitecture=${{ matrix.arch }}
dotnet build csharp.test --configuration=Release -p:OSArchitecture=${{ matrix.arch }}
dotnet build fsharp.test --configuration=Release -p:OSArchitecture=${{ matrix.arch }}
- name: Upload native ParquetSharp library
uses: actions/upload-artifact@v3
with:
Expand Down Expand Up @@ -262,17 +263,25 @@ jobs:
run: |
dotnet remove csharp.test reference csharp/ParquetSharp.csproj
dotnet add csharp.test package ParquetSharp -v ${{ steps.get-version.outputs.version }}
dotnet remove fsharp.test reference csharp/ParquetSharp.csproj
dotnet add fsharp.test package ParquetSharp -v ${{ steps.get-version.outputs.version }}
- name: Setup QEMU for arm64
if: matrix.arch == 'arm64'
uses: docker/setup-qemu-action@v2
with:
platforms: arm64
- name: Build & Run .NET unit tests (x64)
- name: Build & Run C# unit tests (x64)
if: matrix.arch == 'x64'
run: dotnet test csharp.test --configuration=Release --framework ${{ matrix.dotnet }}
- name: Build & Run .NET unit tests (arm64)
- name: Build & Run F# unit tests (x64)
if: matrix.arch == 'x64'
run: dotnet test fsharp.test --configuration=Release --framework ${{ matrix.dotnet }}
- name: Build & Run C# unit tests (arm64)
if: matrix.arch == 'arm64'
run: docker run --rm --platform linux/arm64/v8 -v $PWD:$PWD -w $PWD mcr.microsoft.com/dotnet/sdk:6.0 dotnet test csharp.test --configuration=Release --framework ${{ matrix.dotnet }}
- name: Build & Run F# unit tests (arm64)
if: matrix.arch == 'arm64'
run: docker run --rm --platform linux/arm64/v8 -v $PWD:$PWD -w $PWD mcr.microsoft.com/dotnet/sdk:6.0 dotnet test fsharp.test --configuration=Release --framework ${{ matrix.dotnet }}

# Virtual job that can be configured as a required check before a PR can be merged.
all-required-checks-done:
Expand Down
3 changes: 3 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,9 @@ if (MSVC)

include_external_msproject(ParquetSharp.Test ${CMAKE_SOURCE_DIR}/csharp.test/ParquetSharp.Test.csproj TYPE 9A19103F-16F7-4668-BE54-9A1E7A4F7556 PLATFORM x64)
add_dependencies(ParquetSharp.Test ParquetSharp)

include_external_msproject(ParquetSharp.Test.FSharp ${CMAKE_SOURCE_DIR}/fsharp.test/ParquetSharp.Test.Fsharp.fsproj TYPE 6EC3EE1D-3C4E-46DD-8F32-0CC8E7565705 PLATFORM x64)
add_dependencies(ParquetSharp.Test.FSharp ParquetSharp)
endif ()

add_subdirectory(cpp)
2 changes: 1 addition & 1 deletion csharp.test/ParquetSharp.Test.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@

<ItemGroup>
<PackageReference Include="Microsoft.NET.Test.Sdk" Version="16.9.4" />
<PackageReference Include="NUnit" Version="3.13.2" />
<PackageReference Include="NUnit" Version="3.13.3" />
<PackageReference Include="NUnit3TestAdapter" Version="3.17.0" />
<PackageReference Include="Parquet.Net" Version="3.8.6" />
</ItemGroup>
Expand Down
57 changes: 54 additions & 3 deletions csharp/RowOriented/ParquetFile.cs
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,13 @@ private static ParquetRowReader<TTuple>.ReadAction CreateReadDelegate<TTuple>(Ma

// Use constructor or the property setters.
var ctor = typeof(TTuple).GetConstructor(BindingFlags.Public | BindingFlags.Instance, null, fields.Select(f => f.Type).ToArray(), null);
if (ctor == null && !IsMemberInitializable(typeof(TTuple), fields))
{
// Try to get a private constructor if we can't use public constructors.
// This is necessary if dealing with internal F# types, as all members on internal types are private.
ctor = typeof(TTuple).GetConstructor(BindingFlags.NonPublic | BindingFlags.Instance, null,
fields.Select(f => f.Type).ToArray(), null);
}

// Buffers.
var buffers = fields.Select(f => Expression.Variable(f.Type.MakeArrayType(), $"buffer_{f.Name}")).ToArray();
Expand Down Expand Up @@ -326,8 +333,12 @@ private static Expression For(

private static MappedField[] GetFieldsAndProperties(Type type)
{
// Members mapped to Parquet columns are any public fields and properties,
// and private fields and properties annotated with the
// MapToColumn attribute. Allowing private properties is required for mapping
// internal types in F#, in which all members are always private.
var list = new List<MappedField>();
var flags = BindingFlags.Public | BindingFlags.Instance;
var flags = BindingFlags.Public | BindingFlags.NonPublic | BindingFlags.Instance;

if (type.IsGenericType && type.GetGenericTypeDefinition() == typeof(ValueTuple<,,,,,,,>))
{
Expand All @@ -337,13 +348,26 @@ private static MappedField[] GetFieldsAndProperties(Type type)
foreach (var field in type.GetFields(flags))
{
var mappedColumn = field.GetCustomAttribute<MapToColumnAttribute>()?.ColumnName;
list.Add(new MappedField(field.Name, mappedColumn, field.FieldType, field));
if (field.IsPublic || mappedColumn != null)
{
list.Add(new MappedField(field.Name, mappedColumn, field.FieldType, field));
}
}

foreach (var property in type.GetProperties(flags))
{
var mappedColumn = property.GetCustomAttribute<MapToColumnAttribute>()?.ColumnName;
list.Add(new MappedField(property.Name, mappedColumn, property.PropertyType, property));
if ((property.GetMethod?.IsPublic ?? false) || mappedColumn != null)
{
list.Add(new MappedField(property.Name, mappedColumn, property.PropertyType, property));
}
}

if (list.Count == 0)
{
throw new ArgumentException(
$"Type '{type}' does not have any public fields or properties to map to Parquet columns, " +
$"or any private fields or properties annotated with '{nameof(MapToColumnAttribute)}'", nameof(type));
}

// The order in which fields are processed is important given that when a tuple type is used in
Expand Down Expand Up @@ -385,6 +409,33 @@ private static Column GetColumn(MappedField field)
return new Column(field.Type, field.MappedColumn ?? field.Name, isDecimal ? LogicalType.Decimal(29, decimalScale!.Scale) : null);
}

private static bool IsMemberInitializable(Type type, MappedField[] fields)
{
if (!type.IsValueType &&
type.GetConstructor(
BindingFlags.Public | BindingFlags.Instance, null, Array.Empty<Type>(), null) ==
null)
{
// No default constructor
return false;
}

foreach (var field in fields)
{
var memberType = field.Info.MemberType;
if (memberType == MemberTypes.Field && !((FieldInfo) field.Info).IsPublic)
{
return false;
}
if (memberType == MemberTypes.Property && !(((PropertyInfo) field.Info).SetMethod?.IsPublic ?? false))
{
return false;
}
}

return true;
}

private static readonly ConcurrentDictionary<Type, Delegate> ReadDelegatesCache =
new ConcurrentDictionary<Type, Delegate>();

Expand Down
11 changes: 11 additions & 0 deletions docs/RowOriented.md
Original file line number Diff line number Diff line change
Expand Up @@ -67,3 +67,14 @@ using (var rowReader = ParquetFile.CreateRowReader<MyRow>("example.parquet"))
}
}
```

## Using the row-oriented API from F#

The row-oriented API works with F# types,
but one important issue to note is that if you are mapping an internal type,
all fields must have the `MapToColumn` attribute applied to be mapped to Parquet columns.

This is because ParquetSharp will only map public fields and properties of a type by default,
and all fields of an internal F# type are private.
However, the `MapToColumn` attribute can be applied to private properties to
opt-in to including them in the column mapping.
28 changes: 28 additions & 0 deletions fsharp.test/ParquetSharp.Test.FSharp.fsproj
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
<Project Sdk="Microsoft.NET.Sdk">

<PropertyGroup>
<TargetFrameworks>net6.0</TargetFrameworks>
<!-- Avoid building for older frameworks when testing locally. -->
<!-- This is to speed up the build process and avoid errors when the required runtimes are not installed. -->
<TargetFrameworks Condition="'$(CI)' == 'true'">$(TargetFrameworks);netcoreapp3.1;net5.0</TargetFrameworks>
<TargetFrameworks Condition="'$(CI)' == 'true' AND '$(OS)'=='Windows_NT'">$(TargetFrameworks);net472</TargetFrameworks>
<PlatformTarget Condition="'$(TargetFramework)'=='net472'">x64</PlatformTarget>
<TreatWarningsAsErrors>true</TreatWarningsAsErrors>
</PropertyGroup>

<ItemGroup>
<PackageReference Include="FsUnit" Version="5.1.0" />
<PackageReference Include="Microsoft.NET.Test.Sdk" Version="16.9.4" />
<PackageReference Include="NUnit" Version="3.13.3" />
<PackageReference Include="NUnit3TestAdapter" Version="3.17.0" />
</ItemGroup>

<ItemGroup>
<ProjectReference Include="..\csharp\ParquetSharp.csproj" />
</ItemGroup>

<ItemGroup>
<Compile Include="TestRowOrientedApi.fs" />
</ItemGroup>

</Project>
86 changes: 86 additions & 0 deletions fsharp.test/TestRowOrientedApi.fs
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
namespace ParquetSharp.Test.FSharp

open System
open System.IO
open NUnit.Framework
open FsUnit
open ParquetSharp.RowOriented

module TestRowOrientedApi =

type PublicRecord =
{
Field : int
}

type PublicRecordWithMappedField =
{
[<MapToColumn("field")>]
Field : int
}

type internal InternalRecord =
{
Field : int
}

type internal InternalRecordWithMappedField =
{
[<MapToColumn("field")>]
Field : int
}

[<Struct>]
type internal InternalStructRecordWithMappedField =
{
[<MapToColumn("field")>]
Field : int
}

let TestWritingRecord<'T> record verify =
let dir = Path.Combine(Path.GetTempPath (), Guid.NewGuid().ToString ()) |> Directory.CreateDirectory
try
let path = Path.Combine (dir.FullName, "test.parquet")

using (ParquetFile.CreateRowWriter<'T> path) ( fun writer ->
writer.WriteRow record
)

use reader = ParquetFile.CreateRowReader<'T> path
let got =
seq {
for i in 0 .. reader.FileMetaData.NumRowGroups-1 do
for r in reader.ReadRows i do
yield r
} |> Seq.toList

got |> Seq.length |> should equal 1
got |> Seq.head |> verify
finally
dir.Delete true

[<Test>]
let TestErrorWritingInternalRecord () =
let dir = Path.Combine(Path.GetTempPath (), Guid.NewGuid().ToString ()) |> Directory.CreateDirectory
try
let path = Path.Combine (dir.FullName, "test.parquet")
(fun () -> ParquetFile.CreateRowWriter<InternalRecord> path |> ignore)
|> should throw typeof<System.ArgumentException>
finally
dir.Delete true

[<Test>]
let TestWritingPublicRecord () =
TestWritingRecord<PublicRecord> { Field = 1 } ( fun readField -> readField.Field |> should equal 1)

[<Test>]
let TestWritingPublicRecordWithMappedField () =
TestWritingRecord<PublicRecordWithMappedField> { Field = 1 } ( fun readField -> readField.Field |> should equal 1)

[<Test>]
let TestWritingInternalRecordWithMappedField () =
TestWritingRecord<InternalRecordWithMappedField> { Field = 1 } ( fun readField -> readField.Field |> should equal 1)

[<Test>]
let TestWritingInternalStructRecordWithMappedField () =
TestWritingRecord<InternalStructRecordWithMappedField> { Field = 1 } ( fun readField -> readField.Field |> should equal 1)

0 comments on commit ba06dd8

Please sign in to comment.