@@ -14,36 +14,46 @@ public partial class PrimitiveDataFrameColumn<T> : DataFrameColumn
14
14
{
15
15
/// <summary>
/// Builds the sorted column by computing the ascending sort indices for this column
/// and passing them to Clone together with the negated <paramref name="ascending"/> flag and NullCount.
/// </summary>
/// <param name="ascending">Requested sort direction (defaults to true); its negation is forwarded to Clone.</param>
/// <returns>The column returned by Clone for the computed sort indices.</returns>
public new PrimitiveDataFrameColumn < T > Sort ( bool ascending = true )
16
16
{
17
- PrimitiveDataFrameColumn < long > sortIndices = GetAscendingSortIndices ( ) ;
17
// The null-index column reported through the out parameter is not needed here, so it is discarded.
+ PrimitiveDataFrameColumn < long > sortIndices = GetAscendingSortIndices ( out Int64DataFrameColumn _ ) ;
18
18
// NOTE(review): the second Clone argument presumably requests inverted index order for descending sorts — confirm against Clone.
return Clone ( sortIndices , ! ascending , NullCount ) ;
19
19
}
20
20
21
/// <summary>
/// Gets the sort indices for this column by delegating to GetSortIndices with
/// <see cref="Comparer{T}.Default"/> as the comparer.
/// </summary>
/// <param name="nullIndices">Receives the null-index column that GetSortIndices reports through its out parameter.</param>
/// <returns>The index column returned by GetSortIndices.</returns>
- internal override PrimitiveDataFrameColumn < long > GetAscendingSortIndices ( )
21
+ internal override PrimitiveDataFrameColumn < long > GetAscendingSortIndices ( out Int64DataFrameColumn nullIndices )
22
22
{
23
- // The return sortIndices contains only the non null indices.
24
- GetSortIndices ( Comparer < T > . Default , out PrimitiveDataFrameColumn < long > sortIndices ) ;
23
// NOTE(review): the returned column presumably holds only non-null row indices, with null rows reported via nullIndices — confirm against GetSortIndices.
+ Int64DataFrameColumn sortIndices = GetSortIndices ( Comparer < T > . Default , out nullIndices ) ;
25
24
return sortIndices ;
26
25
}
27
26
28
- private void GetSortIndices ( IComparer < T > comparer , out PrimitiveDataFrameColumn < long > columnSortIndices )
27
+ private Int64DataFrameColumn GetSortIndices ( IComparer < T > comparer , out Int64DataFrameColumn columnNullIndices )
29
28
{
30
29
List < List < int > > bufferSortIndices = new List < List < int > > ( _columnContainer . Buffers . Count ) ;
30
+ columnNullIndices = new Int64DataFrameColumn ( "NullIndices" , NullCount ) ;
31
+ long nullIndicesSlot = 0 ;
31
32
// Sort each buffer first
32
33
for ( int b = 0 ; b < _columnContainer . Buffers . Count ; b ++ )
33
34
{
34
35
ReadOnlyDataFrameBuffer < T > buffer = _columnContainer . Buffers [ b ] ;
35
36
ReadOnlySpan < byte > nullBitMapSpan = _columnContainer . NullBitMapBuffers [ b ] . ReadOnlySpan ;
36
37
int [ ] sortIndices = new int [ buffer . Length ] ;
37
38
for ( int i = 0 ; i < buffer . Length ; i ++ )
39
+ {
38
40
sortIndices [ i ] = i ;
41
+ }
39
42
IntrospectiveSort ( buffer . ReadOnlySpan , buffer . Length , sortIndices , comparer ) ;
40
43
// Bug fix: QuickSort is not stable. When PrimitiveDataFrameColumn has null values and default values, they move around
41
44
List < int > nonNullSortIndices = new List < int > ( ) ;
42
45
for ( int i = 0 ; i < sortIndices . Length ; i ++ )
43
46
{
44
- if ( _columnContainer . IsValid ( nullBitMapSpan , sortIndices [ i ] ) )
47
+ int localSortIndex = sortIndices [ i ] ;
48
+ if ( _columnContainer . IsValid ( nullBitMapSpan , localSortIndex ) )
49
+ {
45
50
nonNullSortIndices . Add ( sortIndices [ i ] ) ;
46
-
51
+ }
52
+ else
53
+ {
54
+ columnNullIndices [ nullIndicesSlot ] = localSortIndex + b * _columnContainer . Buffers [ 0 ] . Length ;
55
+ nullIndicesSlot ++ ;
56
+ }
47
57
}
48
58
bufferSortIndices . Add ( nonNullSortIndices ) ;
49
59
}
@@ -90,11 +100,13 @@ ValueTuple<T, int> GetFirstNonNullValueAndBufferIndexStartingAtIndex(int bufferI
90
100
heapOfValueAndListOfTupleOfSortAndBufferIndex . Add ( valueAndBufferIndex . Item1 , new List < ValueTuple < int , int > > ( ) { ( valueAndBufferIndex . Item2 , i ) } ) ;
91
101
}
92
102
}
93
- columnSortIndices = new PrimitiveDataFrameColumn < long > ( "SortIndices" ) ;
103
+ Int64DataFrameColumn columnSortIndices = new Int64DataFrameColumn ( "SortIndices" ) ;
94
104
GetBufferSortIndex getBufferSortIndex = new GetBufferSortIndex ( ( int bufferIndex , int sortIndex ) => ( bufferSortIndices [ bufferIndex ] [ sortIndex ] ) + bufferIndex * bufferSortIndices [ 0 ] . Count ) ;
95
105
GetValueAndBufferSortIndexAtBuffer < T > getValueAndBufferSortIndexAtBuffer = new GetValueAndBufferSortIndexAtBuffer < T > ( ( int bufferIndex , int sortIndex ) => GetFirstNonNullValueAndBufferIndexStartingAtIndex ( bufferIndex , sortIndex ) ) ;
96
106
GetBufferLengthAtIndex getBufferLengthAtIndex = new GetBufferLengthAtIndex ( ( int bufferIndex ) => bufferSortIndices [ bufferIndex ] . Count ) ;
97
107
PopulateColumnSortIndicesWithHeap ( heapOfValueAndListOfTupleOfSortAndBufferIndex , columnSortIndices , getBufferSortIndex , getValueAndBufferSortIndexAtBuffer , getBufferLengthAtIndex ) ;
108
+
109
+ return columnSortIndices ;
98
110
}
99
111
}
100
112
}
0 commit comments