-
Notifications
You must be signed in to change notification settings - Fork 263
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat(java): jit support for chunk based map serialization (#2027)
## What does this PR do? This PR adds JIT support for chunk based map serialization. It supports all kinds of map serialization by generated code: - final map key and value field type - polymorphic map key and value field type - nested map key and value type This PR also removes the old map serialization protocol code. The new chunk based protocol improves serialized size by up to **2.3X**. data: ``` stringMap: {"k1": "v1", "k2": "v2", ..., "k10": "v10" } intMap: {1:2, 2:4, 3: 6, ..., 10: 20} ``` new protocol: ``` stringMapBytes 68 stringKVStructBytes 69 intMapBytes 28 intKVStructBytes 29 ``` old protocol: ``` stringMapBytes 104 stringKVStructBytes 87 intMapBytes 64 intKVStructBytes 47 ``` It also improves performance by 20%. ## Related issues Closes #925 #2025 ## Does this PR introduce any user-facing change? <!-- If any user-facing interface changes, please [open an issue](https://github.com/apache/fury/issues/new/choose) describing the need to do so and update the document if necessary. --> - [ ] Does this PR introduce any public API change? - [ ] Does this PR introduce any binary protocol compatibility change? ## Benchmark [chunk-jmh-result.csv](https://github.com/user-attachments/files/18575900/chunk-jmh-result.csv) [nochunk-jmh-result.csv](https://github.com/user-attachments/files/18575901/nochunk-jmh-result.csv) ![image](https://github.com/user-attachments/assets/754f8e48-b45e-489b-adf5-cca1c5d03f1e)
- Loading branch information
1 parent
1e63705
commit e952b63
Showing
19 changed files
with
1,508 additions
and
760 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,158 @@ | ||
# Licensed to the Apache Software Foundation (ASF) under one | ||
# or more contributor license agreements. See the NOTICE file | ||
# distributed with this work for additional information | ||
# regarding copyright ownership. The ASF licenses this file | ||
# to you under the Apache License, Version 2.0 (the | ||
# "License"); you may not use this file except in compliance | ||
# with the License. You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, | ||
# software distributed under the License is distributed on an | ||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
# KIND, either express or implied. See the License for the | ||
# specific language governing permissions and limitations | ||
# under the License. | ||
|
||
import pandas as pd | ||
import matplotlib.pyplot as plt | ||
import numpy as np | ||
|
||
# Input files: benchmark results without and with the chunk based protocol.
no_chunk_file = "nochunk-jmh-result.csv"
chunk_file = "chunk-jmh-result.csv"
# Parse both result files into DataFrames (no-chunk first, then chunk).
no_chunk_df, chunk_df = (
    pd.read_csv(csv_path) for csv_path in (no_chunk_file, chunk_file)
)
|
||
|
||
# Function to plot the figures | ||
def _select_sorted(frame, operation, struct, datatype):
    """Return the rows of *frame* matching all three filters, ordered by ``mapSize``."""
    mask = (
        (frame["Benchmark"] == operation)
        & (frame["struct"] == struct)
        & (frame["datatype"] == datatype)
    )
    return frame[mask].sort_values("mapSize")


def plot_benchmark(ax, data1, data2, operation, struct, datatype, title):
    """Draw a grouped bar chart comparing two benchmark result sets on *ax*.

    Rows are selected from each DataFrame where the ``Benchmark``, ``struct``
    and ``datatype`` columns equal *operation*, *struct* and *datatype*.
    The ``Score`` column is drawn as bar height with ``ScoreError`` as the
    error bar; *data1* is labelled "No Chunk" and *data2* "Chunk".

    Bars are positioned by their ``mapSize`` value on an x axis built from
    the union of the sizes present in either selection, so the two data sets
    stay correctly aligned (and correctly labelled) even when one of them is
    missing a size. When both selections contain the same sizes the output
    is the same as pairing them row by row.

    Args:
        ax: Axes-like object providing ``bar``, ``set_xlabel``,
            ``set_ylabel``, ``set_title``, ``set_xticks``,
            ``set_xticklabels`` and ``legend``.
        data1: DataFrame drawn as the left bar of each group.
        data2: DataFrame drawn as the right bar of each group.
        operation: Value to match in the ``Benchmark`` column.
        struct: Value to match in the ``struct`` column.
        datatype: Value to match in the ``datatype`` column.
        title: Title set on *ax*.
    """
    rows1 = _select_sorted(data1, operation, struct, datatype)
    rows2 = _select_sorted(data2, operation, struct, datatype)

    # Shared, sorted, duplicate-free x axis covering both selections.
    sizes = np.union1d(rows1["mapSize"].to_numpy(), rows2["mapSize"].to_numpy())
    x = np.arange(len(sizes))
    x_labels = [str(size) for size in sizes.tolist()]
    width = 0.35

    for rows, offset, label in (
        (rows1, -width / 2, "No Chunk"),
        (rows2, width / 2, "Chunk"),
    ):
        # `sizes` is sorted and contains every mapSize of `rows`, so
        # searchsorted yields the exact tick index of each row.
        slots = np.searchsorted(sizes, rows["mapSize"].to_numpy())
        ax.bar(
            slots + offset,
            rows["Score"],
            width,
            yerr=rows["ScoreError"],
            label=label,
        )

    ax.set_xlabel("Map Size")
    ax.set_ylabel("Score (ops/s)")
    ax.set_title(title)
    ax.set_xticks(x)
    ax.set_xticklabels(x_labels)
    ax.legend()
|
||
|
||
# First figure: a 2x2 grid built from the rows whose "struct" column is True.
# Top row compares serialize, bottom row deserialize; left column is the
# "int" datatype, right column the "string" datatype.
fig1, axs1 = plt.subplots(2, 2, figsize=(10, 8))
codegen_panels = [
    ((0, 0), "serialize", "int", "Serialize | Datatype: Int"),
    ((0, 1), "serialize", "string", "Serialize | Datatype: String"),
    ((1, 0), "deserialize", "int", "Deserialize | Datatype: Int"),
    ((1, 1), "deserialize", "string", "Deserialize | Datatype: String"),
]
for grid_pos, panel_op, panel_type, panel_title in codegen_panels:
    plot_benchmark(
        axs1[grid_pos],
        no_chunk_df,
        chunk_df,
        panel_op,
        True,
        panel_type,
        panel_title,
    )
plt.tight_layout()
# The super title is placed slightly above the axes area (y > 1).
plt.suptitle("Benchmarks for codegen", y=1.05)
|
||
|
||
# Second figure: a 2x2 grid built from the rows whose "struct" column is False.
# Top row compares serialize, bottom row deserialize; left column is the
# "int" datatype, right column the "string" datatype.
fig2, axs2 = plt.subplots(2, 2, figsize=(10, 8))
no_codegen_panels = [
    ((0, 0), "serialize", "int", "Serialize | Datatype: Int"),
    ((0, 1), "serialize", "string", "Serialize | Datatype: String"),
    ((1, 0), "deserialize", "int", "Deserialize | Datatype: Int"),
    ((1, 1), "deserialize", "string", "Deserialize | Datatype: String"),
]
for grid_pos, panel_op, panel_type, panel_title in no_codegen_panels:
    plot_benchmark(
        axs2[grid_pos],
        no_chunk_df,
        chunk_df,
        panel_op,
        False,
        panel_type,
        panel_title,
    )
plt.tight_layout()
# The super title is placed slightly above the axes area (y > 1).
plt.suptitle("Benchmarks for no codegen", y=1.05)

# Display every figure created so far.
plt.show()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.