
Commit 4abcabb

Merge pull request #71 from histogrammar/test_scala213
Use scala213 jar files for spark 4
2 parents: c3a7d01 + 9e54617 (commit 4abcabb)

12 files changed: +47 -43 lines

.github/workflows/test.yml
Lines changed: 1 addition & 1 deletion

@@ -35,7 +35,7 @@ jobs:
         run: |
           python -m pip install --upgrade pip
           if [ "${{ matrix.numpy_version }}" = "numpy<2" ]; then
-            pip install ".[test,pandas,spark,test_numpy_pre2]"
+            pip install ".[test,pandas,test_spark_pre2,test_numpy_pre2]"
           else
             pip install ".[test,pandas,spark]"
           fi

README.rst
Lines changed: 8 additions & 8 deletions

@@ -17,8 +17,8 @@ more quickly via Numpy commands, rather than Python for loops.
 
 This Python implementation of histogrammar been tested to guarantee compatibility with its Scala implementation.
 
-Latest Python release: v1.1.1 (Aug 2025).
-Latest update: Aug 2025.
+Latest Python release: v1.1.2 (Sep 2025).
+Latest update: Sep 2025.
 
 References
 ==========
@@ -38,19 +38,19 @@ Changes
 See Changes log `here <https://github.com/histogrammar/histogrammar-python/blob/master/CHANGES.rst>`_.
 
 
-Spark 3.X
----------
+Spark
+-----
 
-With Spark 3.X, based on Scala 2.12 or 2.13, make sure to pick up the correct histogrammar jar files:
+With Spark, make sure to pick up the correct histogrammar jar files. Spark 4.X is based on Scala 2.13; Spark 3.X is based on Scala 2.12 or 2.13.
 
 .. code-block:: python
 
-    spark = SparkSession.builder.config("spark.jars.packages", "io.github.histogrammar:histogrammar_2.12:1.0.30,io.github.histogrammar:histogrammar-sparksql_2.12:1.0.30").getOrCreate()
+    spark = SparkSession.builder.config("spark.jars.packages", "io.github.histogrammar:histogrammar_2.13:1.0.30,io.github.histogrammar:histogrammar-sparksql_2.13:1.0.30").getOrCreate()
 
-For Scala 2.13, in the string above simply replace "2.12" with "2.13".
+For Scala 2.12, in the string above simply replace "2.13" with "2.12".
 
-December, 2023
+September, 2025
 
 
 Example notebooks
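For reference, a minimal sketch (not part of this diff) of picking the matching Scala suffix from the installed PySpark version, mirroring the selection logic used in the tutorial notebook further down in this commit:

# Sketch only: choose histogrammar jar coordinates matching the Scala build of
# the installed PySpark (Spark 4.x -> Scala 2.13, Spark 3.x PyPI wheels -> 2.12).
import pyspark
from pyspark.sql import SparkSession

scala = "2.12" if int(pyspark.__version__.split(".")[0]) == 3 else "2.13"
packages = (
    f"io.github.histogrammar:histogrammar_{scala}:1.0.30,"
    f"io.github.histogrammar:histogrammar-sparksql_{scala}:1.0.30"
)
spark = SparkSession.builder.config("spark.jars.packages", packages).getOrCreate()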

histogrammar/dfinterface/spark_histogrammar.py
Lines changed: 1 addition & 1 deletion

@@ -225,7 +225,7 @@ def construct_empty_hist(self, df, features):
         for idx, col in enumerate(revcols):
             # histogram type depends on the data type
             dt = self.var_dtype[col]
-            quant = df[col]
+            quant = f.col(col)
             hist = self.get_hist_bin(hist, features, quant, col, dt)
 
         return hist
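A minimal sketch (not part of the repository) of the difference this change touches, assuming `f` is the usual `pyspark.sql.functions` alias imported in this module: `df[col]` is a Column bound to one specific DataFrame, while `f.col(col)` is a name-based reference resolved when the expression is evaluated.

# Sketch only: DataFrame-bound column vs. name-based column reference.
from pyspark.sql import SparkSession
from pyspark.sql import functions as f

spark = SparkSession.builder.getOrCreate()
df = spark.createDataFrame([(1.0,), (2.0,)], ["x"])

bound = df["x"]       # tied to this particular DataFrame
by_name = f.col("x")  # resolved by name when the plan is evaluated

df.select(bound, by_name).show()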

histogrammar/notebooks/histogrammar_tutorial_advanced.ipynb
Lines changed: 5 additions & 4 deletions

@@ -3,6 +3,7 @@
 {
  "cell_type": "markdown",
  "metadata": {
+  "collapsed": false,
   "jupyter": {
    "outputs_hidden": false
   },
@@ -118,9 +119,9 @@
 "# for spark 2.X, in the jars string, for both jar files change \"_2.12\" into \"_2.11\".\n",
 "\n",
 "if pyspark_installed:\n",
-"    scala = '2.12' if int(pyspark_version[0]) >= 3 else '2.11'\n",
-"    hist_jar = f'io.github.histogrammar:histogrammar_{scala}:1.0.20'\n",
-"    hist_spark_jar = f'io.github.histogrammar:histogrammar-sparksql_{scala}:1.0.20'\n",
+"    scala = '2.12' if int(pyspark_version[0]) == 3 else '2.13'\n",
+"    hist_jar = f'io.github.histogrammar:histogrammar_{scala}:1.0.30'\n",
+"    hist_spark_jar = f'io.github.histogrammar:histogrammar-sparksql_{scala}:1.0.30'\n",
 "\n",
 "    spark = SparkSession.builder.config(\n",
 "        \"spark.jars.packages\", f'{hist_spark_jar},{hist_jar}'\n",
@@ -521,7 +522,7 @@
  "name": "python",
  "nbconvert_exporter": "python",
  "pygments_lexer": "ipython3",
- "version": "3.7.6"
+ "version": "3.11.11"
 },
 "nteract": {
  "version": "0.15.0"

histogrammar/util.py
Lines changed: 1 addition & 1 deletion

@@ -247,7 +247,7 @@ def __init__(self, expr, name=None):
             ok = False
         else:
             if isinstance(expr, Column) and self.name is None:
-                self.name = str(expr)[7:-1]
+                self.name = str(expr)[8:-2]
                 ok = True
         if not ok:
             raise TypeError(f"quantity ({expr}) must be a string, function, or SparkSQL Column")

histogrammar/version.py
Lines changed: 1 addition & 1 deletion

@@ -2,7 +2,7 @@
 
 import re
 
-version = "1.1.1"
+version = "1.1.2"
 
 
 def split_version_string(version_string: str) -> tuple[int, int]:

pyproject.toml
Lines changed: 6 additions & 3 deletions

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "histogrammar"
-description = "Composable histogram primitives for distributed data reduction"
+description = "Histograms for Pandas/Spark/Numpy"
 keywords = [
     "pandas",
     "spark",
@@ -17,7 +17,7 @@ keywords = [
 ]
 readme = "README.rst"
 requires-python = ">=3.9"
-authors = [{ name = "Jim Pivarski (DIANA-HEP)", email = "[email protected]" }, { name = "Max Baak", email = "[email protected]" }]
+authors = [{ name = "Max Baak", email = "[email protected]" }, { name = "Jim Pivarski (DIANA-HEP)", email = "[email protected]" }]
 maintainers = [{ name = "Max Baak", email = "[email protected]" }]
 license = { type = "Apache Software License v2", file = "LICENSE" }
 dependencies = [
@@ -40,7 +40,7 @@ pandas = [
     "pandas"
 ]
 spark = [
-    "pyspark<4; python_version <= '3.11'",
+    "pyspark",
 ]
 test = [
     "ipykernel>=5.1.3",
@@ -55,6 +55,9 @@ test_numpy_pre2 = [
     "numpy<2",
     "pandas<2",
 ]
+test_spark_pre2 = [
+    "pyspark<4; python_version <= '3.11'",
+]
 
 # files to be shipped with the installation, under: histogrammar/test_data and histogrammar/notebooks
 # after installation, these can be found with the functions in resources.py
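A small sketch (illustrative only) of how the environment marker guarding the new test_spark_pre2 extra behaves: the pyspark<4 pin only applies on Python 3.11 and older, which pip decides by evaluating the marker.

# Sketch only: evaluating the "python_version <= '3.11'" marker that guards the
# pyspark<4 pin in the new test_spark_pre2 extra.
from packaging.markers import Marker

marker = Marker("python_version <= '3.11'")
print(marker.evaluate({"python_version": "3.11"}))  # True  -> pyspark<4 applies
print(marker.evaluate({"python_version": "3.12"}))  # False -> the pin is skipped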
3 binary files changed (34 KB, 34.1 KB, 773 KB): not shown.
