-
Notifications
You must be signed in to change notification settings - Fork 18
Sparse mode performance, SparseHist input dispatch, and low-memory --noHessian mode #129
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 12 commits
57e08eb
c80b409
0e17ff6
91ce1b3
dab00bc
47f1f90
08537b8
f51e53f
83afbd8
b6d7120
b30f867
3f41fc6
183f376
8493332
6c1c187
db274db
d0708b1
fad47bc
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Large diffs are not rendered by default.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,3 +1,4 @@ | ||
| import hdf5plugin # noqa: F401 registers Blosc2/LZ4 filter used by the writer | ||
| import tensorflow as tf | ||
|
|
||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -82,11 +82,25 @@ def __init__(self, filename, pseudodata=None): | |
| self.sparse = not "hnorm" in f | ||
|
|
||
| if self.sparse: | ||
| print( | ||
| "WARNING: The sparse tensor implementation is experimental and probably slower than with a dense tensor!" | ||
| ) | ||
| self.norm = makesparsetensor(f["hnorm_sparse"]) | ||
| self.logk = makesparsetensor(f["hlogk_sparse"]) | ||
| # Canonicalize index ordering once at load time. The fitter's | ||
| # sparse fast path reduces nonzero entries via row-keyed | ||
| # reductions; sorted row-major indices give coalesced memory | ||
| # access. tf.sparse.reorder sorts into row-major order. | ||
| self.norm = tf.sparse.reorder(self.norm) | ||
| self.logk = tf.sparse.reorder(self.logk) | ||
| # Pre-build a CSRSparseMatrix view of logk for use in the | ||
| # fitter's sparse matvec path via sm.matmul, which dispatches | ||
| # to a multi-threaded CSR kernel and is much faster per call | ||
| # than the equivalent gather + unsorted_segment_sum. NOTE: | ||
| # SparseMatrixMatMul has no XLA kernel, so any tf.function | ||
| # that calls sm.matmul must be built with jit_compile=False. | ||
| from tensorflow.python.ops.linalg.sparse import ( | ||
| sparse_csr_matrix_ops as _tf_sparse_csr, | ||
| ) | ||
|
|
||
| self.logk_csr = _tf_sparse_csr.CSRSparseMatrix(self.logk) | ||
| else: | ||
| self.norm = maketensor(f["hnorm"]) | ||
| self.logk = maketensor(f["hlogk"]) | ||
|
|
@@ -182,6 +196,59 @@ def __init__(self, filename, pseudodata=None): | |
|
|
||
| self.axis_procs = hist.axis.StrCategory(self.procs, name="processes") | ||
|
|
||
| # Load external likelihood terms (optional). | ||
|
||
| # Each entry is a dict with keys: | ||
| # name: str | ||
| # params: 1D ndarray of parameter name strings | ||
| # grad_values: 1D float ndarray or None | ||
| # hess_dense: 2D float ndarray or None | ||
| # hess_sparse: tuple (rows, cols, values) or None | ||
| self.external_terms = [] | ||
| if "external_terms" in f.keys(): | ||
| names = [ | ||
| s.decode() if isinstance(s, bytes) else s | ||
| for s in f["hexternal_term_names"][...] | ||
| ] | ||
| ext_group = f["external_terms"] | ||
| for tname in names: | ||
|
||
| tg = ext_group[tname] | ||
| raw_params = tg["params"][...] | ||
| params = np.array( | ||
| [s.decode() if isinstance(s, bytes) else s for s in raw_params] | ||
| ) | ||
| grad_values = ( | ||
| np.asarray(maketensor(tg["grad_values"])) | ||
| if "grad_values" in tg.keys() | ||
| else None | ||
| ) | ||
| hess_dense = ( | ||
| np.asarray(maketensor(tg["hess_dense"])) | ||
| if "hess_dense" in tg.keys() | ||
| else None | ||
| ) | ||
| hess_sparse = None | ||
| if "hess_sparse" in tg.keys(): | ||
| hg = tg["hess_sparse"] | ||
| idx_dset = hg["indices"] | ||
| if "original_shape" in idx_dset.attrs: | ||
| idx_shape = tuple(idx_dset.attrs["original_shape"]) | ||
| indices = np.asarray(idx_dset).reshape(idx_shape) | ||
| else: | ||
| indices = np.asarray(idx_dset) | ||
| rows = indices[:, 0] | ||
| cols = indices[:, 1] | ||
| vals = np.asarray(hg["values"]) | ||
| hess_sparse = (rows, cols, vals) | ||
| self.external_terms.append( | ||
| { | ||
| "name": tname, | ||
| "params": params, | ||
| "grad_values": grad_values, | ||
| "hess_dense": hess_dense, | ||
| "hess_sparse": hess_sparse, | ||
| } | ||
| ) | ||
|
|
||
| @tf.function | ||
| def expected_events_nominal(self): | ||
| rnorm = tf.ones(self.nproc, dtype=self.dtype) | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -202,6 +202,24 @@ def common_parser(): | |
| ], | ||
| help="Minimizer method used in scipy.optimize.minimize for the nominal fit minimization", ||
| ) | ||
| parser.add_argument( | ||
| "--hvpMethod", | ||
| default="revrev", | ||
| type=str, | ||
| choices=["fwdrev", "revrev"], | ||
| help="Autodiff mode for the Hessian-vector product. 'revrev' (reverse-over-reverse) " | ||
| "is the default and works well in combination with --jitCompile. 'fwdrev' " | ||
| "(forward-over-reverse, via tf.autodiff.ForwardAccumulator) is an alternative.", | ||
| ) | ||
| parser.add_argument( | ||
| "--noJitCompile", | ||
|
||
| dest="jitCompile", | ||
|
||
| default=True, | ||
| action="store_false", | ||
| help="Disable XLA jit_compile=True on the loss/gradient/HVP tf.functions. " | ||
| "jit_compile is enabled by default and substantially speeds up sparse-mode fits " | ||
| "with very large numbers of parameters.", | ||
| ) | ||
| parser.add_argument( | ||
| "--chisqFit", | ||
| default=False, | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.