From 2910cf2aab842d651925668d44e03d2a23b4199d Mon Sep 17 00:00:00 2001
From: Michael Shirts <michael.shirts@colorado.edu>
Date: Sun, 19 Jun 2022 18:59:23 -0600
Subject: [PATCH 1/2] Trying to make MBAR work better adaptively.

* Changed the way defaults are set up.
* If the first solver chosen fails, automatically falls back to a second solver ('adaptive', or 'BFGS' if 'adaptive' was tried first).
* If multiple solvers are tried, returns the best result.
* Makes adaptive more consistent with other methods.
---
 pymbar/mbar.py         |  34 +++++++++----
 pymbar/mbar_solvers.py | 111 ++++++++++++++++++++++++++++++-----------
 2 files changed, 106 insertions(+), 39 deletions(-)

diff --git a/pymbar/mbar.py b/pymbar/mbar.py
index f3eace28..73f7aab3 100644
--- a/pymbar/mbar.py
+++ b/pymbar/mbar.py
@@ -1,9 +1,9 @@
 ##############################################################################
 # pymbar: A Python Library for MBAR
 #
-# Copyright 2017 University of Colorado Boulder
-# Copyright 2010-2017 Memorial Sloan-Kettering Cancer Center
-# Portions of this software are Copyright (c) 2010-2016 University of Virginia
+# Copyright 2016-2022 University of Colorado Boulder
+# Copyright 2010-2022 Memorial Sloan-Kettering Cancer Center
+# Portions of this software are Copyright (c) 2010-2015 University of Virginia
 # Portions of this software are Copyright (c) 2006-2007 The Regents of the University of California.  All Rights Reserved.
 # Portions of this software are Copyright (c) 2007-2008 Stanford University and Columbia University.
 #
@@ -43,8 +43,6 @@
 from pymbar import mbar_solvers
 from pymbar.utils import kln_to_kn, kn_to_n, ParameterError, DataError, logsumexp, check_w_normalized
 
-DEFAULT_SOLVER_PROTOCOL = mbar_solvers.DEFAULT_SOLVER_PROTOCOL
-
 # =========================================================================
 # MBAR class definition
 # =========================================================================
@@ -106,9 +104,9 @@ def __init__(self, u_kn, N_k, maximum_iterations=10000, relative_tolerance=1.0e-
             from above, once ``u_kln`` is phased out.
 
         maximum_iterations : int, optional
-            Set to limit the maximum number of iterations performed (default 1000)
+            Set to limit the maximum number of iterations performed (default 10000)
         relative_tolerance : float, optional
-            Set to determine the relative tolerance convergence criteria (default 1.0e-6)
+            Set to determine the relative tolerance convergence criteria (default 1.0e-7)
         verbosity : bool, optional
             Set to True if verbose debug output is desired (default False)
         initial_f_k : np.ndarray, float, shape=(K), optional
@@ -292,14 +290,32 @@ def __init__(self, u_kn, N_k, maximum_iterations=10000, relative_tolerance=1.0e-
                 print(self.f_k)
 
         if solver_protocol is None:
-            solver_protocol = ({'method': None},)
+            solver_protocol = mbar_solvers.DEFAULT_SOLVER_PROTOCOL
+
+        # Add backup option if only one method is given
+        # The backup method is 'adaptive', unless 'adaptive' is already the only method selected. 
+        if len(solver_protocol) == 1:
+            fallback = dict()
+            if solver_protocol[0]['method'] != 'adaptive':
+                fallback['method'] = 'adaptive'
+                print("Adding backup method 'adaptive'")
+            else:
+                # the first method is already 'adaptive', so use 'BFGS' as the backup
+                fallback['method'] = 'BFGS'
+                print("Adding backup method 'BFGS'")
+            solver_protocol = solver_protocol + (fallback,)
+
         for solver in solver_protocol:
             if 'options' not in solver:
                 solver['options'] = dict()
-                solver['options']['maximum_iterations'] = maximum_iterations
+            if 'maxiter' not in solver['options']:
+                solver['options']['maxiter'] = maximum_iterations
             if 'verbose' not in solver['options']:
                 # should add in other ways to get information out of the scipy solvers, not just adaptive,
                 # which might involve passing in different combinations of options, and passing out other strings.
+                # to be addressed with logging in mbar 4.0.
                 solver['options']['verbose'] = self.verbose
 
         self.f_k = mbar_solvers.solve_mbar_for_all_states(self.u_kn, self.N_k, self.f_k, solver_protocol)
diff --git a/pymbar/mbar_solvers.py b/pymbar/mbar_solvers.py
index 2953ffe0..f786670a 100644
--- a/pymbar/mbar_solvers.py
+++ b/pymbar/mbar_solvers.py
@@ -5,14 +5,19 @@
 from pymbar.utils import ensure_type, logsumexp, check_w_normalized
 import warnings
 
-# Below are the recommended default protocols (ordered sequence of minimization algorithms / NLE solvers) for solving the MBAR equations.
+# The 'solver_protocol' dictionary is set up to make it possible to wrap the scipy solvers.
+# We introduce our own robust solver as well.
+
 # Note: we use tuples instead of lists to avoid accidental mutability.
 #DEFAULT_SUBSAMPLING_PROTOCOL = (dict(method="L-BFGS-B"), )  # First use BFGS on subsampled data.
 #DEFAULT_SOLVER_PROTOCOL = (dict(method="hybr"), )  # Then do fmin hybrid on full dataset.
 # Use Adpative solver as first attempt
-DEFAULT_SOLVER_METHOD = "adaptive"
-DEFAULT_SOLVER_PROTOCOL = (dict(method=DEFAULT_SOLVER_METHOD,),)
 
+DEFAULT_SOLVER_PROTOCOL = (dict(method="adaptive",),dict(method="L-BFGS-B",))
+# Uses all of the gradient-based methods, but not the non-gradient methods
+# ["Nelder-Mead", "Powell", "COBYLA"]
+SCIPY_OPTIONS = ["L-BFGS-B", "dogleg", "CG", "BFGS", "Newton-CG", "TNC", "trust-ncg", "trust-krylov", "trust-exact", "SLSQP"]
+SCIPY_NOHESS_OPTIONS = ["L-BFGS-B","BFGS","CG","TNC"] # don't pass a hessian to these, to avoid warnings.
 
 def validate_inputs(u_kn, N_k, f_k):
     """Check types and return inputs for MBAR calculations.
@@ -246,7 +251,8 @@ def adaptive(u_kn, N_k, f_k, tol = 1.0e-12, options = None):
 
     options: dictionary of options
        gamma (float between 0 and 1) - incrementor for NR iterations (default 1.0).  Usually not changed now, since we switch adaptively.
-        maximum_iterations (int) - maximum number of Newton-Raphson iterations (default 250: either NR converges or doesn't, pretty quickly)
+        maxiter (int) - maximum number of Newton-Raphson/self-consistent iterations (default 10000:
+                        NR either converges or fails quickly, but self-consistent iteration can take many iterations.)
         verbose (boolean) - verbosity level for debug output
 
     NOTES
@@ -268,11 +274,12 @@ def adaptive(u_kn, N_k, f_k, tol = 1.0e-12, options = None):
     """
     # put the defaults here in case we get passed an 'options' dictionary that is only partial
     options.setdefault('verbose',False)
-    options.setdefault('maximum_iterations',10000)
+    options.setdefault('maxiter',10000)
     options.setdefault('print_warning',False)
     options.setdefault('gamma',1.0)
 
     gamma = options['gamma']
+        
     doneIterating = False
     if options['verbose'] == True:
         print("Determining dimensionless free energies by Newton-Raphson / self-consistent iteration.")
@@ -286,8 +293,9 @@ def adaptive(u_kn, N_k, f_k, tol = 1.0e-12, options = None):
     f_sci = np.zeros(len(f_k), dtype=np.float64)
     f_nr = np.zeros(len(f_k), dtype=np.float64)
 
+    success = False
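+    # 'success' is reported in the results dict that adaptive() now returns.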
     # Perform Newton-Raphson iterations (with sci computed on the way)
-    for iteration in range(0, options['maximum_iterations']):
+    for iteration in range(0, options['maxiter']):
         g = mbar_gradient(u_kn, N_k, f_k)  # Objective function gradient
         H = mbar_hessian(u_kn, N_k, f_k)  # Objective function hessian
         Hinvg = np.linalg.lstsq(H, g, rcond=-1)[0]
@@ -331,23 +339,33 @@ def adaptive(u_kn, N_k, f_k, tol = 1.0e-12, options = None):
         max_delta = np.max(np.abs(f_k[1:]-f_old[1:])/div)
         if np.isnan(max_delta) or (max_delta < tol):
             doneIterating = True
+            success = True
+            warn = "Convergence achieved by value of gnorm"
             break
 
     if doneIterating:
         if options['verbose']:
             print('Converged to tolerance of {:e} in {:d} iterations.'.format(max_delta, iteration + 1))
             print('Of {:d} iterations, {:d} were Newton-Raphson iterations and {:d} were self-consistent iterations'.format(iteration + 1, nr_iter, sci_iter))
             if np.all(f_k == 0.0):
                 # all f_k appear to be zero
-                print('WARNING: All f_k appear to be zero.')
+                warn = 'WARNING: All f_k appear to be zero.'
+                success = False
     else:
-        print('WARNING: Did not converge to within specified tolerance.')
-        if options['maximum_iterations'] <= 0:
-            print("No iterations ran be cause maximum_iterations was <= 0 ({})!".format(options['maximum_iterations']))
+        warn = 'WARNING: Did not converge to within specified tolerance.\n'
+        if options['maxiter'] <= 0:
+            warn += "No iterations ran because maximum_iterations was <= 0 ({})!".format(options['maxiter'])
         else:
-            print('max_delta = {:e}, tol = {:e}, maximum_iterations = {:d}, iterations completed = {:d}'.format(max_delta,tol, options['maximum_iterations'], iteration))
-    return f_k
+            warn += 'max_delta = {:e}, tol = {:e}, maxiter = {:d}, iterations completed = {:d}'.format(max_delta, tol, options['maxiter'], iteration)
+
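+    # Package the outcome like a scipy.optimize result ('success', 'message', 'x'),
+    # so callers can treat all solver methods uniformly.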
+    results = dict()
+    results['success'] = success
+    results['message'] = warn
+    results['x'] = f_k
 
+    return results
 
 def precondition_u_kn(u_kn, N_k, f_k):
     """Subtract a sample-dependent constant from u_kn to improve precision
@@ -380,7 +398,7 @@ def precondition_u_kn(u_kn, N_k, f_k):
     return u_kn
 
 
-def solve_mbar_once(u_kn_nonzero, N_k_nonzero, f_k_nonzero, method="hybr", tol=1E-12, options=None):
+def solve_mbar_once(u_kn_nonzero, N_k_nonzero, f_k_nonzero, method="adaptive", tol=1E-12, options=None):
     """Solve MBAR self-consistent equations using some form of equation solver.
 
     Parameters
@@ -392,7 +410,7 @@ def solve_mbar_once(u_kn_nonzero, N_k_nonzero, f_k_nonzero, method="hybr", tol=1
         The number of samples in each state for the nonempty states
     f_k_nonzero : np.ndarray, shape=(n_states), dtype='float'
         The reduced free energies for the nonempty states
-    method : str, optional, default="hybr"
+    method : str, optional, default="adaptive"
         The optimization routine to use.  This can be any of the methods
         available via scipy.optimize.minimize() or scipy.optimize.root().
     tol : float, optional, default=1E-14
@@ -435,15 +453,16 @@ def solve_mbar_once(u_kn_nonzero, N_k_nonzero, f_k_nonzero, method="hybr", tol=1
     hess = lambda x: mbar_hessian(u_kn_nonzero, N_k_nonzero, pad(x))[1:][:, 1:]  # Hessian of objective function
 
     with warnings.catch_warnings(record=True) as w:
-        if method in ["L-BFGS-B", "dogleg", "CG", "BFGS", "Newton-CG", "TNC", "trust-ncg", "SLSQP"]:
-            if method in ["L-BFGS-B", "CG"]:
+        if method in SCIPY_OPTIONS:
+            if method in SCIPY_NOHESS_OPTIONS:
                 hess = None  # To suppress warning from passing a hessian function.
             results = scipy.optimize.minimize(grad_and_obj, f_k_nonzero[1:], jac=True, hess=hess, method=method, tol=tol, options=options)
             f_k_nonzero = pad(results["x"])
         elif method == 'adaptive':
             results = adaptive(u_kn_nonzero, N_k_nonzero, f_k_nonzero, tol=tol, options=options)
-            f_k_nonzero = results # they are the same for adaptive, until we decide to return more.
+            f_k_nonzero = results["x"] 
         else:
+            # find the root of the gradient.  It is unclear how well these root-finding methods have been tested.
             results = scipy.optimize.root(grad, f_k_nonzero[1:], jac=hess, method=method, tol=tol, options=options)
             f_k_nonzero = pad(results["x"])
 
@@ -460,7 +479,7 @@ def solve_mbar_once(u_kn_nonzero, N_k_nonzero, f_k_nonzero, method="hybr", tol=1
             # Ensure MBAR solved correctly
             w_nk_check = mbar_W_nk(u_kn_nonzero, N_k_nonzero, f_k_nonzero)
             check_w_normalized(w_nk_check, N_k_nonzero)
-            print("MBAR weights converged within tolerance, despite the SciPy Warnings. Please validate your results.")
+            print("MBAR weights converged within tolerance, despite the SciPy Warnings. However, please validate your results.")
 
     return f_k_nonzero, results
 
@@ -498,24 +517,56 @@ def solve_mbar(u_kn_nonzero, N_k_nonzero, f_k_nonzero, solver_protocol=None):
     by subtracting off the first component of f_k and fixing that component
     to be zero.
 
-    This function calls `solve_mbar_once()` multiple times to achieve
-    converged results.  Generally, a single call to solve_mbar_once()
-    will not give fully converged answers because of limited numerical precision.
+    This function calls `solve_mbar_once()`.  Usually, a single call
+    will converge to the correct answer.  However, there are a number of
+    specific cases that may succeed or fail with a specific algorithm.
+
+    The default case is to use the adaptive solver, which alternates
+    between Newton-Raphson iteration and self-consistent iteration,
+    depending on which has the lowest gradient.  If the adaptive solver
+    fails, it then tries the BFGS algorithm.  If one starts with BFGS,
+    then it changes to adaptive if BFGS fails.  Currently, it restarts
+    with whatever the initialized values are when it switches algorithms,
+    with the assumption that if it is failing, the current guess
+    may not be a workable starting point.
+
     Each call to `solve_mbar_once()` re-conditions the nonlinear
     equations using the current guess.
+
     """
-    if solver_protocol is None:
-        solver_protocol = DEFAULT_SOLVER_PROTOCOL
-    for protocol in solver_protocol:
-        if protocol['method'] is None:
-            protocol['method'] = DEFAULT_SOLVER_METHOD
 
+    all_fks = []
     all_results = []
-    for k, options in enumerate(solver_protocol):
-        f_k_nonzero, results = solve_mbar_once(u_kn_nonzero, N_k_nonzero, f_k_nonzero, **options)
+    all_gnorms = []
+    success = False
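+    # Try each solver in the protocol in turn, recording each result and its gradient norm.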
+    for k, solver in enumerate(solver_protocol):
+        print(f"Trying solution with solver {solver['method']}")
+        f_k_nonzero_result, results = solve_mbar_once(u_kn_nonzero, N_k_nonzero, f_k_nonzero, **solver)
+        all_fks.append(f_k_nonzero_result)
         all_results.append(results)
-        all_results.append(("Final gradient norm: %.3g" % np.linalg.norm(mbar_gradient(u_kn_nonzero, N_k_nonzero, f_k_nonzero))))
-    return f_k_nonzero, all_results
+        all_gnorms.append(np.linalg.norm(mbar_gradient(u_kn_nonzero, N_k_nonzero, f_k_nonzero_result)))
+
+        if results["success"] == True:
+            success = True
+            best_gnorm = all_gnorms[-1]
+            break
+        else:
+            print(f"Failed to reach a solution to within tolerance with {solver['method']}: trying next method")
+
+    if success:
+        print("Solution found within tolerance!")
+    else:
+        i_best_gnorm = np.argmin(all_gnorms)
+        print("No solution found to within tolerance.")
+        best_method = solver_protocol[i_best_gnorm]['method']
+        print(f"The solution with the smallest lowest gradient norm is {best_method}")
+        best_gnorm = all_gnorm[i_best_gnorm]
+        f_k_nonzero_result = all_fks[i_best_ngnorm]
+        print("Please exercise caution with this solution and consider alternative methods or a different tolerance") 
+
+    print(f"Final gradient norm: {best_gnorm:.3g}")
+
+    return f_k_nonzero_result, all_results
 
 
 def solve_mbar_for_all_states(u_kn, N_k, f_k, solver_protocol):

From 4be3ac85f0439abc7a8212d16e937252a10b3cbf Mon Sep 17 00:00:00 2001
From: Michael Shirts <michael.shirts@colorado.edu>
Date: Tue, 21 Jun 2022 09:48:29 -0600
Subject: [PATCH 2/2] More changes in adaptive solving
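
* Adds a 'continuation' option to solver protocol entries: when set, a
  failed solver's result seeds the starting point for the next solver.
* Changes the default protocol to BFGS (with continuation=True) followed
  by adaptive.
* Uses the true (square-root) norm of the gradient in the adaptive solver.
* Raises an error for unrecognized solution methods.
* Extends the unconverged-weights error messages in utils.py.

As an illustrative sketch (not part of the diff), the new option can be
passed through a custom protocol:

    solver_protocol = (dict(method="BFGS", continuation=True),
                       dict(method="adaptive"))
    mbar = MBAR(u_kn, N_k, solver_protocol=solver_protocol)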

---
 pymbar/mbar.py         |  2 ++
 pymbar/mbar_solvers.py | 62 ++++++++++++++++++++----------------------
 pymbar/utils.py        |  8 +++---
 3 files changed, 36 insertions(+), 36 deletions(-)

diff --git a/pymbar/mbar.py b/pymbar/mbar.py
index 73f7aab3..549ae779 100644
--- a/pymbar/mbar.py
+++ b/pymbar/mbar.py
@@ -310,6 +310,8 @@ def __init__(self, u_kn, N_k, maximum_iterations=10000, relative_tolerance=1.0e-
         for solver in solver_protocol:
             if 'options' not in solver:
                 solver['options'] = dict()
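+            # When 'continuation' is set on a solver, its result is used to seed
+            # the next solver in the protocol if it fails (see solve_mbar).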
+            if 'continuation' not in solver:
+                solver['continuation'] = None
             if 'maxiter' not in solver['options']:
                 solver['options']['maxiter'] = maximum_iterations
             if 'verbose' not in solver['options']:
diff --git a/pymbar/mbar_solvers.py b/pymbar/mbar_solvers.py
index f786670a..82b41b29 100644
--- a/pymbar/mbar_solvers.py
+++ b/pymbar/mbar_solvers.py
@@ -9,15 +9,13 @@
 # We introduce our own robust solver as well.
 
 # Note: we use tuples instead of lists to avoid accidental mutability.
-#DEFAULT_SUBSAMPLING_PROTOCOL = (dict(method="L-BFGS-B"), )  # First use BFGS on subsampled data.
-#DEFAULT_SOLVER_PROTOCOL = (dict(method="hybr"), )  # Then do fmin hybrid on full dataset.
-# Use Adpative solver as first attempt
-
-DEFAULT_SOLVER_PROTOCOL = (dict(method="adaptive",),dict(method="L-BFGS-B",))
+#DEFAULT_SOLVER_PROTOCOL = (dict(method="adaptive",),dict(method="L-BFGS-B",))
+DEFAULT_SOLVER_PROTOCOL = (dict(method="BFGS", continuation=True), dict(method="adaptive"))
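+# Try BFGS first; if it fails, its result seeds the 'adaptive' solver (continuation=True).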
 # Uses all of the gradient-based methods, but not the non-gradient methods
 # ["Nelder-Mead", "Powell", "COBYLA"]
-SCIPY_OPTIONS = ["L-BFGS-B", "dogleg", "CG", "BFGS", "Newton-CG", "TNC", "trust-ncg", "trust-krylov", "trust-exact", "SLSQP"]
-SCIPY_NOHESS_OPTIONS = ["L-BFGS-B","BFGS","CG","TNC"] # don't pass a hessian to these, to avoid warnings.
+scipy_minimize_options = ["L-BFGS-B", "dogleg", "CG", "BFGS", "Newton-CG", "TNC", "trust-ncg", "trust-krylov", "trust-exact", "SLSQP"]
+scipy_nohess_options = ["L-BFGS-B","BFGS","CG","TNC","SLSQP"] # don't pass a hessian to these, to avoid warnings.
+scipy_root_options = ['hybr','lm'] # root-finding methods; these are passed the hessian (the Jacobian of the gradient).
 
 def validate_inputs(u_kn, N_k, f_k):
     """Check types and return inputs for MBAR calculations.
@@ -247,7 +245,7 @@ def adaptive(u_kn, N_k, f_k, tol = 1.0e-12, options = None):
     Is slower than NR since it calculates the log norms twice each iteration.
 
     OPTIONAL ARGUMENTS
-    tol (float between 0 and 1) - relative tolerance for convergence (default 1.0e-12)
+    tol (float between 0 and 1) - relative tolerance for convergence (default 1.0e-8)
 
     options: dictionary of options
        gamma (float between 0 and 1) - incrementor for NR iterations (default 1.0).  Usually not changed now, since we switch adaptively.
@@ -279,12 +277,12 @@ def adaptive(u_kn, N_k, f_k, tol = 1.0e-12, options = None):
     options.setdefault('gamma',1.0)
 
     gamma = options['gamma']
-        
+
     doneIterating = False
     if options['verbose'] == True:
         print("Determining dimensionless free energies by Newton-Raphson / self-consistent iteration.")
 
-    if tol < 1.5e-15:
+    if tol < 1.5e-8:
         print("Tolerance may be too close to machine precision to converge.")
     # keep track of Newton-Raphson and self-consistent iterations
     nr_iter = 0
@@ -306,11 +304,11 @@ def adaptive(u_kn, N_k, f_k, tol = 1.0e-12, options = None):
         f_sci = self_consistent_update(u_kn, N_k, f_k)
         f_sci = f_sci -  f_sci[0]   # zero out the minimum
         g_sci = mbar_gradient(u_kn, N_k, f_sci)
-        gnorm_sci = np.dot(g_sci, g_sci)
+        gnorm_sci = np.sqrt(np.dot(g_sci, g_sci))
 
         # newton raphson gradient norm and saved log sums.
         g_nr = mbar_gradient(u_kn, N_k, f_nr)
-        gnorm_nr = np.dot(g_nr, g_nr)
+        gnorm_nr = np.sqrt(np.dot(g_nr, g_nr))
 
         # we could save the gradient, for the next round, but it's not too expensive to
         # compute since we are doing the Hessian anyway.
@@ -340,7 +338,7 @@ def adaptive(u_kn, N_k, f_k, tol = 1.0e-12, options = None):
         if np.isnan(max_delta) or (max_delta < tol):
             doneIterating = True
             success = True
-            warn = "Convergence achieved by value of gnorm"
+            warn = "Convergence achieved by change in f with respect to previous guess."
             break
 
     if doneIterating:
@@ -398,7 +396,7 @@ def precondition_u_kn(u_kn, N_k, f_k):
     return u_kn
 
 
-def solve_mbar_once(u_kn_nonzero, N_k_nonzero, f_k_nonzero, method="adaptive", tol=1E-12, options=None):
+def solve_mbar_once(u_kn_nonzero, N_k_nonzero, f_k_nonzero, method="adaptive", tol=1E-12, continuation=None, options=None):
     """Solve MBAR self-consistent equations using some form of equation solver.
 
     Parameters
@@ -410,16 +408,7 @@ def solve_mbar_once(u_kn_nonzero, N_k_nonzero, f_k_nonzero, method = "adaptive",
         The number of samples in each state for the nonempty states
     f_k_nonzero : np.ndarray, shape=(n_states), dtype='float'
         The reduced free energies for the nonempty states
-    method : str, optional, default="adaptive"
-        The optimization routine to use.  This can be any of the methods
-        available via scipy.optimize.minimize() or scipy.optimize.root().
-    tol : float, optional, default=1E-14
-        The convergance tolerance for minimize() or root()
-    verbose: bool
-        Whether to print information about the solution method.
-    options: dict, optional, default=None
-        Optional dictionary of algorithm-specific parameters.  See
-        scipy.optimize.root or scipy.optimize.minimize for details.
+    method : str, optional, default="adaptive"
+        The optimization routine to use.  This can be "adaptive" or any of
+        the scipy.optimize.minimize() / scipy.optimize.root() methods listed
+        in the option lists above.
+    tol : float, optional, default=1E-12
+        The convergence tolerance for minimize() or root()
+    continuation : bool, optional, default=None
+        If True and this solver fails, its result is used as the starting
+        point for the next solver in the protocol (see `solve_mbar()`).
+    options : dict, optional, default=None
+        Optional dictionary of algorithm-specific parameters.  See
+        scipy.optimize.root or scipy.optimize.minimize for details.
 
     Returns
     -------
@@ -440,6 +429,7 @@ def solve_mbar_once(u_kn_nonzero, N_k_nonzero, f_k_nonzero, method = "adaptive",
     For fast but precise convergence, we recommend calling this function
     multiple times to polish the result.  `solve_mbar()` facilitates this.
     """
+
     u_kn_nonzero, N_k_nonzero, f_k_nonzero = validate_inputs(u_kn_nonzero, N_k_nonzero, f_k_nonzero)
     f_k_nonzero = f_k_nonzero - f_k_nonzero[0]  # Work with reduced dimensions with f_k[0] := 0
     u_kn_nonzero = precondition_u_kn(u_kn_nonzero, N_k_nonzero, f_k_nonzero)
@@ -453,19 +443,21 @@ def solve_mbar_once(u_kn_nonzero, N_k_nonzero, f_k_nonzero, method = "adaptive",
     hess = lambda x: mbar_hessian(u_kn_nonzero, N_k_nonzero, pad(x))[1:][:, 1:]  # Hessian of objective function
 
     with warnings.catch_warnings(record=True) as w:
-        if method in SCIPY_OPTIONS:
-            if method in SCIPY_NOHESS_OPTIONS:
+        if method in scipy_minimize_options:
+            if method in scipy_nohess_options:
                 hess = None  # To suppress warning from passing a hessian function.
             results = scipy.optimize.minimize(grad_and_obj, f_k_nonzero[1:], jac=True, hess=hess, method=method, tol=tol, options=options)
             f_k_nonzero = pad(results["x"])
         elif method == 'adaptive':
             results = adaptive(u_kn_nonzero, N_k_nonzero, f_k_nonzero, tol=tol, options=options)
             f_k_nonzero = results["x"] 
-        else:
+        elif method in scipy_root_options:
             # find the root of the gradient.  It is unclear how well these root-finding methods have been tested.
             results = scipy.optimize.root(grad, f_k_nonzero[1:], jac=hess, method=method, tol=tol, options=options)
             f_k_nonzero = pad(results["x"])
-
+        else:
+            # ParameterError is not imported at module level in mbar_solvers.py,
+            # so import it locally to keep this branch self-contained.
+            from pymbar.utils import ParameterError
+            raise ParameterError(f"Method {method} for solution of free energies not recognized")
+
     # If there were runtime warnings, show the messages
     if len(w) > 0:
         can_ignore = True
@@ -535,6 +527,8 @@ def solve_mbar(u_kn_nonzero, N_k_nonzero, f_k_nonzero, solver_protocol=None):
 
     """
 
     all_fks = []
     all_results = []
     all_gnorms = []
@@ -552,6 +546,10 @@ def solve_mbar(u_kn_nonzero, N_k_nonzero, f_k_nonzero, solver_protocol=None):
             break
         else:
             print(f"Failed to reach a solution to within tolerance with {solver['method']}: trying next method")
+            print(f"Gradient norm at failure: {all_gnorms[-1]:.3g}")
+            if solver.get("continuation"):
+                f_k_nonzero = f_k_nonzero_result
+                print("Will continue with results from the previous method.")
 
     if success:
         print("Solution found within tolerance!")
@@ -559,10 +557,10 @@ def solve_mbar(u_kn_nonzero, N_k_nonzero, f_k_nonzero, solver_protocol=None):
         i_best_gnorm = np.argmin(all_gnorms)
         print("No solution found to within tolerance.")
         best_method = solver_protocol[i_best_gnorm]['method']
-        print(f"The solution with the smallest lowest gradient norm is {best_method}")
-        best_gnorm = all_gnorm[i_best_gnorm]
-        f_k_nonzero_result = all_fks[i_best_ngnorm]
-        print("Please exercise caution with this solution and consider alternative methods or a different tolerance") 
+        print(f"The solution with the smallest gradient norm is {best_method}")
+        best_gnorm = all_gnorms[i_best_gnorm]
+        f_k_nonzero_result = all_fks[i_best_gnorm]
+        print("Please exercise caution with this solution and consider alternative methods or a different tolerance.") 
 
     print(f"Final gradient norm: {best_gnorm:.3g}")
 
diff --git a/pymbar/utils.py b/pymbar/utils.py
index a44f294c..7ce3d6d0 100644
--- a/pymbar/utils.py
+++ b/pymbar/utils.py
@@ -356,8 +356,8 @@ def check_w_normalized(W, N_k, tolerance = 1.0e-4):
         which_badcolumns = np.arange(K)[badcolumns]
         firstbad = which_badcolumns[0]
         raise ParameterError(
-            'Warning: Should have \sum_n W_nk = 1.  Actual column sum for state %d was %f. %d other columns have similar problems' %
-            (firstbad, column_sums[firstbad], np.sum(badcolumns)))
+            r'Warning: Should have \sum_n W_nk = 1.  Actual column sum for state %d was %f. %d other columns have similar problems.' %
+            (firstbad, column_sums[firstbad], np.sum(badcolumns)) + ' This generally indicates the free energies are not converged.')
 
     row_sums = np.sum(W * N_k, axis=1)
     badrows = (np.abs(row_sums - 1) > tolerance)
@@ -365,8 +365,8 @@ def check_w_normalized(W, N_k, tolerance = 1.0e-4):
         which_badrows = np.arange(N)[badrows]
         firstbad = which_badrows[0]
         raise ParameterError(
-            'Warning: Should have \sum_k N_k W_nk = 1.  Actual row sum for sample %d was %f. %d other rows have similar problems' %
-            (firstbad, row_sums[firstbad], np.sum(badrows)))
+            r'Warning: Should have \sum_k N_k W_nk = 1.  Actual row sum for sample %d was %f. %d other rows have similar problems.' %
+            (firstbad, row_sums[firstbad], np.sum(badrows)) + ' This generally indicates the free energies are not converged.')
 
     return