Skip to content
35 changes: 30 additions & 5 deletions src/dwi_ml/cli/dwiml_divide_volume_into_blocs.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,15 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
Utility script to allow investigating the "blocs" if we divide a volume
by L x P x H blocs.

Useful to understand what the option 'connectivity_nb_blocs' does in the config
file of the HDF5 creation
(see here https://dwi-ml.readthedocs.io/en/latest/for_users/hdf5.html).

"""
import argparse

import nibabel as nib
Expand All @@ -15,20 +25,23 @@ def _build_arg_parser():
p = argparse.ArgumentParser(
description=__doc__, formatter_class=argparse.RawTextHelpFormatter)
p.add_argument('in_image', metavar='IN_FILE',
help='Input file name, in nifti format.')
help='Input file name, in nifti format. Any reference file.')
p.add_argument('out_filename',
help='name of the output file, which will be saved as a '
'text file.')
'nifti file.')
p.add_argument('nb_blocs', nargs='+', type=int,
help="Number of blocs. Either a single int, or a list of "
"3 values.")
p.add_argument('--shuffle_colors', action='store_true',
help="If set, will randomly shuffle the label of blocs in "
"the volume.")

add_overwrite_arg(p)

return p


def color_mri_connectivity_blocs(nb_blocs, volume_size):
def color_mri_connectivity_blocs(nb_blocs, volume_size, shuffle_colors):

# For tracking coordinates: we can work with float.
# Here, dividing as ints.
Expand All @@ -37,13 +50,24 @@ def color_mri_connectivity_blocs(nb_blocs, volume_size):
sizex, sizey, sizez = (volume_size / nb_blocs).astype(int)
print("Coloring into blocs of size: ", sizex, sizey, sizez)

# Preparing colors.
nb_blocs_total = np.prod(nb_blocs)
all_colors = np.arange(nb_blocs_total)

    # Shuffling. Else, in Mi-Brain, the colors of blocs one beside the other
    # are the same color.
if shuffle_colors:
all_colors = np.random.permutation(all_colors)

all_colors = all_colors.reshape(nb_blocs)

final_volume = np.zeros(volume_size)
for i in range(nb_blocs[0]):
for j in range(nb_blocs[1]):
for k in range(nb_blocs[2]):
final_volume[i*sizex: (i+1)*sizex,
j*sizey: (j+1)*sizey,
k*sizez: (k+1)*sizez] = i + 10*j + 100*k
k*sizez: (k+1)*sizez] = all_colors[i, j, k]

return final_volume

Expand All @@ -61,7 +85,8 @@ def main():
volume = nib.load(args.in_image)

# Processing
final_volume = color_mri_connectivity_blocs(args.nb_blocs, volume.shape)
final_volume = color_mri_connectivity_blocs(args.nb_blocs, volume.shape,
args.shuffle_colors)

# Saving
img = nib.Nifti1Image(final_volume, volume.affine)
Expand Down
58 changes: 47 additions & 11 deletions src/dwi_ml/cli/dwiml_visualize_logs.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,22 +8,31 @@

The number of graphs per figure can be modified with --nb_plots_per_fig.

Specifying which logs
---------------------
You may also specify the logs you want to plot. Use the option --graph, with
the graph's title and the name(s) of the logs to add to one graph. This option
can be used many times. Ex:
the graph's title and the name(s) of the logs to add to one graph. Ex:
>> --graph "Training loss" train_loss_monitor_per_epoch
>> --graph "Some plot" log1 --graph "Two plots" log2 log3
>> --graph "Two plots" log2 log3

You may use the option --graph many times.

Specifying y-axis limits
------------------------
Optionally, you can add the 2-valued ylims for each graph. These values will
supersede the given --ylim, if any.
>> --graph "Some plot" log1 0 100

Operations on logs
------------------
Finally, you can also supply operations to apply to your logs, amongst:
['diff', 'sum']. Ex:
>> --graph "Training minus validation" diff(log1, log2) 0 100
>> --graph "Value1 plus value2" sum(log1, log2) 0 100

** Note that we only accept one operation per graph. The following is not
supported:
>> --graph "Training minus validation" diff(log1, log2) log 3 0 100
>> --graph "Training minus validation" diff(log1, log2) log3 0 100

------------------------------
"""
Expand Down Expand Up @@ -62,7 +71,8 @@ def _build_arg_parser():

g = p.add_argument_group("Figure options")
g.add_argument("--graph", action='append', nargs='+', dest='graphs',
help="See description above for usage.")
help="See description above for usage."
"If not set, will plot all logs in current directory.")
g.add_argument("--nb_plots_per_fig", type=int, default=3, metavar='n',
help="Number of (rows) of plot per figure. Default: 3.")
g.add_argument('--xlim', type=int, metavar='epoch_max',
Expand All @@ -86,7 +96,18 @@ def _build_arg_parser():


def _parse_graphs_arg(parser, args):
"""Parse args.graphs"""
"""
Parse args.graphs. Possible options:
--graph title log_name ylim1 ylim2
--graph title diff(log1, log2) ylim1 ylim2

Returns
-------
graphs_titles: list
graphs_logs: list
graphs_ylims: list
graph_operations: list
"""
if args.graphs is None:
return None, None, None, None
else:
Expand Down Expand Up @@ -121,7 +142,7 @@ def _parse_graphs_arg(parser, args):
_ylims = None

# Verify if user gave an operation (diff or sum)
_logs, operation = __parse_log_operations(parser, _logs)
_logs, operation = _parse_graphs_arg_operations(parser, _logs)

# Remove .npy to log names if added by user.
for i, log in enumerate(_logs):
Expand All @@ -141,7 +162,11 @@ def _parse_graphs_arg(parser, args):
return graphs_titles, graphs_logs, graphs_ylims, graph_operations


def __parse_log_operations(parser, graph):
def _parse_graphs_arg_operations(parser, graph):
"""
Used when parsing option args.graphs. Checking if the operation uses
diff or sum.
"""
if len(graph) == 1:
_graph = graph[0]
if _graph[0:5] == 'diff(':
Expand Down Expand Up @@ -171,7 +196,7 @@ def __parse_log_operations(parser, graph):

def _load_all_logs(parser, args, logs_path, previous_graphs):
"""
Load all logs in an experiment's dir (no option --graph was supplied)
Load all logs in an experiment's dir (if no option --graph was supplied)
"""
logging.debug("Loading all logs for that experiment.")
files_to_load = list(logs_path.glob('*.npy'))
Expand Down Expand Up @@ -203,7 +228,8 @@ def _load_all_logs(parser, args, logs_path, previous_graphs):
def _load_chosen_logs(parser, args, logs_path, parsed_graphs,
graph_operations):
"""
    Load only logs specified through --graph, if that option was given.
    (args.graphs is already parsed)
(args.graphs is already parsed)
"""
logging.debug("Loading only chosen logs for that experiment")
this_exp_dict = {}
Expand Down Expand Up @@ -275,7 +301,9 @@ def main():
pil_logger = logging.getLogger('PIL')
pil_logger.setLevel('WARNING')

# ---------------
# Verifications
# ---------------
if not (args.save_figures or args.show_now):
parser.error("This script will plot nothing. Choose either --show_now "
"or --save_figures.")
Expand All @@ -291,9 +319,12 @@ def main():
# Prefix given, but directory does not exist.
parser.error("Output dir for figures does not exist.")

# Loop on all experiments
# ---------------
# Loop on all experiments to load all logs
# ---------------
loaded_logs = {} # dict of dicts
for i, exp_path in enumerate(args.experiments):

# Verifications for this experiment
if not pathlib.Path(exp_path).exists():
raise ValueError("Experiment folder does not exist: {}"
Expand All @@ -314,7 +345,9 @@ def main():
graphs_logs, graphs_operation)
loaded_logs[exp_name] = this_exp_dict

# ---------------
# Formatting the final graphs choice.
# ---------------
if args.graphs is None:
graphs_titles = graphs_logs
graphs_logs = [[log] for log in graphs_logs]
Expand All @@ -330,6 +363,9 @@ def main():
graphs_ylims = [args.ylims if ylim is None else ylim
for ylim in graphs_ylims]

# ---------------
# MAIN CALL: Plotting everything
# ---------------
_args = (loaded_logs, graphs_titles, graphs_logs, graphs_ylims,
args.nb_plots_per_fig)
kwargs = {'xlim': args.xlim,
Expand Down
101 changes: 80 additions & 21 deletions src/dwi_ml/cli/dwiml_visualize_logs_correlation.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,15 @@ def _build_arg_parser():
p.add_argument('--ignore_first_epochs', type=int, metavar='n',
help="If set, ignores the first n epochs of each "
"experiment.")
p.add_argument('--show_individual_logs', action='store_true',
help="If set, shows individual logs as well as the "
"correlation graph (3 graphs total)")
p.add_argument('--show_first_order_fit', action='store_true',
help="If set, shows first order fit.")
p.add_argument('--show_second_order_fit', action='store_true',
help="If set, show the quadratic fit.")
p.add_argument('--xlim', nargs=2, type=float)
p.add_argument('--ylim', nargs=2, type=float)

add_overwrite_arg(p)
add_verbose_arg(p)
Expand All @@ -54,7 +63,9 @@ def _load_chosen_logs(parser, args, logs_path):


def _compute_correlations(loaded_dicts, log1_key, log2_key,
name_log1, name_log2, first_epoch):
name_log1, name_log2, first_epoch, xlim, ylim,
show_individual,
show_first_order, show_second_order):
# One color per experiment
jet = plt.get_cmap('jet')
exp_names = list(loaded_dicts.keys())
Expand All @@ -64,7 +75,11 @@ def _compute_correlations(loaded_dicts, log1_key, log2_key,
all_x = []
all_y = []
labels = []
fig, axs = plt.subplots(3, 1)
if show_individual:
fig, axs = plt.subplots(3, 1)
else:
fig, axs = plt.subplots(1, 1)
axs = [axs]
for i, exp in enumerate(loaded_dicts.keys()):
color_val = scalar_map.to_rgba(i)
x = loaded_dicts[exp][log1_key][first_epoch:]
Expand All @@ -74,30 +89,70 @@ def _compute_correlations(loaded_dicts, log1_key, log2_key,
corr = np.corrcoef(x, y)[0][1]
print("Correlation for exp {} is {}".format(exp, corr))

axs[0].scatter(epochs, x, color=color_val, s=10)
axs[1].scatter(epochs, y, color=color_val, s=10)
axs[2].scatter(x, y, color=color_val, s=10)
if show_individual:
axs[0].scatter(epochs, x, color=color_val, s=10)
axs[1].scatter(epochs, y, color=color_val, s=10)
axs[2].scatter(x, y, color=color_val, s=10)
axs[0].set_ylabel(name_log1)
axs[0].set_xlabel("Epochs")
axs[1].set_ylabel(name_log2)
axs[1].set_xlabel("Epochs")
ax_corr = axs[2]
else:
axs[0].scatter(x, y, color=color_val, s=10)
ax_corr = axs[0]
labels.append(exp + ':{:.2f}'.format(corr))

all_x.extend(x)
all_y.extend(y)

corr = np.corrcoef(all_x, all_y)[0][1]
b, m = np.polynomial.polynomial.polyfit(all_x, all_y, 1)
xx = np.linspace(np.min(all_x), np.max(all_x), 100)
axs[2].plot(xx, b + m * xx, color='k',
label="y={:.4f}x + {:.4f}".format(m, b))
axs[2].legend()

print("Correlation over all experiments is {}".format(corr))
axs[0].set_ylabel(name_log1)
axs[0].set_xlabel("Epochs")
axs[1].set_ylabel(name_log2)
axs[1].set_xlabel("Epochs")
axs[2].set_xlabel(name_log1)
axs[2].set_ylabel(name_log2)
axs[2].set_title("Correlation between {} and {} = {:.4f}"
.format(name_log1, name_log2, corr))
idx = np.argsort(all_x)
all_x = np.asarray(all_x)[idx]
all_y = np.asarray(all_y)[idx]
titre = ("Correlation between {} and {} = {:.3f}."
.format(name_log1, name_log2, corr))
print("Correlation over all experiments is {}.".format(corr))

# Linear fitting
x_line = np.linspace(min(all_x), max(all_x), 200)
if show_first_order:
# Fit
coef_lin = np.polyfit(all_x, all_y, 1)

# Residuals
y_lin = np.polyval(coef_lin, all_x)
res_lin = all_y - y_lin
mse_lin = np.mean(res_lin**2)

# Nice plot
y_lin_line = np.polyval(coef_lin, x_line)
ax_corr.plot(x_line, y_lin_line, color='k')
titre += "\nMSE (linear): {:.1e}".format(mse_lin)

# Quadratic fitting
if show_second_order:
# Fit
coef_quad = np.polyfit(all_x, all_y, 2)

# Residuals
y_quad = np.polyval(coef_quad, all_x)
res_quad = all_y - y_quad
mse_quad = np.mean(res_quad ** 2)

# Nice plot
y_quad_line = np.polyval(coef_quad, x_line)
ax_corr.plot(x_line, y_quad_line, color='k')
titre += "\nMSE (quadratic): {:.1e}".format(mse_quad)


ax_corr.set_xlabel(name_log1)
ax_corr.set_ylabel(name_log2)
ax_corr.set_title(titre)
if xlim:
ax_corr.set_xlim(*xlim)
if ylim:
ax_corr.set_ylim(*ylim)

# The xlabels and titles overlap
# plt.tight_layout() # Makes the subplots very thin.
Expand Down Expand Up @@ -139,7 +194,11 @@ def main():
name_log2 = args.rename_log2 or args.log2
first_epoch = args.ignore_first_epochs or 0
_compute_correlations(loaded_logs, args.log1, args.log2,
name_log1, name_log2, first_epoch)
name_log1, name_log2, first_epoch,
args.xlim, args.ylim,
args.show_individual_logs,
args.show_first_order_fit,
args.show_second_order_fit)


if __name__ == '__main__':
Expand Down
12 changes: 7 additions & 5 deletions src/dwi_ml/cli/tt_visualize_weights.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ def main():
# 1) Finding the jupyter notebook
dwi_ml_dir = dirname(dirname(__file__))
raw_ipynb_filename = os.path.join(
dwi_ml_dir, 'dwi_ml/testing/projects/tt_visualize_weights.ipynb')
dwi_ml_dir, 'projects/Transformers/tester/tt_visualize_weights.ipynb')
if not os.path.isfile(raw_ipynb_filename):
raise ValueError(
"We could not find the jupyter notebook file. Probably a "
Expand All @@ -68,12 +68,14 @@ def main():
# 2) Verify that output dir exists but not the html output files.
args = get_out_dir_and_create(args)

out_html_filename = args.out_prefix + 'tt_bertviz.html'
out_html_file = os.path.join(args.out_dir, out_html_filename)
bertviz_out = os.path.join(args.out_dir, 'bertviz')
out_html_filename = os.path.join(
bertviz_out, args.out_prefix + 'tt_bertviz.html')
out_html_file = os.path.join(bertviz_out, out_html_filename)
out_ipynb_file = os.path.join(
args.out_dir, args.out_prefix + 'tt_bertviz.ipynb')
bertviz_out, args.out_prefix + 'tt_bertviz.ipynb')
out_config_file = os.path.join(
args.out_dir, args.out_prefix + 'tt_bertviz.config')
bertviz_out, args.out_prefix + 'tt_bertviz.config')
assert_outputs_exist(parser, args,
[out_html_file, out_ipynb_file, out_config_file])

Expand Down
Loading
Loading