code and tests

freeman-lab · freeman-lab · commit ff50d1c3ee59 · 2016-03-22T20:57:44.000-04:00
diff --git a/README.md b/README.md
@@ -2,7 +2,13 @@
 
 > python module for testing neuron finding algorithms.
 
-This repository contains a module and a CLI for working with neuron finding algorithms. It is used by the [neurofinder](https://github.com/neurofinder) benchmarking challenge to compare ground truth results to results from submitted algorithms.
+This repository contains a module and a CLI for working with neuron finding algorithm results. It is used by the [neurofinder](https://github.com/neurofinder) benchmarking challenge to compare ground truth results to results from submitted algorithms.
+
+Assumes a standard format for spatial regions, in either `JSON` or `MAT`.
+
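+The `JSON` format is a list of regions, each an object with a `coordinates` field listing the pixel coordinates of that region, for example `[{"coordinates": [[0, 0], [0, 1], [1, 0], [1, 1]]}]`.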
+
+And the `MAT` format is:
 
 ## install
 
@@ -26,12 +32,20 @@ You can also pass `MAT` files as one or both arguments
 neurofinder evaluate neurons1.mat neurons2.mat
 ```
 
-## use as a python module
+## methods
 
-Import the module and pass it two dictionaries
+#### `neurofinder.load(file)`
 
-```
-import neurofinder
+Load regions from either a `JSON` or `MAT` file.
+
+#### `neurofinder.match(a, b, unique=True, min_distance=inf)`
+
+Match regions from `a` to `b` based on distances between their centers. Returns a list of indices specifying, for each region in `a`, the index of the matching region in `b` (or `NaN` if no match was found). If `unique` is true, each region in `b` is matched at most once. If `min_distance` is less than `inf`, matches whose distance exceeds it are not allowed.
+
+#### `neurofinder.centers(a, b, threshold=5)`
+
+Compare centers between two sets of regions `a` and `b`. Returns two metrics, the `recall` and `precision`, which are defined as the total number of matching regions, according to the given distance `threshold`, divided by the number of regions in `a` or `b`, respectively.
+
+#### `neurofinder.shapes(a, b, min_distance=inf)`
 
-results = neurofinder.evaluate(neurons1, neurons2)
-```
+Compare shapes between regions in `a` and `b` after first finding matches. For each pair of matched regions, the `overlap` and `exactness` are computed as the number of intersecting pixels divided by the number of pixels in the first, or second, region, respectively.
diff --git a/a.json b/a.json
@@ -0,0 +1,6 @@
+[
+  {"coordinates": [[0, 0], [0, 1], [1, 0], [1, 1]]}, 
+  {"coordinates": [[10, 10], [10, 11], [11, 10], [11, 11]]},
+  {"coordinates": [[20, 20], [20, 21], [21, 20], [21, 21]]},
+  {"coordinates": [[30, 30], [30, 31], [31, 30], [31, 31]]}
+]
diff --git a/b.json b/b.json
@@ -0,0 +1,5 @@
+[
+  {"coordinates": [[0, 0], [0, 1], [1, 2], [1, 0], [1, 1]]},
+  {"coordinates": [[10, 10], [11, 10], [11, 11]]},
+  {"coordinates": [[30, 30], [30, 31], [32, 30], [31, 31]]}
+]
diff --git a/example.py b/example.py
@@ -0,0 +1,18 @@
+import json
+from neurofinder import load, match, centers, shapes
+
+# Load the two example region sets that ship next to this script.
+a = load('a.json')
+b = load('b.json')
+
+#print(match(a, b, min_distance=5))
+# The package exports `centers` (and `shapes`), not `similarity`/`overlap`;
+# `centers` returns the pair (recall, precision).
+print(centers(a, b))
+
+# recall, precision = similarity(a, b, metric='distance', minDistance=threshold)
+# stats = overlap(a, b, method='rates', minDistance=threshold)
+
+# score = 2 * (recall * precision) / (recall + precision)
+
+# if sum(~isnan(stats)) > 0:
+#     overlap, exactness = tuple(nanmean(stats, axis=0))
+# else:
+#     overlap, exactness = 0.0, 1.0
diff --git a/neurofinder/__init__.py b/neurofinder/__init__.py
@@ -1 +1,3 @@
+from main import load, match, centers, shapes
+
 __version__='1.0.0'
diff --git a/neurofinder/cli.py b/neurofinder/cli.py
@@ -6,8 +6,9 @@
 @click.group(options_metavar='', subcommand_metavar='<command>', context_settings=settings)
 def cli():
     """
-    Hi! This is a command line tool for comparing neuron finding algorithms.
+    Hi! This is a tool for working with neuron finding algorithm results.
+
+    Check out the list of commands to see what you can do.
     """
-    print 'hi'
 
 cli.add_command(evaluate)
diff --git a/neurofinder/commands/evaluate.py b/neurofinder/commands/evaluate.py
@@ -1,6 +1,16 @@
 import os
 import click
+from .. import load, centers, shapes
 
+@click.argument('file1', nargs=1, metavar='<file1>', required=True)
+@click.argument('file2', nargs=1, metavar='<file2>', required=True)
 @click.command('evaluate', short_help='compare results of two algorithms', options_metavar='<options>')
-def evaluate():
-    print('evaluating algorithms')
+def evaluate(file1, file2):
+    """
+    Compare the regions stored in two files and print summary metrics.
+
+    Both files are loaded, center-based recall/precision and shape-based
+    overlap/exactness are computed, and the values are printed as a
+    dictionary together with 'average', the harmonic mean of recall
+    and precision.
+    """
+    a = load(file1)
+    b = load(file2)
+    # centers() returns (recall, precision), in that order
+    recall, precision = centers(a, b)
+    overlap, exactness = shapes(a, b)
+
+    # harmonic mean of recall and precision; report 0.0 instead of
+    # dividing zero by zero when no regions were matched at all
+    total = recall + precision
+    average = 2 * (recall * precision) / total if total > 0 else 0.0
+
+    result = {'average': average, 'overlap': overlap, 'precision': precision, 'recall': recall, 'exactness': exactness}
+    print(result)
diff --git a/neurofinder/main.py b/neurofinder/main.py
@@ -1,37 +1,133 @@
-import os
+import json
+from numpy import inf, NaN, newaxis, argmin, delete, asarray, isnan, sum, nanmean
+from scipy.spatial.distance import cdist
+from regional import one, many
 
-def init(force):
+def load(file):
     """
-    This command initializes a folder with the typical contents of a Python package.
-    After running this and writing your code, you should be ready to publish your package.
+    Load neuronal regions from a file.
     """
-    echo('\nThis utility will help you set up a new python module for publishing on PyPi!\n')
-    echo('After answering a few questions, it will create a few files.')
-    echo('\nPress ^C at any time to bail!\n')
+    with open(file, 'r') as f:
+        values = json.load(f)
+        return many([v['coordinates'] for v in values])
+    
+def match(a, b, unique=True, min_distance=inf):
+    """
+    Find matches between two sets of regions.
+
+    Can select nearest matches with or without enforcing uniqueness.
+    If unique is False, returns the closest region in b for each region
+    in a, possibly repeating regions of b multiple times. If unique is
+    True, each region in b can be matched to at most one region in a,
+    as determined by a greedy selection procedure. The min_distance
+    parameter can be used to prevent far-away regions from being
+    chosen during selection.
+
+    Params
+    ------
+    a, b : regions
+        The regions to match.
+
+    unique : boolean, optional, default = True
+        Whether to only return unique matches.
+
+    min_distance : scalar, optional, default = inf
+        Only regions whose centers are closer than this distance can be matched.
+    """
+    targets = b.center
+    target_inds = range(0, len(targets))
+    matches = []
+    for s in a:
+        update = 1
 
-    remap = {
-        'entry': 'entry point',
-        'package': 'package name'
-    }
-    d = _defaults()
-    for k, v in d.items():
-        d[k] = prompt.query('%s:' % remap.get(k, k), default=v)
+        # skip if no targets left, otherwise update
+        if len(targets) == 0:
+            update = 0
+        else:
+            dists = cdist(targets, s.center[newaxis])
+            if dists.min() < min_distance:
+                ind = argmin(dists)
+            else:
+                update = 0
+
+        # apply updates, otherwise add a nan
+        if update == 1:
+            matches.append(target_inds[ind])
+            if unique is True:
+                targets = delete(targets, ind, axis=0)
+                target_inds = delete(target_inds, ind)
+        else:
+            matches.append(NaN)
+
+    return matches
+
+def shapes(a, b, min_distance=inf):
+    """
+    Compare shapes between two sets of regions.
+
+    Regions in a are first matched one-to-one to regions in b. For each
+    matched pair the 'rates' overlap is computed; regions in a without a
+    match contribute NaN and are ignored when averaging.
+
+    Parameters
+    ----------
+    a, b : regions
+        The regions for which to estimate overlap.
+
+    min_distance : scalar, optional, default = inf
+        Distance limit passed on to the matching step.
+
+    Returns
+    -------
+    overlap, exactness : scalars
+        Means over the matched pairs, ignoring NaN entries, or
+        (0.0, 1.0) when there are no non-NaN values to average.
+    """
+    inds = match(a, b, unique=True, min_distance=min_distance)
+    d = []
+    for jj, ii in enumerate(inds):
+        # unmatched entries are NaN; test by value rather than by object
+        # identity so this does not depend on which NaN object was stored
+        if isnan(ii):
+            d.append((NaN, NaN))
+        else:
+            d.append(a[jj].overlap(b[ii], method='rates'))
+
+    result = asarray(d)
+
+    # average only if at least one value is not NaN
+    if sum(~isnan(result)) > 0:
+        overlap, exactness = tuple(nanmean(result, axis=0))
+    else:
+        overlap, exactness = 0.0, 1.0
+
+    return overlap, exactness
+
+def centers(a, b, threshold=5):
+    """
+    Compare centers between two sets of regions.
+
+    The recall rate is the number of matches divided by the number of regions in a,
+    and the precision rate is the number of matches divided by the number of regions in b.
+    Typically a is ground truth and b is an estimate.
+    The F score is defined as 2 * (recall * precision) / (recall + precision)
+
+    Before computing metrics, all regions in a are matched to b,
+    using threshold as the distance limit for matching.
+
+    Parameters
+    ----------
+    a, b : regions
+        The regions for which to estimate overlap.
+
+    threshold : scalar, optional, default = 5
+        The distance below which a source is considered found.
+
+    min_distance : scalar, optional, default = inf
+        Minimum distance to use when matching indices.
+    """
+    inds = match(a, b, unique=True, min_distance=threshold)
 
-    echo('\nReady to create the following files:')
+    d = []
+    for jj, ii in enumerate(inds):
+        if ii is not NaN:
+            d.append(a[jj].distance(b[ii]))
+        else:
+            d.append(NaN)
 
-    with indent(4, quote='  -'):
-        puts('setup.py')
-        puts('setup.cfg')
-        puts('MANIFEST.in')
-        puts(d['package'] + '/' + '__init__.py')
-        puts(d['package'] + '/' + d['entry'])
-        puts('requirements.txt')
+    result = asarray(d)
 
-    finalize = prompt.yn('\nSound like a plan?', default='y')
+    result[isnan(result)] = inf
+    compare = lambda x: x < threshold
 
-    if finalize:
-        echo('')
-        _make_package(d, force)
-        echo('')
+    recall = sum(map(compare, result)) / float(a.count)
+    precision = sum(map(compare, result)) / float(b.count)
 
-    success('Your package is initialized!')
+    return recall, precision
diff --git a/requirements.txt b/requirements.txt
@@ -1 +1,2 @@
-click
+click
+regional
diff --git a/tests/test_main.py b/tests/test_main.py
@@ -0,0 +1,59 @@
+from numpy import allclose, nan
+from regional import many
+from neurofinder import match, shapes, centers
+
+
+def test_match():
+  a = many([[[0, 0], [0, 1], [1, 0], [1, 1]], [[10, 10], [10, 11], [11, 10], [11, 11]]])
+  b = many([[[0, 0], [0, 1], [1, 0], [1, 1]], [[30, 30], [31, 30], [31, 31]]])
+  assert match(a, b) == [0, 1]
+  assert match(a, b, min_distance=5) == [0, nan]
+
+
+def test_match_flipped():
+  a = many([[[0, 0], [0, 1], [1, 0], [1, 1]], [[10, 10], [10, 11], [11, 10], [11, 11]]])
+  b = many([[[30, 30], [31, 30], [31, 31]], [[0, 0], [0, 1], [1, 0], [1, 1]]])
+  assert match(a, b) == [1, 0]
+  assert match(a, b, min_distance=5) == [1, nan]
+
+
+def test_similarity():
+  a = many([[[0, 0], [0, 1], [1, 0], [1, 1]], [[10, 10], [10, 11], [11, 10], [11, 11]]])
+  b = many([[[0, 0], [0, 1], [1, 0], [1, 1]], [[30, 30], [31, 30], [31, 31]]])
+  assert centers(a, b) == (0.5, 0.5)
+
+
+def test_similarity_perfect():
+  a = many([[[0, 0], [0, 1], [1, 0], [1, 1]], [[10, 10], [10, 11], [11, 10], [11, 11]]])
+  b = many([[[0, 0], [0, 1], [1, 0], [1, 1]], [[10, 10], [10, 11], [11, 10], [11, 11]]])
+  assert centers(a, b) == (1.0, 1.0)
+
+
+def test_similarity_perfect_flipped():
+  a = many([[[0, 0], [0, 1], [1, 0], [1, 1]], [[10, 10], [10, 11], [11, 10], [11, 11]]])
+  b = many([[[10, 10], [10, 11], [11, 10], [11, 11]], [[0, 0], [0, 1], [1, 0], [1, 1]]])
+  assert centers(a, b) == (1.0, 1.0)
+
+
+def test_overlap_too_few():
+  a = many([[[0, 0], [0, 1], [1, 0], [1, 1]], [[10, 10], [10, 11], [11, 10], [11, 11]]])
+  b = many([[[0, 0], [0, 1], [1, 0], [1, 1]], [[10, 10], [11, 11]]])
+  assert shapes(a, b) == (0.75, 1.0)
+
+
+def test_overlap_too_many():
+  a = many([[[0, 0], [0, 1]], [[10, 10], [10, 11]]])
+  b = many([[[0, 0], [0, 1]], [[10, 10], [10, 11], [11, 10], [11, 12]]])
+  assert shapes(a, b) == (1.0, 0.75)
+
+
+def test_overlap_perfect():
+  a = many([[[0, 0], [0, 1]], [[10, 10], [10, 11]]])
+  b = many([[[0, 0], [0, 1]], [[10, 10], [10, 11]]])
+  assert shapes(a, b) == (1.0, 1.0)
+
+
+def test_overlap_perfect_flipped():
+  a = many([[[0, 0], [0, 1]], [[10, 10], [10, 11]]])
+  b = many([[[10, 10], [10, 11]], [[0, 0], [0, 1]]])
+  assert shapes(a, b) == (1.0, 1.0)

Original file line number	Diff line number	Diff line change
`@@ -1 +1,3 @@`
	`1`	`+from main import load, match, centers, shapes`
	`2`	`+`
`1`	`3`	`__version__='1.0.0'`