Skip to content

Commit

Permalink
Merge pull request #1588 from girder/plottable-data-lock
Browse files Browse the repository at this point in the history
Make the plottable data class thread safe.
  • Loading branch information
manthey authored Jul 26, 2024
2 parents 8439db9 + d8bf3c3 commit 07e0429
Show file tree
Hide file tree
Showing 3 changed files with 98 additions and 37 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
# Change Log

## 1.29.5

### Improvements

- Make the plottable data class threadsafe ([#1588](../../pull/1588))

## 1.29.4

### Improvements
Expand Down
90 changes: 53 additions & 37 deletions girder_annotation/girder_large_image_annotation/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import json
import math
import re
import threading

from bson.objectid import ObjectId

Expand Down Expand Up @@ -378,6 +379,7 @@ def __init__(self, user, item, annotations=None, adjacentItems=False, sources=No
self._fullScan = adjacentItems == '__all__'
self._findItems(item, adjacentItems)
self._findAnnotations(annotations)
self._dataLock = threading.RLock()

def _findItems(self, item, adjacentItems=False):
"""
Expand Down Expand Up @@ -764,8 +766,7 @@ def _collectRecordRows(
col['distinct'].add(value)
if self._datacolumns and colkey in self._datacolumns:
self._datacolumns[colkey][(
iid[recidx] if isinstance(iid, list) else iid or '',
aid or '', eid or '',
iid or '', aid or '', eid or '',
rowidx if length is not None else -1)] = value

def _collectRecords(self, columns, recordlist, doctype, iid=None, aid=None):
Expand All @@ -781,6 +782,8 @@ def _collectRecords(self, columns, recordlist, doctype, iid=None, aid=None):
"""
eid = None
for colkey, col in columns.items():
if self._datacolumns and colkey not in self._datacolumns:
continue
if doctype not in col['where']:
continue
getData, selector, length = col['where'][doctype]
Expand Down Expand Up @@ -850,34 +853,14 @@ def _collectColumns(self, columns, recordlist, doctype, first=True, iid=None, ai
self._columnsFromData(columns, doctype, getData, record)
self._collectRecords(columns, recordlist, doctype, iid, aid)

@property
def columns(self):
def _getColumns(self):
"""
Get a sorted list of plottable columns with some metadata for each.
Each data entry contains
:key: the column key. For database entries, this is (item|
annotation|annotationelement).(id|name|description|group|
label). For bounding boxes this is bbox.(x0|y0|x1|y1). For
data from meta / attributes / user, this is
data.(key)[.0][.(key2)][.0]
:type: 'string' or 'number'
:title: a human readable title
:count: the number of non-null entries in the column
:[distinct]: a list of distinct values if there are less than some
maximum number of distinct values. This might not include
values from adjacent items
:[distinctcount]: if distinct is populated, this is len(distinct)
:[min]: for number data types, the lowest value present
:[max]: for number data types, the highest value present
:returns: a sorted list of data entries.
"""
from ..models.annotationelement import Annotationelement

if self._columns is not None:
return self._columns
columns = {}
if not self._sources or 'folder' in self._sources:
self._collectColumns(columns, [self.folder], 'folder')
Expand Down Expand Up @@ -916,9 +899,39 @@ def columns(self):
result.pop('min', None)
result.pop('max', None)
prefixOrder = {'item': 0, 'annotation': 1, 'annotationelement': 2, 'data': 3, 'bbox': 4}
self._columns = sorted(columns.values(), key=lambda x: (
columns = sorted(columns.values(), key=lambda x: (
prefixOrder.get(x['key'].split('.', 1)[0], len(prefixOrder)), x['key']))
return [{k: v for k, v in c.items() if k != 'where'} for c in self._columns]
return columns

@property
def columns(self):
"""
Get a sorted list of plottable columns with some metadata for each.
Each data entry contains
:key: the column key. For database entries, this is (item|
annotation|annotationelement).(id|name|description|group|
label). For bounding boxes this is bbox.(x0|y0|x1|y1). For
data from meta / attributes / user, this is
data.(key)[.0][.(key2)][.0]
:type: 'string' or 'number'
:title: a human readable title
:count: the number of non-null entries in the column
:[distinct]: a list of distinct values if there are less than some
maximum number of distinct values. This might not include
values from adjacent items
:[distinctcount]: if distinct is populated, this is len(distinct)
:[min]: for number data types, the lowest value present
:[max]: for number data types, the highest value present
:returns: a sorted list of data entries.
"""
if self._columns is not None:
return self._columns
columns = self._getColumns()
self._columns = columns
return [{k: v for k, v in c.items() if k != 'where'} for c in self._columns if c['count']]

def _collectData(self, rows, colsout):
"""
Expand Down Expand Up @@ -972,18 +985,20 @@ def data(self, columns, requiredColumns=None):
columns = columns.split(',')
if not isinstance(requiredColumns, list):
requiredColumns = requiredColumns.split(',') if requiredColumns is not None else []
self._datacolumns = {c: {} for c in columns}
rows = set()
# collects data as a side effect
collist = self.columns
for coldata in self._datacolumns.values():
rows |= set(coldata.keys())
rows = sorted(rows)
colsout = [col.copy() for col in collist if col['key'] in columns]
for cidx, col in enumerate(colsout):
col['index'] = cidx
logger.info(f'Gathering {len(self._datacolumns)} x {len(rows)} data')
data, rows = self._collectData(rows, colsout)
with self._dataLock:
self._datacolumns = {c: {} for c in columns}
rows = set()
# collects data as a side effect
collist = self._getColumns()
for coldata in self._datacolumns.values():
rows |= set(coldata.keys())
rows = sorted(rows)
colsout = [col.copy() for col in collist if col['key'] in columns]
for cidx, col in enumerate(colsout):
col['index'] = cidx
logger.info(f'Gathering {len(self._datacolumns)} x {len(rows)} data')
data, rows = self._collectData(rows, colsout)
self._datacolumns = None
for cidx, col in enumerate(colsout):
colkey = col['key']
numrows = len(data)
Expand All @@ -1005,6 +1020,7 @@ def data(self, columns, requiredColumns=None):
else:
col.pop('distinct', None)
col.pop('distinctcount', None)
colsout = [{k: v for k, v in c.items() if k != 'where'} for c in colsout]
return {
'columns': colsout,
'data': data}
39 changes: 39 additions & 0 deletions girder_annotation/test_annotation/test_annotations.py
Original file line number Diff line number Diff line change
Expand Up @@ -748,3 +748,42 @@ def testMigrateAnnotationAccessControlNullUserError(self, user, admin):
logger.debug.assert_called_once()
annot = Annotation().load(annot['_id'], force=True)
assert 'access' not in annot


def testPlottableDataAccess(admin):
import girder_large_image_annotation

exampleData = {
'ex1_nucleus_radius': 4.5,
'ex1_nucleus_circularity': 0.9,
'ex2_nuclei_radii': [4.5, 5.5, 5.1],
'ex2_nuclei_circularity': [0.9, 0.86, 0.92],
'ex3_nucleus': {
'radius': 4.5,
'circularity': 0.9,
},
'ex4_nucleii': {
'radii': [4.5, 5.5, 5.1],
'circularity': [0.9, 0.86, 0.92],
},
'ex5_nucleus': [{
'radius': 4.5,
'circularity': 0.9,
}, {
'radius': 5.5,
'circularity': 0.86,
}, {
'radius': 5.1,
'circularity': 0.92,
}],
}
item = Item().createItem('sample', admin, utilities.namedFolder(admin, 'Public'))
item = Item().setMetadata(item, exampleData)
plottable = girder_large_image_annotation.utils.PlottableItemData(admin, item)
col = plottable.columns
# Also contains item id, name, and description
assert len(col) == 12

data = plottable.data([c['key'] for c in col])
assert len(data['columns']) == 12
assert len(data['data']) == 3

0 comments on commit 07e0429

Please sign in to comment.