-
Notifications
You must be signed in to change notification settings - Fork 10
find_sample_from_2dscan #57
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from 2 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -581,3 +581,97 @@ def inner(x, *args, **kwargs): | |
| return func(*args, **kwargs) | ||
|
|
||
| return inner | ||
|
|
||
|
|
||
|
|
||
|
|
||
| def find_sample_from_2dscan(I_arr, xy_arr, Q_arr=None, params=None): | ||
| """Find sample positions from xy-scan | ||
|
|
||
| Parameters | ||
| ---------- | ||
| xy_arr : x,y of scan points | ||
| I_arr : Intensities for each scan point | ||
| Q_arr : Q-points (optional) | ||
|
|
||
| params : (optional) | ||
| Parameters for DBSCAN clustering and point selection. | ||
| See DBSCAN documentation: | ||
| http://scikit-learn.org/stable/modules/generated/sklearn.cluster.DBSCAN.html | ||
| - eps: The maximum distance between two samples for them to | ||
| be considered as in the same neighborhood. | ||
| - min_samples: The number of samples (or total weight) | ||
| in a neighborhood for a point to be considered as a core point. | ||
| This includes the point itself. | ||
| - n_jobs: The number of parallel jobs to run. None means 1 | ||
| unless in a joblib.parallel_backend context. | ||
| -1 means using all processors. | ||
| s_ratio: Ratio of sample points to total points. If we have | ||
| 100 dx,dy points and sample is expected to be found in 20 of | ||
| these points, then the remaining 80 should be background. | ||
| So, s_ratio is 0.2. Default value of 0.5 works fine. | ||
|
|
||
| use_unclassified: Sometimes DBSCAN is unable to classify | ||
| points around sample boundary. It that case, it gives -1. | ||
| If this keyword is True, that point is considered in the | ||
| sample positions (pts). | ||
|
|
||
|
|
||
| Returns | ||
| ------- | ||
| center : ndarray | ||
| xy coordinates of the center of the sample | ||
| pts : ndarray | ||
| xy coordinates of points considered within the sample | ||
|
|
||
| """ | ||
|
|
||
| from sklearn.cluster import DBSCAN | ||
| from scipy.stats import spearmanr | ||
|
|
||
| if params is None: | ||
| print('params is not provided. Using default parameters') | ||
| params = {'eps':0.05,'min_samples':20,'n_jobs':1, | ||
|
||
| 's_ratio':0.5,'qrange':(1,5), | ||
| 'use_unclassified':True} | ||
|
|
||
| if isinstance(Q_arr,np.ndarray): | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think this might be able to be |
||
| if params['qrange']: | ||
| # Trim to selected Q range. Because we do not want mess around | ||
| # sample holder and detector edge. This also speedups DBSCAN calculation | ||
| sel = (Q_arr > params['qrange'][0]) & (Q_arr < params['qrange'][1]) | ||
| I_arr = np.array([i[sel] for i in I_arr]) | ||
| else: | ||
| print('Q array is not provided. Using whole points') | ||
|
|
||
| # Use DBSCAN package to cluster I_arr | ||
| dbs = DBSCAN(params['eps'], min_samples=params['min_samples'], | ||
| metric=lambda i, j: 1 - spearmanr(i, j)[0], n_jobs=params['n_jobs']) | ||
| preds = dbs.fit_predict(np.array(I_arr)) | ||
| uniques, counts = np.unique(preds, return_counts=True) | ||
| ratios = counts / sum(counts) | ||
|
|
||
| # Collect x,y data for determining points which should correspond to the sample. | ||
| pts = [] | ||
|
|
||
| for j,u in enumerate(uniques): | ||
| mask = (preds == u) | ||
| masked = [] | ||
| for i,tf in enumerate(mask): | ||
| if tf: | ||
| masked.append(xy_arr[i]) | ||
| # these are not classified | ||
| if u == -1: | ||
| if params['use_unclassified']: | ||
| pts.extend(masked) | ||
| else: | ||
| # these are possibly background | ||
| if not (ratios[j] >= params['s_ratio']): | ||
| pts.extend(masked) | ||
| pts = np.array(pts) | ||
|
|
||
| center = np.mean(pts, axis=0) | ||
|
|
||
| # TODO: Get rid of s_ratio | ||
|
|
||
| return center, pts | ||

There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Would it be possible to put these imports at the top of the module?