# This file fits a determinantally-thinned point process to a
# (dependently) thinned point process based on the method outlined in the
# paper by Blaszczyszyn and Keeler [1], which is essentially the method
# developed by Kulesza and Taskar [2] in Section 4.1.1.
#
# This is the second file (of three files) to run to reproduce results similar
# to those presented in the paper by Blaszczyszyn and Keeler [1].
#
# The data used for fitting (or training) is stored in the file Subset.mat,
# which is generated with the MATLAB file SubsetGenerate.m; see
#
# https://github.com/hpaulkeeler/DetPoisson_MATLAB
#
# The fitting parameters are stored locally in the file SubsetFitParam.npz.
#
# This code was originally written by H.P. Keeler in MATLAB; see
# https://github.com/hpaulkeeler/DetPoisson_MATLAB
#
# Author: H.P. Keeler, Inria/ENS, Paris, and University of Melbourne,
# Melbourne, 2019
#
# References:
# [1] Blaszczyszyn and Keeler, "Determinantal thinning of point processes
# with network learning applications", 2018.
# [2] Kulesza and Taskar, "Determinantal point processes for machine
# learning", Now Publishers, 2012.
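#
# Background on the likelihood being maximized (summarizing [1,2]): for an
# L-ensemble determinantal point process with kernel matrix L built on a
# realization Phi of the underlying Poisson process, the probability of
# retaining exactly the subset Psi is
#
#   P(Psi) = det(L_Psi) / det(L + I),
#
# where L_Psi is the submatrix of L indexed by Psi. The function
# funLikelihood_data below sums the corresponding log-likelihood
# log det(L_Psi) - log det(L + I) over all training samples.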
import numpy as np; #NumPy package for arrays, random number generation, etc
import matplotlib.pyplot as plt #for plotting
from matplotlib import collections as mc #for plotting line segments
from scipy.io import loadmat #for reading mat files
from scipy.optimize import minimize #For optimizing
from scipy.stats import poisson #for the Poisson probability mass function
from funNeighbourL import funNeighbourL
plt.close("all"); # close all figures
T=100;#Number of training/learning samples
###START Load up values from MATLAB .mat file START###
dataMATLAB=loadmat('Subset.mat');
lambda0=np.double(dataMATLAB['lambda']);#intensity of underlying Poisson PP
xx0=np.double(dataMATLAB['xx0']);
yy0=np.double(dataMATLAB['yy0']);
areaSample=np.double(dataMATLAB['areaSample']); #area of sample window
rSub=np.double(dataMATLAB['rSub']); #radius of matern or triangular process
lambdaSub=np.double(dataMATLAB['lambdaSub']); #intensity of subset PP
windowSample=dataMATLAB['windowSample'][0]; #vector describing window dims
choiceModel=int(dataMATLAB['choiceModel']); #model number (ie 1,2 or 3)
labelModel=str(dataMATLAB['labelModel'][0]); #name/label of model
booleDisk=int(dataMATLAB['booleDisk'])!=0; #if simulation window is disk
#x/y values of all underlying Poisson PPs
ppStructTemp=dataMATLAB['ppStructPoisson'];
numbTrain=ppStructTemp.size; #total number of simulations
#extract data for underlying Poisson point processes
xxList=[np.concatenate(ppStructTemp[ss][0][0]) for ss in range(numbTrain)];
yyList=[np.concatenate(ppStructTemp[ss][0][1]) for ss in range(numbTrain)];
ppXYPoisson=[(xxList[ss],yyList[ss])for ss in range(numbTrain)];
nList=[int(ppStructTemp[ss][0][2]) for ss in range(numbTrain)];
nArray=np.array(nList);
#extract data for subset point processes
indexSubTemp=dataMATLAB['indexCellSub'];
indexListSub=[np.array(np.concatenate(indexSubTemp[ss][0])-1,dtype=int) for ss in range(numbTrain)];
#NOTE: need to subtract one from MATLAB indices as Python indexing starts at zero.
###END Load up values from MATLAB .mat file END###
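#(Optional sanity check, not in the original script: every subset index
#should point to a valid point of its Poisson realization.)
for ss in range(numbTrain):
    assert np.all(indexListSub[ss]<nArray[ss]), 'Subset index out of range.';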
###START -- Model fitting parameters -- START
#The parameters can be changed, but these were observed to work well.
if any(choiceModel==np.array([1,2])):
    #Fitting parameters for Matern hard-core (I/II) point process
    N=1; #number of neighbours for distances -- use N=1 or N=2
    M=0; #Must be M=N-1 or M=0.
    booleOptSigma=True; #Set to True to also optimize sigma value
    choiceKernel=1; #1 for Gaussian kernel, 2 for Cauchy kernel
    sigma=1; #sigma for Gaussian or Cauchy kernel
    #Sigma value is ignored if booleOptSigma=True
else:
    #Fitting parameters for triangle point process
    N=2; #number of neighbours for distances -- use N=1 or N=2
    M=1; #Must be M=N-1 or M=0.
    booleOptSigma=False; #Set to True to also optimize sigma value
    choiceKernel=1; #1 for Gaussian kernel, 2 for Cauchy kernel
    sigma=0; #sigma for Gaussian or Cauchy kernel
    #Sigma value is ignored if booleOptSigma=True
###END -- Model fitting parameters -- END
##Probability of a Poisson realization with too few points (ie at most N)
probLessM=np.sum(poisson.pmf(np.arange(N+1),lambda0));
if any(nArray<=N):
    raise SystemExit('Underlying Poisson realizations need more than N points.');
#total number of possible training/learning samples
numbTrain=nArray.size;
if T>numbTrain:
    raise SystemExit('Not enough training samples ie T>numbTrain.');
#Deterministic (ie gradient) optimization method
if booleOptSigma:
    thetaGuess=np.ones(N+M+2); #initial guess for theta values plus sigma
else:
    thetaGuess=np.ones(N+M+1); #initial guess for theta values
#Function definitions for log-likelihood.
def funLikelihood_data(T,ppXYPoisson,indexListSub,choiceKernel,lambda0,sigma,theta,booleOptSigma,N,M):
    if booleOptSigma:
        #sets sigma to one of the theta parameters to optimize
        sigma=theta[-1:];
        theta=theta[:-1];
    #initialize vector of per-sample log-likelihood values
    logLikelihoodVector=np.zeros(T);
    #Loop through all training/learning samples
    for tt in range(T):
        xx=ppXYPoisson[tt][0]; yy=ppXYPoisson[tt][1];
        indexSub=indexListSub[tt]; #index for sub point process
        #Create L matrix (ie for Phi) based on nearest neighbours
        L=funNeighbourL(xx,yy,lambda0,choiceKernel,sigma,theta,N,M);
        #Create sub L matrix (ie for Psi)
        subL=L[np.ix_(indexSub,indexSub)];
        #log-likelihood of one sample: log det(L_Psi)-log det(L+I)
        logLikelihoodVector[tt]=(np.log(np.linalg.det(subL))-np.log(np.linalg.det(L+np.eye(L.shape[0]))));
    #END for-loop
    logLikelihood=np.sum(logLikelihoodVector);
    return logLikelihood
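#(Numerical note, not in the original script: np.linalg.det can overflow or
#underflow for larger point patterns. A more stable variant would use
#np.linalg.slogdet, eg
#   signSub,logDetSub=np.linalg.slogdet(subL);
#and use logDetSub directly, provided signSub is positive.)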
#function to maximize. See above for funLikelihood_data function
def funMax_theta(theta):
    return funLikelihood_data(T,ppXYPoisson,indexListSub,choiceKernel,lambda0,sigma,theta,booleOptSigma,N,M);
#define function to be minimized (ie the negative log-likelihood)
def funMin(theta):
    return (-1*funMax_theta(theta));
#Minimize function -- may take a while.
resultsOpt=minimize(funMin,thetaGuess, method='BFGS',options={'disp': True});
thetaMax=resultsOpt.x;
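#(Optional check, not in the original script: scipy's OptimizeResult flags
#whether BFGS actually converged.)
if not resultsOpt.success:
    print('Warning: optimizer did not report convergence:', resultsOpt.message);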
if booleOptSigma:
    sigma=thetaMax[-1:]; #retrieve sigma value from theta vector
    thetaMax=thetaMax[:-1];
    print('sigma = ', sigma);
print('thetaMax = ', thetaMax);
choiceModelFitted=choiceModel; #record which model was used for fitting
#save model fitting parameters in a .npz file
np.savez('SubsetFitParam',thetaMax=thetaMax,T=T,sigma=sigma,N=N,M=M,\
choiceModelFitted=choiceModelFitted,booleOptSigma=booleOptSigma,\
choiceKernel=choiceKernel);
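#(Usage sketch, not in the original script: np.savez appends the .npz
#extension, so the fitted parameters can be recovered later, eg by the
#third script, with np.load:
#   fitted=np.load('SubsetFitParam.npz');
#   thetaMax=fitted['thetaMax']; sigma=fitted['sigma'];
#)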