-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathmisclassification.R
executable file
·56 lines (56 loc) · 2.08 KB
/
misclassification.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
#' Computes misclassification rate
#'
#' Missclasification is a commonly used performance measure in subspace
#' clustering. It allows to compare two partitions with the same number of
#' clusters.
#'
#' As getting exact value of misclassification requires checking all
#' permutations and is therefore intrackable even for modest number of clusters,
#' a heuristic approach is proposed. It is assumed that there are K classes of
#' maximum M elements. Additional requirement is that classes labels are from
#' range [1, K].
#'
#' @param group A vector, first partition.
#' @param true_group A vector, second (reference) partition.
#' @param M An integer, maximal number of elements in one class.
#' @param K An integer, number of classes.
#' @references {R. Vidal. Subspace clustering. Signal Processing Magazine, IEEE,
#' 28(2):52-68,2011}
#' @export
#' @return Misclassification rate.
#' @examples
#' \donttest{
#' sim.data <- data.simulation(n = 100, SNR = 1, K = 5, numb.vars = 30, max.dim = 2)
#' mlcc.fit <- mlcc.reps(sim.data$X, numb.clusters = 5, numb.runs = 20, max.dim = 2, numb.cores = 1)
#' misclassification(mlcc.fit$segmentation, sim.data$s, 30, 5)
#'
#'
#' # one can use this function not only for clusters
#' partition1 <- sample(10, 300, replace = TRUE)
#' partition2 <- sample(10, 300, replace = TRUE)
#' misclassification(partition1, partition1, max(table(partition1)), 10)
#' misclassification(partition1, partition2, max(table(partition2)), 10)
#' }
misclassification <- function(group, true_group, M, K) {
if (length(group) != length(true_group)) {
stop("Partitions are of different lengths")
}
forbidden <- NULL
suma <- 0
nG <- max(group)
for (i in M:1) { # differnet concordance levels
for (j in 1:nG) { # subspaces numbers (found)
if (sum(j == forbidden) == 0) { # subspace not yet used
for (k in 1:K) { # subspaces numbers (true)
if (sum(j == group[true_group == k]) == i) {
suma <- suma + i
forbidden <- c(forbidden, j)
break
}
}
}
}
}
mis <- 1 - suma / length(true_group)
return(mis)
}