diff --git a/.gitignore b/.gitignore
index cb58c8a..f8745a1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -8,3 +8,4 @@ inst/doc
 misc
 vignettes/.*R
 vignettes/.*html
+.DS_Store
diff --git a/DESCRIPTION b/DESCRIPTION
index e73b6d2..b113edd 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: blocking
 Type: Package
 Title: Various Blocking Methods for Entity Resolution
-Version: 1.0.1
+Version: 1.0.2
 Authors@R: 
   c(person(given = "Maciej",
              family = "Beręsewicz",
@@ -19,7 +19,7 @@ LazyData: true
 URL: https://github.com/ncn-foreigners/blocking, https://ncn-foreigners.ue.poznan.pl/blocking/
 BugReports: https://github.com/ncn-foreigners/blocking/issues
 Roxygen: list(markdown = TRUE)
-RoxygenNote: 7.3.2
+RoxygenNote: 7.3.3
 Imports: 
     text2vec,
     tokenizers,
diff --git a/NAMESPACE b/NAMESPACE
index 6afe324..0ecf1f9 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -1,5 +1,6 @@
 # Generated by roxygen2: do not edit by hand
 
+S3method(logLik,est_block_error)
 S3method(print,blocking)
 S3method(print,est_block_error)
 export(blocking)
@@ -35,6 +36,7 @@ importFrom(mlpack,lsh)
 importFrom(readr,read_table)
 importFrom(rnndescent,rnnd_build)
 importFrom(rnndescent,rnnd_query)
+importFrom(stats,AIC)
 importFrom(stats,dist)
 importFrom(stats,dpois)
 importFrom(stats,runif)
diff --git a/NEWS.md b/NEWS.md
index 4b40110..ff5177b 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,5 +1,9 @@
 # development
 
+# version 1.0.2
+
++ Updated `est_block_error` function.
+
 # version 1.0.1
 
 + Fixed CRAN errors.
diff --git a/R/blocking.R b/R/blocking.R
index 76a2697..19d04c0 100644
--- a/R/blocking.R
+++ b/R/blocking.R
@@ -127,16 +127,16 @@ blocking <- function(x,
                                             "lsh" = NULL,
                                             "kd" = NULL)
 
-  stopifnot("Only character, dense or sparse (dgCMatrix) matrix x is supported" =
+  stopifnot("Only character, dense or sparse (dgCMatrix) matrix x is supported." =
               is.character(x) | is.matrix(x) | inherits(x, "Matrix"))
 
 
   if (!is.null(ann_write)) {
-    stopifnot("Path provided in the `ann_write` is incorrect" = file.exists(ann_write) )
+    stopifnot("Path provided in the `ann_write` is incorrect." = file.exists(ann_write) )
   }
 
   if (ann == "nnd") {
-    stopifnot("Distance for NND should be `euclidean, cosine, manhatan, hamming`" =
+    stopifnot("Distance for NND should be `euclidean, cosine, manhatan, hamming`." =
                 distance %in% c("euclidean", "cosine","manhatan", "hamming"))
   }
 
@@ -145,15 +145,18 @@ blocking <- function(x,
   }
 
   if (ann == "hnsw") {
-    stopifnot("Distance for HNSW should be `l2, euclidean, cosine, ip`" =
+    stopifnot("Distance for HNSW should be `l2, euclidean, cosine, ip`." =
                 distance %in% c("l2", "euclidean", "cosine", "ip"))
   }
 
   if (ann == "annoy") {
-    stopifnot("Distance for Annoy should be `euclidean, manhatan, hamming, angular`" =
+    stopifnot("Distance for Annoy should be `euclidean, manhatan, hamming, angular`." =
                 distance %in% c("euclidean", "manhatan", "hamming", "angular"))
   }
 
+  stopifnot("Algorithm should be `nnd, hnsw, annoy, lsh, kd`." =
+              ann %in% c("nnd", "hnsw", "annoy", "lsh", "kd"))
+
   if (!is.null(y)) {
     deduplication <- FALSE
     y_default <- FALSE
@@ -167,15 +170,15 @@ blocking <- function(x,
 
   if (!is.null(true_blocks)) {
 
-    stopifnot("`true_blocks` should be a data.frame" = is.data.frame(true_blocks))
+    stopifnot("`true_blocks` should be a data.frame." = is.data.frame(true_blocks))
 
     if (deduplication == FALSE) {
-      stopifnot("`true blocks` should be a data.frame with columns: x, y, block" =
+      stopifnot("`true blocks` should be a data.frame with columns: x, y, block." =
                   length(colnames(true_blocks)) == 3,
                 all(colnames(true_blocks) == c("x", "y", "block")))
     }
     if (deduplication) {
-      stopifnot("`true blocks` should be a data.frame with columns: x, block" =
+      stopifnot("`true blocks` should be a data.frame with columns: x, block." =
                   length(colnames(true_blocks)) == 2,
                 all(colnames(true_blocks) == c("x", "block")))
     }
diff --git a/R/est_block_error.R b/R/est_block_error.R
index 38d5d4a..2d33194 100644
--- a/R/est_block_error.R
+++ b/R/est_block_error.R
@@ -1,5 +1,6 @@
 #' @importFrom stats dpois
 #' @importFrom stats runif
+#' @importFrom stats AIC
 #'
 #' @title Estimate errors due to blocking in record linkage
 #'
@@ -11,25 +12,34 @@
 #' @param x Reference data (required if `n` and `N` are not provided).
 #' @param y Query data (required if `n` is not provided).
 #' @param blocking_result `data.frame` or `data.table` containing blocking results (required if `n` is not provided).
+#' It must contain a column named `y` storing the indices of the records in the query data set.
 #' @param n Integer vector of numbers of accepted pairs formed by each record in the query data set
 #' with records in the reference data set, based on blocking criteria (if `NULL`, derived from `blocking_result`).
 #' @param N Total number of records in the reference data set (if `NULL`, derived as `length(x)`).
-#' @param G Number of classes in the finite mixture model.
+#' @param G Integer or vector of integers. Number of classes in the finite mixture model.
+#' If `G` is a vector, the optimal number of classes is selected from the provided values
+#' based on the Akaike Information Criterion (AIC).
 #' @param alpha Numeric vector of initial class proportions (length `G`; if `NULL`, initialized as `rep(1/G, G)`).
 #' @param p Numeric vector of initial matching probabilities in each class of the mixture model
-#' (length `G`; if `NULL`, randomly initialized from `runif(G, 0.5, 1)`).
+#' (length `G`; if `NULL`, randomly initialized from `runif(G, 0.5, 1)` or `rep(runif(1, 0.5, 1), G)`,
+#' depending on the parameter `equal_p`).
 #' @param lambda Numeric vector of initial Poisson distribution parameters for non-matching records in each class of the mixture model
 #' (length `G`; if `NULL`, randomly initialized from `runif(G, 0.1, 2)`).
-#' @param tol Convergence tolerance for the EM algorithm (default `10^(-6)`).
-#' @param maxiter Maximum number of iterations for the EM algorithm (default `1000`).
+#' @param equal_p Logical, indicating whether the matching probabilities
+#' `p` should be constrained to be equal across all latent classes (default `FALSE`).
+#' @param tol Convergence tolerance for the EM algorithm (default `10^(-4)`).
+#' @param maxiter Maximum number of iterations for the EM algorithm (default `100`).
 #' @param sample_size Bootstrap sample (from `n`) size used for calculations (if `NULL`, uses all data).
 #'
 #' @details
-#' Consider a large finite population that comprises of \eqn{N} individuals, and two duplicate-free data sources: a register and a file.
+#' Consider a large finite population that comprises of \eqn{N} individuals, and two duplicate-free data sources:
+#' a register (reference data `x`) and a file (query data `y`).
 #' Assume that the register has no undercoverage,
-#' i.e. each record from the file corresponds to exactly one record from the same individual in the register.
+#' i.e., each record from the file corresponds to exactly one record from the same individual in the register.
 #' Let \eqn{n_i} denote the number of register records which form an accepted (by the blocking criteria) pair with
-#' record \eqn{i} on the file. Assume that:\cr
+#' record \eqn{i} on the file, for \eqn{i=1,2,\ldots,m}, where \eqn{m} is the number of records in the file.
+#' Let \eqn{v_i} denote record \eqn{i} from the file.
+#' Assume that:\cr
 #' \itemize{
 #' \item two matched records are neighbours with a probability that is bounded away from \eqn{0} regardless of \eqn{N},
 #' \item two unmatched records are accidental neighbours with a probability of \eqn{O(\frac{1}{N})}.
@@ -71,14 +81,20 @@
 #' }
 #' where \eqn{E[p(v_i)] = \sum_{g=1}^G\alpha_gp_g} and \eqn{E[\lambda(v_i)] = \sum_{g=1}^G\alpha_g\lambda_g}.
 #'
+#' @note
+#' The matching probabilities \eqn{p_g} can be constrained to be equal across all latent classes
+#' by setting `equal_p = TRUE`.
 #'
-#'
-#' @returns Returns a list containing:\cr
+#' @returns Returns an object of class `est_block_error`, with a list containing:\cr
 #' \itemize{
 #' \item{`FPR` -- estimated false positive rate,}
 #' \item{`FNR` -- estimated false negative rate,}
+#' \item{`G` -- number of classes used in the optimal model,}
+#' \item{`log_lik` -- final log-likelihood value,}
+#' \item{`equal_p` -- logical, indicating whether the matching probabilities were constrained,}
 #' \item{`iter` -- number of the EM algorithm iterations performed,}
-#' \item{`convergence` -- logical, indicating whether the EM algorithm converged within `maxiter` iterations.}
+#' \item{`convergence` -- logical, indicating whether the EM algorithm converged within `maxiter` iterations,}
+#' \item{`AIC` -- Akaike Information Criterion value in the optimal model.}
 #' }
 #'
 #' @references
@@ -92,15 +108,15 @@
 #' ## an example proposed by Dasylva and Goussanou (2021)
 #' ## we obtain results very close to those reported in the paper
 #'
-#' set.seed(111)
+#' set.seed(11)
 #'
 #' neighbors <- rep(0:5, c(1659, 53951, 6875, 603, 62, 5))
 #'
 #' errors <- est_block_error(n = neighbors,
 #'                           N = 63155,
-#'                           G = 2,
+#'                           G = 1:3,
 #'                           tol = 10^(-3),
-#'                           maxiter = 50)
+#'                           equal_p = TRUE)
 #'
 #' errors
 #'
@@ -114,6 +130,7 @@ est_block_error <- function(x = NULL,
                             alpha = NULL,
                             p = NULL,
                             lambda = NULL,
+                            equal_p = FALSE,
                             tol = 10^(-4),
                             maxiter = 100,
                             sample_size = NULL) {
@@ -135,6 +152,29 @@ est_block_error <- function(x = NULL,
     n <- sample(n, size = sample_size, replace = TRUE)
   }
 
+  if (length(G) > 1) {
+
+    G_cand <- sort(G)
+    results_list <- list()
+    aic_values <- numeric(length(G_cand))
+
+    for (i in seq_along(G_cand)) {
+
+      fit <- est_block_error(n = n, N = N, G = G_cand[i],
+                             alpha = NULL, p = NULL, lambda = NULL,
+                             equal_p = equal_p, tol = tol, maxiter = maxiter)
+      results_list[[i]] <- fit
+      aic_values[i] <- fit$AIC
+
+    }
+
+    best_idx <- which.min(aic_values)
+    best_model <- results_list[[best_idx]]
+
+    return(best_model)
+
+  }
+
   convergence <- FALSE
   m <- length(n)
 
@@ -143,7 +183,13 @@ est_block_error <- function(x = NULL,
   }
 
   if (is.null(p)) {
-    p <- runif(G, min = 0.5, max = 1)
+    if (equal_p) {
+      p <- rep(runif(1, min = 0.5, max = 1), G)
+    } else {
+      p <- runif(G, min = 0.5, max = 1)
+    }
+  } else if (equal_p && length(p) == G) {
+    p <- rep(mean(p), G)
   }
 
   if (is.null(lambda)) {
@@ -192,7 +238,11 @@ est_block_error <- function(x = NULL,
     ## M
 
     alpha <- 1 / m * colSums(probs_c_n)
-    p <- colSums(E_c_n_M) / (m * alpha)
+    if (equal_p) {
+      p <- rep(sum(E_c_n_M) / m, G)
+    } else {
+      p <- colSums(E_c_n_M) / (m * alpha)
+    }
     lambda <- colSums(E_c_n_U) / (m * alpha)
 
     ## check
@@ -215,13 +265,20 @@ est_block_error <- function(x = NULL,
   FNR <- 1 - sum(alpha * p)
   FPR <- sum(alpha * lambda) / (N - 1)
 
-  return(structure(
+  res <- structure(
     list(
-    FPR = FPR,
-    FNR = FNR,
-    iter = l,
-    convergence = convergence
-  ),
-  class = "est_block_error"))
+      FPR = FPR,
+      FNR = FNR,
+      G = G,
+      log_lik = log_lik_new,
+      equal_p = equal_p,
+      iter = l,
+      convergence = convergence
+    ),
+    class = "est_block_error")
+
+  res$AIC <- AIC(res)
+
+  return(res)
 
 }
diff --git a/R/methods.R b/R/methods.R
index 0815c90..310f482 100644
--- a/R/methods.R
+++ b/R/methods.R
@@ -34,8 +34,9 @@ print.blocking <- function(x, ...) {
 #' @exportS3Method
 print.est_block_error <- function(x, ...) {
 
-  cat("FPR: ", x$FPR, "\n")
-  cat("FNR: ", x$FNR, "\n")
+  cat("Estimated FPR: ", x$FPR, "\n")
+  cat("Estimated FNR: ", x$FNR, "\n")
+  cat("Number of classes in the model: ", x$G, "\n")
 
   cat("========================================================\n")
 
@@ -46,3 +47,20 @@ print.est_block_error <- function(x, ...) {
   }
 }
 
+#' @method logLik est_block_error
+#' @exportS3Method
+logLik.est_block_error <- function(object, ...) {
+
+  val <- object$log_lik
+  if (object$equal_p) {
+    k <- 2 * object$G
+  } else {
+    k <- 3 * object$G - 1
+  }
+
+  attr(val, "df") <- k
+  class(val) <- "logLik"
+
+  val
+
+}
diff --git a/data/RLdata500.rda b/data/RLdata500.rda
index 4190fe2..c23fa9b 100644
Binary files a/data/RLdata500.rda and b/data/RLdata500.rda differ
diff --git a/data/census.rda b/data/census.rda
index bd45fdb..ed6ab8d 100644
Binary files a/data/census.rda and b/data/census.rda differ
diff --git a/data/cis.rda b/data/cis.rda
index b67cbea..7b604ae 100644
Binary files a/data/cis.rda and b/data/cis.rda differ
diff --git a/data/foreigners.rda b/data/foreigners.rda
index a7e0944..a6b624a 100644
Binary files a/data/foreigners.rda and b/data/foreigners.rda differ
diff --git a/man/est_block_error.Rd b/man/est_block_error.Rd
index c327dbe..324c793 100644
--- a/man/est_block_error.Rd
+++ b/man/est_block_error.Rd
@@ -14,6 +14,7 @@ est_block_error(
   alpha = NULL,
   p = NULL,
   lambda = NULL,
+  equal_p = FALSE,
   tol = 10^(-4),
   maxiter = 100,
   sample_size = NULL
@@ -24,36 +25,47 @@ est_block_error(
 
 \item{y}{Query data (required if \code{n} is not provided).}
 
-\item{blocking_result}{\code{data.frame} or \code{data.table} containing blocking results (required if \code{n} is not provided).}
+\item{blocking_result}{\code{data.frame} or \code{data.table} containing blocking results (required if \code{n} is not provided).
+It must contain a column named \code{y} storing the indices of the records in the query data set.}
 
 \item{n}{Integer vector of numbers of accepted pairs formed by each record in the query data set
 with records in the reference data set, based on blocking criteria (if \code{NULL}, derived from \code{blocking_result}).}
 
 \item{N}{Total number of records in the reference data set (if \code{NULL}, derived as \code{length(x)}).}
 
-\item{G}{Number of classes in the finite mixture model.}
+\item{G}{Integer or vector of integers. Number of classes in the finite mixture model.
+If \code{G} is a vector, the optimal number of classes is selected from the provided values
+based on the Akaike Information Criterion (AIC).}
 
 \item{alpha}{Numeric vector of initial class proportions (length \code{G}; if \code{NULL}, initialized as \code{rep(1/G, G)}).}
 
 \item{p}{Numeric vector of initial matching probabilities in each class of the mixture model
-(length \code{G}; if \code{NULL}, randomly initialized from \code{runif(G, 0.5, 1)}).}
+(length \code{G}; if \code{NULL}, randomly initialized from \code{runif(G, 0.5, 1)} or \code{rep(runif(1, 0.5, 1), G)},
+depending on the parameter \code{equal_p}).}
 
 \item{lambda}{Numeric vector of initial Poisson distribution parameters for non-matching records in each class of the mixture model
 (length \code{G}; if \code{NULL}, randomly initialized from \code{runif(G, 0.1, 2)}).}
 
-\item{tol}{Convergence tolerance for the EM algorithm (default \code{10^(-6)}).}
+\item{equal_p}{Logical, indicating whether the matching probabilities
+\code{p} should be constrained to be equal across all latent classes (default \code{FALSE}).}
 
-\item{maxiter}{Maximum number of iterations for the EM algorithm (default \code{1000}).}
+\item{tol}{Convergence tolerance for the EM algorithm (default \code{10^(-4)}).}
+
+\item{maxiter}{Maximum number of iterations for the EM algorithm (default \code{100}).}
 
 \item{sample_size}{Bootstrap sample (from \code{n}) size used for calculations (if \code{NULL}, uses all data).}
 }
 \value{
-Returns a list containing:\cr
+Returns an object of class \code{est_block_error}, with a list containing:\cr
 \itemize{
 \item{\code{FPR} -- estimated false positive rate,}
 \item{\code{FNR} -- estimated false negative rate,}
+\item{\code{G} -- number of classes used in the optimal model,}
+\item{\code{log_lik} -- final log-likelihood value,}
+\item{\code{equal_p} -- logical, indicating whether the matching probabilities were constrained,}
 \item{\code{iter} -- number of the EM algorithm iterations performed,}
-\item{\code{convergence} -- logical, indicating whether the EM algorithm converged within \code{maxiter} iterations.}
+\item{\code{convergence} -- logical, indicating whether the EM algorithm converged within \code{maxiter} iterations,}
+\item{\code{AIC} -- Akaike Information Criterion value in the optimal model.}
 }
 }
 \description{
@@ -62,11 +74,14 @@ as proposed by Dasylva and Goussanou (2021). Assumes duplicate-free data sources
 complete coverage of the reference data set and blocking decisions based solely on record pairs.
 }
 \details{
-Consider a large finite population that comprises of \eqn{N} individuals, and two duplicate-free data sources: a register and a file.
+Consider a large finite population that comprises of \eqn{N} individuals, and two duplicate-free data sources:
+a register (reference data \code{x}) and a file (query data \code{y}).
 Assume that the register has no undercoverage,
-i.e. each record from the file corresponds to exactly one record from the same individual in the register.
+i.e., each record from the file corresponds to exactly one record from the same individual in the register.
 Let \eqn{n_i} denote the number of register records which form an accepted (by the blocking criteria) pair with
-record \eqn{i} on the file. Assume that:\cr
+record \eqn{i} on the file, for \eqn{i=1,2,\ldots,m}, where \eqn{m} is the number of records in the file.
+Let \eqn{v_i} denote record \eqn{i} from the file.
+Assume that:\cr
 \itemize{
 \item two matched records are neighbours with a probability that is bounded away from \eqn{0} regardless of \eqn{N},
 \item two unmatched records are accidental neighbours with a probability of \eqn{O(\frac{1}{N})}.
@@ -108,19 +123,23 @@ As \eqn{N \to \infty}, the error rates and the model parameters are related as f
 }
 where \eqn{E[p(v_i)] = \sum_{g=1}^G\alpha_gp_g} and \eqn{E[\lambda(v_i)] = \sum_{g=1}^G\alpha_g\lambda_g}.
 }
+\note{
+The matching probabilities \eqn{p_g} can be constrained to be equal across all latent classes
+by setting \code{equal_p = TRUE}.
+}
 \examples{
 ## an example proposed by Dasylva and Goussanou (2021)
 ## we obtain results very close to those reported in the paper
 
-set.seed(111)
+set.seed(11)
 
 neighbors <- rep(0:5, c(1659, 53951, 6875, 603, 62, 5))
 
 errors <- est_block_error(n = neighbors,
                           N = 63155,
-                          G = 2,
+                          G = 1:3,
                           tol = 10^(-3),
-                          maxiter = 50)
+                          equal_p = TRUE)
 
 errors
 
diff --git a/vignettes/v1-deduplication.Rmd b/vignettes/v1-deduplication.Rmd
index b92967b..92f4846 100644
--- a/vignettes/v1-deduplication.Rmd
+++ b/vignettes/v1-deduplication.Rmd
@@ -21,6 +21,7 @@ knitr::opts_chunk$set(
   message = FALSE,
   comment = "#>"
 )
+data.table::setDTthreads(1)
 ```
 
 # Setup
@@ -36,6 +37,7 @@ Read the `RLdata500` data (taken from the [RecordLinkage](https://CRAN.R-project
 
 ```{r}
 data(RLdata500)
+setDT(RLdata500)
 head(RLdata500)
 ```
 This dataset contains `r nrow(RLdata500)` rows with `r NROW(unique(RLdata500$ent_id))` entities.
diff --git a/vignettes/v2-reclin.Rmd b/vignettes/v2-reclin.Rmd
index 193bd1d..034099f 100644
--- a/vignettes/v2-reclin.Rmd
+++ b/vignettes/v2-reclin.Rmd
@@ -23,6 +23,7 @@ knitr::opts_chunk$set(
   collapse = TRUE,
   comment = "#>"
 )
+data.table::setDTthreads(1)
 ```
 
 # Setup
@@ -58,6 +59,8 @@ all records in the cis).
 ```{r}
 data(census)
 data(cis)
+setDT(census)
+setDT(cis)
 ```
 
 + `census` object has `r nrow(census)` rows and `r ncol(census)` columns,
diff --git a/vignettes/v3-integration.Rmd b/vignettes/v3-integration.Rmd
index 1b0300f..eca4f63 100644
--- a/vignettes/v3-integration.Rmd
+++ b/vignettes/v3-integration.Rmd
@@ -22,12 +22,14 @@ knitr::opts_chunk$set(
   collapse = TRUE,
   comment = "#>"
 )
+data.table::setDTthreads(1)
 ```
 
 # Setup
 
 ```{r setup}
 library(blocking)
+library(data.table)
 library(reclin2)
 ```
 
@@ -38,6 +40,8 @@ In the example we will use the same dataset as in the *Blocking records for reco
 ```{r}
 data(census)
 data(cis)
+setDT(census)
+setDT(cis)
 census[, x:=1:.N]
 cis[, y:=1:.N]
 ```