diff --git a/.github/workflows/pkgdown.yaml b/.github/workflows/pkgdown.yaml index ed7650c..39f4ec6 100644 --- a/.github/workflows/pkgdown.yaml +++ b/.github/workflows/pkgdown.yaml @@ -46,3 +46,4 @@ jobs: clean: false branch: gh-pages folder: docs + diff --git a/NEWS.md b/NEWS.md index 7bb0268..28e289c 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,13 +1,24 @@ +# version 1.0.0 + ++ Added support for word embeddings. ++ Updated controls for the ANN algorithms. ++ Updated evaluation metrics. ++ Added 4 data sets. ++ Added 2 new vignettes. ++ Added estimators for FPR and FNR due to blocking in record linkage, +as proposed by Dasylva and Goussanou (2021). ++ Updated examples and documentation. + # version 0.1.0 -1. supports the following packages: `RcppHNSW`, `mlpack` and `RcppAnnoy`. -2. supports blocking for deduplication and record linkage. -3. metrics when true blocking is known based on `igraph::compare`. -4. testing with the `tinytest` package. -5. initial support for the `reclin2` package. -6. class `blocking` introduced. -7. s3method for printing. -8. first vignette added. -9. evaluation with standard metrics (recall, fpr etc) added, works with vector for deduplication. -10. added saving index for hnsw and annoy -11. `rnndescend` support added. ++ Supports the following packages: `RcppHNSW`, `mlpack` and `RcppAnnoy`. ++ Supports blocking for deduplication and record linkage. ++ Metrics when true blocking is known based on `igraph::compare`. ++ Testing with the `tinytest` package. ++ Initial support for the `reclin2` package. ++ Class `blocking` introduced. ++ S3 method for printing. ++ First vignette added. ++ Evaluation with standard metrics (recall, fpr, etc.) added, works with vector for deduplication. ++ Added saving index for hnsw and annoy. ++ `rnndescent` support added. diff --git a/R/eval.R b/R/eval.R index 9d9d92d..9e4ed6f 100644 --- a/R/eval.R +++ b/R/eval.R @@ -13,6 +13,7 @@ #' @returns #' Returns a list containing TP, FP, FN and TN. 
#' +#' @keywords internal eval_reclin <- function(pred_df, true_df) { pred_x_map <- unique(pred_df[, c("x", "block"), with = FALSE]) @@ -80,6 +81,7 @@ eval_reclin <- function(pred_df, true_df) { #' @returns #' Returns a list containing TP, FP, FN and TN. #' +#' @keywords internal eval_dedup <- function(pred_df, true_df) { pred_lbl <- melt(pred_df, @@ -140,6 +142,7 @@ eval_dedup <- function(pred_df, true_df) { #' @returns #' Returns a list containing evaluation metrics. #' +#' @keywords internal get_metrics <- function(TP, FP, FN, TN) { recall <- if (TP + FN != 0) TP / (TP + FN) else 0 @@ -172,6 +175,7 @@ get_metrics <- function(TP, FP, FN, TN) { #' @returns #' Returns a confusion matrix. #' +#' @keywords internal get_confusion <- function(TP, FP, FN, TN) { cm <- matrix(c(TP, FP, FN, TN), nrow = 2) diff --git a/R/method_annoy.R b/R/method_annoy.R index 2df7e40..8ec0693 100644 --- a/R/method_annoy.R +++ b/R/method_annoy.R @@ -21,8 +21,7 @@ #' @description #' See details of the \link[RcppAnnoy]{RcppAnnoy} package. #' -#' - +#' @keywords internal method_annoy <- function(x, y, k, diff --git a/R/method_hnsw.R b/R/method_hnsw.R index 4524f30..f0b7a37 100644 --- a/R/method_hnsw.R +++ b/R/method_hnsw.R @@ -24,7 +24,7 @@ #' @description #' See details of \link[RcppHNSW]{hnsw_build} and \link[RcppHNSW]{hnsw_search}. #' -#' +#' @keywords internal method_hnsw <- function(x, y, k, diff --git a/R/method_mlpack.R b/R/method_mlpack.R index 6608488..15f5c1e 100644 --- a/R/method_mlpack.R +++ b/R/method_mlpack.R @@ -18,6 +18,7 @@ #' @description #' See details of \link[mlpack]{lsh} and \link[mlpack]{knn}. #' +#' @keywords internal method_mlpack <- function(x, y, algo = c("lsh", "kd"), diff --git a/R/method_nnd.R b/R/method_nnd.R index 54b4c60..f707e6f 100644 --- a/R/method_nnd.R +++ b/R/method_nnd.R @@ -18,7 +18,7 @@ #' @description #' See details of \link[rnndescent]{rnnd_build} and \link[rnndescent]{rnnd_query}. 
#' -#' +#' @keywords internal method_nnd <- function(x, y, k, diff --git a/R/sentence_to_vector.R b/R/sentence_to_vector.R index 57c4365..fda1920 100644 --- a/R/sentence_to_vector.R +++ b/R/sentence_to_vector.R @@ -9,6 +9,7 @@ #' @param sentences a character vector, #' @param model a matrix containing word embeddings (e.g., GloVe). #' +#' @keywords internal sentence_to_vector <- function(sentences, model) { tokens <- text2vec::space_tokenizer(tolower(sentences)) diff --git a/_pkgdown.yml b/_pkgdown.yml index 6cfb1a4..d4f606a 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -1,4 +1,10 @@ -url: https://ncn-foreigners.github.io/blocking/ +url: https://ncn-foreigners.ue.poznan.pl/blocking/ template: bootstrap: 5 + math-rendering: katex + includes: + in_header: | + + +