Skip to content
Merged

Dev #23

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/pkgdown.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -46,3 +46,4 @@ jobs:
clean: false
branch: gh-pages
folder: docs

33 changes: 22 additions & 11 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,13 +1,24 @@
# version 1.0.0

+ Added support for word embeddings.
+ Updated controls for the ANN algorithms.
+ Updated evaluation metrics.
+ Added 4 data sets.
+ Added 2 new vignettes.
+ Added estimators for FPR and FNR due to blocking in record linkage,
as proposed by Dasylva and Goussanou (2021).
+ Updated examples and documentation.

# version 0.1.0

1. supports the following packages: `RcppHNSW`, `mlpack` and `RcppAnnoy`.
2. supports blocking for deduplication and record linkage.
3. metrics when true blocking is known based on `igraph::compare`.
4. testing with the `tinytest` package.
5. initial support for the `reclin2` package.
6. class `blocking` introduced.
7. s3method for printing.
8. first vignette added.
9. evaluation with standard metrics (recall, fpr etc) added, works with vector for deduplication.
10. added saving index for hnsw and annoy
11. `rnndescend` support added.
+ Supports the following packages: `RcppHNSW`, `mlpack` and `RcppAnnoy`.
+ Supports blocking for deduplication and record linkage.
+ Metrics based on `igraph::compare` for cases where the true blocking is known.
+ Testing with the `tinytest` package.
+ Initial support for the `reclin2` package.
+ Class `blocking` introduced.
+ S3 method for printing.
+ First vignette added.
+ Evaluation with standard metrics (recall, fpr, etc.) added, works with vector for deduplication.
+ Added saving index for hnsw and annoy.
+ `rnndescent` support added.
4 changes: 4 additions & 0 deletions R/eval.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#' @returns
#' Returns a list containing TP, FP, FN and TN.
#'
#' @keywords internal
eval_reclin <- function(pred_df, true_df) {

pred_x_map <- unique(pred_df[, c("x", "block"), with = FALSE])
Expand Down Expand Up @@ -80,6 +81,7 @@ eval_reclin <- function(pred_df, true_df) {
#' @returns
#' Returns a list containing TP, FP, FN and TN.
#'
#' @keywords internal
eval_dedup <- function(pred_df, true_df) {

pred_lbl <- melt(pred_df,
Expand Down Expand Up @@ -140,6 +142,7 @@ eval_dedup <- function(pred_df, true_df) {
#' @returns
#' Returns a list containing evaluation metrics.
#'
#' @keywords internal
get_metrics <- function(TP, FP, FN, TN) {

recall <- if (TP + FN != 0) TP / (TP + FN) else 0
Expand Down Expand Up @@ -172,6 +175,7 @@ get_metrics <- function(TP, FP, FN, TN) {
#' @returns
#' Returns a confusion matrix.
#'
#' @keywords internal
get_confusion <- function(TP, FP, FN, TN) {

cm <- matrix(c(TP, FP, FN, TN), nrow = 2)
Expand Down
3 changes: 1 addition & 2 deletions R/method_annoy.R
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,7 @@
#' @description
#' See details of the \link[RcppAnnoy]{RcppAnnoy} package.
#'
#'

#' @keywords internal
method_annoy <- function(x,
y,
k,
Expand Down
2 changes: 1 addition & 1 deletion R/method_hnsw.R
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
#' @description
#' See details of \link[RcppHNSW]{hnsw_build} and \link[RcppHNSW]{hnsw_search}.
#'
#'
#' @keywords internal
method_hnsw <- function(x,
y,
k,
Expand Down
1 change: 1 addition & 0 deletions R/method_mlpack.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#' @description
#' See details of \link[mlpack]{lsh} and \link[mlpack]{knn}.
#'
#' @keywords internal
method_mlpack <- function(x,
y,
algo = c("lsh", "kd"),
Expand Down
2 changes: 1 addition & 1 deletion R/method_nnd.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
#' @description
#' See details of \link[rnndescent]{rnnd_build} and \link[rnndescent]{rnnd_query}.
#'
#'
#' @keywords internal
method_nnd <- function(x,
y,
k,
Expand Down
1 change: 1 addition & 0 deletions R/sentence_to_vector.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#' @param sentences a character vector,
#' @param model a matrix containing word embeddings (e.g., GloVe).
#'
#' @keywords internal
sentence_to_vector <- function(sentences, model) {
tokens <- text2vec::space_tokenizer(tolower(sentences))

Expand Down
8 changes: 7 additions & 1 deletion _pkgdown.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,10 @@
url: https://ncn-foreigners.github.io/blocking/
url: https://ncn-foreigners.ue.poznan.pl/blocking/
template:
bootstrap: 5
math-rendering: katex
includes:
in_header: |
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/katex@0.16.11/dist/katex.min.css" integrity="sha384-nB0miv6/jRmo5UMMR1wu3Gz6NLsoTkbqJghGIsx//Rlm+ZU03BU6SQNC66uf4l5+" crossorigin="anonymous">
<script defer src="https://cdn.jsdelivr.net/npm/katex@0.16.11/dist/katex.min.js" integrity="sha384-7zkQWkzuo3B5mTepMUcHkMB5jZaolc2xDwL6VFqjFALcbeS9Ggm/Yr2r3Dy4lfFg" crossorigin="anonymous"></script>
<script defer src="https://cdn.jsdelivr.net/npm/katex@0.16.11/dist/contrib/auto-render.min.js" integrity="sha384-43gviWU0YVjaDtb/GhzOouOXtZMP/7XUzwPTstBeZFe/+rCMvRwr4yROQP43s0Xk" crossorigin="anonymous" onload="renderMathInElement(document.body);"></script>

Loading