Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
58d8250
fixing for nnd and annoy
ASuGuit Apr 5, 2025
1a77887
controls for hnsw
ASuGuit Apr 5, 2025
86c79d1
fixing for nnd, lsh and annoy
ASuGuit Apr 9, 2025
9864c2a
fixing for kd and lsh
ASuGuit Apr 9, 2025
ad4e863
word embeddings
ASuGuit Apr 17, 2025
0470a41
readr import removed:
Apr 25, 2025
290f667
tests and docs
ASuGuit Apr 25, 2025
64924b7
imports
ASuGuit Apr 25, 2025
7940bc8
build ignore
Apr 26, 2025
7c040bb
docs
ASuGuit Apr 26, 2025
ab58f2e
examples
ASuGuit Apr 26, 2025
41e61c2
printing update
ASuGuit May 1, 2025
d38fd5c
controls
ASuGuit May 1, 2025
02fe64a
minor updates
ASuGuit May 1, 2025
de63bac
previous printing
ASuGuit May 1, 2025
a16c7db
eval improvement
ASuGuit May 2, 2025
f0e67cf
tests and docs
ASuGuit May 3, 2025
8f1f5e0
estimate_errors function
ASuGuit May 17, 2025
a29f644
numerical improvements
ASuGuit May 18, 2025
b57b2c1
errors estimation
ASuGuit May 19, 2025
6bacdbb
description updated
May 21, 2025
1a97ba6
updates
ASuGuit May 22, 2025
56a3e53
global variables removed
ASuGuit May 23, 2025
207f387
global variables removed
ASuGuit May 23, 2025
f70c7f9
docs
ASuGuit May 23, 2025
dfd6adb
data
ASuGuit May 23, 2025
07586ff
census data
ASuGuit May 23, 2025
0ea3be2
CIS data
ASuGuit May 24, 2025
f1756a4
v1
ASuGuit May 24, 2025
309a753
notebooks
May 24, 2025
220d272
changes to data, vignette etc
May 24, 2025
b610d14
v1
ASuGuit May 25, 2025
23440ef
data and vignettes
ASuGuit May 25, 2025
628cd51
vignettes and examples
ASuGuit May 26, 2025
b2f4f0a
foreigners data
ASuGuit May 26, 2025
a7ea139
rproj removed
May 27, 2025
b43db4f
small changes
May 27, 2025
1e2924d
upload
May 27, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,4 @@ playground
^docs$
^pkgdown$
check
misc
7 changes: 0 additions & 7 deletions .Rproj.user/E3DB6272/build_options

This file was deleted.

2 changes: 0 additions & 2 deletions .Rproj.user/E3DB6272/copilot_options

This file was deleted.

13 changes: 0 additions & 13 deletions .Rproj.user/E3DB6272/pcs/files-pane.pper

This file was deleted.

4 changes: 0 additions & 4 deletions .Rproj.user/E3DB6272/pcs/source-pane.pper

This file was deleted.

14 changes: 0 additions & 14 deletions .Rproj.user/E3DB6272/pcs/windowlayoutstate.pper

This file was deleted.

5 changes: 0 additions & 5 deletions .Rproj.user/E3DB6272/pcs/workbench-pane.pper

This file was deleted.

6 changes: 0 additions & 6 deletions .Rproj.user/E3DB6272/sources/prop/67301661

This file was deleted.

6 changes: 0 additions & 6 deletions .Rproj.user/E3DB6272/sources/prop/AB62ED0C

This file was deleted.

6 changes: 0 additions & 6 deletions .Rproj.user/E3DB6272/sources/prop/AE0C7A4A

This file was deleted.

6 changes: 0 additions & 6 deletions .Rproj.user/E3DB6272/sources/prop/B8117F7C

This file was deleted.

37 changes: 0 additions & 37 deletions .Rproj.user/E3DB6272/sources/prop/INDEX

This file was deleted.

Empty file.
41 changes: 13 additions & 28 deletions .Rproj.user/shared/notebooks/paths
Original file line number Diff line number Diff line change
@@ -1,28 +1,13 @@
/Users/berenz/Downloads/Symulacje - MC, MM, BP.Rmd="6F10509D"
/Users/berenz/Downloads/run_splink_benchmarks_in_ec2-0.0.3/README.md="CA7B3BF6"
/Users/berenz/mac/nauka/ncn-foreigners/software/blocking/.gitignore="C912F95E"
/Users/berenz/mac/nauka/ncn-foreigners/software/blocking/DESCRIPTION="019D16E4"
/Users/berenz/mac/nauka/ncn-foreigners/software/blocking/R/controls.R="5BC637B7"
/Users/berenz/mac/nauka/ncn-foreigners/software/blocking/R/method_annoy.R="684202BA"
/Users/berenz/mac/nauka/ncn-foreigners/software/blocking/R/method_hnsw.R="A4FAA5A3"
/Users/berenz/mac/nauka/ncn-foreigners/software/blocking/R/method_mlpack.R="B6A90565"
/Users/berenz/mac/nauka/ncn-foreigners/software/blocking/R/method_nnd.R="87049873"
/Users/berenz/mac/nauka/ncn-foreigners/software/blocking/R/methods.R="B7F84C4B"
/Users/berenz/mac/nauka/ncn-foreigners/software/blocking/R/reclin2_pair_ann.R="1D89EE3E"
/Users/berenz/mac/nauka/ncn-foreigners/software/blocking/README.Rmd="CBB944CE"
/Users/berenz/mac/nauka/ncn-foreigners/software/blocking/inst/tinytest/index-colnames.txt="0350B51E"
/Users/berenz/mac/nauka/ncn-foreigners/software/blocking/inst/tinytest/test_annoy.R="4302FC18"
/Users/berenz/mac/nauka/ncn-foreigners/software/blocking/inst/tinytest/test_blocking.R="DABEA252"
/Users/berenz/mac/nauka/ncn-foreigners/software/blocking/inst/tinytest/test_data.R="9D1011B0"
/Users/berenz/mac/nauka/ncn-foreigners/software/blocking/inst/tinytest/test_hnsw.R="2E19A832"
/Users/berenz/mac/nauka/ncn-foreigners/software/blocking/inst/tinytest/test_mlpack.R="51D2EAA1"
/Users/berenz/mac/nauka/ncn-foreigners/software/blocking/inst/tinytest/test_print.R="AA7835F7"
/Users/berenz/mac/nauka/ncn-foreigners/software/blocking/inst/tinytest/test_reclin2.R="E3E08D07"
/Users/berenz/mac/nauka/ncn-foreigners/software/blocking/inst/tinytest/test_true_blocks.R="8B9CECC7"
/Users/berenz/mac/nauka/ncn-foreigners/software/blocking/misc/hnsw-nndesc.Rmd="F39A0093"
/Users/berenz/mac/nauka/ncn-foreigners/software/blocking/tests/tinytest.R="D6BBCDC1"
/Users/berenz/mac/nauka/ncn-foreigners/software/blocking/vignettes/v1-deduplication.Rmd="9D34DD44"
/Users/berenz/mac/nauka/ncn-foreigners/software/blocking/vignettes/v2-reclin.Rmd="289A4D2F"
/Users/berenz/mac/nauka/ncn-foreigners/software/blocking/vignettes/v3-evaluation.Rmd="E778A54F"
/Users/berenz/mac/nauka/ncn-foreigners/software/blocking/vignettes/v4-integration.Rmd="E3EFC8F1"
/Users/berenz/mac/nauka/ncn-foreigners/software/blocking/vignettes/v5-bigdata.Rmd="335CBF49"
/Users/berenz/mac/nauka/ncn-foreigners/software/blocking/.gitignore="DF69E985"
/Users/berenz/mac/nauka/ncn-foreigners/software/blocking/DESCRIPTION="CB8B0A33"
/Users/berenz/mac/nauka/ncn-foreigners/software/blocking/NAMESPACE="EBD0CE51"
/Users/berenz/mac/nauka/ncn-foreigners/software/blocking/R/blocking.R="25D2A128"
/Users/berenz/mac/nauka/ncn-foreigners/software/blocking/R/controls.R="9BA1FC11"
/Users/berenz/mac/nauka/ncn-foreigners/software/blocking/R/data.R="2F3A9433"
/Users/berenz/mac/nauka/ncn-foreigners/software/blocking/R/eval.R="AACD4DF9"
/Users/berenz/mac/nauka/ncn-foreigners/software/blocking/README.Rmd="610BE353"
/Users/berenz/mac/nauka/ncn-foreigners/software/blocking/man/cis.Rd="485EF5A5"
/Users/berenz/mac/nauka/ncn-foreigners/software/blocking/tests/tinytest.R="AADD0AFB"
/Users/berenz/mac/nauka/ncn-foreigners/software/blocking/vignettes/v1-deduplication.Rmd="DCCF1C6F"
/Users/berenz/mac/nauka/ncn-foreigners/software/blocking/vignettes/v2-reclin.Rmd="7253B478"
/Users/berenz/mac/nauka/ncn-foreigners/software/blocking/vignettes/v3-integration.Rmd="A9B0ECDA"
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,5 @@ playground
docs
inst/doc
misc
vignettes/.*R
vignettes/.*html
33 changes: 21 additions & 12 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -7,29 +7,38 @@ Authors@R:
family = "Beręsewicz",
role = c("aut", "cre"),
email = "[email protected]",
comment = c(ORCID = "0000-0002-8281-4301")))
comment = c(ORCID = "0000-0002-8281-4301")),
person(given = "Adam",
family = "Struzik",
role = c("aut", "ctr"),
email = "[email protected]"))
Description: An R package that uses various approximate nearest neighbours algorithms and graphs to block records for data deduplication / record linkage / entity resolution.
License: GPL-3
Encoding: UTF-8
LazyData: true
URL: https://github.com/ncn-foreigners/blocking, https://ncn-foreigners.github.io/blocking/
BugReports: https://github.com/ncn-foreigners/blocking
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.3.1
RoxygenNote: 7.3.2
Imports:
text2vec,
tokenizers,
RcppHNSW,
RcppAnnoy,
mlpack,
rnndescent,
igraph,
data.table,
RcppAlgos,
methods
text2vec,
tokenizers,
RcppHNSW,
RcppAnnoy,
mlpack,
rnndescent,
igraph,
data.table,
RcppAlgos,
methods,
readr,
utils,
Matrix
Suggests:
tinytest,
reclin2,
knitr,
rmarkdown
VignetteBuilder: knitr
Depends:
R (>= 3.5)
18 changes: 18 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,11 +1,21 @@
# Generated by roxygen2: do not edit by hand

S3method(print,blocking)
S3method(print,est_block_error)
export(blocking)
export(control_annoy)
export(control_hnsw)
export(control_kd)
export(control_lsh)
export(control_nnd)
export(controls_ann)
export(controls_txt)
export(est_block_error)
export(pair_ann)
import(data.table)
importFrom(Matrix,colSums)
importFrom(Matrix,rowSums)
importFrom(Matrix,sparseMatrix)
importFrom(RcppAlgos,comboGeneral)
importFrom(RcppAnnoy,AnnoyAngular)
importFrom(RcppAnnoy,AnnoyEuclidean)
Expand All @@ -25,12 +35,20 @@ importFrom(igraph,make_clusters)
importFrom(methods,new)
importFrom(mlpack,knn)
importFrom(mlpack,lsh)
importFrom(readr,read_table)
importFrom(rnndescent,rnnd_build)
importFrom(rnndescent,rnnd_query)
importFrom(stats,dist)
importFrom(stats,dpois)
importFrom(stats,runif)
importFrom(stats,setNames)
importFrom(text2vec,create_dtm)
importFrom(text2vec,create_vocabulary)
importFrom(text2vec,itoken)
importFrom(text2vec,itoken_parallel)
importFrom(text2vec,space_tokenizer)
importFrom(text2vec,vocab_vectorizer)
importFrom(utils,download.file)
importFrom(utils,setTxtProgressBar)
importFrom(utils,txtProgressBar)
importFrom(utils,unzip)
Loading
Loading