proxy: Distance and Similarity Measures
距離と類似度の測定
- CRAN: http://cran.r-project.org/web/packages/proxy/index.html
- Vignettes:
> library(proxy)
Attaching package: 'proxy'
The following objects are masked from 'package:stats':
as.dist, dist
The following object is masked from 'package:base':
as.matrix
バージョン: 0.4.15
. |
---|
dist Matrix Distance/Similarity Computation |
pr_DB Registry of proximities |
rowSums.dist Row Sums/Means of Sparse Symmetric Matrices |
関数名 | 概略 |
---|---|
dist |
Matrix Distance/Similarity Computation |
pr_DB |
Registry of proximities |
rowSums.dist |
Row Sums/Means of Sparse Symmetric Matrices |
dist / simil
ref) base::dist()
Arguments
- x
- y
- method...
Euclidean
.... - diag
- upper
- pairwise
- by_rows
- convert_similarities, convert_distances
- auto_convert_data_frames
- FUN
- ...
> dist
function (x, y = NULL, method = NULL, ..., diag = FALSE, upper = FALSE,
pairwise = FALSE, by_rows = TRUE, convert_similarities = TRUE,
auto_convert_data_frames = TRUE)
{
if ((is.function(y) || is.character(y)) && is.null(method)) {
method <- y
y <- NULL
}
is.n_l_c <- function(x) all(sapply(x, is.numeric)) || all(sapply(x,
is.logical)) || all(sapply(x, is.complex))
if (is.data.frame(x) && auto_convert_data_frames && is.n_l_c(x))
x <- as.matrix(x)
if (is.data.frame(y) && !is.null(y) && auto_convert_data_frames &&
is.n_l_c(y))
y <- as.matrix(y)
if (is.vector(x) && is.atomic(x))
x <- as.matrix(x)
if (!is.null(y) && is.vector(y) && is.atomic(y))
y <- as.matrix(y)
reg_entry <- NULL
if (is.null(method))
method <- if (is.data.frame(x))
"Gower"
else if (is.logical(x))
"Jaccard"
else "Euclidean"
if (!is.function(method))
reg_entry <- if (inherits(method, "proxy_registry_entry"))
method
else pr_DB$get_entry(method)
if (!is.data.frame(x) && !is.matrix(x) && !is.list(x))
stop("Can only handle data frames, vectors, matrices, and lists!")
if (is.data.frame(x) && !by_rows)
stop("Cannot transpose mixed data frames")
if (!is.null(y)) {
if (is.data.frame(x) && !is.data.frame(y) || is.matrix(x) &&
!is.matrix(y) || is.list(x) && !is.list(y))
stop("x and y must be of same type.")
if (is.matrix(x) && is.matrix(y) || is.data.frame(x) &&
is.data.frame(y))
if (by_rows && (ncol(x) != ncol(y)))
stop("x and y must be conform in columns.")
else if (!by_rows && (nrow(x) != nrow(y)))
stop("x and y must be conform in rows.")
}
params <- list(...)
if (!is.null(reg_entry)) {
if (!is.na(reg_entry$PREFUN)) {
tmp <- do.call(reg_entry$PREFUN, c(list(x, y, pairwise,
params, reg_entry)))
if (!is.null(tmp)) {
x <- tmp$x
y <- tmp$y
pairwise <- tmp$pairwise
params <- tmp$p
reg_entry <- tmp$reg_entry
}
}
method <- reg_entry$FUN
}
.proxy_external <- function(CFUN, x, y) do.call(".External",
c(list(CFUN, x, y, pairwise, if (!is.function(method)) get(method) else method),
params))
result <- if (!is.null(reg_entry) && !reg_entry$loop) {
if (!by_rows && !is.list(x)) {
x <- t(x)
if (!is.null(y))
y <- t(y)
}
if (reg_entry$C_FUN)
do.call(".Call", c(list(method), list(x), list(y),
pairwise, params, list(PACKAGE = reg_entry$PACKAGE)))
else {
if (!is.null(reg_entry$PACKAGE))
do.call(method, c(list(x), list(y), params),
envir = asNamespace(reg_entry$PACKAGE))
else do.call(method, c(list(x), list(y), params))
}
}
else if (is.null(y)) {
if (!by_rows && !is.list(x))
x <- t(x)
if (is.list(x) && !is.null(reg_entry) && reg_entry$abcd)
x <- do.call("rbind", x)
if (is.matrix(x) && !is.null(reg_entry) && reg_entry$abcd)
.proxy_external(R_apply_dist_binary_matrix, x !=
0, NULL)
else if (is.matrix(x))
.proxy_external(R_apply_dist_matrix, x, NULL)
else if (is.list(x) && !(is.data.frame(x) && by_rows))
.proxy_external(R_apply_dist_list, x, NULL)
else .proxy_external(R_apply_dist_data_frame, x, NULL)
}
else {
if (!by_rows && !is.list(x)) {
x <- t(x)
y <- t(y)
}
if (is.list(x) && !is.null(reg_entry) && reg_entry$abcd) {
x <- do.call("rbind", x)
y <- do.call("rbind", x)
}
if (is.matrix(x) && !is.null(reg_entry) && reg_entry$abcd)
.proxy_external(R_apply_dist_binary_matrix, x !=
0, y != 0)
else if (is.matrix(x))
.proxy_external(R_apply_dist_matrix, x, y)
else if (is.list(x) && !(is.data.frame(x) && by_rows))
.proxy_external(R_apply_dist_list, x, y)
else .proxy_external(R_apply_dist_data_frame, x, y)
}
if (is.matrix(result) && is.null(dimnames(result)))
if (is.list(x) && !is.data.frame(x)) {
rownames(result) <- names(x)
colnames(result) <- names(y)
}
else if (by_rows) {
rownames(result) <- rownames(x)
colnames(result) <- rownames(y)
}
else {
rownames(result) <- colnames(x)
colnames(result) <- colnames(y)
}
if (!is.null(reg_entry)) {
if (!is.na(reg_entry$POSTFUN))
result <- do.call(reg_entry$POSTFUN, c(list(result,
params)))
if (!reg_entry$distance && !(is.logical(convert_similarities) &&
!convert_similarities)) {
result <- if (is.function(convert_similarities) ||
is.character(convert_similarities))
do.call(convert_similarities, list(result))
else if (is.null(reg_entry$convert))
pr_simil2dist(result)
else do.call(reg_entry$convert, list(result))
}
method <- reg_entry$names[1]
}
result <- if (is.matrix(result))
structure(result, class = "crossdist")
else if (inherits(result, "dist"))
structure(result, Diag = diag, Upper = upper)
else structure(result, class = "pairdist")
structure(result, method = if (is.character(method))
method
else if (missing(method))
deparse(substitute(y))
else deparse(substitute(method)), call = match.call())
}
<environment: namespace:proxy>
> sample(c(FALSE, TRUE), 8, rep = TRUE) %>% matrix(ncol = 2) %>% {
+ dist(., method = "Jaccard") %>% print()
+ dist(., method = "")
+ }
1 2 3
2 1.0
3 0.0 1.0
4 1.0 0.5 1.0
Error in .get_entry_index(name, stop_if_missing): Entry "" not in registry.
pr_DB
> pr_DB
An object of class "registry" with 48 entries.
> summary(pr_DB)
* Similarity measures:
Braun-Blanquet, Chi-squared, correlation, cosine, Cramer, Dice,
eJaccard, Fager, Faith, Gower, Hamman, Jaccard, Kulczynski1,
Kulczynski2, Michael, Mountford, Mozley, Ochiai, Pearson, Phi,
Phi-squared, Russel, simple matching, Simpson, Stiles, Tanimoto,
Tschuprow, Yule, Yule2
* Distance measures:
Bhjattacharyya, Bray, Canberra, Chord, divergence, Euclidean,
fJaccard, Geodesic, Hellinger, Kullback, Levenshtein, Mahalanobis,
Manhattan, Minkowski, Podani, Soergel, supremum, Wave, Whittaker
> pr_DB$get_entry("Jaccard")
names Jaccard, binary, Reyssac, Roux
FUN R_bjaccard
distance FALSE
PREFUN pr_Jaccard_prefun
POSTFUN NA
convert pr_simil2dist
type binary
loop FALSE
C_FUN TRUE
PACKAGE proxy
abcd FALSE
formula a / (a + b + c)
reference Jaccard, P. (1908). Nouvelles recherches sur la
distribution florale. Bull. Soc. Vaud. Sci. Nat., 44,
pp. 223--270.
description The Jaccard Similarity (C implementation) for binary
data. It is the proportion of (TRUE, TRUE) pairs, but
not considering (FALSE, FALSE) pairs. So it compares
the intersection with the union of object sets.
rowSums.dist / rowMeans.dist / colSums.dist / colMeans.dist
> runif(10 * 2) %>% matrix(ncol = 2) %>% dist() %>%
+ as.matrix() %>% {
+ print(.)
+ rowSums(.)
+ }
1 2 3 4 5 6 7
1 0.0000000 0.5604138 0.3630898 0.4007057 0.3708736 0.5757461 0.4191103
2 0.5604138 0.0000000 0.3948037 0.4710453 0.6714994 0.6890793 0.9768773
3 0.3630898 0.3948037 0.0000000 0.5868850 0.2830718 0.8169070 0.7002736
4 0.4007057 0.4710453 0.5868850 0.0000000 0.7372394 0.2356526 0.7511751
5 0.3708736 0.6714994 0.2830718 0.7372394 0.0000000 0.9384471 0.5173843
6 0.5757461 0.6890793 0.8169070 0.2356526 0.9384471 0.0000000 0.8380427
7 0.4191103 0.9768773 0.7002736 0.7511751 0.5173843 0.8380427 0.0000000
8 0.4858666 0.8571609 0.8282448 0.4099925 0.8443178 0.3318496 0.5761292
9 0.2427433 0.5009358 0.4948941 0.1632721 0.5977090 0.3410606 0.5931067
10 0.5072823 0.8310136 0.4376350 0.8913188 0.1646215 1.0825972 0.5322357
8 9 10
1 0.4858666 0.2427433 0.5072823
2 0.8571609 0.5009358 0.8310136
3 0.8282448 0.4948941 0.4376350
4 0.4099925 0.1632721 0.8913188
5 0.8443178 0.5977090 0.1646215
6 0.3318496 0.3410606 1.0825972
7 0.5761292 0.5931067 0.5322357
8 0.0000000 0.3640393 0.9548987
9 0.3640393 0.0000000 0.7447389
10 0.9548987 0.7447389 0.0000000
1 2 3 4 5 6 7 8
3.925832 5.952829 4.905805 4.647287 5.125164 5.849382 5.904335 5.652499
9 10
4.042500 6.146342