widyr: Widen, Process, then Re-Tidy Data

> library(widyr)

バージョン: 0.0.0.9000


関数名 概略
cast_dual Cast an item-feature dataset into a wide matrix while keeping the table
cor_sparse Find the Pearson correlation of a sparse matrix efficiently
pairwise_cor Correlations of pairs of items
pairwise_count Count pairs of items within a group
pairwise_dist Distances of pairs of items
squarely A special case of the widely adverb for creating tidy square matrices
widely Adverb for functions that operate on matrices in "wide" format

cast_dual

> library(gapminder)
> library(dplyr)
> library(magrittr)  # provides the %<>% compound-assignment pipe
> 
> gapminder %<>%
+   select(country, continent, year, lifeExp)
> head(gapminder)
# A tibble: 6 x 4
      country continent  year lifeExp
       <fctr>    <fctr> <int>   <dbl>
1 Afghanistan      Asia  1952  28.801
2 Afghanistan      Asia  1957  30.332
3 Afghanistan      Asia  1962  31.997
4 Afghanistan      Asia  1967  34.020
5 Afghanistan      Asia  1972  36.088
6 Afghanistan      Asia  1977  38.438
> gapminder %>%
+   cast_dual(country, year, lifeExp, sparse = FALSE)
$tbl
# A tibble: 142 x 2
       country continent
        <fctr>    <fctr>
1  Afghanistan      Asia
2      Albania      Asia
3      Algeria      Asia
4       Angola      Asia
5    Argentina      Asia
6    Australia      Asia
7      Austria      Asia
8      Bahrain      Asia
9   Bangladesh      Asia
10     Belgium      Asia
# ... with 132 more rows

$matrix
                           1952     1957     1962     1967     1972
Afghanistan              28.801 30.33200 31.99700 34.02000 36.08800
Albania                  55.230 59.28000 64.82000 66.22000 67.69000
Algeria                  43.077 45.68500 48.30300 51.40700 54.51800
Angola                   30.015 31.99900 34.00000 35.98500 37.92800
Argentina                62.485 64.39900 65.14200 65.63400 67.06500
Australia                69.120 70.33000 70.93000 71.10000 71.93000
Austria                  66.800 67.48000 69.54000 70.14000 70.63000
Bahrain                  50.939 53.83200 56.92300 59.92300 63.30000
Bangladesh               37.484 39.34800 41.21600 43.45300 45.25200
Belgium                  68.000 69.24000 70.25000 70.94000 71.44000
Benin                    38.223 40.35800 42.61800 44.88500 47.01400
Bolivia                  40.414 41.89000 43.42800 45.03200 46.71400
Bosnia and Herzegovina   53.820 58.45000 61.93000 64.79000 67.45000
Botswana                 47.622 49.61800 51.52000 53.29800 56.02400
Brazil                   50.917 53.28500 55.66500 57.63200 59.50400
Bulgaria                 59.600 66.61000 69.51000 70.42000 70.90000
Burkina Faso             31.975 34.90600 37.81400 40.69700 43.59100
Burundi                  39.031 40.53300 42.04500 43.54800 44.05700
Cambodia                 39.417 41.36600 43.41500 45.41500 40.31700
Cameroon                 38.523 40.42800 42.64300 44.79900 47.04900
Canada                   68.750 69.96000 71.30000 72.13000 72.88000
Central African Republic 35.463 37.46400 39.47500 41.47800 43.45700
Chad                     38.092 39.88100 41.71600 43.60100 45.56900
Chile                    54.745 56.07400 57.92400 60.52300 63.44100
China                    44.000 50.54896 44.50136 58.38112 63.11888
Colombia                 50.643 55.11800 57.86300 59.96300 61.62300
Comoros                  40.715 42.46000 44.46700 46.47200 48.94400
Congo, Dem. Rep.         39.143 40.65200 42.12200 44.05600 45.98900
Congo, Rep.              42.111 45.05300 48.43500 52.04000 54.90700
Costa Rica               57.206 60.02600 62.84200 65.42400 67.84900
Cote d'Ivoire            40.477 42.46900 44.93000 47.35000 49.80100
Croatia                  61.210 64.77000 67.13000 68.50000 69.61000
Cuba                     59.421 62.32500 65.24600 68.29000 70.72300
Czech Republic           66.870 69.03000 69.90000 70.38000 70.29000
Denmark                  70.780 71.81000 72.35000 72.96000 73.47000
Djibouti                 34.812 37.32800 39.69300 42.07400 44.36600
Dominican Republic       45.928 49.82800 53.45900 56.75100 59.63100
Ecuador                  48.357 51.35600 54.64000 56.67800 58.79600
Egypt                    41.893 44.44400 46.99200 49.29300 51.13700
El Salvador              45.262 48.57000 52.30700 55.85500 58.20700
Equatorial Guinea        34.482 35.98300 37.48500 38.98700 40.51600
Eritrea                  35.928 38.04700 40.15800 42.18900 44.14200
Ethiopia                 34.078 36.66700 40.05900 42.11500 43.51500
Finland                  66.550 67.49000 68.75000 69.83000 70.87000
France                   67.410 68.93000 70.51000 71.55000 72.38000
Gabon                    37.003 38.99900 40.48900 44.59800 48.69000
Gambia                   30.000 32.06500 33.89600 35.85700 38.30800
Germany                  67.500 69.10000 70.30000 70.80000 71.00000
Ghana                    43.149 44.77900 46.45200 48.07200 49.87500
Greece                   65.860 67.86000 69.51000 71.00000 72.34000
Guatemala                42.023 44.14200 46.95400 50.01600 53.73800
Guinea                   33.609 34.55800 35.75300 37.19700 38.84200
Guinea-Bissau            32.500 33.48900 34.48800 35.49200 36.48600
Haiti                    37.579 40.69600 43.59000 46.24300 48.04200
Honduras                 41.912 44.66500 48.04100 50.92400 53.88400
Hong Kong, China         60.960 64.75000 67.65000 70.00000 72.00000
Hungary                  64.030 66.41000 67.96000 69.50000 69.76000
Iceland                  72.490 73.47000 73.68000 73.73000 74.46000
India                    37.373 40.24900 43.60500 47.19300 50.65100
Indonesia                37.468 39.91800 42.51800 45.96400 49.20300
Iran                     44.869 47.18100 49.32500 52.46900 55.23400
Iraq                     45.320 48.43700 51.45700 54.45900 56.95000
Ireland                  66.910 68.90000 70.29000 71.08000 71.28000
Israel                   65.390 67.84000 69.39000 70.75000 71.63000
Italy                    65.940 67.81000 69.24000 71.06000 72.19000
Jamaica                  58.530 62.61000 65.61000 67.51000 69.00000
Japan                    63.030 65.50000 68.73000 71.43000 73.42000
Jordan                   43.158 45.66900 48.12600 51.62900 56.52800
Kenya                    42.270 44.68600 47.94900 50.65400 53.55900
Korea, Dem. Rep.         50.056 54.08100 56.65600 59.94200 63.98300
Korea, Rep.              47.453 52.68100 55.29200 57.71600 62.61200
Kuwait                   55.565 58.03300 60.47000 64.62400 67.71200
Lebanon                  55.928 59.48900 62.09400 63.87000 65.42100
Lesotho                  42.138 45.04700 47.74700 48.49200 49.76700
Liberia                  38.480 39.48600 40.50200 41.53600 42.61400
Libya                    42.723 45.28900 47.80800 50.22700 52.77300
Madagascar               36.681 38.86500 40.84800 42.88100 44.85100
Malawi                   36.256 37.20700 38.41000 39.48700 41.76600
Malaysia                 48.463 52.10200 55.73700 59.37100 63.01000
Mali                     33.685 35.30700 36.93600 38.48700 39.97700
Mauritania               40.543 42.33800 44.24800 46.28900 48.43700
Mauritius                50.986 58.08900 60.24600 61.55700 62.94400
Mexico                   50.789 55.19000 58.29900 60.11000 62.36100
                             1977   1982   1987   1992   1997   2002
Afghanistan              38.43800 39.854 40.822 41.674 41.763 42.129
Albania                  68.93000 70.420 72.000 71.581 72.950 75.651
Algeria                  58.01400 61.368 65.799 67.744 69.152 70.994
Angola                   39.48300 39.942 39.906 40.647 40.963 41.003
Argentina                68.48100 69.942 70.774 71.868 73.275 74.340
Australia                73.49000 74.740 76.320 77.560 78.830 80.370
Austria                  72.17000 73.180 74.940 76.040 77.510 78.980
Bahrain                  65.59300 69.052 70.750 72.601 73.925 74.795
Bangladesh               46.92300 50.009 52.819 56.018 59.412 62.013
Belgium                  72.80000 73.930 75.350 76.460 77.530 78.320
Benin                    49.19000 50.904 52.337 53.919 54.777 54.406
Bolivia                  50.02300 53.859 57.251 59.957 62.050 63.883
Bosnia and Herzegovina   69.86000 70.690 71.140 72.178 73.244 74.090
Botswana                 59.31900 61.484 63.622 62.745 52.556 46.634
Brazil                   61.48900 63.336 65.205 67.057 69.388 71.006
Bulgaria                 70.81000 71.080 71.340 71.190 70.320 72.140
Burkina Faso             46.13700 48.122 49.557 50.260 50.324 50.650
Burundi                  45.91000 47.471 48.211 44.736 45.326 47.360
Cambodia                 31.22000 50.957 53.914 55.803 56.534 56.752
Cameroon                 49.35500 52.961 54.985 54.314 52.199 49.856
Canada                   74.21000 75.760 76.860 77.950 78.610 79.770
Central African Republic 46.77500 48.295 50.485 49.396 46.066 43.308
Chad                     47.38300 49.517 51.051 51.724 51.573 50.525
Chile                    67.05200 70.565 72.492 74.126 75.816 77.860
China                    63.96736 65.525 67.274 68.690 70.426 72.028
Colombia                 63.83700 66.653 67.768 68.421 70.313 71.682
Comoros                  50.93900 52.933 54.926 57.939 60.660 62.974
Congo, Dem. Rep.         47.80400 47.784 47.412 45.548 42.587 44.966
Congo, Rep.              55.62500 56.695 57.470 56.433 52.962 52.970
Costa Rica               70.75000 73.450 74.752 75.713 77.260 78.123
Cote d'Ivoire            52.37400 53.983 54.655 52.044 47.991 46.832
Croatia                  70.64000 70.460 71.520 72.527 73.680 74.876
Cuba                     72.64900 73.717 74.174 74.414 76.151 77.158
Czech Republic           70.71000 70.960 71.580 72.400 74.010 75.510
Denmark                  74.69000 74.630 74.800 75.330 76.110 77.180
Djibouti                 46.51900 48.812 50.040 51.604 53.157 53.373
Dominican Republic       61.78800 63.727 66.046 68.457 69.957 70.847
Ecuador                  61.31000 64.342 67.231 69.613 72.312 74.173
Egypt                    53.31900 56.006 59.797 63.674 67.217 69.806
El Salvador              56.69600 56.604 63.154 66.798 69.535 70.734
Equatorial Guinea        42.02400 43.662 45.664 47.545 48.245 49.348
Eritrea                  44.53500 43.890 46.453 49.991 53.378 55.240
Ethiopia                 44.51000 44.916 46.684 48.091 49.402 50.725
Finland                  72.52000 74.550 74.830 75.700 77.130 78.370
France                   73.83000 74.890 76.340 77.460 78.640 79.590
Gabon                    52.79000 56.564 60.190 61.366 60.461 56.761
Gambia                   41.84200 45.580 49.265 52.644 55.861 58.041
Germany                  72.50000 73.800 74.847 76.070 77.340 78.670
Ghana                    51.75600 53.744 55.729 57.501 58.556 58.453
Greece                   73.68000 75.240 76.670 77.030 77.869 78.256
Guatemala                56.02900 58.137 60.782 63.373 66.322 68.978
Guinea                   40.76200 42.891 45.552 48.576 51.455 53.676
Guinea-Bissau            37.46500 39.327 41.245 43.266 44.873 45.504
Haiti                    49.92300 51.461 53.636 55.089 56.671 58.137
Honduras                 57.40200 60.909 64.492 66.399 67.659 68.565
Hong Kong, China         73.60000 75.450 76.200 77.601 80.000 81.495
Hungary                  69.95000 69.390 69.580 69.170 71.040 72.590
Iceland                  76.11000 76.990 77.230 78.770 78.950 80.500
India                    54.20800 56.596 58.553 60.223 61.765 62.879
Indonesia                52.70200 56.159 60.137 62.681 66.041 68.588
Iran                     57.70200 59.620 63.040 65.742 68.042 69.451
Iraq                     60.41300 62.038 65.044 59.461 58.811 57.046
Ireland                  72.03000 73.100 74.360 75.467 76.122 77.783
Israel                   73.06000 74.450 75.600 76.930 78.269 79.696
Italy                    73.48000 74.980 76.420 77.440 78.820 80.240
Jamaica                  70.11000 71.210 71.770 71.766 72.262 72.047
Japan                    75.38000 77.110 78.670 79.360 80.690 82.000
Jordan                   61.13400 63.739 65.869 68.015 69.772 71.263
Kenya                    56.15500 58.766 59.339 59.285 54.407 50.992
Korea, Dem. Rep.         67.15900 69.100 70.647 69.978 67.727 66.662
Korea, Rep.              64.76600 67.123 69.810 72.244 74.647 77.045
Kuwait                   69.34300 71.309 74.174 75.190 76.156 76.904
Lebanon                  66.09900 66.983 67.926 69.292 70.265 71.028
Lesotho                  52.20800 55.078 57.180 59.685 55.558 44.593
Liberia                  43.76400 44.852 46.027 40.802 42.221 43.753
Libya                    57.44200 62.155 66.234 68.755 71.555 72.737
Madagascar               46.88100 48.969 49.350 52.214 54.978 57.286
Malawi                   43.76700 45.642 47.457 49.420 47.495 45.009
Malaysia                 65.25600 68.000 69.500 70.693 71.938 73.044
Mali                     41.71400 43.916 46.364 48.388 49.903 51.818
Mauritania               50.85200 53.599 56.145 58.333 60.430 62.247
Mauritius                64.93000 66.711 68.740 69.745 70.736 71.954
Mexico                   65.03200 67.405 69.498 71.455 73.670 74.902
                           2007
Afghanistan              43.828
Albania                  76.423
Algeria                  72.301
Angola                   42.731
Argentina                75.320
Australia                81.235
Austria                  79.829
Bahrain                  75.635
Bangladesh               64.062
Belgium                  79.441
Benin                    56.728
Bolivia                  65.554
Bosnia and Herzegovina   74.852
Botswana                 50.728
Brazil                   72.390
Bulgaria                 73.005
Burkina Faso             52.295
Burundi                  49.580
Cambodia                 59.723
Cameroon                 50.430
Canada                   80.653
Central African Republic 44.741
Chad                     50.651
Chile                    78.553
China                    72.961
Colombia                 72.889
Comoros                  65.152
Congo, Dem. Rep.         46.462
Congo, Rep.              55.322
Costa Rica               78.782
Cote d'Ivoire            48.328
Croatia                  75.748
Cuba                     78.273
Czech Republic           76.486
Denmark                  78.332
Djibouti                 54.791
Dominican Republic       72.235
Ecuador                  74.994
Egypt                    71.338
El Salvador              71.878
Equatorial Guinea        51.579
Eritrea                  58.040
Ethiopia                 52.947
Finland                  79.313
France                   80.657
Gabon                    56.735
Gambia                   59.448
Germany                  79.406
Ghana                    60.022
Greece                   79.483
Guatemala                70.259
Guinea                   56.007
Guinea-Bissau            46.388
Haiti                    60.916
Honduras                 70.198
Hong Kong, China         82.208
Hungary                  73.338
Iceland                  81.757
India                    64.698
Indonesia                70.650
Iran                     70.964
Iraq                     59.545
Ireland                  78.885
Israel                   80.745
Italy                    80.546
Jamaica                  72.567
Japan                    82.603
Jordan                   72.535
Kenya                    54.110
Korea, Dem. Rep.         67.297
Korea, Rep.              78.623
Kuwait                   77.588
Lebanon                  71.993
Lesotho                  42.592
Liberia                  45.678
Libya                    73.952
Madagascar               59.443
Malawi                   48.303
Malaysia                 74.241
Mali                     54.467
Mauritania               64.164
Mauritius                72.801
Mexico                   76.195
 [ reached getOption("max.print") -- omitted 59 rows ]

attr(,"class")
[1] "cast_tbl"

cor_sparse

pairwise_cor

> gapminder %>%
+   pairwise_cor(country, year, lifeExp)
# A tibble: 20,022 x 3
        item1       item2 correlation
       <fctr>      <fctr>       <dbl>
1     Albania Afghanistan   0.9656953
2     Algeria Afghanistan   0.9868220
3      Angola Afghanistan   0.9855294
4   Argentina Afghanistan   0.9705203
5   Australia Afghanistan   0.9393751
6     Austria Afghanistan   0.9557228
7     Bahrain Afghanistan   0.9956190
8  Bangladesh Afghanistan   0.9466653
9     Belgium Afghanistan   0.9626319
10      Benin Afghanistan   0.9970561
# ... with 20,012 more rows

pairwise_count

> data_frame(group = rep(1:5, each = 2),
+                   letter = c("a", "b",
+                              "a", "c",
+                              "a", "c",
+                              "b", "e",
+                              "b", "f")) %>% 
+   pairwise_count(letter, group)
# A tibble: 8 x 3
  item1 item2     n
  <chr> <chr> <dbl>
1     b     a     1
2     c     a     2
3     a     b     1
4     e     b     1
5     f     b     1
6     a     c     2
7     b     e     1
8     b     f     1

pairwise_dist

> gapminder %>%
+   pairwise_dist(country, year, lifeExp) %>%
+   arrange(distance)
# A tibble: 20,022 x 3
            item1          item2 distance
           <fctr>         <fctr>    <dbl>
1         Germany        Belgium 1.075702
2         Belgium        Germany 1.075702
3  United Kingdom    New Zealand 1.509025
4     New Zealand United Kingdom 1.509025
5          Norway    Netherlands 1.557933
6     Netherlands         Norway 1.557933
7           Italy         Israel 1.662690
8          Israel          Italy 1.662690
9         Finland        Austria 1.936558
10        Austria        Finland 1.936558
# ... with 20,012 more rows

widely

> gapminder %>%
+   widely(dist, country, year, lifeExp)()
# A tibble: 10,011 x 3
        item1       item2     value
       <fctr>      <fctr>     <dbl>
1     Albania Afghanistan 107.41825
2     Algeria Afghanistan  76.75286
3      Angola Afghanistan   4.64934
4   Argentina Afghanistan 109.50686
5   Australia Afghanistan 128.95745
6     Austria Afghanistan 123.51771
7     Bahrain Afghanistan  98.13426
8  Bangladesh Afghanistan  45.33990
9     Belgium Afghanistan 125.41156
10      Benin Afghanistan  39.32262
# ... with 10,001 more rows