purrrlyr: Tools at the Intersection of 'purrr' and 'dplyr'

> library(purrrlyr)

バージョン: 0.0.2


関数名 概略
by_row Apply a function to each row of a data frame
by_slice Apply a function to slices of a data frame
dmap Map over the columns of a data frame
slice_rows Slice a data frame into groups of rows

by_row / invoke_rows

データフレームの各行に関数を適用する

> mtcars[1:4, 1:3] %>% by_row(sum)
   mpg cyl disp  .out
1 21.0   6  160   187
2 21.0   6  160   187
3 22.8   4  108 134.8
4 21.4   6  258 285.4
> mtcars[1:4, 1:3] %>% by_row(purrr::lift_vl(mean))
   mpg cyl disp          .out
1 21.0   6  160 62.3333333333
2 21.0   6  160 62.3333333333
3 22.8   4  108 44.9333333333
4 21.4   6  258 95.1333333333
> mtcars[1:4, 1:3] %>% invoke_rows(.f = purrr::lift_vd(mean))
   mpg cyl disp          .out
1 21.0   6  160 62.3333333333
2 21.0   6  160 62.3333333333
3 22.8   4  108 44.9333333333
4 21.4   6  258 95.1333333333

by_slice

slice_rows でスライス(グループ化)されたデータフレームの各スライスに関数を適用する

> df <- mtcars %>% slice_rows(c("cyl", "am"))
> df %>% by_slice(dmap, ~ .x / sum(.x), .collate = "rows")

dmap / dmap_at / dmap_if

データフレームの各列に関数を適用する(返り値はデータフレーム)

> mtcars[, 1] %>% summary() %>% names()
[1] "Min."    "1st Qu." "Median"  "Mean"    "3rd Qu." "Max."
> mtcars %>% 
+   dmap(summary) %>% 
+   set_rownames(c("min", "1st_qu", "median", "mean", "3rd_qu", "max")) %>% 
+ #  column_to_rownames() %>% 
+   rownames_to_column()
Warning: Setting row names on a tibble is deprecated.
# A tibble: 6 x 12
  rowname         mpg         cyl        disp          hp        drat
    <chr> <S3: table> <S3: table> <S3: table> <S3: table> <S3: table>
1     min   10.400000      4.0000   71.100000     52.0000   2.7600000
2  1st_qu   15.425000      4.0000  120.825000     96.5000   3.0800000
3  median   19.200000      6.0000  196.300000    123.0000   3.6950000
4    mean   20.090625      6.1875  230.721875    146.6875   3.5965625
5  3rd_qu   22.800000      8.0000  326.000000    180.0000   3.9200000
6     max   33.900000      8.0000  472.000000    335.0000   4.9300000
# ... with 6 more variables: wt <S3: table>, qsec <S3: table>, vs <S3:
#   table>, am <S3: table>, gear <S3: table>, carb <S3: table>
> sliced_df <- mtcars[1:5] %>% slice_rows("cyl")
> dplyr::group_size(sliced_df)
[1] 11  7 14
> sliced_df %>% dmap(mean)
  cyl           mpg          disp             hp          drat
1   4 26.6636363636 105.136363636  82.6363636364 4.07090909091
2   6 19.7428571429 183.314285714 122.2857142857 3.58571428571
3   8 15.1000000000 353.100000000 209.2142857143 3.22928571429

slice_rows / unslice

slice_rows は指定した列の値でデータフレームを行のグループ(スライス)に分割する。unslice はスライスを解除する

> slice_rows(mtcars, "cyl") %>% 
+   dplyr::summarise(mean = mean(disp))
# A tibble: 3 x 2
    cyl          mean
  <dbl>         <dbl>
1     4 105.136363636
2     6 183.314285714
3     8 353.100000000