Specify measure.vars via regex or separator
measure.Rd
These functions compute an integer vector or list for use as
the measure.vars
argument to melt
.
Each measured variable name is converted into several groups that occupy
different columns in the output melted data.
measure
allows specifying group names/conversions in R code
(each group and conversion specified as an argument)
whereas measurev
allows specifying group names/conversions using
data values
(each group and conversion specified as a list element).
See
vignette("datatable-reshape")
for more info.
Usage
measure(..., sep, pattern, cols, multiple.keyword="value.name")
measurev(fun.list, sep, pattern, cols, multiple.keyword="value.name")
Arguments
- ...
One or more (1) symbols (without argument name; symbol is used for group name) or (2) functions to convert the groups (with argument name that is used for group name). Must have same number of arguments as groups that are specified by either
sep
or pattern
arguments.
- fun.list
Named list which must have the same number of elements as groups that are specified by either
sep
or pattern
arguments. Each name is used as a group name, and each value must be either a function (to convert the group from a character vector to an atomic vector of the same size) or NULL (no conversion).
- sep
Separator to split each element of
cols
into groups. Columns that result in the maximum number of groups are considered measure variables.
- pattern
Perl-compatible regex with capture groups to match to
cols
. Columns that match the regex are considered measure variables.
- cols
A character vector of column names.
- multiple.keyword
A string. If it is used as a group name, then measure returns a list and melt returns multiple value columns (with names defined by the unique values in that group). Otherwise, if the string is not used as a group name, then measure returns a vector and melt returns a single value column.
Examples
(two.iris = data.table(datasets::iris)[c(1,150)])
#> Sepal.Length Sepal.Width Petal.Length Petal.Width Species
#> <num> <num> <num> <num> <fctr>
#> 1: 5.1 3.5 1.4 0.2 setosa
#> 2: 5.9 3.0 5.1 1.8 virginica
# melt into a single value column.
melt(two.iris, measure.vars = measure(part, dim, sep="."))
#> Species part dim value
#> <fctr> <char> <char> <num>
#> 1: setosa Sepal Length 5.1
#> 2: virginica Sepal Length 5.9
#> 3: setosa Sepal Width 3.5
#> 4: virginica Sepal Width 3.0
#> 5: setosa Petal Length 1.4
#> 6: virginica Petal Length 5.1
#> 7: setosa Petal Width 0.2
#> 8: virginica Petal Width 1.8
# do the same, programmatically with measurev
my.list = list(part=NULL, dim=NULL)
melt(two.iris, measure.vars=measurev(my.list, sep="."))
#> Species part dim value
#> <fctr> <char> <char> <num>
#> 1: setosa Sepal Length 5.1
#> 2: virginica Sepal Length 5.9
#> 3: setosa Sepal Width 3.5
#> 4: virginica Sepal Width 3.0
#> 5: setosa Petal Length 1.4
#> 6: virginica Petal Length 5.1
#> 7: setosa Petal Width 0.2
#> 8: virginica Petal Width 1.8
# melt into two value columns, one for each part.
melt(two.iris, measure.vars = measure(value.name, dim, sep="."))
#> Species dim Sepal Petal
#> <fctr> <char> <num> <num>
#> 1: setosa Length 5.1 1.4
#> 2: virginica Length 5.9 5.1
#> 3: setosa Width 3.5 0.2
#> 4: virginica Width 3.0 1.8
# melt into two value columns, one for each dim.
melt(two.iris, measure.vars = measure(part, value.name, sep="."))
#> Species part Length Width
#> <fctr> <char> <num> <num>
#> 1: setosa Sepal 5.1 3.5
#> 2: virginica Sepal 5.9 3.0
#> 3: setosa Petal 1.4 0.2
#> 4: virginica Petal 5.1 1.8
# melt using sep, converting child number to integer.
(two.families = data.table(sex_child1="M", sex_child2="F", age_child1=10, age_child2=20))
#> sex_child1 sex_child2 age_child1 age_child2
#> <char> <char> <num> <num>
#> 1: M F 10 20
print(melt(two.families, measure.vars = measure(
value.name, child=as.integer,
sep="_child"
)), class=TRUE)
#> child sex age
#> <int> <char> <num>
#> 1: 1 M 10
#> 2: 2 F 20
# same melt using pattern.
print(melt(two.families, measure.vars = measure(
value.name, child=as.integer,
pattern="(.*)_child(.)"
)), class=TRUE)
#> child sex age
#> <int> <char> <num>
#> 1: 1 M 10
#> 2: 2 F 20
# same melt with pattern and measurev function list.
print(melt(two.families, measure.vars = measurev(
list(value.name=NULL, child=as.integer),
pattern="(.*)_child(.)"
)), class=TRUE)
#> child sex age
#> <int> <char> <num>
#> 1: 1 M 10
#> 2: 2 F 20
# inspired by data(who, package="tidyr")
(who <- data.table(id=1, new_sp_m5564=2, newrel_f65=3))
#> id new_sp_m5564 newrel_f65
#> <num> <num> <num>
#> 1: 1 2 3
# melt to three variable columns, all character.
melt(who, measure.vars = measure(diagnosis, gender, ages, pattern="new_?(.*)_(.)(.*)"))
#> id diagnosis gender ages value
#> <num> <char> <char> <char> <num>
#> 1: 1 sp m 5564 2
#> 2: 1 rel f 65 3
# melt to five variable columns, two numeric (with custom conversion).
print(melt(who, measure.vars = measure(
diagnosis, gender, ages,
ymin=as.numeric,
ymax=function(y)ifelse(y=="", Inf, as.numeric(y)),
pattern="new_?(.*)_(.)(([0-9]{2})([0-9]{0,2}))"
)), class=TRUE)
#> id diagnosis gender ages ymin ymax value
#> <num> <char> <char> <char> <num> <num> <num>
#> 1: 1 sp m 5564 55 64 2
#> 2: 1 rel f 65 65 Inf 3