strsplit and transpose the resulting list efficiently
tstrsplit.Rd
This is equivalent to transpose(strsplit(...)). It is a convenient wrapper function to split a column using strsplit
and assign the transposed result to individual columns. See examples.
Arguments
- x
The vector to split (and transpose).
- ...
All the arguments to be passed to strsplit.
- fill
Default is NA. It is used to fill shorter list elements so that every element of the transposed result has the same length.
- type.convert
TRUE calls type.convert with as.is=TRUE on the columns. May also be a function, list of functions, or named list of functions to apply to each part; see examples.
- keep
Specify indices corresponding to just those list elements to retain in the transposed result. Default is to return all.
- names
TRUE auto names the list with V1, V2 etc. Default (FALSE) is to return an unnamed list. A character vector of names may also be supplied, as shown in the examples.
Details
It internally calls strsplit first, and then transpose on the result.
The names argument can be used to return an auto-named list, although this argument does not have any effect when used with :=, which requires names to be provided explicitly. It might be useful in other scenarios.
Examples
x = c("abcde", "ghij", "klmnopq")
strsplit(x, "", fixed=TRUE)
#> [[1]]
#> [1] "a" "b" "c" "d" "e"
#>
#> [[2]]
#> [1] "g" "h" "i" "j"
#>
#> [[3]]
#> [1] "k" "l" "m" "n" "o" "p" "q"
#>
tstrsplit(x, "", fixed=TRUE)
#> [[1]]
#> [1] "a" "g" "k"
#>
#> [[2]]
#> [1] "b" "h" "l"
#>
#> [[3]]
#> [1] "c" "i" "m"
#>
#> [[4]]
#> [1] "d" "j" "n"
#>
#> [[5]]
#> [1] "e" NA "o"
#>
#> [[6]]
#> [1] NA NA "p"
#>
#> [[7]]
#> [1] NA NA "q"
#>
tstrsplit(x, "", fixed=TRUE, fill="<NA>")
#> [[1]]
#> [1] "a" "g" "k"
#>
#> [[2]]
#> [1] "b" "h" "l"
#>
#> [[3]]
#> [1] "c" "i" "m"
#>
#> [[4]]
#> [1] "d" "j" "n"
#>
#> [[5]]
#> [1] "e" "<NA>" "o"
#>
#> [[6]]
#> [1] "<NA>" "<NA>" "p"
#>
#> [[7]]
#> [1] "<NA>" "<NA>" "q"
#>
# using keep to return just 1,3,5
tstrsplit(x, "", fixed=TRUE, keep=c(1,3,5))
#> [[1]]
#> [1] "a" "g" "k"
#>
#> [[2]]
#> [1] "c" "i" "m"
#>
#> [[3]]
#> [1] "e" NA "o"
#>
# names argument
tstrsplit(x, "", fixed=TRUE, keep=c(1,3,5), names=LETTERS[1:3])
#> $A
#> [1] "a" "g" "k"
#>
#> $B
#> [1] "c" "i" "m"
#>
#> $C
#> [1] "e" NA "o"
#>
DT = data.table(x=c("A/B", "A", "B"), y=1:3)
DT[, c("c1") := tstrsplit(x, "/", fixed=TRUE, keep=1L)][]
#> x y c1
#> <char> <int> <char>
#> 1: A/B 1 A
#> 2: A 2 A
#> 3: B 3 B
DT[, c("c1", "c2") := tstrsplit(x, "/", fixed=TRUE)][]
#> x y c1 c2
#> <char> <int> <char> <char>
#> 1: A/B 1 A B
#> 2: A 2 A <NA>
#> 3: B 3 B <NA>
# type.convert argument
DT = data.table(
w = c("Yes/F", "No/M"),
x = c("Yes 2000-03-01 A/T", "No 2000-04-01 E/R"),
y = c("1/1/2", "2/5/2.5"),
z = c("Yes/1/2", "No/5/3.5"),
v = c("Yes 10 30.5 2000-03-01 A/T", "No 20 10.2 2000-04-01 E/R"))
# convert each element in the transposed list to type factor
DT[, tstrsplit(w, "/", type.convert=as.factor)]
#> V1 V2
#> <fctr> <fctr>
#> 1: Yes F
#> 2: No M
# convert part and leave any others
DT[, tstrsplit(z, "/", type.convert=list(as.numeric=2:3))]
#> V1 V2 V3
#> <char> <num> <num>
#> 1: Yes 1 2.0
#> 2: No 5 3.5
# convert part with one function and any others with another
DT[, tstrsplit(z, "/", type.convert=list(as.factor=1L, as.numeric))]
#> V1 V2 V3
#> <fctr> <num> <num>
#> 1: Yes 1 2.0
#> 2: No 5 3.5
# convert the remaining using 'type.convert(x, as.is=TRUE)' (i.e. what type.convert=TRUE does)
DT[, tstrsplit(v, " ", type.convert=list(as.IDate=4L, function(x) type.convert(x, as.is=TRUE)))]
#> V1 V2 V3 V4 V5
#> <char> <int> <num> <IDat> <char>
#> 1: Yes 10 30.5 2000-03-01 A/T
#> 2: No 20 10.2 2000-04-01 E/R