merge {data.table}  R Documentation 
Fast merge of two data.table
s. The data.table
method behaves
similarly to data.frame
except that row order is specified, and by
default the columns to merge on are chosen:
at first based on the shared key columns, and if there are none,
then based on key columns of the first argument x
, and if there
are none,
then based on the common columns between the two data.table
s.
Use the by
, by.x
and by.y
arguments explicitly to override this default.
## S3 method for class 'data.table'
merge(x, y, by = NULL, by.x = NULL, by.y = NULL, all = FALSE,
all.x = all, all.y = all, sort = TRUE, suffixes = c(".x", ".y"), no.dups = TRUE,
allow.cartesian=getOption("datatable.allow.cartesian"), # default FALSE
incomparables = NULL, ...)
x, y 

by 
A vector of shared column names in 
by.x, by.y 
Vectors of column names in 
all 
logical; 
all.x 
logical; if 
all.y 
logical; analogous to 
sort 
logical. If 
suffixes 
A 
no.dups 
logical indicating that 
allow.cartesian 
See 
incomparables 
values which cannot be matched and therefore are excluded from by columns. 
... 
Not used at this time. 
merge
is a generic function in base R. It dispatches to either the
merge.data.frame
method or merge.data.table
method depending on
the class of its first argument. Note that, unlike SQL
join, NA
is
matched against NA
(and NaN
against NaN
) while merging.
For a more data.table
centric way of merging two data.table
s, see
[.data.table
; e.g., x[y, ...]
. See FAQ 1.11 for a detailed
comparison of merge
and x[y, ...]
.
A new data.table
based on the merged data table
s, and sorted by the
columns set (or inferred for) the by
argument if argument sort
is
set to TRUE
.
data.table
, setkey
, [.data.table
,
merge.data.frame
(dt1 < data.table(A = letters[1:10], X = 1:10, key = "A"))
(dt2 < data.table(A = letters[5:14], Y = 1:10, key = "A"))
merge(dt1, dt2)
merge(dt1, dt2, all = TRUE)
(dt1 < data.table(A = letters[rep(1:3, 2)], X = 1:6, key = "A"))
(dt2 < data.table(A = letters[rep(2:4, 2)], Y = 6:1, key = "A"))
merge(dt1, dt2, allow.cartesian=TRUE)
(dt1 < data.table(A = c(rep(1L, 5), 2L), B = letters[rep(1:3, 2)], X = 1:6, key = "A,B"))
(dt2 < data.table(A = c(rep(1L, 5), 2L), B = letters[rep(2:4, 2)], Y = 6:1, key = "A,B"))
merge(dt1, dt2)
merge(dt1, dt2, by="B", allow.cartesian=TRUE)
# test it more:
d1 < data.table(a=rep(1:2,each=3), b=1:6, key="a,b")
d2 < data.table(a=0:1, bb=10:11, key="a")
d3 < data.table(a=0:1, key="a")
d4 < data.table(a=0:1, b=0:1, key="a,b")
merge(d1, d2)
merge(d2, d1)
merge(d1, d2, all=TRUE)
merge(d2, d1, all=TRUE)
merge(d3, d1)
merge(d1, d3)
merge(d1, d3, all=TRUE)
merge(d3, d1, all=TRUE)
merge(d1, d4)
merge(d1, d4, by="a", suffixes=c(".d1", ".d4"))
merge(d4, d1)
merge(d1, d4, all=TRUE)
merge(d4, d1, all=TRUE)
# setkey is automatic by default
set.seed(1L)
d1 < data.table(a=sample(rep(1:3,each=2)), z=1:6)
d2 < data.table(a=2:0, z=10:12)
merge(d1, d2, by="a")
merge(d1, d2, by="a", all=TRUE)
# using by.x and by.y
setnames(d2, "a", "b")
merge(d1, d2, by.x="a", by.y="b")
merge(d1, d2, by.x="a", by.y="b", all=TRUE)
merge(d2, d1, by.x="b", by.y="a")
# using incomparables values
d1 < data.table(a=c(1,2,NA,NA,3,1), z=1:6)
d2 < data.table(a=c(1,2,NA), z=10:12)
merge(d1, d2, by="a")
merge(d1, d2, by="a", incomparables=NA)