library(data.table)
DT = data.table(a=LETTERS[c(1,1:3)],b=4:7,key="a")
DT
:= 通过引用的方式增加或者更新列
类似transform方法,但是更容易操作
DT <- data.table(a=rep(1:3, each=2), b=1:6)
DT2 <- transform(DT, c = a^2)
DT[, c:=a^2]
identical(DT,DT2)
| a | b | c |
|---|
| 1 | 1 | 1 | 1 |
|---|
| 2 | 1 | 2 | 1 |
|---|
| 3 | 2 | 3 | 4 |
|---|
| 4 | 2 | 4 | 4 |
|---|
| 5 | 3 | 5 | 9 |
|---|
| 6 | 3 | 6 | 9 |
|---|
TRUE
DT2 <- within(DT, {
b <- rev(b)
c <- a*2
rm(a)
})
DT[,`:=`(b = rev(b),
c = a*2,
a = NULL)]
identical(DT,DT2)
| b | c |
|---|
| 1 | 6 | 2 |
|---|
| 2 | 5 | 2 |
|---|
| 3 | 4 | 4 |
|---|
| 4 | 3 | 4 |
|---|
| 5 | 2 | 6 |
|---|
| 6 | 1 | 6 |
|---|
TRUE
DT[,c:=8]
DT[,d:=9L]
DT[,c:=NULL]
DT[2,d:=10L]
| a | b | d | c |
|---|
| 1 | A | 4 | 9 | 8 |
|---|
| 2 | A | 5 | 10 | 8 |
|---|
| 3 | B | 6 | 9 | 8 |
|---|
| 4 | C | 7 | 9 | 8 |
|---|
| a | b | d | c |
|---|
| 1 | A | 4 | 9 | 8 |
|---|
| 2 | A | 5 | 9 | 8 |
|---|
| 3 | B | 6 | 9 | 8 |
|---|
| 4 | C | 7 | 9 | 8 |
|---|
DT[b>4,b:=d*2L]
DT["A",b:=0L]
| a | b | d |
|---|
| 1 | A | 4 | 9 |
|---|
| 2 | A | 20 | 10 |
|---|
| 3 | B | 18 | 9 |
|---|
| 4 | C | 18 | 9 |
|---|
| a | b | d |
|---|
| 1 | A | 0 | 9 |
|---|
| 2 | A | 0 | 10 |
|---|
| 3 | B | 18 | 9 |
|---|
| 4 | C | 18 | 9 |
|---|
DT[,e:=mean(d),by=a]
DT["B",f:=mean(d)]
| a | b | d | e |
|---|
| 1 | A | 0 | 9 | 9.5 |
|---|
| 2 | A | 0 | 10 | 9.5 |
|---|
| 3 | B | 18 | 9 | 9 |
|---|
| 4 | C | 18 | 9 | 9 |
|---|
| a | b | d | e | f |
|---|
| 1 | A | 0 | 9 | 9.5 | NA |
|---|
| 2 | A | 0 | 10 | 9.5 | NA |
|---|
| 3 | B | 18 | 9 | 9 | 9 |
|---|
| 4 | C | 18 | 9 | 9 | NA |
|---|
between 选择符合范围判断的项
DT = data.table(a=1:5, b=6:10)
DT[b %between% c(7,9)]
chmatch 快速匹配两个字符向量,返回一个匹配成功位置的向量
a<-LETTERS[c(1:5)]
b<-LETTERS[c(4:10)]
chmatch(a,b)
a %chin% b
- NA
- NA
- NA
- 1
- 2
- FALSE
- FALSE
- FALSE
- TRUE
- TRUE
dcast.data.table 数据表的整形
melt.data.table用法
require(reshape2)
require(data.table)
DT <- data.table(
i1 = c(1:5, NA),
i2 = c(NA,6,7,8,9,10),
f1 = factor(sample(c(letters[1:3], NA), 6, TRUE)),
c1 = sample(c(letters[1:3], NA), 6, TRUE),
d1 = as.Date(c(1:3,NA,4:5), origin="2013-09-01"),
d2 = as.Date(6:1, origin="2012-01-01"))
DT[, l1 := DT[, list(c=list(rep(i1, sample(5,1)))), by = i1]$c] # list cols
DT[, l2 := DT[, list(c=list(rep(c1, sample(5,1)))), by = i1]$c]
melt(DT, id=1:2, measure=3)
melt(DT, id=c("i1", "i2"), measure="f1", value.factor=TRUE) # same as above, but value is factor
melt(DT, id=c("i1", "f1"), measure=c("d1", "d2")) # date class attribute lost
melt(DT, id=c("i1", "f1"), measure=c("c1", "d1")) # value is char, date attribute lost
melt(DT, id=1, measure=c("l1", "l2")) # value is a list
melt(DT, id=1, measure=c("c1", "l1")) # c1 coerced to list
melt(DT, id=1, measure=c("c1", "f1")) # value is char
melt(DT, id=1, measure=c("c1", "i2")) # i2 coerced to char
melt(DT, id=1, measure=c("c1", "i2"), na.rm=TRUE) # remove NA
Loading required package: reshape2
| i1 | i2 | f1 | c1 | d1 | d2 | l1 |
|---|
| 1 | 1 | NA | c | a | 2013-09-02 | 2012-01-07 | 1,1,1 |
|---|
| 2 | 2 | 6 | a | b | 2013-09-03 | 2012-01-06 | 2,2,2,2 |
|---|
| 3 | 3 | 7 | c | a | 2013-09-04 | 2012-01-05 | 3,3,3,3,3 |
|---|
| 4 | 4 | 8 | NA | NA | NA | 2012-01-04 | 4 |
|---|
| 5 | 5 | 9 | a | b | 2013-09-05 | 2012-01-03 | 5,5,5,5,5 |
|---|
| 6 | NA | 10 | c | a | 2013-09-06 | 2012-01-02 | NA |
|---|
| i1 | i2 | f1 | c1 | d1 | d2 | l1 | l2 |
|---|
| 1 | 1 | NA | c | a | 2013-09-02 | 2012-01-07 | 1,1,1 | a,a,a |
|---|
| 2 | 2 | 6 | a | b | 2013-09-03 | 2012-01-06 | 2,2,2,2 | b,b,b,b,b |
|---|
| 3 | 3 | 7 | c | a | 2013-09-04 | 2012-01-05 | 3,3,3,3,3 | a,a |
|---|
| 4 | 4 | 8 | NA | NA | NA | 2012-01-04 | 4 | NA |
|---|
| 5 | 5 | 9 | a | b | 2013-09-05 | 2012-01-03 | 5,5,5,5,5 | b,b,b,b,b |
|---|
| 6 | NA | 10 | c | a | 2013-09-06 | 2012-01-02 | NA | a,a,a |
|---|
| i1 | i2 | variable | value |
|---|
| 1 | 1 | NA | f1 | c |
|---|
| 2 | 2 | 6 | f1 | a |
|---|
| 3 | 3 | 7 | f1 | c |
|---|
| 4 | 4 | 8 | f1 | NA |
|---|
| 5 | 5 | 9 | f1 | a |
|---|
| 6 | NA | 10 | f1 | c |
|---|
| i1 | i2 | variable | value |
|---|
| 1 | 1 | NA | f1 | c |
|---|
| 2 | 2 | 6 | f1 | a |
|---|
| 3 | 3 | 7 | f1 | c |
|---|
| 4 | 4 | 8 | f1 | NA |
|---|
| 5 | 5 | 9 | f1 | a |
|---|
| 6 | NA | 10 | f1 | c |
|---|
| i1 | f1 | variable | value |
|---|
| 1 | 1 | c | d1 | 15950 |
|---|
| 2 | 2 | a | d1 | 15951 |
|---|
| 3 | 3 | c | d1 | 15952 |
|---|
| 4 | 4 | NA | d1 | NA |
|---|
| 5 | 5 | a | d1 | 15953 |
|---|
| 6 | NA | c | d1 | 15954 |
|---|
| 7 | 1 | c | d2 | 15346 |
|---|
| 8 | 2 | a | d2 | 15345 |
|---|
| 9 | 3 | c | d2 | 15344 |
|---|
| 10 | 4 | NA | d2 | 15343 |
|---|
| 11 | 5 | a | d2 | 15342 |
|---|
| 12 | NA | c | d2 | 15341 |
|---|
Warning message:
In melt.data.table(DT, id = c("i1", "f1"), measure = c("c1", "d1")): All 'measure.vars are NOT of the SAME type. By order of hierarchy, the molten data value column will be of type 'character'. Therefore all measure variables that are not of type 'character' will be coerced to. Check the DETAILS section of ?melt.data.table for more on coercion.
| i1 | f1 | variable | value |
|---|
| 1 | 1 | c | c1 | a |
|---|
| 2 | 2 | a | c1 | b |
|---|
| 3 | 3 | c | c1 | a |
|---|
| 4 | 4 | NA | c1 | NA |
|---|
| 5 | 5 | a | c1 | b |
|---|
| 6 | NA | c | c1 | a |
|---|
| 7 | 1 | c | d1 | 15950 |
|---|
| 8 | 2 | a | d1 | 15951 |
|---|
| 9 | 3 | c | d1 | 15952 |
|---|
| 10 | 4 | NA | d1 | NA |
|---|
| 11 | 5 | a | d1 | 15953 |
|---|
| 12 | NA | c | d1 | 15954 |
|---|
| i1 | variable | value |
|---|
| 1 | 1 | l1 | 1,1,1 |
|---|
| 2 | 2 | l1 | 2,2,2,2 |
|---|
| 3 | 3 | l1 | 3,3,3,3,3 |
|---|
| 4 | 4 | l1 | 4 |
|---|
| 5 | 5 | l1 | 5,5,5,5,5 |
|---|
| 6 | NA | l1 | NA |
|---|
| 7 | 1 | l2 | a,a,a |
|---|
| 8 | 2 | l2 | b,b,b,b,b |
|---|
| 9 | 3 | l2 | a,a |
|---|
| 10 | 4 | l2 | NA |
|---|
| 11 | 5 | l2 | b,b,b,b,b |
|---|
| 12 | NA | l2 | a,a,a |
|---|
Warning message:
In melt.data.table(DT, id = 1, measure = c("c1", "l1")): All 'measure.vars are NOT of the SAME type. By order of hierarchy, the molten data value column will be of type 'list'. Therefore all measure variables that are not of type 'list' will be coerced to. Check the DETAILS section of ?melt.data.table for more on coercion.
| i1 | variable | value |
|---|
| 1 | 1 | c1 | a |
|---|
| 2 | 2 | c1 | b |
|---|
| 3 | 3 | c1 | a |
|---|
| 4 | 4 | c1 | NA |
|---|
| 5 | 5 | c1 | b |
|---|
| 6 | NA | c1 | a |
|---|
| 7 | 1 | l1 | 1,1,1 |
|---|
| 8 | 2 | l1 | 2,2,2,2 |
|---|
| 9 | 3 | l1 | 3,3,3,3,3 |
|---|
| 10 | 4 | l1 | 4 |
|---|
| 11 | 5 | l1 | 5,5,5,5,5 |
|---|
| 12 | NA | l1 | NA |
|---|
| i1 | variable | value |
|---|
| 1 | 1 | c1 | a |
|---|
| 2 | 2 | c1 | b |
|---|
| 3 | 3 | c1 | a |
|---|
| 4 | 4 | c1 | NA |
|---|
| 5 | 5 | c1 | b |
|---|
| 6 | NA | c1 | a |
|---|
| 7 | 1 | f1 | c |
|---|
| 8 | 2 | f1 | a |
|---|
| 9 | 3 | f1 | c |
|---|
| 10 | 4 | f1 | NA |
|---|
| 11 | 5 | f1 | a |
|---|
| 12 | NA | f1 | c |
|---|
Warning message:
In melt.data.table(DT, id = 1, measure = c("c1", "i2")): All 'measure.vars are NOT of the SAME type. By order of hierarchy, the molten data value column will be of type 'character'. Therefore all measure variables that are not of type 'character' will be coerced to. Check the DETAILS section of ?melt.data.table for more on coercion.
| i1 | variable | value |
|---|
| 1 | 1 | c1 | a |
|---|
| 2 | 2 | c1 | b |
|---|
| 3 | 3 | c1 | a |
|---|
| 4 | 4 | c1 | NA |
|---|
| 5 | 5 | c1 | b |
|---|
| 6 | NA | c1 | a |
|---|
| 7 | 1 | i2 | NA |
|---|
| 8 | 2 | i2 | 6 |
|---|
| 9 | 3 | i2 | 7 |
|---|
| 10 | 4 | i2 | 8 |
|---|
| 11 | 5 | i2 | 9 |
|---|
| 12 | NA | i2 | 10 |
|---|
Warning message:
In melt.data.table(DT, id = 1, measure = c("c1", "i2"), na.rm = TRUE): All 'measure.vars are NOT of the SAME type. By order of hierarchy, the molten data value column will be of type 'character'. Therefore all measure variables that are not of type 'character' will be coerced to. Check the DETAILS section of ?melt.data.table for more on coercion.
| i1 | variable | value |
|---|
| 1 | 1 | c1 | a |
|---|
| 2 | 2 | c1 | b |
|---|
| 3 | 3 | c1 | a |
|---|
| 4 | 5 | c1 | b |
|---|
| 5 | NA | c1 | a |
|---|
| 6 | 2 | i2 | 6 |
|---|
| 7 | 3 | i2 | 7 |
|---|
| 8 | 4 | i2 | 8 |
|---|
| 9 | 5 | i2 | 9 |
|---|
| 10 | NA | i2 | 10 |
|---|
melt后,进行cast
reshape2的精髓是把整体数据(比如数据框)打碎(melt,或说熔化),让其回到一个一个数据点的状态,不过,这个数据点是可以根据观测的id名称和变量名称定位找到的(事实上,数据框是二维的,在数据框中,根据观测id和某变量名称,即可交叉定位到某数据点)。然后,再根据观测id和变量名称的组合,随心所欲的锻造需要的数据形态(cast),中间可能有求和、求平均值之类的数据运算。
names(ChickWeight) <- tolower(names(ChickWeight))
DT <- melt(as.data.table(ChickWeight), id=2:4)
dcast(DT, time ~ variable, fun=mean)
dcast(DT, diet ~ variable, fun=mean)
dcast(DT, diet+chick ~ time, drop=FALSE)
dcast(DT, diet+chick ~ time, drop=FALSE, fill=0)
dcast.data.table(DT, chick ~ time, fun=mean, subset=.(time < 10 & chick < 20))
| time | weight |
|---|
| 1 | 0 | 41.06 |
|---|
| 2 | 2 | 49.22 |
|---|
| 3 | 4 | 59.95918 |
|---|
| 4 | 6 | 74.30612 |
|---|
| 5 | 8 | 91.2449 |
|---|
| 6 | 10 | 107.8367 |
|---|
| 7 | 12 | 129.2449 |
|---|
| 8 | 14 | 143.8125 |
|---|
| 9 | 16 | 168.0851 |
|---|
| 10 | 18 | 190.1915 |
|---|
| 11 | 20 | 209.7174 |
|---|
| 12 | 21 | 218.6889 |
|---|
| diet | weight |
|---|
| 1 | 1 | 102.6455 |
|---|
| 2 | 2 | 122.6167 |
|---|
| 3 | 3 | 142.95 |
|---|
| 4 | 4 | 135.2627 |
|---|
Warning message:
In `[<-.factor`(`*tmp*`, ri, value = "⋮"): invalid factor level, NA generatedWarning message:
In `[<-.factor`(`*tmp*`, ri, value = "⋮"): invalid factor level, NA generatedWarning message:
In `[<-.factor`(`*tmp*`, ri, value = "⋮"): invalid factor level, NA generatedWarning message:
In `[<-.factor`(`*tmp*`, ri, value = "⋮"): invalid factor level, NA generated
| diet | chick | 0 | 2 | 4 | 6 | 8 | 10 | 12 | 14 | 16 | 18 | 20 | 21 |
|---|
| 1 | 1 | 18 | 39 | 35 | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA |
|---|
| 2 | 1 | 16 | 41 | 45 | 49 | 51 | 57 | 51 | 54 | NA | NA | NA | NA | NA |
|---|
| 3 | 1 | 15 | 41 | 49 | 56 | 64 | 68 | 68 | 67 | 68 | NA | NA | NA | NA |
|---|
| 4 | 1 | 13 | 41 | 48 | 53 | 60 | 65 | 67 | 71 | 70 | 71 | 81 | 91 | 96 |
|---|
| 5 | 1 | 9 | 42 | 51 | 59 | 68 | 85 | 96 | 90 | 92 | 93 | 100 | 100 | 98 |
|---|
| 6 | 1 | 20 | 41 | 47 | 54 | 58 | 65 | 73 | 77 | 89 | 98 | 107 | 115 | 117 |
|---|
| 7 | 1 | 10 | 41 | 44 | 52 | 63 | 74 | 81 | 89 | 96 | 101 | 112 | 120 | 124 |
|---|
| 8 | 1 | 8 | 42 | 50 | 61 | 71 | 84 | 93 | 110 | 116 | 126 | 134 | 125 | NA |
|---|
| 9 | 1 | 17 | 42 | 51 | 61 | 72 | 83 | 89 | 98 | 103 | 113 | 123 | 133 | 142 |
|---|
| 10 | 1 | 19 | 43 | 48 | 55 | 62 | 65 | 71 | 82 | 88 | 106 | 120 | 144 | 157 |
|---|
| 11 | 1 | 4 | 42 | 49 | 56 | 67 | 74 | 87 | 102 | 108 | 136 | 154 | 160 | 157 |
|---|
| 12 | 1 | 6 | 41 | 49 | 59 | 74 | 97 | 124 | 141 | 148 | 155 | 160 | 160 | 157 |
|---|
| 13 | 1 | 11 | 43 | 51 | 63 | 84 | 112 | 139 | 168 | 177 | 182 | 184 | 181 | 175 |
|---|
| 14 | 1 | 3 | 43 | 39 | 55 | 67 | 84 | 99 | 115 | 138 | 163 | 187 | 198 | 202 |
|---|
| 15 | 1 | 1 | 42 | 51 | 59 | 64 | 76 | 93 | 106 | 125 | 149 | 171 | 199 | 205 |
|---|
| 16 | 1 | 12 | 41 | 49 | 56 | 62 | 72 | 88 | 119 | 135 | 162 | 185 | 195 | 205 |
|---|
| 17 | 1 | 2 | 40 | 49 | 58 | 72 | 84 | 103 | 122 | 138 | 162 | 187 | 209 | 215 |
|---|
| 18 | 1 | 5 | 41 | 42 | 48 | 60 | 79 | 106 | 141 | 164 | 197 | 199 | 220 | 223 |
|---|
| 19 | 1 | 14 | 41 | 49 | 62 | 79 | 101 | 128 | 164 | 192 | 227 | 248 | 259 | 266 |
|---|
| 20 | 1 | 7 | 41 | 49 | 57 | 71 | 89 | 112 | 146 | 174 | 218 | 250 | 288 | 305 |
|---|
| 21 | 1 | 24 | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA |
|---|
| 22 | 1 | 30 | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA |
|---|
| 23 | 1 | 22 | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA |
|---|
| 24 | 1 | 23 | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA |
|---|
| 25 | 1 | 27 | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA |
|---|
| 26 | 1 | 28 | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA |
|---|
| 27 | 1 | 26 | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA |
|---|
| 28 | 1 | 25 | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA |
|---|
| 29 | 1 | 29 | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA |
|---|
| 30 | 1 | 21 | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA |
|---|
| 31 | NA | NA | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ |
|---|
| 171 | 4 | 24 | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA |
|---|
| 172 | 4 | 30 | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA |
|---|
| 173 | 4 | 22 | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA |
|---|
| 174 | 4 | 23 | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA |
|---|
| 175 | 4 | 27 | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA |
|---|
| 176 | 4 | 28 | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA |
|---|
| 177 | 4 | 26 | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA |
|---|
| 178 | 4 | 25 | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA |
|---|
| 179 | 4 | 29 | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA |
|---|
| 180 | 4 | 21 | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA |
|---|
| 181 | 4 | 33 | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA |
|---|
| 182 | 4 | 37 | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA |
|---|
| 183 | 4 | 36 | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA |
|---|
| 184 | 4 | 31 | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA |
|---|
| 185 | 4 | 39 | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA |
|---|
| 186 | 4 | 38 | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA |
|---|
| 187 | 4 | 32 | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA |
|---|
| 188 | 4 | 40 | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA |
|---|
| 189 | 4 | 34 | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA |
|---|
| 190 | 4 | 35 | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA |
|---|
| 191 | 4 | 44 | 42 | 51 | 65 | 86 | 103 | 118 | 127 | 138 | 145 | 146 | NA | NA |
|---|
| 192 | 4 | 45 | 41 | 50 | 61 | 78 | 98 | 117 | 135 | 141 | 147 | 174 | 197 | 196 |
|---|
| 193 | 4 | 43 | 42 | 55 | 69 | 96 | 131 | 157 | 184 | 188 | 197 | 198 | 199 | 200 |
|---|
| 194 | 4 | 41 | 42 | 51 | 66 | 85 | 103 | 124 | 155 | 153 | 175 | 184 | 199 | 204 |
|---|
| 195 | 4 | 47 | 41 | 53 | 66 | 79 | 100 | 123 | 148 | 157 | 168 | 185 | 210 | 205 |
|---|
| 196 | 4 | 49 | 40 | 53 | 64 | 85 | 108 | 128 | 152 | 166 | 184 | 203 | 233 | 237 |
|---|
| 197 | 4 | 46 | 40 | 52 | 62 | 82 | 101 | 120 | 144 | 156 | 173 | 210 | 231 | 238 |
|---|
| 198 | 4 | 50 | 41 | 54 | 67 | 84 | 105 | 122 | 155 | 175 | 205 | 234 | 264 | 264 |
|---|
| 199 | 4 | 42 | 42 | 49 | 63 | 84 | 103 | 126 | 160 | 174 | 204 | 234 | 269 | 281 |
|---|
| 200 | 4 | 48 | 39 | 50 | 62 | 80 | 104 | 125 | 154 | 170 | 222 | 261 | 303 | 322 |
|---|
Warning message:
In `[<-.factor`(`*tmp*`, ri, value = "⋮"): invalid factor level, NA generatedWarning message:
In `[<-.factor`(`*tmp*`, ri, value = "⋮"): invalid factor level, NA generatedWarning message:
In `[<-.factor`(`*tmp*`, ri, value = "⋮"): invalid factor level, NA generatedWarning message:
In `[<-.factor`(`*tmp*`, ri, value = "⋮"): invalid factor level, NA generated
| diet | chick | 0 | 2 | 4 | 6 | 8 | 10 | 12 | 14 | 16 | 18 | 20 | 21 |
|---|
| 1 | 1 | 18 | 39 | 35 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
|---|
| 2 | 1 | 16 | 41 | 45 | 49 | 51 | 57 | 51 | 54 | 0 | 0 | 0 | 0 | 0 |
|---|
| 3 | 1 | 15 | 41 | 49 | 56 | 64 | 68 | 68 | 67 | 68 | 0 | 0 | 0 | 0 |
|---|
| 4 | 1 | 13 | 41 | 48 | 53 | 60 | 65 | 67 | 71 | 70 | 71 | 81 | 91 | 96 |
|---|
| 5 | 1 | 9 | 42 | 51 | 59 | 68 | 85 | 96 | 90 | 92 | 93 | 100 | 100 | 98 |
|---|
| 6 | 1 | 20 | 41 | 47 | 54 | 58 | 65 | 73 | 77 | 89 | 98 | 107 | 115 | 117 |
|---|
| 7 | 1 | 10 | 41 | 44 | 52 | 63 | 74 | 81 | 89 | 96 | 101 | 112 | 120 | 124 |
|---|
| 8 | 1 | 8 | 42 | 50 | 61 | 71 | 84 | 93 | 110 | 116 | 126 | 134 | 125 | 0 |
|---|
| 9 | 1 | 17 | 42 | 51 | 61 | 72 | 83 | 89 | 98 | 103 | 113 | 123 | 133 | 142 |
|---|
| 10 | 1 | 19 | 43 | 48 | 55 | 62 | 65 | 71 | 82 | 88 | 106 | 120 | 144 | 157 |
|---|
| 11 | 1 | 4 | 42 | 49 | 56 | 67 | 74 | 87 | 102 | 108 | 136 | 154 | 160 | 157 |
|---|
| 12 | 1 | 6 | 41 | 49 | 59 | 74 | 97 | 124 | 141 | 148 | 155 | 160 | 160 | 157 |
|---|
| 13 | 1 | 11 | 43 | 51 | 63 | 84 | 112 | 139 | 168 | 177 | 182 | 184 | 181 | 175 |
|---|
| 14 | 1 | 3 | 43 | 39 | 55 | 67 | 84 | 99 | 115 | 138 | 163 | 187 | 198 | 202 |
|---|
| 15 | 1 | 1 | 42 | 51 | 59 | 64 | 76 | 93 | 106 | 125 | 149 | 171 | 199 | 205 |
|---|
| 16 | 1 | 12 | 41 | 49 | 56 | 62 | 72 | 88 | 119 | 135 | 162 | 185 | 195 | 205 |
|---|
| 17 | 1 | 2 | 40 | 49 | 58 | 72 | 84 | 103 | 122 | 138 | 162 | 187 | 209 | 215 |
|---|
| 18 | 1 | 5 | 41 | 42 | 48 | 60 | 79 | 106 | 141 | 164 | 197 | 199 | 220 | 223 |
|---|
| 19 | 1 | 14 | 41 | 49 | 62 | 79 | 101 | 128 | 164 | 192 | 227 | 248 | 259 | 266 |
|---|
| 20 | 1 | 7 | 41 | 49 | 57 | 71 | 89 | 112 | 146 | 174 | 218 | 250 | 288 | 305 |
|---|
| 21 | 1 | 24 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
|---|
| 22 | 1 | 30 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
|---|
| 23 | 1 | 22 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
|---|
| 24 | 1 | 23 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
|---|
| 25 | 1 | 27 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
|---|
| 26 | 1 | 28 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
|---|
| 27 | 1 | 26 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
|---|
| 28 | 1 | 25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
|---|
| 29 | 1 | 29 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
|---|
| 30 | 1 | 21 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
|---|
| 31 | NA | NA | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ |
|---|
| 171 | 4 | 24 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
|---|
| 172 | 4 | 30 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
|---|
| 173 | 4 | 22 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
|---|
| 174 | 4 | 23 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
|---|
| 175 | 4 | 27 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
|---|
| 176 | 4 | 28 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
|---|
| 177 | 4 | 26 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
|---|
| 178 | 4 | 25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
|---|
| 179 | 4 | 29 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
|---|
| 180 | 4 | 21 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
|---|
| 181 | 4 | 33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
|---|
| 182 | 4 | 37 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
|---|
| 183 | 4 | 36 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
|---|
| 184 | 4 | 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
|---|
| 185 | 4 | 39 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
|---|
| 186 | 4 | 38 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
|---|
| 187 | 4 | 32 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
|---|
| 188 | 4 | 40 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
|---|
| 189 | 4 | 34 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
|---|
| 190 | 4 | 35 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
|---|
| 191 | 4 | 44 | 42 | 51 | 65 | 86 | 103 | 118 | 127 | 138 | 145 | 146 | 0 | 0 |
|---|
| 192 | 4 | 45 | 41 | 50 | 61 | 78 | 98 | 117 | 135 | 141 | 147 | 174 | 197 | 196 |
|---|
| 193 | 4 | 43 | 42 | 55 | 69 | 96 | 131 | 157 | 184 | 188 | 197 | 198 | 199 | 200 |
|---|
| 194 | 4 | 41 | 42 | 51 | 66 | 85 | 103 | 124 | 155 | 153 | 175 | 184 | 199 | 204 |
|---|
| 195 | 4 | 47 | 41 | 53 | 66 | 79 | 100 | 123 | 148 | 157 | 168 | 185 | 210 | 205 |
|---|
| 196 | 4 | 49 | 40 | 53 | 64 | 85 | 108 | 128 | 152 | 166 | 184 | 203 | 233 | 237 |
|---|
| 197 | 4 | 46 | 40 | 52 | 62 | 82 | 101 | 120 | 144 | 156 | 173 | 210 | 231 | 238 |
|---|
| 198 | 4 | 50 | 41 | 54 | 67 | 84 | 105 | 122 | 155 | 175 | 205 | 234 | 264 | 264 |
|---|
| 199 | 4 | 42 | 42 | 49 | 63 | 84 | 103 | 126 | 160 | 174 | 204 | 234 | 269 | 281 |
|---|
| 200 | 4 | 48 | 39 | 50 | 62 | 80 | 104 | 125 | 154 | 170 | 222 | 261 | 303 | 322 |
|---|
| chick | 0 | 2 | 4 | 6 | 8 |
|---|
| 1 | 18 | 39 | 35 | NaN | NaN | NaN |
|---|
| 2 | 16 | 41 | 45 | 49 | 51 | 57 |
|---|
| 3 | 15 | 41 | 49 | 56 | 64 | 68 |
|---|
| 4 | 13 | 41 | 48 | 53 | 60 | 65 |
|---|
| 5 | 9 | 42 | 51 | 59 | 68 | 85 |
|---|
duplicated 根据键值判断当前行是否与之前的有重复
unique 返回去掉重复后的值
anyDuplicated 返回第一个重复值的index i 没有就为0
uniqueN 相等于length(unique(x))
DT <- data.table(A = rep(1:3, each=4), B = rep(1:4, each=3), C = rep(1:2, 6), key = "A,B")
DT
duplicated(DT)
unique(DT)
| A | B | C |
|---|
| 1 | 1 | 1 | 1 |
|---|
| 2 | 1 | 1 | 2 |
|---|
| 3 | 1 | 1 | 1 |
|---|
| 4 | 1 | 2 | 2 |
|---|
| 5 | 2 | 2 | 1 |
|---|
| 6 | 2 | 2 | 2 |
|---|
| 7 | 2 | 3 | 1 |
|---|
| 8 | 2 | 3 | 2 |
|---|
| 9 | 3 | 3 | 1 |
|---|
| 10 | 3 | 4 | 2 |
|---|
| 11 | 3 | 4 | 1 |
|---|
| 12 | 3 | 4 | 2 |
|---|
- FALSE
- TRUE
- TRUE
- FALSE
- FALSE
- TRUE
- FALSE
- TRUE
- FALSE
- FALSE
- TRUE
- TRUE
| A | B | C |
|---|
| 1 | 1 | 1 | 1 |
|---|
| 2 | 1 | 2 | 2 |
|---|
| 3 | 2 | 2 | 1 |
|---|
| 4 | 2 | 3 | 1 |
|---|
| 5 | 3 | 3 | 1 |
|---|
| 6 | 3 | 4 | 2 |
|---|
DT <- data.table(A = rep(1:3, each=4), B = rep(1:4, each=3), C = rep(1:2, 6), key = "A,B")
DT
duplicated(DT, by="B", fromLast=TRUE)
unique(DT, by="B", fromLast=TRUE)
| A | B | C |
|---|
| 1 | 1 | 1 | 1 |
|---|
| 2 | 1 | 1 | 2 |
|---|
| 3 | 1 | 1 | 1 |
|---|
| 4 | 1 | 2 | 2 |
|---|
| 5 | 2 | 2 | 1 |
|---|
| 6 | 2 | 2 | 2 |
|---|
| 7 | 2 | 3 | 1 |
|---|
| 8 | 2 | 3 | 2 |
|---|
| 9 | 3 | 3 | 1 |
|---|
| 10 | 3 | 4 | 2 |
|---|
| 11 | 3 | 4 | 1 |
|---|
| 12 | 3 | 4 | 2 |
|---|
- TRUE
- TRUE
- FALSE
- TRUE
- TRUE
- FALSE
- TRUE
- TRUE
- FALSE
- TRUE
- TRUE
- FALSE
anyDuplicated(DT, by=c("A", "B"))
any(duplicated(DT, by=c("A", "B")))
2
TRUE
x[i, j, by, keyby, with = TRUE…..]
根据语法,当数据表的i为data.table时,x必须设置key值,i和x连接
DT[J(…)]就是一种更可读的方法,通过创造一个data.table作为i传入,与DT连接
x[data.table(id)]与x[J(id)]等同 也可以直接写成x[id],结果等与subset(x, key == id)
x[list(id)]与x[.(id)]等同
DT[“b”]
DT = data.table(A=5:1,B=letters[5:1])
DT
setkey(DT,B)
DT[J("b")]
CJ(c(5,NA,1), c(1,3,2))
CJ(c(5,NA,1), c(1,3,2), sorted=FALSE)
x = c(1,1,2)
y = c(4,6,4)
CJ(x, y, unique=TRUE)
| V1 | V2 |
|---|
| 1 | NA | 1 |
|---|
| 2 | NA | 2 |
|---|
| 3 | NA | 3 |
|---|
| 4 | 1 | 1 |
|---|
| 5 | 1 | 2 |
|---|
| 6 | 1 | 3 |
|---|
| 7 | 5 | 1 |
|---|
| 8 | 5 | 2 |
|---|
| 9 | 5 | 3 |
|---|
| V1 | V2 |
|---|
| 1 | 5 | 1 |
|---|
| 2 | 5 | 3 |
|---|
| 3 | 5 | 2 |
|---|
| 4 | NA | 1 |
|---|
| 5 | NA | 3 |
|---|
| 6 | NA | 2 |
|---|
| 7 | 1 | 1 |
|---|
| 8 | 1 | 3 |
|---|
| 9 | 1 | 2 |
|---|
| V1 | V2 | unique |
|---|
| 1 | 1 | 4 | TRUE |
|---|
| 2 | 1 | 4 | TRUE |
|---|
| 3 | 1 | 6 | TRUE |
|---|
| 4 | 1 | 4 | TRUE |
|---|
| 5 | 1 | 4 | TRUE |
|---|
| 6 | 1 | 6 | TRUE |
|---|
| 7 | 2 | 4 | TRUE |
|---|
| 8 | 2 | 4 | TRUE |
|---|
| 9 | 2 | 6 | TRUE |
|---|
like 通过正则表达式匹配
DT = data.table(Name=c("Mary","George","Martha"), Salary=c(2,3,4))
DT
DT[Name %like% "^Mar"]
| Name | Salary |
|---|
| 1 | Mary | 2 |
|---|
| 2 | George | 3 |
|---|
| 3 | Martha | 4 |
|---|
| Name | Salary |
|---|
| 1 | Mary | 2 |
|---|
| 2 | Martha | 4 |
|---|
merge 合并 data.table的merge方法和data.frame的merge方法,由一个区别就是,在默认情况下,data.table通过共同的key进行合并,而data.frame则是通过共同的列名
dt1 <- data.table(A = letters[1:10], X = 1:10, key = "A")
dt2 <- data.table(A = letters[5:14], Y = 1:10, key = "A")
merge(dt1, dt2)
merge(dt1, dt2, all = TRUE)
| A | X | Y |
|---|
| 1 | e | 5 | 1 |
|---|
| 2 | f | 6 | 2 |
|---|
| 3 | g | 7 | 3 |
|---|
| 4 | h | 8 | 4 |
|---|
| 5 | i | 9 | 5 |
|---|
| 6 | j | 10 | 6 |
|---|
| A | X | Y |
|---|
| 1 | a | 1 | NA |
|---|
| 2 | b | 2 | NA |
|---|
| 3 | c | 3 | NA |
|---|
| 4 | d | 4 | NA |
|---|
| 5 | e | 5 | 1 |
|---|
| 6 | f | 6 | 2 |
|---|
| 7 | g | 7 | 3 |
|---|
| 8 | h | 8 | 4 |
|---|
| 9 | i | 9 | 5 |
|---|
| 10 | j | 10 | 6 |
|---|
| 11 | k | NA | 7 |
|---|
| 12 | l | NA | 8 |
|---|
| 13 | m | NA | 9 |
|---|
| 14 | n | NA | 10 |
|---|
dt1 <- data.table(A = letters[rep(1:3, 2)], X = 1:6, key = "A")
dt2 <- data.table(A = letters[rep(2:4, 2)], Y = 6:1, key = "A")
merge(dt1, dt2, allow.cartesian=TRUE)
| A | X | Y |
|---|
| 1 | b | 2 | 6 |
|---|
| 2 | b | 2 | 3 |
|---|
| 3 | b | 5 | 6 |
|---|
| 4 | b | 5 | 3 |
|---|
| 5 | c | 3 | 5 |
|---|
| 6 | c | 3 | 2 |
|---|
| 7 | c | 6 | 5 |
|---|
| 8 | c | 6 | 2 |
|---|
na.omit.data.table 根据指定的列去掉行值为空值的行
na.omit(object, cols=seq_along(object), invert=FALSE, …)
DT = data.table(x=c(1,NaN,NA,3), y=c(NAinteger, 1:3), z=c(“a”, NAcharacter, “b”, “c”))
/ default behaviour /
na.omit(DT)
/ omit rows where ‘x’ has a missing value/
na.omit(DT, cols=”x”)
/ omit rows where either ‘x’ or ‘y’ have missing values/
na.omit(DT, cols=c(“x”, “y”))
rbindlist 对一列表的data.table进行行合并,组成一个data.table
DT1 = data.table(A=1:3,B=letters[1:3])
DT2 = data.table(A=4:5,B=letters[4:5])
l = list(DT1,DT2)
rbindlist(l)
DT1 = data.table(A=1:3,B=letters[1:3])
DT2 = data.table(B=letters[4:5],A=4:5)
l = list(DT1,DT2)
rbindlist(l, use.names=TRUE)
DT1 = data.table(A=1:3,B=letters[1:3])
DT2 = data.table(B=letters[4:5],C=factor(1:2))
l = list(DT1,DT2)
rbindlist(l, use.names=TRUE, fill=TRUE)
| A | B | C |
|---|
| 1 | 1 | a | NA |
|---|
| 2 | 2 | b | NA |
|---|
| 3 | 3 | c | NA |
|---|
| 4 | NA | d | 1 |
|---|
| 5 | NA | e | 2 |
|---|
setattr(x,name,value)
setnames(x,old,new)
DT = data.table(a=1:2,b=3:4,c=5:6)
setnames(DT,"b","B")
setnames(DT,3,"C")
DT
setcolorder(x, neworder) 对列名进行指定的排序
DT = data.table(A=sample(3, 10, TRUE),B=sample(letters[1:3], 10, TRUE), C=sample(10))
setcolorder(DT, c("C", "A", "B"))
DT
| C | A | B |
|---|
| 1 | 4 | 2 | a |
|---|
| 2 | 2 | 3 | a |
|---|
| 3 | 3 | 3 | a |
|---|
| 4 | 9 | 2 | a |
|---|
| 5 | 7 | 2 | b |
|---|
| 6 | 10 | 1 | c |
|---|
| 7 | 1 | 3 | c |
|---|
| 8 | 5 | 2 | b |
|---|
| 9 | 8 | 3 | b |
|---|
| 10 | 6 | 2 | b |
|---|
setorder 对行进行排序
DT = data.table(A=sample(3, 10, TRUE),B=sample(letters[1:3], 10, TRUE), C=sample(10))
setorder(DT, A, -B)
setorderv(DT, c("A", "B"), c(1, -1))
DT
| A | B | C |
|---|
| 1 | 1 | a | 4 |
|---|
| 2 | 2 | c | 9 |
|---|
| 3 | 2 | c | 5 |
|---|
| 4 | 2 | c | 2 |
|---|
| 5 | 2 | b | 10 |
|---|
| 6 | 2 | a | 7 |
|---|
| 7 | 3 | c | 1 |
|---|
| 8 | 3 | c | 8 |
|---|
| 9 | 3 | a | 6 |
|---|
| 10 | 3 | a | 3 |
|---|
setDF 通过引用的方式把数据表变成数据框
setDT 通过引用的方式把数据框变成数据表
X = data.table(x=1:5, y=6:10)
setDF(X)[]
A = list(a=1:5, a=6:10)
setDT(A)[]
tables 列出所有class为data.table的对象
tables()
NAME NROW NCOL MB COLS KEY
[1,] A 5 2 1 a,a
[2,] DT 10 3 1 A,B,C
[3,] dt1 6 2 1 A,X A
[4,] DT1 3 2 1 A,B
[5,] dt2 6 2 1 A,Y A
[6,] DT2 2 2 1 B,C
Total: 6MB