data.table Way 2

继续data.table1

1
2
3
library(data.table)
library(ggplot2)
library(plyr)

我们继续使用Basketball Reference: boxscore数据和mtcars

1
2
3
4
5
boxscore <- fread("/Users/shihchosen/Desktop/boxscore.csv")
dtb <- copy(boxscore)
dtb[,1:2:=NULL]
dtb[,year:=year(date)]
head(dtb)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
## date game_id team_home_abbr team player player_id
## 1: 2012-10-30 201210300CLE CLE WAS A.J. Price priceaj01
## 2: 2012-10-30 201210300CLE CLE WAS Emeka Okafor okafoem01
## 3: 2012-10-30 201210300CLE CLE WAS Trevor Ariza arizatr01
## 4: 2012-10-30 201210300CLE CLE WAS Bradley Beal bealbr01
## 5: 2012-10-30 201210300CLE CLE WAS Trevor Booker booketr01
## 6: 2012-10-30 201210300CLE CLE WAS Jordan Crawford crawfjo02
## minutes fgm fga 3pm 3pa ftm fta offreb defreb totreb assists steals
## 1: 29.40 2 13 2 9 1 1 1 1 2 6 0
## 2: 24.58 4 10 0 0 2 4 5 2 7 0 0
## 3: 24.58 3 8 2 4 1 2 1 2 3 4 3
## 4: 21.55 2 8 2 4 2 2 0 3 3 3 1
## 5: 16.75 2 9 0 1 0 0 1 0 1 1 1
## 6: 28.70 4 13 0 6 3 4 1 2 3 5 1
## blocks turnovers fouls points plusminus year
## 1: 0 1 1 7 -11 2012
## 2: 4 1 1 10 -5 2012
## 3: 2 0 0 9 -9 2012
## 4: 0 2 1 8 -16 2012
## 5: 1 4 4 4 -15 2012
## 6: 1 1 1 11 0 2012
1
2
dtm <- data.table(mtcars)
head(dtm)
1
2
3
4
5
6
7
## mpg cyl disp hp drat wt qsec vs am gear carb
## 1: 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
## 2: 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
## 3: 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
## 4: 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1
## 5: 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2
## 6: 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1

set()

set family usage
set() :=的循环版本
setnames() 设置或更改列名
setorder() 更改行的顺序
setcolorder() 更改行的顺序
setkey() 设置data.table中的key

set()

1
2
3
dtb1 <- copy(dtb)
set(dtb1, j = which(colnames(dtb)=='plusminus'), value = 1)
head(dtb1)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
## date game_id team_home_abbr team player player_id
## 1: 2012-10-30 201210300CLE CLE WAS A.J. Price priceaj01
## 2: 2012-10-30 201210300CLE CLE WAS Emeka Okafor okafoem01
## 3: 2012-10-30 201210300CLE CLE WAS Trevor Ariza arizatr01
## 4: 2012-10-30 201210300CLE CLE WAS Bradley Beal bealbr01
## 5: 2012-10-30 201210300CLE CLE WAS Trevor Booker booketr01
## 6: 2012-10-30 201210300CLE CLE WAS Jordan Crawford crawfjo02
## minutes fgm fga 3pm 3pa ftm fta offreb defreb totreb assists steals
## 1: 29.40 2 13 2 9 1 1 1 1 2 6 0
## 2: 24.58 4 10 0 0 2 4 5 2 7 0 0
## 3: 24.58 3 8 2 4 1 2 1 2 3 4 3
## 4: 21.55 2 8 2 4 2 2 0 3 3 3 1
## 5: 16.75 2 9 0 1 0 0 1 0 1 1 1
## 6: 28.70 4 13 0 6 3 4 1 2 3 5 1
## blocks turnovers fouls points plusminus year
## 1: 0 1 1 7 1 2012
## 2: 4 1 1 10 1 2012
## 3: 2 0 0 9 1 2012
## 4: 0 2 1 8 1 2012
## 5: 1 4 4 4 1 2012
## 6: 1 1 1 11 1 2012
1
set(dtb1, j = which(colnames(dtb)=='plusminus'), value = dtb$plusminus)

创建一个data.table, 随机将每行的5个数字变成10

1
2
3
4
5
6
7
8
9
temp <- data.table(A = sample(1:9),
B = sample(1:9),
C = sample(1:9),
D = sample(1:9),
E = sample(1:9))
# for each row, randomly change 3 values to 10
for (i in 1:9) set(temp, i, sample(5, 3), 10)
temp
1
2
3
4
5
6
7
8
9
10
## A B C D E
## 1: 10 7 10 10 3
## 2: 10 10 10 9 9
## 3: 10 9 2 10 10
## 4: 4 2 10 10 10
## 5: 6 10 10 8 10
## 6: 7 10 10 10 8
## 7: 10 6 10 10 1
## 8: 9 1 10 10 10
## 9: 3 5 10 10 10

setnames()

temp的列名更改为列_*的格式:

1
2
3
# setnames(x, old, new)
setnames(temp, names(temp), paste0('列_', names(temp)))
head(temp, 3)
1
2
3
4
## 列_A 列_B 列_C 列_D 列_E
## 1: 10 7 10 10 3
## 2: 10 10 10 9 9
## 3: 10 9 2 10 10
1
2
3
#把`temp`的列名更改为`Col_*`的格式:
setnames(temp, gsub('^列_', 'Col_', names(temp)))
head(temp, 3)
1
2
3
4
## Col_A Col_B Col_C Col_D Col_E
## 1: 10 7 10 10 3
## 2: 10 10 10 9 9
## 3: 10 9 2 10 10

setorder() and setcolorder()

按照年份和得分排序

1
2
setorder(dtb1, year, -points)
dtb1[, head(.SD), by=year]
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
## year date game_id team_home_abbr team player
## 1: 2004 2004-12-18 200412180MIL MIL PHI Allen Iverson
## 2: 2004 2004-12-02 200412020DAL DAL DAL Dirk Nowitzki
## 3: 2004 2004-12-20 200412200PHI PHI PHI Allen Iverson
## 4: 2004 2004-12-02 200412020DAL DAL HOU Tracy McGrady
## 5: 2004 2004-12-28 200412280LAL LAL LAL Kobe Bryant
## 6: 2004 2004-11-24 200411240CLE CLE CLE LeBron James
## 7: 2005 2005-12-20 200512200LAL LAL LAL Kobe Bryant
## 8: 2005 2005-02-12 200502120PHI PHI PHI Allen Iverson
## 9: 2005 2005-03-20 200503200TOR TOR CLE LeBron James
## 10: 2005 2005-01-04 200501040IND IND IND Jermaine O'Neal
## 11: 2005 2005-01-14 200501140NOH NOH POR Damon Stoudamire
## 12: 2005 2005-12-23 200512230ATL ATL PHI Allen Iverson
## 13: 2006 2006-01-22 200601220LAL LAL LAL Kobe Bryant
## 14: 2006 2006-12-17 200612170LAL LAL WAS Gilbert Arenas
## 15: 2006 2006-12-29 200612290CHA CHA LAL Kobe Bryant
## 16: 2006 2006-11-11 200611110MIL MIL MIL Michael Redd
## 17: 2006 2006-12-22 200612220PHO PHO WAS Gilbert Arenas
## 18: 2006 2006-12-15 200612150LAL LAL LAL Kobe Bryant
## 19: 2007 2007-03-16 200703160LAL LAL LAL Kobe Bryant
## 20: 2007 2007-03-22 200703220MEM MEM LAL Kobe Bryant
## 21: 2007 2007-01-12 200701120OKC OKC OKC Ray Allen
## 22: 2007 2007-03-30 200703300LAL LAL LAL Kobe Bryant
## 23: 2007 2007-01-26 200701260NYK NYK NYK Jamal Crawford
## 24: 2007 2007-03-04 200703040MIL MIL MIL Michael Redd
## 25: 2008 2008-11-05 200811050MIN MIN SAS Tony Parker
## 26: 2008 2008-03-28 200803280LAL LAL LAL Kobe Bryant
## 27: 2008 2008-12-18 200812180POR POR POR Brandon Roy
## 28: 2008 2008-03-02 200803020LAL LAL LAL Kobe Bryant
## 29: 2008 2008-01-15 200801150MEM MEM CLE LeBron James
## 30: 2008 2008-12-20 200812200CHA CHA GSW Jamal Crawford
## 31: 2009 2009-02-02 200902020NYK NYK LAL Kobe Bryant
## 32: 2009 2009-11-14 200911140MIL MIL MIL Brandon Jennings
## 33: 2009 2009-02-20 200902200MIL MIL CLE LeBron James
## 34: 2009 2009-04-12 200904120MIA MIA MIA Dwyane Wade
## 35: 2009 2009-02-04 200902040NYK NYK CLE LeBron James
## 36: 2009 2009-03-13 200903130SAC SAC CLE LeBron James
## 37: 2010 2010-01-30 201001300DAL DAL POR Andre Miller
## 38: 2010 2010-02-08 201002080ORL ORL ORL Vince Carter
## 39: 2010 2010-02-06 201002060CLE CLE CLE LeBron James
## 40: 2010 2010-10-27 201010270GSW GSW GSW Monta Ellis
## 41: 2010 2010-11-09 201011090MIA MIA UTA Paul Millsap
## 42: 2010 2010-02-03 201002030DAL DAL GSW Monta Ellis
## 43: 2011 2011-02-03 201102030ORL ORL MIA LeBron James
## 44: 2011 2011-02-07 201102070DEN DEN DEN Carmelo Anthony
## 45: 2011 2011-05-17 201105170DAL DAL DAL Dirk Nowitzki
## 46: 2011 2011-01-17 201101170LAC LAC LAC Blake Griffin
## 47: 2011 2011-01-26 201101260MIN MIN OKC Kevin Durant
## 48: 2011 2011-04-16 201104160ORL ORL ORL Dwight Howard
## 49: 2012 2012-03-04 201203040CHA CHA BRK Deron Williams
## 50: 2012 2012-02-19 201202190OKC OKC OKC Kevin Durant
## 51: 2012 2012-03-23 201203230OKC OKC MIN Kevin Love
## 52: 2012 2012-01-10 201201100LAL LAL LAL Kobe Bryant
## 53: 2012 2012-02-07 201202070GSW GSW GSW Monta Ellis
## 54: 2012 2012-11-02 201211020ATL ATL HOU James Harden
## 55: 2013 2013-02-27 201302270NYK NYK GSW Stephen Curry
## 56: 2013 2013-01-18 201301180DAL DAL OKC Kevin Durant
## 57: 2013 2013-04-02 201304020MIA MIA NYK Carmelo Anthony
## 58: 2013 2013-03-25 201303250WAS WAS WAS John Wall
## 59: 2013 2013-04-10 201304100POR POR LAL Kobe Bryant
## 60: 2013 2013-04-12 201304120LAL LAL GSW Stephen Curry
## year date game_id team_home_abbr team player
## player_id minutes fgm fga 3pm 3pa ftm fta offreb defreb totreb assists
## 1: iversal01 40.00 17 29 4 7 16 21 2 6 8 5
## 2: nowitdi01 49.00 15 32 2 5 21 22 4 12 16 2
## 3: iversal01 46.00 18 31 4 6 11 17 0 1 1 6
## 4: mcgratr01 49.00 19 36 6 13 4 7 5 4 9 9
## 5: bryanko01 46.00 14 26 4 7 16 18 0 2 2 4
## 6: jamesle01 38.00 15 22 2 3 11 12 3 3 6 5
## 7: bryanko01 32.88 18 31 4 10 22 25 3 5 8 0
## 8: iversal01 42.00 17 36 2 5 24 27 1 3 4 6
## 9: jamesle01 48.00 18 36 6 12 14 15 2 8 10 5
## 10: onealje01 36.00 18 28 0 0 19 25 3 8 11 3
## 11: stoudda01 43.00 20 32 8 16 6 6 0 0 0 1
## 12: iversal01 44.50 17 31 0 1 19 21 0 6 6 5
## 13: bryanko01 41.93 28 46 7 13 18 20 2 4 6 2
## 14: arenagi01 49.00 17 32 5 12 21 27 0 8 8 8
## 15: bryanko01 54.47 22 45 4 11 10 12 0 5 5 4
## 16: reddmi01 45.73 18 32 6 12 15 17 0 2 2 0
## 17: arenagi01 47.57 21 37 6 12 6 6 0 3 3 4
## 18: bryanko01 54.02 17 38 5 8 14 16 0 10 10 8
## 19: bryanko01 49.97 23 39 8 12 11 12 1 6 7 3
## 20: bryanko01 45.18 20 37 3 7 17 18 2 3 5 4
## 21: allenra02 43.12 17 32 8 12 12 12 3 7 10 5
## 22: bryanko01 47.57 19 44 3 9 12 14 1 1 2 2
## 23: crawfja01 39.48 20 30 8 10 4 4 2 1 3 1
## 24: reddmi01 43.63 18 32 8 13 8 9 1 2 3 2
## 25: parketo01 50.53 22 36 2 3 9 10 0 7 7 10
## 26: bryanko01 41.87 19 37 9 17 6 7 2 8 10 1
## 27: roybr01 43.82 14 27 5 7 19 21 1 4 5 6
## 28: bryanko01 51.40 15 27 2 3 20 27 1 10 11 4
## 29: jamesle01 47.17 18 28 6 12 9 16 3 5 8 9
## 30: crawfja01 45.85 14 26 5 8 17 18 0 2 2 5
## 31: bryanko01 36.80 19 31 3 6 20 20 0 0 0 3
## 32: jennibr01 40.72 21 34 7 8 6 8 1 4 5 5
## 33: jamesle01 40.07 16 29 8 11 15 22 1 4 5 9
## 34: wadedw01 39.28 19 30 6 12 11 13 2 7 9 4
## 35: jamesle01 44.05 17 33 2 7 16 19 0 9 9 11
## 36: jamesle01 46.98 16 30 4 11 15 17 0 4 4 9
## 37: millean02 42.23 22 31 1 1 7 8 1 4 5 2
## 38: cartevi01 38.05 19 27 6 10 4 4 0 7 7 2
## 39: jamesle01 43.85 17 31 6 12 7 7 1 7 8 8
## 40: ellismo01 39.75 18 24 1 2 9 12 0 3 3 2
## 41: millspa01 43.45 19 28 3 3 5 7 4 5 9 1
## 42: ellismo01 40.85 17 23 4 6 8 13 3 1 4 2
## 43: jamesle01 42.65 17 25 3 5 14 17 2 9 11 8
## 44: anthoca01 39.47 16 24 2 3 16 18 1 10 11 0
## 45: nowitdi01 40.90 12 15 0 0 24 24 0 6 6 4
## 46: griffbl01 36.93 19 24 0 0 9 11 3 11 14 3
## 47: duranke01 49.83 15 28 4 8 13 14 2 16 18 2
## 48: howardw01 45.80 16 23 0 1 14 22 6 13 19 1
## 49: willide01 37.75 16 29 4 11 21 21 1 5 6 7
## 50: duranke01 45.45 19 28 5 6 8 10 2 6 8 3
## 51: loveke01 49.32 16 27 7 11 12 16 6 8 14 0
## 52: bryanko01 39.12 18 31 0 3 12 13 0 5 5 3
## 53: ellismo01 41.75 18 29 3 6 9 10 1 6 7 2
## 54: hardeja01 39.82 14 19 2 4 15 17 1 6 7 2
## 55: curryst01 48.00 18 28 11 13 7 7 0 6 6 7
## 56: duranke01 49.67 13 31 5 9 21 21 0 9 9 1
## 57: anthoca01 40.33 18 26 7 10 7 8 0 2 2 2
## 58: walljo01 44.55 13 22 2 4 19 24 0 7 7 8
## 59: bryanko01 48.00 14 27 1 5 18 18 0 8 8 5
## 60: curryst01 44.28 17 31 9 15 4 4 3 3 6 9
## player_id minutes fgm fga 3pm 3pa ftm fta offreb defreb totreb assists
## steals blocks turnovers fouls points plusminus
## 1: 2 0 3 0 54 25
## 2: 3 4 1 2 53 12
## 3: 7 0 7 6 51 0
## 4: 2 3 2 3 48 0
## 5: 2 0 5 2 48 21
## 6: 3 1 3 3 43 12
## 7: 3 0 2 3 62 35
## 8: 5 1 3 1 60 27
## 9: 2 0 4 0 56 -7
## 10: 0 2 3 3 55 22
## 11: 1 0 0 3 54 -7
## 12: 2 0 0 0 53 -5
## 13: 3 1 3 1 81 25
## 14: 2 0 3 3 60 10
## 15: 0 0 4 6 58 5
## 16: 3 1 3 2 57 2
## 17: 1 0 4 5 54 12
## 18: 2 1 5 1 53 20
## 19: 3 0 2 3 65 4
## 20: 0 0 2 4 60 2
## 21: 1 0 2 2 54 10
## 22: 0 0 3 2 53 1
## 23: 2 0 3 2 52 27
## 24: 3 0 4 2 52 -12
## 25: 0 0 4 0 55 7
## 26: 3 0 1 4 53 1
## 27: 0 1 0 1 52 12
## 28: 1 2 5 2 52 4
## 29: 3 1 4 1 51 10
## 30: 0 1 1 2 50 15
## 31: 0 1 2 1 61 19
## 32: 0 0 4 1 55 11
## 33: 0 0 6 3 55 13
## 34: 0 1 3 3 55 24
## 35: 0 2 3 1 52 7
## 36: 2 3 3 2 51 6
## 37: 2 0 2 2 52 4
## 38: 0 0 2 5 48 19
## 39: 5 0 5 2 47 4
## 40: 2 0 1 4 46 9
## 41: 1 1 1 4 46 2
## 42: 1 1 7 5 46 -9
## 43: 1 1 3 2 51 2
## 44: 1 3 4 2 50 -3
## 45: 0 4 2 3 48 12
## 46: 0 1 4 3 47 17
## 47: 2 2 2 2 47 -3
## 48: 0 1 8 5 46 -10
## 49: 1 1 5 2 57 11
## 50: 4 0 5 1 51 19
## 51: 1 1 4 3 51 4
## 52: 3 0 2 2 48 16
## 53: 2 1 3 2 48 5
## 54: 0 0 5 3 45 17
## 55: 3 0 4 3 54 -4
## 56: 2 0 4 2 52 -1
## 57: 1 1 0 3 50 15
## 58: 1 1 2 2 47 26
## 59: 3 4 1 4 47 7
## 60: 3 0 1 3 47 3
## steals blocks turnovers fouls points plusminus

按照字母表为列名排序

1
2
setcolorder(dtb1, sort(names(dtb)))
head(dtb1)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
## 3pa 3pm assists blocks date defreb fga fgm fouls fta ftm
## 1: 7 4 5 0 2004-12-18 6 29 17 0 21 16
## 2: 5 2 2 4 2004-12-02 12 32 15 2 22 21
## 3: 6 4 6 0 2004-12-20 1 31 18 6 17 11
## 4: 13 6 9 3 2004-12-02 4 36 19 3 7 4
## 5: 7 4 4 0 2004-12-28 2 26 14 2 18 16
## 6: 3 2 5 1 2004-11-24 3 22 15 3 12 11
## game_id minutes offreb player player_id plusminus points
## 1: 200412180MIL 40 2 Allen Iverson iversal01 25 54
## 2: 200412020DAL 49 4 Dirk Nowitzki nowitdi01 12 53
## 3: 200412200PHI 46 0 Allen Iverson iversal01 0 51
## 4: 200412020DAL 49 5 Tracy McGrady mcgratr01 0 48
## 5: 200412280LAL 46 0 Kobe Bryant bryanko01 21 48
## 6: 200411240CLE 38 3 LeBron James jamesle01 12 43
## steals team team_home_abbr totreb turnovers year
## 1: 2 PHI MIL 8 3 2004
## 2: 3 DAL DAL 16 1 2004
## 3: 7 PHI PHI 1 7 2004
## 4: 2 HOU DAL 9 2 2004
## 5: 2 LAL LAL 2 5 2004
## 6: 3 CLE CLE 6 3 2004

setkey()

character类型的column设为key

setkey() sorts a data.table and marks it as sorted (with an attribute sorted). The sorted columns are the key. The key can be any columns in any order. The columns are sorted in ascending order always.

setkey reorders (or sorts) the rows of a data.table by the columns provided. In versions 1.9+, for integer columns… It is extremely fast, but is limited by the range of integer values being \<= 1e5

查询球员各年平均数据

1
2
3
4
5
6
7
player.query <- function(name,...){
dtb.key <- copy(dtb)
setkey(dtb.key, player)
data<-dtb.key[name][, lapply(.SD, function(x)round(mean(x),2)), .SDcols=7:23, by=.(year, player)][order(player, year)]
return(data)}
# 查询 Kobe Bryant
player.query("Kobe Bryant")
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
## year player minutes fgm fga 3pm 3pa ftm fta offreb
## 1: 2004 Kobe Bryant 43.70 8.48 21.22 1.89 5.74 9.56 11.85 1.41
## 2: 2005 Kobe Bryant 39.33 9.97 22.28 1.69 5.15 7.76 9.49 1.43
## 3: 2006 Kobe Bryant 41.05 11.47 24.67 2.42 6.66 8.17 9.51 0.63
## 4: 2007 Kobe Bryant 39.92 10.43 23.05 1.80 5.31 8.36 9.58 1.04
## 5: 2008 Kobe Bryant 38.96 9.81 20.79 1.57 4.50 7.28 8.71 1.15
## 6: 2009 Kobe Bryant 38.03 10.42 22.24 1.46 4.28 6.43 7.49 1.08
## 7: 2010 Kobe Bryant 37.30 9.09 20.50 1.64 4.71 6.05 7.45 0.98
## 8: 2011 Kobe Bryant 34.62 9.03 19.92 1.33 4.27 5.58 6.77 0.97
## 9: 2012 Kobe Bryant 38.91 10.21 22.99 1.74 5.31 6.80 8.04 1.09
## 10: 2013 Kobe Bryant 38.60 8.94 19.77 1.40 4.83 6.35 7.65 0.83
## defreb totreb assists steals blocks turnovers fouls points plusminus
## 1: 5.67 7.07 7.07 1.37 1.15 4.67 2.78 28.41 3.11
## 2: 3.85 5.28 4.93 1.22 0.57 3.34 2.68 29.40 -1.50
## 3: 4.64 5.27 4.81 1.84 0.30 3.43 3.08 33.52 3.13
## 4: 5.02 6.06 5.20 1.65 0.54 3.35 2.36 31.01 1.42
## 5: 4.80 5.94 5.20 1.65 0.41 2.97 2.75 28.47 7.36
## 6: 4.29 5.37 5.08 1.69 0.52 2.70 2.46 28.75 8.10
## 7: 4.27 5.24 5.08 1.24 0.33 3.26 2.57 25.88 4.26
## 8: 4.08 5.05 4.80 1.23 0.20 3.03 2.14 24.97 5.28
## 9: 4.14 5.23 4.54 1.28 0.28 3.39 2.15 28.96 2.37
## 10: 4.85 5.69 6.71 1.29 0.33 3.77 1.92 25.62 -0.58
1
player.query("Jeremy Lin")
1
2
3
4
5
6
7
8
9
10
## year player minutes fgm fga 3pm 3pa ftm fta offreb defreb
## 1: 2010 Jeremy Lin 8.55 0.71 2.24 0.00 0.12 0.53 0.76 0.29 0.47
## 2: 2011 Jeremy Lin 9.74 1.07 2.47 0.07 0.20 0.80 0.93 0.53 0.93
## 3: 2012 Jeremy Lin 30.91 4.98 11.24 0.76 2.59 3.33 4.14 0.52 3.03
## 4: 2013 Jeremy Lin 31.09 4.71 10.78 1.18 3.29 2.76 3.58 0.29 2.20
## totreb assists steals blocks turnovers fouls points plusminus
## 1: 0.76 0.94 1.12 0.41 0.65 0.88 1.94 -0.53
## 2: 1.47 1.80 1.00 0.13 0.60 1.33 3.00 1.00
## 3: 3.56 6.56 1.78 0.38 3.41 2.38 14.06 3.14
## 4: 2.49 5.58 1.42 0.27 2.78 2.51 13.36 0.24
1
player.query(c("Kobe Bryant", "LeBron James"))
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
## year player minutes fgm fga 3pm 3pa ftm fta offreb
## 1: 2004 Kobe Bryant 43.70 8.48 21.22 1.89 5.74 9.56 11.85 1.41
## 2: 2005 Kobe Bryant 39.33 9.97 22.28 1.69 5.15 7.76 9.49 1.43
## 3: 2006 Kobe Bryant 41.05 11.47 24.67 2.42 6.66 8.17 9.51 0.63
## 4: 2007 Kobe Bryant 39.92 10.43 23.05 1.80 5.31 8.36 9.58 1.04
## 5: 2008 Kobe Bryant 38.96 9.81 20.79 1.57 4.50 7.28 8.71 1.15
## 6: 2009 Kobe Bryant 38.03 10.42 22.24 1.46 4.28 6.43 7.49 1.08
## 7: 2010 Kobe Bryant 37.30 9.09 20.50 1.64 4.71 6.05 7.45 0.98
## 8: 2011 Kobe Bryant 34.62 9.03 19.92 1.33 4.27 5.58 6.77 0.97
## 9: 2012 Kobe Bryant 38.91 10.21 22.99 1.74 5.31 6.80 8.04 1.09
## 10: 2013 Kobe Bryant 38.60 8.94 19.77 1.40 4.83 6.35 7.65 0.83
## 11: 2004 LeBron James 40.86 9.07 18.66 0.93 2.59 5.34 7.03 1.31
## 12: 2005 LeBron James 42.61 10.46 21.95 1.57 4.49 6.85 9.06 1.18
## 13: 2006 LeBron James 42.69 10.84 22.78 1.56 4.59 7.10 9.88 1.15
## 14: 2007 LeBron James 41.48 9.72 21.05 1.18 4.09 6.64 9.34 1.23
## 15: 2008 LeBron James 39.67 10.17 21.04 1.46 4.81 7.55 10.27 1.62
## 16: 2009 LeBron James 38.79 10.06 20.50 1.83 5.03 7.82 10.09 1.23
## 17: 2010 LeBron James 39.00 9.33 18.92 1.59 4.68 7.63 10.05 0.86
## 18: 2011 LeBron James 40.90 10.00 19.27 1.09 3.40 6.20 8.19 1.34
## 19: 2012 LeBron James 38.82 10.16 19.37 1.05 2.99 5.98 7.98 1.65
## 20: 2013 LeBron James 38.98 9.74 17.77 1.41 3.63 5.81 7.53 1.36
## defreb totreb assists steals blocks turnovers fouls points plusminus
## 1: 5.67 7.07 7.07 1.37 1.15 4.67 2.78 28.41 3.11
## 2: 3.85 5.28 4.93 1.22 0.57 3.34 2.68 29.40 -1.50
## 3: 4.64 5.27 4.81 1.84 0.30 3.43 3.08 33.52 3.13
## 4: 5.02 6.06 5.20 1.65 0.54 3.35 2.36 31.01 1.42
## 5: 4.80 5.94 5.20 1.65 0.41 2.97 2.75 28.47 7.36
## 6: 4.29 5.37 5.08 1.69 0.52 2.70 2.46 28.75 8.10
## 7: 4.27 5.24 5.08 1.24 0.33 3.26 2.57 25.88 4.26
## 8: 4.08 5.05 4.80 1.23 0.20 3.03 2.14 24.97 5.28
## 9: 4.14 5.23 4.54 1.28 0.28 3.39 2.15 28.96 2.37
## 10: 4.85 5.69 6.71 1.29 0.33 3.77 1.92 25.62 -0.58
## 11: 5.66 6.97 7.17 2.48 0.86 3.24 1.83 24.41 4.59
## 12: 5.91 7.09 6.67 1.95 0.67 3.34 2.04 29.33 2.29
## 13: 6.19 7.34 6.65 1.44 0.77 3.55 2.35 30.33 1.95
## 14: 5.89 7.13 6.82 1.74 0.75 3.16 2.18 27.25 4.08
## 15: 6.03 7.66 6.87 1.86 1.13 3.30 2.15 29.35 5.74
## 16: 6.69 7.93 7.71 1.54 1.00 3.28 1.76 29.77 8.92
## 17: 6.54 7.40 8.23 1.62 0.94 3.51 1.76 27.89 8.18
## 18: 6.77 8.11 6.54 1.74 0.96 3.36 2.19 27.29 5.77
## 19: 6.81 8.46 6.28 1.75 0.77 3.31 1.60 27.36 7.35
## 20: 6.61 7.97 7.14 1.79 0.86 3.06 1.60 26.71 9.69

查询每天所有球员的数据

1
2
3
4
5
date.query <- function(date,...){
dtb.key <- copy(dtb)
setkey(dtb.key, date)
data<-dtb.key[date][, lapply(.SD, function(x)round(mean(x),2)), .SDcols=7:23, by=.(date, player)]
return(data)}

也可以把numeric类型的column设为key
查询盖帽数为10的球员

1
2
3
dtb.key <- copy(dtb)
setkey(dtb.key,blocks)
dtb.key[J(10)][,.(date, player, blocks)]
1
2
3
4
5
6
7
8
9
10
## date player blocks
## 1: 2012-11-30 Larry Sanders 10
## 2: 2012-02-01 Serge Ibaka 10
## 3: 2012-02-09 Serge Ibaka 10
## 4: 2012-04-29 Andrew Bynum 10
## 5: 2008-11-12 Dwight Howard 10
## 6: 2007-12-26 Marcus Camby 10
## 7: 2007-01-12 Emeka Okafor 10
## 8: 2006-03-25 Andrei Kirilenko 10
## 9: 2004-12-18 Josh Smith 10
1
2
# OR
dtb.key[.(12)][,.(date, player, blocks)]
1
2
## date player blocks
## 1: 2011-03-15 JaVale McGee 12

也可以设置多个key
调出LeBron James在骑士的数据

1
2
setkey(dtb.key, player, team)
dtb.key[.('LeBron James', "CLE")]
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
## date game_id team_home_abbr team player player_id
## 1: 2009-10-28 200910280TOR TOR CLE LeBron James jamesle01
## 2: 2009-10-31 200910310CLE CLE CLE LeBron James jamesle01
## 3: 2009-11-03 200911030CLE CLE CLE LeBron James jamesle01
## 4: 2009-11-06 200911060NYK NYK CLE LeBron James jamesle01
## 5: 2009-11-11 200911110ORL ORL CLE LeBron James jamesle01
## ---
## 536: 2010-02-18 201002180CLE CLE CLE LeBron James jamesle01
## 537: 2010-04-17 201004170CLE CLE CLE LeBron James jamesle01
## 538: 2008-11-07 200811070CLE CLE CLE LeBron James jamesle01
## 539: 2007-11-04 200711040PHO PHO CLE LeBron James jamesle01
## 540: 2006-02-15 200602150BOS BOS CLE LeBron James jamesle01
## minutes fgm fga 3pm 3pa ftm fta offreb defreb totreb assists steals
## 1: 39.78 7 19 1 5 8 14 2 9 11 12 2
## 2: 37.23 5 11 0 1 4 6 1 6 7 9 0
## 3: 35.98 7 16 2 7 11 13 3 5 8 6 3
## 4: 35.62 12 17 2 4 7 9 0 8 8 9 3
## 5: 39.12 13 23 2 3 8 9 2 6 8 4 1
## ---
## 536: 47.00 15 33 1 9 12 17 1 12 13 15 2
## 537: 40.23 9 19 0 1 6 7 0 6 6 5 1
## 538: 39.33 11 24 1 6 4 7 2 7 9 8 1
## 539: 43.22 11 26 3 5 2 2 1 4 5 8 2
## 540: 53.92 16 32 0 5 11 15 1 11 12 11 2
## blocks turnovers fouls points plusminus year
## 1: 0 4 1 23 2 2009
## 2: 0 4 1 14 19 2009
## 3: 0 4 0 27 5 2009
## 4: 0 7 3 33 8 2009
## 5: 0 4 2 36 7 2009
## ---
## 536: 4 3 1 43 -5 2010
## 537: 4 3 2 24 19 2010
## 538: 4 4 2 27 -3 2008
## 539: 4 3 0 27 -18 2007
## 540: 4 2 5 43 5 2006

选择分组

Rolling joins

1
2
setkey(dtb.key, blocks)
dtb.key[.(15), roll=TRUE]
1
2
3
4
5
6
## date game_id team_home_abbr team player player_id
## 1: 2011-03-15 201103150CHI CHI WAS JaVale McGee mcgeeja01
## minutes fgm fga 3pm 3pa ftm fta offreb defreb totreb assists steals
## 1: 39.37 5 14 0 0 1 3 3 9 12 1 0
## blocks turnovers fouls points plusminus year
## 1: 15 2 4 11 -15 2011

其他data.table函数

fread()快速读取

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
# Borrowed from data.table document:
n=1e6
DT = data.table( a=sample(1:1000,n,replace=TRUE),
b=sample(1:1000,n,replace=TRUE),
c=rnorm(n),
d=sample(c("foo","bar","baz","qux","quux"),n,replace=TRUE),
e=rnorm(n),
f=sample(1:1000,n,replace=TRUE) )
DT[2,b:=NA_integer_]
DT[4,c:=NA_real_]
DT[3,d:=NA_character_]
DT[5,d:=""]
DT[2,e:=+Inf]
DT[3,e:=-Inf]
write.table(DT,"test.csv",sep=",",row.names=FALSE,quote=FALSE)
cat("File size (MB):", round(file.info("test.csv")$size/1024^2),"\n")
1
## File size (MB): 50
1
system.time(DF1 <-read.csv("test.csv",stringsAsFactors=FALSE))
1
2
## user system elapsed
## 13.518 0.290 14.087
1
2
3
4
system.time(DF2 <- read.table("test.csv",header=TRUE,sep=",",quote="",
stringsAsFactors=FALSE,comment.char="",nrows=n,
colClasses=c("integer","integer","numeric",
"character","numeric","integer")))
1
2
## user system elapsed
## 7.553 0.107 7.908
1
system.time(DT <- fread("test.csv"))
1
2
## user system elapsed
## 0.627 0.026 0.657