- What are the differences? (dataset: trees)
# Base R: return every row and column of trees, reordered by ascending Height.
# Height is qualified with trees$ so the call does not depend on attach(trees).
trees[order(trees$Height), ]

# data.table: select Girth and Volume grouped by Height. Rows that share a
# Height are placed together, but the groups themselves are not sorted
# (presumably they keep first-appearance order; keyby would sort them).
# NOTE(review): assumes data.table is already loaded earlier in the file.
dt <- data.table(trees)
dt[, .(Girth, Volume), by = Height]
Thursday, October 29, 2015
# Base R: return every row and column of trees, reordered by ascending Height.
# Height is qualified with trees$ so the call does not depend on attach(trees).
trees[order(trees$Height), ]

# data.table: select Girth and Volume grouped by Height. Rows that share a
# Height are placed together, but the groups themselves are not sorted
# (presumably they keep first-appearance order; keyby would sort them).
# NOTE(review): assumes data.table is already loaded earlier in the file.
dt <- data.table(trees)
dt[, .(Girth, Volume), by = Height]
# Load the built-in datasets package. library() stops with an error if the
# package is unavailable, whereas require() would only return FALSE and let
# the script fail later at the first use of warpbreaks.
library(datasets)

# Show the structure (column names, types, first values) of warpbreaks.
str(warpbreaks)
## 'data.frame': 54 obs. of 3 variables: ## $ breaks : num 26 30 54 25 70 52 51 26 67 18 ... ## $ wool : Factor w/ 2 levels "A","B": 1 1 1 1 1 1 1 1 1 1 ... ## $ tension: Factor w/ 3 levels "L","M","H": 1 1 1 1 1 1 1 1 1 2 ...
Count the occurrences of each break, and assign the results to a variable, freq.
# Tabulate how many times each distinct number of breaks occurs,
# store the table in freq, then print it.
freq <- table(warpbreaks$breaks)
freq
## ## 10 12 13 14 15 16 17 18 19 20 21 24 25 26 27 28 29 30 31 35 36 39 41 42 43 ## 1 1 1 1 3 2 2 3 2 2 4 2 1 4 1 3 4 2 1 1 2 2 1 1 1 ## 44 51 52 54 67 70 ## 1 1 1 1 1 1
Based on Question 1, compute the proportion of each break, and assign the output to a variable percent.
# Proportion of observations taking each distinct break count.
# The counts must be divided by the number of observations (the sum of the
# counts, 54), not by the sum of the break values themselves (1520);
# prop.table() does exactly that, so the proportions add up to 1.
percent <- prop.table(table(warpbreaks[, 1]))
percent
##
##         10         12         13         14         15
## 0.01851852 0.01851852 0.01851852 0.01851852 0.05555556
##         16         17         18         19         20
## 0.03703704 0.03703704 0.05555556 0.03703704 0.03703704
##         21         24         25         26         27
## 0.07407407 0.03703704 0.01851852 0.07407407 0.01851852
##         28         29         30         31         35
## 0.05555556 0.07407407 0.03703704 0.01851852 0.01851852
##         36         39         41         42         43
## 0.03703704 0.03703704 0.01851852 0.01851852 0.01851852
##         44         51         52         54         67
## 0.01851852 0.01851852 0.01851852 0.01851852 0.01851852
##         70
## 0.01851852
Draw a histogram for freq, and draw a red line on the histogram based on its density.
# Plot the histogram on the density scale (freq = FALSE) so that its y-axis
# matches the scale of the kernel density estimate; on the default count
# scale the density curve would be squashed against the x-axis.
hist(freq, freq = FALSE)

# Overlay the kernel density estimate of freq as a red line.
lines(density(freq), col = "red")
Draw a pie chart for percent.
# Pie chart with one slice per distinct break count. pie() rescales its
# input to sum to one, so only the relative sizes of the values matter.
pie(x = percent)
What can a boxplot be used for?
# A boxplot can be used to present the minimum, median, maximum
# and quartiles of the data.
Draw a boxplot on breaks, and rotate the y-axis labels 90 degrees clockwise.
# Boxplot of the number of breaks. las = 1 draws the axis tick labels
# horizontally, i.e. the y-axis labels are turned 90 degrees clockwise
# relative to the default orientation (parallel to the axis).
boxplot(warpbreaks$breaks, las = 1)
Draw a bar chart of the number of breaks for type A wool, and give it the title "Type A wool".
# Bar chart of the break counts for the observations whose wool type is "A".
# with() evaluates the call with warpbreaks' columns in scope and needs no
# cleanup, unlike attach()/detach(), which leaves the data frame on the
# search path if the plot call fails.
with(warpbreaks, barplot(breaks[wool == "A"], main = "Type A wool"))
We will use the "anorexia" dataset (from the MASS package) in this exercise.
# Load MASS, which provides the anorexia data set. library() stops with an
# error if MASS is not installed, whereas require() would only return FALSE
# and let the script fail later at the first use of anorexia.
library(MASS)
## Warning: package 'MASS' was built under R version 3.1.3

# Show the structure (column names, types, first values) of anorexia.
str(anorexia)
## 'data.frame': 72 obs. of 3 variables: ## $ Treat : Factor w/ 3 levels "CBT","Cont","FT": 2 2 2 2 2 2 2 2 2 2 ... ## $ Prewt : num 80.7 89.4 91.8 74 78.1 88.3 87.3 75.1 80.6 78.4 ... ## $ Postwt: num 80.2 80.1 86.4 86.3 76.1 78.1 75.1 86.7 73.5 84.6 ...
# '=' is to assign a variable;
# whereas '==' refers to having the same values.

# Slice colours shared by the pie chart and its legend. Kept at top level so
# that cols remains available after this block.
cols <- c("red", "yellow", "green")

# Draw all plots with anorexia's columns (Treat, Prewt, Postwt) in scope.
# with() replaces the attach()/detach() pair: nothing is left on the search
# path if a plot call fails, and the bare column names keep the default
# titles and axis labels unchanged.
with(anorexia, {
  # Histogram of the post-study weights.
  hist(Postwt, xlab = "Weight of patient after study period, in lbs")

  # Pre-study weight distribution for each treatment group.
  boxplot(Prewt ~ Treat)

  # Horizontal bars of post-study weight for the "FT" treatment group only.
  barplot(Postwt[Treat == "FT"], horiz = TRUE)

  # Stacked bars: one bar per treatment, stacked by distinct Prewt value.
  barplot(table(Prewt, Treat))

  # Share of patients in each treatment group, with a legend and a frame.
  pie(table(Treat) / length(Treat), col = cols)
  legend("topright", c("CBT", "Cont", "FT"), fill = cols)
  box()
})