R/개인공부
[R] 모두를 위한 R 데이터 분석 입문 연습문제 7장
gangee
2023. 6. 4. 01:22
728x90
반응형
* 모든 문제는 '모두를 위한 R 데이터 분석 입문'의 연습문제입니다.
1번
# (1) Print the number of missing values in each column of `ds`, one
# "name <TAB> count" line per column (for-loop version).
# seq_len() is used instead of 1:ncol(ds) so that a data set with zero
# columns skips the loop instead of iterating over c(1, 0).
for (i in seq_len(ncol(ds))) {
  this.na <- is.na(ds[, i])
  cat(colnames(ds)[i], "\t", sum(this.na), '\n')
}
# Count the missing (NA) entries of a vector `y`.
# Returns a single integer: the number of elements for which is.na() is TRUE.
col_na <- function(y) {
  missing.flags <- is.na(y)
  sum(missing.flags)
}
# Per-column NA counts again, this time with apply() over the columns.
apply(ds, MARGIN = 2, FUN = col_na)
# (2) Show every row that has at least one missing value.
# The completeness flags are computed once and reused for (2)-(4).
is.complete <- complete.cases(ds)
ds[!is.complete, ]
# (3) Count the rows that have at least one missing value.
sum(!is.complete)
# (4) Keep only the fully observed rows in ds.new and preview them.
ds.new <- ds[is.complete, ]
head(ds.new)
2번
# (1) Box plot of the Income column of `st`.
boxplot(st$Income)
# (2) Replace the Income values that boxplot.stats() reports as outliers
# with NA, then preview the data.
income.outliers <- boxplot.stats(st$Income)$out
is.na(st$Income) <- st$Income %in% income.outliers
head(st)
# (3) Drop the rows that contain an NA and store the remainder in st2.
st2 <- st[complete.cases(st), ]
head(st2)
3번
# (1) Work on a data-frame copy of the built-in airquality data set.
AQ <- data.frame(airquality)
# (2) Number of NA values in each column.
# Helper: count the missing entries of a vector.
col_na <- function(y) {
  sum(is.na(y))
}
apply(AQ, 2, FUN = col_na)
# (3) Number of NA values in each row, computed two ways.
rowSums(is.na(AQ))
apply(AQ, 1, FUN = col_na) # apply() version
# (4) Rows that contain no NA at all.
AQ[!(apply(AQ, 1, col_na) > 0), ]
# (5) Replace every NA with the mean of the observed values in its column
# and store the result in AQ2.
# The imputation runs on a copy so that AQ keeps its original NAs; the
# previous version overwrote AQ itself, which left AQ and AQ2 identical and
# made parts (2)-(4) report no missing values when re-run.
# A plain `if` replaces ifelse(): ifelse() is a vectorised value selector,
# not a control-flow statement, and should not carry an assignment or `next`.
AQ2 <- AQ
for (i in seq_len(ncol(AQ2))) {
  na.rows <- is.na(AQ2[[i]])
  if (any(na.rows)) {
    AQ2[[i]][na.rows] <- mean(AQ2[[i]], na.rm = TRUE)
  }
}
AQ2
4번
# (1) Rows of state.x77 ordered by Population, smallest first.
state.x77[order(state.x77[, 'Population']), ]
# (2) Rows ordered by Income, largest first.
# TRUE is spelled out: `T` is an ordinary variable that can be reassigned,
# which would silently flip the sort direction.
state.x77[order(state.x77[, 'Income'], decreasing = TRUE), ]
# (3) The ten rows with the smallest Illiteracy values.
head(state.x77[order(state.x77[, 'Illiteracy']), ], 10)
5번
# (1) Split mtcars into a list of data frames, one per value of gear.
mt.gear <- split(mtcars, mtcars$gear)
mt.gear
# (2) The group with gear == 4.
mt.gear[['4']]
# (3) Stack the gear == 3 and gear == 5 groups into one data frame.
# rbind() appends the rows unchanged. merge(..., all = TRUE) is a join on
# every shared column: it discards the car names held in the row names and
# reorders the rows, so it is not the right tool for combining groups.
mt.gear.35 <- rbind(mt.gear[['3']], mt.gear[['5']])
mt.gear.35
# (4) Cars whose wt lies strictly between 1.5 and 3.0.
subset(mtcars, wt > 1.5 & wt < 3.0)
6번
# (1) Load the Glass data set from the mlbench package into myds.
library(mlbench)
data('Glass')
myds <- Glass
# (2) Mean of RI, Na, Mg, Al and Si within each level of Type.
measure.cols <- c('RI', 'Na', 'Mg', 'Al', 'Si')
aggregate(myds[, measure.cols], by = list(myds$Type), FUN = mean)
7번
# (1) Load the Ionosphere data set from the mlbench package into myds.
library(mlbench)
data('Ionosphere')
myds <- Ionosphere
# (2) Standard deviation of V3 through V10 within each Class / V1 combination.
keep.cols <- c("Class", "V1", paste0("V", 3:10))
aggregate(. ~ Class + V1, data = myds[, keep.cols], FUN = sd)
8번
# Draw 20 rows of state.x77 at random (without replacement) into st20 and
# keep the remaining rows in st.other. The fixed seed makes the draw
# reproducible.
set.seed(100)
idx <- sample.int(nrow(state.x77), size = 20)
st20 <- state.x77[idx, ]
st.other <- state.x77[-idx, ]
st20
st.other
9번
# Stratified sample: 10 randomly chosen rows from each iris species,
# collected in iris.10.
# The seed is set once, before all draws. Re-seeding inside the loop made
# every species reuse the same random positions, so the three draws were
# copies of one another rather than independent samples.
set.seed(100)
spcs <- levels(iris[, 5])
# For each species, pick 10 of its row numbers. sample.int() on the group
# size avoids sample()'s special treatment of a length-one vector.
picked.rows <- lapply(spcs, function(sp) {
  rows <- which(iris$Species == sp)
  rows[sample.int(length(rows), size = 10)]
})
# One subsetting step instead of growing a data frame with rbind() in a loop.
iris.10 <- iris[unlist(picked.rows), ]
iris.10
10번
# Show iris with its rows in a random order (reproducible via the seed).
set.seed(100)
shuffled.rows <- sample(nrow(iris))
iris[shuffled.rows, ]
11번
# Print every unordered pair of iris species names, one pair per line.
spcs <- levels(iris[, 'Species'])
# Compute the index pairs once (one pair per column) instead of calling
# combn() three times in every iteration.
pair.idx <- combn(length(spcs), 2)
for (i in seq_len(ncol(pair.idx))) {
  cat(spcs[pair.idx[1, i]], spcs[pair.idx[2, i]], '\n')
}
# The same pairs flattened into a single character vector; the indices come
# from length(spcs) rather than a hard-coded 3.
spcs[pair.idx]
12번
# All 3-element combinations of five colour names, flattened column by
# column so that each consecutive run of three elements is one combination.
combn.color <- as.vector(
  combn(c('red', 'green', 'blue', 'black', 'yellow'), 3)
)
# Step through the start position of each triple and print it on its own line.
for (first in seq(1, length(combn.color), by = 3)) {
  cat(combn.color[first + 0:2], '\n')
}
13번
# Population, Income and Area for the rows of state.x77 whose Area is
# strictly between Alabama's and California's.
area <- state.x77[, 'Area']
area.in.range <- area > state.x77['Alabama', 'Area'] &
  area < state.x77['California', 'Area']
# drop = FALSE keeps the result a matrix, as subset() does.
state.x77[area.in.range, c('Population', 'Income', 'Area'), drop = FALSE]
15번
# (1) Two small tables: authors (identified by surname) and books
# (identified by the author's name).
authors <- data.frame(
  surname = c('Twein', 'Venables', 'Tierney', 'Ripley', 'McNeil'),
  nationality = c('US', 'Australia', 'US', 'UK', 'Australia'),
  retired = c('yes', 'no', 'no', 'no', 'no')
)
books <- data.frame(
  name = c('Johns', 'Venables', 'Tierney', 'Ripley', 'Ripley', 'McNeil'),
  title = c(
    'Exploratory Data Analysis',
    'Modern Applied Statistics ...',
    'LISP-STAT',
    'Spatial Statistics',
    'Stochastic Simulation',
    'Interactive Data Analysis'
  ),
  other.author = c(NA, 'Ripley', rep(NA, 4))
)
# (2) Inner join: only authors that have a matching book.
merge(authors, books, by.x = 'surname', by.y = 'name')
# (3) Left join: every author, with NA where no book matches.
merge(authors, books, by.x = 'surname', by.y = 'name', all.x = TRUE)
# (4) Right join: every book, with NA where no author matches.
merge(authors, books, by.x = 'surname', by.y = 'name', all.y = TRUE)
# (5) Inner join of authors against the books' other.author column.
merge(authors, books, by.x = 'surname', by.y = 'other.author')
* 데이터 파일이 필요한 14번 문제는 제외하였습니다.
728x90
반응형