R/개인공부

[R] 모두를 위한 R데이터 분석 입문 연습문제 7장

gangee 2023. 6. 4. 01:22
728x90
반응형
* 모든 문제는 '모두를 위한 R 데이터 분석 입문'의 연습문제입니다.

1번

# (1) Count the NA values in every column of ds.
# Loop version: print each column name, a tab, and that column's NA count.
# seq_len() is used so that a ds with zero columns runs the loop zero times
# (1:ncol(ds) would iterate over c(1, 0) instead).
for (i in seq_len(ncol(ds))) {
  this.na <- is.na(ds[, i])
  cat(colnames(ds)[i], "\t", sum(this.na), "\n")
}

# Count the missing (NA) entries of y.
col_na <- function(y) {
  na_flags <- is.na(y)
  sum(na_flags)
}
apply(ds, MARGIN = 2, FUN = col_na)  # per-column NA counts via apply()

# (2) Rows of ds that contain at least one NA.
incomplete_rows <- !complete.cases(ds)
ds[incomplete_rows, ]

# (3) Number of rows that contain at least one NA.
na_per_row <- rowSums(is.na(ds))
sum(na_per_row > 0)

# (4) Keep only the rows without any NA and preview the result.
ds.new <- ds[!incomplete_rows, ]
head(ds.new)

2번

# (1) Box plot of the Income column.
boxplot(st$Income)

# (2) Replace every Income value that boxplot.stats() reports as an outlier
#     with NA, then preview st.
income_outliers <- boxplot.stats(st$Income)$out
st$Income[is.element(st$Income, income_outliers)] <- NA
head(st)

# (3) Drop the rows that now contain an NA and preview the result.
st2 <- st[complete.cases(st), ]
head(st2)

3번

# (1) Working copy of the built-in airquality data set.
AQ <- data.frame(airquality)

# (2) Number of NA values in each column.
# col_na(y): count the missing (NA) entries of y.
col_na <- function(y) {
  sum(is.na(y))
}
apply(AQ, 2, FUN = col_na)


# (3) Number of NA values in each row.
rowSums(is.na(AQ))

apply(AQ, 1, FUN = col_na)   # same counts via apply()

# (4) Rows that contain no NA at all.
AQ[!(apply(AQ, 1, col_na) > 0), ]

# (5) Replace each NA with the mean of the non-missing values of its column.
# A plain if replaces the scalar ifelse(), which was being used only for its
# side effect (with next as the "no" value); is.na() replaces `%in% NA`.
# AQ itself is modified, then copied to AQ2.
for (i in seq_len(ncol(AQ))) {
  missing <- is.na(AQ[, i])
  if (any(missing)) {
    AQ[missing, i] <- mean(AQ[!missing, i])
  }
}
AQ2 <- AQ
AQ2

4번

# (1) state.x77 rows in ascending order of Population.
state.x77[order(state.x77[, "Population"]), ]

# (2) state.x77 rows in descending order of Income.
# TRUE is spelled out: T is an ordinary variable that can be reassigned.
state.x77[order(state.x77[, "Income"], decreasing = TRUE), ]

# (3) The 10 rows with the smallest Illiteracy values.
by_illiteracy <- state.x77[order(state.x77[, "Illiteracy"]), ]
by_illiteracy[1:10, ]

5번

# (1) Split mtcars into a list of data frames, one per value of gear.
mt.gear <- split(mtcars, mtcars$gear)
mt.gear

# (2) The group whose gear value is 4.
mt.gear[["4"]]

# (3) Combine the gear-3 and gear-5 groups; all = TRUE keeps every row of
#     both inputs. TRUE is spelled out because T can be reassigned.
mt.gear.35 <- merge(mt.gear[["3"]], mt.gear[["5"]], all = TRUE)
mt.gear.35

# (4) Cars whose wt is strictly between 1.5 and 3.0.
subset(mtcars, wt > 1.5 & wt < 3.0)

6번

# (1) Load the Glass data set from mlbench and keep a working copy in myds.
library(mlbench)
data("Glass")
myds <- Glass

# (2) Mean of RI, Na, Mg, Al and Si for each value of Type.
measure_cols <- c("RI", "Na", "Mg", "Al", "Si")
aggregate(myds[, measure_cols], by = list(myds$Type), FUN = mean)

7번

# (1) Load the Ionosphere data set from mlbench and keep a working copy in myds.
library(mlbench)
data("Ionosphere")
myds <- Ionosphere

# (2) Standard deviation of V3 through V10 for each combination of Class and V1.
keep_cols <- c("Class", "V1", paste0("V", 3:10))
aggregate(. ~ Class + V1, data = myds[, keep_cols], FUN = sd)

8번

# Fix the RNG state so the draw is reproducible.
set.seed(100)

# Draw 20 distinct row indices of state.x77, then split the matrix into the
# sampled rows (st20) and the remaining rows (st.other).
idx <- sample.int(nrow(state.x77), size = 20)
st20 <- state.x77[idx, ]
st.other <- state.x77[-idx, ]
st20
st.other

9번

# Seed once so the whole draw is reproducible. Re-seeding before every
# species would make each species use the same 10 row positions, so the
# per-species samples would not be independent.
set.seed(100)

spcs <- levels(iris[, 5])

# For each species, take its rows and draw 10 of them without replacement;
# the per-species pieces are bound together in one step instead of growing
# iris.10 with rbind() inside a loop.
iris.10 <- do.call(
  rbind,
  lapply(spcs, function(sp) {
    sp_rows <- iris[iris$Species == sp, ]
    sp_rows[sample(seq_len(nrow(sp_rows)), size = 10, replace = FALSE), ]
  })
)

iris.10

10번

# Print the rows of iris in a reproducible random order.
set.seed(100)
shuffled_rows <- sample.int(nrow(iris))
iris[shuffled_rows, ]

11번

spcs <- levels(iris[, "Species"])

# Index pairs for every 2-element combination of the species; each column of
# pair_idx is one pair. Computed once instead of three times per iteration.
pair_idx <- combn(length(spcs), 2)

# Print one pair of species names per line.
for (k in seq_len(ncol(pair_idx))) {
  cat(spcs[pair_idx[1, k]], spcs[pair_idx[2, k]], "\n")
}

# The same pairs as a flat character vector (pair members are adjacent).
spcs[pair_idx]

12번

# All 3-colour combinations of the five colours, flattened so that each
# consecutive group of three elements is one combination.
color_names <- c("red", "green", "blue", "black", "yellow")
combn.color <- as.vector(combn(color_names, 3))

# Print one combination per line.
for (first in seq(1, length(combn.color), by = 3)) {
  cat(combn.color[first], combn.color[first + 1], combn.color[first + 2], "\n")
}

13번

# Population, Income and Area of the states whose Area lies strictly between
# the Area of Alabama and the Area of California.
area <- state.x77[, "Area"]
in_range <- area > state.x77["Alabama", "Area"] &
  area < state.x77["California", "Area"]
state.x77[in_range, c("Population", "Income", "Area"), drop = FALSE]

15번

# (1) Two small tables: authors (keyed by surname) and books (keyed by name).
authors <- data.frame(
  surname = c("Twein", "Venables", "Tierney", "Ripley", "McNeil"),
  nationality = c("US", "Australia", "US", "UK", "Australia"),
  retired = c("yes", "no", "no", "no", "no")
)
books <- data.frame(
  name = c("Johns", "Venables", "Tierney", "Ripley", "Ripley", "McNeil"),
  title = c(
    "Exploratory Data Analysis",
    "Modern Applied Statistics ...",
    "LISP-STAT",
    "Spatial Statistics",
    "Stochastic Simulation",
    "Interactive Data Analysis"
  ),
  other.author = c(NA, "Ripley", NA, NA, NA, NA)
)

# (2) Match authors$surname against books$name; only matching rows are kept.
merge(authors, books, by.x = "surname", by.y = "name")

# (3) Same match, additionally keeping every row of authors.
merge(authors, books, by.x = "surname", by.y = "name", all.x = TRUE)

# (4) Same match, additionally keeping every row of books.
merge(authors, books, by.x = "surname", by.y = "name", all.y = TRUE)

# (5) Match authors$surname against books$other.author instead.
merge(authors, books, by.x = "surname", by.y = "other.author")
* 파일이 필요한 14번 문제는 제외하였음
728x90
반응형