# Build a 70-row sample data frame of three related series:
#   c1 - a random permutation of 1..70
#   c2 - c1 plus uniform noise in [0, 1.25], scaled by 1.2
#   c3 - 0.6 * c1 plus uniform noise in [0, 1], doubled, then shifted by -7
c1 <- sample(seq_len(70), size = 70)
df1 <- data.frame(
  c1 = c1,
  c2 = (c1 + runif(length(c1), min = 0, max = 1.25)) * 1.2,
  c3 = (c1 * 0.6 + runif(length(c1), min = 0, max = 1)) * 2 - 7
)
# Rolling base stat generator.
#
# For every column of `df`, computes a trailing (right-aligned) rolling mean,
# median and standard deviation over `interval` consecutive rows.
#
# Args:
#   df:       data frame whose columns are the series to summarise.
#   interval: window width in rows; a single whole number >= 1 (default 1).
#
# Returns:
#   A data frame with the same rows as `df` and three columns per input
#   column, named "<col>_mean<interval>", "<col>_median<interval>" and
#   "<col>_sd<interval>", in input-column order. The first `interval - 1`
#   rows of every column are NA because no complete window ends there; when
#   `interval` exceeds nrow(df) every value is NA. A zero-column `df` yields
#   a zero-column data frame.
roldfsta <- function(df, interval = 1) {
  stopifnot(
    is.data.frame(df),
    is.numeric(interval),
    length(interval) == 1L,
    !is.na(interval),
    interval >= 1,
    interval == round(interval)
  )

  n_rows <- nrow(df)
  # Number of complete windows; zero when the window is longer than the data.
  n_windows <- max(n_rows - interval + 1, 0)
  # Leading rows that have no complete window ending on them.
  n_pad <- n_rows - n_windows

  # Apply `fun` to each complete window of `x`, left-padded with NA so the
  # result lines up with the last row of each window.
  roll_right <- function(x, fun) {
    rolled <- unlist(
      lapply(seq_len(n_windows), function(start) {
        fun(x[start:(start + interval - 1)], na.rm = FALSE)
      }),
      use.names = FALSE
    )
    c(rep(NA, n_pad), rolled)
  }

  stat_funs <- list(mean = mean, median = stats::median, sd = stats::sd)
  src_names <- names(df)
  n_input <- ncol(df)

  for (i in seq_len(n_input)) {
    # Extract by position so duplicated column names are handled correctly.
    series <- df[[i]]
    for (stat in names(stat_funs)) {
      df[, ncol(df) + 1] <- roll_right(series, stat_funs[[stat]])
      names(df)[ncol(df)] <-
        paste0(src_names[i], "_", stat, as.character(interval))
    }
  }

  # Return only the generated columns; drop = FALSE keeps a data frame even
  # if a single column were selected.
  df[, n_input + seq_len(ncol(df) - n_input), drop = FALSE]
}
# Rolling statistics over a 3-row window; preview the last rows.
df2 <- roldfsta(df1, 3)
tail(df2)

# Overlay the first four rolling columns (c1 mean/median/sd and c2 mean).
plot(df2[, 1], type = "l", col = "blue")
lines(df2[, 2])
lines(df2[, 3], col = "blue")
lines(df2[, 4])

# Same statistics over 6- and 12-row windows, then one wide feature table.
df3 <- roldfsta(df1, 6)
df12 <- roldfsta(df1, 12)
df4 <- cbind(df1, df2, df3, df12)

# Target c0 = c1 + c2 + uniform noise scaled to [0, 2.5]; the last row is NA.
df4$c0 <- replace(
  df4$c1 + df4$c2 + runif(nrow(df4), min = 0, max = 1) * 2.5,
  nrow(df4),
  NA
)

# Drop the last row (its target is NA), then keep the final nrow(df4) - 15
# rows, which skips the leading rows where the rolling columns are NA.
df5 <- tail(head(df4, -1), nrow(df4) - 15)
tail(df5)
ncol(df5) # = 3 originals + (3 columns * 3 stats * 3 windows) + 1 target

# Linear model of the target on every other column.
summary(lm(c0 ~ ., data = df5))

# Random forest on the same data, with its variable-importance plot.
library(randomForest)
rf1 <- randomForest(c0 ~ ., data = df5, ntree = 300)
varImpPlot(rf1)

# Target against individual predictors.
plot(df5$c1, df5$c0)
plot(df5$c3, df5$c0)

# Predictor pairs, with point colour (blue intensity) scaled by the target.
plot(
  df5$c1, df5$c2,
  col = rgb(0, 0, df5$c0 / max(df5$c0), 0.3),
  pch = 19
)
plot(
  df5$c1_mean3, df5$c2_sd12,
  col = rgb(0, 0, df5$c0 / max(df5$c0), 0.3),
  pch = 19
)
# 'R 데이터 분석' 카테고리의 다른 글
# [CRMAJU2018] 예측분석 Review (0) | 2018.06.15 |
# ---|---|
# rolling 2 (0) | 2018.05.12 |
# [CRMAJU2018] R 프로그래밍 기초 복습 -- 2018.04.25 (0) | 2018.04.23 |
# [CRMAJU2018] 데이터 분석 기초 연습문제[3] (0) | 2018.04.16 |
# [CRMAJU2018] --- R 데이터 분석 연습문제 [2] (0) | 2018.04.16 |