>RE::VISION CRM

R 데이터 분석

[R분석] rolling base stat generator for time series data

YONG_X 2018. 5. 1. 17:59

# Build a 70-row sample data frame whose columns are treated as time series.
# c1 is a random permutation of 1..70; c2 and c3 are linear transforms of c1
# with uniform noise added, so the three columns are strongly related.
c1 <- sample(seq_len(70), size = 70)
df1 <- data.frame(c1 = c1)
df1$c2 <- (df1$c1 + runif(nrow(df1), min = 0, max = 1.25)) * 1.2
df1$c3 <- (df1$c1 * 0.6 + runif(nrow(df1), min = 0, max = 1)) * 2 - 7


#' Rolling basic-statistics generator.
#'
#' For every column of `df`, computes the trailing rolling mean, median and
#' standard deviation over a window of `interval` consecutive rows. Row `r`
#' of a derived column summarises rows `r - interval + 1` .. `r` of the
#' source column.
#'
#' @param df A data.frame whose columns are numeric series, one row per
#'   time step.
#' @param interval Window width in rows: a single whole number >= 1.
#' @return A data.frame with the same rows as `df` containing only the
#'   derived columns, named `<col>_mean<interval>`, `<col>_median<interval>`
#'   and `<col>_sd<interval>`, grouped per source column in the original
#'   column order. The first `interval - 1` rows of every derived column are
#'   `NA` because the window is incomplete there; if `interval` exceeds
#'   `nrow(df)` every value is `NA`. A zero-column `df` yields a zero-column
#'   result.
roldfsta <- function(df, interval = 1) {
  # Use zoo through its namespace instead of library(): calling this function
  # should not attach a package to the caller's search path.
  if (!requireNamespace("zoo", quietly = TRUE)) {
    stop("roldfsta() needs the 'zoo' package", call. = FALSE)
  }
  stopifnot(
    is.data.frame(df),
    is.numeric(interval),
    length(interval) == 1L,
    !is.na(interval),
    interval >= 1,
    interval == round(interval)
  )

  # Statistic suffix -> function; the order fixes the output column order.
  stat_funs <- list(mean = mean, median = median, sd = sd)
  src_names <- names(df)
  n_src <- length(src_names)
  n_rows <- nrow(df)

  # One rolling statistic for one series, left-padded with NA so that it
  # lines up with the rows of `df`.
  roll_stat <- function(x, fun) {
    if (interval > n_rows) {
      # No complete window exists; do not depend on rollapply's behaviour
      # for a width larger than the data.
      return(rep(NA_real_, n_rows))
    }
    # na.rm is forwarded to `fun`, so a window containing NA yields NA.
    c(rep(NA, interval - 1), zoo::rollapply(x, interval, fun, na.rm = FALSE))
  }

  # seq_len() keeps the loop empty for a zero-column frame (1:0 would not).
  for (i in seq_len(n_src)) {
    for (stat in names(stat_funs)) {
      df[, ncol(df) + 1L] <- roll_stat(df[[i]], stat_funs[[stat]])
      names(df)[ncol(df)] <- paste0(src_names[i], "_", stat, interval)
    }
  }

  # Return only the derived columns, dropping the source columns by position.
  df[, -seq_len(n_src), drop = FALSE]
}



# Rolling statistics over a 3-row window; print the last rows as a check.
df2 <- roldfsta(df1, interval = 3)
tail(df2)

# Overlay the first four derived series (c1 mean, c1 median, c1 sd, c2 mean)
# on one chart. The axes are set up by the first series alone, since only it
# is passed to plot(); the others are added with lines().
plot(df2[, 1], type = "l", col = "blue")
lines(df2[[2]])
lines(df2[[3]], col = "blue")
lines(df2[[4]])


# The same rolling statistics over wider windows (6 and 12 rows).
df3 <- roldfsta(df1, interval = 6)
df12 <- roldfsta(df1, interval = 12)

# Raw series plus every rolling feature, side by side.
df4 <- cbind(df1, df2, df3, df12)

# Synthetic target: c1 + c2 plus uniform noise in [0, 2.5], with the value in
# the final row blanked to NA.
df4$c0 <- df4$c1 + df4$c2 + runif(nrow(df4), min = 0, max = 1) * 2.5
df4$c0[nrow(df4)] <- NA

# Drop the last row (NA target), then keep the trailing nrow(df4) - 15 rows
# of the remainder. With the 70-row sample this leaves rows 15..69, which
# also skips the leading rows where the 12-row window is still NA.
df5 <- tail(head(df4, -1), nrow(df4) - 15)

tail(df5)
ncol(df5) # 31 = 3 raw + (3 series * 3 stats * 3 windows) + 1 target


# Linear baseline: regress the target c0 on every other column of df5.
lm_fit <- lm(c0 ~ ., data = df5)
summary(lm_fit)

# Random forest with 300 trees on the same formula, then a plot of the
# variable importance it reports.
library(randomForest)
rf1 <- randomForest(c0 ~ ., data = df5, ntree = 300)
varImpPlot(rf1)



# Target against two of the raw series.
plot(df5$c1, df5$c0)
plot(df5$c3, df5$c0)

# Feature-pair scatter plots. Each point is a semi-transparent blue whose
# intensity is c0 scaled by its maximum, so a more saturated blue means a
# larger target value.
c0_shade <- rgb(red = 0, green = 0, blue = df5$c0 / max(df5$c0), alpha = 0.3)
plot(df5$c1, df5$c2, col = c0_shade, pch = 19)
plot(df5$c1_mean3, df5$c2_sd12, col = c0_shade, pch = 19)