# RE::VISION CRM

# R 데이터 분석 (R data analysis)

# rolling 2

# YONG_X 2018. 5. 12. 21:24

# Daily sales data broken down by category.
# Target columns: date, day of week, week of year, dept, sales.

# Calendar: 31 consecutive days starting on 2018-04-01.
c1 <- as.Date("2018-04-01")
c2 <- seq(from = c1, by = "day", length.out = 31)

# Base frame with a single column; dates are stored as "YYYY-MM-DD" text.
df1 <- data.frame(date = format(c2, "%Y-%m-%d"))


library(lubridate)

# Day-of-week code for every calendar day (echoed at the console).
# Coding relied on below: 1 = Sunday, 2 = Monday, ...
#   weekend  = {1, 7}
#   weekdays = 2:6
wday(c2)
df1[["dayow"]] <- wday(c2)

# Week number for every calendar day (echoed at the console).
week(c2)
df1[["week"]] <- week(c2)

# Retain only the Monday-Friday rows.
df1 <- subset(df1, dayow %in% 2:6)

head(df1)


# Synthetic per-department sales: each department sells a constant amount on
# every day present in df1.
c11 <- c("A", "B", "C")   # department codes
c12 <- c(150, 100, 70)    # constant daily sale, paired with c11 by position
stopifnot(length(c11) == length(c12))

# One copy of the df1 calendar per department, built in a single pass instead
# of repeating the same statements for each department.
dept_frames <- lapply(seq_along(c11), function(i) {
  one_dept <- df1
  one_dept$dept <- rep(c11[i], nrow(one_dept))
  one_dept$sale <- rep(c12[i], nrow(one_dept))
  one_dept
})

# Stack the copies in c11 order (pairwise rbind, left to right).
dftmp1 <- Reduce(rbind, dept_frames)
# dftmp ends up holding the last department's frame.
dftmp <- dept_frames[[length(dept_frames)]]

# Sort by date, then department, and number the rows in that order.
df2 <- dftmp1[order(dftmp1$date, dftmp1$dept), ]
# Ids follow the actual row count rather than a hard-coded 1:66, so the
# assignment cannot fail when the date range or department list changes.
row_id_tmp <- seq_len(nrow(df2))
df2$rowid <- row_id_tmp

# Remove three individual rows and two whole days
# (presumably to simulate missing observations -- confirm with the author).
drop_rowids <- c(15, 29, 30)
drop_dates <- c("2018-04-27", "2018-04-05")
df2 <- df2[!(df2$rowid %in% drop_rowids) & !(df2$date %in% drop_dates), ]



# Prepare the keys used to build the rolling predictor set.

# Distinct dates present in the data (count echoed at the console).
daylist <- unique(df2$date)
length(daylist)

# Year = first four characters of the date string.
df2$yr <- substring(as.character(df2$date), 1, 4)

# Week key "<year> <week>" on every row, plus the distinct keys.
# NOTE(review): the week part is not zero-padded, so keys sort as text.
df2$weekid <- paste(df2$yr, df2$week)
weeklist <- unique(df2$weekid)




# Daily summary: total sale per date across all departments.
agg1 <- aggregate(df2$sale, by = list(df2$date), FUN = sum, na.rm = TRUE)
names(agg1) <- c("date", "sumsale")

# Attach the week key to each daily total.
# The key is looked up by date instead of being copied by position, so the
# result does not depend on agg1 and weekdf happening to share a row order.
weekdf <- unique(df2[, c("date", "weekid")])
agg1$weekid <- weekdf$weekid[match(agg1$date, weekdf$date)]

# Weekly mean of the daily totals.
agg2 <- aggregate(agg1$sumsale, by = list(agg1$weekid),
                  FUN = mean, na.rm = TRUE)
names(agg2) <- c("weekid", "avgsale")

# Number of days with data in each week.
agg3 <- aggregate(agg1$sumsale, by = list(agg1$weekid), FUN = length)
names(agg3) <- c("weekid", "cntdays")

# Join the day count onto the weekly means by week key, not by position.
agg2$cntdays <- agg3$cntdays[match(agg2$weekid, agg3$weekid)]

# Last date with data in each week, stored as text.
agg4 <- aggregate(as.Date(df2$date), by = list(df2$weekid), FUN = max)
names(agg4) <- c("weekid", "lastdayofw")
agg4$lastdayofw <- as.character(agg4$lastdayofw)




#-- rolling mean ---

library(zoo)

# Right-aligned rolling statistic over a numeric vector.
#   x     : values in time order
#   width : window size in observations
#   FUN   : summary function accepting na.rm (e.g. mean, sd)
# Element i of the result summarises the `width` observations ending at i;
# the first width - 1 elements are NA. When x is shorter than width the
# result is all NA, so it can always be assigned as a column next to x.
roll_right <- function(x, width, FUN) {
  n <- length(x)
  if (n < width) {
    return(rep(NA_real_, n))
  }
  c(rep(NA_real_, width - 1), rollapply(x, width, FUN, na.rm = FALSE))
}

dfpbase <- agg1
interval <- 5

# Rolling mean, standard deviation and coefficient of variation (sd / mean)
# of the daily totals over the last `interval` days with data.
dfpbase$sale5ma <- roll_right(agg1$sumsale, interval, mean)
dfpbase$sale5sd <- roll_right(agg1$sumsale, interval, sd)
dfpbase$sale5cv <- dfpbase$sale5sd / dfpbase$sale5ma