# Daily sales data with a department category.
# Target columns: date, day of week, week of year, dept, sales.
library(lubridate)

# 31 consecutive calendar days starting on 2018-04-01.
c1 <- as.Date("2018-04-01")
c2 <- c1 + 0:30

# Day-of-week codes: 1 = Sunday, 2 = Monday,
# so the weekend is {1, 7} and the working week is 2:6.
wday(c2)
week(c2)

# Calendar table: the date kept as text plus its day-of-week and week number.
df1 <- data.frame(
  date = as.character(c2),
  dayow = wday(c2),
  week = week(c2)
)

# Drop the weekend codes, which leaves Monday through Friday (2:6).
df1 <- df1[!(df1$dayow %in% c(1, 7)), ]
head(df1)
# Department labels and the constant daily sale assigned to each of them.
c11 <- c("A", "B", "C")
c12 <- c(150, 100, 70)

# One copy of the weekday calendar per department, each tagged with its
# label and sale value, then stacked into a single long table.
dept_frames <- lapply(seq_along(c11), function(i) {
  one_dept <- df1
  one_dept$dept <- c11[i]
  one_dept$sale <- c12[i]
  one_dept
})
dftmp <- dept_frames[[length(dept_frames)]] # last department's copy
dftmp1 <- do.call(rbind, dept_frames)

# Sort by date, then department, and number the rows in that order.
# The id is derived from the actual row count instead of a fixed 1:66, so
# changing the date range or the department list no longer breaks the
# column assignment.
df2 <- dftmp1[order(dftmp1$date, dftmp1$dept), ]
row_id_tmp <- seq_len(nrow(df2))
df2$rowid <- row_id_tmp

# Remove three individual rows by id and every row of two whole dates
# (presumably to leave gaps in the series for the later steps -- confirm).
df2 <- df2[!(df2$rowid %in% c(15, 29, 30)), ]
df2 <- df2[!(df2$date %in% c("2018-04-27", "2018-04-05")), ]
# Inputs for the rolling predictor set.
# Distinct dates that are still present in the sales table.
daylist <- unique(df2$date)
length(daylist)

# Tag every row with a "<year> <week>" key: the year is the first four
# characters of the date text, the week comes from the existing week column.
df2$yr <- substr(as.character(df2$date), 1, 4)
df2$weekid <- paste(df2$yr, df2$week)

# Distinct week keys, in order of first appearance.
weeklist <- unique(df2$weekid)
# Daily summary: total sale across all rows of each date.
agg1 <- aggregate(df2$sale, by = list(df2$date), FUN = sum, na.rm = TRUE)
names(agg1) <- c("date", "sumsale")

# Attach each date's week key by looking the date up, rather than assuming
# that the aggregate output and the de-duplicated rows share the same order.
weekdf <- unique(df2[, c("date", "weekid")])
agg1$weekid <- weekdf$weekid[match(agg1$date, weekdf$date)]

# Weekly mean of the daily totals.
agg2 <- aggregate(agg1$sumsale, by = list(agg1$weekid),
                  FUN = mean, na.rm = TRUE)
names(agg2) <- c("weekid", "avgsale")

# Number of dates that contributed to each week, joined on the week key.
agg3 <- aggregate(agg1$sumsale, by = list(agg1$weekid), FUN = length)
names(agg3) <- c("weekid", "cntdays")
agg2$cntdays <- agg3$cntdays[match(agg2$weekid, agg3$weekid)]

# Latest date present in each week, converted back to text.
agg4 <- aggregate(as.Date(df2$date), by = list(df2$weekid), FUN = max)
names(agg4) <- c("weekid", "lastdayofw")
agg4$lastdayofw <- as.character(agg4$lastdayofw)
#-- rolling mean ---
library(zoo)

# Trailing statistics of the daily totals over a window of `interval` rows.
dfpbase <- agg1
interval <- 5

# rollapplyr() right-aligns the window and fill = NA pads the first
# interval - 1 positions, replacing the hand-built NA prefix.
# na.rm is written as FALSE (F is an ordinary variable that can be
# reassigned) and is still passed explicitly, as before, so the call to
# the window function is unchanged.
dfpbase$sale5ma <- rollapplyr(agg1$sumsale, interval, mean,
                              na.rm = FALSE, fill = NA)
dfpbase$sale5sd <- rollapplyr(agg1$sumsale, interval, sd,
                              na.rm = FALSE, fill = NA)

# Ratio of the rolling standard deviation to the rolling mean.
dfpbase$sale5cv <- dfpbase$sale5sd / dfpbase$sale5ma
# --- Blog page footer (not R code; commented out so the script parses) ---
# 'R 데이터 분석' 카테고리의 다른 글
# [ CRMAJU2018 ] 기말고사 (0) | 2018.06.20 |
# ---|---|
# [CRMAJU2018] 예측분석 Review (0) | 2018.06.15 |
# [R분석] rolling base stat generator for time series data (0) | 2018.05.01 |
# [CRMAJU2018] R 프로그래밍 기초 복습 -- 2018.04.25 (0) | 2018.04.23 |
# [CRMAJU2018] 데이터 분석 기초 연습문제[3] (0) | 2018.04.16 |