# RE::VISION CRM

# R 데이터 분석

# [kbdaa_bda] 시계열예측

# YONG_X 2017. 9. 21. 17:13

# Time series analysis and forecasting
# ------------------------------------
# Reference:
#   http://a-little-book-of-r-for-time-series.readthedocs.io/en/latest/src/timeseries.html
#
# Data set: number of births per month in New York City,
# January 1946 through December 1959.
births <- scan(file = "http://robjhyndman.com/tsdldata/data/nybirths.dat")

# Wrap the raw vector in a time-series object. The data are monthly, so
# frequency is 12; the first observation is period 1 of 1946.
birthstimeseries <- ts(data = births, start = c(1946, 1), frequency = 12)
birthstimeseries

# Check the distribution of the series, then draw it.
mean(x = birthstimeseries)
sd(x = birthstimeseries)
range(birthstimeseries)
plot.ts(x = birthstimeseries)


# SMA() comes from the TTR package; install it once if it is missing:
# install.packages("TTR")
library(TTR)

# Smooth the series with simple moving averages over 3 and over 8
# periods, plotting each smoothed series in turn.
plot.ts(x = SMA(birthstimeseries, n = 3))
plot.ts(x = SMA(birthstimeseries, n = 8))


# Decompose the series into trend and seasonal parts. The result is
# stored so its components can be reused below, and then plotted.
birthstimeseriescomponents <- decompose(x = birthstimeseries)
plot(x = birthstimeseriescomponents)

# Seasonally adjusted series: the original series minus the estimated
# seasonal component.
birthstimeseriesseasonallyadjusted <-
  birthstimeseries - birthstimeseriescomponents[["seasonal"]]

# Adjusted series as a solid blue line, with the original series drawn
# on top as a dashed line for comparison.
plot(x = birthstimeseriesseasonallyadjusted, col = "blue")
lines(x = birthstimeseries, lty = 2)


library(zoo)

# One calendar date per observation, stepping one month at a time from
# January 1946. The month offsets run from 0 to length(births) - 1 so the
# result has exactly length(births) elements (0:length(births) would
# produce one date too many).
mbirths <- as.Date(as.yearmon("1946-01-01") + (seq_along(births) - 1) / 12)



# Visualise the seasonally adjusted series with ggplot2.
# A data frame is built first: one column with the adjusted values as a
# plain numeric vector, one with a Date derived from each observation's
# time index.
adjbirths <- data.frame(
  births = as.numeric(birthstimeseriesseasonallyadjusted),
  Month = as.Date(as.yearmon(time(birthstimeseriesseasonallyadjusted)))
)

library(ggplot2)

# Line chart of the adjusted values over time with a geom_smooth() layer
# added on top.
ggplot(data = adjbirths, mapping = aes(x = Month, y = births)) +
  geom_line() +
  geom_smooth()



# ---- Holt-Winters ----
# Build the forecasting model, then inspect how it back-fits the data.
birthstimeseriesforecasts <- HoltWinters(x = birthstimeseries)
birthstimeseriesforecasts

# Coefficients in the printed output: a = level (intercept, the starting
# point for the next-period forecast), b = slope, s = seasonal terms.
# Worked example from the author's run:
#   28.04366357 + 0.04199921 - 0.78546221 = 27.30020 (next-period forecast)

# Default plot of the fitted model.
plot(x = birthstimeseriesforecasts)

# Same plot, with the original and predicted lines told apart by colour
# and line type.
plot(
  x = birthstimeseriesforecasts,
  col = "blue",
  col.predicted = "red",
  lty.predicted = 2
)

# SSE component of the fitted model.
birthstimeseriesforecasts[["SSE"]]


# Root mean squared error of the fitted values: sqrt(SSE / n).
# (Dividing sqrt(SSE) by n is not an RMSE.) n is the number of rows of the
# model's fitted component, so the leading observations that have no fitted
# value -- the first 12 for this seasonal fit -- are left out without
# hard-coding their count.
sqrt(birthstimeseriesforecasts$SSE / nrow(birthstimeseriesforecasts$fitted))

# Absolute difference between fitted and actual values.
# residuals() aligns observed and fitted values over the fitted time span,
# which replaces the manual 13:168 index range.
plot(abs(residuals(birthstimeseriesforecasts)), ylab = "Absolute error")

# Dashed horizontal reference line at the mean absolute error.
abline(h = mean(abs(residuals(birthstimeseriesforecasts))), lty = 2)



# Option change: exponential smoothing with a non-seasonal fit
# (beta = FALSE and gamma = FALSE).
# NOTE: this reassigns birthstimeseriesforecasts, replacing the seasonal
# model fitted earlier in the script.
birthstimeseriesforecasts <- HoltWinters(birthstimeseries, beta = FALSE, gamma = FALSE)
plot(birthstimeseriesforecasts)

# RMSE = sqrt(SSE / n), with n taken from this model's own fitted component.
# The "length - 12" used for the seasonal fit is specific to that model, so
# the fitted values are counted instead of assuming 12 are missing.
sqrt(birthstimeseriesforecasts$SSE / nrow(birthstimeseriesforecasts$fitted))

# Compare this result with the earlier fit to see the effect of
# beta = FALSE, gamma = FALSE.
#
# Notes on the smoothing parameters alpha, beta and gamma:
# - Left unspecified they are NULL, meaning they are chosen automatically
#   by minimising the squared prediction error.
# - alpha: level. A value close to 1 puts more weight on recent
#   observations when estimating the level.
# - beta: used to estimate the slope of the trend. Larger values weight
#   recent observations more heavily.
# - gamma: used for the seasonal component.




# install.packages("forecast")
library(forecast)

# Use the fitted model to generate forecasts for the next 12 periods.
# NOTE(review): birthstimeseriesforecasts was last assigned the fit with
# beta = FALSE, gamma = FALSE, so the forecast comes from that non-seasonal
# model -- confirm this is intended rather than the seasonal fit.
birthstimeseriesforecasts2 <- forecast(birthstimeseriesforecasts, h = 12)
plot(birthstimeseriesforecasts2)

# In the plot, the dark band is the 80% prediction interval and the light
# gray band is the 95% interval.

# ggplot2-style version of the same plot. Call the autoplot() generic and
# let S3 dispatch select the method for forecast objects; calling
# autoplot.forecast() by name fails when the method is not exported.
autoplot(birthstimeseriesforecasts2)