# Time series analysis and forecasting
# ------------------------------------
# Reference:
# http://a-little-book-of-r-for-time-series.readthedocs.io/en/latest/src/timeseries.html
#
# Data set: the number of births per month in New York city,
# from January 1946 to December 1959.
births <- scan(file = "http://robjhyndman.com/tsdldata/data/nybirths.dat")

# Convert the raw values into a time series object.
# The data are monthly, so frequency = 12; the first observation is
# period 1 (January) of 1946.
birthstimeseries <- ts(data = births, start = c(1946, 1), frequency = 12)
birthstimeseries

# Inspect the distribution: mean, standard deviation, and min/max
mean(birthstimeseries)
sd(birthstimeseries)
range(birthstimeseries)

# Line plot of the raw series
plot.ts(birthstimeseries)
# install.packages("TTR")
library("TTR")

# Smooth the series with a simple moving average (SMA),
# first over a 3-period window and then over an 8-period window
plot.ts(SMA(birthstimeseries, n = 3))
plot.ts(SMA(birthstimeseries, n = 8))

# Decompose the series into its components (including the seasonal
# component used below) and plot the decomposition
birthstimeseriescomponents <- decompose(birthstimeseries)
plot(birthstimeseriescomponents)

# Build a seasonally adjusted series by subtracting the seasonal component
birthstimeseriesseasonallyadjusted <-
  birthstimeseries - birthstimeseriescomponents$seasonal

# Adjusted series in blue, with the original series overlaid as a dashed line
plot(birthstimeseriesseasonallyadjusted, col = "blue")
lines(birthstimeseries, lty = 2)
library(zoo)

# One Date per observation, starting at 1946-01 and stepping one month
# (1/12 of a year) at a time. Offsets run from 0 to length(births) - 1 so the
# vector has exactly as many dates as there are observations
# (0:length(births) would produce one extra date past the end of the data).
mbirths <- as.Date(as.yearmon("1946-01-01") + (seq_along(births) - 1) / 12)

# Visualise with ggplot2.
# ggplot works on data frames, so build one first: the seasonally adjusted
# values plus a Date column derived from the time index of the series.
adjbirths <- data.frame(
  births = as.numeric(birthstimeseriesseasonallyadjusted),
  Month = as.Date(as.yearmon(time(birthstimeseriesseasonallyadjusted)))
)

library(ggplot2)

# Line chart of the adjusted series with a smoothed trend line on top
ggplot(adjbirths, aes(x = Month, y = births)) +
  geom_line() +
  geom_smooth()
#---- Holt-Winters -----------------
# Fit the forecasting model; the fitted object also carries the in-sample
# (back-fitted) one-step-ahead predictions.
birthstimeseriesforecasts <- HoltWinters(birthstimeseries)
birthstimeseriesforecasts
# Coefficients: a = intercept/level (starting point of the next-step
# forecast), b = slope, s = seasonal terms.
# e.g. 28.04366357 + 0.04199921 - 0.78546221 = 27.30020 is the forecast
# for the next time point.
plot(birthstimeseriesforecasts)
# Distinguish observed and fitted lines by colour and line type
plot(birthstimeseriesforecasts,
     col = "blue", col.predicted = "red", lty.predicted = 2)

# Sum of squared one-step-ahead errors
birthstimeseriesforecasts$SSE

# Root mean squared error: divide SSE by the number of fitted values BEFORE
# taking the square root. The count comes from the fitted matrix itself
# (the first seasonal cycle has no fitted values) instead of a hard-coded
# "length - 12".
sqrt(birthstimeseriesforecasts$SSE / nrow(birthstimeseriesforecasts$fitted))

# Absolute difference between observed and fitted values. residuals() aligns
# the observed series with the fitted period, so no manual 13:168 index is
# needed.
birthstimeseriesabserror <- abs(residuals(birthstimeseriesforecasts))
plot(birthstimeseriesabserror)
# Dashed reference line at the mean absolute error
abline(h = mean(birthstimeseriesabserror), lty = 2)
# Change the options: exponential smoothing with no trend component
# (beta = FALSE) and no seasonal component (gamma = FALSE).
# NOTE(review): this reassigns birthstimeseriesforecasts, replacing the
# seasonal model fitted earlier; any later code that uses this variable
# works with the non-seasonal model. Confirm that is intended.
birthstimeseriesforecasts <- HoltWinters(birthstimeseries,
                                         beta = FALSE, gamma = FALSE)
plot(birthstimeseriesforecasts)

# Root mean squared error: SSE divided by the number of fitted values, then
# square-rooted. The count is read from the fitted matrix because a
# non-seasonal fit does not drop a 12-observation start-up period.
sqrt(birthstimeseriesforecasts$SSE / nrow(birthstimeseriesforecasts$fitted))

# Compare this result with the seasonal fit to see the effect of
# beta = FALSE, gamma = FALSE.
# When alpha, beta or gamma is not given it defaults to NULL, meaning the
# value is chosen automatically by minimising the squared prediction error.
# alpha, beta and gamma are all smoothing parameters:
#   alpha :: level; a value close to 1 puts more weight on recent values
#   beta  :: slope of the trend; larger values weight recent values more
#   gamma :: seasonal component
# install.packages("forecast")
library("forecast")

# Use the fitted model to produce forecasts for the next 12 periods.
# NOTE(review): birthstimeseriesforecasts is whatever model was assigned
# last; if that is a non-seasonal fit the forecast will not show a seasonal
# pattern. Refit with HoltWinters(birthstimeseries) first if a seasonal
# forecast is wanted.
birthstimeseriesforecasts2 <- forecast(birthstimeseriesforecasts, h = 12)
plot(birthstimeseriesforecasts2)
# The dark band is the 80% prediction interval, the light gray band is the
# 95% interval.

# ggplot2-style version of the same plot. Call the autoplot() generic rather
# than the autoplot.forecast method directly: the method is not exported by
# current releases of the forecast package, so a direct call fails with
# "could not find function".
autoplot(birthstimeseriesforecasts2)
# 'R 데이터 분석' 카테고리의 다른 글
# [kbdaa_bda] 블로그 크롤링 후 텍스트 분석 (0) | 2017.09.22 |
# ---|---|
# [kbdaa_bda] 은행마케팅 데이터 분석 실습 (0) | 2017.09.22 |
# [kbdaa_bda] 데이터 처리 연습 GDA (0) | 2017.09.21 |
# [kbdaa_bda] 고객빅데이터분석 _은행모델 (0) | 2017.09.21 |
# [kbdaa_bda] 빅데이터고객분석 _ 군집 (0) | 2017.09.21 |