[KDATA PLOT EDA retail] 플롯 그리기

R 데이터 분석

[KDATA PLOT EDA retail] 플롯 그리기

YONG_X 2019. 6. 8. 13:02

# Base series `a` and three series derived from it; the plots below reuse them.
a <- 1:5
a1 <- 2 * a + 20
a2 <- 1.2 * (a + 10)
a3 <- a^1.1 / 15 + 20

# Print the (min, max) of each derived series, in the order a1, a2, a3.
invisible(lapply(list(a1, a2, a3), function(series) print(range(series))))

# Plot `a` against its index with the y axis running from 0 to max(a1),
# then overlay the derived series, one colour each.
plot(a, type = "b", ylim = c(0, max(a1)))
invisible(Map(
  function(series, colour) lines(series, col = colour, type = "b"),
  list(a1, a2, a3),
  c("red", "blue", "green")
))

# A small character vector, printed for inspection.
varNames <- c('사과', '배', '당근','말')
varNames

#---------- scatter plot ------------
# Same four series as before, now drawn against calendar years on the x axis.
years <- 2010:2014

plot(years, a, type = "b", ylim = c(0, max(a1)))
invisible(Map(
  function(series, colour) lines(years, series, col = colour, type = "b"),
  list(a1, a2, a3),
  c("red", "blue", "green")
))

#-- Largest value across several columns ------
df1 <- data.frame(a, a1, a2, a3)
max(df1)

#--- Repeat the same per-column operation automatically --------
# lapply() visits the data frame one column at a time and `df[] <- ...`
# keeps the data-frame shape, so every column ends up as a character vector.
# The earlier apply()/as.data.frame() form first coerced the whole frame to a
# matrix and, on R versions before 4.0, turned the result into factors.
df2 <- df1

df3 <- df2
df3[] <- lapply(df2, as.character)
df3

# Converting again is a no-op on values: the columns are already character.
df4 <- df3
df4[] <- lapply(df3, as.character)
df4

# Arithmetic on a data frame is applied element-wise to every column.
df1 / 3

# Mean of mpg for each level of `am`.
# `%>%`, group_by() and summarize() are provided by dplyr, so the package has
# to be attached before the pipeline can run.
library(dplyr)

mtcars %>%
  group_by(am) %>%
  summarize(mean_mpg = mean(mpg, na.rm = TRUE))

# ---- For clustering, see the bookstore customer segmentation case ------
# Link:
# http://blog.daum.net/revisioncrm/405

# Exclude only a few columns: keep every column whose name is not listed.
head(mtcars[, setdiff(names(mtcars), c("wt", "drat"))])

#---- Matrix transpose practice (swapping rows and columns) ------
# This section was pasted from an interactive console session. The `>` prompts
# have been removed and the printed results are kept as comments so that the
# file parses as R.

x <- matrix(1:9, 3, 3)
x
#      [,1] [,2] [,3]
# [1,]    1    4    7
# [2,]    2    5    8
# [3,]    3    6    9

# A character matrix: every cell holds the same string.
x <- matrix(rep("aa", 9), 3, 3)
x
#      [,1] [,2] [,3]
# [1,] "aa" "aa" "aa"
# [2,] "aa" "aa" "aa"
# [3,] "aa" "aa" "aa"

t(x)
#      [,1] [,2] [,3]
# [1,] "aa" "aa" "aa"
# [2,] "aa" "aa" "aa"
# [3,] "aa" "aa" "aa"

# The transpose is still a 3 x 3 character matrix.
str(t(x))
#  chr [1:3, 1:3] "aa" "aa" "aa" "aa" "aa" "aa" "aa" "aa" "aa"

# Back to the numeric matrix, where the transpose is visible.
x <- matrix(1:9, 3, 3)
t(x)
#      [,1] [,2] [,3]
# [1,]    1    2    3
# [2,]    4    5    6
# [3,]    7    8    9

##############

# Example of handling missing values (the original heading said "NULL"; the
# code tests for NA): replace every NA in the `cnt` column with 0.
# NOTE(review): `df` is not created anywhere in this script - presumably it
# stands for whatever data frame is being cleaned; confirm before running.

df$cnt <- ifelse(is.na(df$cnt), 0, df$cnt)

############

# Sample: drawing a decision tree
library(party)

# Tree of mpg on hp and wt, fitted to mtcars.
# maxdepth  : how many levels deep the tree is allowed to grow
# minbucket : how many observations a cell must hold at the very least
dt1 <- ctree(
  mpg ~ hp + wt,
  data = mtcars,
  controls = ctree_control(maxdepth = 5, minbucket = 2)
)

plot(dt1)

## Copy of the data frame that leaves out rows whose mpg is NA
mtcars1 <- subset(mtcars, !is.na(mpg))

# mt1$vsam joins the two separate variables vs and am into one character
# (categorical) variable, intended to be used as a target.
mt1 <- head(mtcars, n = 6L)
mt1

mt1[["vsam"]] <- with(mt1, paste0(vs, am))
mt1

# Example: adding a regression line
# Scatter of wt against mpg, then the least-squares fit of wt on mpg on top.
plot(mtcars$mpg, mtcars$wt)
abline(lm(wt ~ mpg, data = mtcars))

# Copy mtcars, expose the row names as a regular column, and add a shortened
# name made of the first four characters.
mtcars2 <- mtcars
mtcars2[["carNm"]] <- rownames(mtcars2)
mtcars2[["carNmShort"]] <- substring(mtcars2[["carNm"]], first = 1, last = 4)

head(mtcars2)

# Turn every NA into 0 in a single step
d <- replace(d, is.na(d), 0)

#-------------------

# After adding the regression line
plot(mtcars$mpg, mtcars$wt)
abline(lm(wt ~ mpg, data = mtcars))

# Drawing straight lines on the plot
abline(h = 5, lty = 3, col = "orange")
abline(v = 15, lty = 3)
abline(h = 1:3, lty = 3, col = "orange")  # several lines in one call

#--------------------

# Visualise the correlations between per-item trends as a correlation plot
# NOTE(review): `pilot.5_1` is not created in this script; it has to exist
# already and be convertible to a matrix.

# install.packages("Hmisc")
library(Hmisc)

# Compute the correlations first and keep the result
res1 <- rcorr(as.matrix(pilot.5_1))

# install.packages("corrplot")
library(corrplot)

# Draw the correlation matrix
corrplot(
  res1[["r"]],
  order = "hclust",
  type = "upper",
  tl.srt = 45,
  tl.col = "black"
)

#############################

## Working with the MCR data
# NOTE(review): absolute path to one particular Windows desktop; point it at
# wherever MCR2018.csv lives on the machine running this.
mcr1 <- read.csv('C:/Users/KDATA14/Desktop/AA/MCR2018.csv')

# Cross-tabulate A_003 against C_11_001 in long form:
# Var1 = A_003 level, Var2 = C_11_001 level, Freq = number of rows.
df01 <- as.data.frame(table(mcr1$A_003, mcr1$C_11_001))

# Counts for C_11_001 == 1 and C_11_001 == 98, placed side by side per Var1.
df02 <- df01[df01$Var2 == 1, ]
df03 <- df01[df01$Var2 == 98, ]
df04 <- merge(df02, df03, by = "Var1", all.x = TRUE)

df05 <- df04[, c("Var1", "Freq.x", "Freq.y")]
names(df05) <- c("age", "cnt_y", "cnt_n")

# Share of the "1" answers among the "1" and "98" answers.
df05$rt_y <- df05$cnt_y / (df05$cnt_y + df05$cnt_n)

# df05$age is a factor (it comes from table()). as.numeric() on a factor
# returns the internal level codes 1, 2, 3, ..., so go through as.character()
# to put the actual A_003 values on the x axis.
plot(as.numeric(as.character(df05$age)), df05$rt_y * 100,
     type = "b", col = "red", pch = 19,
     ylim = c(0, 100),
     xlab = "A_003 (age)", ylab = "rt_y (%)",
     main = '연령별 인터넷/모바일 사용수준 분포')

# Reference line at 80 %
abline(h = 80, col = "orange", lty = 3)

######

# Example: changing the data type of several columns in one go
mt1 <- mtcars

# Convert the first five columns to character. lapply() always returns one
# list element per column, whereas sapply() simplifies its result to a matrix
# or a plain vector depending on the input.
mt1[, 1:5] <- lapply(mt1[, 1:5], as.character)

str(mt1)

##########

# Both groups of A_001 on a single chart
# NOTE(review): `mcr` is not created anywhere in this script - presumably it
# is the MCR survey data frame; confirm it exists before running.
mcr1 <- mcr

# For one subset of the survey, cross-tabulate A_003 against C_11_001 and
# return one row per A_003 level with:
#   age   - the A_003 level (a factor, as produced by table())
#   cnt_y - number of rows with C_11_001 == 1
#   cnt_n - number of rows with C_11_001 == 98
#   rt_y  - cnt_y / (cnt_y + cnt_n)
usage_rate_by_age <- function(data) {
  counts <- as.data.frame(table(data$A_003, data$C_11_001))
  yes_rows <- counts[counts$Var2 == 1, ]
  no_rows <- counts[counts$Var2 == 98, ]
  both <- merge(yes_rows, no_rows, by = "Var1", all.x = TRUE)
  rates <- both[, c("Var1", "Freq.x", "Freq.y")]
  names(rates) <- c("age", "cnt_y", "cnt_n")
  rates$rt_y <- rates$cnt_y / (rates$cnt_y + rates$cnt_n)
  rates
}

# Group A_001 == 1, drawn in blue.
# NOTE(review): the title implies the two groups are male and female - confirm
# which A_001 code is which.
mcr2 <- mcr1[mcr1$A_001 == 1, ]
df05 <- usage_rate_by_age(mcr2)

# `age` is a factor: as.numeric() alone would return the level codes, so go
# through as.character() to plot the actual A_003 values.
plot(as.numeric(as.character(df05$age)), df05$rt_y * 100,
     type = "b", col = "blue", pch = 19,
     ylim = c(0, 100),
     xlab = "A_003 (age)", ylab = "rt_y (%)",
     main = '남성/여성 연령별 인터넷/모바일 사용수준 분포[MCR_data]')

# Reference lines at 90 % and 80 %
abline(h = c(90, 80), col = "orange", lty = 3)

# Group A_001 == 2, added to the same chart in red.
mcr2 <- mcr1[mcr1$A_001 == 2, ]
df05 <- usage_rate_by_age(mcr2)

lines(as.numeric(as.character(df05$age)), df05$rt_y * 100,
      type = "b", col = "red", pch = 19)

#------------------
# Average minutes of mobile internet use per weekday (B_03_029)
#-------------------
mcr1 <- mcr

# Group A_001 == 1: jittered semi-transparent blue points of B_03_029 against
# A_003 (age), with a lowess smoother through them.
mcr2 <- mcr1[mcr1$A_001 == 1, ]
plot(
  jitter(as.numeric(mcr2$A_003)), jitter(mcr2$B_03_029),
  pch = 19,
  cex = 0.5,
  col = rgb(0, 0, 1, 0.2),
  main = '남성/여성 연령별 평일 하루 평균 모바일 인터넷 이용 시간(분) 분포[MCR_data]'
)
lines(lowess(as.numeric(mcr2$A_003), mcr2$B_03_029), col = "blue")

# Group A_001 == 2: the same two layers in red, added to the existing plot.
mcr2 <- mcr1[mcr1$A_001 == 2, ]
with(mcr2, {
  points(jitter(as.numeric(A_003)), jitter(B_03_029),
         pch = 19, cex = 0.5, col = rgb(1, 0, 0, 0.2))
  lines(lowess(as.numeric(A_003), B_03_029), col = "red")
})

#------------------
# Average minutes of mobile internet use per weekday (B_03_029)
# vs. how often outdoor wear was bought within the last year (J_23_11_003)
#-------------------
mcr1 <- mcr

# Group A_001 == 1: jittered semi-transparent blue points of J_23_11_003
# against B_03_029, with a thicker lowess smoother through them.
mcr2 <- mcr1[mcr1$A_001 == 1, ]
plot(
  jitter(as.numeric(mcr2$B_03_029)), jitter(mcr2$J_23_11_003),
  pch = 19,
  cex = 0.5,
  col = rgb(0, 0, 1, 0.2),
  xlab = '평일 하루 평균 모바일 인터넷 이용 시간(분)',
  main = '[성별] 평일 하루 평균 모바일 인터넷 이용 시간(분) \nVs. 최근 1년 이내 아웃도어 구입 빈도 분포[MCR_data]'
)
lines(lowess(as.numeric(mcr2$B_03_029), mcr2$J_23_11_003),
      col = "blue", lwd = 2)

# Group A_001 == 2: the same two layers in red, added to the existing plot.
mcr2 <- mcr1[mcr1$A_001 == 2, ]
with(mcr2, {
  points(jitter(as.numeric(B_03_029)), jitter(J_23_11_003),
         pch = 19, cex = 0.5, col = rgb(1, 0, 0, 0.2))
  lines(lowess(as.numeric(B_03_029), J_23_11_003), col = "red", lwd = 2)
})

# Given a variable name, find that column's position (index) in the data frame.
# The comparison is exact: grep() treats the name as a regular expression and
# would also return every column whose name merely contains "A_003".
which(colnames(mcr) == "A_003")

저작자표시 비영리 변경금지 (새창열림)

'R 데이터 분석' 카테고리의 다른 글

[데이터분석] 데이터 분석 도구 사용 현황 조사 (0)	2019.09.05
[KDATA VDXF] 금융분석 R (0)	2019.07.27
[kdata 2019 recsys 0030] retail recommender using R[전용준 리비젼 recsys r] (0)	2019.05.18
[AI Summit workshop] rf anomaly 1206 (0)	2018.11.30
GameLog-In 데이터준비 (0)	2018.10.16

현재글[KDATA PLOT EDA retail] 플롯 그리기

리비젼 CRM ( revisioncrm )

인공지능, R, 전용준 빅데이터, 디지털마케팅, CRM, GPT, 프롬프트엔지니어링, 머신러닝, 데이터 사이언티스트, 리비젼컨설팅, 빅데이터, 전용준, 프롬프트, 빅 데이터, 리비젼, 챗GPT, AI, 데이터분석, 데이터 분석, chatGPT,

Today :
Yesterday :