[CRMAJU2018] R 프로그래밍 기초 복습 -- 2018.04.25

R 데이터 분석

[CRMAJU2018] R 프로그래밍 기초 복습 -- 2018.04.25

YONG_X 2018. 4. 23. 17:12

# CRM 고객데이터 분석

# R 프로그래밍 기초 복습 -- 2018.04.25

#---------------------

# Declare vectors
# Three parallel vectors, one element per customer; the location vector
# deliberately contains one missing value.
c1 <- c(3, 2, 7, 1)
c2 <- c("seoul", "busan", "inchon", NA)
c3 <- c(3, 2, 1, 5)

# Keep strings as plain character on every R version. With factor columns
# (the default before R 4.0) appending a location that is not yet a level
# would produce NA plus a warning.
custlist <- data.frame(c1, c2, c3, stringsAsFactors = FALSE)
custlist

names(custlist) <- c("prchs", "location", "years") # variable names
custlist

str(custlist)
names(custlist)
head(custlist, 2)
nrow(custlist)

# add a new col
custlist$name <- c("jon", "may", "kim", "park")
custlist

# append a row
# Each new row is given as a list so every value keeps its own type.
# c(4, "busan", 2, "yoon") would coerce all four values to character and
# silently turn the numeric columns prchs and years into character columns.
custlist[nrow(custlist) + 1, ] <- list(4, "busan", 2, "yoon")
custlist[nrow(custlist) + 1, ] <- list(4, "LA", 2, "yoon")

# With character columns and list rows neither append raises an error or
# a warning; the table now holds six customers.
nrow(custlist)

# a new vector
# Numeric vector used to demonstrate inspection, sorting, summary
# statistics and concatenation.
point <- c(32, 21, 11, 33, 0, 0)
plot(point)

length(point) # number of elements
str(point)    # structure of an object
class(point)  # class or type of an object

# sort a vector
sort(point)
plot(sort(point))

# Spell out TRUE: the shorthand T is an ordinary variable that can be
# reassigned, which would silently flip the sort direction.
sort(point, decreasing = TRUE)
plot(sort(point, decreasing = TRUE))

# basic stat
mean(point)
median(point)
max(point)
range(point)

# Two equivalent ways to get the spread between the largest and smallest value.
max(point) - min(point)
diff(range(point)) # max deviation; range() is evaluated only once

# combine
point1 <- c(point, point + 1) # combine objects into a vector
point1

plot(point1)
plot(point1, col = "blue", pch = 19)

# add point to custlist
# point must have exactly one element per row of custlist.
custlist$point <- point
head(custlist)

# combine two dfs
cbind(custlist, custlist) # combine objects as columns
rbind(custlist, custlist) # combine objects as rows

# check missing values
is.na(custlist$location) # TRUE where the value is missing

# Negate with ! instead of comparing against F, which is a reassignable
# variable rather than a constant.
custlist$location[!is.na(custlist$location)]

# Alternate the two ages over however many rows the table has.
custlist$age <- rep(c(23, 27), length.out = nrow(custlist))
plot(custlist$age)

# scatterplot
plot(custlist$age, custlist$point)

# handling string var
names(custlist)

# Rename by name rather than by position so the rename still hits the
# right column if the column order ever changes.
names(custlist)[names(custlist) == "name"] <- "lastname"

custlist$frstname <- c("tim", "ma", "mo", "ne", "sh", "ih")
head(custlist)

# paste() joins first and last name with a single space.
custlist$fullname <- paste(custlist$frstname, custlist$lastname)
head(custlist)

# sort df
# order() needs the column itself. A bare `age` is looked up as a
# stand-alone object, and this script never creates one, so evaluate it
# inside the data frame with with() or qualify it with custlist$.
custlist[with(custlist, order(age)), ]
custlist[order(custlist$age), ]
custlist[order(-custlist$age), ] # descending

# data type conversion
# prchs can arrive here as character (for example when rows were appended
# from a mixed-type vector), and dividing by a character column is an
# error, so convert it before deriving anything from it.
custlist$prchs <- as.numeric(custlist$prchs)

# add computed col (derived col)
custlist$ppp <- custlist$point / custlist$prchs # point per purchase

head(custlist)

#--- select subset ------
# Row-filtering exercises on custlist, followed by in-place updates.

# Extract customers aged 24 or older
custlist[custlist$age >= 24, ]

# Extract the customer "sh yoon"
custlist[custlist$fullname == "sh yoon", ]

# Extract customers aged 25 or older with fewer than 3 years of transactions
# The threshold follows the task statement (25). The column is written out
# in full instead of relying on partial matching of `$year`, and compared
# as a number because years may be stored as character at this point.
custlist[custlist$age >= 25 & as.numeric(custlist$years) < 3, ]

# Extract the Busan and Inchon customers
# `==` yields NA for a missing location, and indexing rows with NA returns
# an all-NA row; which() keeps only the positions that are TRUE.
custlist[which(custlist$location == "busan" | custlist$location == "inchon"), ]

# %in% never returns NA, so it needs no extra guard.
custlist[custlist$location %in% c("busan", "inchon"), ]

# Count the Busan and Inchon customers
# (use nrow() or length())

# Change the age of customer "mo kim" to 24
custlist[custlist$fullname == "mo kim", "age"] <- 24

# Add a column labelling customers aged 24 or older "old" and the rest "young"
custlist$isOld <- ifelse(custlist$age >= 24, "old", "young")

# Scatterplot of age against number of purchases
plot(custlist$age, custlist$prchs)

# Scatterplot of age against years (time since joining)?

# Extract the names of the two oldest customers
head(custlist$fullname[order(-custlist$age)], 2)

# Replace a missing location with "unkown"
# NOTE(review): "unkown" looks like a typo for "unknown"; the stored value
# is left unchanged here - confirm before renaming it.
custlist$location <- as.character(custlist$location)
custlist$location[is.na(custlist$location)] <- "unkown"

# Create a new table containing only age and number of purchases

#--- merge and aggregate -------
# Join a per-location lookup table onto custlist, then summarise by group.

# Create a table of customer counts by location
locpop <- data.frame(
  location = c("seoul", "busan", "inchon"),
  custpop = c(101, 98, 78)
)
head(locpop)

# Join onto custlist
# all.x = TRUE keeps every custlist row; locations that are absent from
# locpop get NA for custpop.
custlist1 <- merge(custlist, locpop, by = "location", all.x = TRUE)
head(custlist1)

plot(custlist1$age, custlist1$custpop)

# jitter() adds a little random noise so overlapping points become visible.
plot(jitter(custlist1$age), jitter(custlist1$custpop),
     col = "blue", pch = 19)

# Mean number of purchases by age
agg1 <- aggregate(custlist$prchs, by = list(custlist$age),
                  FUN = mean, na.rm = TRUE)
names(agg1) <- c("age", "avg_prchs")
agg1

# Median point by location
# The result column is named mdn_point, so the summary function must be
# median rather than mean.
agg2 <- aggregate(custlist$point, by = list(custlist$location),
                  FUN = median, na.rm = TRUE)
names(agg2) <- c("location", "mdn_point")
agg2

# barplot(agg2$mdn_point, names.arg=agg2$location)

# Which location has the largest total of points??

CRMAJU2018_Rprg기초복습_20180425.txt

0.0MB

CRMAJU2018_Rprg기초복습_20180425.txt

0.0MB

저작자표시 비영리 변경금지

'R 데이터 분석' 카테고리의 다른 글

rolling 2 (0)	2018.05.12
[R분석] rolling base stat generator for time series data (0)	2018.05.01
[CRMAJU2018] 데이터 분석 기초 연습문제[3] (0)	2018.04.16
[CRMAJU2018] --- R 데이터 분석 연습문제 [2] (0)	2018.04.16
[CRMAJU2018] --- R 데이터 처리 기초 연습문제 [1] 후보답안 (0)	2018.04.12

현재글[CRMAJU2018] R 프로그래밍 기초 복습 -- 2018.04.25

리비젼 CRM ( revisioncrm )

chatGPT, R, 데이터분석, 프롬프트, 챗GPT, 프롬프트엔지니어링, 빅데이터, 전용준 빅데이터, 인공지능, 전용준, GPT, 리비젼컨설팅, 데이터 분석, 디지털마케팅, 빅 데이터, CRM, 머신러닝, AI, 리비젼, 데이터 사이언티스트,

Today :
Yesterday :