>RE::VISION CRM

R 데이터 분석

twitter access tmp

YONG_X 2013. 8. 29. 00:22

library(rJava)
library(KoNLP)
library(wordcloud)
library(plyr)
library(twitteR)
library(tm)
library(RColorBrewer)


# Search term used for the Twitter query further down, normalised to UTF-8.
keyword <- enc2utf8("mobile")

# ROAuth provides OAuthFactory. Install it only when it is missing, so the
# script does not reinstall from CRAN on every run, and load it with library()
# so a failed load stops the script instead of silently returning FALSE.
if (!requireNamespace("ROAuth", quietly = TRUE)) {
  install.packages("ROAuth")
}
library(ROAuth)

# c.f : http://davetang.org/muse/2013/04/06/using-the-r_twitter-package/

# Necessary step for Windows: fetch a CA certificate bundle that is passed as
# `cainfo` to the handshake below. Downloaded over HTTPS so the trust roots
# themselves cannot be tampered with in transit.
download.file(url = "https://curl.se/ca/cacert.pem", destfile = "cacert.pem")

# To get your consumerKey and consumerSecret see the twitteR documentation for
# instructions. The values can be supplied through the environment variables
# TWITTER_CONSUMER_KEY / TWITTER_CONSUMER_SECRET.
# NOTE(review): the literal fallbacks are credentials committed to this file;
# they are kept only so existing runs behave as before. Revoke them and drop
# the fallbacks once the environment variables are in place.
consumer_key <- Sys.getenv(
  "TWITTER_CONSUMER_KEY",
  unset = "RLJcxPjVrH4VxapXa7PmyQ"
)
consumer_secret <- Sys.getenv(
  "TWITTER_CONSUMER_SECRET",
  unset = "ir0HhiOcfdTKbBA4RYlxKCxUonNwUxw03pZRhjDfA"
)

# All three OAuth endpoints use HTTPS (the original mixed http and https).
cred <- OAuthFactory$new(
  consumerKey = consumer_key,
  consumerSecret = consumer_secret,
  requestURL = "https://api.twitter.com/oauth/request_token",
  accessURL = "https://api.twitter.com/oauth/access_token",
  authURL = "https://api.twitter.com/oauth/authorize"
)

# Necessary step for Windows: run the OAuth handshake with the downloaded
# CA bundle.
cred$handshake(cainfo = "cacert.pem")


:: https://api.twitter.com/oauth/authorize?oauth_token=cRmqO9w4eaVekChR06c4bWTGkl3U5DHWqP3OUP7PE

http://blog.daum.net/revisioncrm?oauth_token=cRmqO9w4eaVekChR06c4bWTGkl3U5DHWqP3OUP7PE&oauth_verifier=vlTYvEB5Jxoboi9PCeATREm3Hxs55Q57hoPFAZ47w


# Persist the `cred` object to disk for later sessions (noted by the author as
# a Windows step), then hand the same object to twitteR.
save(list = "cred", file = "twitter authentication.Rdata")
registerTwitterOAuth(cred)


## https://dev.twitter.com/apps/5003477/show

application name : mobile market report

Access level Read-only

About the application permission model

Consumer key RLJcxPjVrH4VxapXa7PmyQ

Consumer secret ir0HhiOcfdTKbBA4RYlxKCxUonNwUxw03pZRhjDfA


Access token 113886071-7rJbxdYMuvmTZiNrhQTqX0mqwP74Q60KyYurPWuY
Access token secret Vwhvn9evz4jVclQFjTQreNoIOk2ieVDucKtdDOZPbs
Access level Read-only


# Fetch tweets matching `keyword`. Only this Korean-language search is kept:
# the original ran an English search (n = 1000, with a stray empty argument)
# first and immediately overwrote its result with this one, which wasted an
# API request. To search English tweets instead, change `lang`/`n` here.
result <- searchTwitter(keyword, lang = "ko", n = 100, cainfo = "cacert.pem")

# Convert the list of status objects to a data frame and take the text column.
result.df <- twListToDF(result)
result.text <- result.df$text

# Remove whole URLs first, while line breaks still delimit them. Stripping the
# bare substrings "http" / "co" / "CO" (as before) left URL residue behind and
# also damaged ordinary words such as "company".
result.text <- gsub("https?://[^[:space:]]*", "", result.text)

# Replace line breaks with a space so that words on adjacent lines are not
# glued together into one token.
result.text <- gsub("[\r\n]+", " ", result.text)

# Drop the retweet marker only when it is a standalone word, so words that
# merely contain "RT" are left intact.
result.text <- gsub("\\bRT\\b", "", result.text)

# Remove runs of two or more Korean laughter / crying jamo in a single pass.
# The original separate "ㅋㅋㅋ" and "ㅋㅋㅋㅋ" passes could never match once
# "ㅋㅋ" had been removed, and odd-length runs left a stray character behind.
result.text <- gsub("ㅋ{2,}", "", result.text)
result.text <- gsub("ㅠ{2,}", "", result.text)


# Split the text: run extractNoun on every cleaned tweet (one list element
# per tweet).
result_nouns <- Map(extractNoun, result.text)

# Remove useless tokens; for English words the tm stopwords list is used.
# Flatten to a plain character vector of tokens.
result_wordsvec <- unlist(result_nouns, use.names = FALSE)

# Logical negation instead of `-which(...)`: when no token is a stopword,
# `which()` returns integer(0) and `x[-integer(0)]` would drop EVERY token.
result_wordsvec <- result_wordsvec[!(result_wordsvec %in% stopwords("english"))]

# Strip punctuation, then keep only tokens of at least two characters.
# which() drops any NA produced by nchar(), matching what Filter() did.
result_wordsvec <- gsub("[[:punct:]]", "", result_wordsvec)
result_wordsvec <- result_wordsvec[which(nchar(result_wordsvec) >= 2)]

 

# Count how often each token occurs.
result_wordcount <- table(result_wordsvec)

# Colour setup: the 12-colour "Paired" palette from RColorBrewer.
pal <- brewer.pal(12, "Paired")

# Font setup. Note that font names are sensitive to spacing and letter case.
# Malgun Gothic : windowsFonts(malgun=windowsFont("맑은 고딕"))
# Nanum Gothic  : windowsFonts(malgun=windowsFont("나눔고딕"))
# windowsFonts() exists only on Windows, so the mapping is registered there
# and the generic "sans" family is used elsewhere, instead of the script
# failing on a missing function.
if (.Platform$OS.type == "windows") {
  windowsFonts(malgun = windowsFont("Arial"))
  cloud_family <- "malgun"
} else {
  cloud_family <- "sans"
}

# Draw the cloud. Setting min.freq too high drastically reduces the number of
# words that remain; tune as needed.
wordcloud(
  names(result_wordcount),
  freq = result_wordcount,
  scale = c(4, 0.5),
  min.freq = 3,
  random.order = FALSE,
  rot.per = .1,
  colors = pal,
  family = cloud_family
)