>RE::VISION CRM

R 데이터 분석

R 분석 : 데이터 수집 예제

YONG_X 2017. 2. 8. 08:26

# install.packages("XML")

# install.packages("rvest")


library(XML)

library(rvest)



#기업교육에 대한 네이버 뉴스 20161201-20161231

# Fetch one page of Naver news search results and return it as a character
# matrix.
#
# The search query, press filter and date range are fixed inside the URL
# below; only the page number varies.
#
# Args:
#   num: Page number to fetch. It is converted with as.character() and
#        appended to the URL.
#
# Returns:
#   A character matrix with one row per search hit and the columns
#   "subject", "date", "main" and "press". A page without hits gives a
#   0-row matrix with the same four columns, so it can be rbind()-ed safely.
GetStockCommentData <- function(num) {

  # The gsub() strips any blanks that come in through `num`.
  url <- gsub(" ", "", paste0("http://news.naver.com/main/search/search.nhn?query=%B1%E2%BE%F7%B1%B3%C0%B0&st=news.all&q_enc=EUC-KR&r_enc=UTF-8&r_format=xml&rp=none&sm=all.basic&ic=all&so=rel.dsc&rcnews=exist:032:005:086:020:021:081:022:023:025:028:038:469:421:003:001:422:449:004:215:437:056:214:019:057:096:374:055:448:052:009:008:011:277:018:366:014:015:016:375:079:119:006:047:143:002:138:029:293:031:030:092:145:024:417:242:308:262:140:094:243:007:033:037:053:042:353:105:036:050:&rcsection=exist:101:&stDate=range:20160101:20161031&detail=0&pd=4&r_cluster2_start=1&r_cluster2_display=10&start=1&display=10&startDate=2016-12-01&endDate=2016-12-31&page=", as.character(num)), fixed = TRUE)

  doc <- htmlTreeParse(url, useInternalNodes = TRUE)

  # Text of every node matching `path`, always as a plain character vector.
  # A node that yields no single value becomes NA, and a path without
  # matches gives character(0) instead of an empty list.
  node_text <- function(path) {
    values <- xpathApply(doc, path, xmlValue)
    vapply(
      values,
      function(v) if (length(v) == 1L) as.character(v) else NA_character_,
      character(1)
    )
  }

  fields <- list(
    subject = node_text("//div[@class='ct']/a"),            # headline
    date    = node_text("//div[@class='ct']/div/span[4]"),  # publication date
    main    = node_text("//div[@class='ct']/p"),            # summary text
    press   = node_text("//div[@class='ct']/div/span[2]")   # news outlet
  )

  # Re-encode every column from UTF-8 to EUC-KR.
  # NOTE(review): presumably meant for a Korean Windows locale - confirm.
  # iconv() returns NA for a string it cannot convert.
  fields <- lapply(fields, iconv, from = "UTF-8", to = "EUC-KR")

  # Drop line breaks and tabs from the summary.
  fields$main <- gsub("\r|\t|\n", "", fields$main)

  # cbind() would silently recycle shorter columns into the wrong rows, so
  # pad them with NA and tell the caller which page was affected.
  n_rows <- max(lengths(fields))
  if (any(lengths(fields) != n_rows)) {
    warning(
      "GetStockCommentData: page ", num, " returned columns of unequal ",
      "length (", paste(lengths(fields), collapse = ", "), "); padding with NA",
      call. = FALSE
    )
    fields <- lapply(
      fields,
      function(v) c(v, rep(NA_character_, n_rows - length(v)))
    )
  }

  do.call(cbind, fields)
}

# Scrape result pages StartPage..EndPage with GetStockCommentData() and save
# the combined rows as a CSV file.
gc()

StartPage <- 1

EndPage <- 20

# Collect each page in a preallocated list and bind once at the end, rather
# than copying the whole result with rbind() on every iteration. If a
# request fails, the pages fetched so far are still available in `pages`.
page_numbers <- StartPage:EndPage

pages <- vector("list", length(page_numbers))

for (i in seq_along(page_numbers)) {

  pages[[i]] <- GetStockCommentData(page_numbers[i])

}

DATA <- do.call(rbind, pages)

write.csv(DATA, "F:/data/기업교육.csv")

#end 




#install.packages("XML")

library(XML)



# Naver blog search results for the keyword 마미포크 (diapers), 2017-01-01 to 2017-02-01

# Fetch one page of Naver blog search results and return it as a character
# matrix.
#
# The search keyword and the date range are fixed inside the URL below; only
# the page number varies.
#
# Args:
#   num: Page number to fetch. It is converted with as.character() and
#        appended to the URL.
#
# Returns:
#   A character matrix with one row per search hit and the columns
#   "subject", "date", "main", "nick", "category" and "href". A page without
#   hits gives a 0-row matrix with the same columns, so it can be rbind()-ed
#   safely.
GetStockCommentData <- function(num) {

  # The gsub() strips any blanks that come in through `num`.
  url <- gsub(" ", "", paste0("http://section.blog.naver.com/sub/SearchBlog.nhn?type=post&option.keyword=%EB%A7%88%EB%AF%B8%ED%8F%AC%ED%81%AC&term=period&option.startDate=2017-01-01&option.endDate=2017-02-01&option.page.currentPage=", as.character(num)), fixed = TRUE)

  doc <- htmlTreeParse(url, useInternalNodes = TRUE, encoding = "UTF-8")

  # Apply `fun` to every node matching `path` and return a plain character
  # vector. A node that yields no single value (for example a missing
  # attribute) becomes NA, and a path without matches gives character(0)
  # instead of an empty list.
  node_values <- function(path, fun, ...) {
    values <- xpathApply(doc, path, fun, ...)
    vapply(
      values,
      function(v) if (length(v) == 1L) as.character(v) else NA_character_,
      character(1)
    )
  }

  title_path <- "//ul[@class='list_type_1 search_list']/li/h5/a"

  fields <- list(
    subject  = node_values(title_path, xmlValue),                       # post title
    date     = node_values("//span[@class='date']", xmlValue),          # posting date
    main     = node_values("//div[@class='list_content']", xmlValue),   # body summary
    nick     = node_values("//div[@class='list_data']/a", xmlValue),    # blog nickname
    category = node_values("//span[@class='category']/a", xmlValue),    # blog category
    href     = node_values(title_path, xmlGetAttr, "href")              # link to the post
  )

  # Drop line breaks from the summary.
  fields$main <- gsub("\r|\n", "", fields$main)

  # cbind() would silently recycle shorter columns into the wrong rows, so
  # pad them with NA and tell the caller which page was affected.
  n_rows <- max(lengths(fields))
  if (any(lengths(fields) != n_rows)) {
    warning(
      "GetStockCommentData: page ", num, " returned columns of unequal ",
      "length (", paste(lengths(fields), collapse = ", "), "); padding with NA",
      call. = FALSE
    )
    fields <- lapply(
      fields,
      function(v) c(v, rep(NA_character_, n_rows - length(v)))
    )
  }

  do.call(cbind, fields)
}



# Scrape result pages StartPage..EndPage with GetStockCommentData() and save
# the combined rows as a CSV file without a row-name column.
gc()

StartPage <- 1

EndPage <- 10

# Collect each page in a preallocated list and bind once at the end, rather
# than copying the whole result with rbind() on every iteration. If a
# request fails, the pages fetched so far are still available in `pages`.
page_numbers <- StartPage:EndPage

pages <- vector("list", length(page_numbers))

for (i in seq_along(page_numbers)) {

  pages[[i]] <- GetStockCommentData(page_numbers[i])

}

DATA <- do.call(rbind, pages)


write.csv(DATA, "C:/data/마미포크blog.csv", row.names = FALSE)

#end 


'R 데이터 분석' 카테고리의 다른 글

[SKK_DA1] scrpts plus  (0) 2017.05.15
R 분석 : 다이아몬드  (0) 2017.02.08
단순선형회귀분석 연습 : 007 한기대  (0) 2017.02.07
빅데이터 기획 : 분석 : 일정  (0) 2017.02.06
분석용 데이터 : Bank Marketing  (0) 2017.01.30