# install.packages("XML")
# install.packages("rvest")
library(XML)
library(rvest)
#기업교육에 대한 네이버 뉴스 20161201-20161231
# Fetch one page of Naver news search results and return it as a matrix.
#
# The search URL is hard-coded (percent-encoded query, press and section
# filters, date parameters); only the trailing `page=` value varies.
# NOTE(review): the URL carries both `stDate=range:20160101:20161031` and
# `startDate=2016-12-01&endDate=2016-12-31`; which of the two the server
# honors cannot be told from this code - confirm.
#
# Args:
#   num: Page number to request. It is converted with as.character() and
#        appended to the URL.
#
# Returns:
#   A character matrix with one row per matched result and the columns
#   `subject`, `date`, `main` and `press`. When the page contains no matching
#   result nodes, a zero-row matrix with the same four columns is returned.
GetStockCommentData <- function(num) {
  base_url <- "http://news.naver.com/main/search/search.nhn?query=%B1%E2%BE%F7%B1%B3%C0%B0&st=news.all&q_enc=EUC-KR&r_enc=UTF-8&r_format=xml&rp=none&sm=all.basic&ic=all&so=rel.dsc&rcnews=exist:032:005:086:020:021:081:022:023:025:028:038:469:421:003:001:422:449:004:215:437:056:214:019:057:096:374:055:448:052:009:008:011:277:018:366:014:015:016:375:079:119:006:047:143:002:138:029:293:031:030:092:145:024:417:242:308:262:140:094:243:007:033:037:053:042:353:105:036:050:&rcsection=exist:101:&stDate=range:20160101:20161031&detail=0&pd=4&r_cluster2_start=1&r_cluster2_display=10&start=1&display=10&startDate=2016-12-01&endDate=2016-12-31&page="
  # The base URL contains no spaces, so stripping them from the page value
  # alone is equivalent to stripping them from the assembled URL.
  url <- paste0(base_url, gsub(" ", "", as.character(num)))
  doc <- htmlTreeParse(url, useInternalNodes = TRUE)

  subject <- xpathSApply(doc, "//div[@class='ct']/a", xmlValue)           # headline
  date <- xpathSApply(doc, "//div[@class='ct']/div/span[4]", xmlValue)    # publication date
  main <- xpathSApply(doc, "//div[@class='ct']/p", xmlValue)              # summary text
  press <- xpathSApply(doc, "//div[@class='ct']/div/span[2]", xmlValue)   # news outlet

  # A page without hits leaves nothing to bind; hand back an empty matrix of
  # the usual shape so callers can still rbind() the result.
  if (length(subject) == 0L) {
    return(matrix(
      character(0),
      nrow = 0L,
      ncol = 4L,
      dimnames = list(NULL, c("subject", "date", "main", "press"))
    ))
  }

  # Re-encode the extracted text from UTF-8 to EUC-KR.
  # NOTE(review): presumably needed for a Korean Windows locale - confirm.
  subject <- iconv(subject, "UTF-8", "EUC-KR")
  date <- iconv(date, "UTF-8", "EUC-KR")
  main <- iconv(main, "UTF-8", "EUC-KR")
  press <- iconv(press, "UTF-8", "EUC-KR")

  # Drop carriage returns, tabs and newlines from the summary.
  main <- gsub("\r|\t|\n", "", main)

  # Column names are taken from the variable names.
  cbind(subject, date, main, press)
}
# Scrape news result pages StartPage..EndPage and save them as one CSV file.
gc()
StartPage <- 1
EndPage <- 20
# Fetch every page first and bind the per-page matrices once, instead of
# re-copying DATA with rbind() on every loop iteration.
DATA <- do.call(rbind, lapply(StartPage:EndPage, GetStockCommentData))
# Row names are written as well (write.csv default).
write.csv(DATA, "F:/data/기업교육.csv")
#end
#install.packages("XML")
library(XML)
# Naver blog posts for the keyword 마미포크 (diapers), 2017-01-01 to 2017-02-01
# Fetch one page of Naver blog search results and return it as a matrix.
#
# The search URL is hard-coded (percent-encoded keyword and the period
# 2017-01-01 to 2017-02-01); only the trailing `option.page.currentPage=`
# value varies.
# NOTE: this reuses the name GetStockCommentData, so when the file is run top
# to bottom it replaces the earlier definition of that name.
#
# Args:
#   num: Page number to request. It is converted with as.character() and
#        appended to the URL.
#
# Returns:
#   A character matrix with one row per matched post and the columns
#   `subject`, `date`, `main`, `nick`, `category` and `href`. When the page
#   contains no matching post nodes, a zero-row matrix with the same six
#   columns is returned.
GetStockCommentData <- function(num) {
  base_url <- "http://section.blog.naver.com/sub/SearchBlog.nhn?type=post&option.keyword=%EB%A7%88%EB%AF%B8%ED%8F%AC%ED%81%AC&term=period&option.startDate=2017-01-01&option.endDate=2017-02-01&option.page.currentPage="
  # The base URL contains no spaces, so stripping them from the page value
  # alone is equivalent to stripping them from the assembled URL.
  url <- paste0(base_url, gsub(" ", "", as.character(num)))
  doc <- htmlTreeParse(url, useInternalNodes = TRUE, encoding = "UTF-8")

  post_link <- "//ul[@class='list_type_1 search_list']/li/h5/a"
  subject <- xpathSApply(doc, post_link, xmlValue)                          # post title
  date <- xpathSApply(doc, "//span[@class='date']", xmlValue)               # posting date
  main <- xpathSApply(doc, "//div[@class='list_content']", xmlValue)        # body summary
  nick <- xpathSApply(doc, "//div[@class='list_data']/a", xmlValue)         # blog nickname
  category <- xpathSApply(doc, "//span[@class='category']/a", xmlValue)     # blog category
  href <- xpathSApply(doc, post_link, xmlGetAttr, "href")                   # post URL

  # A page without hits leaves nothing to bind; hand back an empty matrix of
  # the usual shape so callers can still rbind() the result.
  if (length(subject) == 0L) {
    return(matrix(
      character(0),
      nrow = 0L,
      ncol = 6L,
      dimnames = list(
        NULL,
        c("subject", "date", "main", "nick", "category", "href")
      )
    ))
  }

  # Drop carriage returns and newlines from the summary.
  main <- gsub("\r|\n", "", main)

  # Column names are taken from the variable names.
  cbind(subject, date, main, nick, category, href)
}
# Scrape blog result pages StartPage..EndPage and save them as one CSV file.
gc()
StartPage <- 1
EndPage <- 10
# Fetch every page first and bind the per-page matrices once, instead of
# re-copying DATA with rbind() on every loop iteration.
DATA <- do.call(rbind, lapply(StartPage:EndPage, GetStockCommentData))
# Written without the row-name column.
write.csv(DATA, "C:/data/마미포크blog.csv", row.names = FALSE)
#end
# 'R 데이터 분석' 카테고리의 다른 글
#
# | Post | Date |
# |---|---|
# | [SKK_DA1] scrpts plus (0) | 2017.05.15 |
# | R 분석 : 다이아몬드 (0) | 2017.02.08 |
# | 단순선형회귀분석 연습 : 007 한기대 (0) | 2017.02.07 |
# | 빅데이터 기획 : 분석 : 일정 (0) | 2017.02.06 |
# | 분석용 데이터 : Bank Marketing (0) | 2017.01.30 |