本章節源碼,並附有部分註解,希望可以幫助到有需要的同學。
// Crawl a single imooc course page, then filter, organize, and print its chapter content.
var http = require('http')
var cheerio = require('cheerio')
// Course page to fetch.
// NOTE(review): the comment above refers to imooc, but this host is idcbgp.cn — confirm this is the intended target.
var url = 'http://idcbgp.cn/learn/348'
/**
 * Extract the chapter/video outline from a course page.
 *
 * Every `.chapter` element becomes one entry: its `h3` text is the chapter
 * title, and each `li` directly under its `.video` element contributes one
 * video, read from the `.J-media-item` link inside that `li`.
 *
 * Result shape:
 *   [{
 *     chapterTitle: '',
 *     videos: [{ videoTitle: '', videoId: '' }]
 *   }]
 *
 * @param {string} html - Markup handed to `cheerio.load`.
 * @returns {Array<{chapterTitle: string, videos: Array<{videoTitle: string, videoId: (string|undefined)}>}>}
 *   One entry per chapter, in document order. `videoId` is the part of the
 *   link's `href` that follows `video/`; it is undefined when the `href`
 *   does not contain that segment.
 */
function filterChapters(html) {
  // Collapse runs of whitespace (newlines and indentation coming from the
  // markup) into single spaces; trimming alone leaves the inner gaps behind.
  function normalizeText(text) {
    return text.replace(/\s+/g, ' ').trim()
  }

  var $ = cheerio.load(html)
  var courseData = []

  $('.chapter').each(function () {
    var chapter = $(this)
    var chapterData = {
      chapterTitle: normalizeText(chapter.find('h3').text()),
      videos: []
    }

    chapter.find('.video').children('li').each(function () {
      var video = $(this).find('.J-media-item')
      var href = video.attr('href')

      // A list item without a media link has no href; skip it instead of
      // letting `undefined.split` abort the whole extraction.
      if (!href) {
        return
      }

      chapterData.videos.push({
        videoTitle: normalizeText(video.text()),
        videoId: href.split('video/')[1]
      })
    })

    courseData.push(chapterData)
  })

  return courseData
}
/**
 * Print the course outline to the console.
 *
 * For each chapter, logs its title, then one line per video in the form
 * `[videoId]videoTitle`. Every logged string ends with an extra newline.
 *
 * @param {Array<{chapterTitle: string, videos: Array<{videoTitle: string, videoId: string}>}>} courseData
 *   Chapters to print, in order.
 */
function printCourseInfo(courseData) {
  for (var c = 0; c < courseData.length; c++) {
    var chapter = courseData[c]
    console.log(chapter.chapterTitle + '\n')

    var entries = chapter.videos
    for (var v = 0; v < entries.length; v++) {
      var entry = entries[v]
      console.log('[' + entry.videoId + ']' + entry.videoTitle + '\n')
    }
  }
}
// Fetch the course page, buffer the body as text, then extract and print the
// chapter outline once the response has ended.
http.get(url, function (res) {
  // Only a 200 response carries the page to parse; report anything else
  // (redirects, error pages) and drain the body so the socket is released.
  if (res.statusCode !== 200) {
    console.error('extrat 348 chapter action is failed', 'unexpected status code ' + res.statusCode)
    res.resume()
    return
  }

  // Decode at the stream level: appending raw Buffer chunks to a string
  // converts each chunk on its own and corrupts any multi-byte UTF-8
  // character that happens to be split across two chunks.
  res.setEncoding('utf8')

  var html = ''
  res.on('data', function (chunk) {
    html += chunk
  })
  res.on('end', function () {
    printCourseInfo(filterChapters(html))
  })
  // Failures while the response body is being read.
  res.on('error', function (e) {
    console.error('extrat 348 chapter action is failed', e)
  })
}).on('error', function (e) {
  // Request-level failures (DNS lookup, refused connection, ...) are emitted
  // on the request object, not on the response; without this handler they
  // surface as an unhandled 'error' event and terminate the process.
  console.error('extrat 348 chapter action is failed', e)
})
2018-10-30
單純用trim方法,輸出還是很丑
2018-08-30
非常不錯,而且看得出來樓主也是看懂了的。我當時結果出不來,發現慕課網的 div class 是改了的,也是醉了,哈哈。改了後就出來了。