前端Node.js爬取百度音乐热门歌单

什么是web前端开发 前端开发要会什么 前端移动开发能做什么

首先,导入必要的模块,然后获取歌单的 URL(也就是路径)。其中 cheerio 是第三方模块,需要先使用 npm install cheerio 进行安装
javascript 代码

var http = require('http') // HTTP client used to download the pages
var cheerio = require('cheerio') // HTML parser (third-party module)
var fs = require('fs') // file system module
// Target site (Baidu Music hottest playlists), for example:
// http://music.baidu.com/songlist/tag/%E5%85%A8%E9%83%A8?orderType=1&offset=0&third_type=
// The page offset and the trailing "&third_type=" are appended by getHtml.
var startHref =
    'http://music.baidu.com/songlist/tag/%E5%85%A8%E9%83%A8?orderType=1&offset='
// Playlist paths collected from the listing pages
var urls = []
// Details of every song scraped so far (its length is the song count)
var song = []
// Number of listing-page requests started by getHtml that have not finished yet.
var pendingPages = 0
/**
 * Download one playlist listing page and collect the playlist links on it.
 *
 * Every `.text-title>a` href found under `.main-body .songlist-list ul li`
 * is appended to the shared `urls` array. Once every listing page requested
 * so far has finished (successfully or not), song scraping is started by
 * passing the first collected path to getSongInfo.
 *
 * @param {string} href - Listing URL up to and including `offset=`.
 * @param {number} page - Offset value appended to `href`.
 */
function getHtml(href, page) {
    var html = ''
    var finished = false
    pendingPages++

    // Mark this page as done exactly once. The requests run concurrently and
    // may complete in any order, so scraping starts only after the last one.
    function pageDone() {
        if (finished) {
            return
        }
        finished = true
        pendingPages--
        if (pendingPages > 0) {
            return
        }
        console.log('歌单路径获取完成:' + urls.length)
        console.log('歌单数量:' + urls.length)
        if (urls.length > 0) {
            getSongInfo(urls.shift())
        } else {
            console.log('已完成!')
        }
    }

    var req = http.get(href + page + '&third_type=', function(res) {
        res.setEncoding('utf8')
        res.on('data', function(chunk) {
            html += chunk
        })
        res.on('end', function() {
            // Parse the page and remember the path of every playlist on it
            var $ = cheerio.load(html)
            var links = $('.main-body .songlist-list ul li')
                .find('.text-title>a')
                .toArray()
            for (var i = 0; i < links.length; i++) {
                var link = links[i].attribs.href
                // Skip anchors without an href so no "undefined" path is queued
                if (link) {
                    urls.push(link)
                }
            }
            pageDone()
        })
    })
    // Without a listener a failed request would throw and end the process.
    req.on('error', function(err) {
        console.error('歌单页面请求失败(offset=' + page + '):' + err.message)
        pageDone()
    })
}

然后,获取每个歌单中的歌曲及其详细信息(歌名、歌手、专辑)
javascript 代码

/**
 * Download one playlist page and record every song listed on it.
 *
 * Each `.song-list .song-item` element yields a record with `songName`,
 * `songSinger` and `songAlbum`, which is pushed onto the shared `song` array
 * and whose name is logged. Afterwards the next path is taken from `urls`,
 * so playlists are fetched one at a time until `urls` is empty.
 *
 * @param {string} songUrl - Playlist path appended to http://music.baidu.com.
 */
function getSongInfo(songUrl) {
    var html = ''
    var finished = false

    // Continue with the next queued playlist, or report the total when done.
    // Guarded so that an error after a finished response cannot advance twice.
    function next() {
        if (finished) {
            return
        }
        finished = true
        if (urls.length > 0) {
            getSongInfo(urls.shift())
        } else {
            console.log('已完成:歌曲数目' + song.length)
            // `song` now holds every scraped record (name, singer, album);
            // it is only reported on the command line.
        }
    }

    var req = http.get('http://music.baidu.com' + songUrl, function(res) {
        res.setEncoding('utf8')
        res.on('data', function(chunk) {
            html += chunk
        })
        res.on('end', function() {
            var $ = cheerio.load(html)
            var songlistData = $('.song-list .song-item')

            // Trimmed text found under `inner` inside the direct child `child`
            function readText(item, child, inner) {
                return $(item)
                    .children(child)
                    .find(inner)
                    .text()
                    .trim()
            }

            for (var i = 0; i < songlistData.length; i++) {
                var item = songlistData[i]
                var songData = {
                    /* song name */
                    songName: readText(item, '.song-title', 'a'),
                    /* singer */
                    songSinger: readText(item, '.singer', 'span'),
                    /* album name */
                    songAlbum: readText(item, '.album-title', 'a'),
                }
                song.push(songData)
                console.log(songData.songName)
            }
            next()
        })
    })
    // A failed playlist is logged and skipped instead of ending the process.
    req.on('error', function(err) {
        console.error('歌单请求失败(' + songUrl + '):' + err.message)
        next()
    })
}

最后,执行
javascript 代码

// Offset of the last listing page to fetch (offsets 0, 20 and 40 = three pages).
var maxPage = 40
/**
 * Entry point: request the first three playlist listing pages.
 *
 * The site's `offset` query parameter grows by 20 per page, so the loop
 * steps by 20 from 0 up to and including `maxPage`.
 */
function start() {
    var pageStep = 20
    console.log('开始获取歌单!')
    for (var offset = 0; offset <= maxPage; offset += pageStep) {
        getHtml(startHref, offset)
    }
}
start()

完整代码
javascript 代码

var http = require('http') // HTTP client used to download the pages
var cheerio = require('cheerio') // HTML parser (third-party module)
var fs = require('fs') // file system module
// Target site (Baidu Music hottest playlists), for example:
// http://music.baidu.com/songlist/tag/%E5%85%A8%E9%83%A8?orderType=1&offset=0&third_type=
// The page offset and the trailing "&third_type=" are appended by getHtml.
var startHref =
    'http://music.baidu.com/songlist/tag/%E5%85%A8%E9%83%A8?orderType=1&offset='
// Playlist paths collected from the listing pages
var urls = []
// Details of every song scraped so far (its length is the song count)
var song = []
// Number of listing-page requests started by getHtml that have not finished yet.
var pendingPages = 0
/**
 * Download one playlist listing page and collect the playlist links on it.
 *
 * Every `.text-title>a` href found under `.main-body .songlist-list ul li`
 * is appended to the shared `urls` array. Once every listing page requested
 * so far has finished (successfully or not), song scraping is started by
 * passing the first collected path to getSongInfo.
 *
 * @param {string} href - Listing URL up to and including `offset=`.
 * @param {number} page - Offset value appended to `href`.
 */
function getHtml(href, page) {
    var html = ''
    var finished = false
    pendingPages++

    // Mark this page as done exactly once. The requests run concurrently and
    // may complete in any order, so scraping starts only after the last one.
    function pageDone() {
        if (finished) {
            return
        }
        finished = true
        pendingPages--
        if (pendingPages > 0) {
            return
        }
        console.log('歌单路径获取完成:' + urls.length)
        console.log('歌单数量:' + urls.length)
        if (urls.length > 0) {
            getSongInfo(urls.shift())
        } else {
            console.log('已完成!')
        }
    }

    var req = http.get(href + page + '&third_type=', function(res) {
        res.setEncoding('utf8')
        res.on('data', function(chunk) {
            html += chunk
        })
        res.on('end', function() {
            // Parse the page and remember the path of every playlist on it
            var $ = cheerio.load(html)
            var links = $('.main-body .songlist-list ul li')
                .find('.text-title>a')
                .toArray()
            for (var i = 0; i < links.length; i++) {
                var link = links[i].attribs.href
                // Skip anchors without an href so no "undefined" path is queued
                if (link) {
                    urls.push(link)
                }
            }
            pageDone()
        })
    })
    // Without a listener a failed request would throw and end the process.
    req.on('error', function(err) {
        console.error('歌单页面请求失败(offset=' + page + '):' + err.message)
        pageDone()
    })
}
/**
 * Download one playlist page and record every song listed on it.
 *
 * Each `.song-list .song-item` element yields a record with `songName`,
 * `songSinger` and `songAlbum`, which is pushed onto the shared `song` array
 * and whose name is logged. Afterwards the next path is taken from `urls`,
 * so playlists are fetched one at a time until `urls` is empty.
 *
 * @param {string} songUrl - Playlist path appended to http://music.baidu.com.
 */
function getSongInfo(songUrl) {
    var html = ''
    var finished = false

    // Continue with the next queued playlist, or report the total when done.
    // Guarded so that an error after a finished response cannot advance twice.
    function next() {
        if (finished) {
            return
        }
        finished = true
        if (urls.length > 0) {
            getSongInfo(urls.shift())
        } else {
            console.log('已完成:歌曲数目' + song.length)
            // `song` now holds every scraped record (name, singer, album);
            // it is only reported on the command line.
        }
    }

    var req = http.get('http://music.baidu.com' + songUrl, function(res) {
        res.setEncoding('utf8')
        res.on('data', function(chunk) {
            html += chunk
        })
        res.on('end', function() {
            var $ = cheerio.load(html)
            var songlistData = $('.song-list .song-item')

            // Trimmed text found under `inner` inside the direct child `child`
            function readText(item, child, inner) {
                return $(item)
                    .children(child)
                    .find(inner)
                    .text()
                    .trim()
            }

            for (var i = 0; i < songlistData.length; i++) {
                var item = songlistData[i]
                var songData = {
                    /* song name */
                    songName: readText(item, '.song-title', 'a'),
                    /* singer */
                    songSinger: readText(item, '.singer', 'span'),
                    /* album name */
                    songAlbum: readText(item, '.album-title', 'a'),
                }
                song.push(songData)
                console.log(songData.songName)
            }
            next()
        })
    })
    // A failed playlist is logged and skipped instead of ending the process.
    req.on('error', function(err) {
        console.error('歌单请求失败(' + songUrl + '):' + err.message)
        next()
    })
}
// Offset of the last listing page to fetch (offsets 0, 20 and 40 = three pages).
var maxPage = 40
/**
 * Entry point: request the first three playlist listing pages.
 *
 * The site's `offset` query parameter grows by 20 per page, so the loop
 * steps by 20 from 0 up to and including `maxPage`.
 */
function start() {
    var pageStep = 20
    console.log('开始获取歌单!')
    for (var offset = 0; offset <= maxPage; offset += pageStep) {
        getHtml(startHref, offset)
    }
}
start()

源码 https://github.com/qianxingW/nodejs

前端模块化开发用什么比较好 java开发前端需要什么软件 web前端开发要用什么软件

» 本文来自:前端开发者 » 《前端Node.js爬取百度音乐热门歌单》
» 本文链接地址:https://www.rokub.com/5393.html
» 您也可以订阅本站:https://www.rokub.com
赞(0)
64K

评论 抢沙发

评论前必须登录!