用jsoup分析下载巨鲸的mp3
这两天突然想听听杰克逊的歌.首选当然是巨鲸.支持正版.
发现在线收听都会重复下载,浪费带宽,并且网络差的时候听让人崩溃.
所以决定把歌曲下载下来.
网站不提供批量下载,手动一个一个点可不是我们程序员的风格.
分析了一下它的页面源代码,结构挺整齐的,OK,jsoup闪亮登场.这里用的是最新的1.5.1版.
代码很简单:
package com.javaeye.i2534;

import java.io.Closeable;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.HashMap;
import java.util.Map;
import java.util.zip.GZIPInputStream;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/**
 * Scrapes a top100.cn song-list page with jsoup and downloads every song on
 * it as an mp3 file.
 *
 * <p>The flow is: {@link #findIds(String)} reads the song list page,
 * {@link #findDownPathById(String)} resolves each song id to a download
 * address, and {@link #downByPath(String, String, String)} stores the file.
 * All of this depends on element ids, class names and attributes of the
 * site's HTML, so any change on the site side can break it.
 */
public class Top100Mp3Downloader {

    /** Size of the copy buffer used while streaming a song to disk. */
    private static final int BUFFER_SIZE = 8192;

    /**
     * Reads a song list page and returns song titles mapped to their
     * encrypted ids.
     *
     * @param url
     *            song list address, e.g.
     *            http://www.top100.cn/artist/info-agr5dcqe.shtml
     * @return title-to-id pairs; empty (never {@code null}) when the page
     *         cannot be fetched or does not have the expected structure
     */
    private Map<String, String> findIds(String url) {
        Map<String, String> map = new HashMap<String, String>();
        try {
            Document doc = Jsoup.parse(new URL(url), 1000 * 10);
            Element listDiv = doc.getElementById("songsListDiv");
            if (listDiv == null) {
                System.err.println("Element 'songsListDiv' not found in " + url);
                return map;
            }
            Elements uls = listDiv.getElementsByTag("ul");
            for (Element ul : uls) {
                Element hidden = ul.getElementById("hidValue");
                Element li = ul.getElementsByAttributeValue("class", "l3").first();
                if (hidden == null || li == null) {
                    // Not a song row (or the markup changed): skip it
                    // instead of failing the whole page.
                    continue;
                }
                Element href = li.getElementsByTag("a").first();
                if (href == null) {
                    continue;
                }
                map.put(href.attr("title"), hidden.val());
            }
        } catch (MalformedURLException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
        return map;
    }

    /**
     * Opens the download page that belongs to an encrypted song id and
     * extracts the download address from it.
     *
     * @param id
     *            encrypted id as found on the song list page
     * @return the {@code href} of the first matching download link, or
     *         {@code null} when the id is {@code null}, the page cannot be
     *         fetched, or no such link exists
     */
    private String findDownPathById(String id) {
        if (id == null) {
            return null;
        }
        if (id.startsWith("m")) {
            // Every id on the list page starts with "m"; the download page
            // is requested without that prefix.
            id = id.substring(1);
        }
        try {
            URL url = new URL("http://www.top100.cn/download/download.aspx?Productid=" + id);
            Document doc = Jsoup.parse(url, 1000 * 2);
            // The download link is located through its exact onclick value.
            Elements eles = doc.getElementsByAttributeValue("onclick",
                    "javascript:$(this).css('color','red');");
            for (Element ele : eles) {
                if (ele.tagName().equals("a")) {
                    return ele.attr("href");
                }
            }
        } catch (MalformedURLException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
        return null;
    }

    /**
     * Downloads a song from the given address into {@code dir}.
     *
     * @param dir
     *            target directory; created when it does not exist
     * @param name
     *            song title, used (after sanitizing) as the file name
     * @param path
     *            song download address
     * @return {@code true} when the file was written completely,
     *         {@code false} otherwise
     */
    private boolean downByPath(String dir, String name, String path) {
        if (path == null || path.length() == 0) {
            System.err.println("No download address for: " + name);
            return false;
        }
        File parent = new File(dir);
        if (!parent.exists() && !parent.mkdirs()) {
            System.err.println("Cannot create directory: " + parent);
            return false;
        }
        File mp3 = new File(parent, sanitizeFileName(name) + ".mp3");

        HttpURLConnection con = null;
        InputStream is = null;
        OutputStream os = null;
        boolean done = false;
        try {
            URL url = new URL(path);
            con = (HttpURLConnection) url.openConnection();
            // The referer must be forged, otherwise the server answers with
            // the home page. According to the author's analysis this is
            // unrelated to cookies.
            con.setRequestProperty("User-Agent",
                    "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Maxthon;)");
            con.setRequestProperty("Accept-Encoding", "gzip");
            con.setRequestProperty("referer", "http://www.top100.cn");
            con.setDoInput(true);
            con.connect();

            int code = con.getResponseCode();
            if (code != HttpURLConnection.HTTP_OK) {
                System.out.println("服务器返回:" + code);
                return false;
            }

            is = con.getInputStream();
            // gzip is explicitly requested above and HttpURLConnection does
            // not decode it, so decode here or the saved file is corrupt.
            if ("gzip".equalsIgnoreCase(con.getContentEncoding())) {
                is = new GZIPInputStream(is);
            }
            os = new FileOutputStream(mp3);
            byte[] b = new byte[BUFFER_SIZE];
            int length;
            while ((length = is.read(b)) != -1) {
                os.write(b, 0, length);
            }
            os.flush();
            done = true;
        } catch (MalformedURLException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            closeQuietly(os);
            closeQuietly(is);
            if (con != null) {
                con.disconnect();
            }
            if (!done && os != null) {
                // Do not leave a truncated file that looks like a song.
                mp3.delete();
            }
        }
        return done;
    }

    /**
     * Replaces characters that are not allowed in file names (including
     * path separators) with an underscore.
     */
    private static String sanitizeFileName(String name) {
        String cleaned = name == null ? "" : name.replaceAll("[\\\\/:*?\"<>|]", "_").trim();
        return cleaned.length() == 0 ? "untitled" : cleaned;
    }

    /** Closes a stream, ignoring a failure of the close itself. */
    private static void closeQuietly(Closeable c) {
        if (c != null) {
            try {
                c.close();
            } catch (IOException ignored) {
                // Nothing useful can be done when close fails.
            }
        }
    }

    /**
     * Downloads every song listed on the hard-coded artist page into the
     * hard-coded target directory and reports the result per song.
     */
    public static void main(String[] args) {
        Top100Mp3Downloader m = new Top100Mp3Downloader();
        Map<String, String> songs = m.findIds("http://www.top100.cn/artist/info-agr5dcqe.shtml");
        for (Map.Entry<String, String> e : songs.entrySet()) {
            String name = e.getKey();
            String path = m.findDownPathById(e.getValue());
            if (m.downByPath("F:\\music\\files\\Michael Jackson", name, path)) {
                System.out.println(name + " from " + path + " has down!");
            } else {
                System.err.println(name + " from " + path + " could not be downloaded.");
            }
        }
    }
}
本人测试,可以下载.
注:不保证一直可以使用,因为巨鲸只要改变验证或者改变任何一个html元素的特征就可以导致此程序失效.
页:
[1]