#! /bin/bash
# Automatically collect the download URLs of the top 500 songs on baidu.com
#
#
# Chart page listing the top 500 songs, and the local file it is saved to.
T500_url="http://list.mp3.baidu.com/topso/mp3topsong.html?id=1#top2"
File_name="mp3topsong.html"

# Create the log directory; -p keeps re-runs quiet when it already exists.
mkdir -p logs

# Fetch the chart page and save it as $File_name (wget messages are appended
# to logs/logs.log). Exit status 1 means the chart page could not be fetched.
if ! wget -a logs/logs.log "$T500_url" -O "$File_name"; then
  echo "下载TOP500网页错误"
  exit 1
fi

# Build top500.dat with one entry per line: "<number> <page url> <song name>".
# Only table cells that contain a 'word=' link are kept; the href value and the
# link text are separated by a space, and at most 501 entries are numbered.
# Without pipefail the status tested here is that of the final awk.
# Exit status 2 means the list could not be produced.
if ! grep 'word=' "$File_name" \
  | grep '<td' \
  | awk -F'href="' '{print $2}' \
  | sed 's/" target=_blank>/ /' \
  | sed 's/<\/a>//' \
  | head -n 501 \
  | awk '{print NR,$1,$2}' > top500.dat; then
  echo "分析TOP500网页错误"
  exit 2
fi

# Process every entry of top500.dat. read -r splits each line into its three
# fields directly and leaves backslashes untouched.
while read -r Song_num Page_url Song_name; do
  # Download the song's result page into a scratch file.
  if ! wget -a logs/logs.log --ignore-length "$Page_url" -O data.tmp; then
    echo "下载歌曲${Song_name}的地址${Page_url}发生错误"
    echo "$Song_num $Song_name $Page_url" >>logs/Down_error.log
    rm -f data.tmp
    continue
  fi

  # Pick the first http...mp3 address found on the downloaded page.
  # The pipeline's status is that of `head`, so this branch only fires if
  # `head` itself fails; a page without a match yields an empty Song_url.
  if ! Song_url=$(grep 'word=mp3' data.tmp \
    | awk -F'来源网址' '{print $2}' \
    | awk -F'请参照' '{print $1}' \
    | awk '{print $2}' \
    | grep 'http' \
    | grep 'mp3$' \
    | head -n 1); then
    echo "分析歌曲${Song_name}的地址发生错误"
    echo "$Song_num $Song_name $Page_url" >>logs/Down_error.log
    rm -f data.tmp
    continue
  fi

  # The scratch file is no longer needed, whatever the outcome of the parse.
  rm -f data.tmp

  if [[ -z "$Song_url" ]]; then
    echo "歌曲${Song_name}的地址下载地址为空"
    echo "$Song_num $Song_name $Page_url" >>logs/Down_error.log
    continue
  fi

  # Report the result and append it to Down_url_name.txt. Fields are joined
  # by single spaces; an empty song name is omitted rather than leaving a gap.
  out="$Song_num${Song_name:+ $Song_name} $Song_url"
  printf '%s\n' "$out"
  printf '%s\n' "$out" >>Down_url_name.txt
done < top500.dat

echo "All job completed"
# Intermediate files are kept on purpose; uncomment to remove them.
#rm top500.dat
#rm $File_name
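# Sample output line (appears to be in the Down_url_name.txt format "<number> <song name> <mp3 url>"):
# 7 认真的雪 http://203.69.144.130/grm.cdn.hinet.net/xuite/e/c/d/1/15182794/blog_355860/dv/6777106/6777106.mp3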