mirror of
https://gitee.com/insArvin/nypc_python_advanced.git
synced 2026-04-17 22:52:28 +08:00
update Spider/neteaseMusicSpider/main.py.
修复榜单只能获取到奇数排名歌曲的问题

Signed-off-by: iamzhaohaibo <941604465@qq.com>
This commit is contained in:
@@ -37,13 +37,13 @@ class neteaseMusicSpider:
|
||||
def get_page(self):
|
||||
# 用Selenium渲染页面,获取iframe
|
||||
# 创建Service对象,指定ChromeDriver路径
|
||||
service = Service(executable_path='./chromedriver_win32/chromedriver.exe')
|
||||
service = Service(executable_path='/Users/zhaohaibo/Desktop/chromedriver-mac-x64/chromedriver')
|
||||
# 启动Chrome浏览器
|
||||
driver = webdriver.Chrome(service=service)
|
||||
# 访问目标网址
|
||||
driver.get(self.url)
|
||||
# 等待3秒,让JavaScript加载完成iframe和#document
|
||||
time.sleep(3)
|
||||
# time.sleep(3)
|
||||
|
||||
# 定位iframe元素
|
||||
iframe_elem = driver.find_element(By.TAG_NAME, "iframe")
|
||||
@@ -64,8 +64,8 @@ class neteaseMusicSpider:
|
||||
try:
|
||||
# 使用BeautifulSoup解析获取到的页面内容
|
||||
soup = BeautifulSoup(self.get_page(), 'html.parser')
|
||||
# 查找所有class为'even'的tr标签
|
||||
trs = soup.find_all('tr', class_='even')
|
||||
# 查找table > tbody标签下的所有的tr标签
|
||||
trs = soup.select('table > tbody')[0]
|
||||
# 返回找到的tr标签列表
|
||||
return trs
|
||||
except Exception as e:
|
||||
@@ -85,8 +85,8 @@ class neteaseMusicSpider:
|
||||
print(songs_html, '\n', type(songs_html))
|
||||
# 遍历每个歌曲元素
|
||||
for song in songs_html:
|
||||
# 打印当前歌曲元素(此处歌曲可能只获取奇数rank,偶数rank的页面结构需要去区分获取)
|
||||
print(song, '\n')
|
||||
# 提取歌曲排行,通过CSS选择器定位元素并获取排行对应文本内容
|
||||
s_rank = song.select('td:nth-child(1) >div>span')[0].string
|
||||
# 提取歌曲标题,通过CSS选择器定位元素并获取title属性
|
||||
s_title = song.select('span > a > b')[0].get_attribute_list('title')[0]
|
||||
# 提取歌手信息,通过CSS选择器定位元素并获取title属性
|
||||
@@ -94,9 +94,17 @@ class neteaseMusicSpider:
|
||||
# 提取歌曲时长,通过CSS选择器定位元素并获取文本内容
|
||||
s_duration = song.select('td.s-fc3 > span')[0].string
|
||||
# 提取歌曲ID,通过CSS选择器定位元素
|
||||
s_id = song.select('td.rank > div > div > span')
|
||||
s_id = song.select('td:nth-child(2) > div > div > span')[0].get_attribute_list('data-res-id')[0]
|
||||
# 打印提取到的歌曲信息
|
||||
print(s_id, s_title, s_singer, s_duration, '\n')
|
||||
print(s_rank, s_id, s_title, s_singer, s_duration, '\n')
|
||||
# 将歌曲信息添加到字典中、方便后续写入数据库、表格存储
|
||||
songs[s_id] = {
|
||||
'rank': s_rank,
|
||||
'title': s_title,
|
||||
'singer': s_singer,
|
||||
'duration': s_duration
|
||||
}
|
||||
|
||||
|
||||
except Exception as e:
|
||||
# 捕获异常并打印错误信息
|
||||
|
||||
Reference in New Issue
Block a user