update Spider/neteaseMusicSpider/main.py.

修复获取榜单时只能抓取到奇数排名歌曲、偶数排名歌曲被漏掉的问题

Signed-off-by: iamzhaohaibo <941604465@qq.com>
This commit is contained in:
iamzhaohaibo
2025-12-13 10:36:31 +00:00
committed by Gitee
parent d5a68effec
commit fc2838e302

View File

@@ -37,13 +37,13 @@ class neteaseMusicSpider:
def get_page(self):
# 用Selenium渲染页面获取iframe
# 创建Service对象指定ChromeDriver路径
service = Service(executable_path='./chromedriver_win32/chromedriver.exe')
service = Service(executable_path='/Users/zhaohaibo/Desktop/chromedriver-mac-x64/chromedriver')
# 启动Chrome浏览器
driver = webdriver.Chrome(service=service)
# 访问目标网址
driver.get(self.url)
# 等待3秒让JavaScript加载完成iframe和#document
time.sleep(3)
# time.sleep(3)
# 定位iframe元素
iframe_elem = driver.find_element(By.TAG_NAME, "iframe")
@@ -64,8 +64,8 @@ class neteaseMusicSpider:
try:
# 使用BeautifulSoup解析获取到的页面内容
soup = BeautifulSoup(self.get_page(), 'html.parser')
# 查找所有class为'even'的tr标签
trs = soup.find_all('tr', class_='even')
# 取第一个table > tbody标签(遍历它即可得到其下的各个tr行;假设tbody的直接子节点均为tr,待确认)
trs = soup.select('table > tbody')[0]
# 返回找到的tr标签列表
return trs
except Exception as e:
@@ -85,8 +85,8 @@ class neteaseMusicSpider:
print(songs_html, '\n', type(songs_html))
# 遍历每个歌曲元素
for song in songs_html:
# 打印当前歌曲元素。注意:此处可能只获取到奇数rank的歌曲,偶数rank的页面结构不同,需要区分获取
print(song, '\n')
# 提取歌曲排行通过CSS选择器定位元素并获取排行对应文本内容
s_rank = song.select('td:nth-child(1) >div>span')[0].string
# 提取歌曲标题通过CSS选择器定位元素并获取title属性
s_title = song.select('span > a > b')[0].get_attribute_list('title')[0]
# 提取歌手信息通过CSS选择器定位元素并获取title属性
@@ -94,9 +94,17 @@ class neteaseMusicSpider:
# 提取歌曲时长通过CSS选择器定位元素并获取文本内容
s_duration = song.select('td.s-fc3 > span')[0].string
# 提取歌曲ID通过CSS选择器定位元素
s_id = song.select('td.rank > div > div > span')
s_id = song.select('td:nth-child(2) > div > div > span')[0].get_attribute_list('data-res-id')[0]
# 打印提取到的歌曲信息
print(s_id, s_title, s_singer, s_duration, '\n')
print(s_rank, s_id, s_title, s_singer, s_duration, '\n')
# 将歌曲信息添加到字典中、方便后续写入数据库、表格存储
songs[s_id] = {
'rank': s_rank,
'title': s_title,
'singer': s_singer,
'duration': s_duration
}
except Exception as e:
# 捕获异常并打印错误信息