簡體   English   中英

從 Musixmatch 中檢索歌詞

[英]Retrieving Lyrics from Musixmatch

import requests
import json
import urllib
import lyricsgenius
import os
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.keys import Keys


from bs4 import BeautifulSoup


from pymongo import MongoClient

# Client for the MongoDB server at localhost:27017.
client = MongoClient('localhost', 27017)
# Handle to the "dbsparta" database; main() reads and updates its
# new_top200 collection.
db = client.dbsparta

def get_artist_id(artistName):
    """Return the Musixmatch artist id of the first search hit for a name.

    Sends an ``artist.search`` request and returns the ``artist_id`` of the
    first entry in the ``artist_list`` of the decoded JSON response.

    Args:
        artistName: Artist name to search for. It is handed to requests via
            ``params`` so that it is percent-encoded; a name containing
            ``&``, ``#``, ``+`` or ``=`` would otherwise be split into or
            truncate other query parameters.

    Returns:
        The ``artist_id`` value of the first element of ``artist_list``.

    Raises:
        IndexError: If ``artist_list`` is empty.
        KeyError, TypeError: If the decoded JSON does not have the
            ``message -> body -> artist_list`` shape this function indexes.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36'}
    # Same query as before, but built by requests so every value is escaped.
    params = {
        'page_size': 100,
        'format': 'json',
        'apikey': '123',
        'q_artist': artistName,
    }
    response = requests.get("https://api.musixmatch.com/ws/1.1/artist.search", params=params, headers=headers)
    # Force the text decoding used by .json() instead of relying on detection.
    response.encoding = 'UTF-8'
    return response.json()['message']['body']['artist_list'][0]['artist']['artist_id']



def get_album_ids(artist_id):
    """Fetch the album list of a Musixmatch artist.

    Issues an ``artist.albums.get`` request for ``artist_id`` and returns the
    ``album_list`` element of the decoded JSON response unchanged.

    Args:
        artist_id: Artist identifier; converted with ``str()`` and appended
            to the request URL.

    Returns:
        The value stored under ``message -> body -> album_list``.
    """
    endpoint = "https://api.musixmatch.com/ws/1.1/artist.albums.get?page_size=100&format=json&apikey=123&artist_id="
    user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36'

    reply = requests.get(endpoint + str(artist_id), headers={'User-Agent': user_agent})
    # The encoding must be set before the body is decoded by .json().
    reply.encoding = 'UTF-8'

    payload = reply.json()
    return payload['message']['body']['album_list']

def get_album_tracks_ids(album_id):
    """Fetch the track list of a Musixmatch album.

    Issues an ``album.tracks.get`` request for ``album_id`` and returns the
    ``track_list`` element of the decoded JSON response unchanged.

    Args:
        album_id: Album identifier; converted with ``str()`` and appended to
            the request URL.

    Returns:
        The value stored under ``message -> body -> track_list``.
    """
    url = (
        "https://api.musixmatch.com/ws/1.1/album.tracks.get?page_size=100&format=json&apikey=123&album_id="
        + str(album_id)
    )
    request_headers = {}
    request_headers['User-Agent'] = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36'

    result = requests.get(url, headers=request_headers)
    result.encoding = 'UTF-8'
    body = result.json()['message']['body']
    return body['track_list']


# def get_track_id(artist_id):
#     headers = {
#         'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36'}
#     response = requests.get("https://api.musixmatch.com/ws/1.1/track.search?page_size=100format=json&apikey=123&f_artist_id=" + str(artist_id), headers=headers)
#     response.encoding = 'UTF-8'
#     for tracks in response.json()['message']['body']['track_list']:
#         print(tracks['track']['track_name'])

def get_track_lyrics(track_id):
    """Fetch the lyrics text of a single Musixmatch track.

    Issues a ``track.lyrics.get`` request for ``track_id`` and returns the
    ``lyrics_body`` field of the decoded JSON response.

    Args:
        track_id: Track identifier; converted with ``str()`` and appended to
            the request URL.

    Returns:
        The value stored under ``message -> body -> lyrics -> lyrics_body``.

    Raises:
        KeyError, TypeError: If the decoded JSON does not have that shape
            (for example when one of the levels is a list, not a dict).
    """
    base_url = "https://api.musixmatch.com/ws/1.1/track.lyrics.get?apikey=123&track_id="
    browser_headers = dict([
        ('User-Agent', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36'),
    ])

    answer = requests.get(base_url + str(track_id), headers=browser_headers)
    answer.encoding = 'UTF-8'

    decoded = answer.json()
    lyrics_section = decoded['message']['body']['lyrics']
    return lyrics_section['lyrics_body']



def main():
    """Collect lyrics for every artist document in ``db.new_top200``.

    For each document the artist name is printed and resolved to an artist
    id; the artist's albums and each album's tracks are then enumerated, and
    every track's lyrics are appended (``$push``) to the ``lyrics`` array of
    the document whose ``name`` matches.
    """
    # Materialise the cursor first: the same collection is updated below.
    artists = list(db.new_top200.find({}, {'_id': 0}))

    for artist in artists:
        print(artist['name'])
        artist_id = get_artist_id(artist['name'])

        for album_entry in get_album_ids(artist_id):
            tracks = get_album_tracks_ids(album_entry['album']['album_id'])

            for track_entry in tracks:
                lyrics_text = get_track_lyrics(track_entry['track']['track_id'])
                db.new_top200.update_one(
                    {'name': artist['name']},
                    {'$push': {'lyrics': lyrics_text}},
                )


# get_track_id(get_artist_id('Kanye West'))

# get_album_ids(get_artist_id("Kanye West"))
# get_album_tracks(15565713)


if __name__ == "__main__":
    # Ad-hoc calls left over from trying the helpers one at a time:
    # for album in get_album_ids(get_artist_id("Kanye West")):
    #     get_album_tracks_ids(album['album']['album_id'])
    # get_track_lyrics(96610952)
    # get_album_tracks_ids(15565713)
    # get_album_ids(get_artist_id('Drake'))
    # Run the full crawl only when this file is executed as a script.
    main()



我正在嘗試獲取藝術家的所有歌詞並將其存儲在數據庫中。 例如,如果藝術家是“Drake”,我希望所有歌詞都存儲在數據庫中的“lyrics”鍵中。

但是,每次運行相同的代碼時,都會遇到一些無法預測的錯誤。 例如,它會毫無問題地插入 400 條歌詞,然後突然出現一條錯誤消息:“list indices must be integers or slices, not str”(列表索引必須是整數或切片,而不是 str)。 這個錯誤讓我很困惑,因為我原以為所有 JSON 數據的格式都相同,可是錯誤卻在處理了 400 首歌詞之後才突然出現。(原文此處附有錯誤截圖。)

我再次運行相同的代碼時,可能在處理大約 200 首歌詞後遇到 JSON 解碼錯誤;之後再運行一次,在處理了另一個數量的歌詞後,又會出現我在開頭描述的那個錯誤。

有人可以解釋這個錯誤的隨機性嗎?

謝謝!

您對 JSON 返回的數據類型做了假設。 在您的情況下,我懷疑其中某個 JSON 元素是列表(list),而不是對象(dict)。

您的問題可以通過這個簡單的示例重現:

# Minimal reproduction: 'lyrics' holds a list where the lookup below
# expects a dict.
my_dict = {
    'message': {
        'body': {
            'lyrics': ['Always look on the bright side of life']
        }
    }
}

# Indexing that list with the string key 'lyrics_body' raises the TypeError.
print(my_dict['message']['body']['lyrics']['lyrics_body'])

給出:

TypeError: list indices must be integers or slices, not str

你如何解決它? 您需要檢查每個元素是否符合您的預期; 例如:

# Sample payload in which 'lyrics' is a list rather than a dict.
my_dict = {
    'message': {
        'body': {
            'lyrics': ['Always look on the bright side of life']
        }
    }
}

def checker(item, field):
    """Return ``item[field]`` after verifying that ``item`` is a dict.

    Args:
        item: Container expected to be a dict.
        field: Key to look up in ``item``.

    Returns:
        ``item.get(field)``, which is ``None`` when the key is absent.

    Raises:
        ValueError: If ``item`` is not a dict.
    """
    # Reject anything that is not a dict up front, with a readable message.
    if not isinstance(item, dict):
        raise ValueError(f"'{item}' in field '{field}' is not a valid dict")
    return item.get(field)


# Walk the payload one level at a time; checker() validates each container
# before reading from it.
message = checker(my_dict, 'message')
body = checker(message, 'body')
lyrics = checker(body, 'lyrics')
# 'lyrics' is a list here, so this call raises ValueError.
print(checker(lyrics, 'lyrics'))

給出:

ValueError: '['Always look on the bright side of life']' in field 'lyrics' is not a valid dict

暫無
暫無

聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.

 
粵ICP備18138465號  © 2020-2024 STACKOOM.COM