加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
克隆/下载
audio_conv_utils.py 2.71 KB
一键复制 编辑 原始数据 按行查看 历史
fchollet 提交于 2016-10-01 00:04 . Add music tagger model
import numpy as np
from keras import backend as K
TAGS = ['rock', 'pop', 'alternative', 'indie', 'electronic',
'female vocalists', 'dance', '00s', 'alternative rock', 'jazz',
'beautiful', 'metal', 'chillout', 'male vocalists',
'classic rock', 'soul', 'indie rock', 'Mellow', 'electronica',
'80s', 'folk', '90s', 'chill', 'instrumental', 'punk',
'oldies', 'blues', 'hard rock', 'ambient', 'acoustic',
'experimental', 'female vocalist', 'guitar', 'Hip-Hop',
'70s', 'party', 'country', 'easy listening',
'sexy', 'catchy', 'funk', 'electro', 'heavy metal',
'Progressive rock', '60s', 'rnb', 'indie pop',
'sad', 'House', 'happy']
def librosa_exists():
try:
__import__('librosa')
except ImportError:
return False
else:
return True
def preprocess_input(audio_path, dim_ordering='default'):
'''Reads an audio file and outputs a Mel-spectrogram.
'''
if dim_ordering == 'default':
dim_ordering = K.image_dim_ordering()
assert dim_ordering in {'tf', 'th'}
if librosa_exists():
import librosa
else:
raise RuntimeError('Librosa is required to process audio files.\n' +
'Install it via `pip install librosa` \nor visit ' +
'http://librosa.github.io/librosa/ for details.')
# mel-spectrogram parameters
SR = 12000
N_FFT = 512
N_MELS = 96
HOP_LEN = 256
DURA = 29.12
src, sr = librosa.load(audio_path, sr=SR)
n_sample = src.shape[0]
n_sample_wanted = int(DURA * SR)
# trim the signal at the center
if n_sample < n_sample_wanted: # if too short
src = np.hstack((src, np.zeros((int(DURA * SR) - n_sample,))))
elif n_sample > n_sample_wanted: # if too long
src = src[(n_sample - n_sample_wanted) / 2:
(n_sample + n_sample_wanted) / 2]
logam = librosa.logamplitude
melgram = librosa.feature.melspectrogram
x = logam(melgram(y=src, sr=SR, hop_length=HOP_LEN,
n_fft=N_FFT, n_mels=N_MELS) ** 2,
ref_power=1.0)
if dim_ordering == 'th':
x = np.expand_dims(x, axis=0)
elif dim_ordering == 'tf':
x = np.expand_dims(x, axis=3)
return x
def decode_predictions(preds, top_n=5):
'''Decode the output of a music tagger model.
# Arguments
preds: 2-dimensional numpy array
top_n: integer in [0, 50], number of items to show
'''
assert len(preds.shape) == 2 and preds.shape[1] == 50
results = []
for pred in preds:
result = zip(TAGS, pred)
result = sorted(result, key=lambda x: x[1], reverse=True)
results.append(result[:top_n])
return results
Loading...
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化