diff --git a/Contributors.md b/Contributors.md
new file mode 100644
index 000000000..6ab950d8d
--- /dev/null
+++ b/Contributors.md
@@ -0,0 +1,3 @@
+Bug fix in file ShortTermFeatures.py, function spectrogram:
+ 1. computing specgram
+ 2. plotting
diff --git a/pyAudioAnalysis/ShortTermFeatures.py b/pyAudioAnalysis/ShortTermFeatures.py
index b892634d0..7ec58d017 100644
--- a/pyAudioAnalysis/ShortTermFeatures.py
+++ b/pyAudioAnalysis/ShortTermFeatures.py
@@ -419,6 +419,11 @@ def spectrogram(signal, sampling_rate, window, step, plot=False,
         X = abs(fft(x))
         X = X[0:num_fft]
         X = X / len(X)
+
+        if X.shape[0] != specgram.shape[1]:
+            align = np.zeros(specgram.shape[1] - X.shape[0], dtype=np.float64)
+            X = np.append(X, align)
+
         specgram[count_fr-1, :] = X
 
     freq_axis = [float((f + 1) * sampling_rate) / (2 * num_fft)
@@ -427,8 +432,14 @@ def spectrogram(signal, sampling_rate, window, step, plot=False,
                  for t in range(specgram.shape[0])]
 
     if plot:
-        fig, ax = plt.subplots()
-        imgplot = plt.imshow(specgram.transpose()[::-1, :])
+        spT = specgram.transpose()[::-1, :]
+        amin = 1e-10
+        magnitude = spT ** 2
+        ref_value = np.max(magnitude)
+        log_specgram = 10.0 * np.log10(np.maximum(amin, magnitude))
+        log_specgram -= 10.0 * np.log10(np.maximum(amin, ref_value))
+        fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(10, 4))
+        imgplot = plt.imshow(log_specgram, aspect="auto")
         fstep = int(num_fft / 5.0)
         frequency_ticks = range(0, int(num_fft) + fstep, fstep)
         frequency_tick_labels = \
@@ -448,7 +459,6 @@ def spectrogram(signal, sampling_rate, window, step, plot=False,
         imgplot.set_cmap('jet')
         plt.colorbar()
         plt.show()
-        print(specgram.shape)
 
     return specgram, time_axis, freq_axis
 
@@ -683,3 +693,98 @@ def feature_extraction(signal, sampling_rate, window, step, deltas=True):
         features = np.concatenate(features, 1)
 
     return features, feature_names
+
+
+def feature_extraction_lengthwise(signal, sampling_rate, window, step):
+    """
+    This function implements the short-term windowing process along the full
+    length of the audio signal. For each short-term window a set of features
+    is extracted, and each frame's values are written over the samples of
+    that frame. This results in a sequence of feature vectors, stored in a
+    numpy matrix.
+
+    ARGUMENTS
+        signal:        the input signal samples
+        sampling_rate: the sampling freq (in Hz)
+        window:        the short-term window size (in samples)
+        step:          the short-term window step (in samples)
+
+    RETURNS
+        features (numpy.ndarray): contains features
+                                  (n_feats x number_of_samples)
+        feature_names (list of strings): contains feature names
+    """
+
+    window = int(window)
+    step = int(step)
+    if (len(signal) < 1) or (len(signal) < window) or (len(signal) < step):
+        raise ValueError(f'Error in dimensions: signal len={len(signal)}, '
+                         f'window={window}, step={step}')
+
+    # signal normalization
+    signal = np.double(signal)
+    signal = signal / (2.0 ** 15)
+    signal = dc_normalize(signal)
+
+    number_of_samples = len(signal)  # total number of samples
+    current_position = 0
+    count_fr = 0
+    num_fft = int(window / 2)
+
+    # define list of feature names
+    feature_names = ["zcr", "energy", "energy_entropy"]
+    feature_names += ["spectral_centroid", "spectral_spread"]
+    feature_names.append("spectral_entropy")
+    feature_names.append("spectral_flux")
+    feature_names.append("spectral_rolloff")
+
+    feature_vector = np.zeros((len(feature_names), number_of_samples))
+    # for each short-term window, up to the end of the signal
+    while current_position + window - 1 < number_of_samples:
+        count_fr += 1
+        # get current window
+        x = signal[current_position:current_position + window]
+
+        # get fft magnitude
+        fft_magnitude = abs(fft(x))
+
+        # normalize fft
+        fft_magnitude = fft_magnitude[0:num_fft]
+        fft_magnitude = fft_magnitude / len(fft_magnitude)
+
+        # keep previous fft mag (used in spectral flux)
+        if count_fr == 1:
+            fft_magnitude_previous = fft_magnitude.copy()
+
+        # zero crossing rate
+        feature_vector[0, current_position:current_position + window] = \
+            zero_crossing_rate(x)
+
+        # short-term energy
+        feature_vector[1, current_position:current_position + window] = \
+            energy(x)
+
+        # short-term entropy of energy
+        feature_vector[2, current_position:current_position + window] = \
+            energy_entropy(x)
+
+        # sp centroid/spread
+        [feature_vector[3, current_position:current_position + window],
+         feature_vector[4, current_position:current_position + window]] = \
+            spectral_centroid_spread(fft_magnitude, sampling_rate)
+
+        # spectral entropy
+        feature_vector[5, current_position:current_position + window] = \
+            spectral_entropy(fft_magnitude)
+
+        # spectral flux
+        feature_vector[6, current_position:current_position + window] = \
+            spectral_flux(fft_magnitude, fft_magnitude_previous)
+
+        # spectral rolloff
+        feature_vector[7, current_position:current_position + window] = \
+            spectral_rolloff(fft_magnitude, 0.90)
+
+        # keep this frame's magnitude for the next spectral flux computation
+        fft_magnitude_previous = fft_magnitude.copy()
+
+        # update the window position last, so that the feature values above
+        # cover exactly the samples they were computed from
+        current_position = current_position + step
+
+    return feature_vector, feature_names
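Not part of the patch: a minimal usage sketch of the new feature_extraction_lengthwise, assuming a WAV file at the illustrative path below and 50 ms windows with a 25 ms hop. Unlike feature_extraction, the returned matrix has one column per signal sample (each frame's values are repeated over that frame's samples), so it lines up index-for-index with the waveform.

    from pyAudioAnalysis import audioBasicIO
    from pyAudioAnalysis import ShortTermFeatures as STF

    # illustrative input file; any WAV that stereo_to_mono can handle works
    fs, x = audioBasicIO.read_audio_file("pyAudioAnalysis/data/recording1.wav")
    x = audioBasicIO.stereo_to_mono(x)

    # window and step are passed in samples: 50 ms window, 25 ms hop
    F, names = STF.feature_extraction_lengthwise(x, fs, 0.050 * fs, 0.025 * fs)

    print(F.shape)                  # (len(names), len(x)) -> one value per sample, per feature
    print(names[1], F[1, :].max())  # e.g. the peak short-term energy

This sample-wise layout is what tests/test_features.py below relies on when it overlays energy and zero-crossing rate directly on the raw waveform.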
diff --git a/tests/test_features.py b/tests/test_features.py
new file mode 100644
index 000000000..34b3e47c1
--- /dev/null
+++ b/tests/test_features.py
@@ -0,0 +1,39 @@
+import argparse
+import pytest
+import matplotlib.pyplot as plt
+import numpy as np
+import sys
+sys.path.append('../')
+
+import pyAudioAnalysis
+from pyAudioAnalysis import audioBasicIO
+from pyAudioAnalysis import ShortTermFeatures as STF
+
+
+@pytest.mark.parametrize('wav_file, plot', [('../pyAudioAnalysis/data/recording1.wav', True)])
+def test_shortTermFeatures(wav_file, plot):
+    [fs, data] = audioBasicIO.read_audio_file(wav_file)
+    print(f'FS={fs} win={0.050 * fs} step={0.025 * fs}')
+    F, f = STF.feature_extraction_lengthwise(data, fs, 0.050 * fs, 0.025 * fs)
+
+    if plot:
+        fig = plt.figure(figsize=(12, 6))
+        ax1 = fig.subplots()
+        ax2 = ax1.twinx()
+        ax3 = ax2.twinx()
+
+        ax1.plot(F[1, :], color='red', label=f[1])
+        ax2.plot(F[0, :], color='green', label=f[0])
+        ax3.plot(data, color='blue', label='data', alpha=0.5)
+
+        lines1, labels1 = ax1.get_legend_handles_labels()
+        lines2, labels2 = ax2.get_legend_handles_labels()
+        lines3, labels3 = ax3.get_legend_handles_labels()
+        ax3.set_xlabel('time (samples)')
+        ax3.legend(lines1 + lines2 + lines3, labels1 + labels2 + labels3, loc=0)
+        ax1.axis('off')
+        ax2.axis('off')
+        # fig.savefig('recording1_shortTermFeatures.png', dpi=200)
+        plt.show()
+
+    return fig
diff --git a/tests/test_pyAudioAnalysis.py b/tests/test_pyAudioAnalysis.py
new file mode 100644
index 000000000..438fbeb53
--- /dev/null
+++ b/tests/test_pyAudioAnalysis.py
@@ -0,0 +1,20 @@
+import argparse
+import os
+import sys
+import pytest
+sys.path.append('../')
+
+import pyAudioAnalysis
+from pyAudioAnalysis import audioBasicIO
+from pyAudioAnalysis import ShortTermFeatures as STF
+
+@pytest.mark.parametrize('wav_file, plot', [('../pyAudioAnalysis/data/recording1.wav', True)])
+def test_fileSpectrogramWrapper(wav_file, plot):
+    if not os.path.isfile(wav_file):
+        raise Exception("Input audio file not found!")
+    [fs, x] = audioBasicIO.read_audio_file(wav_file)
+    x = audioBasicIO.stereo_to_mono(x)
+    win_length = 256
+    hop_length = win_length // 4
+    specgram, TimeAxis, FreqAxis = STF.spectrogram(x, fs, win_length, hop_length, plot)
+
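Also not part of the patch: the plotting branch exercised by this spectrogram test now displays power in decibels relative to the peak, i.e. 10 * log10(S**2 / max(S**2)) with a small floor. A standalone numpy sketch of the equivalent scaling (the helper name here is ours, not a library function):

    import numpy as np

    def power_to_db(spec, amin=1e-10):
        # magnitude spectrogram -> dB, with the strongest bin mapped to 0 dB
        power = spec ** 2
        log_spec = 10.0 * np.log10(np.maximum(amin, power))
        return log_spec - 10.0 * np.log10(np.maximum(amin, power.max()))

    S = np.abs(np.random.randn(100, 128))   # stand-in magnitude spectrogram
    S_db = power_to_db(S)
    assert np.isclose(S_db.max(), 0.0)      # the peak sits at 0 dB
    assert (S_db <= 0.0).all()              # everything else is negative

Expressing power in dB compresses the dynamic range so that quieter frames remain visible next to loud ones, which is presumably the motivation for replacing the linear-scale imshow.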