tyiannak · michabs · Feb 17, 2021 · Feb 17, 2021 · Feb 17, 2021 · Feb 18, 2021
diff --git a/Contributors.md b/Contributors.md
@@ -0,0 +1,3 @@
+bug fix in file ShortTermFeatures.py function spectrogram:
+  1. computing specgram
+  2. plotting
diff --git a/pyAudioAnalysis/ShortTermFeatures.py b/pyAudioAnalysis/ShortTermFeatures.py
@@ -419,6 +419,11 @@ def spectrogram(signal, sampling_rate, window, step, plot=False,
         X = abs(fft(x))
         X = X[0:num_fft]
         X = X / len(X)
+
+        if X.shape[0] != specgram.shape[1]:
+            align = np.zeros(specgram.shape[1]-X.shape[0], dtype=np.float64)
+            X = np.append(X, align)
+
         specgram[count_fr-1, :] = X
 
     freq_axis = [float((f + 1) * sampling_rate) / (2 * num_fft)
@@ -427,8 +432,14 @@ def spectrogram(signal, sampling_rate, window, step, plot=False,
                  for t in range(specgram.shape[0])]
 
     if plot:
-        fig, ax = plt.subplots()
-        imgplot = plt.imshow(specgram.transpose()[::-1, :])
+        spT         = specgram.transpose()[::-1, :]
+        amin        = 1e-10
+        magnitude   = spT**2
+        ref_value   = np.max(magnitude)
+        log_specgram = 10.0 * np.log10(np.maximum(amin, magnitude))
+        log_specgram -= 10.0 * np.log10(np.maximum(amin, ref_value))
+        fig, ax     = plt.subplots(nrows=1,ncols=1, figsize=(10,4))
+        imgplot     = plt.imshow(log_specgram, aspect="auto") 
         fstep = int(num_fft / 5.0)
         frequency_ticks = range(0, int(num_fft) + fstep, fstep)
         frequency_tick_labels = \
@@ -448,7 +459,6 @@ def spectrogram(signal, sampling_rate, window, step, plot=False,
         imgplot.set_cmap('jet')
         plt.colorbar()
         plt.show()
-    print(specgram.shape)
     return specgram, time_axis, freq_axis
 
 
@@ -683,3 +693,98 @@ def feature_extraction(signal, sampling_rate, window, step, deltas=True):
 
     features = np.concatenate(features, 1)
     return features, feature_names
+
+
+def feature_extraction_lengthwise(signal, sampling_rate, window, step):
+    """
+    Extract short-term features and spread them along the signal length.
+
+    Each short-term window yields one value per feature; that value is
+    written to every sample position the window covers, so the output has
+    one column per input sample (later windows overwrite any overlap).
+
+    ARGUMENTS
+        signal:         the input signal samples
+        sampling_rate:  the sampling freq (in Hz)
+        window:         the short-term window size (in samples)
+        step:           the short-term window step (in samples)
+
+    RETURNS
+        features (numpy.ndarray):        contains features
+                                         (n_feats x number of samples)
+        feature_names (list of strings): contains feature names
+
+    RAISES
+        ValueError: if window or step is < 1 or exceeds the signal length
+    """
+
+    window = int(window)
+    step = int(step)
+    # step < 1 would never advance the loop below, so reject it up front
+    if window < 1 or step < 1 or len(signal) < window or len(signal) < step:
+        raise ValueError(f'Error in dimension signal len={len(signal)} window={window} step={step}')
+
+    # signal normalization
+    signal = np.double(signal)
+    signal = signal / (2.0 ** 15)
+
+    signal = dc_normalize(signal)
+
+    number_of_samples = len(signal)  # total number of samples
+    current_position = 0
+    num_fft = int(window / 2)
+    fft_magnitude_previous = None
+
+    # define list of feature names
+    feature_names = ["zcr", "energy", "energy_entropy",
+                     "spectral_centroid", "spectral_spread",
+                     "spectral_entropy", "spectral_flux", "spectral_rolloff"]
+
+    # samples never covered by a window (signal tail) keep the value 0
+    feature_vector = np.zeros((len(feature_names), number_of_samples))
+    # for each short-term window to end of signal
+    while current_position + window - 1 < number_of_samples:
+        # columns covered by the current window
+        span = slice(current_position, current_position + window)
+        x = signal[span]
+
+        # get fft magnitude, keep the first half and normalize it
+        fft_magnitude = abs(fft(x))
+        fft_magnitude = fft_magnitude[0:num_fft]
+        fft_magnitude = fft_magnitude / len(fft_magnitude)
+
+        # first frame has no predecessor: compare it with itself
+        if fft_magnitude_previous is None:
+            fft_magnitude_previous = fft_magnitude.copy()
+
+        # zero crossing rate
+        feature_vector[0, span] = zero_crossing_rate(x)
+
+        # short-term energy
+        feature_vector[1, span] = energy(x)
+
+        # short-term entropy of energy
+        feature_vector[2, span] = energy_entropy(x)
+
+        # sp centroid/spread
+        feature_vector[3, span], feature_vector[4, span] = \
+            spectral_centroid_spread(fft_magnitude, sampling_rate)
+
+        # spectral entropy
+        feature_vector[5, span] = spectral_entropy(fft_magnitude)
+
+        # spectral flux (change relative to the previous frame)
+        feature_vector[6, span] = spectral_flux(fft_magnitude,
+                                                fft_magnitude_previous)
+
+        # spectral rolloff
+        feature_vector[7, span] = spectral_rolloff(fft_magnitude, 0.90)
+
+        # remember this frame for the next spectral flux computation
+        fft_magnitude_previous = fft_magnitude
+
+        # advance only after the features were stored, so they land on the
+        # samples of the window they were computed from
+        current_position = current_position + step
+
+    return feature_vector, feature_names
diff --git a/tests/test_features.py b/tests/test_features.py
@@ -0,0 +1,39 @@
+import argparse
+import pytest
+import matplotlib.pyplot as plt
+import numpy as np
+import sys
+sys.path.append('../')
+
+import pyAudioAnalysis 
+from pyAudioAnalysis import audioBasicIO
+from pyAudioAnalysis import ShortTermFeatures as STF 
+
+
+@pytest.mark.parametrize('wav_file, plot', [ ('../pyAudioAnalysis/data/recording1.wav', True)])
+def test_shortTermFeatures(wav_file, plot):
+    """Lengthwise features have one column per sample; optionally plot."""
+    [fs, data] = audioBasicIO.read_audio_file(wav_file)
+    print(f'FS={fs} win={0.050*fs} step={0.025*fs}')
+    F, f = STF.feature_extraction_lengthwise(data, fs, 0.050*fs, 0.025*fs)
+    assert F.shape == (len(f), len(data))
+
+    if plot:
+        fig = plt.figure(figsize=(12, 6))
+        ax1 = fig.subplots()
+        ax2 = ax1.twinx()
+        ax3 = ax2.twinx()
+
+        ax1.plot(F[1,:], color='red', label=f[1])
+        ax2.plot(F[0,:], color='green', label=f[0])
+        ax3.plot(data, color='blue', label='data', alpha=0.5)
+
+        lines1, labels1 = ax1.get_legend_handles_labels()
+        lines2, labels2 = ax2.get_legend_handles_labels()
+        lines3, labels3 = ax3.get_legend_handles_labels()
+        ax3.set_xlabel('time (s)')
+        ax3.legend(lines1 + lines2 + lines3, labels1 + labels2 + labels3, loc=0)
+        ax1.axis('off')
+        ax2.axis('off')
+        #fig.savefig('recording1_shortTermFeatures.png', dpi=200)
+        plt.show()
diff --git a/tests/test_pyAudioAnalysis.py b/tests/test_pyAudioAnalysis.py
@@ -0,0 +1,20 @@
+import argparse
+import os
+import sys
+import pytest
+sys.path.append('../')
+
+import pyAudioAnalysis 
+from pyAudioAnalysis import audioBasicIO
+from pyAudioAnalysis import ShortTermFeatures as STF 
+
+@pytest.mark.parametrize('wav_file, plot', [ ('../pyAudioAnalysis/data/recording1.wav', True)])
+def test_fileSpectrogramWrapper(wav_file, plot):
+    """Spectrogram of a wav file has one row per frame, one column per bin."""
+    assert os.path.isfile(wav_file), "Input audio file not found!"
+    [fs, x] = audioBasicIO.read_audio_file(wav_file)
+    x = audioBasicIO.stereo_to_mono(x)
+    win_length = 256
+    hop_length = win_length // 4
+    specgram, TimeAxis, FreqAxis = STF.spectrogram(x, fs, win_length, hop_length, plot)
+    assert specgram.shape == (len(TimeAxis), len(FreqAxis))