Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions Contributors.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
bug fix in file ShortTermFeatures.py function spectrogram:
1. computing specgram
2. plotting
111 changes: 108 additions & 3 deletions pyAudioAnalysis/ShortTermFeatures.py
Original file line number Diff line number Diff line change
Expand Up @@ -419,6 +419,11 @@ def spectrogram(signal, sampling_rate, window, step, plot=False,
X = abs(fft(x))
X = X[0:num_fft]
X = X / len(X)

if X.shape[0] != specgram.shape[1]:
align = np.zeros(specgram.shape[1]-X.shape[0], dtype=np.float64)
X = np.append(X, align)

specgram[count_fr-1, :] = X

freq_axis = [float((f + 1) * sampling_rate) / (2 * num_fft)
Expand All @@ -427,8 +432,14 @@ def spectrogram(signal, sampling_rate, window, step, plot=False,
for t in range(specgram.shape[0])]

if plot:
fig, ax = plt.subplots()
imgplot = plt.imshow(specgram.transpose()[::-1, :])
spT = specgram.transpose()[::-1, :]
amin = 1e-10
magnitude = spT**2
ref_value = np.max(magnitude)
log_specgram = 10.0 * np.log10(np.maximum(amin, magnitude))
log_specgram -= 10.0 * np.log10(np.maximum(amin, ref_value))
fig, ax = plt.subplots(nrows=1,ncols=1, figsize=(10,4))
imgplot = plt.imshow(log_specgram, aspect="auto")
fstep = int(num_fft / 5.0)
frequency_ticks = range(0, int(num_fft) + fstep, fstep)
frequency_tick_labels = \
Expand All @@ -448,7 +459,6 @@ def spectrogram(signal, sampling_rate, window, step, plot=False,
imgplot.set_cmap('jet')
plt.colorbar()
plt.show()
print(specgram.shape)
return specgram, time_axis, freq_axis


Expand Down Expand Up @@ -683,3 +693,98 @@ def feature_extraction(signal, sampling_rate, window, step, deltas=True):

features = np.concatenate(features, 1)
return features, feature_names


def feature_extraction_lengthwise(signal, sampling_rate, window, step):
    """
    Compute short-term features and lay them out along the sample axis.

    The signal is scanned with a sliding short-term window. For every
    complete window a set of features is computed and each scalar feature
    value is written to all sample positions covered by that window, so the
    returned rows have the same length as the signal. Where consecutive
    windows overlap, the later window overwrites the earlier one. Samples
    that are not covered by any complete window keep the value 0.

    ARGUMENTS
        signal:         the input signal samples
        sampling_rate:  the sampling freq (in Hz)
        window:         the short-term window size (in samples)
        step:           the short-term window step (in samples)

    RETURNS
        features (numpy.ndarray):        contains features
                                         (n_feats x len(signal))
        feature_names (list of strings): contains feature names

    RAISES
        ValueError: if window or step is smaller than 1 sample, or if the
                    signal is empty or shorter than window or step
    """

    window = int(window)
    step = int(step)
    # A non-positive step would never advance the window (endless loop), and
    # a non-positive window is meaningless, so reject both up front.
    if (window < 1 or step < 1 or len(signal) < 1
            or len(signal) < window or len(signal) < step):
        raise ValueError(f'Error in dimension signal len={len(signal)} window={window} step={step}')

    # signal normalization
    signal = np.double(signal)
    signal = signal / (2.0 ** 15)

    signal = dc_normalize(signal)

    number_of_samples = len(signal)  # total number of samples
    num_fft = int(window / 2)

    # define list of feature names
    feature_names = [
        "zcr",
        "energy",
        "energy_entropy",
        "spectral_centroid",
        "spectral_spread",
        "spectral_entropy",
        "spectral_flux",
        "spectral_rolloff",
    ]

    feature_vector = np.zeros((len(feature_names), number_of_samples))
    fft_magnitude_previous = None

    # for each complete short-term window up to the end of the signal
    for start in range(0, number_of_samples - window + 1, step):
        stop = start + window

        # get current window
        x = signal[start:stop]

        # get fft magnitude (first half of the spectrum), normalized
        fft_magnitude = abs(fft(x))[0:num_fft]
        fft_magnitude = fft_magnitude / len(fft_magnitude)

        # the first frame has no predecessor: compare it with itself
        if fft_magnitude_previous is None:
            fft_magnitude_previous = fft_magnitude.copy()

        # Features are written to the samples of the window they were
        # computed from (start:stop), not to the next window's position.

        # zero crossing rate
        feature_vector[0, start:stop] = zero_crossing_rate(x)

        # short-term energy
        feature_vector[1, start:stop] = energy(x)

        # short-term entropy of energy
        feature_vector[2, start:stop] = energy_entropy(x)

        # sp centroid/spread
        centroid, spread = spectral_centroid_spread(fft_magnitude,
                                                    sampling_rate)
        feature_vector[3, start:stop] = centroid
        feature_vector[4, start:stop] = spread

        # spectral entropy
        feature_vector[5, start:stop] = spectral_entropy(fft_magnitude)

        # spectral flux
        feature_vector[6, start:stop] = spectral_flux(fft_magnitude,
                                                      fft_magnitude_previous)

        # spectral rolloff
        feature_vector[7, start:stop] = spectral_rolloff(fft_magnitude, 0.90)

        # keep this frame's spectrum so the next flux is frame-to-frame
        fft_magnitude_previous = fft_magnitude.copy()

    return feature_vector, feature_names
39 changes: 39 additions & 0 deletions tests/test_features.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
import argparse
import pytest
import matplotlib.pyplot as plt
import numpy as np
import sys
sys.path.append('../')

import pyAudioAnalysis
from pyAudioAnalysis import audioBasicIO
from pyAudioAnalysis import ShortTermFeatures as STF


@pytest.mark.parametrize(
    'wav_file, plot',
    [('../pyAudioAnalysis/data/recording1.wav', True)],
)
def test_shortTermFeatures(wav_file, plot):
    """
    Extract lengthwise short-term features from a WAV file and optionally
    plot energy and zero crossing rate on top of the raw signal.

    The window is 50 ms and the step 25 ms, both expressed in samples.
    Returns the matplotlib figure; `fig` is only bound when `plot` is true.
    """
    fs, data = audioBasicIO.read_audio_file(wav_file)
    print(f'FS={fs} win={0.050*fs} step={0.025*fs}')
    feats, feat_names = STF.feature_extraction_lengthwise(
        data, fs, 0.050*fs, 0.025*fs)

    if plot:
        fig = plt.figure(figsize=(12, 6))
        # three axes sharing the same x axis, each with its own y scale
        energy_ax = fig.subplots()
        zcr_ax = energy_ax.twinx()
        signal_ax = zcr_ax.twinx()

        energy_ax.plot(feats[1, :], color='red', label=feat_names[1])
        zcr_ax.plot(feats[0, :], color='green', label=feat_names[0])
        signal_ax.plot(data, color='blue', label='data', alpha=0.5)

        # gather the legend entries of all three axes into a single legend
        handles = []
        labels = []
        for axis in (energy_ax, zcr_ax, signal_ax):
            axis_handles, axis_labels = axis.get_legend_handles_labels()
            handles = handles + axis_handles
            labels = labels + axis_labels

        signal_ax.set_xlabel('time (s)')
        signal_ax.legend(handles, labels, loc=0)
        energy_ax.axis('off')
        zcr_ax.axis('off')
        #fig.savefig('recording1_shortTermFeatures.png', dpi=200)
        plt.show()

    return fig
20 changes: 20 additions & 0 deletions tests/test_pyAudioAnalysis.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
import argparse
import os
import sys
import pytest
sys.path.append('../')

import pyAudioAnalysis
from pyAudioAnalysis import audioBasicIO
from pyAudioAnalysis import ShortTermFeatures as STF

@pytest.mark.parametrize(
    'wav_file, plot',
    [('../pyAudioAnalysis/data/recording1.wav', True)],
)
def test_fileSpectrogramWrapper(wav_file, plot):
    """
    Compute (and optionally plot) the spectrogram of a WAV file.

    The audio is down-mixed with stereo_to_mono and analysed with a
    256-sample window and a hop of a quarter window.
    """
    file_found = os.path.isfile(wav_file)
    if not file_found:
        raise Exception("Input audio file not found!")

    sampling_rate, samples = audioBasicIO.read_audio_file(wav_file)
    mono = audioBasicIO.stereo_to_mono(samples)

    window_size = 256
    hop_size = window_size // 4
    result = STF.spectrogram(mono, sampling_rate, window_size, hop_size, plot)
    # spectrogram is expected to return exactly three values
    specgram, time_axis, freq_axis = result