Trying to get the frequencies of a .wav file in Python

This answer is quite late, but you could try this:

(Note: I deserve very little credit for this since I got most of it from other SO posts and this great article on FFT using Python: https://realpython.com/python-scipy-fft/)

import numpy as np
from scipy.fft import *
from scipy.io import wavfile


def freq(file, start_time, end_time):

    # Open the file and convert to mono
    sr, data = wavfile.read(file)
    if data.ndim > 1:
        data = data[:, 0]
    else:
        pass

    # Return a slice of the data from start_time to end_time
    dataToRead = data[int(start_time * sr / 1000) : int(end_time * sr / 1000) + 1]

    # Fourier Transform
    N = len(dataToRead)
    yf = rfft(dataToRead)
    xf = rfftfreq(N, 1 / sr)

    # Uncomment these to see the frequency spectrum as a plot
    # plt.plot(xf, np.abs(yf))
    # plt.show()

    # Get the most dominant frequency and return it
    idx = np.argmax(np.abs(yf))
    freq = xf[idx]
    return freq

This code can work for any .wav file, but it may be slightly off since it only returns the most dominant frequency, and also because it only uses the first channel of the audio (if not mono).

If you want to learn more about how the Fourier transform works, check out this video by 3blue1brown with a visual explanation: https://www.youtube.com/watch?v=spUNpyF58BY


I felt the OPs frustration - it shouldnt be so hard to find how to get values of the sprectrogram instead of seeing the spectrogram image if someone needs to:

#!/usr/bin/env python

import librosa
import sys
import numpy as np
import matplotlib.pyplot as plt
import librosa.display

np.set_printoptions(threshold=sys.maxsize)

filename = 'filename.wav'
Fs = 44100
clip, sample_rate = librosa.load(filename, sr=Fs)

n_fft = 1024  # frame length 
start = 0 

hop_length=512

#commented out code to display Spectrogram
X = librosa.stft(clip, n_fft=n_fft, hop_length=hop_length)
#Xdb = librosa.amplitude_to_db(abs(X))
#plt.figure(figsize=(14, 5))
#librosa.display.specshow(Xdb, sr=Fs, x_axis='time', y_axis='hz') 
#If to pring log of frequencies  
#librosa.display.specshow(Xdb, sr=Fs, x_axis='time', y_axis='log')
#plt.colorbar()

#librosa.display.waveplot(clip, sr=Fs)
#plt.show()

#now print all values 

t_samples = np.arange(clip.shape[0]) / Fs
t_frames = np.arange(X.shape[1]) * hop_length / Fs
#f_hertz = np.arange(N / 2 + 1) * Fs / N       # Works only when N is even
f_hertz = np.fft.rfftfreq(n_fft, 1 / Fs)         # Works also when N is odd

#example
print('Time (seconds) of last sample:', t_samples[-1])
print('Time (seconds) of last frame: ', t_frames[-1])
print('Frequency (Hz) of last bin:   ', f_hertz[-1])

print('Time (seconds) :', len(t_samples))

#prints array of time frames 
print('Time of frames (seconds) : ', t_frames)
#prints array of frequency bins
print('Frequency (Hz) : ', f_hertz)

print('Number of frames : ', len(t_frames))
print('Number of bins : ', len(f_hertz))

#This code is working to printout frame by frame intensity of each frequency
#on top line gives freq bins
curLine = 'Bins,'
for b in range(1, len(f_hertz)):
    curLine += str(f_hertz[b]) + ','
print(curLine)

curLine = ''
for f in range(1, len(t_frames)):
    curLine = str(t_frames[f]) + ','
    for b in range(1, len(f_hertz)): #for each frame, we get list of bin values printed
        curLine += str("%.02f" % np.abs(X[b, f])) + ','
        #remove format of the float for full details if needed
        #curLine += str(np.abs(X[b, f])) + ','
        #print other useful info like phase of frequency bin b at frame f.
        #curLine += str("%.02f" % np.angle(X[b, f])) + ',' 
    print(curLine)

If you'd like to detect pitch of a sound (and it seems you do), then in terms of Python libraries your best bet is aubio. Please consult this example for implementation.

import sys
from aubio import source, pitch

win_s = 4096
hop_s = 512 

s = source(your_file, samplerate, hop_s)
samplerate = s.samplerate

tolerance = 0.8

pitch_o = pitch("yin", win_s, hop_s, samplerate)
pitch_o.set_unit("midi")
pitch_o.set_tolerance(tolerance)

pitches = []
confidences = []

total_frames = 0
while True:
    samples, read = s()
    pitch = pitch_o(samples)[0]
    pitches += [pitch]
    confidence = pitch_o.get_confidence()
    confidences += [confidence]
    total_frames += read
    if read < hop_s: break

print("Average frequency = " + str(np.array(pitches).mean()) + " hz")

Be sure to check docs on pitch detection methods.

I also thought you might be interested in estimation of mean frequency and some other audio parameters without using any special libraries. Let's just use numpy! This should give you much better insight into how such audio features can be calculated. It's based off specprop from seewave package. Check docs for meaning of computed features.

import numpy as np

def spectral_properties(y: np.ndarray, fs: int) -> dict:
    spec = np.abs(np.fft.rfft(y))
    freq = np.fft.rfftfreq(len(y), d=1 / fs)
    spec = np.abs(spec)
    amp = spec / spec.sum()
    mean = (freq * amp).sum()
    sd = np.sqrt(np.sum(amp * ((freq - mean) ** 2)))
    amp_cumsum = np.cumsum(amp)
    median = freq[len(amp_cumsum[amp_cumsum <= 0.5]) + 1]
    mode = freq[amp.argmax()]
    Q25 = freq[len(amp_cumsum[amp_cumsum <= 0.25]) + 1]
    Q75 = freq[len(amp_cumsum[amp_cumsum <= 0.75]) + 1]
    IQR = Q75 - Q25
    z = amp - amp.mean()
    w = amp.std()
    skew = ((z ** 3).sum() / (len(spec) - 1)) / w ** 3
    kurt = ((z ** 4).sum() / (len(spec) - 1)) / w ** 4

    result_d = {
        'mean': mean,
        'sd': sd,
        'median': median,
        'mode': mode,
        'Q25': Q25,
        'Q75': Q75,
        'IQR': IQR,
        'skew': skew,
        'kurt': kurt
    }

    return result_d