AVSpeechSynthesizer detect when the speech is finished

The function viewDidAppear is used just as example, same code can be placed anywhere as required:

/// Demonstrates receiving a "finished speaking" callback from `AVSpeechSynthesizer`.
/// `viewDidAppear` is only an example trigger; the same wiring works anywhere.
class MyViewController: UIViewController, AVSpeechSynthesizerDelegate {

    var synth = AVSpeechSynthesizer()

    override func viewDidAppear(_ animated: Bool) {
        super.viewDidAppear(animated)

        // The delegate must be assigned before calling `speak(_:)`,
        // otherwise the completion callback is never delivered.
        synth.delegate = self

        synth.speak(AVSpeechUtterance(string: "Hello world!"))
    }

    /// Delegate callback fired once the synthesizer finishes an utterance.
    func speechSynthesizer(_ synthesizer: AVSpeechSynthesizer, didFinish utterance: AVSpeechUtterance) {
        // do something useful here ...
    }
}

An error like "Type 'A' does not conform to protocol 'NSObjectProtocol'" means that your class must inherit from NSObject; you can read more about it here.

Now I don't know how you've structured your code, but this little example seems to work for me. First a dead simple class that holds the AVSpeechSynthesizer:

/// A minimal wrapper around `AVSpeechSynthesizer`.
/// Inherits from `NSObject` because `AVSpeechSynthesizerDelegate` refines
/// `NSObjectProtocol` — without it the delegate assignment will not compile.
class Speaker: NSObject {
    let synth = AVSpeechSynthesizer()

    override init() {
        super.init()
        // Wire the delegate once, up front, so callbacks always arrive.
        synth.delegate = self
    }

    /// Speaks the given string aloud using the default voice.
    func speak(_ string: String) {
        let utterance = AVSpeechUtterance(string: string)
        // Fixed: `speakUtterance(_:)` is the old Swift 2 name; since Swift 3
        // the API is `speak(_:)` — the original line would not compile alongside
        // the Swift 3-style delegate method used in the extension below.
        synth.speak(utterance)
    }
}

Notice that I set the delegate here (in the init method) and notice that it must inherit from NSObject to keep the compiler happy (very important!)

And then the actual delegate method:

// MARK: - AVSpeechSynthesizerDelegate
extension Speaker: AVSpeechSynthesizerDelegate {
    /// Invoked by the synthesizer once it has finished speaking an utterance.
    func speechSynthesizer(_ synthesizer: AVSpeechSynthesizer, didFinish utterance: AVSpeechUtterance) {
        print("all done")
    }
}

And finally, I can use that class here, like so:

/// Example consumer: forwards a button tap to the `Speaker` wrapper.
class ViewController: UIViewController {
    let speaker = Speaker()

    @IBAction func buttonTapped(sender: UIButton) {
        // The Speaker owns the synthesizer and its delegate wiring.
        speaker.speak("Hello world")
    }
}

Which rewards me with

all done

in my console when the AVSpeechSynthesizer has stopped speaking.

Hope that helps you.

Update

So, time passes and in the comments below @case-silva asked if there was a practical example and @dima-gershman suggested to just use the AVSpeechSynthesizer directly in the ViewController.

To accommodate both, I've made a simple ViewController example here with a UITextField and a UIButton.

The flow is:

  1. You enter some text in the textfield (if not, a default value will be set)
  2. You press the button
  3. The button is disabled and the background color is changed (sorry, it was the best I could come up with :))
  4. Once speech is done, the button is enabled, the textfield is cleared and the background color is changed again.

Here's how it looks

A Simple UIViewController Example

import UIKit
import AVFoundation

/// Speaks the contents of `textField` when `speakButton` is tapped,
/// disabling the UI until the delegate extension reports completion.
class ViewController: UIViewController {

    //MARK: Outlets
    @IBOutlet weak var textField: UITextField!
    @IBOutlet weak var speakButton: UIButton!

    let synth = AVSpeechSynthesizer()

    override func viewDidLoad() {
        super.viewDidLoad()
        // Wire the delegate up front so completion callbacks are delivered.
        synth.delegate = self
    }

    @IBAction func speakButtonTapped(_ sender: UIButton) {
        // Lock the UI down while speech is in progress; the delegate re-enables it.
        view.backgroundColor = .darkGray
        speakButton.isEnabled = false

        // Fall back to a prompt when the field is empty.
        var spokenText = textField.text ?? ""
        if spokenText.isEmpty {
            spokenText = "Please enter some text"
        }

        synth.speak(AVSpeechUtterance(string: spokenText))
    }
}

// MARK: - AVSpeechSynthesizerDelegate
extension ViewController: AVSpeechSynthesizerDelegate {
    /// Called when the synthesizer finishes; restores the UI for the next round.
    func speechSynthesizer(_ synthesizer: AVSpeechSynthesizer, didFinish utterance: AVSpeechUtterance) {
        textField.text = ""
        view.backgroundColor = .lightGray
        speakButton.isEnabled = true
    }
}

Hope that gives you a clue, Case.


SwiftUI

Create a Speaker class that inherits from NSObject and ObservableObject.

/// Observable speech wrapper for SwiftUI.
/// Inherits from `NSObject` because `AVSpeechSynthesizerDelegate` refines
/// `NSObjectProtocol`, and conforms to `ObservableObject` so views can react
/// to `isSpeaking` / alert state changes.
internal class Speaker: NSObject, ObservableObject {
    // Description of the last audio-session error, displayed in the alert.
    internal var errorDescription: String? = nil
    private let synthesizer: AVSpeechSynthesizer = AVSpeechSynthesizer()
    @Published var isSpeaking: Bool = false
    @Published var isShowingSpeakingErrorAlert: Bool = false

    override init() {
        super.init()
        // Delegate must be set before speaking for the callbacks to arrive.
        self.synthesizer.delegate = self
    }

    /// Speaks `text` with the voice for the given language code (e.g. "en-US").
    /// On audio-session failure, records the error and raises the alert flag.
    internal func speak(_ text: String, language: String) {
        do {
            let utterance = AVSpeechUtterance(string: text)
            utterance.voice = AVSpeechSynthesisVoice(language: language)

            try AVAudioSession.sharedInstance().setCategory(.playback, mode: .default)
            try AVAudioSession.sharedInstance().setActive(true)
            self.synthesizer.speak(utterance)
        } catch let error {
            self.errorDescription = error.localizedDescription
            // Fixed: assign `true` rather than `toggle()` — if a second error
            // fired while the alert was already showing, toggling would flip the
            // flag back to `false` and dismiss the alert instead of presenting it.
            isShowingSpeakingErrorAlert = true
        }
    }

    /// Stops speech immediately; triggers the `didCancel` delegate callback.
    internal func stop() {
        self.synthesizer.stopSpeaking(at: .immediate)
    }
}

Extend it and implement the necessary delegate methods.

// MARK: - AVSpeechSynthesizerDelegate
extension Speaker: AVSpeechSynthesizerDelegate {
    /// Shared teardown for both the cancel and finish callbacks: clear the
    /// speaking flag and hand the audio session back to other apps.
    /// NOTE(review): the `@Published` writes assume these delegate callbacks
    /// arrive on the main thread — confirm if UI glitches are observed.
    private func speechDidEnd() {
        self.isSpeaking = false
        try? AVAudioSession.sharedInstance().setActive(false, options: .notifyOthersOnDeactivation)
    }

    func speechSynthesizer(_ synthesizer: AVSpeechSynthesizer, didStart utterance: AVSpeechUtterance) {
        self.isSpeaking = true
    }

    func speechSynthesizer(_ synthesizer: AVSpeechSynthesizer, didCancel utterance: AVSpeechUtterance) {
        speechDidEnd()
    }

    func speechSynthesizer(_ synthesizer: AVSpeechSynthesizer, didFinish utterance: AVSpeechUtterance) {
        speechDidEnd()
    }
}

Add Speaker to the necessary view using the StateObject wrapper.

/// Shows a line of text next to a play/stop button backed by a `Speaker`.
struct ContentView: View {
    let text: String = "Hello World!"
    @StateObject var speaker: Speaker = Speaker()

    var body: some View {
        HStack {
            Text(text)
            Spacer()
            speechButton
        }
        .padding()
    }

    /// Play/stop toggle; the icon mirrors the current speaking state and the
    /// error alert is attached here so failures surface next to the control.
    private var speechButton: some View {
        Button {
            if speaker.isSpeaking {
                speaker.stop()
            } else {
                speaker.speak(text, language: "en-US")
            }
        } label: {
            Image(systemName: speaker.isSpeaking ? "stop.circle" : "speaker.wave.2.circle")
                .resizable()
                .frame(width: 30, height: 30)
        }
        .buttonStyle(BorderlessButtonStyle())
        .alert(isPresented: $speaker.isShowingSpeakingErrorAlert) {
            Alert(title: Text("Pronunciation error", comment: "Pronunciation error alert title."), message: Text(speaker.errorDescription ?? ""))
        }
    }
}