Source code for PyXA.Additions.Speech

""".. versionadded:: 0.1.1

A collection of classes for handling speak input and output.
"""

import time
from datetime import datetime, timedelta
from typing import Any, Callable, Union

import AppKit
import AVFoundation
import Speech
from PyObjCTools import AppHelper

from PyXA import XABase


[docs] class XACommandDetector: """A command-based query detector. .. versionadded:: 0.0.9 """ def __init__( self, command_function_map: Union[dict[str, Callable[[], Any]], None] = None ): """Creates a command detector object. :param command_function_map: A dictionary mapping command strings to function objects :type command_function_map: dict[str, Callable[[], Any]] .. versionadded:: 0.0.9 """ self.command_function_map = ( command_function_map or {} ) #: The dictionary of commands and corresponding functions to run upon detection
[docs] def on_detect(self, command: str, function: Callable[[], Any]): """Adds or replaces a command to listen for upon calling :func:`listen`, and associates the given function with that command. :param command: The command to listen for :type command: str :param function: The function to call when the command is heard :type function: Callable[[], Any] :Example: >>> detector = PyXA.XACommandDetector() >>> detector.on_detect("go to google", PyXA.XAURL("http://google.com").open) >>> detector.listen() .. versionadded:: 0.0.9 """ self.command_function_map[command] = function
[docs] def listen(self) -> Any: """Begins listening for the specified commands. :return: The execution return value of the corresponding command function :rtype: Any :Example: >>> import PyXA >>> PyXA.speak("What app do you want to open?") >>> PyXA.XACommandDetector({ >>> "safari": PyXA.Application("Safari").activate, >>> "messages": PyXA.Application("Messages").activate, >>> "shortcuts": PyXA.Application("Shortcuts").activate, >>> "mail": PyXA.Application("Mail").activate, >>> "calendar": PyXA.Application("Calendar").activate, >>> "notes": PyXA.Application("Notes").activate, >>> "music": PyXA.Application("Music").activate, >>> "tv": PyXA.Application("TV").activate, >>> "pages": PyXA.Application("Pages").activate, >>> "numbers": PyXA.Application("Numbers").activate, >>> "keynote": PyXA.Application("Keynote").activate, >>> }).listen() .. versionadded:: 0.0.9 """ command_function_map = self.command_function_map return_value = None class NSSpeechRecognizerDelegate(AppKit.NSObject): def speechRecognizer_didRecognizeCommand_(self, recognizer, cmd): return_value = command_function_map[cmd]() AppHelper.stopEventLoop() recognizer = AppKit.NSSpeechRecognizer.alloc().init() recognizer.setCommands_(list(command_function_map.keys())) recognizer.setBlocksOtherRecognizers_(True) recognizer.setDelegate_(NSSpeechRecognizerDelegate.alloc().init().retain()) recognizer.startListening() AppHelper.runConsoleEventLoop() return return_value
[docs] class XASpeechRecognizer: """A rule-based query detector. .. versionadded:: 0.0.9 """ def __init__( self, finish_conditions: Union[ None, dict[Callable[[str], bool], Callable[[str], bool]] ] = None, ): """Creates a speech recognizer object. By default, with no other rules specified, the Speech Recognizer will timeout after 10 seconds once :func:`listen` is called. :param finish_conditions: A dictionary of rules and associated methods to call when a rule evaluates to true, defaults to None :type finish_conditions: Union[None, dict[Callable[[str], bool], Callable[[str], bool]]], optional .. versionadded:: 0.0.9 """ default_conditions = { lambda x: self.time_elapsed > timedelta(seconds=10): lambda x: self.spoken_query, } self.finish_conditions: Callable[[str], bool] = ( finish_conditions or default_conditions ) #: A dictionary of rules and associated methods to call when a rule evaluates to true self.spoken_query: str = "" #: The recognized spoken input self.start_time: datetime #: The time that the Speech Recognizer begins listening self.time_elapsed: timedelta #: The amount of time passed since the start time def __prepare(self): # Request microphone access if we don't already have it Speech.SFSpeechRecognizer.requestAuthorization_(None) # Set up audio session self.audio_session = AVFoundation.AVAudioSession.sharedInstance() self.audio_session.setCategory_mode_options_error_( AVFoundation.AVAudioSessionCategoryRecord, AVFoundation.AVAudioSessionModeMeasurement, AVFoundation.AVAudioSessionCategoryOptionDuckOthers, None, ) self.audio_session.setActive_withOptions_error_( True, AVFoundation.AVAudioSessionSetActiveOptionNotifyOthersOnDeactivation, None, ) # Set up recognition request self.recognizer = Speech.SFSpeechRecognizer.alloc().init() self.recognition_request = ( Speech.SFSpeechAudioBufferRecognitionRequest.alloc().init() ) self.recognition_request.setShouldReportPartialResults_(True) # Set up audio engine self.audio_engine = AVFoundation.AVAudioEngine.alloc().init() self.input_node = self.audio_engine.inputNode() recording_format = self.input_node.outputFormatForBus_(0) self.input_node.installTapOnBus_bufferSize_format_block_( 0, 1024, recording_format, lambda buffer, _when: self.recognition_request.appendAudioPCMBuffer_( buffer ), ) self.audio_engine.prepare() self.audio_engine.startAndReturnError_(None)
[docs] def on_detect(self, rule: Callable[[str], bool], method: Callable[[str], bool]): """Sets the given rule to call the specified method if a spoken query passes the rule. :param rule: A function that takes the spoken query as a parameter and returns a boolean value depending on whether the query passes a desired rule :type rule: Callable[[str], bool] :param method: A function that takes the spoken query as a parameter and acts on it :type method: Callable[[str], bool] .. versionadded:: 0.0.9 """ self.finish_conditions[rule] = method
[docs] def listen(self) -> Any: """Begins listening for a query until a rule returns True. :return: The value returned by the method invoked upon matching some rule :rtype: Any .. versionadded:: 0.0.9 """ self.start_time = datetime.now() self.time_elapsed = None self.__prepare() old_self = self def detect_speech(transcription, error): if error is not None: print("Failed to detect speech. Error: ", error) else: old_self.spoken_query = ( transcription.bestTranscription().formattedString() ) print(old_self.spoken_query) recognition_task = self.recognizer.recognitionTaskWithRequest_resultHandler_( self.recognition_request, detect_speech ) while self.spoken_query == "" or not any( x(self.spoken_query) for x in self.finish_conditions ): self.time_elapsed = datetime.now() - self.start_time AppKit.NSRunLoop.currentRunLoop().runUntilDate_( datetime.now() + timedelta(seconds=0.5) ) self.audio_engine.stop() for rule, method in self.finish_conditions.items(): if rule(self.spoken_query): return method(self.spoken_query)
[docs] class XASpeech: def __init__( self, message: str = "", voice: Union[str, None] = None, volume: float = 0.5, rate: int = 200, ): self.message: str = message #: The message to speak self.voice: Union[str, None] = voice #: The voice that the message is spoken in self.volume: float = volume #: The speaking volume self.rate: int = rate #: The speaking rate
[docs] def voices(self) -> list[str]: """Gets the list of voice names available on the system. :return: The list of voice names :rtype: list[str] :Example: >>> import PyXA >>> speaker = PyXA.XASpeech() >>> print(speaker.voices()) ['Agnes', 'Alex', 'Alice', 'Allison', .. versionadded:: 0.0.9 """ ls = AppKit.NSSpeechSynthesizer.availableVoices() return [ x.replace("com.apple.speech.synthesis.voice.", "") .replace(".premium", "") .title() for x in ls ]
[docs] def speak(self, path: Union[str, XABase.XAPath, None, list[str]] = None): """Speaks the provided message using the desired voice, volume, and speaking rate. :param path: The path to a .AIFF file to output sound to, defaults to None :type path: Union[str, XAPath, None], optional :Example 1: Speak a message aloud >>> import PyXA >>> PyXA.XASpeech("This is a test").speak() :Example 2: Output spoken message to an AIFF file >>> import PyXA >>> speaker = PyXA.XASpeech("Hello, world!") >>> speaker.speak("/Users/steven/Downloads/Hello.AIFF") :Example 3: Control the voice, volume, and speaking rate >>> import PyXA >>> speaker = PyXA.XASpeech( >>> message = "Hello, world!", >>> voice = "Alex", >>> volume = 1, >>> rate = 500 >>> ) >>> speaker.speak() .. versionadded:: 0.0.9 """ if isinstance(self.message, list): self.message = "\n".join(self.message) if self.message.strip() == "": return # Get the selected voice by name voice = None for v in AppKit.NSSpeechSynthesizer.availableVoices(): if self.voice is not None and self.voice.lower() in v.lower(): voice = v # Set up speech synthesis object synthesizer = AppKit.NSSpeechSynthesizer.alloc().initWithVoice_(voice) synthesizer.setVolume_(self.volume) synthesizer.setRate_(self.rate) # Start speaking if path is None: synthesizer.startSpeakingString_(self.message) else: if isinstance(path, str): path = XABase.XAPath(path) synthesizer.startSpeakingString_toURL_(self.message, path.xa_elem) # Wait for speech to complete while synthesizer.isSpeaking(): time.sleep(0.01)