Sound split (nvaccess#16071)

Fixes nvaccess#12985. Summary of the issue: Feature request: sound split. Sound split divides system sound into two channels: NVDA speaks in one channel (e.g. left), while all other applications play their sound in the other channel (e.g. right). Description of user-facing changes: Added global command NVDA+alt+s that toggles sound split between off, NVDA on the left and NVDA on the right (default behavior). Added a combo box on the Audio panel in NVDA settings that also allows switching between sound split modes. Added a list of checkboxes on the Audio panel that changes the behavior of the NVDA+alt+s command: it allows selecting all the modes that the global command will cycle through. Description of development approach: Added the pycaw library as a dependency. Created file source\audio\soundSplit.py where I implemented all logic. Contrary to what I said before, I managed to implement sound split without creating an extra monitor thread. It works like this: When sound split is toggled, it uses IAudioSessionEnumerator to set volume in all currently active audio sessions. Then it calls sessionManager.RegisterSessionNotification() to create a callback that listens for any new audio sessions being created, and it executes the same volume-updating function upon creation. On the next call or on shutdown we unregister the previous notification callback.
Adriani90 · Mar 13, 2024 · cf235f1 · cf235f1
1 parent 2df9e60
commit cf235f1
Show file tree

Hide file tree

Showing 9 changed files with 355 additions and 0 deletions.
diff --git a/requirements.txt b/requirements.txt
@@ -16,6 +16,9 @@ diff_match_patch_python==1.0.2
 # typing_extensions are required for specifying default value for `TypeVar`, which is not yet possible with any released version of Python (see PEP 696)
 typing-extensions==4.9.0 
 
+# pycaw is a Core Audio Windows Library used for sound split
+pycaw==20240210                                                                                   
+
 # Packaging NVDA
 git+https://github.com/py2exe/py2exe@4e7b2b2c60face592e67cb1bc935172a20fa371d#egg=py2exe
 

diff --git a/source/audio/__init__.py b/source/audio/__init__.py
@@ -0,0 +1,16 @@
+# A part of NonVisual Desktop Access (NVDA)
+# Copyright (C) 2024 NV Access Limited
+# This file is covered by the GNU General Public License.
+# See the file COPYING for more details.
+
+from .soundSplit import (
+	SoundSplitState,
+	setSoundSplitState,
+	toggleSoundSplitState,
+)
+
+__all__ = [
+	"SoundSplitState",
+	"setSoundSplitState",
+	"toggleSoundSplitState",
+]
diff --git a/source/audio/soundSplit.py b/source/audio/soundSplit.py
@@ -0,0 +1,189 @@
+# A part of NonVisual Desktop Access (NVDA)
+# Copyright (C) 2024 NV Access Limited
+# This file is covered by the GNU General Public License.
+# See the file COPYING for more details.
+
+import atexit
+import config
+from enum import IntEnum, unique
+import globalVars
+from logHandler import log
+import nvwave
+from pycaw.api.audiopolicy import IAudioSessionManager2
+from pycaw.callbacks import AudioSessionNotification
+from pycaw.utils import AudioSession, AudioUtilities
+import ui
+from utils.displayString import DisplayStringIntEnum
+from dataclasses import dataclass
+
+# Pair of channel volumes; callers in this module unpack it as (left, right).
+VolumeTupleT = tuple[float, float]
+
+
+@unique
+class SoundSplitState(DisplayStringIntEnum):
+	"""
+	Enumerates the supported ways of dividing sound between NVDA and other applications.
+	Every member can be translated into a (left, right) volume pair for applications
+	via getAppVolume() and into another pair for NVDA itself via getNVDAVolume().
+	"""
+	OFF = 0
+	NVDA_LEFT_APPS_RIGHT = 1
+	NVDA_LEFT_APPS_BOTH = 2
+	NVDA_RIGHT_APPS_LEFT = 3
+	NVDA_RIGHT_APPS_BOTH = 4
+	NVDA_BOTH_APPS_LEFT = 5
+	NVDA_BOTH_APPS_RIGHT = 6
+
+	@property
+	def _displayStringLabels(self) -> dict[IntEnum, str]:
+		"""Maps every member to its translated, user-visible description."""
+		return {
+			# Translators: Sound split state
+			SoundSplitState.OFF: pgettext("SoundSplit", "Disabled"),
+			# Translators: Sound split state
+			SoundSplitState.NVDA_LEFT_APPS_RIGHT: _("NVDA on the left and applications on the right"),
+			# Translators: Sound split state
+			SoundSplitState.NVDA_LEFT_APPS_BOTH: _("NVDA on the left and applications in both channels"),
+			# Translators: Sound split state
+			SoundSplitState.NVDA_RIGHT_APPS_LEFT: _("NVDA on the right and applications on the left"),
+			# Translators: Sound split state
+			SoundSplitState.NVDA_RIGHT_APPS_BOTH: _("NVDA on the right and applications in both channels"),
+			# Translators: Sound split state
+			SoundSplitState.NVDA_BOTH_APPS_LEFT: _("NVDA in both channels and applications on the left"),
+			# Translators: Sound split state
+			SoundSplitState.NVDA_BOTH_APPS_RIGHT: _("NVDA in both channels and applications on the right"),
+		}
+
+	def getAppVolume(self) -> VolumeTupleT:
+		"""
+		Returns the (left, right) channel volumes for audio sessions that do not belong to NVDA.
+		@raise RuntimeError: if this member is not handled by any of the branches below.
+		"""
+		appsInBothChannels = (
+			SoundSplitState.OFF,
+			SoundSplitState.NVDA_LEFT_APPS_BOTH,
+			SoundSplitState.NVDA_RIGHT_APPS_BOTH,
+		)
+		appsOnTheLeft = (SoundSplitState.NVDA_RIGHT_APPS_LEFT, SoundSplitState.NVDA_BOTH_APPS_LEFT)
+		appsOnTheRight = (SoundSplitState.NVDA_LEFT_APPS_RIGHT, SoundSplitState.NVDA_BOTH_APPS_RIGHT)
+		if self in appsInBothChannels:
+			return (1.0, 1.0)
+		if self in appsOnTheLeft:
+			return (1.0, 0.0)
+		if self in appsOnTheRight:
+			return (0.0, 1.0)
+		raise RuntimeError(f"Unexpected or unknown state {self=}")
+
+	def getNVDAVolume(self) -> VolumeTupleT:
+		"""
+		Returns the (left, right) channel volumes for NVDA's own audio session.
+		@raise RuntimeError: if this member is not handled by any of the branches below.
+		"""
+		nvdaInBothChannels = (
+			SoundSplitState.OFF,
+			SoundSplitState.NVDA_BOTH_APPS_LEFT,
+			SoundSplitState.NVDA_BOTH_APPS_RIGHT,
+		)
+		nvdaOnTheLeft = (SoundSplitState.NVDA_LEFT_APPS_RIGHT, SoundSplitState.NVDA_LEFT_APPS_BOTH)
+		nvdaOnTheRight = (SoundSplitState.NVDA_RIGHT_APPS_LEFT, SoundSplitState.NVDA_RIGHT_APPS_BOTH)
+		if self in nvdaInBothChannels:
+			return (1.0, 1.0)
+		if self in nvdaOnTheLeft:
+			return (1.0, 0.0)
+		if self in nvdaOnTheRight:
+			return (0.0, 1.0)
+		raise RuntimeError(f"Unexpected or unknown state {self=}")
+
+
+# Session manager used to (un)register session notifications.
+# It stays None until initialize() assigns it, which only happens when WASAPI is in use.
+audioSessionManager: IAudioSessionManager2 | None = None
+# Notification currently registered with audioSessionManager, or None when nothing is registered.
+# Set by applyToAllAudioSessions() and cleared by unregisterCallback().
+activeCallback: AudioSessionNotification | None = None
+
+
+def initialize() -> None:
+	"""
+	Obtains the audio session manager and applies the sound split state stored in the configuration.
+	Does nothing except logging a debug message when the WASAPI wave player is not in use.
+	"""
+	global audioSessionManager
+	if not nvwave.usingWasapiWavePlayer():
+		log.debug("Cannot initialize sound split as WASAPI is disabled")
+		return
+	audioSessionManager = AudioUtilities.GetAudioSessionManager()
+	configuredState = SoundSplitState(config.conf["audio"]["soundSplitState"])
+	setSoundSplitState(configuredState)
+
+
+@atexit.register
+def terminate() -> None:
+	"""
+	Restores full volume in both channels for all audio sessions and removes the session notification.
+	This function is registered with atexit, so it also runs at interpreter exit,
+	including when initialize() was never called or did not get as far as creating the session manager.
+	"""
+	if not nvwave.usingWasapiWavePlayer():
+		log.debug("Skipping terminating sound split as WASAPI is disabled.")
+		return
+	if audioSessionManager is None:
+		# initialize() never assigned the session manager, so no volumes were changed
+		# and no notification was registered. Calling setSoundSplitState() here would fail
+		# when it tries to register a notification on None.
+		log.debug("Skipping terminating sound split as it has not been initialized.")
+		return
+	setSoundSplitState(SoundSplitState.OFF)
+	unregisterCallback()
+
+
+def applyToAllAudioSessions(
+		callback: AudioSessionNotification,
+		applyToFuture: bool = True,
+) -> None:
+	"""
+		Executes provided callback function on all active audio sessions.
+		Additionally, if applyToFuture is True, then it will register a notification with audio session manager,
+		which will execute the same callback for all future sessions as they are created.
+		That notification will be active until next invocation of this function,
+		or until unregisterCallback() is called.
+
+		@param callback: notification object; its on_session_created() method is called
+			once for every session returned by AudioUtilities.GetAllSessions().
+		@param applyToFuture: when True, callback is also registered with audioSessionManager
+			and remembered in activeCallback. This requires audioSessionManager to be set,
+			which is done by initialize().
+	"""
+	# Remove the notification registered by a previous call, so that only one is tracked at a time.
+	unregisterCallback()
+	if applyToFuture:
+		audioSessionManager.RegisterSessionNotification(callback)
+		# The following call is required to make callback to work:
+		audioSessionManager.GetSessionEnumerator()
+		global activeCallback
+		# Remember the callback so that unregisterCallback() is able to remove it later.
+		activeCallback = callback
+	sessions: list[AudioSession] = AudioUtilities.GetAllSessions()
+	for session in sessions:
+		callback.on_session_created(session)
+
+
+def unregisterCallback() -> None:
+	"""
+	Removes the session notification remembered in activeCallback from the audio session manager.
+	Does nothing when no notification is currently remembered.
+	"""
+	global activeCallback
+	if activeCallback is None:
+		return
+	audioSessionManager.UnregisterSessionNotification(activeCallback)
+	activeCallback = None
+
+
+@dataclass(unsafe_hash=True)
+class VolumeSetter(AudioSessionNotification):
+	"""
+	Session notification that sets per-channel volumes of audio sessions.
+	The session whose process id equals globalVars.appPid receives the NVDA volumes,
+	every other session receives the application volumes.
+	"""
+	# Volumes applied to sessions of other applications.
+	leftVolume: float
+	rightVolume: float
+	# Volumes applied to NVDA's own session.
+	leftNVDAVolume: float
+	rightNVDAVolume: float
+	# Becomes True once a session with a channel count other than 2 has been encountered.
+	foundSessionWithNot2Channels: bool = False
+
+	def on_session_created(self, new_session: AudioSession):
+		"""
+		Sets left and right channel volumes of the given session.
+		Sessions that do not have exactly 2 channels are left untouched:
+		a warning is logged and foundSessionWithNot2Channels is set instead.
+		"""
+		pid = new_session.ProcessId
+		volumeControl = new_session.channelAudioVolume()
+		channelCount = volumeControl.GetChannelCount()
+		if channelCount != 2:
+			log.warning(f"Audio session for pid {pid} has {channelCount} channels instead of 2 - cannot set volume!")
+			self.foundSessionWithNot2Channels = True
+			return
+		if pid == globalVars.appPid:
+			left, right = self.leftNVDAVolume, self.rightNVDAVolume
+		else:
+			left, right = self.leftVolume, self.rightVolume
+		volumeControl.SetChannelVolume(0, left, None)
+		volumeControl.SetChannelVolume(1, right, None)
+
+
+def setSoundSplitState(state: SoundSplitState) -> dict:
+	"""
+	Applies volumes corresponding to the given state to all audio sessions,
+	and registers the same volume setter for sessions created in the future.
+	@param state: sound split state to apply.
+	@return: dictionary with the single key "foundSessionWithNot2Channels",
+		which is True when at least one session did not have exactly 2 channels.
+	"""
+	appLeft, appRight = state.getAppVolume()
+	nvdaLeft, nvdaRight = state.getNVDAVolume()
+	setter = VolumeSetter(
+		leftVolume=appLeft,
+		rightVolume=appRight,
+		leftNVDAVolume=nvdaLeft,
+		rightNVDAVolume=nvdaRight,
+	)
+	applyToAllAudioSessions(setter)
+	return dict(foundSessionWithNot2Channels=setter.foundSessionWithNot2Channels)
+
+
+def toggleSoundSplitState() -> None:
+	"""
+	Switches sound split to the next state from config.conf["audio"]["includedSoundSplitModes"],
+	applies it, stores it in the configuration and reports it to the user.
+	When the WASAPI wave player is not in use, only an explanatory message is reported.
+	When the list of included modes is empty, sound split is switched off
+	rather than failing on the modulo operation below.
+	"""
+	if not nvwave.usingWasapiWavePlayer():
+		message = _(
+			# Translators: error message when wasapi is turned off.
+			"Sound split cannot be used. "
+			"Please enable WASAPI in the Advanced category in NVDA Settings to use it."
+		)
+		ui.message(message)
+		return
+	state = SoundSplitState(config.conf["audio"]["soundSplitState"])
+	allowedStates: list[int] = config.conf["audio"]["includedSoundSplitModes"]
+	if not allowedStates:
+		# An empty list would make the modulo below divide by zero.
+		log.warning("No sound split modes are included in the cycle - switching sound split off")
+		allowedStates = [SoundSplitState.OFF.value]
+	try:
+		i = allowedStates.index(state)
+	except ValueError:
+		# Current state is not among the allowed ones:
+		# -1 makes the increment below select the first allowed state.
+		i = -1
+	i = (i + 1) % len(allowedStates)
+	newState = SoundSplitState(allowedStates[i])
+	result = setSoundSplitState(newState)
+	config.conf["audio"]["soundSplitState"] = newState.value
+	ui.message(newState.displayString)
+	if result["foundSessionWithNot2Channels"]:
+		msg = _(
+			# Translators: warning message when sound split trigger wasn't successful due to one of audio sessions
+			# had number of channels other than 2 .
+			"Warning: couldn't set volumes for sound split: "
+			"one of audio sessions is either mono, or has more than 2 audio channels."
+		)
+		ui.message(msg)
diff --git a/source/config/configSpec.py b/source/config/configSpec.py
@@ -57,6 +57,8 @@
 	soundVolume = integer(default=100, min=0, max=100)
 	audioAwakeTime = integer(default=30, min=0, max=3600)
 	whiteNoiseVolume = integer(default=0, min=0, max=100)
+	soundSplitState = integer(default=0)
+	includedSoundSplitModes = int_list(default=list(0, 1, 2))
 
 # Braille settings
 [braille]

diff --git a/source/core.py b/source/core.py
@@ -276,6 +276,7 @@ def resetConfiguration(factoryDefaults=False):
 	import bdDetect
 	import hwIo
 	import tones
+	import audio
 	log.debug("Terminating vision")
 	vision.terminate()
 	log.debug("Terminating braille")
@@ -286,6 +287,8 @@ def resetConfiguration(factoryDefaults=False):
 	speech.terminate()
 	log.debug("terminating tones")
 	tones.terminate()
+	log.debug("terminating sound split")
+	audio.soundSplit.terminate()
 	log.debug("Terminating background braille display detection")
 	bdDetect.terminate()
 	log.debug("Terminating background i/o")
@@ -313,6 +316,9 @@ def resetConfiguration(factoryDefaults=False):
 	bdDetect.initialize()
 	# Tones
 	tones.initialize()
+	# Sound split
+	log.debug("initializing sound split")
+	audio.soundSplit.initialize()
 	#Speech
 	log.debug("initializing speech")
 	speech.initialize()
@@ -671,6 +677,9 @@ def main():
 	log.debug("Initializing tones")
 	import tones
 	tones.initialize()
+	log.debug("Initializing sound split")
+	import audio
+	audio.soundSplit.initialize()
 	import speechDictHandler
 	log.debug("Speech Dictionary processing")
 	speechDictHandler.initialize()

diff --git a/source/globalCommands.py b/source/globalCommands.py
@@ -66,6 +66,7 @@
 from base64 import b16encode
 import vision
 from utils.security import objectBelowLockScreenAndWindowsIsLocked
+import audio
 
 
 #: Script category for text review commands.
@@ -113,6 +114,9 @@
 #: Script category for document formatting commands.
 # Translators: The name of a category of NVDA commands.
 SCRCAT_DOCUMENTFORMATTING = _("Document formatting")
+#: Script category for audio streaming commands.
+# Translators: The name of a category of NVDA commands.
+SCRCAT_AUDIO = _("Audio")
 
 # Translators: Reported when there are no settings to configure in synth settings ring
 # (example: when there is no setting for language).
@@ -127,6 +131,7 @@ class GlobalCommands(ScriptableObject):
 			# Translators: Describes the Cycle audio ducking mode command.
 			"Cycles through audio ducking modes which determine when NVDA lowers the volume of other sounds"
 		),
+		category=SCRCAT_AUDIO,
 		gesture="kb:NVDA+shift+d"
 	)
 	def script_cycleAudioDuckingMode(self,gesture):
@@ -4461,6 +4466,17 @@ def script_cycleParagraphStyle(self, gesture: "inputCore.InputGesture") -> None:
 		config.conf["documentNavigation"]["paragraphStyle"] = newFlag.name
 		ui.message(newFlag.displayString)
 
+	@script(
+		description=_(
+			# Translators: Describes a command.
+			"Cycles through sound split modes",
+		),
+		category=SCRCAT_AUDIO,
+		gesture="kb:NVDA+alt+s",
+	)
+	def script_cycleSoundSplit(self, gesture: "inputCore.InputGesture") -> None:
+		# The gesture itself is not used; all work is delegated to audio.toggleSoundSplitState().
+		audio.toggleSoundSplitState()
+
 
 #: The single global commands instance.
 #: @type: L{GlobalCommands}