[audio|voice] Add console commands to troubleshoot audio sources and speech-to-text services (#4202)

Signed-off-by: Miguel Álvarez <miguelwork92@gmail.com>
Authored by GiviMAD on 2024-05-12 12:25:46 +02:00, committed by GitHub
parent ec7674752a
commit e14b0a8d0d
7 changed files with 295 additions and 8 deletions

View File: AudioFormat.java

@ -386,8 +386,9 @@ public class AudioFormat {
continue;
}
// Prefer WAVE container
if (!CONTAINER_WAVE.equals(format.getContainer())) {
// Prefer WAVE container or raw SIGNED PCM encoded audio
if (!CONTAINER_WAVE.equals(format.getContainer())
&& !(CONTAINER_NONE.equals(format.getContainer()) && CODEC_PCM_SIGNED.equals(format.getCodec()))) {
continue;
}
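For context, this relaxed preference matters for the new record console command: a source that only exposes raw signed PCM (CONTAINER_NONE with CODEC_PCM_SIGNED) is no longer skipped. The call in AudioManagerImpl.record further down exercises this preference logic, roughly like the following sketch (names taken from this diff):

// Sketch: select a recordable format from the source's capabilities. With this change,
// either a WAVE-container format or a raw signed-PCM format can be chosen.
AudioFormat format = AudioFormat.getBestMatch(audioSource.getSupportedFormats(),
        Set.of(AudioFormat.PCM_SIGNED, AudioFormat.WAV));
if (format == null) {
    throw new AudioException("Unable to find valid audio format");
}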

View File: AudioManager.java

@ -26,6 +26,7 @@ import org.openhab.core.library.types.PercentType;
* @author Kai Kreuzer - removed unwanted dependencies
* @author Christoph Weitkamp - Added parameter to adjust the volume
* @author Wouter Born - Added methods for getting all sinks and sources
* @author Miguel Álvarez - Add record method
*/
@NonNullByDefault
public interface AudioManager {
@ -151,6 +152,15 @@ public interface AudioManager {
*/
void playMelody(String melody, @Nullable String sinkId, @Nullable PercentType volume);
/**
* Record audio as a WAV file of the specified length to the sounds folder.
*
* @param seconds the number of seconds to record.
* @param filename the filename to record to.
* @param sourceId The id of the audio source to use or null for the default.
*/
void record(int seconds, String filename, @Nullable String sourceId) throws AudioException;
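Illustrative use of the new method from code holding an AudioManager reference (a sketch; 'javasound' and the filename are placeholders, and the AudioException must be handled by the caller):

// Record 10 seconds from the "javasound" source into <conf>/sounds/front-door.wav;
// passing null as sourceId records from the default audio source instead.
audioManager.record(10, "front-door.wav", "javasound");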
/**
* Retrieves the current volume of a sink
*

View File: AudioConsoleCommandExtension.java

@ -42,12 +42,14 @@ import org.osgi.service.component.annotations.Reference;
* @author Kai Kreuzer - refactored to match AudioManager implementation
* @author Christoph Weitkamp - Added parameter to adjust the volume
* @author Wouter Born - Sort audio sink and source options
* @author Miguel Álvarez Díez - Add record command
*/
@Component(service = ConsoleCommandExtension.class)
@NonNullByDefault
public class AudioConsoleCommandExtension extends AbstractConsoleCommandExtension {
static final String SUBCMD_PLAY = "play";
static final String SUBCMD_RECORD = "record";
static final String SUBCMD_STREAM = "stream";
static final String SUBCMD_SYNTHESIZE = "synthesize";
static final String SUBCMD_SOURCES = "sources";
@ -71,6 +73,8 @@ public class AudioConsoleCommandExtension extends AbstractConsoleCommandExtensio
"plays a sound file from the sounds folder through the optionally specified audio sink(s)"),
buildCommandUsage(SUBCMD_PLAY + " <sink> <filename> <volume>",
"plays a sound file from the sounds folder through the specified audio sink(s) with the specified volume"),
buildCommandUsage(SUBCMD_RECORD + " [<source>] <seconds> <filename>",
"record an audio file of the specified seconds to the sounds folder. The extension '.wav' will be added to the filename if missed."),
buildCommandUsage(SUBCMD_STREAM + " [<sink>] <url>",
"streams the sound from the url through the optionally specified audio sink(s)"),
buildCommandUsage(SUBCMD_SYNTHESIZE + " [<sink>] \"<melody>\"",
@ -95,6 +99,14 @@ public class AudioConsoleCommandExtension extends AbstractConsoleCommandExtensio
"Specify file to play, and optionally the sink(s) to use (e.g. 'play javasound hello.mp3')");
}
return;
case SUBCMD_RECORD:
if (args.length > 2) {
record(Arrays.copyOfRange(args, 1, args.length), console);
} else {
console.println(
"Specify time to record and the desired filename, and optionally the source to use (e.g. 'record javasound 10 good_morning.wav')");
}
return;
case SUBCMD_STREAM:
if (args.length > 1) {
stream(Arrays.copyOfRange(args, 1, args.length), console);
@ -175,6 +187,21 @@ public class AudioConsoleCommandExtension extends AbstractConsoleCommandExtensio
}
}
private void record(String[] args, Console console) {
try {
@Nullable
String sourceId = args.length > 2 ? args[0] : null;
int seconds = Integer.parseInt(args.length > 2 ? args[1] : args[0]);
String filename = args.length > 2 ? args[2] : args[1];
audioManager.record(seconds, filename, sourceId);
console.println("Recording completed");
} catch (NumberFormatException e) {
console.println("Unable to parse the recording time: " + e.getMessage());
} catch (AudioException e) {
console.println("Recording terminated with audio exception: " + e.getMessage());
}
}
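Putting the usage string and the argument parsing above together, a console session would look roughly like this (assuming the extension is exposed under the usual openhab:audio command scope); the first form records from the default source and gets '.wav' appended, the second names the source explicitly:

openhab> openhab:audio record 10 good_morning
Recording completed
openhab> openhab:audio record javasound 10 good_morning.wav
Recording completed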
private void synthesizeMelody(String[] args, Console console) {
switch (args.length) {
case 1:

View File: AudioManagerImpl.java

@ -14,9 +14,13 @@ package org.openhab.core.audio.internal;
import static java.util.Comparator.comparing;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.net.URI;
import java.nio.ByteBuffer;
import java.nio.file.Path;
import java.text.ParseException;
import java.util.Collection;
import java.util.HashSet;
@ -26,6 +30,10 @@ import java.util.Objects;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import javax.sound.sampled.AudioFileFormat;
import javax.sound.sampled.AudioInputStream;
import javax.sound.sampled.AudioSystem;
import org.eclipse.jdt.annotation.NonNullByDefault;
import org.eclipse.jdt.annotation.Nullable;
import org.openhab.core.OpenHAB;
@ -37,6 +45,7 @@ import org.openhab.core.audio.AudioSource;
import org.openhab.core.audio.AudioStream;
import org.openhab.core.audio.FileAudioStream;
import org.openhab.core.audio.URLAudioStream;
import org.openhab.core.audio.utils.AudioWaveUtils;
import org.openhab.core.audio.utils.ToneSynthesizer;
import org.openhab.core.config.core.ConfigOptionProvider;
import org.openhab.core.config.core.ConfigurableService;
@ -61,6 +70,7 @@ import org.slf4j.LoggerFactory;
* @author Christoph Weitkamp - Added getSupportedStreams() and UnsupportedAudioStreamException
* @author Christoph Weitkamp - Added parameter to adjust the volume
* @author Wouter Born - Sort audio sink and source options
* @author Miguel Álvarez - Add record from source
*/
@NonNullByDefault
@Component(immediate = true, configurationPid = "org.openhab.audio", //
@ -147,8 +157,7 @@ public class AudioManagerImpl implements AudioManager, ConfigOptionProvider {
@Override
public void playFile(String fileName, @Nullable String sinkId, @Nullable PercentType volume) throws AudioException {
Objects.requireNonNull(fileName, "File cannot be played as fileName is null.");
File file = new File(OpenHAB.getConfigFolder() + File.separator + SOUND_DIR + File.separator + fileName);
File file = Path.of(OpenHAB.getConfigFolder(), SOUND_DIR, fileName).toFile();
FileAudioStream is = new FileAudioStream(file);
play(is, sinkId, volume);
}
@ -195,6 +204,67 @@ public class AudioManagerImpl implements AudioManager, ConfigOptionProvider {
}
}
@Override
public void record(int seconds, String filename, @Nullable String sourceId) throws AudioException {
var audioSource = sourceId != null ? getSource(sourceId) : getSource();
if (audioSource == null) {
throw new AudioException("Audio source '" + (sourceId != null ? sourceId : "default") + "' not available");
}
var audioFormat = AudioFormat.getBestMatch(audioSource.getSupportedFormats(),
Set.of(AudioFormat.PCM_SIGNED, AudioFormat.WAV));
if (audioFormat == null) {
throw new AudioException("Unable to find valid audio format");
}
javax.sound.sampled.AudioFormat jAudioFormat = new javax.sound.sampled.AudioFormat(
Objects.requireNonNull(audioFormat.getFrequency()), Objects.requireNonNull(audioFormat.getBitDepth()),
Objects.requireNonNull(audioFormat.getChannels()), true, false);
int secondByteLength = ((int) jAudioFormat.getSampleRate() * jAudioFormat.getFrameSize());
int targetByteLength = secondByteLength * seconds;
ByteBuffer recordBuffer = ByteBuffer.allocate(targetByteLength);
try (var audioStream = audioSource.getInputStream(audioFormat)) {
if (audioFormat.isCompatible(AudioFormat.WAV)) {
AudioWaveUtils.removeFMT(audioStream);
}
while (true) {
try {
var bytes = audioStream.readNBytes(secondByteLength);
if (bytes.length == 0) {
logger.debug("End of input audio stream reached");
break;
}
if (recordBuffer.position() + bytes.length > recordBuffer.limit()) {
logger.debug("Recording limit reached");
break;
}
recordBuffer.put(bytes);
} catch (IOException e) {
logger.warn("Reading audio data failed");
}
}
} catch (IOException e) {
logger.warn("IOException while reading audioStream: {}", e.getMessage());
}
String recordFilename = filename.endsWith(".wav") ? filename : filename + ".wav";
logger.info("Saving record file: {}", recordFilename);
byte[] audioBytes = new byte[recordBuffer.position()];
logger.info("Saving bytes: {}", audioBytes.length);
recordBuffer.rewind();
recordBuffer.get(audioBytes);
File recordFile = new File(
OpenHAB.getConfigFolder() + File.separator + SOUND_DIR + File.separator + recordFilename);
try (FileOutputStream fileOutputStream = new FileOutputStream(recordFile)) {
AudioSystem.write(
new AudioInputStream(new ByteArrayInputStream(audioBytes), jAudioFormat,
(long) Math.ceil(((double) audioBytes.length) / jAudioFormat.getFrameSize())), //
AudioFileFormat.Type.WAVE, //
fileOutputStream //
);
fileOutputStream.flush();
} catch (IOException e) {
logger.warn("IOException while saving record file: {}", e.getMessage());
}
}
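A quick worked example of the buffer sizing above, assuming the source negotiates 16 kHz, 16-bit, mono signed PCM (frame size 2 bytes):

int secondByteLength = 16000 * 2;              // bytes per second of audio
int targetByteLength = secondByteLength * 10;  // 320,000 bytes for a 10 s recording
// The read loop pulls roughly one second per readNBytes call and stops once the buffer
// limit would be exceeded, so the saved WAV payload never exceeds targetByteLength.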
@Override
public PercentType getVolume(@Nullable String sinkId) throws IOException {
AudioSink sink = getSink(sinkId);

View File: VoiceManager.java

@ -14,11 +14,13 @@ package org.openhab.core.voice;
import java.util.Collection;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import org.eclipse.jdt.annotation.NonNullByDefault;
import org.eclipse.jdt.annotation.Nullable;
import org.openhab.core.audio.AudioSource;
import org.openhab.core.audio.AudioStream;
import org.openhab.core.library.types.PercentType;
import org.openhab.core.voice.text.HumanLanguageInterpreter;
import org.openhab.core.voice.text.InterpretationException;
@ -30,6 +32,7 @@ import org.openhab.core.voice.text.InterpretationException;
* @author Christoph Weitkamp - Added parameter to adjust the volume
* @author Laurent Garnier - Updated methods startDialog and added method stopDialog
* @author Miguel Álvarez - New dialog methods using DialogContext
* @author Miguel Álvarez - Add transcribe method
*/
@NonNullByDefault
public interface VoiceManager {
@ -93,6 +96,26 @@ public interface VoiceManager {
*/
void say(String text, @Nullable String voiceId, @Nullable String sinkId, @Nullable PercentType volume);
/**
* Run speech-to-text using the provided audio source.
*
* @param audioSourceId The id of the audio source to listen to, or null to use the default.
* @param sttId The id of the speech-to-text service to use or null to use the default.
* @param locale The locale to use or null to use the default.
* @return a human language transcription, or an empty string if none could be produced.
*/
String transcribe(@Nullable String audioSourceId, @Nullable String sttId, @Nullable Locale locale);
/**
* Run speech-to-text over the provided audio stream.
*
* @param audioStream Audio stream to transcribe.
* @param sttId The id of the speech-to-text service to use or null to use the default.
* @param locale The locale to use or null to use the default.
* @return a human language transcription, or an empty string if none could be produced.
*/
String transcribe(AudioStream audioStream, @Nullable String sttId, @Nullable Locale locale);
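Illustrative calls for the two overloads from code holding a VoiceManager reference (a sketch; the source id, file name and locale are placeholders, and AudioException/IOException handling is left to the caller):

// Live transcription from a named audio source, default STT service and locale.
String fromSource = voiceManager.transcribe("javasound", null, null);

// Transcription of a WAV file from the sounds folder with an explicit locale.
File wav = Path.of(OpenHAB.getConfigFolder(), AudioManager.SOUND_DIR, "good_morning.wav").toFile();
try (AudioStream stream = new FileAudioStream(wav)) {
    String fromFile = voiceManager.transcribe(stream, null, Locale.forLanguageTag("en-US"));
}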
/**
* Interprets the passed string using the default services for HLI and locale.
*

View File: VoiceConsoleCommandExtension.java

@ -14,17 +14,23 @@ package org.openhab.core.voice.internal;
import static java.util.Comparator.comparing;
import java.io.FileNotFoundException;
import java.nio.file.Path;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.MissingResourceException;
import java.util.Objects;
import java.util.stream.Collectors;
import org.eclipse.jdt.annotation.NonNullByDefault;
import org.eclipse.jdt.annotation.Nullable;
import org.openhab.core.OpenHAB;
import org.openhab.core.audio.AudioException;
import org.openhab.core.audio.AudioManager;
import org.openhab.core.audio.FileAudioStream;
import org.openhab.core.i18n.LocaleProvider;
import org.openhab.core.io.console.Console;
import org.openhab.core.io.console.extensions.AbstractConsoleCommandExtension;
@ -52,12 +58,14 @@ import org.osgi.service.component.annotations.Reference;
* @author Kai Kreuzer - Initial contribution
* @author Wouter Born - Sort TTS voices
* @author Laurent Garnier - Added sub-commands startdialog and stopdialog
* @author Miguel Álvarez - Add transcribe command
*/
@Component(service = ConsoleCommandExtension.class)
@NonNullByDefault
public class VoiceConsoleCommandExtension extends AbstractConsoleCommandExtension {
private static final String SUBCMD_SAY = "say";
private static final String SUBCMD_TRANSCRIBE = "transcribe";
private static final String SUBCMD_INTERPRET = "interpret";
private static final String SUBCMD_VOICES = "voices";
private static final String SUBCMD_START_DIALOG = "startdialog";
@ -90,7 +98,9 @@ public class VoiceConsoleCommandExtension extends AbstractConsoleCommandExtensio
@Override
public List<String> getUsages() {
return List.of(buildCommandUsage(SUBCMD_SAY + " <text>", "speaks a text"),
return List.of(buildCommandUsage(SUBCMD_SAY + " <text>", "speaks a text"), buildCommandUsage(
SUBCMD_TRANSCRIBE + " [--source <source>]|[--file <file>] [--stt <stt>] [--locale <locale>]",
"transcribe audio from default source, optionally specify a different source/file, speech-to-text service or locale"),
buildCommandUsage(SUBCMD_INTERPRET + " <command>", "interprets a human language command"),
buildCommandUsage(SUBCMD_VOICES, "lists available voices of the TTS services"),
buildCommandUsage(SUBCMD_DIALOGS, "lists the running dialog and their audio/voice services"),
@ -128,6 +138,10 @@ public class VoiceConsoleCommandExtension extends AbstractConsoleCommandExtensio
}
return;
}
case SUBCMD_TRANSCRIBE -> {
transcribe(args, console);
return;
}
case SUBCMD_INTERPRET -> {
if (args.length > 1) {
interpret(Arrays.copyOfRange(args, 1, args.length), console);
@ -305,6 +319,51 @@ public class VoiceConsoleCommandExtension extends AbstractConsoleCommandExtensio
voiceManager.say(msg.toString());
}
private void transcribe(String[] args, Console console) {
HashMap<String, String> parameters;
try {
parameters = parseNamedParameters(args);
} catch (IllegalStateException e) {
console.println(Objects.requireNonNullElse(e.getMessage(), "An error occurred while parsing the named parameters"));
return;
}
@Nullable
Locale locale;
try {
locale = parameters.containsKey("locale")
? Locale.forLanguageTag(Objects.requireNonNull(parameters.get("locale")))
: null;
} catch (MissingResourceException e) {
console.println("Error: Locale '" + parameters.get("locale") + "' is not correct.");
return;
}
String text;
if (parameters.containsKey("file")) {
FileAudioStream fileAudioStream;
try {
var file = Path.of(OpenHAB.getConfigFolder(), AudioManager.SOUND_DIR, parameters.get("file")).toFile();
if (!file.exists()) {
throw new FileNotFoundException();
}
fileAudioStream = new FileAudioStream(file);
} catch (AudioException e) {
console.println("Error: Unable to open '" + parameters.get("file") + "' file audio stream.");
return;
} catch (FileNotFoundException e) {
console.println("Error: File '" + parameters.get("file") + "' not found in sound folder.");
return;
}
text = voiceManager.transcribe(fileAudioStream, parameters.get("stt"), locale);
} else {
text = voiceManager.transcribe(parameters.get("source"), parameters.get("stt"), locale);
}
if (!text.isBlank()) {
console.println("Transcription: " + text);
} else {
console.println("No transcription generated");
}
}
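Given the usage string and the parameter parsing above, typical invocations would look roughly like this (assuming the extension is exposed under the usual openhab:voice command scope; the source and STT ids are placeholders, and "No transcription generated" is printed when the result is blank):

openhab> openhab:voice transcribe
Transcription: <recognized text>
openhab> openhab:voice transcribe --file good_morning.wav --locale en-US
Transcription: <recognized text>
openhab> openhab:voice transcribe --source javasound --stt <stt-service-id>
Transcription: <recognized text>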
private void listDialogRegistrations(Console console) {
Collection<DialogRegistration> registrations = voiceManager.getDialogRegistrations();
if (!registrations.isEmpty()) {
@ -405,7 +464,7 @@ public class VoiceConsoleCommandExtension extends AbstractConsoleCommandExtensio
.orElse(null);
}
private HashMap<String, String> parseDialogParameters(String[] args) {
private HashMap<String, String> parseNamedParameters(String[] args) {
var parameters = new HashMap<String, String>();
for (int i = 1; i < args.length; i++) {
var arg = args[i].trim();
@ -428,7 +487,7 @@ public class VoiceConsoleCommandExtension extends AbstractConsoleCommandExtensio
if (args.length < 2) {
return dialogContextBuilder;
}
var parameters = parseDialogParameters(args);
var parameters = parseNamedParameters(args);
String sourceId = parameters.remove("source");
if (sourceId != null) {
var source = audioManager.getSource(sourceId);
@ -463,7 +522,7 @@ public class VoiceConsoleCommandExtension extends AbstractConsoleCommandExtensio
}
private DialogRegistration parseDialogRegistration(String[] args) {
var parameters = parseDialogParameters(args);
var parameters = parseNamedParameters(args);
@Nullable
String sourceId = parameters.remove("source");
if (sourceId == null) {

View File: VoiceManagerImpl.java

@ -12,6 +12,7 @@
*/
package org.openhab.core.voice.internal;
import java.io.IOException;
import java.net.URI;
import java.util.ArrayList;
import java.util.Arrays;
@ -28,15 +29,19 @@ import java.util.Map.Entry;
import java.util.Objects;
import java.util.Set;
import java.util.WeakHashMap;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.ScheduledFuture;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.function.Predicate;
import java.util.stream.Collectors;
import org.eclipse.jdt.annotation.NonNullByDefault;
import org.eclipse.jdt.annotation.Nullable;
import org.openhab.core.audio.AudioException;
import org.openhab.core.audio.AudioFormat;
import org.openhab.core.audio.AudioManager;
import org.openhab.core.audio.AudioSink;
@ -55,7 +60,13 @@ import org.openhab.core.storage.StorageService;
import org.openhab.core.voice.DialogContext;
import org.openhab.core.voice.DialogRegistration;
import org.openhab.core.voice.KSService;
import org.openhab.core.voice.RecognitionStartEvent;
import org.openhab.core.voice.RecognitionStopEvent;
import org.openhab.core.voice.STTException;
import org.openhab.core.voice.STTService;
import org.openhab.core.voice.STTServiceHandle;
import org.openhab.core.voice.SpeechRecognitionErrorEvent;
import org.openhab.core.voice.SpeechRecognitionEvent;
import org.openhab.core.voice.TTSException;
import org.openhab.core.voice.TTSService;
import org.openhab.core.voice.Voice;
@ -85,6 +96,7 @@ import org.slf4j.LoggerFactory;
* @author Wouter Born - Sort TTS options
* @author Laurent Garnier - Updated methods startDialog and added method stopDialog
* @author Miguel Álvarez - Use dialog context
* @author Miguel Álvarez - Add transcribe method
*/
@Component(immediate = true, configurationPid = VoiceManagerImpl.CONFIGURATION_PID, //
property = Constants.SERVICE_PID + "=org.openhab.voice")
@ -288,6 +300,91 @@ public class VoiceManagerImpl implements VoiceManager, ConfigOptionProvider, Dia
}
}
@Override
public String transcribe(@Nullable String audioSourceId, @Nullable String sttId, @Nullable Locale locale) {
var audioSource = audioSourceId != null ? audioManager.getSource(audioSourceId) : audioManager.getSource();
if (audioSource == null) {
logger.warn("Audio source '{}' not available", audioSourceId != null ? audioSourceId : "default");
return "";
}
var sttService = sttId != null ? getSTT(sttId) : getSTT();
if (sttService == null) {
logger.warn("Speech-to-text service '{}' not available", sttId != null ? sttId : "default");
return "";
}
var sttFormat = VoiceManagerImpl.getBestMatch(audioSource.getSupportedFormats(),
sttService.getSupportedFormats());
if (sttFormat == null) {
logger.warn("No compatible audio format found for stt '{}' and the provided audio stream",
sttService.getId());
return "";
}
AudioStream audioStream;
try {
audioStream = audioSource.getInputStream(sttFormat);
} catch (AudioException e) {
logger.warn("AudioException creating source audio stream: {}", e.getMessage());
return "";
}
return transcribe(audioStream, sttService, locale);
}
@Override
public String transcribe(AudioStream audioStream, @Nullable String sttId, @Nullable Locale locale) {
var sttService = sttId != null ? getSTT(sttId) : getSTT();
if (sttService == null) {
logger.warn("Speech-to-text service '{}' not available", sttId != null ? sttId : "default");
return "";
}
var sttFormat = VoiceManagerImpl.getBestMatch(Set.of(audioStream.getFormat()),
sttService.getSupportedFormats());
if (sttFormat == null) {
logger.warn("No compatible audio format found for stt '{}' and the provided audio stream",
sttService.getId());
return "";
}
return transcribe(audioStream, sttService, locale);
}
private String transcribe(AudioStream audioStream, STTService sttService, @Nullable Locale locale) {
Locale nullSafeLocale = locale != null ? locale : localeProvider.getLocale();
CompletableFuture<String> transcriptionResult = new CompletableFuture<>();
STTServiceHandle sttServiceHandle;
try {
sttServiceHandle = sttService.recognize(sttEvent -> {
if (sttEvent instanceof SpeechRecognitionEvent sre) {
logger.debug("SpeechRecognitionEvent event received");
String transcript = sre.getTranscript();
logger.debug("Text recognized: {}", transcript);
transcriptionResult.complete(transcript);
} else if (sttEvent instanceof RecognitionStartEvent) {
logger.debug("RecognitionStartEvent event received");
} else if (sttEvent instanceof RecognitionStopEvent) {
logger.debug("RecognitionStopEvent event received");
} else if (sttEvent instanceof SpeechRecognitionErrorEvent sre) {
logger.debug("SpeechRecognitionErrorEvent event received");
transcriptionResult.completeExceptionally(
new IOException("SpeechRecognitionErrorEvent emitted: " + sre.getMessage()));
}
}, audioStream, nullSafeLocale, new HashSet<>());
} catch (STTException e) {
logger.warn("STTException while running transcription");
return "";
}
try {
return transcriptionResult.get(60, TimeUnit.SECONDS);
} catch (InterruptedException e) {
logger.warn("InterruptedException waiting for transcription: {}", e.getMessage());
sttServiceHandle.abort();
} catch (ExecutionException e) {
logger.warn("ExecutionException running transcription: {}", e.getCause().getMessage());
} catch (TimeoutException e) {
logger.warn("TimeoutException waiting for transcription");
sttServiceHandle.abort();
}
return "";
}
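Combined with the new AudioManager.record, this makes a simple end-to-end check of an audio source plus an STT service possible from code as well as from the console. A hedged sketch (placeholder ids and file name, exception handling omitted; the blocking wait above caps a single transcription at 60 seconds):

// 1. Capture 10 s from a source into <conf>/sounds/check.wav.
audioManager.record(10, "check.wav", "javasound");

// 2. Transcribe the recorded file with the default STT service; an empty string means no result.
File wav = Path.of(OpenHAB.getConfigFolder(), AudioManager.SOUND_DIR, "check.wav").toFile();
String text = voiceManager.transcribe(new FileAudioStream(wav), null, null);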
@Override
public String interpret(String text) throws InterpretationException {
return interpret(text, null);