[audio|voice] Add console commands to troubleshoot audio sources and speech-to-text services (#4202)
Signed-off-by: Miguel Álvarez <miguelwork92@gmail.com>
commit e14b0a8d0d (parent ec7674752a)
@@ -386,8 +386,9 @@ public class AudioFormat {
                     continue;
                 }

-                // Prefer WAVE container
-                if (!CONTAINER_WAVE.equals(format.getContainer())) {
+                // Prefer WAVE container or raw SIGNED PCM encoded audio
+                if (!CONTAINER_WAVE.equals(format.getContainer())
+                        && !(CONTAINER_NONE.equals(format.getContainer()) && CODEC_PCM_SIGNED.equals(format.getCodec()))) {
                     continue;
                 }
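The relaxed check above matters for the new record path added later in this commit: AudioManagerImpl#record asks AudioFormat.getBestMatch for either WAVE or raw signed PCM, so a source that only exposes raw signed PCM without a WAVE container is no longer filtered out. A minimal sketch of that selection, using only constants and methods that appear in this commit (the printed result still depends on the usual compatibility rules):

import java.util.Set;

import org.openhab.core.audio.AudioFormat;

public class BestMatchSketch {
    public static void main(String[] args) {
        // Formats offered by some audio source: the library's raw signed PCM constant
        // (no WAVE container).
        Set<AudioFormat> sourceFormats = Set.of(AudioFormat.PCM_SIGNED);
        // Same preference set the new record() implementation uses further below.
        AudioFormat match = AudioFormat.getBestMatch(sourceFormats,
                Set.of(AudioFormat.PCM_SIGNED, AudioFormat.WAV));
        // With the relaxed check, such a format is no longer rejected merely for lacking
        // a WAVE container; the exact match returned depends on the remaining rules.
        System.out.println(match);
    }
}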
@@ -26,6 +26,7 @@ import org.openhab.core.library.types.PercentType;
  * @author Kai Kreuzer - removed unwanted dependencies
  * @author Christoph Weitkamp - Added parameter to adjust the volume
  * @author Wouter Born - Added methods for getting all sinks and sources
+ * @author Miguel Álvarez - Add record method
  */
 @NonNullByDefault
 public interface AudioManager {
@@ -151,6 +152,15 @@ public interface AudioManager {
      */
     void playMelody(String melody, @Nullable String sinkId, @Nullable PercentType volume);

+    /**
+     * Record audio as a WAV file of the specified length to the sounds folder.
+     *
+     * @param seconds seconds to record.
+     * @param filename record filename.
+     * @param sourceId The id of the audio source to use or null for the default.
+     */
+    void record(int seconds, String filename, @Nullable String sourceId) throws AudioException;
+
     /**
      * Retrieves the current volume of a sink
      *
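The console command added below is a thin wrapper around this new API, so bindings or scripts holding an AudioManager reference can record directly as well. A minimal sketch, assuming the reference is injected via OSGi Declarative Services; the class name is hypothetical:

import org.openhab.core.audio.AudioException;
import org.openhab.core.audio.AudioManager;

/** Illustrative sketch only: a caller that already holds an AudioManager reference. */
public class RecordingExample {

    private final AudioManager audioManager;

    public RecordingExample(AudioManager audioManager) {
        this.audioManager = audioManager;
    }

    public void captureSample() {
        try {
            // Record 10 seconds from the default source (sourceId = null) into
            // <openHAB conf>/sounds/sample.wav.
            audioManager.record(10, "sample.wav", null);
        } catch (AudioException e) {
            // Thrown when the source is unavailable or no compatible format is found.
            System.err.println("Recording failed: " + e.getMessage());
        }
    }
}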
@@ -42,12 +42,14 @@ import org.osgi.service.component.annotations.Reference;
  * @author Kai Kreuzer - refactored to match AudioManager implementation
  * @author Christoph Weitkamp - Added parameter to adjust the volume
  * @author Wouter Born - Sort audio sink and source options
+ * @author Miguel Álvarez Díez - Add record command
  */
 @Component(service = ConsoleCommandExtension.class)
 @NonNullByDefault
 public class AudioConsoleCommandExtension extends AbstractConsoleCommandExtension {

     static final String SUBCMD_PLAY = "play";
+    static final String SUBCMD_RECORD = "record";
     static final String SUBCMD_STREAM = "stream";
     static final String SUBCMD_SYNTHESIZE = "synthesize";
     static final String SUBCMD_SOURCES = "sources";
@@ -71,6 +73,8 @@ public class AudioConsoleCommandExtension extends AbstractConsoleCommandExtension {
                         "plays a sound file from the sounds folder through the optionally specified audio sink(s)"),
                 buildCommandUsage(SUBCMD_PLAY + " <sink> <filename> <volume>",
                         "plays a sound file from the sounds folder through the specified audio sink(s) with the specified volume"),
+                buildCommandUsage(SUBCMD_RECORD + " [<source>] <seconds> <filename>",
+                        "record an audio file of the specified seconds to the sounds folder. The extension '.wav' will be added to the filename if missed."),
                 buildCommandUsage(SUBCMD_STREAM + " [<sink>] <url>",
                         "streams the sound from the url through the optionally specified audio sink(s)"),
                 buildCommandUsage(SUBCMD_SYNTHESIZE + " [<sink>] \"<melody>\"",
@@ -95,6 +99,14 @@ public class AudioConsoleCommandExtension extends AbstractConsoleCommandExtension {
                             "Specify file to play, and optionally the sink(s) to use (e.g. 'play javasound hello.mp3')");
                 }
                 return;
+            case SUBCMD_RECORD:
+                if (args.length > 2) {
+                    record(Arrays.copyOfRange(args, 1, args.length), console);
+                } else {
+                    console.println(
+                            "Specify time to record and the desired filename, and optionally the source to use (e.g. 'record javasound 10 good_morning.wav')");
+                }
+                return;
             case SUBCMD_STREAM:
                 if (args.length > 1) {
                     stream(Arrays.copyOfRange(args, 1, args.length), console);
@@ -175,6 +187,21 @@ public class AudioConsoleCommandExtension extends AbstractConsoleCommandExtension {
         }
     }

+    private void record(String[] args, Console console) {
+        try {
+            @Nullable
+            String sourceId = args.length > 2 ? args[0] : null;
+            int seconds = Integer.parseInt(args.length > 2 ? args[1] : args[0]);
+            String filename = args.length > 2 ? args[2] : args[1];
+            audioManager.record(seconds, filename, sourceId);
+            console.println("Recording completed");
+        } catch (NumberFormatException e) {
+            console.println("Unable to parse the recording time: " + e.getMessage());
+        } catch (AudioException e) {
+            console.println("Recording terminated with audio exception: " + e.getMessage());
+        }
+    }
+
     private void synthesizeMelody(String[] args, Console console) {
         switch (args.length) {
             case 1:
@@ -14,9 +14,13 @@ package org.openhab.core.audio.internal;

 import static java.util.Comparator.comparing;

+import java.io.ByteArrayInputStream;
 import java.io.File;
+import java.io.FileOutputStream;
 import java.io.IOException;
 import java.net.URI;
+import java.nio.ByteBuffer;
+import java.nio.file.Path;
 import java.text.ParseException;
 import java.util.Collection;
 import java.util.HashSet;
@@ -26,6 +30,10 @@ import java.util.Objects;
 import java.util.Set;
 import java.util.concurrent.ConcurrentHashMap;

+import javax.sound.sampled.AudioFileFormat;
+import javax.sound.sampled.AudioInputStream;
+import javax.sound.sampled.AudioSystem;
+
 import org.eclipse.jdt.annotation.NonNullByDefault;
 import org.eclipse.jdt.annotation.Nullable;
 import org.openhab.core.OpenHAB;
@@ -37,6 +45,7 @@ import org.openhab.core.audio.AudioSource;
 import org.openhab.core.audio.AudioStream;
 import org.openhab.core.audio.FileAudioStream;
 import org.openhab.core.audio.URLAudioStream;
+import org.openhab.core.audio.utils.AudioWaveUtils;
 import org.openhab.core.audio.utils.ToneSynthesizer;
 import org.openhab.core.config.core.ConfigOptionProvider;
 import org.openhab.core.config.core.ConfigurableService;
@@ -61,6 +70,7 @@ import org.slf4j.LoggerFactory;
  * @author Christoph Weitkamp - Added getSupportedStreams() and UnsupportedAudioStreamException
  * @author Christoph Weitkamp - Added parameter to adjust the volume
  * @author Wouter Born - Sort audio sink and source options
+ * @author Miguel Álvarez - Add record from source
  */
 @NonNullByDefault
 @Component(immediate = true, configurationPid = "org.openhab.audio", //
@@ -147,8 +157,7 @@ public class AudioManagerImpl implements AudioManager, ConfigOptionProvider {
     @Override
     public void playFile(String fileName, @Nullable String sinkId, @Nullable PercentType volume) throws AudioException {
         Objects.requireNonNull(fileName, "File cannot be played as fileName is null.");
-
-        File file = new File(OpenHAB.getConfigFolder() + File.separator + SOUND_DIR + File.separator + fileName);
+        File file = Path.of(OpenHAB.getConfigFolder(), SOUND_DIR, fileName).toFile();
         FileAudioStream is = new FileAudioStream(file);
         play(is, sinkId, volume);
     }
@@ -195,6 +204,67 @@ public class AudioManagerImpl implements AudioManager, ConfigOptionProvider {
         }
     }

+    @Override
+    public void record(int seconds, String filename, @Nullable String sourceId) throws AudioException {
+        var audioSource = sourceId != null ? getSource(sourceId) : getSource();
+        if (audioSource == null) {
+            throw new AudioException("Audio source '" + (sourceId != null ? sourceId : "default") + "' not available");
+        }
+        var audioFormat = AudioFormat.getBestMatch(audioSource.getSupportedFormats(),
+                Set.of(AudioFormat.PCM_SIGNED, AudioFormat.WAV));
+        if (audioFormat == null) {
+            throw new AudioException("Unable to find valid audio format");
+        }
+        javax.sound.sampled.AudioFormat jAudioFormat = new javax.sound.sampled.AudioFormat(
+                Objects.requireNonNull(audioFormat.getFrequency()), Objects.requireNonNull(audioFormat.getBitDepth()),
+                Objects.requireNonNull(audioFormat.getChannels()), true, false);
+        int secondByteLength = ((int) jAudioFormat.getSampleRate() * jAudioFormat.getFrameSize());
+        int targetByteLength = secondByteLength * seconds;
+        ByteBuffer recordBuffer = ByteBuffer.allocate(targetByteLength);
+        try (var audioStream = audioSource.getInputStream(audioFormat)) {
+            if (audioFormat.isCompatible(AudioFormat.WAV)) {
+                AudioWaveUtils.removeFMT(audioStream);
+            }
+            while (true) {
+                try {
+                    var bytes = audioStream.readNBytes(secondByteLength);
+                    if (bytes.length == 0) {
+                        logger.debug("End of input audio stream reached");
+                        break;
+                    }
+                    if (recordBuffer.position() + bytes.length > recordBuffer.limit()) {
+                        logger.debug("Recording limit reached");
+                        break;
+                    }
+                    recordBuffer.put(bytes);
+                } catch (IOException e) {
+                    logger.warn("Reading audio data failed");
+                }
+            }
+        } catch (IOException e) {
+            logger.warn("IOException while reading audioStream: {}", e.getMessage());
+        }
+        String recordFilename = filename.endsWith(".wav") ? filename : filename + ".wav";
+        logger.info("Saving record file: {}", recordFilename);
+        byte[] audioBytes = new byte[recordBuffer.position()];
+        logger.info("Saving bytes: {}", audioBytes.length);
+        recordBuffer.rewind();
+        recordBuffer.get(audioBytes);
+        File recordFile = new File(
+                OpenHAB.getConfigFolder() + File.separator + SOUND_DIR + File.separator + recordFilename);
+        try (FileOutputStream fileOutputStream = new FileOutputStream(recordFile)) {
+            AudioSystem.write(
+                    new AudioInputStream(new ByteArrayInputStream(audioBytes), jAudioFormat,
+                            (long) Math.ceil(((double) audioBytes.length) / jAudioFormat.getFrameSize())), //
+                    AudioFileFormat.Type.WAVE, //
+                    fileOutputStream //
+            );
+            fileOutputStream.flush();
+        } catch (IOException e) {
+            logger.warn("IOException while saving record file: {}", e.getMessage());
+        }
+    }
+
     @Override
     public PercentType getVolume(@Nullable String sinkId) throws IOException {
         AudioSink sink = getSink(sinkId);
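The buffer sizing in the implementation above is plain PCM arithmetic: one second of uncompressed audio takes sampleRate * frameSize bytes, and the frame size is bitDepth / 8 * channels. A quick worked example with assumed values (16 kHz, 16-bit, mono; the real numbers depend on the format negotiated with the source):

public class RecordBufferMath {
    public static void main(String[] args) {
        int sampleRate = 16_000;                       // Hz (example value)
        int frameSize = (16 / 8) * 1;                  // bitDepth/8 * channels = 2 bytes per frame
        int secondByteLength = sampleRate * frameSize; // 32_000 bytes of PCM per second
        int targetByteLength = secondByteLength * 10;  // a 10 s recording buffers 320_000 bytes
        System.out.println(secondByteLength + " bytes/s, " + targetByteLength + " bytes total");
    }
}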
@@ -14,11 +14,13 @@ package org.openhab.core.voice;

 import java.util.Collection;
 import java.util.List;
+import java.util.Locale;
 import java.util.Set;

 import org.eclipse.jdt.annotation.NonNullByDefault;
 import org.eclipse.jdt.annotation.Nullable;
 import org.openhab.core.audio.AudioSource;
+import org.openhab.core.audio.AudioStream;
 import org.openhab.core.library.types.PercentType;
 import org.openhab.core.voice.text.HumanLanguageInterpreter;
 import org.openhab.core.voice.text.InterpretationException;
@@ -30,6 +32,7 @@ import org.openhab.core.voice.text.InterpretationException;
  * @author Christoph Weitkamp - Added parameter to adjust the volume
  * @author Laurent Garnier - Updated methods startDialog and added method stopDialog
  * @author Miguel Álvarez - New dialog methods using DialogContext
+ * @author Miguel Álvarez - Add transcribe method
  */
 @NonNullByDefault
 public interface VoiceManager {
@@ -93,6 +96,26 @@ public interface VoiceManager {
      */
     void say(String text, @Nullable String voiceId, @Nullable String sinkId, @Nullable PercentType volume);

+    /**
+     * Run speech-to-text using the provided audio source.
+     *
+     * @param audioSourceId Audio source to listen.
+     * @param sttId The id of the speech-to-text service to use or null to use the default.
+     * @param locale The locale to use or null to use the default.
+     * @return a human language transcription or empty.
+     */
+    String transcribe(@Nullable String audioSourceId, @Nullable String sttId, @Nullable Locale locale);
+
+    /**
+     * Run speech-to-text over the provided audio stream.
+     *
+     * @param audioStream Audio stream to transcribe.
+     * @param sttId The id of the speech-to-text service to use or null to use the default.
+     * @param locale The locale to use or null to use the default.
+     * @return a human language transcription or empty.
+     */
+    String transcribe(AudioStream audioStream, @Nullable String sttId, @Nullable Locale locale);
+
     /**
      * Interprets the passed string using the default services for HLI and locale.
     *
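Besides the console command, these methods can be called from any component that holds a VoiceManager reference. A minimal sketch of the stream-based variant, assuming a WAV file on disk; class and method names here are illustrative:

import java.io.File;
import java.util.Locale;

import org.openhab.core.audio.AudioException;
import org.openhab.core.audio.FileAudioStream;
import org.openhab.core.voice.VoiceManager;

/** Illustrative sketch only: transcribing a WAV file with the new API. */
public class TranscriptionExample {

    public String transcribeFile(VoiceManager voiceManager, File wavFile) {
        try {
            FileAudioStream stream = new FileAudioStream(wavFile);
            // null sttId selects the default speech-to-text service.
            return voiceManager.transcribe(stream, null, Locale.ENGLISH);
        } catch (AudioException e) {
            // File missing or not a supported audio format.
            return "";
        }
    }
}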
@@ -14,17 +14,23 @@ package org.openhab.core.voice.internal;

 import static java.util.Comparator.comparing;

+import java.io.FileNotFoundException;
+import java.nio.file.Path;
 import java.util.Arrays;
 import java.util.Collection;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Locale;
+import java.util.MissingResourceException;
 import java.util.Objects;
 import java.util.stream.Collectors;

 import org.eclipse.jdt.annotation.NonNullByDefault;
 import org.eclipse.jdt.annotation.Nullable;
+import org.openhab.core.OpenHAB;
+import org.openhab.core.audio.AudioException;
 import org.openhab.core.audio.AudioManager;
+import org.openhab.core.audio.FileAudioStream;
 import org.openhab.core.i18n.LocaleProvider;
 import org.openhab.core.io.console.Console;
 import org.openhab.core.io.console.extensions.AbstractConsoleCommandExtension;
@@ -52,12 +58,14 @@ import org.osgi.service.component.annotations.Reference;
  * @author Kai Kreuzer - Initial contribution
  * @author Wouter Born - Sort TTS voices
  * @author Laurent Garnier - Added sub-commands startdialog and stopdialog
+ * @author Miguel Álvarez - Add transcribe command
  */
 @Component(service = ConsoleCommandExtension.class)
 @NonNullByDefault
 public class VoiceConsoleCommandExtension extends AbstractConsoleCommandExtension {

     private static final String SUBCMD_SAY = "say";
+    private static final String SUBCMD_TRANSCRIBE = "transcribe";
     private static final String SUBCMD_INTERPRET = "interpret";
     private static final String SUBCMD_VOICES = "voices";
     private static final String SUBCMD_START_DIALOG = "startdialog";
@@ -90,7 +98,9 @@ public class VoiceConsoleCommandExtension extends AbstractConsoleCommandExtension {

     @Override
     public List<String> getUsages() {
-        return List.of(buildCommandUsage(SUBCMD_SAY + " <text>", "speaks a text"),
+        return List.of(buildCommandUsage(SUBCMD_SAY + " <text>", "speaks a text"), buildCommandUsage(
+                SUBCMD_TRANSCRIBE + " [--source <source>]|[--file <file>] [--stt <stt>] [--locale <locale>]",
+                "transcribe audio from default source, optionally specify a different source/file, speech-to-text service or locale"),
                 buildCommandUsage(SUBCMD_INTERPRET + " <command>", "interprets a human language command"),
                 buildCommandUsage(SUBCMD_VOICES, "lists available voices of the TTS services"),
                 buildCommandUsage(SUBCMD_DIALOGS, "lists the running dialog and their audio/voice services"),
@@ -128,6 +138,10 @@ public class VoiceConsoleCommandExtension extends AbstractConsoleCommandExtension {
                     }
                     return;
                 }
+                case SUBCMD_TRANSCRIBE -> {
+                    transcribe(args, console);
+                    return;
+                }
                 case SUBCMD_INTERPRET -> {
                     if (args.length > 1) {
                         interpret(Arrays.copyOfRange(args, 1, args.length), console);
@@ -305,6 +319,51 @@ public class VoiceConsoleCommandExtension extends AbstractConsoleCommandExtension {
         voiceManager.say(msg.toString());
     }

+    private void transcribe(String[] args, Console console) {
+        HashMap<String, String> parameters;
+        try {
+            parameters = parseNamedParameters(args);
+        } catch (IllegalStateException e) {
+            console.println(Objects.requireNonNullElse(e.getMessage(), "An error parsing positional parameters"));
+            return;
+        }
+        @Nullable
+        Locale locale;
+        try {
+            locale = parameters.containsKey("locale")
+                    ? Locale.forLanguageTag(Objects.requireNonNull(parameters.get("locale")))
+                    : null;
+        } catch (MissingResourceException e) {
+            console.println("Error: Locale '" + parameters.get("locale") + "' is not correct.");
+            return;
+        }
+        String text;
+        if (parameters.containsKey("file")) {
+            FileAudioStream fileAudioStream;
+            try {
+                var file = Path.of(OpenHAB.getConfigFolder(), AudioManager.SOUND_DIR, parameters.get("file")).toFile();
+                if (!file.exists()) {
+                    throw new FileNotFoundException();
+                }
+                fileAudioStream = new FileAudioStream(file);
+            } catch (AudioException e) {
+                console.println("Error: Unable to open '" + parameters.get("file") + "' file audio stream.");
+                return;
+            } catch (FileNotFoundException e) {
+                console.println("Error: File '" + parameters.get("file") + "' not found in sound folder.");
+                return;
+            }
+            text = voiceManager.transcribe(fileAudioStream, parameters.get("stt"), locale);
+        } else {
+            text = voiceManager.transcribe(parameters.get("source"), parameters.get("stt"), null);
+        }
+        if (!text.isBlank()) {
+            console.println("Transcription: " + text);
+        } else {
+            console.println("No transcription generated");
+        }
+    }
+
     private void listDialogRegistrations(Console console) {
         Collection<DialogRegistration> registrations = voiceManager.getDialogRegistrations();
         if (!registrations.isEmpty()) {
@@ -405,7 +464,7 @@ public class VoiceConsoleCommandExtension extends AbstractConsoleCommandExtension {
                 .orElse(null);
     }

-    private HashMap<String, String> parseDialogParameters(String[] args) {
+    private HashMap<String, String> parseNamedParameters(String[] args) {
         var parameters = new HashMap<String, String>();
         for (int i = 1; i < args.length; i++) {
             var arg = args[i].trim();
@@ -428,7 +487,7 @@ public class VoiceConsoleCommandExtension extends AbstractConsoleCommandExtension {
         if (args.length < 2) {
             return dialogContextBuilder;
         }
-        var parameters = parseDialogParameters(args);
+        var parameters = parseNamedParameters(args);
         String sourceId = parameters.remove("source");
         if (sourceId != null) {
             var source = audioManager.getSource(sourceId);
@@ -463,7 +522,7 @@ public class VoiceConsoleCommandExtension extends AbstractConsoleCommandExtension {
     }

     private DialogRegistration parseDialogRegistration(String[] args) {
-        var parameters = parseDialogParameters(args);
+        var parameters = parseNamedParameters(args);
         @Nullable
         String sourceId = parameters.remove("source");
         if (sourceId == null) {
@@ -12,6 +12,7 @@
  */
 package org.openhab.core.voice.internal;

+import java.io.IOException;
 import java.net.URI;
 import java.util.ArrayList;
 import java.util.Arrays;
@@ -28,15 +29,19 @@ import java.util.Map.Entry;
 import java.util.Objects;
 import java.util.Set;
 import java.util.WeakHashMap;
+import java.util.concurrent.CompletableFuture;
 import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.ExecutionException;
 import java.util.concurrent.ScheduledExecutorService;
 import java.util.concurrent.ScheduledFuture;
 import java.util.concurrent.TimeUnit;
+import java.util.concurrent.TimeoutException;
 import java.util.function.Predicate;
 import java.util.stream.Collectors;

 import org.eclipse.jdt.annotation.NonNullByDefault;
 import org.eclipse.jdt.annotation.Nullable;
 import org.openhab.core.audio.AudioException;
 import org.openhab.core.audio.AudioFormat;
 import org.openhab.core.audio.AudioManager;
 import org.openhab.core.audio.AudioSink;
@@ -55,7 +60,13 @@ import org.openhab.core.storage.StorageService;
 import org.openhab.core.voice.DialogContext;
 import org.openhab.core.voice.DialogRegistration;
 import org.openhab.core.voice.KSService;
+import org.openhab.core.voice.RecognitionStartEvent;
+import org.openhab.core.voice.RecognitionStopEvent;
+import org.openhab.core.voice.STTException;
 import org.openhab.core.voice.STTService;
+import org.openhab.core.voice.STTServiceHandle;
+import org.openhab.core.voice.SpeechRecognitionErrorEvent;
+import org.openhab.core.voice.SpeechRecognitionEvent;
 import org.openhab.core.voice.TTSException;
 import org.openhab.core.voice.TTSService;
 import org.openhab.core.voice.Voice;
@@ -85,6 +96,7 @@ import org.slf4j.LoggerFactory;
  * @author Wouter Born - Sort TTS options
  * @author Laurent Garnier - Updated methods startDialog and added method stopDialog
  * @author Miguel Álvarez - Use dialog context
+ * @author Miguel Álvarez - Add transcribe method
  */
 @Component(immediate = true, configurationPid = VoiceManagerImpl.CONFIGURATION_PID, //
         property = Constants.SERVICE_PID + "=org.openhab.voice")
@@ -288,6 +300,91 @@ public class VoiceManagerImpl implements VoiceManager, ConfigOptionProvider, Dia
         }
     }

+    @Override
+    public String transcribe(@Nullable String audioSourceId, @Nullable String sttId, @Nullable Locale locale) {
+        var audioSource = audioSourceId != null ? audioManager.getSource(audioSourceId) : audioManager.getSource();
+        if (audioSource == null) {
+            logger.warn("Audio source '{}' not available", audioSourceId != null ? audioSourceId : "default");
+            return "";
+        }
+        var sttService = sttId != null ? getSTT(sttId) : getSTT();
+        if (sttService == null) {
+            logger.warn("Speech-to-text service '{}' not available", sttId != null ? sttId : "default");
+            return "";
+        }
+        var sttFormat = VoiceManagerImpl.getBestMatch(audioSource.getSupportedFormats(),
+                sttService.getSupportedFormats());
+        if (sttFormat == null) {
+            logger.warn("No compatible audio format found for stt '{}' and the provided audio stream",
+                    sttService.getId());
+            return "";
+        }
+        AudioStream audioStream;
+        try {
+            audioStream = audioSource.getInputStream(sttFormat);
+        } catch (AudioException e) {
+            logger.warn("AudioException creating source audio stream: {}", e.getMessage());
+            return "";
+        }
+        return transcribe(audioStream, sttService, locale);
+    }
+
+    @Override
+    public String transcribe(AudioStream audioStream, @Nullable String sttId, @Nullable Locale locale) {
+        var sttService = sttId != null ? getSTT(sttId) : getSTT();
+        if (sttService == null) {
+            logger.warn("Speech-to-text service '{}' not available", sttId != null ? sttId : "default");
+            return "";
+        }
+        var sttFormat = VoiceManagerImpl.getBestMatch(Set.of(audioStream.getFormat()),
+                sttService.getSupportedFormats());
+        if (sttFormat == null) {
+            logger.warn("No compatible audio format found for stt '{}' and the provided audio stream",
+                    sttService.getId());
+            return "";
+        }
+        return transcribe(audioStream, sttService, locale);
+    }
+
+    private String transcribe(AudioStream audioStream, STTService sttService, @Nullable Locale locale) {
+        Locale nullSafeLocale = locale != null ? locale : localeProvider.getLocale();
+        CompletableFuture<String> transcriptionResult = new CompletableFuture<>();
+        STTServiceHandle sttServiceHandle;
+        try {
+            sttServiceHandle = sttService.recognize(sttEvent -> {
+                if (sttEvent instanceof SpeechRecognitionEvent sre) {
+                    logger.debug("SpeechRecognitionEvent event received");
+                    String transcript = sre.getTranscript();
+                    logger.debug("Text recognized: {}", transcript);
+                    transcriptionResult.complete(transcript);
+                } else if (sttEvent instanceof RecognitionStartEvent) {
+                    logger.debug("RecognitionStartEvent event received");
+                } else if (sttEvent instanceof RecognitionStopEvent) {
+                    logger.debug("RecognitionStopEvent event received");
+                } else if (sttEvent instanceof SpeechRecognitionErrorEvent sre) {
+                    logger.debug("SpeechRecognitionErrorEvent event received");
+                    transcriptionResult.completeExceptionally(
+                            new IOException("SpeechRecognitionErrorEvent emitted: " + sre.getMessage()));
+                }
+            }, audioStream, nullSafeLocale, new HashSet<>());
+        } catch (STTException e) {
+            logger.warn("STTException while running transcription");
+            return "";
+        }
+        try {
+            return transcriptionResult.get(60, TimeUnit.SECONDS);
+        } catch (InterruptedException e) {
+            logger.warn("InterruptedException waiting for transcription: {}", e.getMessage());
+            sttServiceHandle.abort();
+        } catch (ExecutionException e) {
+            logger.warn("ExecutionException running transcription: {}", e.getCause().getMessage());
+        } catch (TimeoutException e) {
+            logger.warn("TimeoutException waiting for transcription");
+            sttServiceHandle.abort();
+        }
+        return "";
+    }
+
     @Override
     public String interpret(String text) throws InterpretationException {
         return interpret(text, null);
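The private transcribe helper above bridges the callback-based STT API into a blocking call: the listener completes a CompletableFuture on the first SpeechRecognitionEvent, fails it on a SpeechRecognitionErrorEvent, and the caller waits at most 60 seconds before aborting the recognition. A distilled, self-contained sketch of that pattern; the Recognizer interface below is a hypothetical stand-in, not an openHAB type:

import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.function.Consumer;

public class CallbackToBlockingSketch {

    /** Hypothetical stand-in for a recognizer that reports results via callback. */
    interface Recognizer {
        void recognize(Consumer<String> onResult, Consumer<Exception> onError);
    }

    /** Waits up to the given timeout for a single callback result, else returns "". */
    static String await(Recognizer recognizer, long timeoutSeconds) {
        CompletableFuture<String> result = new CompletableFuture<>();
        recognizer.recognize(result::complete, result::completeExceptionally);
        try {
            return result.get(timeoutSeconds, TimeUnit.SECONDS);
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt(); // restore interrupt status
        } catch (ExecutionException | TimeoutException e) {
            // error event reported, or no result in time
        }
        return "";
    }

    public static void main(String[] args) {
        // A toy recognizer that "recognizes" immediately.
        Recognizer toy = (onResult, onError) -> onResult.accept("turn on the light");
        System.out.println(await(toy, 60));
    }
}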