mirror of
https://github.com/openhab/openhab-addons.git
synced 2025-01-25 14:55:55 +01:00
[whisper] Add OpenAI API compatibility
Apply PR comments
Signed-off-by: Gwendal Roulleau <gwendal.roulleau@gmail.com>
This commit is contained in:
parent
e40473594a
commit
5487ef17bc
@ -286,7 +286,6 @@ public class WhisperSTTService implements STTService {
|
|||||||
@Override
|
@Override
|
||||||
public STTServiceHandle recognize(STTListener sttListener, AudioStream audioStream, Locale locale, Set<String> set)
|
public STTServiceHandle recognize(STTListener sttListener, AudioStream audioStream, Locale locale, Set<String> set)
|
||||||
throws STTException {
|
throws STTException {
|
||||||
|
|
||||||
AtomicBoolean aborted = new AtomicBoolean(false);
|
AtomicBoolean aborted = new AtomicBoolean(false);
|
||||||
try {
|
try {
|
||||||
logger.debug("Creating VAD instance...");
|
logger.debug("Creating VAD instance...");
|
||||||
@ -607,7 +606,6 @@ public class WhisperSTTService implements STTService {
|
|||||||
}
|
}
|
||||||
|
|
||||||
private String recognizeAPI(int audioSamplesOffset, short[] audioStream, String language) throws STTException {
|
private String recognizeAPI(int audioSamplesOffset, short[] audioStream, String language) throws STTException {
|
||||||
|
|
||||||
// convert to byte array, Each short has 2 bytes
|
// convert to byte array, Each short has 2 bytes
|
||||||
int size = audioSamplesOffset * 2;
|
int size = audioSamplesOffset * 2;
|
||||||
ByteBuffer byteArrayBuffer = ByteBuffer.allocate(size).order(ByteOrder.LITTLE_ENDIAN);
|
ByteBuffer byteArrayBuffer = ByteBuffer.allocate(size).order(ByteOrder.LITTLE_ENDIAN);
|
||||||
@ -621,7 +619,7 @@ public class WhisperSTTService implements STTService {
|
|||||||
|
|
||||||
try {
|
try {
|
||||||
AudioInputStream audioInputStream = new AudioInputStream(new ByteArrayInputStream(byteArray), jAudioFormat,
|
AudioInputStream audioInputStream = new AudioInputStream(new ByteArrayInputStream(byteArray), jAudioFormat,
|
||||||
size);
|
audioSamplesOffset);
|
||||||
|
|
||||||
// write stream as a WAV file, in a byte array stream :
|
// write stream as a WAV file, in a byte array stream :
|
||||||
ByteArrayInputStream byteArrayInputStream = null;
|
ByteArrayInputStream byteArrayInputStream = null;
|
||||||
|
@ -3,6 +3,12 @@
|
|||||||
addon.whisperstt.name = Whisper Speech-to-Text
|
addon.whisperstt.name = Whisper Speech-to-Text
|
||||||
addon.whisperstt.description = Whisper STT Service uses the whisper.cpp library to transcript audio data to text.
|
addon.whisperstt.description = Whisper STT Service uses the whisper.cpp library to transcript audio data to text.
|
||||||
|
|
||||||
|
voice.config.whisperstt.apiKey.label = API Key
|
||||||
|
voice.config.whisperstt.apiKey.description = Key to access the API
|
||||||
|
voice.config.whisperstt.apiModelName.label = API Model
|
||||||
|
voice.config.whisperstt.apiModelName.description = Model name to use (API only). Default to OpenAI only available model (whisper-1).
|
||||||
|
voice.config.whisperstt.apiUrl.label = API Url
|
||||||
|
voice.config.whisperstt.apiUrl.description = OpenAI compatible API URL. Default to OpenAI transcription service.
|
||||||
voice.config.whisperstt.audioContext.label = Audio Context
|
voice.config.whisperstt.audioContext.label = Audio Context
|
||||||
voice.config.whisperstt.audioContext.description = Overwrite the audio context size. (0 to use whisper default context size)
|
voice.config.whisperstt.audioContext.description = Overwrite the audio context size. (0 to use whisper default context size)
|
||||||
voice.config.whisperstt.beamSize.label = Beam Size
|
voice.config.whisperstt.beamSize.label = Beam Size
|
||||||
@ -24,27 +30,35 @@ voice.config.whisperstt.greedyBestOf.description = Best Of configuration for sam
|
|||||||
voice.config.whisperstt.group.developer.label = Developer
|
voice.config.whisperstt.group.developer.label = Developer
|
||||||
voice.config.whisperstt.group.developer.description = Options added for developers.
|
voice.config.whisperstt.group.developer.description = Options added for developers.
|
||||||
voice.config.whisperstt.group.grammar.label = Grammar
|
voice.config.whisperstt.group.grammar.label = Grammar
|
||||||
voice.config.whisperstt.group.grammar.description = Define a grammar to improve transcrptions.
|
voice.config.whisperstt.group.grammar.description = Define a grammar to improve transcriptions.
|
||||||
voice.config.whisperstt.group.messages.label = Info Messages
|
voice.config.whisperstt.group.messages.label = Info Messages
|
||||||
voice.config.whisperstt.group.messages.description = Configure service information messages.
|
voice.config.whisperstt.group.messages.description = Configure service information messages.
|
||||||
|
voice.config.whisperstt.group.openaiapi.label = API Configuration Options
|
||||||
|
voice.config.whisperstt.group.openaiapi.description = Configure OpenAI compatible API, if you don't want to use the local model.
|
||||||
voice.config.whisperstt.group.stt.label = STT Configuration
|
voice.config.whisperstt.group.stt.label = STT Configuration
|
||||||
voice.config.whisperstt.group.stt.description = Configure Speech to Text.
|
voice.config.whisperstt.group.stt.description = Configure Speech to Text.
|
||||||
voice.config.whisperstt.group.vad.label = Voice Activity Detection
|
voice.config.whisperstt.group.vad.label = Voice Activity Detection
|
||||||
voice.config.whisperstt.group.vad.description = Configure the VAD mechanisim used to isolate single phrases to feed whisper with.
|
voice.config.whisperstt.group.vad.description = Configure the VAD mechanism used to isolate single phrases to feed whisper with.
|
||||||
voice.config.whisperstt.group.whisper.label = Whisper Options
|
voice.config.whisperstt.group.whisper.label = Whisper Options
|
||||||
voice.config.whisperstt.group.whisper.description = Configure the whisper.cpp transcription options.
|
voice.config.whisperstt.group.whisper.description = Configure the whisper.cpp transcription options.
|
||||||
voice.config.whisperstt.initSilenceSeconds.label = Initial Silence Seconds
|
voice.config.whisperstt.initSilenceSeconds.label = Initial Silence Seconds
|
||||||
voice.config.whisperstt.initSilenceSeconds.description = Max initial seconds of silence to discard transcription.
|
voice.config.whisperstt.initSilenceSeconds.description = Max initial seconds of silence to discard transcription.
|
||||||
voice.config.whisperstt.initialPrompt.label = Initial Prompt
|
voice.config.whisperstt.initialPrompt.label = Initial Prompt
|
||||||
voice.config.whisperstt.initialPrompt.description = Initial prompt to feed whisper with.
|
voice.config.whisperstt.initialPrompt.description = Initial prompt to feed whisper with.
|
||||||
|
voice.config.whisperstt.language.label = Language
|
||||||
|
voice.config.whisperstt.language.description = If specified, speed up recognition by avoiding auto-detection. Default to system locale.
|
||||||
voice.config.whisperstt.maxSeconds.label = Max Transcription Seconds
|
voice.config.whisperstt.maxSeconds.label = Max Transcription Seconds
|
||||||
voice.config.whisperstt.maxSeconds.description = Seconds to force transcription before silence detection.
|
voice.config.whisperstt.maxSeconds.description = Seconds to force transcription before silence detection.
|
||||||
voice.config.whisperstt.maxSilenceSeconds.label = Max Silence Seconds
|
voice.config.whisperstt.maxSilenceSeconds.label = Max Silence Seconds
|
||||||
voice.config.whisperstt.maxSilenceSeconds.description = Seconds of silence to trigger transcription.
|
voice.config.whisperstt.maxSilenceSeconds.description = Seconds of silence to trigger transcription.
|
||||||
voice.config.whisperstt.minSeconds.label = Min Transcription Seconds
|
voice.config.whisperstt.minSeconds.label = Min Transcription Seconds
|
||||||
voice.config.whisperstt.minSeconds.description = Min transcription seconds passed to whisper.
|
voice.config.whisperstt.minSeconds.description = Min transcription seconds passed to whisper.
|
||||||
voice.config.whisperstt.modelName.label = Model Name
|
voice.config.whisperstt.mode.label = Local Mode Or API
|
||||||
voice.config.whisperstt.modelName.description = Model name without extension.
|
voice.config.whisperstt.mode.description = Use the local model or the OpenAI compatible API.
|
||||||
|
voice.config.whisperstt.mode.option.LOCAL = Local
|
||||||
|
voice.config.whisperstt.mode.option.API = OpenAI API
|
||||||
|
voice.config.whisperstt.modelName.label = Local Model Name
|
||||||
|
voice.config.whisperstt.modelName.description = Model name without extension. Local mode only.
|
||||||
voice.config.whisperstt.openvinoDevice.label = OpenVINO Device
|
voice.config.whisperstt.openvinoDevice.label = OpenVINO Device
|
||||||
voice.config.whisperstt.openvinoDevice.description = Initialize OpenVINO encoder. (built-in binaries do not support OpenVINO, this has no effect)
|
voice.config.whisperstt.openvinoDevice.description = Initialize OpenVINO encoder. (built-in binaries do not support OpenVINO, this has no effect)
|
||||||
voice.config.whisperstt.preloadModel.label = Preload Model
|
voice.config.whisperstt.preloadModel.label = Preload Model
|
||||||
|
Loading…
Reference in New Issue
Block a user