mirror of
https://github.com/openhab/openhab-addons.git
synced 2025-01-25 14:55:55 +01:00
[PiperTTS] Initial contribution (#15965)
* [PiperTTS] Initial contribution Signed-off-by: Miguel Álvarez <miguelwork92@gmail.com> Signed-off-by: Ciprian Pascu <contact@ciprianpascu.ro>
This commit is contained in:
parent
b5da12e1ea
commit
83cc15a7ca
@ -432,6 +432,7 @@
|
||||
/bundles/org.openhab.voice.marytts/ @kaikreuzer
|
||||
/bundles/org.openhab.voice.mimictts/ @dalgwen
|
||||
/bundles/org.openhab.voice.picotts/ @FlorianSW
|
||||
/bundles/org.openhab.voice.pipertts/ @GiviMAD
|
||||
/bundles/org.openhab.voice.pollytts/ @openhab/add-ons-maintainers
|
||||
/bundles/org.openhab.voice.rustpotterks/ @GiviMAD
|
||||
/bundles/org.openhab.voice.voicerss/ @lolodomo
|
||||
|
@ -2146,6 +2146,11 @@
|
||||
<artifactId>org.openhab.voice.picotts</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.openhab.addons.bundles</groupId>
|
||||
<artifactId>org.openhab.voice.pipertts</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.openhab.addons.bundles</groupId>
|
||||
<artifactId>org.openhab.voice.pollytts</artifactId>
|
||||
|
40
bundles/org.openhab.voice.pipertts/NOTICE
Normal file
40
bundles/org.openhab.voice.pipertts/NOTICE
Normal file
@ -0,0 +1,40 @@
|
||||
This content is produced and maintained by the openHAB project.
|
||||
|
||||
* Project home: https://www.openhab.org
|
||||
|
||||
== Declared Project Licenses
|
||||
|
||||
This program and the accompanying materials are made available under the terms
|
||||
of the Eclipse Public License 2.0 which is available at
|
||||
https://www.eclipse.org/legal/epl-2.0/.
|
||||
|
||||
== Source Code
|
||||
|
||||
https://github.com/openhab/openhab-addons
|
||||
|
||||
== Third-party Content
|
||||
|
||||
io.github.givimad: piper-jni
|
||||
* License: Apache 2.0 License
|
||||
* Project: https://github.com/GiviMAD/piper-jni
|
||||
* Source: https://github.com/GiviMAD/piper-jni
|
||||
|
||||
io.github.rhasspy: piper
|
||||
* License: MIT License
|
||||
* Project: https://github.com/rhasspy/piper
|
||||
* Source: https://github.com/rhasspy/piper
|
||||
|
||||
io.github.rhasspy: espeak-ng
|
||||
* License: GPL version 3, 2-clause BSD
|
||||
* Project: https://github.com/rhasspy/espeak-ng
|
||||
* Source: https://github.com/rhasspy/espeak-ng
|
||||
|
||||
io.github.rhasspy: piper-phonemize
|
||||
* License: MIT License
|
||||
* Project: https://github.com/rhasspy/piper-phonemize
|
||||
* Source: https://github.com/rhasspy/piper-phonemize
|
||||
|
||||
io.github.microsoft: onnxruntime
|
||||
* License: MIT License
|
||||
* Project: https://github.com/microsoft/onnxruntime
|
||||
* Source: https://github.com/microsoft/onnxruntime
|
59
bundles/org.openhab.voice.pipertts/README.md
Normal file
59
bundles/org.openhab.voice.pipertts/README.md
Normal file
@ -0,0 +1,59 @@
|
||||
# Piper Text-to-Speech
|
||||
|
||||
This voice service allows you to use the open source library [Piper](https://github.com/rhasspy/piper) as your TTS service in openHAB.
|
||||
[Piper](https://github.com/rhasspy/piper) is a fast, local neural text to speech system that sounds great and is optimized for the Raspberry Pi 4.
|
||||
|
||||
## Supported platforms
|
||||
|
||||
The add-on is compatible with the following platforms:
|
||||
|
||||
* linux (armv7l, aarch64, x86_64, min GLIBC version 2.31)
|
||||
* macOS (x86_64 min version 11.0, aarch64 min version 13.0)
|
||||
* win64 (x86_64 min version Windows 10).
|
||||
|
||||
## Configuration
|
||||
|
||||
### Downloading Voice Model Files
|
||||
|
||||
You can find the link to the available voices at the [Piper README](https://github.com/rhasspy/piper).
|
||||
|
||||
Each voice model is composed of two files: an ONNX runtime model file with the extension '.onnx' and a model config file with the extension '.onnx.json'.
|
||||
For the add-on to load a voice, both files must share the same name apart from their extensions (for example 'voice.onnx' and 'voice.onnx.json').
|
||||
|
||||
You should place both voice files at '<OPENHAB_USERDATA>/piper/'.
|
||||
After that the UI should display your available voices at 'Settings / System Settings / Voice'.
|
||||
|
||||
### Multi Speaker Voices
|
||||
|
||||
Models that support multiple speakers are shown as multiple voices in openHAB.
|
||||
|
||||
### Text to Speech Configuration
|
||||
|
||||
Use your favorite configuration UI to edit **Settings / Other Services - Piper Text-to-Speech**:
|
||||
|
||||
* **Preload model** - Keep the last used voice model loaded in memory; this way it can be reused on the next execution if the voice option matches.
|
||||
|
||||
### Configuration via a text file
|
||||
|
||||
In case you would like to set up the service via a text file, create a new file named `pipertts.cfg` in `$OPENHAB_ROOT/conf/services`.
|
||||
|
||||
Its contents should look similar to:
|
||||
|
||||
```text
|
||||
org.openhab.voice.pipertts:preloadModel=true
|
||||
```
|
||||
|
||||
### Default Text-to-Speech Configuration
|
||||
|
||||
You can set up your preferred default Text-to-Speech service in the UI:
|
||||
|
||||
* Go to **Settings**.
|
||||
* Edit **System Services - Voice**.
|
||||
* Set **Piper** as **Text-to-Speech**.
|
||||
* Set your **Default Voice**.
|
||||
|
||||
In case you would like to set up these settings via a text file, you can edit the file `runtime.cfg` in `$OPENHAB_ROOT/conf/services` and set the following entries:
|
||||
|
||||
```text
|
||||
org.openhab.voice:defaultTTS=pipertts
|
||||
```
|
24
bundles/org.openhab.voice.pipertts/pom.xml
Normal file
24
bundles/org.openhab.voice.pipertts/pom.xml
Normal file
@ -0,0 +1,24 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://maven.apache.org/POM/4.0.0"
|
||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
|
||||
<parent>
|
||||
<groupId>org.openhab.addons.bundles</groupId>
|
||||
<artifactId>org.openhab.addons.reactor.bundles</artifactId>
|
||||
<version>4.2.0-SNAPSHOT</version>
|
||||
</parent>
|
||||
|
||||
<artifactId>org.openhab.voice.pipertts</artifactId>
|
||||
|
||||
<!-- This bundle is a voice (Text-to-Speech) service, not a binding. -->
<name>openHAB Add-ons :: Bundles :: Voice :: Piper Text-to-Speech</name>
|
||||
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>io.github.givimad</groupId>
|
||||
<artifactId>piper-jni</artifactId>
|
||||
<version>1.2.0-e5cb84c</version>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
</project>
|
@ -0,0 +1,9 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<features name="org.openhab.voice.pipertts-${project.version}" xmlns="http://karaf.apache.org/xmlns/features/v1.4.0">
|
||||
<repository>mvn:org.openhab.core.features.karaf/org.openhab.core.features.karaf.openhab-core/${ohc.version}/xml/features</repository>
|
||||
|
||||
<feature name="openhab-voice-pipertts" description="Piper Text-to-Speech" version="${project.version}">
|
||||
<feature>openhab-runtime-base</feature>
|
||||
<bundle start-level="80">mvn:org.openhab.addons.bundles/org.openhab.voice.pipertts/${project.version}</bundle>
|
||||
</feature>
|
||||
</features>
|
@ -0,0 +1,28 @@
|
||||
/**
|
||||
* Copyright (c) 2010-2024 Contributors to the openHAB project
|
||||
*
|
||||
* See the NOTICE file(s) distributed with this work for additional
|
||||
* information.
|
||||
*
|
||||
* This program and the accompanying materials are made available under the
|
||||
* terms of the Eclipse Public License 2.0 which is available at
|
||||
* http://www.eclipse.org/legal/epl-2.0
|
||||
*
|
||||
* SPDX-License-Identifier: EPL-2.0
|
||||
*/
|
||||
package org.openhab.voice.pipertts.internal;
|
||||
|
||||
import org.eclipse.jdt.annotation.NonNullByDefault;
|
||||
|
||||
/**
|
||||
* The {@link PiperTTSConfiguration} class contains fields mapping the service configuration parameters.
|
||||
*
|
||||
* @author Miguel Álvarez Díez - Initial contribution
|
||||
*/
|
||||
@NonNullByDefault
|
||||
public class PiperTTSConfiguration {
|
||||
/**
|
||||
* Keep last voice model used loaded in memory.
|
||||
*/
|
||||
boolean preloadModel;
|
||||
}
|
@ -0,0 +1,41 @@
|
||||
/**
|
||||
* Copyright (c) 2010-2024 Contributors to the openHAB project
|
||||
*
|
||||
* See the NOTICE file(s) distributed with this work for additional
|
||||
* information.
|
||||
*
|
||||
* This program and the accompanying materials are made available under the
|
||||
* terms of the Eclipse Public License 2.0 which is available at
|
||||
* http://www.eclipse.org/legal/epl-2.0
|
||||
*
|
||||
* SPDX-License-Identifier: EPL-2.0
|
||||
*/
|
||||
package org.openhab.voice.pipertts.internal;
|
||||
|
||||
import org.eclipse.jdt.annotation.NonNullByDefault;
|
||||
|
||||
/**
|
||||
* The {@link PiperTTSConstants} class defines common constants, which are
|
||||
* used across the whole service.
|
||||
*
|
||||
* @author Miguel Álvarez Díez - Initial contribution
|
||||
*/
|
||||
@NonNullByDefault
|
||||
public class PiperTTSConstants {
|
||||
/**
|
||||
* Service name
|
||||
*/
|
||||
public static final String SERVICE_NAME = "Piper";
|
||||
/**
|
||||
* Service id
|
||||
*/
|
||||
public static final String SERVICE_ID = "pipertts";
|
||||
/**
|
||||
* Service category
|
||||
*/
|
||||
public static final String SERVICE_CATEGORY = "voice";
|
||||
/**
|
||||
* Service pid
|
||||
*/
|
||||
public static final String SERVICE_PID = "org.openhab." + SERVICE_CATEGORY + "." + SERVICE_ID;
|
||||
}
|
@ -0,0 +1,418 @@
|
||||
/**
|
||||
* Copyright (c) 2010-2024 Contributors to the openHAB project
|
||||
*
|
||||
* See the NOTICE file(s) distributed with this work for additional
|
||||
* information.
|
||||
*
|
||||
* This program and the accompanying materials are made available under the
|
||||
* terms of the Eclipse Public License 2.0 which is available at
|
||||
* http://www.eclipse.org/legal/epl-2.0
|
||||
*
|
||||
* SPDX-License-Identifier: EPL-2.0
|
||||
*/
|
||||
package org.openhab.voice.pipertts.internal;
|
||||
|
||||
import static org.openhab.voice.pipertts.internal.PiperTTSConstants.SERVICE_CATEGORY;
|
||||
import static org.openhab.voice.pipertts.internal.PiperTTSConstants.SERVICE_ID;
|
||||
import static org.openhab.voice.pipertts.internal.PiperTTSConstants.SERVICE_NAME;
|
||||
import static org.openhab.voice.pipertts.internal.PiperTTSConstants.SERVICE_PID;
|
||||
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.ByteOrder;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
import java.util.Optional;
|
||||
import java.util.Set;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import javax.sound.sampled.AudioFileFormat;
|
||||
import javax.sound.sampled.AudioInputStream;
|
||||
import javax.sound.sampled.AudioSystem;
|
||||
|
||||
import org.eclipse.jdt.annotation.NonNullByDefault;
|
||||
import org.eclipse.jdt.annotation.Nullable;
|
||||
import org.openhab.core.OpenHAB;
|
||||
import org.openhab.core.audio.AudioFormat;
|
||||
import org.openhab.core.audio.AudioStream;
|
||||
import org.openhab.core.audio.ByteArrayAudioStream;
|
||||
import org.openhab.core.config.core.ConfigurableService;
|
||||
import org.openhab.core.config.core.Configuration;
|
||||
import org.openhab.core.voice.AbstractCachedTTSService;
|
||||
import org.openhab.core.voice.TTSCache;
|
||||
import org.openhab.core.voice.TTSException;
|
||||
import org.openhab.core.voice.TTSService;
|
||||
import org.openhab.core.voice.Voice;
|
||||
import org.osgi.framework.Constants;
|
||||
import org.osgi.service.component.annotations.Activate;
|
||||
import org.osgi.service.component.annotations.Component;
|
||||
import org.osgi.service.component.annotations.Deactivate;
|
||||
import org.osgi.service.component.annotations.Modified;
|
||||
import org.osgi.service.component.annotations.Reference;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import com.fasterxml.jackson.databind.JsonNode;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
|
||||
import io.github.givimad.piperjni.PiperJNI;
|
||||
import io.github.givimad.piperjni.PiperVoice;
|
||||
|
||||
/**
|
||||
* The {@link PiperTTSService} class is a service implementation to use Piper for Text-to-Speech.
|
||||
*
|
||||
* @author Miguel Álvarez - Initial contribution
|
||||
*/
|
||||
@NonNullByDefault
|
||||
@Component(service = TTSService.class, configurationPid = SERVICE_PID, property = Constants.SERVICE_PID + "="
|
||||
+ SERVICE_PID)
|
||||
@ConfigurableService(category = SERVICE_CATEGORY, label = SERVICE_NAME
|
||||
+ " Text-to-Speech", description_uri = SERVICE_CATEGORY + ":" + SERVICE_ID)
|
||||
public class PiperTTSService extends AbstractCachedTTSService {
|
||||
private static final Path PIPER_FOLDER = Path.of(OpenHAB.getUserDataFolder(), "piper");
|
||||
private final Logger logger = LoggerFactory.getLogger(PiperTTSService.class);
|
||||
private final Object modelLock = new Object();
|
||||
private PiperTTSConfiguration config = new PiperTTSConfiguration();
|
||||
private @Nullable VoiceModel preloadedModel;
|
||||
private @Nullable PiperJNI piper;
|
||||
private Map<String, List<Voice>> cachedVoicesByModel = new HashMap<>();
|
||||
|
||||
@Activate
|
||||
public PiperTTSService(final @Reference TTSCache ttsCache) {
|
||||
super(ttsCache);
|
||||
}
|
||||
|
||||
@Activate
|
||||
protected void activate(Map<String, Object> config) {
|
||||
try {
|
||||
piper = new PiperJNI();
|
||||
piper.initialize(true, false);
|
||||
logger.debug("Using Piper version {}", piper.getPiperVersion());
|
||||
} catch (IOException e) {
|
||||
logger.warn("Piper registration failed, the add-on will not work: {}", e.getMessage());
|
||||
}
|
||||
tryCreatePiperDirectory();
|
||||
configChange(config);
|
||||
}
|
||||
|
||||
@Modified
|
||||
protected void modified(Map<String, Object> config) {
|
||||
configChange(config);
|
||||
}
|
||||
|
||||
@Deactivate
|
||||
protected void deactivate(Map<String, Object> config) {
|
||||
try {
|
||||
unloadModel();
|
||||
getPiper().close();
|
||||
piper = null;
|
||||
} catch (IOException e) {
|
||||
logger.warn("Exception unloading model: {}", e.getMessage());
|
||||
} catch (LibraryNotLoaded ignored) {
|
||||
}
|
||||
}
|
||||
|
||||
private void configChange(Map<String, Object> config) {
|
||||
this.config = new Configuration(config).as(PiperTTSConfiguration.class);
|
||||
try {
|
||||
unloadModel();
|
||||
} catch (IOException e) {
|
||||
logger.warn("IOException unloading model: {}", e.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
private PiperJNI getPiper() throws LibraryNotLoaded {
|
||||
PiperJNI piper = this.piper;
|
||||
if (piper == null) {
|
||||
throw new LibraryNotLoaded();
|
||||
}
|
||||
return piper;
|
||||
}
|
||||
|
||||
private void tryCreatePiperDirectory() {
|
||||
if (!Files.exists(PIPER_FOLDER)) {
|
||||
try {
|
||||
Files.createDirectory(PIPER_FOLDER);
|
||||
logger.info("Piper directory created at: {}", PIPER_FOLDER);
|
||||
} catch (IOException e) {
|
||||
logger.warn("Unable to create piper directory at {}", PIPER_FOLDER);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getId() {
|
||||
return SERVICE_ID;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getLabel(@Nullable Locale locale) {
|
||||
return SERVICE_NAME;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Set<Voice> getAvailableVoices() {
|
||||
try (var filesStream = Files.list(PIPER_FOLDER)) {
|
||||
HashMap<String, List<Voice>> newCachedVoices = new HashMap<>();
|
||||
Set<Voice> voices = filesStream //
|
||||
.filter(filePath -> filePath.getFileName().toString().endsWith(".onnx")) //
|
||||
.map(filePath -> {
|
||||
List<Voice> modelVoices = getVoice(filePath);
|
||||
newCachedVoices.put(filePath.toString(), modelVoices);
|
||||
return modelVoices;
|
||||
}) //
|
||||
.flatMap(List::stream) //
|
||||
.collect(Collectors.toSet());
|
||||
cachedVoicesByModel = newCachedVoices;
|
||||
logger.debug("Available number of piper voices: {}", voices.size());
|
||||
return voices;
|
||||
} catch (IOException e) {
|
||||
logger.warn("IOException getting piper voices: {}", e.getMessage());
|
||||
}
|
||||
return Set.of();
|
||||
}
|
||||
|
||||
private List<Voice> getVoice(Path modelPath) {
|
||||
try {
|
||||
Path configFile = modelPath.getParent().resolve(modelPath.getFileName() + ".json");
|
||||
if (!Files.exists(configFile) || Files.isDirectory(configFile)) {
|
||||
throw new IOException("Missed config file: " + configFile.toAbsolutePath());
|
||||
}
|
||||
List<Voice> cachedVoices = cachedVoicesByModel.get(modelPath.toString());
|
||||
if (cachedVoices != null) {
|
||||
return cachedVoices;
|
||||
}
|
||||
String voiceData = Files.readString(configFile);
|
||||
JsonNode voiceJsonRoot = new ObjectMapper().readTree(voiceData);
|
||||
JsonNode datasetJsonNode = voiceJsonRoot.get("dataset");
|
||||
JsonNode languageJsonNode = voiceJsonRoot.get("language");
|
||||
JsonNode numSpeakersJsonNode = voiceJsonRoot.get("num_speakers");
|
||||
if (datasetJsonNode == null || languageJsonNode == null) {
|
||||
throw new IOException("Unknown voice config structure");
|
||||
}
|
||||
JsonNode languageFamilyJsonNode = languageJsonNode.get("family");
|
||||
JsonNode languageRegionJsonNode = languageJsonNode.get("region");
|
||||
if (languageFamilyJsonNode == null || languageRegionJsonNode == null) {
|
||||
throw new IOException("Unknown voice config structure");
|
||||
}
|
||||
String voiceName = datasetJsonNode.textValue();
|
||||
String voiceUID = voiceName.replace(" ", "_");
|
||||
String languageFamily = languageFamilyJsonNode.textValue();
|
||||
String languageRegion = languageRegionJsonNode.textValue();
|
||||
int numSpeakers = numSpeakersJsonNode != null ? numSpeakersJsonNode.intValue() : 1;
|
||||
JsonNode speakersIdsJsonNode = voiceJsonRoot.get("speaker_id_map");
|
||||
if (numSpeakers != 1 && speakersIdsJsonNode != null) {
|
||||
List<Voice> voices = new ArrayList<>();
|
||||
speakersIdsJsonNode.fieldNames().forEachRemaining(field -> {
|
||||
JsonNode fieldNode = speakersIdsJsonNode.get(field);
|
||||
voices.add(new PiperTTSVoice( //
|
||||
voiceUID + "_" + field, //
|
||||
capitalize(voiceName + " " + field), //
|
||||
languageFamily, //
|
||||
languageRegion, //
|
||||
modelPath, //
|
||||
configFile, //
|
||||
Optional.of(fieldNode.longValue())));
|
||||
});
|
||||
return voices;
|
||||
}
|
||||
return List.of(new PiperTTSVoice(voiceUID, capitalize(voiceName), languageFamily, languageRegion, modelPath,
|
||||
configFile, Optional.empty()));
|
||||
} catch (IOException e) {
|
||||
logger.warn("IOException reading voice info: {}", e.getMessage());
|
||||
return List.of();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public Set<AudioFormat> getSupportedFormats() {
|
||||
return Set.of(new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_SIGNED, false, null, null, null,
|
||||
null));
|
||||
}
|
||||
|
||||
@Override
|
||||
public AudioStream synthesizeForCache(String text, Voice voice, AudioFormat audioFormat) throws TTSException {
|
||||
if (!(voice instanceof PiperTTSVoice ttsVoice)) {
|
||||
throw new TTSException("No piper voice provided");
|
||||
}
|
||||
VoiceModel voiceModel = null;
|
||||
boolean usingPreloadedModel = false;
|
||||
short[] buffer;
|
||||
final VoiceModel preloadedModel = this.preloadedModel;
|
||||
try {
|
||||
try {
|
||||
if (preloadedModel != null && preloadedModel.ttsVoice.getUID().equals(ttsVoice.getUID())) {
|
||||
logger.debug("Using preloaded voice model");
|
||||
preloadedModel.consumers.incrementAndGet();
|
||||
voiceModel = preloadedModel;
|
||||
usingPreloadedModel = true;
|
||||
} else {
|
||||
unloadModel();
|
||||
logger.debug("Loading voice model...");
|
||||
voiceModel = loadModel(ttsVoice);
|
||||
synchronized (modelLock) {
|
||||
usingPreloadedModel = voiceModel.equals(this.preloadedModel);
|
||||
}
|
||||
}
|
||||
} catch (IOException e) {
|
||||
throw new TTSException("Unable to load voice model: " + e.getMessage());
|
||||
}
|
||||
try {
|
||||
logger.debug("Generating audio for: '{}'", text);
|
||||
buffer = getPiper().textToAudio(voiceModel.piperVoice, text);
|
||||
logger.debug("Generated {} samples of audio", buffer.length);
|
||||
} catch (IOException e) {
|
||||
throw new TTSException("Voice generation failed: " + e.getMessage());
|
||||
}
|
||||
} catch (PiperJNI.NotInitialized | LibraryNotLoaded e) {
|
||||
throw new TTSException("Piper not initialized, try restarting the add-on.");
|
||||
} catch (RuntimeException e) {
|
||||
logger.warn("RuntimeException running text to audio: {}", e.getMessage());
|
||||
throw new TTSException("There was an error running Piper");
|
||||
} finally {
|
||||
if (voiceModel != null) {
|
||||
if (!usingPreloadedModel
|
||||
|| voiceModel.consumers.decrementAndGet() == 0 && !voiceModel.equals(this.preloadedModel)) {
|
||||
logger.debug("Unloading voice model");
|
||||
voiceModel.close();
|
||||
} else {
|
||||
logger.debug("Skipping voice model unload");
|
||||
}
|
||||
}
|
||||
}
|
||||
try {
|
||||
logger.debug("Return re-encoded audio stream");
|
||||
return getAudioStream(buffer, voiceModel.sampleRate, audioFormat);
|
||||
} catch (IOException e) {
|
||||
throw new TTSException("Error while creating audio stream: " + e.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
private VoiceModel loadModel(PiperTTSVoice voice) throws IOException, PiperJNI.NotInitialized, LibraryNotLoaded {
|
||||
if (!Files.exists(voice.voiceModelPath()) || !Files.exists(voice.voiceModelConfigPath())) {
|
||||
throw new IOException("Missing voice files");
|
||||
}
|
||||
PiperJNI piper = getPiper();
|
||||
PiperVoice piperVoice;
|
||||
VoiceModel voiceModel;
|
||||
piperVoice = piper.loadVoice(voice.voiceModelPath(), voice.voiceModelConfigPath(), voice.speakerId.orElse(-1L));
|
||||
voiceModel = new VoiceModel(voice, piperVoice, piperVoice.getSampleRate(), new AtomicInteger(1), logger);
|
||||
if (config.preloadModel) {
|
||||
synchronized (modelLock) {
|
||||
if (preloadedModel == null) {
|
||||
logger.debug("Voice model will be kept preloaded");
|
||||
preloadedModel = voiceModel;
|
||||
} else {
|
||||
logger.debug("Another voice model already preloaded");
|
||||
}
|
||||
}
|
||||
}
|
||||
return voiceModel;
|
||||
}
|
||||
|
||||
private void unloadModel() throws IOException {
|
||||
var model = preloadedModel;
|
||||
if (model != null) {
|
||||
synchronized (modelLock) {
|
||||
preloadedModel = null;
|
||||
if (model.consumers.get() == 0) {
|
||||
// Do not release the model memory if it's been used, it should be released by the consumer
|
||||
// when there is no other consumers and is not a ref of the preloaded model object.
|
||||
logger.debug("Unloading preloaded model");
|
||||
model.close();
|
||||
} else {
|
||||
logger.debug("Preloaded model in use, skip memory release");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private ByteArrayAudioStream getAudioStream(short[] samples, long sampleRate, AudioFormat targetFormat)
|
||||
throws IOException {
|
||||
// Convert the i16 samples returned by piper to a byte buffer
|
||||
ByteBuffer byteBuffer;
|
||||
int numSamples = samples.length;
|
||||
byteBuffer = ByteBuffer.allocate(numSamples * 2).order(ByteOrder.LITTLE_ENDIAN);
|
||||
for (var sample : samples) {
|
||||
byteBuffer.putShort(sample);
|
||||
}
|
||||
// Initialize a Java audio stream using the Piper output format with the byte buffer created.
|
||||
byte[] bytes = byteBuffer.array();
|
||||
javax.sound.sampled.AudioFormat jAudioFormat = new javax.sound.sampled.AudioFormat(sampleRate, 16, 1, true,
|
||||
false);
|
||||
long audioLength = (long) Math.ceil(((double) bytes.length) / jAudioFormat.getFrameSize());
|
||||
AudioInputStream audioInputStreamTemp = new AudioInputStream(new ByteArrayInputStream(bytes), jAudioFormat,
|
||||
audioLength);
|
||||
// Move the audio data to another Java audio stream in the target format so the Java AudioSystem encoded it as
|
||||
// needed.
|
||||
javax.sound.sampled.AudioFormat jTargetFormat = new javax.sound.sampled.AudioFormat(
|
||||
Objects.requireNonNull(targetFormat.getFrequency()), Objects.requireNonNull(targetFormat.getBitDepth()),
|
||||
Objects.requireNonNull(targetFormat.getChannels()), true, false);
|
||||
AudioInputStream convertedInputStream = AudioSystem.getAudioInputStream(jTargetFormat, audioInputStreamTemp);
|
||||
// It's required to add the wav header to the byte array stream returned for it to work with all the sink
|
||||
// implementations.
|
||||
// It can not be done with the AudioInputStream returned by AudioSystem::getAudioInputStream because it missed
|
||||
// the length property.
|
||||
// Therefore, the following method creates another AudioInputStream instance and uses the Java AudioSystem to
|
||||
// prepend
|
||||
// the wav header bytes,
|
||||
// and finally initializes an OpenHAB audio stream.
|
||||
return getAudioStreamWithRIFFHeader(convertedInputStream.readAllBytes(), jTargetFormat, targetFormat);
|
||||
}
|
||||
|
||||
private String capitalize(String text) {
|
||||
return text.substring(0, 1).toUpperCase() + text.substring(1);
|
||||
}
|
||||
|
||||
private ByteArrayAudioStream getAudioStreamWithRIFFHeader(byte[] audioBytes,
|
||||
javax.sound.sampled.AudioFormat jAudioFormat, AudioFormat audioFormat) throws IOException {
|
||||
AudioInputStream audioInputStreamTemp = new AudioInputStream(new ByteArrayInputStream(audioBytes), jAudioFormat,
|
||||
(long) Math.ceil(((double) audioBytes.length) / jAudioFormat.getFrameSize()));
|
||||
ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
|
||||
AudioSystem.write(audioInputStreamTemp, AudioFileFormat.Type.WAVE, outputStream);
|
||||
return new ByteArrayAudioStream(outputStream.toByteArray(), audioFormat);
|
||||
}
|
||||
|
||||
private record PiperTTSVoice(String voiceId, String voiceName, String languageFamily, String languageRegion,
|
||||
Path voiceModelPath, Path voiceModelConfigPath, Optional<Long> speakerId) implements Voice {
|
||||
@Override
|
||||
public String getUID() {
|
||||
// Voice uid should be prefixed by service id to be listed properly on the UI.
|
||||
return SERVICE_ID + ":" + voiceId + "-" + languageFamily + "_" + languageRegion;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getLabel() {
|
||||
return voiceName;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Locale getLocale() {
|
||||
return new Locale(languageFamily, languageRegion);
|
||||
}
|
||||
}
|
||||
|
||||
private static class LibraryNotLoaded extends Exception {
|
||||
private LibraryNotLoaded() {
|
||||
super("Library not loaded");
|
||||
}
|
||||
}
|
||||
|
||||
private record VoiceModel(PiperTTSVoice ttsVoice, PiperVoice piperVoice, int sampleRate, AtomicInteger consumers,
|
||||
Logger logger) implements AutoCloseable {
|
||||
|
||||
@Override
|
||||
public void close() {
|
||||
piperVoice.close();
|
||||
}
|
||||
}
|
||||
}
|
@ -0,0 +1,15 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<addon:addon id="pipertts" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xmlns:addon="https://openhab.org/schemas/addon/v1.0.0"
|
||||
xsi:schemaLocation="https://openhab.org/schemas/addon/v1.0.0 https://openhab.org/schemas/addon-1.0.0.xsd">
|
||||
|
||||
<type>voice</type>
|
||||
<name>Piper Text-to-Speech</name>
|
||||
<description>This voice service allows using the open source project Piper as your TTS service in openHAB.</description>
|
||||
<connection>none</connection>
|
||||
|
||||
<service-id>org.openhab.voice.pipertts</service-id>
|
||||
|
||||
<config-description-ref uri="voice:pipertts"/>
|
||||
|
||||
</addon:addon>
|
@ -0,0 +1,19 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<config-description:config-descriptions
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xmlns:config-description="https://openhab.org/schemas/config-description/v1.0.0"
|
||||
xsi:schemaLocation="https://openhab.org/schemas/config-description/v1.0.0
|
||||
https://openhab.org/schemas/config-description-1.0.0.xsd">
|
||||
|
||||
<config-description uri="voice:pipertts">
|
||||
<parameter name="preloadModel" type="boolean">
|
||||
<label>Preload Model</label>
|
||||
<description>
|
||||
Keep the last voice model loaded. If the parameter is set to true, the model will be reloaded only when
|
||||
using a different voice.
|
||||
</description>
|
||||
<default>false</default>
|
||||
</parameter>
|
||||
</config-description>
|
||||
|
||||
</config-description:config-descriptions>
|
@ -0,0 +1,7 @@
|
||||
# add-on
|
||||
|
||||
addon.pipertts.name = Piper Text-to-Speech
|
||||
addon.pipertts.description = This voice service allows using the open source project Piper as your TTS service in openHAB.
|
||||
|
||||
voice.config.pipertts.preloadModel.label = Preload Model
|
||||
voice.config.pipertts.preloadModel.description = Keep the last voice model loaded. If the parameter is set to true, the model will be reloaded only when using a different voice.
|
@ -451,6 +451,7 @@
|
||||
<module>org.openhab.voice.marytts</module>
|
||||
<module>org.openhab.voice.mimictts</module>
|
||||
<module>org.openhab.voice.picotts</module>
|
||||
<module>org.openhab.voice.pipertts</module>
|
||||
<module>org.openhab.voice.pollytts</module>
|
||||
<module>org.openhab.voice.rustpotterks</module>
|
||||
<module>org.openhab.voice.voicerss</module>
|
||||
|
Loading…
Reference in New Issue
Block a user