mirror of
https://github.com/openhab/openhab-addons.git
synced 2025-01-25 14:55:55 +01:00
[pollyTTS] Replace custom TTS cache with common TTS cache (#15228)
* [pollyTTS] Replace custom TTS cache with common TTS cache --------- Signed-off-by: Gwendal Roulleau <gwendal.roulleau@gmail.com>
This commit is contained in:
parent
2a70a2dda8
commit
59ebd37a88
@ -28,21 +28,11 @@ The following settings can be edited in UI (**Settings / Other Services - Polly
|
||||
* **Access Key** - The AWS credentials access key (required).
|
||||
* **Secret Key** - The AWS credentials secret key (required).
|
||||
* **Service Region** - The service region used for accessing Polly (required). To reduce latency select the region closest to you. E.g. "eu-west-1" (see [regions](https://docs.aws.amazon.com/general/latest/gr/rande.html#pol_region))
|
||||
|
||||
* **Cache Expiration** - Cache expiration in days.
|
||||
|
||||
The PollyTTS service caches audio files from previous requests.
|
||||
This reduces traffic, improves performance, reduces the number of requests and provides offline functionality.
|
||||
When cache files are used their time stamps are updated, unused files are purged if their time stamp exceeds the specified age.
|
||||
The default value of 0 disables this functionality.
|
||||
A value of 365 removes files that have been unused for a year.
|
||||
|
||||
* **Audio Format** - Allows for overriding the system default audio format.
|
||||
|
||||
Use "default" to select the system default audio format.
|
||||
The default audio format can be overridden with the value "mp3" or "ogg".
|
||||
|
||||
|
||||
In case you would like to set up the service via a text file, create a new file in `$OPENHAB_ROOT/conf/services` named `pollytts.cfg`.
|
||||
|
||||
Its contents should look similar to:
|
||||
@ -51,7 +41,6 @@ Its contents should look similar to:
|
||||
org.openhab.voice.pollytts:accessKey=ACCESS_KEY
|
||||
org.openhab.voice.pollytts:secretKey=SECRET_KEY
|
||||
org.openhab.voice.pollytts:serviceRegion=eu-west-1
|
||||
org.openhab.voice.pollytts:cacheExpiration=0
|
||||
org.openhab.voice.pollytts:audioFormat=default
|
||||
```
|
||||
|
||||
@ -71,6 +60,10 @@ org.openhab.voice:defaultTTS=pollytts
|
||||
org.openhab.voice:defaultVoice=pollytts:Joanne
|
||||
```
|
||||
|
||||
## Caching
|
||||
|
||||
The PollyTTS service uses the openHAB TTS cache to cache audio files produced from the most recent queries in order to reduce traffic, improve performance and reduce the number of requests.
|
||||
|
||||
## Rule Examples
|
||||
|
||||
```
|
||||
|
@ -12,26 +12,101 @@
|
||||
*/
|
||||
package org.openhab.voice.pollytts.internal;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.OutputStream;
|
||||
|
||||
import org.openhab.core.audio.AudioException;
|
||||
import org.eclipse.jdt.annotation.NonNullByDefault;
|
||||
import org.eclipse.jdt.annotation.Nullable;
|
||||
import org.openhab.core.audio.AudioFormat;
|
||||
import org.openhab.core.audio.AudioStream;
|
||||
import org.openhab.core.audio.FileAudioStream;
|
||||
|
||||
/**
|
||||
* Implementation of the {@link AudioStream} interface for the {@link PollyTTSService}.
|
||||
* It simply uses a {@link FileAudioStream} which is doing all the necessary work,
|
||||
* e.g. supporting MP3 and WAV files with fixed stream length.
|
||||
* An AudioStream with an {@link InputStream} inside
|
||||
*
|
||||
* @author Robert Hillman - Initial contribution
|
||||
* @author Gwendal Roulleau - Refactor to simple audiostream
|
||||
*/
|
||||
class PollyTTSAudioStream extends FileAudioStream {
|
||||
@NonNullByDefault
|
||||
public class PollyTTSAudioStream extends AudioStream {
|
||||
|
||||
/**
|
||||
* main method the passes the audio file to system audio services
|
||||
*/
|
||||
public PollyTTSAudioStream(File audioFile, AudioFormat format) throws AudioException {
|
||||
super(audioFile, format);
|
||||
public InputStream innerInputStream;
|
||||
public AudioFormat audioFormat;
|
||||
|
||||
public PollyTTSAudioStream(InputStream innerInputStream, AudioFormat audioFormat) {
|
||||
super();
|
||||
this.innerInputStream = innerInputStream;
|
||||
this.audioFormat = audioFormat;
|
||||
}
|
||||
|
||||
@Override
|
||||
public AudioFormat getFormat() {
|
||||
return audioFormat;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int read() throws IOException {
|
||||
return innerInputStream.read();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int read(byte @Nullable [] b) throws IOException {
|
||||
return innerInputStream.read(b);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int read(byte @Nullable [] b, int off, int len) throws IOException {
|
||||
return innerInputStream.read(b, off, len);
|
||||
}
|
||||
|
||||
@Override
|
||||
public byte[] readAllBytes() throws IOException {
|
||||
return innerInputStream.readAllBytes();
|
||||
}
|
||||
|
||||
@Override
|
||||
public byte[] readNBytes(int len) throws IOException {
|
||||
return innerInputStream.readNBytes(len);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int readNBytes(byte @Nullable [] b, int off, int len) throws IOException {
|
||||
return innerInputStream.readNBytes(b, off, len);
|
||||
}
|
||||
|
||||
@Override
|
||||
public long skip(long n) throws IOException {
|
||||
return innerInputStream.skip(n);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int available() throws IOException {
|
||||
return innerInputStream.available();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
innerInputStream.close();
|
||||
}
|
||||
|
||||
@Override
|
||||
public synchronized void mark(int readlimit) {
|
||||
innerInputStream.mark(readlimit);
|
||||
}
|
||||
|
||||
@Override
|
||||
public synchronized void reset() throws IOException {
|
||||
innerInputStream.reset();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean markSupported() {
|
||||
return innerInputStream.markSupported();
|
||||
}
|
||||
|
||||
@Override
|
||||
public long transferTo(@Nullable OutputStream out) throws IOException {
|
||||
return innerInputStream.transferTo(out);
|
||||
}
|
||||
}
|
||||
|
@ -16,40 +16,48 @@ import static java.util.stream.Collectors.toSet;
|
||||
import static org.openhab.core.audio.AudioFormat.*;
|
||||
import static org.openhab.voice.pollytts.internal.PollyTTSService.*;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.util.Collections;
|
||||
import java.util.HashSet;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import org.openhab.core.OpenHAB;
|
||||
import org.openhab.core.audio.AudioException;
|
||||
import org.openhab.core.audio.AudioFormat;
|
||||
import org.openhab.core.audio.AudioStream;
|
||||
import org.openhab.core.config.core.ConfigurableService;
|
||||
import org.openhab.core.voice.AbstractCachedTTSService;
|
||||
import org.openhab.core.voice.TTSCache;
|
||||
import org.openhab.core.voice.TTSException;
|
||||
import org.openhab.core.voice.TTSService;
|
||||
import org.openhab.core.voice.Voice;
|
||||
import org.openhab.voice.pollytts.internal.cloudapi.CachedPollyTTSCloudImpl;
|
||||
import org.openhab.voice.pollytts.internal.cloudapi.PollyTTSCloudImpl;
|
||||
import org.openhab.voice.pollytts.internal.cloudapi.PollyTTSConfig;
|
||||
import org.osgi.framework.Constants;
|
||||
import org.osgi.service.component.annotations.Activate;
|
||||
import org.osgi.service.component.annotations.Component;
|
||||
import org.osgi.service.component.annotations.Modified;
|
||||
import org.osgi.service.component.annotations.Reference;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import com.amazonaws.services.polly.model.AmazonPollyException;
|
||||
|
||||
/**
|
||||
* This is a TTS service implementation for using Polly Text-to-Speech.
|
||||
*
|
||||
* @author Robert Hillman - Initial contribution
|
||||
*/
|
||||
@Component(configurationPid = SERVICE_PID, property = Constants.SERVICE_PID + "=" + SERVICE_PID)
|
||||
@Component(configurationPid = SERVICE_PID, property = Constants.SERVICE_PID + "="
|
||||
+ SERVICE_PID, service = TTSService.class)
|
||||
@ConfigurableService(category = SERVICE_CATEGORY, label = SERVICE_NAME
|
||||
+ " Text-to-Speech", description_uri = SERVICE_CATEGORY + ":" + SERVICE_ID)
|
||||
public class PollyTTSService implements TTSService {
|
||||
public class PollyTTSService extends AbstractCachedTTSService {
|
||||
|
||||
@Activate
|
||||
public PollyTTSService(final @Reference TTSCache ttsCache) {
|
||||
super(ttsCache);
|
||||
}
|
||||
|
||||
/**
|
||||
* Service name
|
||||
@ -71,17 +79,9 @@ public class PollyTTSService implements TTSService {
|
||||
*/
|
||||
static final String SERVICE_PID = "org.openhab." + SERVICE_CATEGORY + "." + SERVICE_ID;
|
||||
|
||||
/**
|
||||
* Cache folder under $userdata
|
||||
*/
|
||||
private static final String CACHE_FOLDER_NAME = "cache";
|
||||
|
||||
private final Logger logger = LoggerFactory.getLogger(PollyTTSService.class);
|
||||
|
||||
/**
|
||||
* We need the cached implementation to allow for FixedLengthAudioStream.
|
||||
*/
|
||||
private CachedPollyTTSCloudImpl pollyTTSImpl;
|
||||
private PollyTTSCloudImpl pollyTTSImpl;
|
||||
|
||||
/**
|
||||
* Set of supported voices
|
||||
@ -106,14 +106,7 @@ public class PollyTTSService implements TTSService {
|
||||
pollyTTSConfig = new PollyTTSConfig(config);
|
||||
logger.debug("Using configuration {}", config);
|
||||
|
||||
// create cache folder
|
||||
File cacheFolder = new File(new File(OpenHAB.getUserDataFolder(), CACHE_FOLDER_NAME), SERVICE_PID);
|
||||
if (!cacheFolder.exists()) {
|
||||
cacheFolder.mkdirs();
|
||||
}
|
||||
logger.info("Using cache folder {}", cacheFolder.getAbsolutePath());
|
||||
|
||||
pollyTTSImpl = new CachedPollyTTSCloudImpl(pollyTTSConfig, cacheFolder);
|
||||
pollyTTSImpl = new PollyTTSCloudImpl(pollyTTSConfig);
|
||||
|
||||
audioFormats.clear();
|
||||
audioFormats.addAll(initAudioFormats());
|
||||
@ -143,7 +136,7 @@ public class PollyTTSService implements TTSService {
|
||||
* obtain audio stream from cache or Amazon Polly service and return it to play the audio
|
||||
*/
|
||||
@Override
|
||||
public AudioStream synthesize(String inText, Voice voice, AudioFormat requestedFormat) throws TTSException {
|
||||
public AudioStream synthesizeForCache(String inText, Voice voice, AudioFormat requestedFormat) throws TTSException {
|
||||
logger.debug("Synthesize '{}' in format {}", inText, requestedFormat);
|
||||
logger.debug("voice UID: '{}' voice label: '{}' voice Locale: {}", voice.getUID(), voice.getLabel(),
|
||||
voice.getLocale());
|
||||
@ -151,8 +144,8 @@ public class PollyTTSService implements TTSService {
|
||||
// Validate arguments
|
||||
// trim text
|
||||
String text = inText.trim();
|
||||
if (text == null || text.isEmpty()) {
|
||||
throw new TTSException("The passed text is null or empty");
|
||||
if (text.isEmpty()) {
|
||||
throw new TTSException("The passed text is empty");
|
||||
}
|
||||
if (!voices.contains(voice)) {
|
||||
throw new TTSException("The passed voice is unsupported");
|
||||
@ -167,17 +160,15 @@ public class PollyTTSService implements TTSService {
|
||||
// now create the input stream for given text, locale, format. There is
|
||||
// only a default voice
|
||||
try {
|
||||
File cacheAudioFile = pollyTTSImpl.getTextToSpeechAsFile(text, voice.getLabel(),
|
||||
InputStream pollyAudioStream = pollyTTSImpl.getTextToSpeech(text, voice.getLabel(),
|
||||
getApiAudioFormat(requestedFormat));
|
||||
if (cacheAudioFile == null) {
|
||||
if (pollyAudioStream == null) {
|
||||
throw new TTSException("Could not read from PollyTTS service");
|
||||
}
|
||||
logger.debug("Audio Stream for '{}' in format {}", text, requestedFormat);
|
||||
AudioStream audioStream = new PollyTTSAudioStream(cacheAudioFile, requestedFormat);
|
||||
AudioStream audioStream = new PollyTTSAudioStream(pollyAudioStream, requestedFormat);
|
||||
return audioStream;
|
||||
} catch (AudioException ex) {
|
||||
throw new TTSException("Could not create AudioStream: " + ex.getMessage(), ex);
|
||||
} catch (IOException ex) {
|
||||
} catch (AmazonPollyException ex) {
|
||||
throw new TTSException("Could not read from PollyTTS service: " + ex.getMessage(), ex);
|
||||
}
|
||||
}
|
||||
|
@ -1,162 +0,0 @@
|
||||
/**
|
||||
* Copyright (c) 2010-2023 Contributors to the openHAB project
|
||||
*
|
||||
* See the NOTICE file(s) distributed with this work for additional
|
||||
* information.
|
||||
*
|
||||
* This program and the accompanying materials are made available under the
|
||||
* terms of the Eclipse Public License 2.0 which is available at
|
||||
* http://www.eclipse.org/legal/epl-2.0
|
||||
*
|
||||
* SPDX-License-Identifier: EPL-2.0
|
||||
*/
|
||||
package org.openhab.voice.pollytts.internal.cloudapi;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.OutputStream;
|
||||
import java.math.BigInteger;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.security.MessageDigest;
|
||||
import java.security.NoSuchAlgorithmException;
|
||||
import java.util.Date;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
/**
|
||||
* This class implements a cache for the retrieved audio data. It will preserve them in the file system,
|
||||
* as audio files with an additional .txt file to indicate what content is in the audio file.
|
||||
*
|
||||
* @author Robert Hillman - Initial contribution
|
||||
*/
|
||||
public class CachedPollyTTSCloudImpl extends PollyTTSCloudImpl {
|
||||
|
||||
private static final int READ_BUFFER_SIZE = 4096;
|
||||
|
||||
private final Logger logger = LoggerFactory.getLogger(CachedPollyTTSCloudImpl.class);
|
||||
|
||||
private final File cacheFolder;
|
||||
|
||||
/**
|
||||
* Create the file folder to hold the the cached speech files.
|
||||
* check to make sure the directory exist and
|
||||
* create it if necessary
|
||||
*/
|
||||
public CachedPollyTTSCloudImpl(PollyTTSConfig config, File cacheFolder) throws IOException {
|
||||
super(config);
|
||||
this.cacheFolder = cacheFolder;
|
||||
}
|
||||
|
||||
/**
|
||||
* Fetch the specified text as an audio file.
|
||||
* The audio file will be obtained from the cached folder if it
|
||||
* exist or generated by use to the external voice service.
|
||||
* The cached file txt description time stamp will be updated
|
||||
* to identify last use.
|
||||
*/
|
||||
public File getTextToSpeechAsFile(String text, String label, String audioFormat) throws IOException {
|
||||
String fileNameInCache = getUniqueFilenameForText(text, label);
|
||||
// check if in cache
|
||||
File audioFileInCache = new File(cacheFolder, fileNameInCache + "." + audioFormat.toLowerCase());
|
||||
if (audioFileInCache.exists()) {
|
||||
// update use date
|
||||
updateTimeStamp(audioFileInCache);
|
||||
updateTimeStamp(new File(cacheFolder, fileNameInCache + ".txt"));
|
||||
purgeAgedFiles();
|
||||
return audioFileInCache;
|
||||
}
|
||||
|
||||
// if not in cache, get audio data and put to cache
|
||||
try (InputStream is = getTextToSpeech(text, label, audioFormat);
|
||||
FileOutputStream fos = new FileOutputStream(audioFileInCache)) {
|
||||
copyStream(is, fos);
|
||||
// write text to file for transparency too
|
||||
// this allows to know which contents is in which audio file
|
||||
File txtFileInCache = new File(cacheFolder, fileNameInCache + ".txt");
|
||||
writeText(txtFileInCache, text);
|
||||
// return from cache
|
||||
return audioFileInCache;
|
||||
} catch (IOException ex) {
|
||||
logger.warn("Could not write {} to cache, return null", audioFileInCache, ex);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a unique filename for a give text, by creating a MD5 hash of it. It
|
||||
* will be preceded by the voice label.
|
||||
*
|
||||
* Sample: "Robert_00a2653ac5f77063bc4ea2fee87318d3"
|
||||
*/
|
||||
private String getUniqueFilenameForText(String text, String label) {
|
||||
MessageDigest md;
|
||||
try {
|
||||
md = MessageDigest.getInstance("MD5");
|
||||
} catch (NoSuchAlgorithmException ex) {
|
||||
logger.error("Could not create MD5 hash for '{}'", text, ex);
|
||||
return null;
|
||||
}
|
||||
byte[] md5Hash = md.digest(text.getBytes(StandardCharsets.UTF_8));
|
||||
BigInteger bigInt = new BigInteger(1, md5Hash);
|
||||
String hashtext = bigInt.toString(16);
|
||||
// Now we need to zero pad it if you actually want the full 32
|
||||
// chars.
|
||||
while (hashtext.length() < 32) {
|
||||
hashtext = "0" + hashtext;
|
||||
}
|
||||
String fileName = label + "_" + hashtext;
|
||||
return fileName;
|
||||
}
|
||||
|
||||
// helper methods
|
||||
|
||||
private void copyStream(InputStream inputStream, OutputStream outputStream) throws IOException {
|
||||
byte[] bytes = new byte[READ_BUFFER_SIZE];
|
||||
int read = inputStream.read(bytes, 0, READ_BUFFER_SIZE);
|
||||
while (read > 0) {
|
||||
outputStream.write(bytes, 0, read);
|
||||
read = inputStream.read(bytes, 0, READ_BUFFER_SIZE);
|
||||
}
|
||||
}
|
||||
|
||||
private void writeText(File file, String text) throws IOException {
|
||||
try (OutputStream outputStream = new FileOutputStream(file)) {
|
||||
outputStream.write(text.getBytes(StandardCharsets.UTF_8));
|
||||
}
|
||||
}
|
||||
|
||||
private void updateTimeStamp(File file) throws IOException {
|
||||
// update use date for cache management
|
||||
file.setLastModified(System.currentTimeMillis());
|
||||
}
|
||||
|
||||
private void purgeAgedFiles() throws IOException {
|
||||
// just exit if expiration set to 0/disabled
|
||||
if (config.getExpireDate() == 0) {
|
||||
return;
|
||||
}
|
||||
long now = new Date().getTime();
|
||||
long diff = now - config.getLastDelete();
|
||||
// only execute ~ once every 2 days if cache called
|
||||
long oneDayMillis = TimeUnit.DAYS.toMillis(1);
|
||||
logger.debug("PollyTTS cache cleaner lastdelete {}", diff);
|
||||
if (diff > (2 * oneDayMillis)) {
|
||||
config.setLastDelete(now);
|
||||
long xDaysAgo = config.getExpireDate() * oneDayMillis;
|
||||
// Now search folders and delete old files
|
||||
int filesDeleted = 0;
|
||||
for (File file : cacheFolder.listFiles()) {
|
||||
diff = now - file.lastModified();
|
||||
if (diff > xDaysAgo) {
|
||||
filesDeleted++;
|
||||
file.delete();
|
||||
}
|
||||
}
|
||||
logger.debug("PollyTTS cache cleaner deleted '{}' aged files", filesDeleted);
|
||||
}
|
||||
}
|
||||
}
|
@ -15,7 +15,6 @@ package org.openhab.voice.pollytts.internal.cloudapi;
|
||||
import static java.util.stream.Collectors.*;
|
||||
import static org.openhab.core.audio.AudioFormat.*;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
@ -29,6 +28,7 @@ import com.amazonaws.auth.AWSStaticCredentialsProvider;
|
||||
import com.amazonaws.auth.BasicAWSCredentials;
|
||||
import com.amazonaws.services.polly.AmazonPolly;
|
||||
import com.amazonaws.services.polly.AmazonPollyClientBuilder;
|
||||
import com.amazonaws.services.polly.model.AmazonPollyException;
|
||||
import com.amazonaws.services.polly.model.DescribeVoicesRequest;
|
||||
import com.amazonaws.services.polly.model.OutputFormat;
|
||||
import com.amazonaws.services.polly.model.SynthesizeSpeechRequest;
|
||||
@ -115,8 +115,7 @@ public class PollyTTSCloudImpl {
|
||||
* @param audioFormat
|
||||
* the audio format to use
|
||||
* @return an InputStream to the audio data in specified format
|
||||
* @throws IOException
|
||||
* will be raised if the audio data can not be retrieved from
|
||||
* @throws AmazonPollyException will be raised if the audio data can not be retrieved from
|
||||
* cloud service
|
||||
*/
|
||||
public InputStream getTextToSpeech(String text, String label, String audioFormat) {
|
||||
|
@ -52,13 +52,6 @@
|
||||
</options>
|
||||
<default>default</default>
|
||||
</parameter>
|
||||
|
||||
<parameter name="cacheExpiration" type="text">
|
||||
<label>Cache Expiration</label>
|
||||
<description>Determines the age in days when unused cached files are purged.
|
||||
Use 0 to disable this functionality.</description>
|
||||
<default>0</default>
|
||||
</parameter>
|
||||
</config-description>
|
||||
|
||||
</config-description:config-descriptions>
|
||||
|
@ -5,8 +5,6 @@ voice.config.pollytts.audioFormat.description = Allows for overriding the system
|
||||
voice.config.pollytts.audioFormat.option.default = Use system default
|
||||
voice.config.pollytts.audioFormat.option.MP3 = MP3
|
||||
voice.config.pollytts.audioFormat.option.OGG = OGG
|
||||
voice.config.pollytts.cacheExpiration.label = Cache Expiration
|
||||
voice.config.pollytts.cacheExpiration.description = Determines the age in days when unused cached files are purged. Use 0 to disable this functionality.
|
||||
voice.config.pollytts.secretKey.label = Secret Key
|
||||
voice.config.pollytts.secretKey.description = The secret key part of the AWS credentials. You need to register to get a key.
|
||||
voice.config.pollytts.serviceRegion.label = Service Region
|
||||
|
Loading…
Reference in New Issue
Block a user