[pollyTTS] Replace custom TTS cache with common TTS cache (#15228)

* [pollyTTS] Replace custom TTS cache with common TTS cache

---------

Signed-off-by: Gwendal Roulleau <gwendal.roulleau@gmail.com>
This commit is contained in:
Gwendal Roulleau 2023-07-16 12:24:20 +02:00 committed by GitHub
parent 2a70a2dda8
commit 59ebd37a88
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 116 additions and 229 deletions

View File

@ -28,21 +28,11 @@ The following settings can be edited in UI (**Settings / Other Services - Polly
* **Access Key** - The AWS credentials access key (required).
* **Secret Key** - The AWS credentials secret key (required).
* **Service Region** - The service region used for accessing Polly (required). To reduce latency select the region closest to you. E.g. "eu-west-1" (see [regions](https://docs.aws.amazon.com/general/latest/gr/rande.html#pol_region))
* **Cache Expiration** - Cache expiration in days.
The PollyTTS service caches audio files from previous requests.
This reduces traffic, improves performance, reduces the number of requests and provides offline functionality.
When cache files are used their time stamps are updated, unused files are purged if their time stamp exceeds the specified age.
The default value of 0 disables this functionality.
A value of 365 removes files that have been unused for a year.
* **Audio Format** - Allows for overriding the system default audio format.
Use "default" to select the system default audio format.
The default audio format can be overridden with the value "mp3" or "ogg".
In case you would like to set up the service via a text file, create a new file in `$OPENHAB_ROOT/conf/services` named `pollytts.cfg`.
Its contents should look similar to:
@ -51,7 +41,6 @@ Its contents should look similar to:
org.openhab.voice.pollytts:accessKey=ACCESS_KEY
org.openhab.voice.pollytts:secretKey=SECRET_KEY
org.openhab.voice.pollytts:serviceRegion=eu-west-1
org.openhab.voice.pollytts:cacheExpiration=0
org.openhab.voice.pollytts:audioFormat=default
```
@ -71,6 +60,10 @@ org.openhab.voice:defaultTTS=pollytts
org.openhab.voice:defaultVoice=pollytts:Joanne
```
## Caching
The PollyTTS service uses the openHAB TTS cache to cache audio files produced from the most recent queries in order to reduce traffic, improve performance and reduce the number of requests.
## Rule Examples
```

View File

@ -12,26 +12,101 @@
*/
package org.openhab.voice.pollytts.internal;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import org.openhab.core.audio.AudioException;
import org.eclipse.jdt.annotation.NonNullByDefault;
import org.eclipse.jdt.annotation.Nullable;
import org.openhab.core.audio.AudioFormat;
import org.openhab.core.audio.AudioStream;
import org.openhab.core.audio.FileAudioStream;
/**
* Implementation of the {@link AudioStream} interface for the {@link PollyTTSService}.
* It simply uses a {@link FileAudioStream} which is doing all the necessary work,
* e.g. supporting MP3 and WAV files with fixed stream length.
* An AudioStream with an {@link InputStream} inside
*
* @author Robert Hillman - Initial contribution
* @author Gwendal Roulleau - Refactor to simple audiostream
*/
class PollyTTSAudioStream extends FileAudioStream {
@NonNullByDefault
public class PollyTTSAudioStream extends AudioStream {
/**
* main method that passes the audio file to system audio services
*/
public PollyTTSAudioStream(File audioFile, AudioFormat format) throws AudioException {
super(audioFile, format);
public InputStream innerInputStream;
public AudioFormat audioFormat;
/**
 * Creates an audio stream that wraps the given input stream.
 *
 * @param innerInputStream the stream that all read operations of this class are delegated to
 * @param audioFormat the format that {@link #getFormat()} reports for this stream
 */
public PollyTTSAudioStream(InputStream innerInputStream, AudioFormat audioFormat) {
super();
this.innerInputStream = innerInputStream;
this.audioFormat = audioFormat;
}
/**
 * Returns the current value of the {@code audioFormat} field, which is set by the constructor.
 *
 * @return the audio format of this stream
 */
@Override
public AudioFormat getFormat() {
return audioFormat;
}
/**
 * Reads a single byte by delegating to the wrapped input stream.
 *
 * @return whatever the wrapped stream's {@code read()} returns
 * @throws IOException if the wrapped stream throws it
 */
@Override
public int read() throws IOException {
return innerInputStream.read();
}
/**
 * Reads into the given buffer by delegating to the wrapped input stream.
 *
 * @param b the buffer handed unchanged to the wrapped stream
 * @return whatever the wrapped stream's {@code read(byte[])} returns
 * @throws IOException if the wrapped stream throws it
 */
@Override
public int read(byte @Nullable [] b) throws IOException {
return innerInputStream.read(b);
}
/**
 * Reads into a region of the given buffer by delegating to the wrapped input stream.
 *
 * @param b the buffer handed unchanged to the wrapped stream
 * @param off the offset handed unchanged to the wrapped stream
 * @param len the length handed unchanged to the wrapped stream
 * @return whatever the wrapped stream's {@code read(byte[], int, int)} returns
 * @throws IOException if the wrapped stream throws it
 */
@Override
public int read(byte @Nullable [] b, int off, int len) throws IOException {
return innerInputStream.read(b, off, len);
}
/**
 * Reads all remaining bytes by delegating to the wrapped input stream.
 *
 * @return whatever the wrapped stream's {@code readAllBytes()} returns
 * @throws IOException if the wrapped stream throws it
 */
@Override
public byte[] readAllBytes() throws IOException {
return innerInputStream.readAllBytes();
}
/**
 * Reads up to the requested number of bytes by delegating to the wrapped input stream.
 *
 * @param len the length handed unchanged to the wrapped stream
 * @return whatever the wrapped stream's {@code readNBytes(int)} returns
 * @throws IOException if the wrapped stream throws it
 */
@Override
public byte[] readNBytes(int len) throws IOException {
return innerInputStream.readNBytes(len);
}
/**
 * Reads into a region of the given buffer by delegating to the wrapped input stream.
 *
 * @param b the buffer handed unchanged to the wrapped stream
 * @param off the offset handed unchanged to the wrapped stream
 * @param len the length handed unchanged to the wrapped stream
 * @return whatever the wrapped stream's {@code readNBytes(byte[], int, int)} returns
 * @throws IOException if the wrapped stream throws it
 */
@Override
public int readNBytes(byte @Nullable [] b, int off, int len) throws IOException {
return innerInputStream.readNBytes(b, off, len);
}
/**
 * Skips bytes by delegating to the wrapped input stream.
 *
 * @param n the number of bytes handed unchanged to the wrapped stream
 * @return whatever the wrapped stream's {@code skip(long)} returns
 * @throws IOException if the wrapped stream throws it
 */
@Override
public long skip(long n) throws IOException {
return innerInputStream.skip(n);
}
/**
 * Reports the number of available bytes by delegating to the wrapped input stream.
 *
 * @return whatever the wrapped stream's {@code available()} returns
 * @throws IOException if the wrapped stream throws it
 */
@Override
public int available() throws IOException {
return innerInputStream.available();
}
/**
 * Closes the wrapped input stream.
 *
 * @throws IOException if the wrapped stream throws it
 */
@Override
public void close() throws IOException {
innerInputStream.close();
}
/**
 * Marks the current position by delegating to the wrapped input stream.
 * The call is synchronized on this wrapper instance.
 *
 * @param readlimit the read limit handed unchanged to the wrapped stream
 */
@Override
public synchronized void mark(int readlimit) {
innerInputStream.mark(readlimit);
}
/**
 * Resets the stream by delegating to the wrapped input stream.
 * The call is synchronized on this wrapper instance.
 *
 * @throws IOException if the wrapped stream throws it
 */
@Override
public synchronized void reset() throws IOException {
innerInputStream.reset();
}
/**
 * Reports whether mark/reset is supported, as answered by the wrapped input stream.
 *
 * @return whatever the wrapped stream's {@code markSupported()} returns
 */
@Override
public boolean markSupported() {
return innerInputStream.markSupported();
}
/**
 * Transfers the stream content to the given output stream by delegating to the wrapped input stream.
 *
 * @param out the target handed unchanged to the wrapped stream
 * @return whatever the wrapped stream's {@code transferTo(OutputStream)} returns
 * @throws IOException if the wrapped stream throws it
 */
@Override
public long transferTo(@Nullable OutputStream out) throws IOException {
return innerInputStream.transferTo(out);
}
}

View File

@ -16,40 +16,48 @@ import static java.util.stream.Collectors.toSet;
import static org.openhab.core.audio.AudioFormat.*;
import static org.openhab.voice.pollytts.internal.PollyTTSService.*;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.util.Collections;
import java.util.HashSet;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import org.openhab.core.OpenHAB;
import org.openhab.core.audio.AudioException;
import org.openhab.core.audio.AudioFormat;
import org.openhab.core.audio.AudioStream;
import org.openhab.core.config.core.ConfigurableService;
import org.openhab.core.voice.AbstractCachedTTSService;
import org.openhab.core.voice.TTSCache;
import org.openhab.core.voice.TTSException;
import org.openhab.core.voice.TTSService;
import org.openhab.core.voice.Voice;
import org.openhab.voice.pollytts.internal.cloudapi.CachedPollyTTSCloudImpl;
import org.openhab.voice.pollytts.internal.cloudapi.PollyTTSCloudImpl;
import org.openhab.voice.pollytts.internal.cloudapi.PollyTTSConfig;
import org.osgi.framework.Constants;
import org.osgi.service.component.annotations.Activate;
import org.osgi.service.component.annotations.Component;
import org.osgi.service.component.annotations.Modified;
import org.osgi.service.component.annotations.Reference;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.amazonaws.services.polly.model.AmazonPollyException;
/**
* This is a TTS service implementation for using Polly Text-to-Speech.
*
* @author Robert Hillman - Initial contribution
*/
@Component(configurationPid = SERVICE_PID, property = Constants.SERVICE_PID + "=" + SERVICE_PID)
@Component(configurationPid = SERVICE_PID, property = Constants.SERVICE_PID + "="
+ SERVICE_PID, service = TTSService.class)
@ConfigurableService(category = SERVICE_CATEGORY, label = SERVICE_NAME
+ " Text-to-Speech", description_uri = SERVICE_CATEGORY + ":" + SERVICE_ID)
public class PollyTTSService implements TTSService {
public class PollyTTSService extends AbstractCachedTTSService {
/**
 * Creates the service and hands the given TTS cache to the {@link AbstractCachedTTSService} superclass.
 *
 * @param ttsCache the cache passed on to the superclass constructor; declared as a component
 *            {@code @Reference}
 */
@Activate
public PollyTTSService(final @Reference TTSCache ttsCache) {
super(ttsCache);
}
/**
* Service name
@ -71,17 +79,9 @@ public class PollyTTSService implements TTSService {
*/
static final String SERVICE_PID = "org.openhab." + SERVICE_CATEGORY + "." + SERVICE_ID;
/**
* Cache folder under $userdata
*/
private static final String CACHE_FOLDER_NAME = "cache";
private final Logger logger = LoggerFactory.getLogger(PollyTTSService.class);
/**
* We need the cached implementation to allow for FixedLengthAudioStream.
*/
private CachedPollyTTSCloudImpl pollyTTSImpl;
private PollyTTSCloudImpl pollyTTSImpl;
/**
* Set of supported voices
@ -106,14 +106,7 @@ public class PollyTTSService implements TTSService {
pollyTTSConfig = new PollyTTSConfig(config);
logger.debug("Using configuration {}", config);
// create cache folder
File cacheFolder = new File(new File(OpenHAB.getUserDataFolder(), CACHE_FOLDER_NAME), SERVICE_PID);
if (!cacheFolder.exists()) {
cacheFolder.mkdirs();
}
logger.info("Using cache folder {}", cacheFolder.getAbsolutePath());
pollyTTSImpl = new CachedPollyTTSCloudImpl(pollyTTSConfig, cacheFolder);
pollyTTSImpl = new PollyTTSCloudImpl(pollyTTSConfig);
audioFormats.clear();
audioFormats.addAll(initAudioFormats());
@ -143,7 +136,7 @@ public class PollyTTSService implements TTSService {
* obtain audio stream from cache or Amazon Polly service and return it to play the audio
*/
@Override
public AudioStream synthesize(String inText, Voice voice, AudioFormat requestedFormat) throws TTSException {
public AudioStream synthesizeForCache(String inText, Voice voice, AudioFormat requestedFormat) throws TTSException {
logger.debug("Synthesize '{}' in format {}", inText, requestedFormat);
logger.debug("voice UID: '{}' voice label: '{}' voice Locale: {}", voice.getUID(), voice.getLabel(),
voice.getLocale());
@ -151,8 +144,8 @@ public class PollyTTSService implements TTSService {
// Validate arguments
// trim text
String text = inText.trim();
if (text == null || text.isEmpty()) {
throw new TTSException("The passed text is null or empty");
if (text.isEmpty()) {
throw new TTSException("The passed text is empty");
}
if (!voices.contains(voice)) {
throw new TTSException("The passed voice is unsupported");
@ -167,17 +160,15 @@ public class PollyTTSService implements TTSService {
// now create the input stream for given text, locale, format. There is
// only a default voice
try {
File cacheAudioFile = pollyTTSImpl.getTextToSpeechAsFile(text, voice.getLabel(),
InputStream pollyAudioStream = pollyTTSImpl.getTextToSpeech(text, voice.getLabel(),
getApiAudioFormat(requestedFormat));
if (cacheAudioFile == null) {
if (pollyAudioStream == null) {
throw new TTSException("Could not read from PollyTTS service");
}
logger.debug("Audio Stream for '{}' in format {}", text, requestedFormat);
AudioStream audioStream = new PollyTTSAudioStream(cacheAudioFile, requestedFormat);
AudioStream audioStream = new PollyTTSAudioStream(pollyAudioStream, requestedFormat);
return audioStream;
} catch (AudioException ex) {
throw new TTSException("Could not create AudioStream: " + ex.getMessage(), ex);
} catch (IOException ex) {
} catch (AmazonPollyException ex) {
throw new TTSException("Could not read from PollyTTS service: " + ex.getMessage(), ex);
}
}

View File

@ -1,162 +0,0 @@
/**
* Copyright (c) 2010-2023 Contributors to the openHAB project
*
* See the NOTICE file(s) distributed with this work for additional
* information.
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0
*
* SPDX-License-Identifier: EPL-2.0
*/
package org.openhab.voice.pollytts.internal.cloudapi;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.math.BigInteger;
import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.Date;
import java.util.concurrent.TimeUnit;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * A {@link PollyTTSCloudImpl} that keeps the retrieved audio data in a file system cache.
 * <p>
 * Every audio file is accompanied by a {@code .txt} file with the same base name that contains
 * the synthesized text, which makes it possible to tell which content is in which audio file.
 *
 * @author Robert Hillman - Initial contribution
 */
public class CachedPollyTTSCloudImpl extends PollyTTSCloudImpl {

    private static final int READ_BUFFER_SIZE = 4096;

    private final Logger logger = LoggerFactory.getLogger(CachedPollyTTSCloudImpl.class);

    private final File cacheFolder;

    /**
     * Creates a caching implementation that stores its files in the given folder.
     * The folder is only remembered here; it is not created or checked by this constructor.
     *
     * @param config the service configuration, passed on to the superclass
     * @param cacheFolder the folder that holds the cached speech files
     * @throws IOException declared for callers; see the superclass constructor
     */
    public CachedPollyTTSCloudImpl(PollyTTSConfig config, File cacheFolder) throws IOException {
        super(config);
        this.cacheFolder = cacheFolder;
    }

    /**
     * Fetches the specified text as an audio file.
     * <p>
     * On a cache hit the time stamps of the audio file and its {@code .txt} companion are refreshed
     * to record the last use, aged files are purged, and the cached file is returned. Otherwise the
     * audio is requested via {@link #getTextToSpeech(String, String, String)} and written to the
     * cache together with the {@code .txt} companion.
     *
     * @param text the text to synthesize
     * @param label the voice label; it becomes the prefix of the cache file name
     * @param audioFormat the audio format; its lower-case form becomes the file extension
     * @return the cached audio file, or {@code null} if the cache entry could not be written
     * @throws IOException declared for callers; I/O failures while writing the cache entry are
     *             logged and reported by returning {@code null}
     */
    public File getTextToSpeechAsFile(String text, String label, String audioFormat) throws IOException {
        String fileNameInCache = getUniqueFilenameForText(text, label);
        File audioFileInCache = new File(cacheFolder, fileNameInCache + "." + audioFormat.toLowerCase());
        File txtFileInCache = new File(cacheFolder, fileNameInCache + ".txt");

        if (audioFileInCache.exists()) {
            // update use date
            updateTimeStamp(audioFileInCache);
            updateTimeStamp(txtFileInCache);
            purgeAgedFiles();
            return audioFileInCache;
        }

        // if not in cache, get audio data and put to cache
        try (InputStream is = getTextToSpeech(text, label, audioFormat);
                FileOutputStream fos = new FileOutputStream(audioFileInCache)) {
            copyStream(is, fos);
            // write text to file for transparency too
            // this allows to know which contents is in which audio file
            writeText(txtFileInCache, text);
            return audioFileInCache;
        } catch (IOException ex) {
            logger.warn("Could not write {} to cache, return null", audioFileInCache, ex);
            // Both streams are closed at this point. Remove whatever was written so that a
            // truncated audio file is not served as a valid cache hit on the next request.
            discardCacheFile(audioFileInCache);
            discardCacheFile(txtFileInCache);
            return null;
        }
    }

    /**
     * Gets a unique filename for a given text, by creating a MD5 hash of it. It
     * will be preceded by the voice label.
     *
     * Sample: "Robert_00a2653ac5f77063bc4ea2fee87318d3"
     *
     * @throws IllegalStateException if no MD5 digest is available; returning {@code null} here
     *             would make every text share the same file name
     */
    private String getUniqueFilenameForText(String text, String label) {
        MessageDigest md;
        try {
            md = MessageDigest.getInstance("MD5");
        } catch (NoSuchAlgorithmException ex) {
            logger.error("Could not create MD5 hash for '{}'", text, ex);
            throw new IllegalStateException("MD5 message digest is not available", ex);
        }
        byte[] md5Hash = md.digest(text.getBytes(StandardCharsets.UTF_8));
        // signum 1 keeps the value non-negative; %032x zero-pads to the full 32 hex characters
        String hashtext = String.format("%032x", new BigInteger(1, md5Hash));
        return label + "_" + hashtext;
    }

    // helper methods

    /**
     * Copies the input stream to the output stream until end-of-stream ({@code -1}) is reported.
     */
    private void copyStream(InputStream inputStream, OutputStream outputStream) throws IOException {
        byte[] bytes = new byte[READ_BUFFER_SIZE];
        int read;
        while ((read = inputStream.read(bytes, 0, READ_BUFFER_SIZE)) != -1) {
            outputStream.write(bytes, 0, read);
        }
    }

    /**
     * Writes the text UTF-8 encoded to the given file.
     */
    private void writeText(File file, String text) throws IOException {
        try (OutputStream outputStream = new FileOutputStream(file)) {
            outputStream.write(text.getBytes(StandardCharsets.UTF_8));
        }
    }

    /**
     * Sets the last-modified time of the file to now; the time stamp is what the purge compares
     * against. The result of the operation is deliberately not checked (best effort).
     */
    private void updateTimeStamp(File file) {
        // update use date for cache management
        file.setLastModified(System.currentTimeMillis());
    }

    /**
     * Removes a file of an incomplete cache entry, if it exists.
     */
    private void discardCacheFile(File file) {
        if (file.exists() && !file.delete()) {
            logger.debug("Could not delete incomplete cache file {}", file);
        }
    }

    /**
     * Deletes cache files whose last-modified time is older than the configured expiration.
     * Does nothing if the expiration is 0, and runs at most once every two days.
     */
    private void purgeAgedFiles() {
        // just exit if expiration set to 0/disabled
        if (config.getExpireDate() == 0) {
            return;
        }
        long now = System.currentTimeMillis();
        long sinceLastPurge = now - config.getLastDelete();

        // only execute ~ once every 2 days if cache called
        long oneDayMillis = TimeUnit.DAYS.toMillis(1);
        logger.debug("PollyTTS cache cleaner lastdelete {}", sinceLastPurge);
        if (sinceLastPurge <= 2 * oneDayMillis) {
            return;
        }
        config.setLastDelete(now);

        long maxAgeMillis = config.getExpireDate() * oneDayMillis;
        // listFiles() returns null if the folder does not exist or cannot be read
        File[] cachedFiles = cacheFolder.listFiles();
        if (cachedFiles == null) {
            logger.debug("PollyTTS cache cleaner could not list files in {}", cacheFolder);
            return;
        }

        // Now search folder and delete old files; count only the files that were really removed
        int filesDeleted = 0;
        for (File file : cachedFiles) {
            if (now - file.lastModified() > maxAgeMillis && file.delete()) {
                filesDeleted++;
            }
        }
        logger.debug("PollyTTS cache cleaner deleted '{}' aged files", filesDeleted);
    }
}

View File

@ -15,7 +15,6 @@ package org.openhab.voice.pollytts.internal.cloudapi;
import static java.util.stream.Collectors.*;
import static org.openhab.core.audio.AudioFormat.*;
import java.io.IOException;
import java.io.InputStream;
import java.util.Collections;
import java.util.List;
@ -29,6 +28,7 @@ import com.amazonaws.auth.AWSStaticCredentialsProvider;
import com.amazonaws.auth.BasicAWSCredentials;
import com.amazonaws.services.polly.AmazonPolly;
import com.amazonaws.services.polly.AmazonPollyClientBuilder;
import com.amazonaws.services.polly.model.AmazonPollyException;
import com.amazonaws.services.polly.model.DescribeVoicesRequest;
import com.amazonaws.services.polly.model.OutputFormat;
import com.amazonaws.services.polly.model.SynthesizeSpeechRequest;
@ -115,8 +115,7 @@ public class PollyTTSCloudImpl {
* @param audioFormat
* the audio format to use
* @return an InputStream to the audio data in specified format
* @throws IOException
* will be raised if the audio data can not be retrieved from
* @throws AmazonPollyException will be raised if the audio data can not be retrieved from
* cloud service
*/
public InputStream getTextToSpeech(String text, String label, String audioFormat) {

View File

@ -52,13 +52,6 @@
</options>
<default>default</default>
</parameter>
<parameter name="cacheExpiration" type="text">
<label>Cache Expiration</label>
<description>Determines the age in days when unused cached files are purged.
Use 0 to disable this functionality.</description>
<default>0</default>
</parameter>
</config-description>
</config-description:config-descriptions>

View File

@ -5,8 +5,6 @@ voice.config.pollytts.audioFormat.description = Allows for overriding the system
voice.config.pollytts.audioFormat.option.default = Use system default
voice.config.pollytts.audioFormat.option.MP3 = MP3
voice.config.pollytts.audioFormat.option.OGG = OGG
voice.config.pollytts.cacheExpiration.label = Cache Expiration
voice.config.pollytts.cacheExpiration.description = Determines the age in days when unused cached files are purged. Use 0 to disable this functionality.
voice.config.pollytts.secretKey.label = Secret Key
voice.config.pollytts.secretKey.description = The secret key part of the AWS credentials. You need to register to get a key.
voice.config.pollytts.serviceRegion.label = Service Region