[openaitts] OpenAI Text-to-Speech initial contribution (#17733)

Also-by: Wouter Born <github@maindrain.net> Signed-off-by: Artur-Fedjukevits <fedjukevitsh@gmail.com>
2025-01-10 07:02:02 +01:00 · 2024-12-24 18:19:25 +01:00 · 2024-12-24 18:19:25 +01:00 · 7c6e658f56
commit 7c6e658f56
parent 50e3ca62c0
14 changed files with 408 additions and 0 deletions
--- a/1
+++ b/1
@ -465,6 +465,7 @@
 /bundles/org.openhab.voice.mactts/ @kaikreuzer
 /bundles/org.openhab.voice.marytts/ @kaikreuzer
 /bundles/org.openhab.voice.mimictts/ @dalgwen
+/bundles/org.openhab.voice.openaitts/ @Artur-Fedjukevits
 /bundles/org.openhab.voice.picotts/ @FlorianSW
 /bundles/org.openhab.voice.pipertts/ @GiviMAD
 /bundles/org.openhab.voice.pollytts/ @openhab/add-ons-maintainers
--- a/bom/openhab-addons/pom.xml
+++ b/bom/openhab-addons/pom.xml
@ -2301,6 +2301,11 @@
      <artifactId>org.openhab.voice.mimictts</artifactId>
      <version>${project.version}</version>
    </dependency>
+    <dependency>
+      <groupId>org.openhab.addons.bundles</groupId>
+      <artifactId>org.openhab.voice.openaitts</artifactId>
+      <version>${project.version}</version>
+    </dependency>
    <dependency>
      <groupId>org.openhab.addons.bundles</groupId>
      <artifactId>org.openhab.voice.picotts</artifactId>
--- a/bundles/org.openhab.voice.openaitts/NOTICE
+++ b/bundles/org.openhab.voice.openaitts/NOTICE
@ -0,0 +1,14 @@
+This content is produced and maintained by the openHAB project.
+
+* Project home: https://www.openhab.org
+
+== Declared Project Licenses
+
+This program and the accompanying materials are made available under the terms
+of the Eclipse Public License 2.0 which is available at
+https://www.eclipse.org/legal/epl-2.0/.
+
+== Source Code
+
+https://github.com/openhab/openhab-addons
+
--- a/bundles/org.openhab.voice.openaitts/README.md
+++ b/bundles/org.openhab.voice.openaitts/README.md
@ -0,0 +1,23 @@
+# OpenAI Text-to-Speech
+
+The OpenAI TTS (Text-to-Speech) add-on for openHAB allows you to integrate OpenAI's Text-to-Speech capabilities into your openHAB system.
+The advantage of this service over others is that one selected voice can speak different languages.
+This is useful, for example, in conjunction with the ChatGPT binding, where it can help with learning foreign languages.
+You can find the pricing for this service at <https://openai.com/api/pricing/>.
+
+## Configuration
+
+To configure the OpenAI TTS, go to **Settings / Other Services - OpenAI Text-to-Speech** and set:
+
+* **apiKey** - The API key to be used for the requests.
+* **apiUrl** - The server API where to reach the AI TTS service.
+* **model** - The ID of the model to use for TTS.
+* **speed** - The speed of the generated audio, from 0.25 to 4.0 (default 1.0).
+
+### Default Text-to-Speech and Voice Configuration
+
+You can set up your preferred default Text-to-Speech and default voice in the UI:
+
+* Go to **Settings**.
+* Edit **System Services - Voice**.
+* Set **OpenAI TTS Service** as **Default Text-to-Speech**.
+* Choose your preferred **Default Voice** for your setup.
--- a/bundles/org.openhab.voice.openaitts/pom.xml
+++ b/bundles/org.openhab.voice.openaitts/pom.xml
@ -0,0 +1,16 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+  xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
+
+  <modelVersion>4.0.0</modelVersion>
+
+  <parent>
+    <groupId>org.openhab.addons.bundles</groupId>
+    <artifactId>org.openhab.addons.reactor.bundles</artifactId>
+    <version>5.0.0-SNAPSHOT</version>
+  </parent>
+
+  <artifactId>org.openhab.voice.openaitts</artifactId>
+
+  <name>openHAB Add-ons :: Bundles :: Voice :: OpenAI Text-to-Speech</name>
+</project>
--- a/bundles/org.openhab.voice.openaitts/src/main/feature/feature.xml
+++ b/bundles/org.openhab.voice.openaitts/src/main/feature/feature.xml
@ -0,0 +1,9 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<features name="org.openhab.voice.openaitts-${project.version}" xmlns="http://karaf.apache.org/xmlns/features/v1.4.0">
+	<repository>mvn:org.openhab.core.features.karaf/org.openhab.core.features.karaf.openhab-core/${ohc.version}/xml/features</repository>
+
+	<feature name="openhab-voice-openaitts" description="OpenAI Text-to-Speech" version="${project.version}">
+		<feature>openhab-runtime-base</feature>
+		<bundle start-level="80">mvn:org.openhab.addons.bundles/org.openhab.voice.openaitts/${project.version}</bundle>
+	</feature>
+</features>
--- a/bundles/org.openhab.voice.openaitts/src/main/java/org/openhab/voice/openaitts/internal/OpenAITTSConfiguration.java
+++ b/bundles/org.openhab.voice.openaitts/src/main/java/org/openhab/voice/openaitts/internal/OpenAITTSConfiguration.java
@ -0,0 +1,27 @@
+/**
+ * Copyright (c) 2010-2024 Contributors to the openHAB project
+ *
+ * See the NOTICE file(s) distributed with this work for additional
+ * information.
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ */
+package org.openhab.voice.openaitts.internal;
+
+import org.eclipse.jdt.annotation.NonNullByDefault;
+
+/**
+ * Holds the user-configurable settings of the OpenAI TTS service.
+ * The field names correspond to the parameter names of the {@code voice:openaitts} config description.
+ *
+ * @author Artur Fedjukevits - Initial contribution
+ */
+@NonNullByDefault
+public class OpenAITTSConfiguration {
+
+    /** API key that is sent as bearer token with every request. */
+    public String apiKey = "";
+    /** URL of the speech endpoint the requests are posted to. */
+    public String apiUrl = "https://api.openai.com/v1/audio/speech";
+    /** ID of the model used for the synthesis. */
+    public String model = "tts-1";
+    /**
+     * Speed of the generated audio.
+     * Kept numeric so it matches the decimal parameter of the config description and ends up as a JSON number in
+     * the request body instead of a quoted string.
+     */
+    public double speed = 1.0;
+}
--- a/bundles/org.openhab.voice.openaitts/src/main/java/org/openhab/voice/openaitts/internal/OpenAITTSConstants.java
+++ b/bundles/org.openhab.voice.openaitts/src/main/java/org/openhab/voice/openaitts/internal/OpenAITTSConstants.java
@ -0,0 +1,25 @@
+/**
+ * Copyright (c) 2010-2024 Contributors to the openHAB project
+ *
+ * See the NOTICE file(s) distributed with this work for additional
+ * information.
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ */
+package org.openhab.voice.openaitts.internal;
+
+import org.eclipse.jdt.annotation.NonNullByDefault;
+
+/**
+ * Constants shared by the classes of the OpenAI TTS add-on.
+ *
+ * @author Artur Fedjukevits - Initial contribution
+ */
+@NonNullByDefault
+public class OpenAITTSConstants {
+
+    /** ID of the TTS service; also used as suffix of the config description URI. */
+    public static final String TTS_SERVICE_ID = "openaitts";
+    /** PID under which the service and its configuration are registered. */
+    public static final String TTS_SERVICE_PID = "org.openhab.voice.openaitts";
+}
--- a/bundles/org.openhab.voice.openaitts/src/main/java/org/openhab/voice/openaitts/internal/OpenAITTSService.java
+++ b/bundles/org.openhab.voice.openaitts/src/main/java/org/openhab/voice/openaitts/internal/OpenAITTSService.java
@ -0,0 +1,148 @@
+/**
+ * Copyright (c) 2010-2024 Contributors to the openHAB project
+ *
+ * See the NOTICE file(s) distributed with this work for additional
+ * information.
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ */
+package org.openhab.voice.openaitts.internal;
+
+import static org.openhab.voice.openaitts.internal.OpenAITTSConstants.*;
+
+import java.util.Locale;
+import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.TimeoutException;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
+
+import org.eclipse.jdt.annotation.NonNullByDefault;
+import org.eclipse.jdt.annotation.Nullable;
+import org.eclipse.jetty.client.HttpClient;
+import org.eclipse.jetty.client.api.ContentResponse;
+import org.eclipse.jetty.client.util.StringContentProvider;
+import org.eclipse.jetty.http.HttpMethod;
+import org.eclipse.jetty.http.HttpStatus;
+import org.openhab.core.audio.AudioFormat;
+import org.openhab.core.audio.AudioStream;
+import org.openhab.core.audio.ByteArrayAudioStream;
+import org.openhab.core.config.core.ConfigurableService;
+import org.openhab.core.config.core.Configuration;
+import org.openhab.core.io.net.http.HttpClientFactory;
+import org.openhab.core.voice.AbstractCachedTTSService;
+import org.openhab.core.voice.TTSCache;
+import org.openhab.core.voice.TTSException;
+import org.openhab.core.voice.TTSService;
+import org.openhab.core.voice.Voice;
+import org.osgi.framework.Constants;
+import org.osgi.service.component.annotations.Activate;
+import org.osgi.service.component.annotations.Component;
+import org.osgi.service.component.annotations.Modified;
+import org.osgi.service.component.annotations.Reference;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.google.gson.Gson;
+import com.google.gson.JsonObject;
+
+/**
+ * @author Artur Fedjukevits - Initial contribution
+ *         API documentation: https://platform.openai.com/docs/guides/text-to-speech
+ */
+@Component(configurationPid = TTS_SERVICE_PID, property = Constants.SERVICE_PID + "="
+        + TTS_SERVICE_PID, service = TTSService.class)
+@ConfigurableService(category = "voice", label = "OpenAI TTS Service", description_uri = "voice:" + TTS_SERVICE_ID)
+
+@NonNullByDefault
+public class OpenAITTSService extends AbstractCachedTTSService {
+
+    private static final int REQUEST_TIMEOUT_MS = 10_000;
+    private final Logger logger = LoggerFactory.getLogger(OpenAITTSService.class);
+    private OpenAITTSConfiguration config = new OpenAITTSConfiguration();
+    private final HttpClient httpClient;
+    private final Gson gson = new Gson();
+    private static final Set<Voice> VOICES = Stream.of("nova", "alloy", "echo", "fable", "onyx", "shimmer")
+            .map(OpenAITTSVoice::new).collect(Collectors.toSet());
+
+    @Activate
+    public OpenAITTSService(@Reference HttpClientFactory httpClientFactory, @Reference TTSCache ttsCache,
+            Map<String, Object> config) {
+        super(ttsCache);
+        this.httpClient = httpClientFactory.getCommonHttpClient();
+    }
+
+    @Activate
+    protected void activate(Map<String, Object> config) {
+        this.config = new Configuration(config).as(OpenAITTSConfiguration.class);
+    }
+
+    @Modified
+    protected void modified(Map<String, Object> config) {
+        this.config = new Configuration(config).as(OpenAITTSConfiguration.class);
+    }
+
+    @Override
+    public Set<AudioFormat> getSupportedFormats() {
+        return Set.of(new AudioFormat(AudioFormat.CONTAINER_NONE, AudioFormat.CODEC_MP3, null, 16, 64000, 44100L));
+    }
+
+    @Override
+    public String getId() {
+        return TTS_SERVICE_ID;
+    }
+
+    @Override
+    public String getLabel(@Nullable Locale locale) {
+        return "OpenAI TTS Service";
+    }
+
+    @Override
+    public Set<Voice> getAvailableVoices() {
+        return VOICES;
+    }
+
+    /**
+     * Synthesizes the given text to audio data using the OpenAI API
+     *
+     * @param text The text to synthesize
+     * @param voice The voice to use
+     * @param requestedFormat The requested audio format
+     * @return The synthesized audio data
+     * @throws TTSException If the synthesis fails
+     */
+    @Override
+    public AudioStream synthesizeForCache(String text, Voice voice, AudioFormat requestedFormat) throws TTSException {
+        JsonObject content = new JsonObject();
+        content.addProperty("model", config.model);
+        content.addProperty("input", text);
+        content.addProperty("voice", voice.getLabel().toLowerCase());
+        content.addProperty("speed", config.speed);
+
+        String queryJson = gson.toJson(content);
+
+        try {
+            ContentResponse response = httpClient.newRequest(config.apiUrl).method(HttpMethod.POST)
+                    .timeout(REQUEST_TIMEOUT_MS, TimeUnit.MILLISECONDS)
+                    .header("Authorization", "Bearer " + config.apiKey).header("Content-Type", "application/json")
+                    .content(new StringContentProvider(queryJson)).send();
+
+            if (response.getStatus() == HttpStatus.OK_200) {
+                return new ByteArrayAudioStream(response.getContent(), requestedFormat);
+            } else {
+                logger.error("Request resulted in HTTP {} with message: {}", response.getStatus(),
+                        response.getReason());
+                throw new TTSException("Failed to generate audio data");
+            }
+        } catch (InterruptedException | TimeoutException | ExecutionException e) {
+            logger.error("Request to OpenAI failed: {}", e.getMessage(), e);
+            throw new TTSException("Failed to generate audio data");
+        }
+    }
+}
--- a/bundles/org.openhab.voice.openaitts/src/main/java/org/openhab/voice/openaitts/internal/OpenAITTSVoice.java
+++ b/bundles/org.openhab.voice.openaitts/src/main/java/org/openhab/voice/openaitts/internal/OpenAITTSVoice.java
@ -0,0 +1,61 @@
+/**
+ * Copyright (c) 2010-2024 Contributors to the openHAB project
+ *
+ * See the NOTICE file(s) distributed with this work for additional
+ * information.
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ */
+package org.openhab.voice.openaitts.internal;
+
+import java.util.Locale;
+
+import org.eclipse.jdt.annotation.NonNullByDefault;
+import org.openhab.core.voice.Voice;
+
+/**
+ * @author Artur Fedjukevits - Initial contribution
+ */
+@NonNullByDefault
+public class OpenAITTSVoice implements Voice {
+
+    private final String label;
+
+    public OpenAITTSVoice(String label) {
+        this.label = label;
+    }
+
+    /**
+     * The unique identifier of the voice, used for internal purposes
+     *
+     * @return The unique identifier of the voice
+     */
+    @Override
+    public String getUID() {
+        return "openaitts:" + label;
+    }
+
+    /**
+     * The voice label, used for GUI's or VUI's
+     *
+     * @return The voice label
+     */
+    @Override
+    public String getLabel() {
+        return Character.toUpperCase(label.charAt(0)) + label.substring(1);
+    }
+
+    /**
+     * The locale of the voice
+     *
+     * @return The locale of the voice
+     */
+    @Override
+    public Locale getLocale() {
+        return Locale.ENGLISH;
+    }
+}
--- a/bundles/org.openhab.voice.openaitts/src/main/resources/OH-INF/addon/addon.xml
+++ b/bundles/org.openhab.voice.openaitts/src/main/resources/OH-INF/addon/addon.xml
@ -0,0 +1,15 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<addon:addon id="openaitts" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+	xmlns:addon="https://openhab.org/schemas/addon/v1.0.0"
+	xsi:schemaLocation="https://openhab.org/schemas/addon/v1.0.0 https://openhab.org/schemas/addon-1.0.0.xsd">
+
+	<type>voice</type>
+	<name>OpenAI Text-to-Speech</name>
+	<description>OpenAI TTS Service provides text-to-speech capabilities for openHAB.</description>
+	<connection>cloud</connection>
+
+	<service-id>org.openhab.voice.openaitts</service-id>
+
+	<config-description-ref uri="voice:openaitts"/>
+
+</addon:addon>
--- a/bundles/org.openhab.voice.openaitts/src/main/resources/OH-INF/config/config.xml
+++ b/bundles/org.openhab.voice.openaitts/src/main/resources/OH-INF/config/config.xml
@ -0,0 +1,46 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<config-description:config-descriptions
+	xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+	xmlns:config-description="https://openhab.org/schemas/config-description/v1.0.0"
+	xsi:schemaLocation="https://openhab.org/schemas/config-description/v1.0.0
+		https://openhab.org/schemas/config-description-1.0.0.xsd">
+	<config-description uri="voice:openaitts">
+		<parameter-group name="authentication">
+			<label>Authentication</label>
+			<description>Authentication for connecting to OpenAI API.</description>
+		</parameter-group>
+		<parameter-group name="tts">
+			<label>TTS Configuration</label>
+			<description>Configure Text to Speech.</description>
+		</parameter-group>
+		<parameter name="apiKey" type="text" required="true" groupName="authentication">
+			<label>API Key</label>
+			<required>true</required>
+			<description>OpenAI API key.</description>
+			<context>password</context>
+		</parameter>
+		<parameter name="apiUrl" type="text" required="true" groupName="authentication">
+			<label>API URL</label>
+			<required>true</required>
+			<description>TTS host API URL.</description>
+			<default>https://api.openai.com/v1/audio/speech</default>
+		</parameter>
+		<parameter name="model" type="text" required="true" groupName="tts">
+			<label>Model</label>
+			<required>true</required>
+			<description>ID of the model to use.</description>
+			<options>
+				<option value="tts-1">tts-1</option>
+				<option value="tts-1-hd">tts-1-hd</option>
+			</options>
+			<limitToOptions>false</limitToOptions>
+			<default>tts-1</default>
+		</parameter>
+		<parameter name="speed" type="decimal" min="0.25" max="4" groupName="tts">
+			<label>Speed</label>
+			<description>The speed of the generated audio. Select a value from 0.25 to 4.0.</description>
+			<default>1.0</default>
+		</parameter>
+
+	</config-description>
+</config-description:config-descriptions>
--- a/bundles/org.openhab.voice.openaitts/src/main/resources/OH-INF/i18n/openaitts.properties
+++ b/bundles/org.openhab.voice.openaitts/src/main/resources/OH-INF/i18n/openaitts.properties
@ -0,0 +1,17 @@
+# add-on
+
+addon.openaitts.name = OpenAI Text-to-Speech
+addon.openaitts.description = OpenAI TTS Service provides text-to-speech capabilities for openHAB.
+
+voice.config.openaitts.apiKey.label = API Key
+voice.config.openaitts.apiKey.description = OpenAI API key.
+voice.config.openaitts.apiUrl.label = API URL
+voice.config.openaitts.apiUrl.description = TTS host API URL.
+voice.config.openaitts.group.authentication.label = Authentication
+voice.config.openaitts.group.authentication.description = Authentication for connecting to OpenAI API.
+voice.config.openaitts.group.tts.label = TTS Configuration
+voice.config.openaitts.group.tts.description = Configure Text to Speech.
+voice.config.openaitts.model.label = Model
+voice.config.openaitts.model.description = ID of the model to use.
+voice.config.openaitts.speed.label = Speed
+voice.config.openaitts.speed.description = The speed of the generated audio. Select a value from 0.25 to 4.0.
--- a/bundles/pom.xml
+++ b/bundles/pom.xml
@ -483,6 +483,7 @@
    <module>org.openhab.voice.mactts</module>
    <module>org.openhab.voice.marytts</module>
    <module>org.openhab.voice.mimictts</module>
+    <module>org.openhab.voice.openaitts</module>
    <module>org.openhab.voice.picotts</module>
    <module>org.openhab.voice.pipertts</module>
    <module>org.openhab.voice.pollytts</module>