[googlestt] Fix dropped bytes (#14649)

* [googlestt] Fix dropped bytes (audio from a partially filled read was discarded instead of accumulated)
* Fix unhandled cancellation error when using single utterance mode

Signed-off-by: Miguel Álvarez <miguelwork92@gmail.com>
This commit is contained in:
GiviMAD 2023-03-23 01:57:00 -07:00 committed by Jacob Laursen
parent 3bab21f242
commit 555210cb23

View File

@ -255,14 +255,20 @@ public class GoogleSTTService implements STTService {
long startTime = System.currentTimeMillis(); long startTime = System.currentTimeMillis();
long maxTranscriptionMillis = (config.maxTranscriptionSeconds * 1000L); long maxTranscriptionMillis = (config.maxTranscriptionSeconds * 1000L);
long maxSilenceMillis = (config.maxSilenceSeconds * 1000L); long maxSilenceMillis = (config.maxSilenceSeconds * 1000L);
int readBytes = 6400; final int bufferSize = 6400;
while (!aborted.get()) { int numBytesRead;
byte[] data = new byte[readBytes]; int remaining = bufferSize;
int dataN = audioStream.read(data); byte[] audioBuffer = new byte[bufferSize];
while (!aborted.get() && !responseObserver.isDone()) {
numBytesRead = audioStream.read(audioBuffer, bufferSize - remaining, remaining);
if (aborted.get()) { if (aborted.get()) {
logger.debug("Stops listening, aborted"); logger.debug("Stops listening, aborted");
break; break;
} }
if (numBytesRead == -1) {
logger.debug("End of stream");
break;
}
if (isExpiredInterval(maxTranscriptionMillis, startTime)) { if (isExpiredInterval(maxTranscriptionMillis, startTime)) {
logger.debug("Stops listening, max transcription time reached"); logger.debug("Stops listening, max transcription time reached");
break; break;
@ -272,18 +278,17 @@ public class GoogleSTTService implements STTService {
logger.debug("Stops listening, max silence time reached"); logger.debug("Stops listening, max silence time reached");
break; break;
} }
if (dataN != readBytes) { if (numBytesRead != remaining) {
try { remaining = remaining - numBytesRead;
Thread.sleep(100);
} catch (InterruptedException e) {
}
continue; continue;
} }
remaining = bufferSize;
StreamingRecognizeRequest dataRequest = StreamingRecognizeRequest.newBuilder() StreamingRecognizeRequest dataRequest = StreamingRecognizeRequest.newBuilder()
.setAudioContent(ByteString.copyFrom(data)).build(); .setAudioContent(ByteString.copyFrom(audioBuffer)).build();
logger.debug("Sending audio data {}", dataN); logger.debug("Sending audio data {}", bufferSize);
clientStream.send(dataRequest); clientStream.send(dataRequest);
} }
audioStream.close();
} }
private void sendStreamConfig(ClientStream<StreamingRecognizeRequest> clientStream, private void sendStreamConfig(ClientStream<StreamingRecognizeRequest> clientStream,
@ -335,6 +340,7 @@ public class GoogleSTTService implements STTService {
private float confidenceSum = 0; private float confidenceSum = 0;
private int responseCount = 0; private int responseCount = 0;
private long lastInputTime = 0; private long lastInputTime = 0;
private boolean done = false;
public TranscriptionListener(STTListener sttListener, GoogleSTTConfiguration config, AtomicBoolean aborted) { public TranscriptionListener(STTListener sttListener, GoogleSTTConfiguration config, AtomicBoolean aborted) {
this.sttListener = sttListener; this.sttListener = sttListener;
@ -374,7 +380,7 @@ public class GoogleSTTService implements STTService {
responseCount++; responseCount++;
// when in single utterance mode we can just get one final result so complete // when in single utterance mode we can just get one final result so complete
if (config.singleUtteranceMode) { if (config.singleUtteranceMode) {
onComplete(); done = true;
} }
} }
}); });
@ -411,6 +417,10 @@ public class GoogleSTTService implements STTService {
} }
} }
/**
 * Reports whether this listener has flagged the transcription as finished.
 * <p>
 * The underlying {@code done} field is declared with an initial value of {@code false} and is set to
 * {@code true} by the response-handling code when {@code config.singleUtteranceMode} is enabled. The
 * audio read loop checks {@code responseObserver.isDone()} (presumably this method) on every iteration
 * and stops reading once it returns {@code true}.
 * <p>
 * NOTE(review): {@code done} is a plain (non-volatile) {@code boolean}. If the response callback and
 * the audio read loop run on different threads, the write is not guaranteed to become visible to the
 * reader; confirm the threading model and consider {@code volatile} or an {@code AtomicBoolean}.
 *
 * @return {@code true} once the done flag has been set, {@code false} otherwise
 */
public boolean isDone() {
return done;
}
public long getLastInputTime() { public long getLastInputTime() {
return lastInputTime; return lastInputTime;
} }