forked from coze-dev/coze-java
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathWebsocketTranscriptionsExample.java
More file actions
159 lines (136 loc) · 5.63 KB
/
WebsocketTranscriptionsExample.java
File metadata and controls
159 lines (136 loc) · 5.63 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
package example.websocket.audio.transcriptions;
import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;
import java.util.concurrent.TimeUnit;
import com.coze.openapi.client.audio.common.AudioFormat;
import com.coze.openapi.client.audio.speech.CreateSpeechReq;
import com.coze.openapi.client.audio.speech.CreateSpeechResp;
import com.coze.openapi.client.websocket.event.downstream.*;
import com.coze.openapi.client.websocket.event.model.AsrConfig;
import com.coze.openapi.client.websocket.event.model.InputAudio;
import com.coze.openapi.client.websocket.event.model.TranscriptionsUpdateEventData;
import com.coze.openapi.service.auth.TokenAuth;
import com.coze.openapi.service.service.CozeAPI;
import com.coze.openapi.service.service.websocket.audio.transcriptions.WebsocketsAudioTranscriptionsCallbackHandler;
import com.coze.openapi.service.service.websocket.audio.transcriptions.WebsocketsAudioTranscriptionsClient;
import com.coze.openapi.service.service.websocket.audio.transcriptions.WebsocketsAudioTranscriptionsCreateReq;
/*
This example demonstrates how to use the WebSocket transcription API to transcribe audio data,
process transcription events, and handle the results through callback methods.
*/
public class WebsocketTranscriptionsExample {

  // Completion flag: written from the websocket callback thread, read by main's polling loop.
  // Must be volatile — without it the JMM does not guarantee the callback thread's write ever
  // becomes visible to main, and the wait loop below could spin forever.
  public static volatile boolean isDone = false;

  /** Receives transcription lifecycle events pushed over the websocket connection. */
  private static class CallbackHandler extends WebsocketsAudioTranscriptionsCallbackHandler {
    public CallbackHandler() {
      super();
    }

    @Override
    public void onError(WebsocketsAudioTranscriptionsClient client, ErrorEvent event) {
      System.out.println(event);
      // Release main's wait loop; otherwise a server-side error would hang the example forever.
      isDone = true;
    }

    @Override
    public void onClientException(WebsocketsAudioTranscriptionsClient client, Throwable e) {
      e.printStackTrace();
      // Same as onError: make sure the program can terminate after a client-side failure.
      isDone = true;
    }

    // Transcription config update acknowledged (transcriptions.updated)
    @Override
    public void onTranscriptionsUpdated(
        WebsocketsAudioTranscriptionsClient client, TranscriptionsUpdatedEvent event) {
      System.out.println("=== Transcriptions Updated ===");
      System.out.println(event);
    }

    // Transcription session created (transcriptions.created)
    @Override
    public void onTranscriptionsCreated(
        WebsocketsAudioTranscriptionsClient client, TranscriptionsCreatedEvent event) {
      System.out.println("=== Transcriptions Created ===");
      System.out.println(event);
    }

    // Incremental transcription text (transcriptions.message.update)
    @Override
    public void onTranscriptionsMessageUpdate(
        WebsocketsAudioTranscriptionsClient client, TranscriptionsMessageUpdateEvent event) {
      System.out.println(event.getData().getContent());
    }

    // Final transcription result (transcriptions.message.completed)
    @Override
    public void onTranscriptionsMessageCompleted(
        WebsocketsAudioTranscriptionsClient client, TranscriptionsMessageCompletedEvent event) {
      System.out.println("=== Transcriptions Message Completed ===");
      System.out.println(event);
      isDone = true;
    }

    // Server has consumed the full audio buffer (input_audio_buffer.completed)
    @Override
    public void onInputAudioBufferCompleted(
        WebsocketsAudioTranscriptionsClient client, InputAudioBufferCompletedEvent event) {
      System.out.println("=== Input Audio Buffer Completed ===");
      System.out.println(event);
    }
  }

  // Flow: open a websocket transcription session, synthesize sample audio via the speech API,
  // stream that audio into the session in small chunks, then wait for the final transcription
  // event before shutting everything down.
  public static void main(String[] args) throws Exception {
    // Get an access_token through personal access token or oauth.
    String token = System.getenv("COZE_API_TOKEN");
    String voiceID = System.getenv("COZE_VOICE_ID");
    TokenAuth authCli = new TokenAuth(token);

    // Init the Coze client through the access_token.
    CozeAPI coze =
        new CozeAPI.Builder()
            .baseURL(System.getenv("COZE_API_BASE"))
            .auth(authCli)
            .readTimeout(10000)
            .build();

    WebsocketsAudioTranscriptionsClient client = null;
    try {
      client =
          coze.websockets()
              .audio()
              .transcriptions()
              .create(new WebsocketsAudioTranscriptionsCreateReq(new CallbackHandler()));

      // Synthesize a short WAV clip so the example has audio to transcribe.
      CreateSpeechResp speechResp =
          coze.audio()
              .speech()
              .create(
                  CreateSpeechReq.builder()
                      .input("今天深圳的天气怎么样?")
                      .voiceID(voiceID)
                      .responseFormat(AudioFormat.WAV)
                      .sampleRate(24000)
                      .build());

      InputAudio inputAudio =
          InputAudio.builder().sampleRate(24000).codec("pcm").format("wav").channel(2).build();
      AsrConfig asrConfig =
          AsrConfig.builder()
              .hotWords(Arrays.asList("Coze", "AI"))
              .context("Real-time transcription")
              .userLanguage("en-US")
              .build();
      client.transcriptionsUpdate(new TranscriptionsUpdateEventData(inputAudio, asrConfig));

      // Stream the synthesized audio to the server chunk by chunk.
      try (InputStream inputStream = speechResp.getResponse().byteStream()) {
        byte[] buffer = new byte[1024];
        int bytesRead;
        while ((bytesRead = inputStream.read(buffer)) != -1) {
          // Copy only the bytes actually read; the last chunk is usually shorter than 1024.
          client.inputAudioBufferAppend(Arrays.copyOf(buffer, bytesRead));
          // Pause between chunks to simulate the pacing of live speech.
          TimeUnit.MILLISECONDS.sleep(100);
        }
        client.inputAudioBufferComplete();
      } catch (IOException e) {
        e.printStackTrace();
      }

      // Wait until the callback thread signals completion (final result, error, or exception).
      while (!isDone) {
        TimeUnit.MILLISECONDS.sleep(100);
      }
    } catch (Exception e) {
      e.printStackTrace();
    } finally {
      if (client != null) {
        client.close();
      }
      coze.shutdownExecutor();
    }
  }
}