基于 websocket 的 tts 使用示例
使用 websocket 协议实时处理 tts 转换
七牛云推出了基于大模型的 TTS 处理能力,可以非常简单地实现实时的文本转语音。以下是基于 WebSocket 的 Go 实现示例:
package main
import (
"encoding/base64"
"encoding/json"
"fmt"
"io/ioutil"
"net/http"
"net/url"
"github.com/gorilla/websocket"
)
// Connection settings shared by every synthesis call in this example.
var (
	// addr is the host name of the TTS service.
	addr = "api.qnaigc.com"

	// token is the API key sent as a bearer token ("sk-xx" is presumably a
	// placeholder to be replaced with a real key).
	token = "sk-xx"

	// voiceType names the voice to synthesize with; replace it with the
	// voice you want to call.
	voiceType = "qiniu_zh_female_tmjxxy"

	// u is the secure WebSocket URL of the TTS endpoint.
	u = url.URL{
		Scheme: "wss",
		Host:   addr,
		Path:   "/v1/voice/tts",
	}

	// header carries the HTTP headers passed along with the WebSocket
	// handshake: the bearer token and the selected voice.
	header = http.Header{
		"Authorization": {fmt.Sprintf("Bearer %s", token)},
		"VoiceType":     {voiceType},
	}
)
// TTSRequest is the JSON message sent to the service to start a synthesis.
//
// Both embedded structs carry an explicit JSON name, so encoding/json
// encodes them as nested objects under "audio" and "request" instead of
// flattening their fields into the top-level object.
type TTSRequest struct {
	Audio   `json:"audio"`
	Request `json:"request"`
}
// Audio describes how the synthesized audio should be produced.
type Audio struct {
	// VoiceType is the name of the voice to use.
	VoiceType string `json:"voice_type"`
	// Encoding is the output audio format (this example passes "mp3").
	Encoding string `json:"encoding"`
	// SpeedRatio is the speech speed multiplier (this example passes 1.0).
	SpeedRatio float64 `json:"speed_ratio"`
}
// Request holds the content to be synthesized.
type Request struct {
	// Text is the text to convert to speech.
	Text string `json:"text"`
}
// RelayTTSResponse is one JSON message received from the service during a
// synthesis stream.
type RelayTTSResponse struct {
	// Reqid is the request identifier reported by the service.
	Reqid string `json:"reqid"`
	// Operation is the operation name reported by the service.
	Operation string `json:"operation"`
	// Sequence is the chunk sequence number; wssStream treats a negative
	// value as the final chunk of the stream.
	Sequence int `json:"sequence"`
	// Data is a base64-encoded chunk of audio.
	Data string `json:"data"`
	// Addition holds optional extra information and may be absent.
	Addition *Addition `json:"addition,omitempty"`
}
// Addition carries extra metadata attached to a response message.
type Addition struct {
	// Duration is the audio duration, transmitted as a string
	// (unit not visible here; presumably milliseconds, confirm against the
	// service documentation).
	Duration string `json:"duration"`
}
// main runs the demo: it synthesizes a fixed sample sentence with the
// package-level voiceType and stores the result in test.mp3.
func main() {
	const (
		sampleText = "我想测试下语音合成的效果"
		outputPath = "test.mp3"
	)
	wssStream(sampleText, voiceType, outputPath)
}
// wssStream performs a streaming synthesis: it converts text to speech over
// a single WebSocket session and writes the resulting MP3 audio to outFile.
//
// The function dials the package-level URL u with the package-level header,
// sends one JSON request built by setupInput, and then reads response
// messages until one arrives with a negative Sequence, which marks the end
// of the stream. Each message carries a base64-encoded audio chunk; the
// chunks are concatenated in arrival order.
//
// Errors are printed to standard output. The output file is written only
// when the whole stream was received and decoded successfully.
//
// NOTE(review): the handshake header carries the package-level voiceType,
// not this function's voiceType parameter; confirm which one the service
// honors when the two differ.
func wssStream(text, voiceType, outFile string) {
	input := setupInput(voiceType, "mp3", 1.0, text)

	c, _, err := websocket.DefaultDialer.Dial(u.String(), header)
	if err != nil {
		fmt.Println("dial err:", err)
		return
	}
	defer c.Close()

	if err := c.WriteMessage(websocket.BinaryMessage, input); err != nil {
		fmt.Println("write message fail, err:", err.Error())
		return
	}

	var audio []byte
	for {
		_, message, err := c.ReadMessage()
		if err != nil {
			// The connection ended before the final chunk arrived, so the
			// audio collected so far is incomplete and is not written.
			fmt.Println("read message fail, err:", err.Error())
			return
		}

		var resp RelayTTSResponse
		if err := json.Unmarshal(message, &resp); err != nil {
			// Skip messages that are not valid JSON and keep reading.
			fmt.Println("unmarshal fail, err:", err.Error())
			continue
		}

		chunk, err := base64.StdEncoding.DecodeString(resp.Data)
		if err != nil {
			// DecodeString returns the bytes decoded before the failure;
			// appending them would silently corrupt the audio, so abort.
			fmt.Println("decode fail, err:", err.Error())
			return
		}
		audio = append(audio, chunk...)

		// A negative sequence number marks the last chunk.
		if resp.Sequence < 0 {
			break
		}
	}

	if err := ioutil.WriteFile(outFile, audio, 0644); err != nil {
		fmt.Println("write audio to file fail, err:", err.Error())
	}
}
// setupInput builds the JSON request body for one synthesis call.
//
// voiceType, encoding and speedRatio fill the "audio" object and text fills
// the "request" object of a TTSRequest.
//
// It returns the encoded JSON, or nil if encoding fails. json.Marshal
// rejects a NaN or infinite speedRatio; in that case the error is printed
// instead of being silently dropped.
func setupInput(voiceType string, encoding string, speedRatio float64, text string) []byte {
	params := TTSRequest{
		Audio: Audio{
			VoiceType:  voiceType,
			Encoding:   encoding,
			SpeedRatio: speedRatio,
		},
		Request: Request{
			Text: text,
		},
	}

	payload, err := json.Marshal(params)
	if err != nil {
		fmt.Println("marshal request fail, err:", err.Error())
		return nil
	}
	return payload
}
文档反馈
(如有产品使用问题,请提交工单)