AI 大模型推理

  • AI 大模型推理 > 最佳实践 > 基于 websocket 的 tts 使用示例

    基于 websocket 的 tts 使用示例

    最近更新时间: 2025-06-10 16:18:46

    使用 websocket 协议实时处理 tts 转换

    七牛云推出了基于大模型的 TTS 能力,可以非常简单地实现实时的文本转语音。以下是基于 websocket 的实现示例:

    package main
    
    import (
    	"encoding/base64"
    	"encoding/json"
    	"fmt"
    	"io/ioutil"
    	"net/http"
    	"net/url"
    
    	"github.com/gorilla/websocket"
    )
    
    // Connection settings shared by the example: the API host, the credential,
    // the voice to synthesize with, and the websocket URL and handshake headers
    // derived from them.
    var (
    	// addr is the host of the TTS API.
    	addr = "api.qnaigc.com"

    	// token is the API key sent as a Bearer credential; "sk-xx" is a placeholder.
    	token = "sk-xx"

    	// voiceType selects the voice; replace it with the voice you want to call.
    	voiceType = "qiniu_zh_female_tmjxxy"

    	// u is the secure websocket endpoint of the TTS service.
    	u = url.URL{
    		Scheme: "wss",
    		Host:   addr,
    		Path:   "/v1/voice/tts",
    	}

    	// header holds the HTTP headers passed to the websocket dial.
    	header = http.Header{
    		"Authorization": []string{fmt.Sprintf("Bearer %s", token)},
    		"VoiceType":     []string{voiceType},
    	}
    )
    
    // Types describing the JSON message sent to the TTS endpoint.
    type (
    	// TTSRequest is the top-level request. Both embedded structs carry a
    	// JSON tag, so they are encoded as the nested objects "audio" and
    	// "request" while their fields stay directly accessible in Go.
    	TTSRequest struct {
    		Audio   `json:"audio"`
    		Request `json:"request"`
    	}

    	// Audio holds the synthesis settings: the voice to use, the output
    	// encoding (the example passes "mp3") and the speed ratio (the example
    	// passes 1.0).
    	Audio struct {
    		VoiceType  string  `json:"voice_type"`
    		Encoding   string  `json:"encoding"`
    		SpeedRatio float64 `json:"speed_ratio"`
    	}

    	// Request holds the text to be synthesized.
    	Request struct {
    		Text string `json:"text"`
    	}
    )
    
    // Types describing the JSON frames received from the TTS endpoint.
    type (
    	// RelayTTSResponse is one frame of the synthesis stream. Data carries a
    	// base64-encoded audio chunk and a negative Sequence marks the last
    	// frame. Reqid and Operation are decoded but not used by this example;
    	// presumably they identify the request and the operation - confirm
    	// against the API reference.
    	RelayTTSResponse struct {
    		Reqid     string    `json:"reqid"`
    		Operation string    `json:"operation"`
    		Sequence  int       `json:"sequence"`
    		Data      string    `json:"data"`
    		Addition  *Addition `json:"addition,omitempty"`
    	}

    	// Addition is optional extra information attached to a frame. Duration
    	// is kept as the raw string sent by the server; its unit is not shown
    	// here - confirm against the API reference.
    	Addition struct {
    		Duration string `json:"duration"`
    	}
    )
    
    // main runs the example: it synthesizes one sample sentence with the
    // configured voice and stores the audio in test.mp3.
    func main() {
    	const (
    		sampleText = "我想测试下语音合成的效果"
    		outputPath = "test.mp3"
    	)
    	wssStream(sampleText, voiceType, outputPath)
    }
    
    // wssStream performs streaming synthesis: it sends text to the websocket TTS
    // endpoint and writes the audio it receives to outFile.
    //
    // The request is encoded as mp3 at speed ratio 1.0 and sent as one binary
    // websocket message. Each reply frame is a JSON RelayTTSResponse whose Data
    // field holds a base64-encoded audio chunk; chunks are concatenated in
    // arrival order until a frame with a negative Sequence arrives, after which
    // the collected audio is written to outFile with mode 0644.
    //
    // Failures are reported on standard output and end the call. Nothing is
    // written if the connection fails or a chunk cannot be decoded, so a
    // truncated or corrupt file is never produced. A frame that is not valid
    // JSON is reported and skipped.
    func wssStream(text, voiceType, outFile string) {
    	input := setupInput(voiceType, "mp3", 1.0, text)
    	if len(input) == 0 {
    		// setupInput yields no bytes when the request cannot be encoded.
    		fmt.Println("build request fail: empty payload")
    		return
    	}

    	c, _, err := websocket.DefaultDialer.Dial(u.String(), header)
    	if err != nil {
    		fmt.Println("dial err:", err)
    		return
    	}
    	defer c.Close()

    	if err = c.WriteMessage(websocket.BinaryMessage, input); err != nil {
    		fmt.Println("write message fail, err:", err.Error())
    		return
    	}

    	// The loop assigns to the function-level err with "=" instead of
    	// redeclaring it with ":=", so no error is hidden in a shadowed variable.
    	var audio []byte
    	for {
    		var message []byte
    		_, message, err = c.ReadMessage()
    		if err != nil {
    			fmt.Println("read message fail, err:", err.Error())
    			return
    		}

    		var resp RelayTTSResponse
    		if err = json.Unmarshal(message, &resp); err != nil {
    			fmt.Println("unmarshal fail, err:", err.Error())
    			continue
    		}

    		var chunk []byte
    		chunk, err = base64.StdEncoding.DecodeString(resp.Data)
    		if err != nil {
    			// A corrupt chunk would leave a hole in the audio; stop here
    			// rather than append the partially decoded bytes.
    			fmt.Println("decode fail, err:", err.Error())
    			return
    		}
    		audio = append(audio, chunk...)

    		// A negative sequence number marks the final frame.
    		if resp.Sequence < 0 {
    			break
    		}
    	}

    	if err = ioutil.WriteFile(outFile, audio, 0644); err != nil {
    		fmt.Println("write audio to file fail, err:", err.Error())
    	}
    }
    
    // setupInput builds the JSON body of a synthesis request from the given
    // voice, audio encoding, speed ratio and text.
    //
    // It returns the encoded bytes, or nil when encoding fails (encoding/json
    // rejects a NaN or infinite speedRatio).
    func setupInput(voiceType string, encoding string, speedRatio float64, text string) []byte {
    	var req TTSRequest
    	req.Audio.VoiceType = voiceType
    	req.Audio.Encoding = encoding
    	req.Audio.SpeedRatio = speedRatio
    	req.Request.Text = text

    	payload, err := json.Marshal(&req)
    	if err != nil {
    		// The signature has no error result, so failure is signalled by nil.
    		return nil
    	}
    	return payload
    }
    
    以上内容是否对您有帮助?
  • Close