前端语音输入、输出
虽然现代浏览器支持本地语音识别,但是为了准确性和兼容性,依旧还是选择使用专用服务,如阿里的语音服务:大模型平台百炼
后端服务参考-阿里云方案
- 首页 - 大模型服务平台百炼 - API 参考(模型)- 语音合成/识别/翻译 - 语音识别-Paraformer
- 首页 - 大模型服务平台百炼 - API 参考(模型)- 语音合成/识别/翻译 - 语音合成-CosyVoice
前端部分
1. 语音识别
1.1 采集音频
音频采集有 2 种方案:
-
通过客户端给 webview 的 sdk,使用 app 能力录音 优点:无需重复授权、响应快、稳定
缺点:只能在指定 app 内使用、还有 sdk 鉴权的流程
-
使用浏览器原生录音 优点:响应快、稳定性尚可、浏览器支持度高
缺点:刷新、重开页面后需要用户再次授权
1.1.1 客户端录音
- 配置 sdk 以飞书为例,需要对 sdk 进行配置:
// 自行实现getLarkSdkSecret,从后台获取sdk配置方法需要的参数
h5sdk.ready(() => {
getLarkSdkSecret().then(async (res) => {
console.log("sdk authApi result", res);
const { appId, timestamp, nonceStr, signature } = res;
h5sdk.config({
appId,
timestamp,
nonceStr,
signature,
jsApiList: [],
//成功回调
onSuccess: (res: any) => {
console.log(`config success: ${JSON.stringify(res)}`);
resolve(res);
},
//失败回调
onFail: (err: any) => {
console.log(`config failed: ${JSON.stringify(err)}`);
reject(err);
},
});
});
});- 调用录音方法 getRecorderManager 其中 start 的方法参数如下即可:
recorder?.start({
sampleRate: 16000,
numberOfChannels: 1,
format: "wav",
frameSize: 16,
});1.1.2 浏览器录音
以recorder-core这个库为例:
// h5Recorder.ts
import Recorder from "recorder-core/recorder.wav.min";
/**
* H5录音器类 - 单例模式
* 提供录音功能,包括打开录音权限、开始录音、停止录音等操作
*/
class H5Recorder {
private static instance: H5Recorder | null = null;
private rec: any = null;
private processTime: number = 0;
private pcmBuffer: Int16Array = new Int16Array(0);
private chunk: any = null;
private onProcess: Function | null = null;
private pcmBufferSampleRate: number = 16000;
private startTime: number = 0;
/**
* 私有构造函数,防止外部直接实例化
*/
private constructor() {}
/**
* 获取单例实例
* @returns H5Recorder实例
*/
public static getInstance(): H5Recorder {
if (!H5Recorder.instance) {
H5Recorder.instance = new H5Recorder();
}
return H5Recorder.instance;
}
/**
* 调用open打开录音请求好录音权限
* 一般在显示出录音按钮或相关的录音界面时进行此方法调用
* @param success 成功回调函数
* @param sendBufferFn 处理buffer的回调函数
*/
public async recOpen(
success?: Function,
sendBufferFn?: Function,
onError?: (errorMsg: any) => void
): Promise<void> {
try {
this.onProcess = sendBufferFn || null;
this.createRecorder();
this.rec.open(
() => {
success && success();
},
(msg: any, isUserNotAllow: any) => {
// 用户拒绝未授权或不支持
console.log(
(isUserNotAllow ? "UserNotAllow," : "") + "录音初始化失败:" + msg
);
onError?.(
(isUserNotAllow ? "【PC模式】" : "") +
"录音初始化失败:" +
msg +
",请在浏览器中开启此页面的麦克风权限"
);
}
);
} catch (e) {
console.error(e, "麦克风权限出错!");
}
}
/**
* 创建录音器实例
* @returns 录音器实例
*/
private createRecorder(): any {
this.rec = Recorder({
type: "wav",
sampleRate: 16000,
bitRate: 16,
onProcess: (
buffers: any[],
powerLevel: number,
bufferDuration: number,
bufferSampleRate: number,
newBufferIdx: number,
asyncEnd: Function
) => {
this.chunk = Recorder.SampleData(
buffers,
bufferSampleRate,
this.pcmBufferSampleRate,
this.chunk
);
const buffer = this.chunk.data.buffer;
this.onProcess && this.onProcess(buffer);
this.processTime = Date.now();
const pcm = this.chunk.data;
// 【关键代码】将实时处理的pcm拼接到缓冲结尾,结束录音时方便转码试听
const tmp = new Int16Array(this.pcmBuffer.length + pcm.length);
tmp.set(this.pcmBuffer, 0);
tmp.set(pcm, this.pcmBuffer.length);
this.pcmBuffer = tmp;
},
});
return this.rec;
}
/**
* 开始录音
* 打开了录音后才能进行start、stop调用
*/
public recStart(): void {
console.log("recStart");
// 重置环境
this.pcmBuffer = new Int16Array(0);
this.chunk = null;
this.rec.start();
// 【稳如老狗WDT】可选的,监控是否在正常录音有onProcess回调
// 如果长时间没有回调就代表录音不正常
const wdt = (this.rec.watchDogTimer = setInterval(() => {
if (!this.rec || wdt !== this.rec.watchDogTimer) {
clearInterval(wdt);
return;
} // sync
if (Date.now() < this.rec.wdtPauseT) return; // 如果暂停录音了就不检测
if (Date.now() - (this.processTime || this.startTime) > 1500) {
clearInterval(wdt);
console.error(this.processTime ? "录音被中断" : "录音未能正常开始");
// ... 错误处理,关闭录音,提醒用户
}
}, 1000));
this.startTime = Date.now();
this.rec.wdtPauseT = 0;
this.processTime = 0;
}
/**
* 销毁录音器
*/
public destroyRec(): void {
if (this.rec) {
this.recStop();
this.rec = null;
}
}
/**
* 结束录音
* @param stopFn 停止回调函数
*/
public recStop(stopFn?: Function): void {
if (!this.rec) return;
this.rec.watchDogTimer = 0; // 停止监控onProcess超时
this.rec.stop(
(blob: Blob, duration: number) => {
stopFn && stopFn();
},
(msg: any) => {
console.log("录音stop失败,msg:" + msg);
}
);
}
/**
* 获取录音器实例(用于外部访问)
* @returns 录音器实例
*/
public getRecorder(): any {
return this.rec;
}
/**
* 检查录音器是否已初始化
* @returns boolean
*/
public isInitialized(): boolean {
return !!this.rec;
}
}
// Export the shared singleton instance
export const h5Recorder = H5Recorder.getInstance();
// Backward-compatible standalone function exports.
// FIX: plain destructuring (`const { recOpen } = h5Recorder`) detaches the
// methods from the instance, so `this` would be undefined when they are
// called - bind them to the singleton instead.
export const recOpen = h5Recorder.recOpen.bind(h5Recorder);
export const recStart = h5Recorder.recStart.bind(h5Recorder);
export const recStop = h5Recorder.recStop.bind(h5Recorder);
export const destroyRec = h5Recorder.destroyRec.bind(h5Recorder);关键调用代码:
// 初始化录音器
h5Recorder.recOpen(
() => {
// 录音就绪
setIsRecReady(true);
},
// 接收到录音数据时的处理函数
(buffer: any) => {
if (!isRecording.current) {
return;
}
// 通过ws给语音识别服务端发送录音的二进制流
socketIns.current.sendData(buffer);
}
);
// 使用普通录音器
h5Recorder.recStart();
// 取消录音
h5Recorder.recStop();
// 停止录音并发送完成任务
h5Recorder.recStop(() => {
socketIns.current.sendFinishTask();
});
// 销毁录音实例
h5Recorder.destroyRec();1.2 音频转文字
1.2.1 websocket 连接
前置代码:
BaseWebSocket:
import { w3cwebsocket as W3CWebSocket } from "websocket";
import { message } from "../message";
/**
 * Abstract websocket client shared by the speech services (ASR / TTS).
 * Owns the connection lifecycle, a heartbeat, send queueing until the
 * server is ready, and the run-task / finish-task protocol messages;
 * subclasses implement the payloads and message handling.
 */
export abstract class BaseWebSocket {
  // Heartbeat period (ms)
  protected static readonly HEARTBEAT_INTERVAL = 30000;
  // Instance state
  public socket: W3CWebSocket | null = null;
  /**
   * Stores the promise returned by connect(); resolves when the server
   * sends its first message, rejects on connection error.
   */
  public connectPromise?: Promise<any>;
  /**
   * Whether the ws server has returned its first message
   * - used to decide that the service is actually usable
   */
  public isServerReady: boolean = false;
  /**
   * Heartbeat timer handle
   */
  protected heartbeatTimer?: any;
  /**
   * Websocket URL
   */
  protected url: string = "";
  protected onStart?: () => void;
  protected onUpdate?: (resultMsg: any) => void;
  protected onFinish?: (resultMsg?: any) => void;
  protected onFail?: (error: any) => void;
  /**
   * Bound beforeunload handler. Kept as a stable reference so cleanup()
   * can unregister it; the previous anonymous-listener version leaked one
   * window listener (pinning the instance) per client created.
   */
  private readonly unloadHandler = () => this.cleanup();
  protected constructor(params: {
    onStart?: () => void;
    onUpdate?: (resultMsg: any) => void;
    onFinish?: (resultMsg: any) => void;
    onFail?: (error: any) => void;
    url: string;
  }) {
    const { onStart, onFail, onFinish, onUpdate, url } = params;
    this.url = url;
    this.onStart = onStart;
    this.onUpdate = onUpdate;
    this.onFinish = onFinish;
    this.onFail = onFail;
    window.addEventListener("beforeunload", this.unloadHandler);
  }
  /**
   * Opens the websocket, starts the heartbeat and sends the run-task
   * message on open.
   * @returns promise that resolves once the server has responded
   */
  public connect(): Promise<any> {
    // Re-register in case a previous cleanup() removed the listener;
    // addEventListener de-duplicates an identical handler reference.
    window.addEventListener("beforeunload", this.unloadHandler);
    this.connectPromise = new Promise<void>((resolve, reject) => {
      this.socket = new W3CWebSocket(this.url);
      this.socket.binaryType = "arraybuffer";
      this.socket.onopen = () => {
        console.log("WebSocket connection established");
        this.startHeartbeat();
        this.sendRunTask();
      };
      this.socket.onmessage = (event: any) =>
        this.handleMessage(event, () => {
          this.isServerReady = true;
          resolve();
        });
      this.socket.onerror = (error: any) => {
        message.error(
          navigator.onLine
            ? "音频功能异常[websocket连接失败],请检查网络或者联系管理员"
            : "网络连接已断开,暂时无法使用语音功能,请检查您的网络设置"
        );
        console.error("[websocket连接失败]:", error);
        reject(error);
      };
      this.socket.onclose = () => {
        console.log("WebSocket connection closed");
        this.cleanup();
      };
    });
    return this.connectPromise;
  }
  /**
   * Whether the ws is in OPEN state. Note the server may not have responded
   * yet (the service is not necessarily usable) - see isServerReady.
   * - an open ws can be closed
   * @returns
   */
  public isWsOpened() {
    return this.socket?.readyState === WebSocket.OPEN;
  }
  /**
   * Stops the heartbeat, clears the pending promise, closes the socket and
   * unregisters the beforeunload listener so the instance can be collected.
   */
  public cleanup(): void {
    if (this.heartbeatTimer) {
      clearInterval(this.heartbeatTimer);
      this.heartbeatTimer = undefined;
    }
    window.removeEventListener("beforeunload", this.unloadHandler);
    this.isServerReady = false;
    this.connectPromise = undefined;
    if (this.isWsOpened()) {
      this.socket?.close();
    }
    this.socket = null;
  }
  /**
   * Sends immediately when the server is ready; otherwise queues the send
   * behind the pending connect() promise.
   * @param wsData binary data or a JSON string
   * @returns
   */
  public sendData(wsData: ArrayBuffer | string): void {
    if (!wsData) return;
    if (this.isServerReady) {
      this.socket?.send(wsData);
    } else if (this.connectPromise) {
      this.connectPromise.then(() => {
        this.socket?.send(wsData);
      });
    } else {
      throw "sendData失败,ws连接未建立";
    }
  }
  /**
   * Starts the ws heartbeat. Clears any previous timer first so repeated
   * connect() calls cannot stack intervals.
   */
  protected startHeartbeat(): void {
    if (this.heartbeatTimer) {
      clearInterval(this.heartbeatTimer);
    }
    this.heartbeatTimer = setInterval(() => {
      if (this.isWsOpened()) {
        this.sendData("ping");
      }
    }, BaseWebSocket.HEARTBEAT_INTERVAL);
  }
  /**
   * Sends the task-start message (right after the socket opens).
   */
  protected sendRunTask(): void {
    const payload = this.getRunTaskPayload();
    this.socket?.send(payload);
  }
  /**
   * Sends the task-finish message (client-side data fully uploaded).
   */
  public sendFinishTask(): void {
    const payload = this.getFinishTaskPayload();
    this.socket?.send(payload);
  }
  /**
   * How to handle an incoming message - implemented by subclasses.
   * @param event
   * @param resolve
   */
  protected abstract handleMessage(
    event: MessageEvent,
    resolve: Function
  ): void;
  /**
   * Builds the task-start message used to launch the server-side task.
   */
  protected abstract getRunTaskPayload(): string;
  /**
   * Builds the task-finish message telling the server the upload is done.
   */
  protected abstract getFinishTaskPayload(): string;
}语音转文字 类:AudioToTextWS
import { type IWebSocketMessage } from "./type.ts";
import { generateUUID } from "@/utils/ai/uuid.ts";
import { BaseWebSocket } from "../http/websocket.ts";
import { audioToTextApi } from "@/api/index.ts";
/**
 * Speech-to-text websocket client (Paraformer realtime ASR).
 * Streams microphone PCM to the server and surfaces recognized text
 * through the onUpdate / onFinish callbacks.
 */
export class AudioToTextWS extends BaseWebSocket {
  // 32-character task id: a uuid with its dashes stripped
  private readonly TASK_ID = generateUUID().replace(/-/g, "").slice(0, 32);
  // Most recent recognized sentence text
  public resultMsg = "";
  constructor(params: {
    onStart?: () => void;
    onUpdate: (resultMsg: string) => void;
    /**
     * Fired when the server reports that the task has finished
     * @param resultMsg
     * @returns
     */
    onFinish?: (resultMsg: string) => void;
    onFail?: (error: IWebSocketMessage) => void;
    accessToken: string;
  }) {
    const { accessToken } = params;
    super({
      ...params,
      url: `${audioToTextApi}&access_token=${accessToken}`,
    });
  }
  /** Returns the latest recognition result. */
  public getResult(): string {
    return this.resultMsg;
  }
  /**
   * Dispatches a server message by its event type.
   * @param event raw websocket message event
   * @param resolve resolves the connect() promise once the task started
   */
  protected handleMessage(event: MessageEvent, resolve: Function): void {
    const serverMsg: IWebSocketMessage = JSON.parse(event?.data?.toString());
    const evtName = serverMsg?.header?.event;
    if (evtName === "task-started") {
      // Server is ready to receive audio
      resolve();
      this.onStart?.();
    } else if (evtName === "result-generated") {
      // Incremental recognition result for the current sentence
      this.resultMsg = serverMsg.payload?.output?.sentence?.text || "";
      this.onUpdate?.(this.resultMsg);
    } else if (evtName === "task-finished") {
      this.cleanup();
      this.onFinish?.(this.resultMsg);
    } else if (evtName === "task-failed") {
      this.cleanup();
      console.error("Task failed:", serverMsg);
      this.onFail?.(serverMsg);
    }
    console.log(serverMsg?.header?.event, this.resultMsg);
  }
  /**
   * Builds the run-task message, sent right after the websocket opens.
   * @returns JSON string of the Paraformer run-task request
   */
  protected getRunTaskPayload(): string {
    const header = {
      action: "run-task",
      task_id: this.TASK_ID,
      streaming: "duplex",
    };
    const payload = {
      task_group: "audio",
      task: "asr",
      function: "recognition",
      model: "paraformer-realtime-v2",
      parameters: { sample_rate: 16000, format: "wav" },
      input: {},
    };
    return JSON.stringify({ header, payload });
  }
  /**
   * Builds the finish-task message, sent when sendFinishTask() is called.
   * @returns JSON string of the finish-task request
   */
  protected getFinishTaskPayload(): string {
    const header = {
      action: "finish-task",
      task_id: this.TASK_ID,
      streaming: "duplex",
    };
    return JSON.stringify({ header });
  }
}关键调用代码
// 创建WebSocket连接
socketIns.current = new AudioToTextWS({
onUpdate: (text) => {
if (isCanceled.current) {
return;
}
onUpdateText(text);
},
onFinish: (text) => {
if (isCanceled.current) {
return;
}
onUpdateText(text, true);
},
accessToken,
});
await socketIns.current.connect();
// 发送语音二进制数据
socketIns.current.sendData(buffer);
// 停止录音时发送【完成任务】指令
socketIns.current.sendFinishTask();
// 按需关闭ws连接
socketIns.current?.cleanup?.();2. 语音播报
语音播报中,音频播放的部分直接调用浏览器的 API 即可;文本转语音仍需通过 WebSocket 从服务端获取音频流。
2.1 websocket 连接
文字转语音 类:TextToAudioWS
import { textToAudioApi } from "@/api/index.ts";
import { BaseWebSocket } from "../http/websocket.ts";
import { type IWebSocketMessage } from "./type.ts";
import { generateUUID } from "@/utils/ai/uuid.ts";
/**
 * Text-to-speech websocket client (CosyVoice).
 * Sends text chunks for synthesis and forwards the returned binary PCM
 * stream to the onUpdate callback.
 */
export default class TextToAudioWS extends BaseWebSocket {
  // 32-character task id: a uuid with its dashes stripped
  private readonly TASK_ID = generateUUID().replace(/-/g, "").slice(0, 32);
  // True between task-started and task-finished / task-failed
  public isGeneratingAudio = false;
  public declare onUpdate: (buffer: ArrayBuffer) => void;
  public declare onFinish: () => void;
  constructor(params: {
    onStart?: () => void;
    onUpdate: (audioBuffer: ArrayBuffer) => void;
    /**
     * Fired when the server reports that the task has finished
     * @returns
     */
    onFinish?: () => void;
    onFail?: (error: IWebSocketMessage) => void;
    accessToken: string;
  }) {
    const { accessToken } = params;
    super({
      ...params,
      url: `${textToAudioApi}&access_token=${accessToken}`,
    });
  }
  /**
   * Sends a transcoding (synthesis) request; waits for the ws to be ready
   * before actually sending (via BaseWebSocket.sendData queueing).
   * @param text text to synthesize (no-op when empty)
   * @returns
   */
  public sendText(text: string): void {
    if (!text?.length) return;
    console.log(`TextToAudioWS【文本转语音:】${text}`);
    const data = JSON.stringify({
      header: {
        action: "continue-task",
        task_id: this.TASK_ID,
        streaming: "duplex",
      },
      payload: {
        input: {
          text,
        },
      },
    });
    super.sendData(data);
  }
  /**
   * Handles both JSON protocol frames and binary audio frames.
   * @param event raw websocket message event
   * @param resolve resolves the connect() promise once the task started
   */
  protected async handleMessage(
    event: MessageEvent,
    resolve: Function
  ): Promise<void> {
    if (typeof event.data === "string") {
      const message: IWebSocketMessage = JSON.parse(event.data);
      switch (message.header.event) {
        case "task-started":
          resolve();
          this.isGeneratingAudio = true;
          this.onStart?.();
          break;
        case "task-finished":
          this.isGeneratingAudio = false;
          this.onFinish();
          this.cleanup();
          // NOTE(review): a previous comment here claimed onFinish/cleanup
          // must NOT run at this point because audio may still be playing,
          // yet both ARE invoked above. AudioPlayer appears to rely on this
          // ("task-finished" = stream fully received; playback end is
          // tracked separately by the PCM player) - confirm intended.
          break;
        case "task-failed":
          console.error("Task failed:", message);
          this.isGeneratingAudio = false;
          this.onFail?.(message);
          this.cleanup();
          break;
      }
    } else {
      // Binary frame: a chunk of the synthesized audio stream
      console.log("TextToAudioWS【收到音频流】");
      this.onUpdate(event.data);
    }
  }
  /**
   * Builds the run-task message, sent right after the websocket opens.
   * @returns JSON string of the CosyVoice run-task request
   */
  protected getRunTaskPayload(): string {
    return JSON.stringify({
      header: {
        action: "run-task",
        task_id: this.TASK_ID,
        streaming: "duplex",
      },
      payload: {
        task_group: "audio",
        task: "tts",
        function: "SpeechSynthesizer",
        model: "cosyvoice-v1",
        parameters: {
          text_type: "PlainText",
          voice: "longyue",
          format: "pcm",
          sample_rate: 16000,
          volume: 50,
          rate: 1.1,
          pitch: 1,
        },
        input: {},
      },
    });
  }
  /**
   * Builds the finish-task message, sent when sendFinishTask() is called.
   * @returns JSON string of the finish-task request
   */
  protected getFinishTaskPayload(): string {
    return JSON.stringify({
      header: {
        action: "finish-task",
        task_id: this.TASK_ID,
        streaming: "duplex",
      },
      payload: {
        input: {},
      },
    });
  }
}2.2 播放器 API
纯前端实现的音频二进制流播放器 PCMPlayer
/**
* PCM音频播放器类
* 用于播放PCM格式的音频数据
*/
export class PCMPlayer {
/**
* 构造函数
* @param {Object} option - 配置选项
* @param {string} [option.inputCodec='Int16'] - 输入编码格式 ('Int8'|'Int16'|'Int32'|'Float32')
* @param {number} [option.channels=1] - 声道数
* @param {number} [option.sampleRate=8000] - 采样率(Hz)
* @param {number} [option.flushTime=1000] - 缓存刷新时间(ms)
* @param {number} [option.fftSize=2048] - 频谱分析器的FFT大小
* @param {Function} [option.onBufferAllPlayEnd] - 所有音频播放结束回调
* @param {Function} [option.onPlay] - 音频播放结束回调
* @param {Function} [option.onPause] - 音频播放结束回调
* @param {Function} [option.onstatechange] - 音频上下文状态变化回调
*/
constructor(option) {
this.init(option);
}
/**
* 还在持续输入音频二进制流
*/
hasMoreBuffer = true;
/**
* 初始化播放器
* @param {Object} option - 配置选项
* @private
*/
init(option) {
// 默认配置
const defaultOption = {
inputCodec: "Int16",
channels: 1,
sampleRate: 8000,
flushTime: 1000,
fftSize: 2048,
};
// 合并配置
this.option = { ...defaultOption, ...this.option, ...option };
// 音频样本数据缓冲区
this.samples = new Float32Array();
// 清除定时器
if (this.interval) {
clearInterval(this.interval);
}
// 定时刷新播放缓冲区
this.interval = setInterval(() => this.flush(), this.option.flushTime);
// 计算转换值用于标准化音频数据
this.convertValue = this.getConvertValue();
// 获取对应的TypedArray类型
this.typedArray = this.getTypedArray();
// 初始化Web Audio API上下文
this.initAudioContext();
// 绑定音频上下文事件
this.bindAudioContextEvent();
// 记录上一个音频源节点
this.lastBufferSource = null;
}
/**
* 根据编码格式获取转换值
* @returns {number} 转换值
* @private
*/
getConvertValue() {
const inputCodecs = {
Int8: 128,
Int16: 32768,
Int32: 2147483648,
Float32: 1,
};
if (!inputCodecs[this.option.inputCodec]) {
throw new Error(
"Wrong codec. Please input one of these codecs: Int8, Int16, Int32, Float32"
);
}
return inputCodecs[this.option.inputCodec];
}
/**
* 根据编码格式获取对应的TypedArray类型
* @returns {Function} TypedArray构造函数
* @private
*/
getTypedArray() {
const typedArrays = {
Int8: Int8Array,
Int16: Int16Array,
Int32: Int32Array,
Float32: Float32Array,
};
if (!typedArrays[this.option.inputCodec]) {
throw new Error(
"Wrong codec. Please input one of these codecs: Int8, Int16, Int32, Float32"
);
}
return typedArrays[this.option.inputCodec];
}
/**
* 初始化Web Audio API上下文
* @private
*/
initAudioContext() {
// 创建音频上下文
this.audioCtx = new (window.AudioContext || window.webkitAudioContext)();
// 创建增益节点用于控制音量
this.gainNode = this.audioCtx.createGain();
this.gainNode.gain.value = 3;
this.gainNode.connect(this.audioCtx.destination);
// 记录开始时间
this.startTime = this.audioCtx.currentTime;
// 创建频谱分析器节点
this.analyserNode = this.audioCtx.createAnalyser();
this.analyserNode.fftSize = this.option.fftSize;
}
/**
* 检查数据是否为TypedArray或ArrayBuffer类型
* @param {*} data - 待检查的数据
* @returns {boolean} 是否为支持的类型
* @static
*/
static isTypedArray(data) {
return (
(data.byteLength &&
data.buffer &&
data.buffer.constructor === ArrayBuffer) ||
data.constructor === ArrayBuffer
);
}
/**
* 检查数据是否受支持
* @param {*} data - 待检查的数据
* @returns {boolean} 是否受支持
* @private
*/
isSupported(data) {
if (!PCMPlayer.isTypedArray(data)) {
throw new Error("请传入ArrayBuffer或者任意TypedArray");
}
return true;
}
/**
* 向播放器提供音频数据
* @param {ArrayBuffer|TypedArray} data - 音频数据
*/
feed(data) {
this.isSupported(data);
if (!this.samples) return;
// 格式化输入数据为Float32Array
data = this.getFormattedValue(data);
// 创建新的缓冲区并合并历史数据和新数据
const tmp = new Float32Array(this.samples.length + data.length);
// console.log(data, this.samples, this.samples.length)
// 复制当前的实例的buffer值(历史buff)
// 从头(0)开始复制
tmp.set(this.samples, 0);
// 复制传入的新数据
// 从历史buff位置开始
tmp.set(data, this.samples.length);
// 将新的完整buff数据赋值给samples
// interval定时器也会从samples里面播放数据
this.samples = tmp;
}
/**
* 将输入数据格式化为标准的Float32Array
* @param {ArrayBuffer|TypedArray} data - 输入数据
* @returns {Float32Array} 格式化后的数据
* @private
*/
getFormattedValue(data) {
// 转换为对应的TypedArray类型
if (data.constructor === ArrayBuffer) {
data = new this.typedArray(data);
} else {
data = new this.typedArray(data.buffer);
}
// 创建Float32Array用于存储标准化后的数据
const float32 = new Float32Array(data.length);
// 将数据标准化到[-1, 1]范围
for (let i = 0; i < data.length; i++) {
// buffer 缓冲区的数据,需要是IEEE754 里32位的线性PCM,范围从-1到+1
// 所以对数据进行除法
// 除以对应的位数范围,得到-1到+1的数据
// float32[i] = data[i] / 0x8000;
float32[i] = data[i] / this.convertValue;
}
return float32;
}
/**
* 设置音量
* @param {number} volume - 音量值(0-1)
*/
volume(volume) {
if (this.gainNode) {
this.gainNode.gain.value = volume;
}
}
/**
* 销毁播放器资源
*/
destroy() {
// 清除定时器
if (this.interval) {
clearInterval(this.interval);
this.interval = null;
}
// 清空样本数据
this.samples = null;
// 关闭音频上下文
if (this.audioCtx) {
this.audioCtx.close();
this.audioCtx = null;
}
// 清空引用
this.lastBufferSource = null;
}
/**
* 告知播放器,当前所有的音频数据已经输入完毕
*/
markBufferInputEnd() {
this.hasMoreBuffer = false;
}
/**
* 刷新并播放缓冲区中的音频数据
* @private
*/
flush() {
// 如果没有数据则返回
if (!this.samples?.length) return;
// 创建音频源节点
const bufferSource = this.audioCtx.createBufferSource();
this.lastBufferSource = bufferSource;
// 绑定播放结束事件
if (typeof this.option.onBufferAllPlayEnd === "function") {
bufferSource.onended = (event) => {
if (!this.hasMoreBuffer && this.lastBufferSource === bufferSource) {
this.option.onBufferAllPlayEnd();
console.log("pcmPlayer【所有音频流播放完毕】");
} else {
console.log(
"pcmPlayer【单个音频流播放完毕】,hasMoreBuffer",
this.hasMoreBuffer
);
}
};
}
// 计算音频缓冲区长度
const length = this.samples.length / this.option.channels;
// 创建音频缓冲区
const audioBuffer = this.audioCtx.createBuffer(
this.option.channels,
length,
this.option.sampleRate
);
// 填充各声道数据并应用淡入淡出效果
for (let channel = 0; channel < this.option.channels; channel++) {
const audioData = audioBuffer.getChannelData(channel);
let offset = channel;
let decrement = 50;
for (let i = 0; i < length; i++) {
audioData[i] = this.samples[offset];
// 前50个样本应用淡入效果
if (i < 50) {
audioData[i] = (audioData[i] * i) / 50;
}
// 后51个样本应用淡出效果
if (i >= length - 51) {
audioData[i] = (audioData[i] * decrement--) / 50;
}
offset += this.option.channels;
}
}
// 调整开始时间以避免延迟累积
if (this.startTime < this.audioCtx.currentTime) {
this.startTime = this.audioCtx.currentTime;
}
// 连接节点并开始播放
bufferSource.buffer = audioBuffer;
bufferSource.connect(this.gainNode);
bufferSource.connect(this.analyserNode);
bufferSource.start(this.startTime);
// 更新下次播放的开始时间
this.startTime += audioBuffer.duration;
// 清空已播放的样本数据
this.samples = new Float32Array();
}
/**
* 暂停播放
* @returns {Promise<void>}
*/
async pause() {
if (this.audioCtx) {
await this.audioCtx.suspend();
}
}
/**
* 继续播放
* @returns {Promise<void>}
*/
async continue() {
if (this.audioCtx) {
await this.audioCtx.resume();
}
}
/**
* 绑定音频上下文事件
* @private
*/
bindAudioContextEvent() {
if (typeof this.option.onstatechange === "function" && this.audioCtx) {
this.audioCtx.onstatechange = (event) => {
if (this.audioCtx) {
this.option.onstatechange(this, event, this.audioCtx.state);
}
};
}
}
}2.3 结合 ws 和播放器 api:player
import TextToAudioWS from "./textToAudioWs";
import { PCMPlayer } from "./pcm-player";
import { IGNORE_CLASSNAME } from "@/types/constant";
import { message } from "../message";
import { concatArrayBuffers } from "./buffer";
import { isTablet } from "../device/ua";
import { truncateText } from "../system/string";
let tempContainer: HTMLElement;
/**
* 克隆dom,并且剔除要忽略的元素,然后提取innerText
* - 可以忽略链接、图片等非文本元素内容
* @param element
* @returns
*/
export const getTextWithoutIgnoreElements = (element: HTMLElement): string => {
// 克隆元素以避免影响原始DOM
const clonedElement = element.cloneNode(true) as HTMLElement;
// 查找并移除所有带有ignore类名的元素
const ignoreElements = clonedElement.querySelectorAll("." + IGNORE_CLASSNAME);
ignoreElements.forEach((ignoreElement) => {
ignoreElement.remove();
});
// 创建临时容器
if (!tempContainer) {
tempContainer = document.createElement("div");
tempContainer.style.cssText = `
position: absolute;
left: -999999px;
top: -999999px;
opacity: 0;
`;
}
// 使用临时容器加载内容(不加载内容到body,获取innerText的时候会丢失换行和缩进格式)
tempContainer.appendChild(clonedElement);
document.body.appendChild(tempContainer);
let fullText = clonedElement.innerText;
// 奇葩iPad,自作主张在结尾加换行,需要去掉,否则会影响alreadySendText计算
if (isTablet && fullText.endsWith("\n")) {
fullText = fullText.slice(0, -1);
}
tempContainer.innerHTML = "";
// 返回处理后的文本内容
return fullText;
};
/**
 * Reads the text content of the element with the given id, excluding
 * ignored (non-text) child elements.
 * @param domId element id (used as `#${domId}`)
 * @returns the extracted text
 */
export const getDomContent = (domId: string) => {
  const target = document.querySelector<HTMLElement>(`#${domId}`)!;
  return getTextWithoutIgnoreElements(target);
};
type PlayStatus = "unPlay" | "playing" | "suspended" | "playEnd";
/**
 * High-level TTS playback controller.
 * Bridges TextToAudioWS (text -> PCM stream) and PCMPlayer (PCM -> audio),
 * manages per-task audio caching, mute/resume, and playback status events.
 */
export class AudioPlayer {
  public accessToken: string;
  /**
   * Mute switch
   * - also gates whether text is transcoded (sent for TTS) in real time
   */
  private isMuting: boolean = true;
  /**
   * Queue of text not yet sent for transcoding
   */
  private textPool: string[] = [];
  /**
   * Whether the server has finished responding to all of the user's input
   * - precondition: the user triggered the finishedTask event
   */
  private isResponseUserEnd: boolean = false;
  /**
   * Text already handed to the transcode queue (possibly transcoded)
   */
  private alreadySendText: string = "";
  /**
   * Audio chunks received from the TTS websocket so far
   */
  private alreadyReceivedAudioBuffer: ArrayBuffer[] = [];
  /**
   * Per-task audio cache {"id": arrayBuffer[]}
   * - only fully transcoded tasks are cached
   */
  private taskBufferMap: { [taskId: string]: ArrayBuffer[] } = {};
  /**
   * Current playback status (backing field)
   */
  private _playStatus: PlayStatus = "unPlay";
  /**
   * Current playback status
   */
  private get playStatus() {
    return this._playStatus;
  }
  private set playStatus(status: PlayStatus) {
    this._playStatus = status;
    // Synchronously notify the onPlayStateChange listener on every change
    if (this.onPlayStateChange) {
      this.onPlayStateChange(this.taskId!, status);
    }
  }
  /**
   * Id of the current playback task
   */
  taskId?: string = "initial_no_value";
  /**
   * Websocket instance (text-to-speech)
   */
  public wsInstance?: TextToAudioWS;
  /**
   * Resolves once the DOM content has been read (see playAudioByDomContent)
   */
  public waitReadDomReady: Promise<void> | null = null;
  private pcmPlayer?: PCMPlayer;
  onPlayEnd?: () => void;
  // ("statue" below is a typo for "status"; kept as-is for compatibility)
  onPlayStateChange?: (taskId: string, statue: PlayStatus) => void;
  constructor(params: {
    accessToken: string;
    onPlayEnd?: () => void;
    onPlayStateChange?: (taskId: string, statue: PlayStatus) => void;
  }) {
    this.accessToken = params.accessToken;
    this.onPlayEnd = params.onPlayEnd;
    this.onPlayStateChange = params.onPlayStateChange;
  }
  // Initializes both the PCM player and the websocket for a fresh task
  private _init() {
    this._initPlayer();
    this._initWebsocket();
  }
  /**
   * Initializes the pcm player; starts muted.
   * @param withPcmPlayer when false, only resets flags without creating a
   *                      new player (used when the task audio is cached)
   */
  private _initPlayer(withPcmPlayer = true) {
    this.isMuting = true;
    this.playStatus = "unPlay";
    if (!withPcmPlayer) {
      console.log("AudioPlayer reset");
      return;
    }
    console.log("AudioPlayer 【init】");
    this.pcmPlayer = new PCMPlayer({
      inputCodec: "Int16",
      channels: 1,
      sampleRate: 16000,
      flushTime: 100,
      onstatechange: (
        _instance: any,
        _event: any,
        state: "closed" | "running" | "suspended"
      ) => {
        console.log("state", state);
        switch (state) {
          case "running":
            break;
          case "closed":
          case "suspended":
            break;
        }
      },
      onBufferAllPlayEnd: () => {
        this.onLastBufferPlayEnd();
      },
    });
  }
  /**
   * Resets ws-related state and (by default) opens the websocket connection.
   * @param withWebsocketConnect whether to actually connect, default true.
   * - Pass false when the task result is cached and only a reset is needed
   */
  private _initWebsocket = (withWebsocketConnect = true) => {
    this.isResponseUserEnd = false;
    this.alreadySendText = "";
    const preTaskPoolLength = this.textPool.length;
    this.textPool = [];
    this.alreadyReceivedAudioBuffer = [];
    this.waitReadDomReady = null;
    if (!withWebsocketConnect) {
      console.log(`audio websocket reset,清空textPool: ${preTaskPoolLength}`);
      return;
    }
    console.log(`audio websocket 【init】,清空textPool: ${preTaskPoolLength}`);
    this.wsInstance = new TextToAudioWS({
      accessToken: this.accessToken,
      onUpdate: (audioBuffer) => {
        // Feed each PCM chunk straight to the player and remember it
        this.pcmPlayer?.feed(audioBuffer);
        this.alreadyReceivedAudioBuffer.push(audioBuffer);
      },
      onFinish: () => {
        this.isResponseUserEnd = true;
        // Cache the task audio, merged into a single buffer
        this.taskBufferMap[this.taskId!] = [
          concatArrayBuffers(this.alreadyReceivedAudioBuffer),
        ];
        this.pcmPlayer?.markBufferInputEnd();
        this.alreadyReceivedAudioBuffer = [];
        console.log("文字转语音响应完毕,ws已断开,音频已合并缓存");
      },
      onFail: (error) => {
        console.error("文字转语音响应报错,ws已断开", error);
        message.error("文字转语音响应报错");
        this.isResponseUserEnd = true;
      },
    });
    this.wsInstance.connect();
  };
  /**
   * Whether playback is currently running
   */
  get isPlaying() {
    return this.playStatus === "playing";
  }
  /**
   * Whether playback is suspended (note: "playEnd" does not count)
   */
  get isSuspended() {
    return this.playStatus === "suspended";
  }
  /**
   * Cancels any other playback task and starts a new one (does not start
   * audio itself - call doPlay for that).
   * - initializes the player and websocket
   * - if the task audio is cached, skips the websocket and plays the cache
   *   (triggered later by doPlay)
   * @param taskId
   * @param params afterToggle / onPlayEnd callbacks
   */
  toggleTask(
    taskId: string,
    params: {
      afterToggle?: () => void;
      onPlayEnd?: () => void;
    }
  ) {
    console.log(`toggleTask ${this.taskId} -----> ${taskId}`);
    this.cleanup();
    // Fire the previous task's onPlayEnd before switching
    this.onPlayEnd?.();
    this.taskId = taskId;
    this.onPlayEnd = params.onPlayEnd;
    if (this.taskBufferMap[taskId]) {
      console.log(`taskId ${taskId} 已有音频缓存,跳过初始化`);
      this._initPlayer(false);
      this._initWebsocket(false);
      this.isResponseUserEnd = true;
      params.afterToggle?.();
    } else {
      console.log(`taskId ${taskId} 初始化`);
      this._init();
      params.afterToggle?.();
    }
    return this.wsInstance?.connectPromise;
  }
  /**
   * Switches to the playing state and
   * - 1. plays queued text (when the ws is connected), otherwise
   * - 2. plays the cached audio (restarts from the top if already finished)
   */
  doPlay(taskId: string) {
    if (this.wsInstance!.connectPromise) {
      this.isMuting = false;
      // 1. Process the text queue once the ws is ready
      this.wsInstance!.connectPromise?.then(() => {
        this.afterWsReady();
      });
    } else {
      this.isMuting = false;
      // 2. The ws task finished and the ws closed - use the audio cache
      if (this.isSuspended || this.isPlaying) {
        console.log("ws已销毁,继续播放");
        this.pcmPlayer?.continue();
      } else {
        console.log("ws已销毁,从头播放");
        // Restart from the beginning
        this._initPlayer();
        // _initPlayer set this.isMuting back to true - revert it
        this.isMuting = false;
        // Feed the cached audio from the start
        this.taskBufferMap[taskId].forEach((buffer) => {
          this.pcmPlayer!.feed(buffer);
        });
        this.pcmPlayer?.markBufferInputEnd();
      }
    }
    this.playStatus = "playing";
    return this.wsInstance!.connectPromise;
  }
  /**
   * Resumes any pending audio and flushes the pending text queue.
   * @returns
   */
  afterWsReady = () => {
    // Runs asynchronously - the user may have muted again in the meantime
    if (this.isMuting === true) {
      return;
    }
    this.doContinue();
    // Transcode the whole pending queue in one request, then clear it
    if (this.textPool.length > 0) {
      console.log("一次性转码,合并文本数量:", this.textPool.length);
      this._transformText(this.textPool.join(""));
      this.textPool = [];
    }
  };
  /**
   * Mutes playback.
   * - Unsent text keeps queueing; transcoding stops while muted.
   */
  doMute() {
    console.log("mute");
    this.isMuting = true;
    this.playStatus = "suspended";
    return this.pcmPlayer?.pause();
  }
  /**
   * Resumes playback of any audio already buffered in the player
   */
  private doContinue() {
    if (this?.pcmPlayer?.audioCtx?.state === "suspended") {
      this.pcmPlayer.continue();
    }
  }
  /**
   * Sends a text transcoding (TTS) request over the websocket
   * @param text
   */
  private _transformText(text: string) {
    this.wsInstance?.sendText(text);
  }
  /**
   * Adds text to the conversion pipeline:
   * - transcodes immediately while playing; otherwise queues it
   */
  private _addText(text: string) {
    // Playback enabled
    if (!this.isMuting) {
      if (this.wsInstance?.isServerReady) {
        this._transformText(text);
        return;
      }
    }
    this.textPool.push(text);
    console.log("AudioPlayer未就绪,待播放区文本数", this.textPool.length);
  }
  /**
   * Plays a text fragment.
   * @param taskId ignored when it is not the current task (can happen
   *               with async callers)
   * @param textChunk
   * @returns
   */
  playAudioByTextChunk(taskId: string, textChunk: string) {
    if (!textChunk || taskId !== this.taskId) {
      return;
    }
    this._addText(textChunk);
    this.alreadySendText += textChunk;
  }
  /**
   * Plays DOM content (the DOM may grow incrementally; already-played text
   * is not replayed).
   * @param taskId ignored when it is not the current task (can happen
   *               with async callers)
   * @param domId
   * @returns
   */
  playAudioByDomContent(taskId: string, domId: string) {
    if (!domId || taskId !== this.taskId) {
      return;
    }
    this.waitReadDomReady = new Promise((resolve) => {
      setTimeout(() => {
        const dom = document.querySelector<HTMLElement>(`#${domId}`);
        if (!dom) {
          resolve();
          console.error("playAudioByDomContent 失败,DOM未找到");
          return;
        }
        const fullText: string = getTextWithoutIgnoreElements(dom);
        if (!fullText) {
          resolve();
          return;
        }
        // The delta is everything after the already-sent prefix
        const newChunk = fullText.replace(this.alreadySendText, "");
        if (fullText.indexOf(this.alreadySendText) === -1) {
          console.error("playAudioByDomContent 错误,已播放文本未匹配到", {
            ok: fullText.indexOf(this.alreadySendText),
            fullText: truncateText(fullText),
            newChunk,
            alreadySendText: truncateText(this.alreadySendText),
          });
          return;
        }
        newChunk && this.playAudioByTextChunk(taskId, newChunk);
        resolve();
      });
    });
  }
  /**
   * Play-end handler; cleans up the ws and the player when appropriate.
   * Fires when the last known buffer finished playing, and only completes
   * the task if:
   * - playback is active (not muted), and
   * - the server has finished responding (messageEnd/task-finished)
   */
  private onLastBufferPlayEnd = () => {
    this.isMuting && console.error(`onLastBufferPlayEnd失败,被静音了`);
    if (!this.isMuting && this.isResponseUserEnd) {
      this.cleanup();
      this.onPlayEnd?.();
      this.playStatus = "playEnd";
    }
  };
  /**
   * Tells the server that all text to be converted has been uploaded
   */
  async sendFinishTask() {
    await this.waitReadDomReady;
    this.wsInstance?.connectPromise?.then(() => {
      // Reset alreadySendText and related state
      this._initWebsocket(false);
      this.wsInstance?.sendFinishTask();
      console.log(
        "sendFinishTask,文字上传完毕,清空alreadySendText。等待服务端响应结束"
      );
    });
  }
  /**
   * Stops playback and closes the websocket connection
   */
  cleanup() {
    console.log("AudioPlayer cleanup");
    this.taskId = "initial_no_value";
    this._initPlayer(false);
    this._initWebsocket(false);
    this.pcmPlayer?.destroy();
    this.wsInstance?.cleanup();
  }
}2.4 使用 player 进行流式播报
关键调用代码:
- 流式播放
// 获取全局唯一的播放实例
const { audioPlayer } = useAudioPlayer();
// 任务唯一id,多次播放会缓存(只请求一次ws)
const taskId = "uuid";
// 如果是非静音状态,直接开始播放
isAllowAudioPlay &&
audioPlayer.toggleTask(taskId, {
afterToggle() {
audioPlayer.doPlay(taskId);
},
});
// 播放一个DOM元素的innerText(DOM在变化时多次调用,即可流式播报)
audioPlayer?.playAudioByDomContent(taskId, domId);
// 播放一段文本
audioPlayer?.playAudioByTextChunk(taskId, text);
// 告知ws,文本已上传完毕
audioPlayer?.sendFinishTask();单条消息、按钮播放
import style from "./index.module.less";
import { useEffect, useRef, useState, type FC } from "react";
import type { CustomContentObjType, NormalContentObjType } from "@/types/ai";
import audioPlay from "@/assets/icons/audio_play.svg?react";
import audio_play2 from "@/assets/icons/audio_play2.svg?react";
import Icon from "@ant-design/icons";
import { useAudioPlayer } from "@/store/useAudioPlayer";
import { eventBus } from "@/utils/mitt";
import { isTablet } from "@/utils/device/ua";
/**
 * Footer of an AI message card: the play/pause button that reads the
 * answer aloud, kept in sync with the global audio player state.
 */
export const AiMsgFooter: FC<NormalContentObjType & CustomContentObjType> = (
  props
) => {
  const { uuid, isFinish, _origin } = props;
  const { audioPlayer, currentTaskId, isPlaying } = useAudioPlayer();
  // Whether THIS message is currently playing (drives the icon UI)
  const [audioPlaying, setAudioPlaying] = useState(false);
  const [taskId, setTaskId] = useState(uuid);
  const iconRef = useRef<HTMLDivElement>(null);
  useEffect(() => {
    // Paged answers spawn sub-tasks named `${uuid}_${page}`; while a
    // sub-task plays, this message still shows the "playing" state
    if (currentTaskId.startsWith(uuid)) {
      setAudioPlaying(isPlaying);
    } else {
      setAudioPlaying(false);
    }
  }, [currentTaskId, isPlaying]);
  useEffect(() => {
    const page_change_handler = ({ card_uuid, page }: any) => {
      // Listen for the answer-UI pagination event:
      // if this answer is playing and the user switches pages, restart
      // playback for the new page under task id `${card_uuid}_${page}`
      if (uuid === card_uuid) {
        const subTaskId =
          page === 1 ? card_uuid : `${card_uuid}_${page}`;
        setTaskId(subTaskId)
        if (audioPlaying) {
          audioPlayer.doMute()?.then(() => {
            setTimeout(
              () => {
                // Simulate a click so the normal play flow restarts
                iconRef.current?.click();
              },
              isTablet ? 60 : 0
            );
          });
        }
      }
    };
    eventBus.on("page_change", page_change_handler);
    return () => {
      eventBus.off("page_change", page_change_handler);
    };
  }, [audioPlaying, taskId]);
  return isFinish && _origin ? (
    <div className={style.actionWrapper}>
      <Icon
        component={audioPlaying ? audioPlay : audio_play2}
        ref={iconRef}
        onClick={() => {
          // 1. Currently playing
          if (audioPlaying) {
            console.log("暂停");
            // Pause
            audioPlayer.doMute()?.then(() => {
              setAudioPlaying(false);
            });
          } else {
            if (audioPlayer.taskId !== taskId) {
              // Interrupt any other playback task and start a new one
              audioPlayer
                .toggleTask(taskId, {
                  afterToggle: () => {
                    audioPlayer.playAudioByDomContent(taskId, uuid);
                    audioPlayer.doPlay(taskId);
                    audioPlayer.sendFinishTask();
                  },
                  onPlayEnd: () => {
                    setAudioPlaying(false);
                  },
                })
                ?.catch((e) => {
                  setAudioPlaying(false);
                });
            } else {
              // Resume the current task
              audioPlayer.doPlay(taskId)?.catch((e) => {
                setAudioPlaying(false);
              });
            }
            setAudioPlaying(true);
          }
        }}
      />
    </div>
  ) : null;
};- 一次性播放
const { audioPlayer } = useAudioPlayer();
const [audioPlaying, setAudioPlaying] = useState(false);
// click播放事件
// 1. 在播放中
if (audioPlaying) {
console.log("暂停");
// 暂停
audioPlayer.doMute()?.then(() => {
setAudioPlaying(false);
});
} else {
const taskId = uuid;
if (audioPlayer.taskId !== taskId) {
// 中断其他播放任务、创建新播放任务
audioPlayer.toggleTask(taskId, {
afterToggle: () => {
audioPlayer.playAudioByDomContent(taskId, uuid);
audioPlayer.doPlay(taskId);
audioPlayer.sendFinishTask();
},
onPlayEnd: () => {
setAudioPlaying(false);
},
});
} else {
// 继续播放
audioPlayer.doPlay(taskId);
}
setAudioPlaying(true);
}