
在 IM(即时通讯)场景中,语音消息是核心功能之一,而实现高质量、跨场景兼容的语音录制与播放,需要解决 PCM 数据编码、采样率适配、硬件兼容性、播放模式等一系列问题。本文基于腾讯云 TUIChat 组件的实际开发场景,拆解从遥控器 PCM 音频采集、AAC 编码为 M4A 格式,到音频播放时扬声器强制外放的全流程实现,重点分析 PCM 预处理、重采样、编码器排空等关键技术点,以及 TV / 盒子等特殊设备的适配方案,为移动端语音开发提供可落地的实践参考。
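在 TUIChat 的语音开发中,主要面临以下核心挑战:(1)遥控器等外设只提供原始 PCM 数据,需要自行编码并封装为 M4A;(2)不同遥控器的采样率不一致,需要在编码前做重采样适配;(3)原始 PCM 可能带有直流偏移和底噪,需要预处理以保证音质;(4)TV / 盒子等设备在播放时需要强制使用扬声器外放。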
针对以上问题,实现了一套包含「AudioRecorder(录制)+ AacPcmEncoder(PCM 编码)+ AudioPlayer(播放)」的完整方案。
AudioRecorder 是录音功能的入口,核心分为「系统 MediaRecorder 录音」「Callkit 录音」「遥控器 PCM 录音」三种模式,本文重点拆解最复杂的 PCM 录音模式。
/**
 * Starts a PCM recording session whose audio is encoded into a new {@code .m4a} file.
 *
 * <p>If a PCM session is already active it is stopped first. A delayed task stops the
 * recording 200 ms before the 60-second limit and shows the limit toast. If the encoder
 * cannot be started, the session state is rolled back, the encoder is released and
 * {@code onRecordCompleted(false)} is invoked.
 *
 * @param callback receiver for recording events; stored in {@code mRecordCallback}
 */
public void startPcmRecord(Callback callback) {
    synchronized (mPcmLock) {
        if (mIsPcmMode) {
            TUIChatLog.w(TAG, "startPcmRecord: already in pcm mode, stopping first");
            stopPcmRecordInternal();
        }

        mRecordCallback = callback;
        mAudioRecordPath = CURRENT_RECORD_FILE + System.currentTimeMillis() + ".m4a";
        mIsPcmMode = true;
        mIsStopping = false;
        mIsCallkitRecorder = false;
        mRealDuration = 0;
        mPcmStartTime = SystemClock.elapsedRealtime();

        try {
            mPcmEncoder = new AacPcmEncoder();
            // Remote controls may deliver different sample rates (16 kHz by default);
            // call mPcmEncoder.setSrcSampleRate(...) here, e.g. with 44100, to adapt.
            mPcmEncoder.start(mAudioRecordPath);

            // Duration limit: stop 200 ms before the 60-second cap to avoid overrunning it.
            mHandler.postDelayed(() -> {
                if (!mIsPcmMode || mIsStopping) {
                    return;
                }
                stopPcmRecord();
                ToastUtil.toastShortMessageCenter(
                        TUIChatService.getAppContext().getString(R.string.record_limit_tips));
            }, (60 * 1000) - 200);

            TUIChatLog.i(TAG, "startPcmRecord: " + mAudioRecordPath);
        } catch (IOException | RuntimeException e) {
            // Runtime failures are handled like I/O failures so that mIsPcmMode can never
            // stay true without a working encoder behind it.
            TUIChatLog.e(TAG, "startPcmRecord failed: " + e.getMessage());
            if (mPcmEncoder != null) {
                // Best-effort cleanup of whatever the encoder managed to allocate.
                try {
                    mPcmEncoder.stop();
                } catch (Exception stopError) {
                    TUIChatLog.e(TAG, "startPcmRecord: encoder cleanup failed: " + stopError.getMessage());
                }
                mPcmEncoder = null;
            }
            mIsPcmMode = false;
            onRecordCompleted(false);
        }
    }
}
关键逻辑解析:
/**
 * Feeds one packet of raw PCM audio into the active PCM recording session and reports
 * its loudness to the recording callback.
 *
 * <p>The call is ignored when no PCM session is active, when the session is stopping,
 * or when the packet is {@code null} or shorter than one 16-bit sample.
 *
 * @param pcmData raw PCM bytes; read as 16-bit little-endian samples for the level meter
 */
public void writePcmData(byte[] pcmData) {
    synchronized (mPcmLock) {
        if (!mIsPcmMode || mIsStopping || mPcmEncoder == null) {
            return;
        }
        // A null or sub-sample packet carries no audio, and no meaningful level can be
        // computed from it, so drop it before touching the encoder or the callback.
        if (pcmData == null || pcmData.length < 2) {
            return;
        }

        try {
            mPcmEncoder.writePcm(pcmData);
        } catch (Exception e) {
            // Encoding problems must not break the level meter below; log and carry on.
            TUIChatLog.e(TAG, "writePcmData failed: " + e.getMessage());
        }

        // Compute the level in real time so the UI can show volume fluctuation.
        if (mRecordCallback != null) {
            double db = calculateDbFromPcm(pcmData);
            mRecordCallback.onVoiceDb(db);
        }
    }
}
// Simplified PCM decibel calculation based on root mean square (RMS)
/**
 * Computes the level of a PCM packet in dB relative to full scale, based on the root
 * mean square (RMS) of its 16-bit little-endian samples.
 *
 * <p>A {@code null} packet, or one shorter than a single sample, is reported at the same
 * floor value that an all-zero packet produces. A trailing odd byte is ignored.
 *
 * @param pcm raw PCM bytes, two bytes per sample, low byte first
 * @return the level in dBFS; the floor is {@code 20 * log10(1e-10)} (about -200)
 */
private double calculateDbFromPcm(byte[] pcm) {
    // The 1e-10 term keeps log10 away from zero; it also defines the silence floor.
    final double epsilon = 1e-10;
    if (pcm == null || pcm.length < 2) {
        return 20 * Math.log10(epsilon);
    }

    // Integer division drops a dangling odd byte instead of reading past the array.
    final int sampleCount = pcm.length / 2;
    long sumOfSquares = 0;
    for (int n = 0; n < sampleCount; n++) {
        int low = pcm[2 * n] & 0xFF;
        int high = pcm[2 * n + 1];
        short sample = (short) ((high << 8) | low);
        sumOfSquares += (long) sample * sample;
    }

    double rms = Math.sqrt(sumOfSquares / (double) sampleCount);
    return 20 * Math.log10(rms / 32768.0 + epsilon);
}
关键逻辑解析:
/**
 * Stops the active PCM recording session and reports the outcome through
 * {@code onRecordCompleted}.
 *
 * <p>Does nothing (besides logging) when no PCM session is active or a stop is already
 * in progress. The recording counts as successful only if the encoder stopped without
 * throwing and the output file exists and is larger than 1 KB.
 */
private void stopPcmRecordInternal() {
    boolean nothingToStop = !mIsPcmMode || mIsStopping;
    if (nothingToStop) {
        TUIChatLog.w(TAG, "stopPcmRecordInternal: skip, mode=" + mIsPcmMode + ", stopping=" + mIsStopping);
        return;
    }

    mIsStopping = true;
    mHandler.removeCallbacksAndMessages(null);
    mIsPcmMode = false;

    // Real recording duration, clamped to the range [0, 60000] ms.
    int elapsedMs = (int) (SystemClock.elapsedRealtime() - mPcmStartTime);
    mRealDuration = Math.max(0, Math.min(elapsedMs, 60000));
    TUIChatLog.i(TAG, "PCM real duration: " + mRealDuration + "ms");

    // Stop the encoder; the reference is dropped whether or not stop() succeeds.
    boolean encoderStoppedCleanly = false;
    if (mPcmEncoder != null) {
        try {
            mPcmEncoder.stop();
            encoderStoppedCleanly = true;
        } catch (Exception e) {
            TUIChatLog.e(TAG, "stopPcmRecord: encoder stop failed");
        } finally {
            mPcmEncoder = null;
        }
    }

    // Validate the output: it must exist and be larger than 1 KB.
    boolean fileValid = false;
    if (!TextUtils.isEmpty(mAudioRecordPath)) {
        java.io.File recordedFile = new java.io.File(mAudioRecordPath);
        if (recordedFile.exists() && recordedFile.length() > 1024) {
            fileValid = true;
        } else {
            TUIChatLog.e(TAG, "record file invalid or too small: " + mAudioRecordPath);
        }
    }

    mIsStopping = false;
    onRecordCompleted(encoderStoppedCleanly && fileValid);
}
关键逻辑解析:
AacPcmEncoder 是核心工具类,负责将原始 PCM 数据编码为 M4A 格式(AAC 编码),解决了 PCM 预处理、重采样、编码器排空等关键问题。
/**
 * Cleans up a PCM packet before encoding: removes the DC offset, attenuates by 10% and
 * hard-limits every sample to +/-30000.
 *
 * <p>The whole chain runs in {@code int} arithmetic. Subtracting the offset directly in
 * a {@code short} can wrap around for samples near full scale and flip their sign
 * before the limiter ever sees them.
 *
 * @param pcm raw PCM bytes; {@code null} or fewer than 2 bytes is returned unchanged
 * @return the processed PCM bytes
 */
private byte[] preprocessPcm(byte[] pcm) {
    if (pcm == null || pcm.length < 2) {
        return pcm;
    }
    short[] samples = bytesToShorts(pcm);
    if (samples == null || samples.length == 0) {
        // Nothing to average over; avoids a division by zero below.
        return pcm;
    }

    // 1. DC offset = mean of the packet; removing it suppresses hum / electrical noise.
    long sum = 0;
    for (short s : samples) {
        sum += s;
    }
    final int dcOffset = (int) (sum / samples.length);

    // 2. Slight attenuation plus a hard limiter to prevent clipping distortion.
    final int limit = 30000; // leaves headroom below Short.MAX_VALUE
    for (int i = 0; i < samples.length; i++) {
        int centered = samples[i] - dcOffset;   // int math: no 16-bit wraparound
        int val = (int) (centered * 0.90f);     // attenuate by 10%
        if (val > limit) {
            val = limit;
        } else if (val < -limit) {
            val = -limit;
        }
        samples[i] = (short) val;
    }
    return shortsToBytes(samples);
}
关键逻辑解析:
/**
 * Converts a PCM packet from one sample rate to another using linear interpolation
 * between neighbouring samples.
 *
 * <p>The input is returned unchanged when the rates are equal, when either rate is not
 * positive, when the packet is {@code null} or shorter than one sample, or when the
 * result would contain no samples. No filtering is applied before interpolating.
 *
 * @param pcm     raw PCM bytes at {@code srcRate}
 * @param srcRate source sample rate in Hz
 * @param dstRate target sample rate in Hz
 * @return PCM bytes at {@code dstRate}, or {@code pcm} itself in the cases above
 */
private byte[] resamplePcm(byte[] pcm, int srcRate, int dstRate) {
    if (srcRate == dstRate || pcm == null || pcm.length < 2) {
        return pcm;
    }
    // A zero source rate would make the ratio 0 and the output length saturate to
    // Integer.MAX_VALUE; reject non-positive rates instead of attempting that allocation.
    if (srcRate <= 0 || dstRate <= 0) {
        return pcm;
    }

    short[] src = bytesToShorts(pcm);
    double ratio = (double) srcRate / dstRate;
    int dstLen = (int) (src.length / ratio);
    if (dstLen <= 0) {
        return pcm;
    }

    short[] dst = new short[dstLen];
    final int lastIdx = src.length - 1;
    for (int i = 0; i < dstLen; i++) {
        double srcIdx = i * ratio;
        // Clamp so floating-point rounding can never index past the last sample.
        int idx = Math.min((int) srcIdx, lastIdx);
        if (idx < lastIdx) {
            double frac = srcIdx - idx;
            dst[i] = (short) (src[idx] * (1.0 - frac) + src[idx + 1] * frac);
        } else {
            // No right-hand neighbour: repeat the final sample.
            dst[i] = src[idx];
        }
    }
    return shortsToBytes(dst);
}
关键逻辑解析:
/**
 * Preprocesses, resamples and encodes one packet of PCM data.
 *
 * <p>The packet is fed to the encoder in chunks of at most 2048 bytes, and the encoder
 * output is drained after every chunk. A chunk is only counted as consumed once it has
 * actually been queued. If the encoder offers no input buffer, the same chunk is retried
 * a few times before the rest of the packet is dropped with a warning.
 *
 * @param pcmData raw PCM bytes at the configured source sample rate; ignored when
 *                {@code null}, shorter than one sample, or when the encoder is not running
 */
public void writePcm(byte[] pcmData) {
    synchronized (mLock) {
        if (!mIsRecording || !mIsEncoderStarted || mEncoder == null || mMediaMuxer == null) {
            return;
        }
        if (pcmData == null || pcmData.length < 2) {
            return;
        }

        try {
            // Preprocess, then resample to the encoder's sample rate.
            byte[] processed = preprocessPcm(pcmData);
            byte[] finalPcm = resamplePcm(processed, mSrcSampleRate, SAMPLE_RATE);

            final int chunkBytes = 2048;   // 1024 samples * 2 bytes
            final int maxStalls = 5;       // bounded retries so this call cannot block for long
            int offset = 0;
            int stalls = 0;
            while (offset < finalPcm.length) {
                int queued = 0;
                int inputBufferIndex = mEncoder.dequeueInputBuffer(1000);
                if (inputBufferIndex >= 0) {
                    ByteBuffer inputBuffer = mEncoder.getInputBuffer(inputBufferIndex);
                    if (inputBuffer != null) {
                        inputBuffer.clear();
                        // Never write more than the codec buffer can hold.
                        queued = Math.min(Math.min(chunkBytes, finalPcm.length - offset),
                                inputBuffer.remaining());
                        inputBuffer.put(finalPcm, offset, queued);
                    }
                    // PTS derives from the running sample count, so it never goes backwards.
                    long pts = mTotalSamples * 1000000L / SAMPLE_RATE;
                    mTotalSamples += queued / 2L;
                    // Always return the dequeued buffer to the codec, even when it is empty,
                    // so the input buffer is not leaked.
                    mEncoder.queueInputBuffer(inputBufferIndex, 0, queued, pts, 0);
                }

                // Drain output right after each write; this also frees up input buffers.
                drainEncoder(1000);

                if (queued > 0) {
                    offset += queued;
                    stalls = 0;
                } else if (++stalls >= maxStalls) {
                    Log.w(TAG, "writePcm: encoder input stalled, dropping "
                            + (finalPcm.length - offset) + " bytes");
                    break;
                }
            }
        } catch (Exception e) {
            Log.e(TAG, "writePcm failed", e);
        }
    }
}
// Drain the encoder output and write it to the MediaMuxer
/**
 * Pulls all currently available output from the encoder and writes it to the muxer.
 *
 * <p>The loop ends when the encoder reports that no output is ready within
 * {@code timeoutUs}, or after a buffer carrying the end-of-stream flag. The muxer track
 * is added and the muxer started when the encoder first reports its output format.
 * Codec-config buffers and buffers with a negative timestamp are not written. Every
 * dequeued output buffer is released back to the encoder, even if writing it fails.
 *
 * @param timeoutUs how long each dequeue attempt may wait, in microseconds
 */
private void drainEncoder(long timeoutUs) {
    MediaCodec.BufferInfo bufferInfo = new MediaCodec.BufferInfo();
    while (true) {
        int outputBufferIndex = mEncoder.dequeueOutputBuffer(bufferInfo, timeoutUs);
        if (outputBufferIndex == MediaCodec.INFO_TRY_AGAIN_LATER) {
            break;
        }
        if (outputBufferIndex == MediaCodec.INFO_OUTPUT_FORMAT_CHANGED) {
            // The output format is known now: set up the muxer track exactly once.
            if (mAudioTrackIndex == -1 && mMediaMuxer != null) {
                MediaFormat format = mEncoder.getOutputFormat();
                mAudioTrackIndex = mMediaMuxer.addTrack(format);
                mMediaMuxer.start();
                mIsMuxerStarted = true;
            }
            continue;
        }
        if (outputBufferIndex < 0) {
            // Any other informational code carries no buffer; poll again.
            continue;
        }

        try {
            ByteBuffer outputBuffer = mEncoder.getOutputBuffer(outputBufferIndex);
            boolean muxerReady = mAudioTrackIndex != -1 && mIsMuxerStarted;
            boolean isCodecConfig = (bufferInfo.flags & MediaCodec.BUFFER_FLAG_CODEC_CONFIG) != 0;
            if (outputBuffer != null && muxerReady && !isCodecConfig) {
                if (bufferInfo.presentationTimeUs < 0) {
                    Log.w(TAG, "skip negative pts: " + bufferInfo.presentationTimeUs);
                } else {
                    mMediaMuxer.writeSampleData(mAudioTrackIndex, outputBuffer, bufferInfo);
                }
            }
        } finally {
            // Release in finally so a failing muxer write cannot leak the output buffer.
            mEncoder.releaseOutputBuffer(outputBufferIndex, false);
        }

        if ((bufferInfo.flags & MediaCodec.BUFFER_FLAG_END_OF_STREAM) != 0) {
            break;
        }
    }
}
关键逻辑解析:
/**
 * Finishes the encoding session: signals end-of-stream to the encoder, drains the
 * remaining output into the muxer, then stops and releases the encoder and the muxer.
 *
 * <p>Returns immediately when neither recording nor the encoder is active. Failures
 * while queuing the end-of-stream marker or while draining are logged and never prevent
 * the encoder and muxer from being released.
 */
private void stopInternal() {
    if (!mIsRecording && !mIsEncoderStarted) {
        return;
    }
    mIsRecording = false;

    if (mIsEncoderStarted && mEncoder != null) {
        try {
            // Queue an empty buffer flagged end-of-stream to tell the encoder to finish.
            int inputBufferIndex = mEncoder.dequeueInputBuffer(1000);
            if (inputBufferIndex >= 0) {
                long eosPts = mTotalSamples * 1000000L / SAMPLE_RATE;
                mEncoder.queueInputBuffer(inputBufferIndex, 0, 0, eosPts,
                        MediaCodec.BUFFER_FLAG_END_OF_STREAM);
            }
        } catch (Exception e) {
            Log.e(TAG, "queue EOS failed", e);
        }

        // Drain with a bounded number of empty polls so the tail of the audio is written.
        try {
            MediaCodec.BufferInfo bufferInfo = new MediaCodec.BufferInfo();
            int tryAgainCount = 0;
            while (true) {
                int outputBufferIndex = mEncoder.dequeueOutputBuffer(bufferInfo, 50000);
                if (outputBufferIndex == MediaCodec.INFO_TRY_AGAIN_LATER) {
                    tryAgainCount++;
                    if (tryAgainCount >= 5) {
                        Log.w(TAG, "drain timeout after 5 retries, force stop");
                        break;
                    }
                    continue;
                }
                if (outputBufferIndex == MediaCodec.INFO_OUTPUT_FORMAT_CHANGED) {
                    // Same handling as drainEncoder: set up the muxer track exactly once.
                    if (mAudioTrackIndex == -1 && mMediaMuxer != null) {
                        MediaFormat format = mEncoder.getOutputFormat();
                        mAudioTrackIndex = mMediaMuxer.addTrack(format);
                        mMediaMuxer.start();
                        mIsMuxerStarted = true;
                    }
                    continue;
                }
                if (outputBufferIndex < 0) {
                    continue;
                }

                try {
                    ByteBuffer outputBuffer = mEncoder.getOutputBuffer(outputBufferIndex);
                    boolean muxerReady = mAudioTrackIndex != -1 && mIsMuxerStarted && mMediaMuxer != null;
                    boolean isCodecConfig =
                            (bufferInfo.flags & MediaCodec.BUFFER_FLAG_CODEC_CONFIG) != 0;
                    if (outputBuffer != null && muxerReady && !isCodecConfig) {
                        if (bufferInfo.presentationTimeUs < 0) {
                            Log.w(TAG, "skip negative pts: " + bufferInfo.presentationTimeUs);
                        } else {
                            mMediaMuxer.writeSampleData(mAudioTrackIndex, outputBuffer, bufferInfo);
                        }
                    }
                } finally {
                    // Hand every dequeued buffer back, even when the write failed.
                    mEncoder.releaseOutputBuffer(outputBufferIndex, false);
                }

                if ((bufferInfo.flags & MediaCodec.BUFFER_FLAG_END_OF_STREAM) != 0) {
                    Log.i(TAG, "EOS fully drained");
                    break;
                }
            }
        } catch (Exception e) {
            // Keep going: the resources below must be released no matter what.
            Log.e(TAG, "drain on stop failed", e);
        }

        // Release the encoder.
        try {
            mEncoder.stop();
        } catch (Exception e) {
            Log.e(TAG, "encoder stop failed", e);
        }
        try {
            mEncoder.release();
        } catch (Exception e) {
            Log.e(TAG, "encoder release failed", e);
        }
        mEncoder = null;
        mIsEncoderStarted = false;
    }

    // Release the muxer; stop() is only called on a muxer that was actually started.
    if (mMediaMuxer != null) {
        try {
            if (mIsMuxerStarted) {
                mMediaMuxer.stop();
            }
        } catch (Exception e) {
            Log.e(TAG, "muxer stop failed", e);
        }
        try {
            mMediaMuxer.release();
        } catch (Exception e) {
            Log.e(TAG, "muxer release failed", e);
        }
        mMediaMuxer = null;
        mIsMuxerStarted = false;
    }
}
关键逻辑解析:
AudioPlayer 专注于解决播放兼容性问题,尤其是 TV / 盒子设备的扬声器适配。
/**
 * Plays the audio file at {@code filePath} through the speaker.
 *
 * <p>A player left over from an earlier call is released first, so two players never
 * run at once and a stale completion event cannot tear down the new playback. When
 * playback finishes the callback receives {@code true}. If the player cannot be set up
 * or started, an error toast is shown and the callback receives {@code false}.
 *
 * @param filePath path of the audio file to play
 * @param callback receiver of the completion result; stored in {@code mPlayCallback}
 */
public void startPlay(String filePath, Callback callback) {
    // Release a previous player before its reference is overwritten below.
    stopInternalPlay();

    mAudioRecordPath = filePath;
    mPlayCallback = callback;

    // Force speaker output so TV / set-top boxes do not route audio to an earpiece.
    setSpeakerModeForce();

    try {
        mPlayer = new MediaPlayer();
        mPlayer.setAudioStreamType(AudioManager.STREAM_MUSIC);
        mPlayer.setDataSource(filePath);
        mPlayer.setOnCompletionListener(mp -> {
            stopInternalPlay();
            onPlayCompleted(true);
        });
        mPlayer.prepare();
        mPlayer.start();
    } catch (Exception e) {
        TUIChatLog.w(TAG, "startPlay failed", e);
        ToastUtil.toastLongMessage(TUIChatService.getAppContext().getString(R.string.play_error_tip));
        stopInternalPlay();
        onPlayCompleted(false);
    }
}
// Force speaker mode regardless of the system configuration
/**
 * Puts the audio manager into normal mode and switches the speakerphone on.
 * Does nothing when the audio service is unavailable.
 */
private void setSpeakerModeForce() {
    Object service = TUIChatService.getAppContext().getSystemService(Context.AUDIO_SERVICE);
    AudioManager manager = (AudioManager) service;
    if (manager == null) {
        return;
    }
    manager.setMode(AudioManager.MODE_NORMAL);
    manager.setSpeakerphoneOn(true);
}
关键逻辑解析:
/**
 * Stops the current playback, reports completion with {@code false} and detaches the
 * playback callback.
 *
 * <p>The callback is detached before it is invoked, so a callback that starts a new
 * playback keeps the callback it registered for that new playback.
 */
public void stopPlay() {
    Callback callback = mPlayCallback;
    mPlayCallback = null;
    finishPlay(callback, false);
}

/** Releases the current player, if any, and clears the reference to it. */
private void stopInternalPlay() {
    if (mPlayer == null) {
        return;
    }
    mPlayer.release();
    mPlayer = null;
}

/**
 * Reports the end of playback to the registered callback.
 *
 * @param success {@code true} if playback ran to completion
 */
private void onPlayCompleted(boolean success) {
    finishPlay(mPlayCallback, success);
}

/**
 * Cleans up playback state and then notifies {@code callback}.
 *
 * <p>Cleanup happens before the notification. Resetting {@code mPlayer} and the audio
 * mode after the callback would clobber a playback that the callback itself started.
 *
 * @param callback listener to notify; may be {@code null}
 * @param success  result passed on to the listener
 */
private void finishPlay(Callback callback, boolean success) {
    stopInternalPlay();   // releases the player rather than only dropping the reference
    resetSpeakerMode();   // restore the audio mode once playback has ended
    if (callback != null) {
        callback.onCompletion(success);
    }
}
关键逻辑解析:
本文围绕 IM 场景下的语音录制与播放,拆解了从 PCM 原始数据编码到音频播放的全流程实现,重点解决了多设备兼容、音频质量优化、边界异常处理等核心问题。这套方案已落地于腾讯云 TUIChat 组件,能够稳定支撑手机、TV、盒子等多终端的语音消息功能。