音視頻的格式是一個(gè)有歧義的說(shuō)法。我們熟知的諸如Flv、Mp4、Mov啥的都是包裝格式,可以理解為一種容器,就像一個(gè)盒子。里面放的是經(jīng)過(guò)編碼的音視頻數(shù)據(jù),而這些音視頻數(shù)據(jù)都有自己的編碼格式,如AAC、H264、H265等等。 今天要展示的是從直播流中獲取到的音頻編碼數(shù)據(jù)進(jìn)行解碼并使用H5的音頻API進(jìn)行播放的過(guò)程。
這些格式分別是
1. speex
2. aac
3. mp3
這些格式都有開源的解碼庫(kù),不過(guò)都是c庫(kù),在H5中需要通過(guò)emscripten編譯成js執(zhí)行。
引入頭文件
/* Decoder headers — each library is selected at build time via a compile flag. */
#ifdef USE_SPEEX
#include <speex/speex.h>
#endif

#ifdef USE_AAC
#include "aacDecoder/include/neaacdec.h"
// #include "libfdk-aac/libAACdec/include/aacdecoder_lib.h"
#endif

#ifdef USE_MP3
#include "libmad/mad.h"
//#include "libid3tag/tag.h"
#endif
定義變量
/* Shared decoder state. */
int bufferLength;   /* capacity of outputBuffer */
int bufferFilled;   /* amount of data in the buffer not yet consumed */
u8 *outputBuffer;   /* holds decoded audio data */

#ifdef USE_AAC
faacDecHandle faacHandle;
#endif

#ifdef USE_SPEEX
i16 *audioOutput;
void *speexState;
SpeexBits speexBits;
#endif

#ifdef USE_MP3
MP3Decoder mp3Decoder;  /* hand-written wrapper class around libmad */
#endif
bufferLength 用于指定緩沖區(qū)的長(zhǎng)度,bufferFilled用于指示緩沖中沒有使用的數(shù)據(jù),outputBuffer用來(lái)存放解碼后的數(shù)據(jù)。 MP3Decoder是自己寫的一個(gè)類,需要定義這幾個(gè)成員
/* libmad decoder state — required members of the MP3Decoder class. */
mad_stream inputStream;  /* input bitstream */
mad_frame frame;         /* decoded frame */
mad_synth synth;         /* PCM synthesis state */
初始化
outputBuffer = (u8 *)malloc(bufferLength);

#ifdef USE_SPEEX
/* Wideband (16 kHz) mode: one decoded frame is 320 samples = 640 bytes. */
audioOutput = (i16 *)malloc(640);
auto mode = speex_lib_get_mode(SPEEX_MODEID_WB);
speexState = speex_decoder_init(mode);
speex_bits_init(&speexBits);
#endif

#ifdef USE_AAC
faacHandle = faacDecOpen();
#endif
mp3的初始化
/* Initialize the three libmad state objects (stream, frame, synth). */
mad_stream_init(&inputStream);
mad_frame_init(&frame);
mad_synth_init(&synth);
解碼
input對(duì)象中包含了經(jīng)過(guò)協(xié)議拆包后的原始音頻數(shù)據(jù)(來(lái)自RTMP協(xié)議或Flv格式)。緩沖大小雖然可以自己定義,但必須遵循下面的規(guī)則:
aac:1024的倍數(shù)(AAC一幀的播放時(shí)間是 1024 * 1000 / 44100 ≈ 23.22ms)
speex:320的倍數(shù)(320 * 1000/16000 = 20ms)
MP3:576的倍數(shù)(一幀1152個(gè)采樣,1152 * 1000 / 44100 ≈ 26.122ms)
根據(jù)這些數(shù)據(jù)可以估算緩沖大小引起的音頻的延時(shí),然后需要和視頻的延遲進(jìn)行同步。
#ifdef USE_SPEEX
// A packet of 11 bytes or fewer carries no audio payload; emit one frame of
// silence instead of calling the decoder.
if (input.length() <= 11) {
    memset(output, 0, 640);
} else {
    // One 52-byte payload decodes to one 20 ms wideband frame (320 samples).
    speex_bits_read_from(&speexBits, (const char *)input, 52);
    speex_decode_int(speexState, &speexBits, audioOutput);
    memcpy(output, audioOutput, 640);
}
return 640;
#endif

#ifdef USE_AAC
// First field of the FLV AAC tag body: 0 = AAC sequence header, 1 = AAC raw.
if (input.readB<1, u8>()) {
    faacDecFrameInfo frame_info;
    auto pcm_data = faacDecDecode(faacHandle, &frame_info,
                                  (unsigned char *)input.point(), input.length());
    if (frame_info.error > 0) {
        // BUGFIX: format string was "!!%sn" — missing newline escape.
        emscripten_log(1, "!!%s\n", NeAACDecGetErrorMessage(frame_info.error));
    } else {
        // frame_info.samples counts 16-bit samples; << 1 converts to bytes.
        int samplesBytes = frame_info.samples << 1;
        memcpy(output, pcm_data, samplesBytes);
        return samplesBytes;
    }
} else {
    // Sequence header: (re)configure the decoder from the AudioSpecificConfig.
    unsigned long samplerate;
    unsigned char channels;
    auto config = faacDecGetCurrentConfiguration(faacHandle);
    config->defObjectType = LTP;
    faacDecSetConfiguration(faacHandle, config);
    faacDecInit2(faacHandle, (unsigned char *)input.point(), 4, &samplerate, &channels);
    emscripten_log(0, "aac samplerate:%d channels:%d", samplerate, channels);
}
#endif
mp3 比較復(fù)雜,這里不貼代碼了,主要是mad庫(kù)不能直接調(diào)用其提供的API,直播流中的MP3數(shù)據(jù)和mp3文件的格式有所不同導(dǎo)致。如果本文火的話,我就詳細(xì)說(shuō)明。
C++音視頻開發(fā)學(xué)習(xí)資料:點(diǎn)擊領(lǐng)取→音視頻開發(fā)(資料文檔+視頻教程+面試題)(FFmpeg+WebRTC+RTMP+RTSP+HLS+RTP)
釋放資源
/* Release decoder resources in reverse order of initialization. */
#ifdef USE_AAC
faacDecClose(faacHandle);
#endif

#ifdef USE_SPEEX
speex_decoder_destroy(speexState);
speex_bits_destroy(&speexBits);
free(audioOutput);
#endif

free(outputBuffer);
mp3
/* Tear down libmad state. BUGFIX: mad_stream_finish was missing even though
 * mad_stream_init is called during setup; without it the stream's internal
 * buffer is leaked. */
mad_synth_finish(&synth);
mad_frame_finish(&frame);
mad_stream_finish(&inputStream);
播放
創(chuàng)建AudioContext對(duì)象
// Fall back to the vendor-prefixed constructor (older WebKit browsers),
// then create the audio context used for playback.
window.AudioContext = window.AudioContext || window.webkitAudioContext;
var context = new window.AudioContext();
創(chuàng)建audioBuffer
// Queue of decoded frames waiting to be played, plus the reusable
// AudioBuffer that frames are copied into before playback.
var audioBuffers = [];
var audioBuffer = context.createBuffer(channels, frameCount, samplerate);
播放音頻(帶緩沖)
// Invoked when the current source finishes: dequeue and play the next frame.
var playNextBuffer = function() {
  isPlaying = false;
  if (audioBuffers.length) {
    playAudio(audioBuffers.shift());
  }
  // Drop one extra queued frame when the backlog grows, to bound latency.
  if (audioBuffers.length > 1) audioBuffers.shift();
};

// Convert 16-bit PCM in audioOutputArray into floats in [-1, 1).
// When resampled, each sample is written twice to double the effective rate.
var copyAudioOutputArray = resampled
  ? function(target) {
      for (var i = 0; i < allFrameCount; i++) {
        var j = i << 1;
        target[j] = target[j + 1] = audioOutputArray[i] / 32768;
      }
    }
  : function(target) {
      for (var i = 0; i < allFrameCount; i++) {
        target[i] = audioOutputArray[i] / 32768;
      }
    };

// Copy one frame of PCM into the AudioBuffer's per-channel data.
var copyToCtxBuffer = channels > 1
  ? function(fromBuffer) {
      for (var channel = 0; channel < channels; channel++) {
        var nowBuffering = audioBuffer.getChannelData(channel);
        if (fromBuffer) {
          for (var i = 0; i < frameCount; i++) {
            // BUGFIX: de-interleave with i * channels + channel. The original
            // used i * (channel + 1), which reads channel 0 at stride 1 and
            // channel 1 at stride 2 — wrong offsets for interleaved stereo.
            nowBuffering[i] = fromBuffer[i * channels + channel];
          }
        } else {
          for (var i = 0; i < frameCount; i++) {
            nowBuffering[i] = audioOutputArray[i * channels + channel] / 32768;
          }
        }
      }
    }
  : function(fromBuffer) {
      var nowBuffering = audioBuffer.getChannelData(0);
      if (fromBuffer) nowBuffering.set(fromBuffer);
      else copyAudioOutputArray(nowBuffering);
    };

// Entry point: play a frame immediately, or queue it if one is already playing.
var playAudio = function(fromBuffer) {
  if (isPlaying) {
    var buffer = new Float32Array(resampled ? allFrameCount * 2 : allFrameCount);
    copyAudioOutputArray(buffer);
    audioBuffers.push(buffer);
    return;
  }
  isPlaying = true;
  copyToCtxBuffer(fromBuffer);
  var source = context.createBufferSource();
  source.buffer = audioBuffer;
  source.connect(context.destination);
  source.onended = playNextBuffer;
  source.start();
};
其中playNextBuffer 函數(shù)用于從緩沖中取出數(shù)據(jù) copyAudioOutputArray 函數(shù)用于將音頻數(shù)據(jù)轉(zhuǎn)化成浮點(diǎn)數(shù)。 copyToCtxBuffer 函數(shù)用于將音頻數(shù)據(jù)拷貝進(jìn)可以播放的緩沖數(shù)組中。 這些函數(shù)對(duì)單聲道和雙聲道進(jìn)行了處理
// Streams below 22.05 kHz get each sample duplicated before playback
// (see copyAudioOutputArray), doubling the effective sample rate.
var resampled = samplerate < 22050;
對(duì)于采樣率小于22.05khz的數(shù)據(jù),我們需要把每個(gè)采樣復(fù)制一份,模擬成22.05khz,因?yàn)椴糠譃g覽器的Web Audio實(shí)現(xiàn)對(duì)過(guò)低的采樣率支持不佳。