| /* |
| * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. |
| * |
| * Use of this source code is governed by a BSD-style license |
| * that can be found in the LICENSE file in the root of the source |
| * tree. An additional intellectual property rights grant can be found |
| * in the file PATENTS. All contributing project authors may |
| * be found in the AUTHORS file in the root of the source tree. |
| */ |
| |
| #include "audio_processing_impl.h" |
| |
| #include <assert.h> |
| |
| #include "audio_buffer.h" |
| #include "critical_section_wrapper.h" |
| #include "echo_cancellation_impl.h" |
| #include "echo_control_mobile_impl.h" |
| #include "file_wrapper.h" |
| #include "high_pass_filter_impl.h" |
| #include "gain_control_impl.h" |
| #include "level_estimator_impl.h" |
| #include "module_common_types.h" |
| #include "noise_suppression_impl.h" |
| #include "processing_component.h" |
| #include "splitting_filter.h" |
| #include "voice_detection_impl.h" |
| |
| #ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP |
| // Files generated at build-time by the protobuf compiler. |
| #ifdef WEBRTC_ANDROID |
| #include "external/webrtc/src/modules/audio_processing/debug.pb.h" |
| #else |
| #include "webrtc/audio_processing/debug.pb.h" |
| #endif |
| #endif // WEBRTC_AUDIOPROC_DEBUG_DUMP |
| |
| namespace webrtc { |
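// Factory method; returns NULL if the new instance fails to initialize. The
// returned object must be released with AudioProcessing::Destroy().
//
// Minimal usage sketch (hypothetical caller code; the 16 kHz configuration
// shown here is illustrative only):
//
//   AudioProcessing* apm = AudioProcessing::Create(0);
//   if (apm != NULL) {
//     apm->set_sample_rate_hz(AudioProcessing::kSampleRate16kHz);
//     // ... call ProcessStream()/AnalyzeReverseStream() per 10 ms frame ...
//     AudioProcessing::Destroy(apm);
//   }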
| AudioProcessing* AudioProcessing::Create(int id) { |
| /*WEBRTC_TRACE(webrtc::kTraceModuleCall, |
| webrtc::kTraceAudioProcessing, |
| id, |
| "AudioProcessing::Create()");*/ |
| |
| AudioProcessingImpl* apm = new AudioProcessingImpl(id); |
| if (apm->Initialize() != kNoError) { |
| delete apm; |
| apm = NULL; |
| } |
| |
| return apm; |
| } |
| |
| void AudioProcessing::Destroy(AudioProcessing* apm) { |
| delete static_cast<AudioProcessingImpl*>(apm); |
| } |
| |
| AudioProcessingImpl::AudioProcessingImpl(int id) |
| : id_(id), |
| echo_cancellation_(NULL), |
| echo_control_mobile_(NULL), |
| gain_control_(NULL), |
| high_pass_filter_(NULL), |
| level_estimator_(NULL), |
| noise_suppression_(NULL), |
| voice_detection_(NULL), |
| crit_(CriticalSectionWrapper::CreateCriticalSection()), |
| render_audio_(NULL), |
| capture_audio_(NULL), |
| #ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP |
| debug_file_(FileWrapper::Create()), |
| event_msg_(new audioproc::Event()), |
| #endif |
| sample_rate_hz_(kSampleRate16kHz), |
| split_sample_rate_hz_(kSampleRate16kHz), |
| samples_per_channel_(sample_rate_hz_ / 100), |
| stream_delay_ms_(0), |
| was_stream_delay_set_(false), |
| num_reverse_channels_(1), |
| num_input_channels_(1), |
| num_output_channels_(1) { |
| |
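  // Register each component for the generic handling in Initialize() and the
  // destructor. This list order is unrelated to the processing order, which
  // is hard-coded in ProcessStream() and AnalyzeReverseStream().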
| echo_cancellation_ = new EchoCancellationImpl(this); |
| component_list_.push_back(echo_cancellation_); |
| |
| echo_control_mobile_ = new EchoControlMobileImpl(this); |
| component_list_.push_back(echo_control_mobile_); |
| |
| gain_control_ = new GainControlImpl(this); |
| component_list_.push_back(gain_control_); |
| |
| high_pass_filter_ = new HighPassFilterImpl(this); |
| component_list_.push_back(high_pass_filter_); |
| |
| level_estimator_ = new LevelEstimatorImpl(this); |
| component_list_.push_back(level_estimator_); |
| |
| noise_suppression_ = new NoiseSuppressionImpl(this); |
| component_list_.push_back(noise_suppression_); |
| |
| voice_detection_ = new VoiceDetectionImpl(this); |
| component_list_.push_back(voice_detection_); |
| } |
| |
| AudioProcessingImpl::~AudioProcessingImpl() { |
| while (!component_list_.empty()) { |
| ProcessingComponent* component = component_list_.front(); |
| component->Destroy(); |
| delete component; |
| component_list_.pop_front(); |
| } |
| |
| #ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP |
| if (debug_file_->Open()) { |
| debug_file_->CloseFile(); |
| } |
| #endif |
| |
| delete crit_; |
| crit_ = NULL; |
| |
| if (render_audio_) { |
| delete render_audio_; |
| render_audio_ = NULL; |
| } |
| |
| if (capture_audio_) { |
| delete capture_audio_; |
| capture_audio_ = NULL; |
| } |
| } |
| |
| CriticalSectionWrapper* AudioProcessingImpl::crit() const { |
| return crit_; |
| } |
| |
| int AudioProcessingImpl::split_sample_rate_hz() const { |
| return split_sample_rate_hz_; |
| } |
| |
| int AudioProcessingImpl::Initialize() { |
| CriticalSectionScoped crit_scoped(*crit_); |
| return InitializeLocked(); |
| } |
| |
| int AudioProcessingImpl::InitializeLocked() { |
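  // Recreate the audio buffers so they match the current sample rate and
  // channel configuration.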
| if (render_audio_ != NULL) { |
| delete render_audio_; |
| render_audio_ = NULL; |
| } |
| |
| if (capture_audio_ != NULL) { |
| delete capture_audio_; |
| capture_audio_ = NULL; |
| } |
| |
| render_audio_ = new AudioBuffer(num_reverse_channels_, |
| samples_per_channel_); |
| capture_audio_ = new AudioBuffer(num_input_channels_, |
| samples_per_channel_); |
| |
| was_stream_delay_set_ = false; |
| |
| // Initialize all components. |
| std::list<ProcessingComponent*>::iterator it; |
| for (it = component_list_.begin(); it != component_list_.end(); it++) { |
| int err = (*it)->Initialize(); |
| if (err != kNoError) { |
| return err; |
| } |
| } |
| |
| #ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP |
| if (debug_file_->Open()) { |
| int err = WriteInitMessage(); |
| if (err != kNoError) { |
| return err; |
| } |
| } |
| #endif |
| |
| return kNoError; |
| } |
| |
| int AudioProcessingImpl::set_sample_rate_hz(int rate) { |
| CriticalSectionScoped crit_scoped(*crit_); |
| if (rate != kSampleRate8kHz && |
| rate != kSampleRate16kHz && |
| rate != kSampleRate32kHz) { |
| return kBadParameterError; |
| } |
| |
  sample_rate_hz_ = rate;
  // The APM operates on 10 ms frames, so each frame holds rate / 100 samples
  // per channel.
  samples_per_channel_ = rate / 100;
| |
  // At 32 kHz (super-wideband), processing is performed on two bands, each
  // sampled at 16 kHz; at lower rates the full-band signal is used directly.
  if (sample_rate_hz_ == kSampleRate32kHz) {
    split_sample_rate_hz_ = kSampleRate16kHz;
  } else {
    split_sample_rate_hz_ = sample_rate_hz_;
  }
| |
| return InitializeLocked(); |
| } |
| |
| int AudioProcessingImpl::sample_rate_hz() const { |
| return sample_rate_hz_; |
| } |
| |
| int AudioProcessingImpl::set_num_reverse_channels(int channels) { |
| CriticalSectionScoped crit_scoped(*crit_); |
  // Only mono and stereo are supported currently.
| if (channels > 2 || channels < 1) { |
| return kBadParameterError; |
| } |
| |
| num_reverse_channels_ = channels; |
| |
| return InitializeLocked(); |
| } |
| |
| int AudioProcessingImpl::num_reverse_channels() const { |
| return num_reverse_channels_; |
| } |
| |
| int AudioProcessingImpl::set_num_channels( |
| int input_channels, |
| int output_channels) { |
| CriticalSectionScoped crit_scoped(*crit_); |
  // Only downmixing is supported: the number of output channels may not
  // exceed the number of input channels.
  if (output_channels > input_channels) {
    return kBadParameterError;
  }

  // Only mono and stereo are supported currently.
  if (input_channels > 2 || input_channels < 1) {
    return kBadParameterError;
  }
| |
| if (output_channels > 2 || output_channels < 1) { |
| return kBadParameterError; |
| } |
| |
| num_input_channels_ = input_channels; |
| num_output_channels_ = output_channels; |
| |
| return InitializeLocked(); |
| } |
| |
| int AudioProcessingImpl::num_input_channels() const { |
| return num_input_channels_; |
| } |
| |
| int AudioProcessingImpl::num_output_channels() const { |
| return num_output_channels_; |
| } |
| |
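// Processes a 10 ms frame of the near-end (capture) audio, in place. The
// frame must match the configured sample rate, number of input channels and
// samples per channel. When the echo canceller is enabled,
// set_stream_delay_ms() is expected to be called before each call; the delay
// flag is reset at the end of this function.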
| int AudioProcessingImpl::ProcessStream(AudioFrame* frame) { |
| CriticalSectionScoped crit_scoped(*crit_); |
| int err = kNoError; |
| |
| if (frame == NULL) { |
| return kNullPointerError; |
| } |
| |
| if (frame->_frequencyInHz != sample_rate_hz_) { |
| return kBadSampleRateError; |
| } |
| |
| if (frame->_audioChannel != num_input_channels_) { |
| return kBadNumberChannelsError; |
| } |
| |
| if (frame->_payloadDataLengthInSamples != samples_per_channel_) { |
| return kBadDataLengthError; |
| } |
| |
| #ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP |
| if (debug_file_->Open()) { |
| event_msg_->set_type(audioproc::Event::STREAM); |
| audioproc::Stream* msg = event_msg_->mutable_stream(); |
| const size_t data_size = sizeof(int16_t) * |
| frame->_payloadDataLengthInSamples * |
| frame->_audioChannel; |
| msg->set_input_data(frame->_payloadData, data_size); |
| msg->set_delay(stream_delay_ms_); |
| msg->set_drift(echo_cancellation_->stream_drift_samples()); |
| msg->set_level(gain_control_->stream_analog_level()); |
| } |
| #endif |
| |
| capture_audio_->DeinterleaveFrom(frame); |
| |
| // TODO(ajm): experiment with mixing and AEC placement. |
| if (num_output_channels_ < num_input_channels_) { |
| capture_audio_->Mix(num_output_channels_); |
| frame->_audioChannel = num_output_channels_; |
| } |
| |
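  // Fixed capture-side processing order: optional band split, high-pass
  // filter, gain control (analysis), echo cancellation, noise suppression,
  // mobile echo control, voice detection, gain control (processing), band
  // recombination, and level estimation on the recombined signal.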
| bool data_changed = stream_data_changed(); |
| if (analysis_needed(data_changed)) { |
| for (int i = 0; i < num_output_channels_; i++) { |
      // Split into a low band (0-8 kHz) and a high band (8-16 kHz), each
      // sampled at 16 kHz.
| SplittingFilterAnalysis(capture_audio_->data(i), |
| capture_audio_->low_pass_split_data(i), |
| capture_audio_->high_pass_split_data(i), |
| capture_audio_->analysis_filter_state1(i), |
| capture_audio_->analysis_filter_state2(i)); |
| } |
| } |
| |
| err = high_pass_filter_->ProcessCaptureAudio(capture_audio_); |
| if (err != kNoError) { |
| return err; |
| } |
| |
| err = gain_control_->AnalyzeCaptureAudio(capture_audio_); |
| if (err != kNoError) { |
| return err; |
| } |
| |
| err = echo_cancellation_->ProcessCaptureAudio(capture_audio_); |
| if (err != kNoError) { |
| return err; |
| } |
| |
  // When both the mobile echo controller and noise suppression are enabled,
  // save a copy of the low band before noise suppression modifies it, so
  // that AECM can use the unsuppressed signal as its reference.
  if (echo_control_mobile_->is_enabled() &&
      noise_suppression_->is_enabled()) {
    capture_audio_->CopyLowPassToReference();
  }
| |
| err = noise_suppression_->ProcessCaptureAudio(capture_audio_); |
| if (err != kNoError) { |
| return err; |
| } |
| |
| err = echo_control_mobile_->ProcessCaptureAudio(capture_audio_); |
| if (err != kNoError) { |
| return err; |
| } |
| |
| err = voice_detection_->ProcessCaptureAudio(capture_audio_); |
| if (err != kNoError) { |
| return err; |
| } |
| |
| err = gain_control_->ProcessCaptureAudio(capture_audio_); |
| if (err != kNoError) { |
| return err; |
| } |
| |
| if (synthesis_needed(data_changed)) { |
| for (int i = 0; i < num_output_channels_; i++) { |
| // Recombine low and high bands. |
| SplittingFilterSynthesis(capture_audio_->low_pass_split_data(i), |
| capture_audio_->high_pass_split_data(i), |
| capture_audio_->data(i), |
| capture_audio_->synthesis_filter_state1(i), |
| capture_audio_->synthesis_filter_state2(i)); |
| } |
| } |
| |
| // The level estimator operates on the recombined data. |
| err = level_estimator_->ProcessStream(capture_audio_); |
| if (err != kNoError) { |
| return err; |
| } |
| |
  // Copy the processed data back into the frame, but only if a component
  // actually changed it.
  capture_audio_->InterleaveTo(frame, data_changed);
| |
| #ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP |
| if (debug_file_->Open()) { |
| audioproc::Stream* msg = event_msg_->mutable_stream(); |
| const size_t data_size = sizeof(int16_t) * |
| frame->_payloadDataLengthInSamples * |
| frame->_audioChannel; |
| msg->set_output_data(frame->_payloadData, data_size); |
| err = WriteMessageToDebugFile(); |
| if (err != kNoError) { |
| return err; |
| } |
| } |
| #endif |
| |
  // The stream delay is only valid for a single call; it must be set again
  // before the next call to ProcessStream().
  was_stream_delay_set_ = false;
| return kNoError; |
| } |
| |
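// Analyzes a 10 ms frame of the far-end (render) audio to provide the
// reference signal for the echo canceller, mobile echo control and gain
// control. The frame itself is not modified.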
| int AudioProcessingImpl::AnalyzeReverseStream(AudioFrame* frame) { |
| CriticalSectionScoped crit_scoped(*crit_); |
| int err = kNoError; |
| |
| if (frame == NULL) { |
| return kNullPointerError; |
| } |
| |
| if (frame->_frequencyInHz != sample_rate_hz_) { |
| return kBadSampleRateError; |
| } |
| |
| if (frame->_audioChannel != num_reverse_channels_) { |
| return kBadNumberChannelsError; |
| } |
| |
| if (frame->_payloadDataLengthInSamples != samples_per_channel_) { |
| return kBadDataLengthError; |
| } |
| |
| #ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP |
| if (debug_file_->Open()) { |
| event_msg_->set_type(audioproc::Event::REVERSE_STREAM); |
| audioproc::ReverseStream* msg = event_msg_->mutable_reverse_stream(); |
| const size_t data_size = sizeof(int16_t) * |
| frame->_payloadDataLengthInSamples * |
| frame->_audioChannel; |
| msg->set_data(frame->_payloadData, data_size); |
| err = WriteMessageToDebugFile(); |
| if (err != kNoError) { |
| return err; |
| } |
| } |
| #endif |
| |
| render_audio_->DeinterleaveFrom(frame); |
| |
| // TODO(ajm): turn the splitting filter into a component? |
| if (sample_rate_hz_ == kSampleRate32kHz) { |
| for (int i = 0; i < num_reverse_channels_; i++) { |
      // Split into a low band (0-8 kHz) and a high band (8-16 kHz), each
      // sampled at 16 kHz.
| SplittingFilterAnalysis(render_audio_->data(i), |
| render_audio_->low_pass_split_data(i), |
| render_audio_->high_pass_split_data(i), |
| render_audio_->analysis_filter_state1(i), |
| render_audio_->analysis_filter_state2(i)); |
| } |
| } |
| |
| // TODO(ajm): warnings possible from components? |
| err = echo_cancellation_->ProcessRenderAudio(render_audio_); |
| if (err != kNoError) { |
| return err; |
| } |
| |
| err = echo_control_mobile_->ProcessRenderAudio(render_audio_); |
| if (err != kNoError) { |
| return err; |
| } |
| |
| err = gain_control_->ProcessRenderAudio(render_audio_); |
| if (err != kNoError) { |
| return err; |
| } |
| |
| return err; // TODO(ajm): this is for returning warnings; necessary? |
| } |
| |
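// Sets the delay in ms between AnalyzeReverseStream() receiving a far-end
// frame and ProcessStream() receiving a near-end frame containing the
// corresponding echo. Delays above 500 ms are clamped to 500 and reported
// with kBadStreamParameterWarning.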
| int AudioProcessingImpl::set_stream_delay_ms(int delay) { |
| was_stream_delay_set_ = true; |
| if (delay < 0) { |
| return kBadParameterError; |
| } |
| |
| // TODO(ajm): the max is rather arbitrarily chosen; investigate. |
| if (delay > 500) { |
| stream_delay_ms_ = 500; |
| return kBadStreamParameterWarning; |
| } |
| |
| stream_delay_ms_ = delay; |
| return kNoError; |
| } |
| |
| int AudioProcessingImpl::stream_delay_ms() const { |
| return stream_delay_ms_; |
| } |
| |
| bool AudioProcessingImpl::was_stream_delay_set() const { |
| return was_stream_delay_set_; |
| } |
| |
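// Starts recording the input to the APM, along with its configuration, to
// |filename| in a protobuf format for offline analysis. Only available when
// built with WEBRTC_AUDIOPROC_DEBUG_DUMP; otherwise returns
// kUnsupportedFunctionError.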
| int AudioProcessingImpl::StartDebugRecording( |
| const char filename[AudioProcessing::kMaxFilenameSize]) { |
| CriticalSectionScoped crit_scoped(*crit_); |
| assert(kMaxFilenameSize == FileWrapper::kMaxFileNameSize); |
| |
| if (filename == NULL) { |
| return kNullPointerError; |
| } |
| |
| #ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP |
| // Stop any ongoing recording. |
| if (debug_file_->Open()) { |
| if (debug_file_->CloseFile() == -1) { |
| return kFileError; |
| } |
| } |
| |
| if (debug_file_->OpenFile(filename, false) == -1) { |
| debug_file_->CloseFile(); |
| return kFileError; |
| } |
| |
| int err = WriteInitMessage(); |
| if (err != kNoError) { |
| return err; |
| } |
| return kNoError; |
| #else |
| return kUnsupportedFunctionError; |
| #endif // WEBRTC_AUDIOPROC_DEBUG_DUMP |
| } |
| |
| int AudioProcessingImpl::StopDebugRecording() { |
| CriticalSectionScoped crit_scoped(*crit_); |
| |
| #ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP |
| // We just return if recording hasn't started. |
| if (debug_file_->Open()) { |
| if (debug_file_->CloseFile() == -1) { |
| return kFileError; |
| } |
| } |
| return kNoError; |
| #else |
| return kUnsupportedFunctionError; |
| #endif // WEBRTC_AUDIOPROC_DEBUG_DUMP |
| } |
| |
| EchoCancellation* AudioProcessingImpl::echo_cancellation() const { |
| return echo_cancellation_; |
| } |
| |
| EchoControlMobile* AudioProcessingImpl::echo_control_mobile() const { |
| return echo_control_mobile_; |
| } |
| |
| GainControl* AudioProcessingImpl::gain_control() const { |
| return gain_control_; |
| } |
| |
| HighPassFilter* AudioProcessingImpl::high_pass_filter() const { |
| return high_pass_filter_; |
| } |
| |
| LevelEstimator* AudioProcessingImpl::level_estimator() const { |
| return level_estimator_; |
| } |
| |
| NoiseSuppression* AudioProcessingImpl::noise_suppression() const { |
| return noise_suppression_; |
| } |
| |
| VoiceDetection* AudioProcessingImpl::voice_detection() const { |
| return voice_detection_; |
| } |
| |
| WebRtc_Word32 AudioProcessingImpl::ChangeUniqueId(const WebRtc_Word32 id) { |
| CriticalSectionScoped crit_scoped(*crit_); |
| /*WEBRTC_TRACE(webrtc::kTraceModuleCall, |
| webrtc::kTraceAudioProcessing, |
| id_, |
| "ChangeUniqueId(new id = %d)", |
| id);*/ |
| id_ = id; |
| |
| return kNoError; |
| } |
| |
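// Returns true if any enabled component may modify the stream data. Level
// estimation and voice detection only analyze the audio, so they are not
// counted as changing it.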
| bool AudioProcessingImpl::stream_data_changed() const { |
| int enabled_count = 0; |
| std::list<ProcessingComponent*>::const_iterator it; |
| for (it = component_list_.begin(); it != component_list_.end(); it++) { |
| if ((*it)->is_component_enabled()) { |
| enabled_count++; |
| } |
| } |
| |
  // Data is unchanged if no components are enabled, or if only
  // level_estimator_ and/or voice_detection_ are enabled, since neither
  // modifies the audio.
| if (enabled_count == 0) { |
| return false; |
| } else if (enabled_count == 1) { |
| if (level_estimator_->is_enabled() || voice_detection_->is_enabled()) { |
| return false; |
| } |
| } else if (enabled_count == 2) { |
| if (level_estimator_->is_enabled() && voice_detection_->is_enabled()) { |
| return false; |
| } |
| } |
| return true; |
| } |
| |
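// Recombination of the two bands is only needed if a component may have
// modified the split data, which in turn can only happen for 32 kHz audio.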
| bool AudioProcessingImpl::synthesis_needed(bool stream_data_changed) const { |
| return (stream_data_changed && sample_rate_hz_ == kSampleRate32kHz); |
| } |
| |
| bool AudioProcessingImpl::analysis_needed(bool stream_data_changed) const { |
  if (!stream_data_changed && !voice_detection_->is_enabled()) {
    // At most level_estimator_ is enabled, and it operates on the full-band
    // data, so no split analysis is needed.
    return false;
| } else if (sample_rate_hz_ == kSampleRate32kHz) { |
| // Something besides level_estimator_ is enabled, and we have super-wb. |
| return true; |
| } |
| return false; |
| } |
| |
| #ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP |
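// Writes the pending event to the debug file as a length-prefixed record: an
// int32 byte count (host byte order; assumed little-endian, see the TODO
// below) followed by the serialized protobuf bytes.
//
// A consumer would read records back with the inverse procedure. A rough,
// hypothetical sketch (ReadExact() stands in for whatever I/O the consumer
// uses):
//
//   int32_t size = 0;
//   while (ReadExact(file, &size, sizeof(size)) && size > 0) {
//     std::string bytes(size, '\0');
//     if (!ReadExact(file, &bytes[0], size)) break;
//     audioproc::Event event;
//     if (!event.ParseFromString(bytes)) break;
//     // Dispatch on event.type(): INIT, REVERSE_STREAM or STREAM.
//   }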
| int AudioProcessingImpl::WriteMessageToDebugFile() { |
| int32_t size = event_msg_->ByteSize(); |
| if (size <= 0) { |
| return kUnspecifiedError; |
| } |
| #if defined(WEBRTC_BIG_ENDIAN) |
| // TODO(ajm): Use little-endian "on the wire". For the moment, we can be |
| // pretty safe in assuming little-endian. |
| #endif |
| |
| if (!event_msg_->SerializeToString(&event_str_)) { |
| return kUnspecifiedError; |
| } |
| |
| // Write message preceded by its size. |
| if (!debug_file_->Write(&size, sizeof(int32_t))) { |
| return kFileError; |
| } |
| if (!debug_file_->Write(event_str_.data(), event_str_.length())) { |
| return kFileError; |
| } |
| |
| event_msg_->Clear(); |
| |
  return kNoError;
| } |
| |
| int AudioProcessingImpl::WriteInitMessage() { |
| event_msg_->set_type(audioproc::Event::INIT); |
| audioproc::Init* msg = event_msg_->mutable_init(); |
| msg->set_sample_rate(sample_rate_hz_); |
| msg->set_device_sample_rate(echo_cancellation_->device_sample_rate_hz()); |
| msg->set_num_input_channels(num_input_channels_); |
| msg->set_num_output_channels(num_output_channels_); |
| msg->set_num_reverse_channels(num_reverse_channels_); |
| |
| int err = WriteMessageToDebugFile(); |
| if (err != kNoError) { |
| return err; |
| } |
| |
| return kNoError; |
| } |
| #endif // WEBRTC_AUDIOPROC_DEBUG_DUMP |
| } // namespace webrtc |