Skip to content

Better support for PCM audio formats around DefaultAudioSink #2445

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
*/
package androidx.media3.common.audio;

import androidx.media3.common.Format;
import androidx.media3.common.PlaybackParameters;
import androidx.media3.common.util.UnstableApi;

Expand All @@ -33,7 +34,7 @@ public interface AudioProcessorChain {
* during initialization, but audio processors may change state to become active/inactive during
* playback.
*/
AudioProcessor[] getAudioProcessors();
AudioProcessor[] getAudioProcessors(Format inputFormat);

/**
* Configures audio processors to apply the specified playback parameters immediately, returning
Expand All @@ -50,7 +51,7 @@ public interface AudioProcessorChain {
* value. Only called when processors have no input pending.
*
* @param skipSilenceEnabled Whether silences should be skipped in the audio stream.
* @return The new value.
* @return The value that was actually applied.
*/
boolean applySkipSilenceEnabled(boolean skipSilenceEnabled);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2401,6 +2401,7 @@ public static int getApiLevelThatAudioFormatIntroducedAudioEncoding(int encoding
return 28;
case C.ENCODING_OPUS:
return 30;
case C.ENCODING_PCM_24BIT:
case C.ENCODING_PCM_32BIT:
return 31;
case C.ENCODING_DTS_UHD_P2:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,7 @@ public class DefaultRenderersFactory implements RenderersFactory {
private long allowedVideoJoiningTimeMs;
private boolean enableDecoderFallback;
private MediaCodecSelector mediaCodecSelector;
private boolean pcmEncodingRestrictionLifted;
private boolean enableFloatOutput;
private boolean enableAudioTrackPlaybackParams;
private boolean enableMediaCodecVideoRendererPrewarming;
Expand Down Expand Up @@ -225,15 +226,38 @@ public final DefaultRenderersFactory setMediaCodecSelector(
*
* <p>The default value is {@code false}.
*
* @deprecated Use {@link #setPcmEncodingRestrictionLifted} instead to allow any encoding, not
* just 32-bit float.
* @param enableFloatOutput Whether to enable use of floating point audio output, if available.
* @return This factory, for convenience.
*/
@CanIgnoreReturnValue
@Deprecated
public final DefaultRenderersFactory setEnableAudioFloatOutput(boolean enableFloatOutput) {
this.enableFloatOutput = enableFloatOutput;
return this;
}

/**
* Sets whether to enable outputting samples in any platform-supported format (such as 32-bit
* float, 32-bit integer, 24-bit integer, 16-bit integer or 8-bit integer) instead of restricting
* output to 16-bit integers. Where possible, the input sample format will be used, otherwise
* high-resolution formats will be output as 32-bit float. Parts of the default audio processing
* chain (for example, speed adjustment) will not be available when output formats other than
* 16-bit integer are in use.
*
* <p>When set to {@code true}, this takes precedence over the deprecated {@link
* #setEnableAudioFloatOutput(boolean)} flag: the default {@code buildAudioSink} implementation
* only applies the float-output flag when this restriction has not been lifted.
*
* <p>The default value is {@code false}.
*
* @param pcmEncodingRestrictionLifted Whether to lift any restriction of output sample format.
* @return This factory, for convenience.
*/
@CanIgnoreReturnValue
public final DefaultRenderersFactory setPcmEncodingRestrictionLifted(
boolean pcmEncodingRestrictionLifted) {
this.pcmEncodingRestrictionLifted = pcmEncodingRestrictionLifted;
return this;
}

/**
* Sets whether to enable setting playback speed using {@link
* android.media.AudioTrack#setPlaybackParams(PlaybackParams)}, which is supported from API level
Expand Down Expand Up @@ -377,7 +401,11 @@ public Renderer[] createRenderers(
renderersList);
@Nullable
AudioSink audioSink =
buildAudioSink(context, enableFloatOutput, enableAudioTrackPlaybackParams);
buildAudioSink(
context,
pcmEncodingRestrictionLifted,
enableFloatOutput,
enableAudioTrackPlaybackParams);
if (audioSink != null) {
buildAudioRenderers(
context,
Expand Down Expand Up @@ -854,11 +882,17 @@ protected void buildMiscellaneousRenderers(
*/
@Nullable
protected AudioSink buildAudioSink(
Context context, boolean enableFloatOutput, boolean enableAudioTrackPlaybackParams) {
return new DefaultAudioSink.Builder(context)
.setEnableFloatOutput(enableFloatOutput)
.setEnableAudioTrackPlaybackParams(enableAudioTrackPlaybackParams)
.build();
Context context,
boolean pcmEncodingRestrictionLifted,
boolean enableFloatOutput,
boolean enableAudioTrackPlaybackParams) {
DefaultAudioSink.Builder builder = new DefaultAudioSink.Builder(context);
if (pcmEncodingRestrictionLifted || !enableFloatOutput) {
builder.setPcmEncodingRestrictionLifted(pcmEncodingRestrictionLifted);
} else {
builder.setEnableFloatOutput(true);
}
return builder.setEnableAudioTrackPlaybackParams(enableAudioTrackPlaybackParams).build();
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,21 @@ default int getAudioTrackChannelConfig(int channelCount) {
/** The time it takes to ramp AudioTrack's volume up or down when pausing or starting to play. */
private static final int AUDIO_TRACK_VOLUME_RAMP_TIME_MS = 20;

/**
* PCM output mode with no sink-imposed restriction: an input PCM encoding is kept as-is whenever
* the running API level supports it. Selected by passing {@code true} to {@link
* Builder#setPcmEncodingRestrictionLifted(boolean)}.
*
* @see Builder#setPcmEncodingRestrictionLifted(boolean)
*/
private static final int PCM_ENCODING_ANY = 0;

/**
* PCM output mode in which every PCM encoding other than 16-bit integer is transcoded to 16-bit
* integer, so that the full audio processor chain can operate. This is the mode a {@link Builder}
* starts in.
*
* @see Builder#setPcmEncodingRestrictionLifted(boolean)
*/
private static final int PCM_ENCODING_INT16_ONLY = 1;

/**
* PCM output mode in which only 16-bit integer and 32-bit float are output without conversion.
* Selected by passing {@code true} to the deprecated {@link
* Builder#setEnableFloatOutput(boolean)}; kept for backwards compatibility.
*
* @see Builder#setEnableFloatOutput(boolean)
*/
private static final int PCM_ENCODING_INT16_FLOAT32_ONLY = 2;

/**
* Thrown when the audio track has provided a spurious timestamp, if {@link
* #failOnSpuriousAudioTimestamp} is set.
Expand All @@ -167,10 +182,12 @@ public interface AudioProcessorChain extends androidx.media3.common.audio.AudioP
/**
* The default audio processor chain, which applies a (possibly empty) chain of user-defined audio
* processors followed by {@link SilenceSkippingAudioProcessor} and {@link SonicAudioProcessor}.
* No audio processors will be applied for PCM encodings other than 16-bit integer.
*/
@SuppressWarnings("deprecation")
public static class DefaultAudioProcessorChain implements AudioProcessorChain {

private boolean formatSupported = false;
private final AudioProcessor[] audioProcessors;
private final SilenceSkippingAudioProcessor silenceSkippingAudioProcessor;
private final SonicAudioProcessor sonicAudioProcessor;
Expand Down Expand Up @@ -207,33 +224,44 @@ public DefaultAudioProcessorChain(
}

@Override
public AudioProcessor[] getAudioProcessors() {
public AudioProcessor[] getAudioProcessors(Format inputFormat) {
if (inputFormat.pcmEncoding != C.ENCODING_PCM_16BIT) {
formatSupported = false;
return new AudioProcessor[0];
}
formatSupported = true;
return audioProcessors;
}

@Override
public PlaybackParameters applyPlaybackParameters(PlaybackParameters playbackParameters) {
  if (formatSupported) {
    // The chain is active for the current format, so forward the request to Sonic and report it
    // back unchanged as the applied value.
    sonicAudioProcessor.setSpeed(playbackParameters.speed);
    sonicAudioProcessor.setPitch(playbackParameters.pitch);
    return playbackParameters;
  }
  // The last format passed to getAudioProcessors was not 16-bit PCM, so no processors are in use
  // and only the default parameters can be reported as applied.
  return PlaybackParameters.DEFAULT;
}

@Override
public boolean applySkipSilenceEnabled(boolean skipSilenceEnabled) {
  if (formatSupported) {
    // The chain is active for the current format: toggle silence skipping and echo the request.
    silenceSkippingAudioProcessor.setEnabled(skipSilenceEnabled);
    return skipSilenceEnabled;
  }
  // The last format passed to getAudioProcessors was not 16-bit PCM, so silence skipping cannot
  // be applied regardless of the requested value.
  return false;
}

@Override
public long getMediaDuration(long playoutDuration) {
return sonicAudioProcessor.isActive()
return formatSupported && sonicAudioProcessor.isActive()
? sonicAudioProcessor.getMediaDuration(playoutDuration)
: playoutDuration;
}

@Override
public long getSkippedOutputFrameCount() {
return silenceSkippingAudioProcessor.getSkippedFrames();
return formatSupported ? silenceSkippingAudioProcessor.getSkippedFrames() : 0;
}
}

Expand Down Expand Up @@ -297,7 +325,7 @@ public static final class Builder {
@Nullable private final Context context;
private AudioCapabilities audioCapabilities;
@Nullable private androidx.media3.common.audio.AudioProcessorChain audioProcessorChain;
private boolean enableFloatOutput;
private int pcmEncodingRestrictionMode;
private boolean enableAudioTrackPlaybackParams;

private boolean buildCalled;
Expand All @@ -316,6 +344,7 @@ public Builder() {
audioCapabilities = DEFAULT_AUDIO_CAPABILITIES;
audioTrackBufferSizeProvider = AudioTrackBufferSizeProvider.DEFAULT;
audioTrackProvider = AudioTrackProvider.DEFAULT;
pcmEncodingRestrictionMode = PCM_ENCODING_INT16_ONLY;
}

/**
Expand All @@ -328,6 +357,7 @@ public Builder(Context context) {
audioCapabilities = DEFAULT_AUDIO_CAPABILITIES;
audioTrackBufferSizeProvider = AudioTrackBufferSizeProvider.DEFAULT;
audioTrackProvider = AudioTrackProvider.DEFAULT;
pcmEncodingRestrictionMode = PCM_ENCODING_INT16_ONLY;
}

/**
Expand Down Expand Up @@ -375,14 +405,37 @@ public Builder setAudioProcessorChain(
/**
* Sets whether to enable 32-bit float output or integer output. Where possible, 32-bit float
* output will be used if the input is 32-bit float, and also if the input is high resolution
* (24-bit or 32-bit) integer PCM. Audio processing (for example, speed adjustment) will not be
* available when float output is in use.
* (24-bit or 32-bit) integer PCM. Parts of the default audio processing chain (for example,
* speed adjustment) will not be available when output formats other than 16-bit integer are in
* use.
*
* <p>The default value is {@code false}.
*
* @deprecated Use {@link #setPcmEncodingRestrictionLifted} instead to allow any encoding, not
* just 32-bit float.
*/
@Deprecated
@CanIgnoreReturnValue
public Builder setEnableFloatOutput(boolean enableFloatOutput) {
this.enableFloatOutput = enableFloatOutput;
this.pcmEncodingRestrictionMode =
enableFloatOutput ? PCM_ENCODING_INT16_FLOAT32_ONLY : PCM_ENCODING_INT16_ONLY;
return this;
}

/**
 * Chooses between unrestricted PCM output and output limited to 16-bit integer PCM.
 *
 * <p>When the restriction is lifted, samples may be output in any platform-supported format
 * (such as 32-bit float, 32-bit integer, 24-bit integer, 16-bit integer or 8-bit integer). The
 * input sample format is used where possible; otherwise high-resolution formats are output as
 * 32-bit float. Parts of the default audio processing chain (for example, speed adjustment) are
 * not available when output formats other than 16-bit integer are in use.
 *
 * <p>The default value is {@code false}.
 *
 * @param pcmEncodingRestrictionLifted Whether to lift the restriction to 16-bit integer output.
 * @return This builder, for convenience.
 */
@CanIgnoreReturnValue
public Builder setPcmEncodingRestrictionLifted(boolean pcmEncodingRestrictionLifted) {
  if (pcmEncodingRestrictionLifted) {
    this.pcmEncodingRestrictionMode = PCM_ENCODING_ANY;
  } else {
    this.pcmEncodingRestrictionMode = PCM_ENCODING_INT16_ONLY;
  }
  return this;
}

Expand Down Expand Up @@ -549,7 +602,7 @@ public DefaultAudioSink build() {

@Nullable private final Context context;
private final androidx.media3.common.audio.AudioProcessorChain audioProcessorChain;
private final boolean enableFloatOutput;
private final int pcmEncodingRestrictionMode;
private final ChannelMappingAudioProcessor channelMappingAudioProcessor;
private final TrimmingAudioProcessor trimmingAudioProcessor;
private final ToInt16PcmAudioProcessor toInt16PcmAudioProcessor;
Expand Down Expand Up @@ -628,7 +681,7 @@ private DefaultAudioSink(Builder builder) {
audioAttributes = AudioAttributes.DEFAULT;
audioCapabilities = context != null ? null : builder.audioCapabilities;
audioProcessorChain = builder.audioProcessorChain;
enableFloatOutput = builder.enableFloatOutput;
pcmEncodingRestrictionMode = builder.pcmEncodingRestrictionMode;
preferAudioTrackPlaybackParams = SDK_INT >= 23 && builder.enableAudioTrackPlaybackParams;
offloadMode = OFFLOAD_MODE_DISABLED;
audioTrackBufferSizeProvider = builder.audioTrackBufferSizeProvider;
Expand Down Expand Up @@ -690,13 +743,24 @@ public boolean supportsFormat(Format format) {
Log.w(TAG, "Invalid PCM encoding: " + format.pcmEncoding);
return SINK_FORMAT_UNSUPPORTED;
}
if (format.pcmEncoding == C.ENCODING_PCM_16BIT
|| (enableFloatOutput && format.pcmEncoding == C.ENCODING_PCM_FLOAT)) {
return SINK_FORMAT_SUPPORTED_DIRECTLY;
if (format.pcmEncoding != C.ENCODING_PCM_16BIT) {
if (pcmEncodingRestrictionMode == PCM_ENCODING_INT16_FLOAT32_ONLY
&& format.pcmEncoding != C.ENCODING_PCM_FLOAT) {
// PCM_ENCODING_INT16_FLOAT32_ONLY is deprecated and kept for backwards compatibility.
return SINK_FORMAT_SUPPORTED_WITH_TRANSCODING;
}
if (pcmEncodingRestrictionMode == PCM_ENCODING_INT16_ONLY) {
// We will forcibly transcode to 16-bit PCM to allow the full audio processor chain to
// operate.
return SINK_FORMAT_SUPPORTED_WITH_TRANSCODING;
}
}
if (SDK_INT < Util.getApiLevelThatAudioFormatIntroducedAudioEncoding(format.pcmEncoding)) {
// We can resample all linear PCM encodings to 16-bit integer PCM, which AudioTrack is
// guaranteed to support.
return SINK_FORMAT_SUPPORTED_WITH_TRANSCODING;
}
// We can resample all linear PCM encodings to 16-bit integer PCM, which AudioTrack is
// guaranteed to support.
return SINK_FORMAT_SUPPORTED_WITH_TRANSCODING;
return SINK_FORMAT_SUPPORTED_DIRECTLY;
}
if (audioCapabilities.isPassthroughPlaybackSupported(format, audioAttributes)) {
return SINK_FORMAT_SUPPORTED_DIRECTLY;
Expand Down Expand Up @@ -743,12 +807,32 @@ public void configure(Format inputFormat, int specifiedBufferSize, @Nullable int

ImmutableList.Builder<AudioProcessor> pipelineProcessors = new ImmutableList.Builder<>();
pipelineProcessors.addAll(availableAudioProcessors);
if (shouldUseFloatOutput(inputFormat.pcmEncoding)) {
pipelineProcessors.add(toFloatPcmAudioProcessor);
// We need to convert the sample format either if the platform doesn't support it, or if:
// - there is some PCM encoding restriction, and
// - the format isn't 16-bit integer (which is always allowed), and
// - the format isn't 32-bit float, or 32-bit float isn't allowed.
Format afterConversionFormat;
if (SDK_INT < Util.getApiLevelThatAudioFormatIntroducedAudioEncoding(inputFormat.pcmEncoding)
|| (pcmEncodingRestrictionMode != PCM_ENCODING_ANY
&& inputFormat.pcmEncoding != C.ENCODING_PCM_16BIT
&& (inputFormat.pcmEncoding != C.ENCODING_PCM_FLOAT
|| pcmEncodingRestrictionMode == PCM_ENCODING_INT16_ONLY))) {
if (Util.isEncodingHighResolutionPcm(inputFormat.pcmEncoding)
&& pcmEncodingRestrictionMode != PCM_ENCODING_INT16_ONLY) {
pipelineProcessors.add(toFloatPcmAudioProcessor);
afterConversionFormat =
Util.getPcmFormat(
C.ENCODING_PCM_FLOAT, inputFormat.channelCount, inputFormat.sampleRate);
} else {
pipelineProcessors.add(toInt16PcmAudioProcessor);
afterConversionFormat =
Util.getPcmFormat(
C.ENCODING_PCM_16BIT, inputFormat.channelCount, inputFormat.sampleRate);
}
} else {
pipelineProcessors.add(toInt16PcmAudioProcessor);
pipelineProcessors.add(audioProcessorChain.getAudioProcessors());
afterConversionFormat = inputFormat;
}
pipelineProcessors.add(audioProcessorChain.getAudioProcessors(afterConversionFormat));
audioProcessingPipeline = new AudioProcessingPipeline(pipelineProcessors.build());

// If the underlying processors of the new pipeline are the same as the existing pipeline,
Expand Down Expand Up @@ -1779,25 +1863,14 @@ private boolean shouldApplyAudioProcessorPlaybackParameters() {
// frame presentation times are currently not modified (see also
// https://github.com/google/ExoPlayer/issues/4803);
// - when playing encoded audio via passthrough/offload, because modifying the audio stream
// would require decoding/re-encoding; and
// - when outputting float PCM audio, because SonicAudioProcessor outputs 16-bit integer PCM.
return !tunneling
&& configuration.outputMode == OUTPUT_MODE_PCM
&& !shouldUseFloatOutput(configuration.inputFormat.pcmEncoding);
// would require decoding/re-encoding.
return !tunneling && configuration.outputMode == OUTPUT_MODE_PCM;
}

/**
 * Returns whether a configuration exists that enables AudioTrack playback params, and the API
 * level is at least 23.
 */
private boolean useAudioTrackPlaybackParams() {
  if (configuration == null) {
    // Nothing has been configured yet, so there is no setting to honour.
    return false;
  }
  return SDK_INT >= 23 && configuration.enableAudioTrackPlaybackParams;
}

/**
* Returns whether audio in the specified PCM encoding should be written to the audio track as
* float PCM.
*/
private boolean shouldUseFloatOutput(@C.PcmEncoding int pcmEncoding) {
return enableFloatOutput && Util.isEncodingHighResolutionPcm(pcmEncoding);
}

/**
* Applies and updates media position parameters.
*
Expand Down
Loading