Skip to content

Commit c5c564e

Browse files
committed
Parse ReplayGain data from LAME Xing/Info header
see http://gabriel.mp3-tech.org/mp3infotag.html#replaygain
1 parent df4ae49 commit c5c564e

File tree

3 files changed

+272
-7
lines changed

3 files changed

+272
-7
lines changed

libraries/extractor/src/main/java/androidx/media3/extractor/mp3/Mp3Extractor.java

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,7 @@ public final class Mp3Extractor implements Extractor {
174174
private int synchronizedHeaderData;
175175

176176
@Nullable private Metadata metadata;
177+
@Nullable private Metadata infoMetadata;
177178
private long basisTimeUs;
178179
private long samplesRead;
179180
private long firstSamplePosition;
@@ -290,6 +291,12 @@ private int readInternal(ExtractorInput input) throws IOException {
290291
if (seeker == null) {
291292
seeker = computeSeeker(input);
292293
extractorOutput.seekMap(seeker);
294+
@Nullable Metadata finalMetadata = (flags & FLAG_DISABLE_ID3_METADATA) != 0 ? null : metadata;
295+
if (finalMetadata != null) {
296+
finalMetadata = finalMetadata.copyWithAppendedEntriesFrom(infoMetadata);
297+
} else {
298+
finalMetadata = infoMetadata;
299+
}
293300
Format.Builder format =
294301
new Format.Builder()
295302
.setContainerMimeType(MimeTypes.AUDIO_MPEG)
@@ -299,7 +306,7 @@ private int readInternal(ExtractorInput input) throws IOException {
299306
.setSampleRate(synchronizedHeader.sampleRate)
300307
.setEncoderDelay(gaplessInfoHolder.encoderDelay)
301308
.setEncoderPadding(gaplessInfoHolder.encoderPadding)
302-
.setMetadata((flags & FLAG_DISABLE_ID3_METADATA) != 0 ? null : metadata);
309+
.setMetadata(finalMetadata);
303310
if (seeker.getAverageBitrate() != C.RATE_UNSET_INT) {
304311
format.setAverageBitrate(seeker.getAverageBitrate());
305312
}
@@ -575,6 +582,7 @@ private Seeker maybeReadSeekFrame(ExtractorInput input) throws IOException {
575582
gaplessInfoHolder.encoderDelay = xingFrame.encoderDelay;
576583
gaplessInfoHolder.encoderPadding = xingFrame.encoderPadding;
577584
}
585+
infoMetadata = xingFrame.getMetadata();
578586
long startPosition = input.getPosition();
579587
if (input.getLength() != C.LENGTH_UNSET
580588
&& xingFrame.dataSize != C.LENGTH_UNSET
Lines changed: 227 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,227 @@
1+
/*
2+
* Copyright 2025 The Android Open Source Project
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* https://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
package androidx.media3.extractor.mp3;
17+
18+
import static java.lang.annotation.ElementType.TYPE_USE;
19+
20+
import android.annotation.SuppressLint;
21+
import androidx.annotation.IntDef;
22+
import androidx.media3.common.Metadata;
23+
import androidx.media3.common.util.UnstableApi;
24+
import java.lang.annotation.Documented;
25+
import java.lang.annotation.Retention;
26+
import java.lang.annotation.RetentionPolicy;
27+
import java.lang.annotation.Target;
28+
import java.util.Objects;
29+
30+
/** Representation of the ReplayGain data stored in a LAME Xing or Info frame. */
31+
@UnstableApi
32+
public final class Mp3InfoReplayGain implements Metadata.Entry {
33+
/**
34+
* 32 bit floating point "Peak signal amplitude".
35+
*
36+
* <p>1.0 is maximal signal amplitude store-able in decoding format. 0.8 is 80% of maximal signal
37+
* amplitude store-able in decoding format. 1.5 is 150% of maximal signal amplitude store-able in
38+
* decoding format.
39+
*
40+
* <p>A value above 1.0 can occur for example due to "true peak" measurement. A value of 0.0 means
41+
* the peak signal amplitude is unknown.
42+
*/
43+
public final float peak;
44+
45+
/** A gain field can store one gain adjustment with name and originator metadata. */
46+
public static final class GainField {
47+
/** This gain field contains no valid data, and should be ignored. */
48+
public static final int NAME_INVALID = 0;
49+
50+
/**
51+
* This gain field contains a gain adjustment that will make all the tracks sound equally loud
52+
* (as they do on the radio, hence the name!). If the ReplayGain is calculated on a
53+
* track-by-track basis (i.e. an individual ReplayGain calculation is carried out for each
54+
* track), this will be the result.
55+
*/
56+
public static final int NAME_RADIO = 1;
57+
58+
/**
59+
* The problem with the "Radio" setting is that tracks which should be quiet will be brought up
60+
* to the level of all the rest.
61+
*
62+
* <p>To solve this problem, the "Audiophile" setting represents the ideal listening gain for
63+
* each track. ReplayGain can have a good guess at this too, by reading the entire CD, and
64+
* calculating a single gain adjustment for the whole disc. This works because quiet tracks then
65+
* stay quieter than the rest, since the gain won't be changed for each track. It still solves
66+
* the basic problem (annoying, unwanted level differences between discs) because quiet or loud
67+
* discs are still adjusted overall.
68+
*
69+
* <p>Where ReplayGain will fail is if you have an entire CD of quiet music. It will bring it up
70+
* to an average level. This is why the "Audiophile" Replay Gain adjustment must be user
71+
* adjustable. The ReplayGain whole disc value represents a good guess, and should be stored in
72+
* the file. Later, the user can tweak it if required. If the file has originated from the
73+
* artist, then the "Audiophile" setting can be specified by the artist. Naturally, the user is
74+
* free to change the value if they desire.
75+
*/
76+
public static final int NAME_AUDIOPHILE = 2;
77+
78+
/** The origin of this gain adjustment is not known. */
79+
public static final int ORIGINATOR_UNKNOWN = 0;
80+
81+
/** This gain adjustment was manually determined by the artist. */
82+
public static final int ORIGINATOR_ARTIST = 1;
83+
84+
/** This gain adjustment was manually determined by the user. */
85+
public static final int ORIGINATOR_USER = 2;
86+
87+
/** This gain adjustment was automatically determined by the ReplayGain algorithm. */
88+
public static final int ORIGINATOR_REPLAYGAIN = 3;
89+
90+
/** This gain adjustment was automatically determined by a simple RMS algorithm. */
91+
public static final int ORIGINATOR_SIMPLE_RMS = 4;
92+
93+
/** Creates a gain field from already unpacked values. */
94+
public GainField(@Name int name, @Originator int originator, float gain) {
95+
this.name = name;
96+
this.originator = originator;
97+
this.gain = gain;
98+
}
99+
100+
/** Creates a gain field from the packed representation. */
101+
@SuppressLint("WrongConstant")
102+
public GainField(short field) {
103+
this.name = (field >> 13) & 7;
104+
this.originator = (field >> 10) & 7;
105+
this.gain = ((field & 0x1ff) * ((field & 0x200) != 0 ? -1 : 1)) / 10f;
106+
}
107+
108+
@Documented
109+
@Retention(RetentionPolicy.SOURCE)
110+
@Target(TYPE_USE)
111+
@IntDef({NAME_INVALID, NAME_RADIO, NAME_AUDIOPHILE})
112+
public @interface Name {}
113+
114+
@Documented
115+
@Retention(RetentionPolicy.SOURCE)
116+
@Target(TYPE_USE)
117+
@IntDef({
118+
ORIGINATOR_UNKNOWN,
119+
ORIGINATOR_ARTIST,
120+
ORIGINATOR_USER,
121+
ORIGINATOR_REPLAYGAIN,
122+
ORIGINATOR_SIMPLE_RMS
123+
})
124+
public @interface Originator {}
125+
126+
/**
127+
* Name/type of the gain field.
128+
*
129+
* <p>If equal to {@link #NAME_INVALID}, or an unknown name, the entire {@link GainField} should
130+
* be ignored.
131+
*/
132+
public final @Name int name;
133+
134+
/**
135+
* Originator of the gain field, i.e. who determined the value / in what way it was determined.
136+
*
137+
* <p>Either a human (user / artist) set the value according to their preferences, or an
138+
* algorithm like ReplayGain or simple RMS average was used to determine it.
139+
*/
140+
public final @Originator int originator;
141+
142+
/**
143+
* Absolute gain adjustment in decibels.
144+
*
145+
* <p>Positive values mean the signal should be amplified, negative values mean it should be
146+
* attenuated.
147+
*
148+
* <p>Due to limitations of the storage format, this is only accurate to the first decimal
149+
* place.
150+
*/
151+
public final float gain;
152+
153+
/**
154+
* @return Whether the name field is set to a valid value, hence, whether this gain field should
155+
* be considered or not. If false, the entire field should be ignored.
156+
*/
157+
public boolean isValid() {
158+
return name == NAME_RADIO || name == NAME_AUDIOPHILE;
159+
}
160+
161+
@Override
162+
public String toString() {
163+
return "GainField{" + "name=" + name + ", originator=" + originator + ", gain=" + gain + '}';
164+
}
165+
166+
@Override
167+
public boolean equals(Object o) {
168+
if (!(o instanceof GainField)) {
169+
return false;
170+
}
171+
GainField gainField = (GainField) o;
172+
return name == gainField.name
173+
&& originator == gainField.originator
174+
&& Float.compare(gain, gainField.gain) == 0;
175+
}
176+
177+
@Override
178+
public int hashCode() {
179+
return Objects.hash(name, originator, gain);
180+
}
181+
}
182+
183+
/** The first of two gain fields in the LAME MP3 Info header. */
184+
public GainField field1;
185+
186+
/** The second of two gain fields in the LAME MP3 Info header. */
187+
public GainField field2;
188+
189+
/** Creates the gain field from already unpacked values. */
190+
public Mp3InfoReplayGain(float peak, GainField field1, GainField field2) {
191+
this.peak = peak;
192+
this.field1 = field1;
193+
this.field2 = field2;
194+
}
195+
196+
/** Creates the gain fields from the packed representation. */
197+
public Mp3InfoReplayGain(float peak, short field1, short field2) {
198+
this(peak, new GainField(field1), new GainField(field2));
199+
}
200+
201+
@Override
202+
public String toString() {
203+
return "ReplayGain Xing/Info: "
204+
+ "peak="
205+
+ peak
206+
+ ", field 1="
207+
+ field1
208+
+ ", field 2="
209+
+ field2;
210+
}
211+
212+
@Override
213+
public boolean equals(Object o) {
214+
if (!(o instanceof Mp3InfoReplayGain)) {
215+
return false;
216+
}
217+
Mp3InfoReplayGain that = (Mp3InfoReplayGain) o;
218+
return Float.compare(peak, that.peak) == 0
219+
&& Objects.equals(field1, that.field1)
220+
&& Objects.equals(field2, that.field2);
221+
}
222+
223+
@Override
224+
public int hashCode() {
225+
return Objects.hash(peak, field1, field2);
226+
}
227+
}

libraries/extractor/src/main/java/androidx/media3/extractor/mp3/XingFrame.java

Lines changed: 36 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717

1818
import androidx.annotation.Nullable;
1919
import androidx.media3.common.C;
20+
import androidx.media3.common.Metadata;
2021
import androidx.media3.common.util.ParsableByteArray;
2122
import androidx.media3.common.util.Util;
2223
import androidx.media3.extractor.MpegAudioUtil;
@@ -35,6 +36,9 @@
3536
*/
3637
public final long dataSize;
3738

39+
/** ReplayGain data. Only present if this frame is an Info or the LAME variant of a Xing frame. */
40+
public final @Nullable Mp3InfoReplayGain replayGain;
41+
3842
/**
3943
* The number of samples to skip at the start of the stream, or {@link C#LENGTH_UNSET} if not
4044
* present in the header.
@@ -58,12 +62,14 @@ private XingFrame(
5862
long frameCount,
5963
long dataSize,
6064
@Nullable long[] tableOfContents,
65+
@Nullable Mp3InfoReplayGain replayGain,
6166
int encoderDelay,
6267
int encoderPadding) {
6368
this.header = new MpegAudioUtil.Header(header);
6469
this.frameCount = frameCount;
6570
this.dataSize = dataSize;
6671
this.tableOfContents = tableOfContents;
72+
this.replayGain = replayGain;
6773
this.encoderDelay = encoderDelay;
6874
this.encoderPadding = encoderPadding;
6975
}
@@ -98,23 +104,39 @@ public static XingFrame parse(MpegAudioUtil.Header mpegAudioHeader, ParsableByte
98104
frame.skipBytes(4); // Quality indicator
99105
}
100106

107+
@Nullable Mp3InfoReplayGain replayGain;
101108
int encoderDelay;
102109
int encoderPadding;
103-
// Skip: version string (9), revision & VBR method (1), lowpass filter (1), replay gain (8),
104-
// encoding flags & ATH type (1), bitrate (1).
105-
int bytesToSkipBeforeEncoderDelayAndPadding = 9 + 1 + 1 + 8 + 1 + 1;
106-
if (frame.bytesLeft() >= bytesToSkipBeforeEncoderDelayAndPadding + 3) {
107-
frame.skipBytes(bytesToSkipBeforeEncoderDelayAndPadding);
110+
// Skip: version string (9), revision & VBR method (1), lowpass filter (1).
111+
int bytesToSkipBeforeReplayGain = 9 + 1 + 1;
112+
// Skip: encoding flags & ATH type (1), bitrate (1).
113+
int bytesToSkipAfterReplayGain = 1 + 1;
114+
// And account for values we parse, ReplayGain (8) and encoder delay & padding (3).
115+
if (frame.bytesLeft() >= bytesToSkipBeforeReplayGain + 8 + bytesToSkipAfterReplayGain + 3) {
116+
frame.skipBytes(bytesToSkipBeforeReplayGain);
117+
float peak = frame.readFloat();
118+
short field1 = frame.readShort();
119+
short field2 = frame.readShort();
120+
replayGain = new Mp3InfoReplayGain(peak, field1, field2);
121+
122+
frame.skipBytes(bytesToSkipAfterReplayGain);
108123
int encoderDelayAndPadding = frame.readUnsignedInt24();
109124
encoderDelay = (encoderDelayAndPadding & 0xFFF000) >> 12;
110125
encoderPadding = (encoderDelayAndPadding & 0xFFF);
111126
} else {
127+
replayGain = null;
112128
encoderDelay = C.LENGTH_UNSET;
113129
encoderPadding = C.LENGTH_UNSET;
114130
}
115131

116132
return new XingFrame(
117-
mpegAudioHeader, frameCount, dataSize, tableOfContents, encoderDelay, encoderPadding);
133+
mpegAudioHeader,
134+
frameCount,
135+
dataSize,
136+
tableOfContents,
137+
replayGain,
138+
encoderDelay,
139+
encoderPadding);
118140
}
119141

120142
/**
@@ -132,4 +154,12 @@ public long computeDurationUs() {
132154
return Util.sampleCountToDurationUs(
133155
(frameCount * header.samplesPerFrame) - 1, header.sampleRate);
134156
}
157+
158+
/** Provide the metadata derived from this Xing frame, such as ReplayGain data. */
159+
public @Nullable Metadata getMetadata() {
160+
if (replayGain != null) {
161+
return new Metadata(replayGain);
162+
}
163+
return null;
164+
}
135165
}

0 commit comments

Comments
 (0)