Skip to content

Commit 54c4eb0

Browse files
committed
[PERFORMANCE] Improve FileSystemFontProvider.scanFonts() performance by adding 'only headers' mode to TTF parser:
* only read tables needed for FSFontInfo ('name', 'head', 'OS/2', 'CFF ', 'gcid') * 'CFF ' and 'head' table parsers finish as soon as they have all the needed headers
1 parent cb29a4a commit 54c4eb0

File tree

10 files changed

+618
-129
lines changed

10 files changed

+618
-129
lines changed

fontbox/src/main/java/org/apache/fontbox/cff/CFFParser.java

+91-16
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828

2929
import org.apache.commons.logging.Log;
3030
import org.apache.commons.logging.LogFactory;
31+
import org.apache.fontbox.ttf.FontHeaders;
3132
import org.apache.pdfbox.io.RandomAccessRead;
3233

3334

@@ -106,13 +107,61 @@ public List<CFFFont> parse(RandomAccessRead randomAccessRead) throws IOException
106107
}
107108

108109
/**
109-
* Parse CFF font using a DataInput as input.
110+
* Extract "Registry", "Ordering" and "Supplement" properties from the first CFF subfont.
110111
*
111-
* @param input the source to be parsed
112+
* @param randomAccessRead the source to be parsed
113+
* @param outHeaders where to put results
112114
* Nothing is returned: the result, or an error message for an unusable font, is stored in {@code outHeaders}.
113115
* @throws IOException If there is an error reading from the stream
114116
*/
115-
private List<CFFFont> parse(DataInput input) throws IOException
117+
public void parseFirstSubFontROS(RandomAccessRead randomAccessRead, FontHeaders outHeaders) throws IOException
118+
{
119+
// This method is a simplified and merged version of parse(RandomAccessRead) > parse(DataInput) > parseFont(...).
// It only inspects the first Top DICT and stops after its "ROS" entry; results are written to 'outHeaders'.
// Missing indexes and synthetic fonts are reported via outHeaders.setError(...) followed by a normal return.
120+
121+
// start code from parse(RandomAccessRead)
122+
randomAccessRead.seek(0);
123+
DataInput input = new DataInputRandomAccessRead(randomAccessRead);
124+
125+
// start code from parse(DataInput)
126+
input = skipHeader(input);
127+
String[] nameIndex = readStringIndexData(input);
128+
if (nameIndex.length == 0)
129+
{
// recorded on 'outHeaders' instead of being thrown; nothing else is read
130+
outHeaders.setError("Name index missing in CFF font");
131+
return;
132+
}
133+
byte[][] topDictIndex = readIndexData(input);
134+
if (topDictIndex.length == 0)
135+
{
// recorded on 'outHeaders' instead of being thrown; nothing else is read
136+
outHeaders.setError("Top DICT INDEX missing in CFF font");
137+
return;
138+
}
139+
140+
// 'stringIndex' is required by 'parseROS() > readString()'
// (it is assigned to the parser's field, not to a local variable)
141+
stringIndex = readStringIndexData(input);
142+
143+
// start code from parseFont(...)
// only the first entry of the Top DICT INDEX (i.e. the first subfont) is parsed
144+
DataInputByteArray topDictInput = new DataInputByteArray(topDictIndex[0]);
145+
DictData topDict = readDictData(topDictInput);
146+
147+
DictData.Entry syntheticBaseEntry = topDict.getEntry("SyntheticBase");
148+
if (syntheticBaseEntry != null)
149+
{
150+
outHeaders.setError("Synthetic Fonts are not supported");
151+
return;
152+
}
153+
// parseROS() returns null when the Top DICT has no "ROS" entry; then no ROS values are stored.
// Unlike the checks above, a "ROS" entry with fewer than 3 elements makes parseROS() throw an IOException.
154+
CFFCIDFont cffCIDFont = parseROS(topDict);
155+
if (cffCIDFont != null)
156+
{
157+
outHeaders.setOtfROS(
158+
cffCIDFont.getRegistry(),
159+
cffCIDFont.getOrdering(),
160+
cffCIDFont.getSupplement());
161+
}
162+
}
163+
164+
private DataInput skipHeader(DataInput input) throws IOException
116165
{
117166
String firstTag = readTagName(input);
118167
// try to determine which kind of font we have
@@ -132,6 +181,19 @@ private List<CFFFont> parse(DataInput input) throws IOException
132181

133182
@SuppressWarnings("unused")
134183
Header header = readHeader(input);
184+
return input;
185+
}
186+
187+
/**
188+
* Parse CFF font using a DataInput as input.
189+
*
190+
* @param input the source to be parsed
191+
* @return the parsed CFF fonts
192+
* @throws IOException If there is an error reading from the stream
193+
*/
194+
private List<CFFFont> parse(DataInput input) throws IOException
195+
{
196+
input = skipHeader(input);
135197
String[] nameIndex = readStringIndexData(input);
136198
if (nameIndex.length == 0)
137199
{
@@ -463,6 +525,28 @@ private static Double readRealNumber(DataInput input) throws IOException
463525
}
464526
}
465527

528+
/**
529+
* Extracts Registry, Ordering and Supplement from {@code topDict["ROS"]}.
*
* @param topDict the Top DICT data to inspect
* @return a new {@code CFFCIDFont} on which only registry, ordering and supplement have been set,
* or {@code null} if the dictionary has no "ROS" entry
* @throws IOException if the "ROS" entry has fewer than 3 elements
530+
*/
531+
private CFFCIDFont parseROS(DictData topDict) throws IOException
532+
{
533+
// determine if this is a Type 1-equivalent font or a CIDFont:
// a "ROS" entry yields a CFFCIDFont, a missing entry yields null
534+
DictData.Entry rosEntry = topDict.getEntry("ROS");
535+
if (rosEntry != null)
536+
{
537+
if (rosEntry.size() < 3)
538+
{
539+
throw new IOException("ROS entry must have 3 elements");
540+
}
541+
CFFCIDFont cffCIDFont = new CFFCIDFont();
// elements 0 and 1 are numbers that readString() turns into the registry and ordering strings;
// element 2 is used directly as the integer supplement
542+
cffCIDFont.setRegistry(readString(rosEntry.getNumber(0).intValue()));
543+
cffCIDFont.setOrdering(readString(rosEntry.getNumber(1).intValue()));
544+
cffCIDFont.setSupplement(rosEntry.getNumber(2).intValue());
545+
return cffCIDFont;
546+
}
547+
return null;
548+
}
549+
466550
private CFFFont parseFont(DataInput input, String name, byte[] topDictIndex) throws IOException
467551
{
468552
// top dict
@@ -476,21 +560,12 @@ private CFFFont parseFont(DataInput input, String name, byte[] topDictIndex) thr
476560
throw new IOException("Synthetic Fonts are not supported");
477561
}
478562

479-
// determine if this is a Type 1-equivalent font or a CIDFont
480563
CFFFont font;
481-
boolean isCIDFont = topDict.getEntry("ROS") != null;
482-
if (isCIDFont)
564+
CFFCIDFont cffCIDFont = parseROS(topDict);
565+
// determine if this is a Type 1-equivalent font or a CIDFont
566+
boolean isCIDFont = cffCIDFont != null;
567+
if (cffCIDFont != null)
483568
{
484-
CFFCIDFont cffCIDFont = new CFFCIDFont();
485-
DictData.Entry rosEntry = topDict.getEntry("ROS");
486-
if (rosEntry == null || rosEntry.size() < 3)
487-
{
488-
throw new IOException("ROS entry must have 3 elements");
489-
}
490-
cffCIDFont.setRegistry(readString(rosEntry.getNumber(0).intValue()));
491-
cffCIDFont.setOrdering(readString(rosEntry.getNumber(1).intValue()));
492-
cffCIDFont.setSupplement(rosEntry.getNumber(2).intValue());
493-
494569
font = cffCIDFont;
495570
}
496571
else

fontbox/src/main/java/org/apache/fontbox/ttf/CFFTable.java

+23
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@
2020
import java.io.IOException;
2121
import org.apache.fontbox.cff.CFFFont;
2222
import org.apache.fontbox.cff.CFFParser;
23+
import org.apache.pdfbox.io.RandomAccessRead;
24+
import org.apache.pdfbox.io.RandomAccessReadBuffer;
2325

2426
/**
2527
* PostScript font program (compact font format).
@@ -56,6 +58,27 @@ void read(TrueTypeFont ttf, TTFDataStream data) throws IOException
5658
initialized = true;
5759
}
5860

61+
/**
* {@inheritDoc}
* <p>
* This implementation hands the table data to {@code CFFParser.parseFirstSubFontROS(...)},
* which stores its result (or an error message) in {@code outHeaders}.
*/
62+
@Override
63+
void readHeaders(TrueTypeFont ttf, TTFDataStream data, FontHeaders outHeaders) throws IOException
64+
{
// try a sub-view of the stream limited to this table's length first;
// a null resource is allowed in try-with-resources and is simply not closed
65+
try (RandomAccessRead subReader = data.createSubView(getLength()))
66+
{
67+
RandomAccessRead reader;
68+
if (subReader != null)
69+
{
70+
reader = subReader;
71+
}
72+
else
73+
{
// fallback when no sub-view is available: with assertions enabled (-ea) this fails here,
// otherwise the whole table is copied into a byte array
// NOTE(review): this RandomAccessReadBuffer is not part of the try-with-resources,
// so it is never closed explicitly - confirm whether that matters
74+
assert false : "It is inefficient to read TTFDataStream into an array";
75+
byte[] bytes = data.read((int)getLength());
76+
reader = new RandomAccessReadBuffer(bytes);
77+
}
// a fresh parser per call; 'ttf' is not used by this implementation
78+
new CFFParser().parseFirstSubFontROS(reader, outHeaders);
79+
}
80+
}
81+
5982
/**
6083
* Returns the CFF font, which is a compact representation of a PostScript Type 1, or CIDFont
6184
*
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,152 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
package org.apache.fontbox.ttf;
18+
19+
/**
20+
* To improve performance of {@code FileSystemFontProvider.scanFonts(...)},
21+
* this class is used both as a marker (to skip unused data) and as a storage for collected data.
22+
* <p>
23+
* Tables it needs:<ul>
24+
* <li>NamingTable.TAG
25+
* <li>HeaderTable.TAG
26+
* <li>OS2WindowsMetricsTable.TAG
27+
* <li>CFFTable.TAG (for OTF)
28+
* <li>"gcid" (for non-OTF)
29+
* </ul>
30+
*
31+
* @author Mykola Bohdiuk
32+
*/
33+
public final class FontHeaders
34+
{
// NOTE(review): presumably the number of bytes of the "gcid" table kept in 'nonOtfGcid142' - confirm against the reader
35+
static final int BYTES_GCID = 142;
36+
// error message recorded via setError(...); stays null when nothing was reported
37+
private String error;
38+
private String name;
// boxed so that null can mean "no HeaderTable", see getHeaderMacStyle()
39+
private Integer headerMacStyle;
40+
private OS2WindowsMetricsTable os2Windows;
41+
private String fontFamily;
42+
private String fontSubFamily;
43+
private byte[] nonOtfGcid142;
44+
//
45+
private boolean isOTFAndPostScript;
// the three values below are only assigned by setOtfROS(...)
46+
private String otfRegistry;
47+
private String otfOrdering;
48+
private int otfSupplement;
49+
/**
* Returns the error message recorded via {@link #setError(String)}, or {@code null} if none was set.
*/
50+
public String getError()
51+
{
52+
return error;
53+
}
54+
/**
* Returns the value stored via {@code setName(...)}, or {@code null} if none was set.
*/
55+
public String getName()
56+
{
57+
return name;
58+
}
59+
60+
/**
61+
* Returns the stored macStyle value, i.e. what {@code ttf.getHeader().getMacStyle()} would return,
* or {@code null} if there is no HeaderTable.
62+
*/
63+
public Integer getHeaderMacStyle()
64+
{
65+
return headerMacStyle;
66+
}
67+
/**
* Returns the table stored via {@code setOs2Windows(...)}, or {@code null} if none was set.
*/
68+
public OS2WindowsMetricsTable getOS2Windows()
69+
{
70+
return os2Windows;
71+
}
72+
73+
// only when LOGGER(FileSystemFontProvider).isTraceEnabled() tracing: FontFamily, FontSubfamily
74+
public String getFontFamily()
75+
{
76+
return fontFamily;
77+
}
78+
79+
public String getFontSubFamily()
80+
{
81+
return fontSubFamily;
82+
}
83+
/**
* Returns the flag stored via {@code setIsOTFAndPostScript(...)}; {@code false} if it was never set.
*/
84+
public boolean isOpenTypePostScript()
85+
{
86+
return isOTFAndPostScript;
87+
}
88+
/**
* Returns the array stored via {@code setNonOtfGcid142(...)}; the array itself is returned, not a copy.
*/
89+
public byte[] getNonOtfTableGCID142()
90+
{
91+
return nonOtfGcid142;
92+
}
93+
/**
* Returns the registry stored via {@link #setOtfROS(String, String, int)}, or {@code null} if it was never called.
*/
94+
public String getOtfRegistry()
95+
{
96+
return otfRegistry;
97+
}
98+
/**
* Returns the ordering stored via {@link #setOtfROS(String, String, int)}, or {@code null} if it was never called.
*/
99+
public String getOtfOrdering()
100+
{
101+
return otfOrdering;
102+
}
103+
/**
* Returns the supplement stored via {@link #setOtfROS(String, String, int)}; 0 if it was never called.
*/
104+
public int getOtfSupplement()
105+
{
106+
return otfSupplement;
107+
}
108+
/**
* Records an error message; a later call overwrites an earlier one.
*
* @param exception the message text (a plain string, despite the parameter name)
*/
109+
public void setError(String exception)
110+
{
111+
this.error = exception;
112+
}
113+
114+
void setName(String name)
115+
{
116+
this.name = name;
117+
}
118+
119+
void setHeaderMacStyle(Integer headerMacStyle)
120+
{
121+
this.headerMacStyle = headerMacStyle;
122+
}
123+
124+
void setOs2Windows(OS2WindowsMetricsTable os2Windows)
125+
{
126+
this.os2Windows = os2Windows;
127+
}
128+
// family and sub-family are always set together
129+
void setFontFamily(String fontFamily, String fontSubFamily)
130+
{
131+
this.fontFamily = fontFamily;
132+
this.fontSubFamily = fontSubFamily;
133+
}
134+
// stores the given array reference as is (no copy)
135+
void setNonOtfGcid142(byte[] nonOtfGcid142)
136+
{
137+
this.nonOtfGcid142 = nonOtfGcid142;
138+
}
139+
140+
void setIsOTFAndPostScript(boolean isOTFAndPostScript)
141+
{
142+
this.isOTFAndPostScript = isOTFAndPostScript;
143+
}
144+
145+
// public because CFFParser is in a different package
146+
public void setOtfROS(String otfRegistry, String otfOrdering, int otfSupplement)
147+
{
148+
this.otfRegistry = otfRegistry;
149+
this.otfOrdering = otfOrdering;
150+
this.otfSupplement = otfSupplement;
151+
}
152+
}

fontbox/src/main/java/org/apache/fontbox/ttf/HeaderTable.java

+10
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,16 @@ public class HeaderTable extends TTFTable
6464
super();
6565
}
6666

67+
/**
* {@inheritDoc}
* <p>
* This implementation reads only the macStyle field and stores it in {@code outHeaders}.
*/
68+
@Override
69+
void readHeaders(TrueTypeFont ttf, TTFDataStream data, FontHeaders outHeaders) throws IOException
70+
{
71+
// 44 == 4 + 4 + 4 + 4 + 2 + 2 + 2*8 + 4*2, see read()
// i.e. skip the 44 bytes that precede macStyle, counted from the current stream position
72+
data.seek(data.getCurrentPosition() + 44);
// the value is kept in this table's 'macStyle' field as well as in 'outHeaders'
73+
macStyle = data.readUnsignedShort();
74+
outHeaders.setHeaderMacStyle(macStyle);
75+
}
76+
6777
/**
6878
* This will read the required data from the stream.
6979
*

0 commit comments

Comments
 (0)