Skip to content

Commit e390fe1

Browse files
committed
[PERFORMANCE] Improve FileSystemFontProvider.scanFonts() performance by adding an 'only headers' mode to the TTF parser:
* only read the tables needed for FSFontInfo ('name', 'head', 'OS/2', 'CFF ', 'gcid')
* the 'CFF ' and 'head' table parsers finish as soon as they have all the needed headers
1 parent 2bd40b9 commit e390fe1

File tree

9 files changed

+500
-92
lines changed

9 files changed

+500
-92
lines changed

fontbox/src/main/java/org/apache/fontbox/cff/CFFParser.java

+33-9
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828

2929
import org.apache.commons.logging.Log;
3030
import org.apache.commons.logging.LogFactory;
31+
import org.apache.fontbox.ttf.FontHeaders;
3132
import org.apache.pdfbox.io.RandomAccessRead;
3233

3334

@@ -48,7 +49,8 @@ public class CFFParser
4849

4950
private String[] stringIndex = null;
5051
private ByteSource source;
51-
52+
private FontHeaders loadOnlyHeaders;
53+
5254
// for debugging only
5355
private String debugFontName;
5456

@@ -66,6 +68,11 @@ public interface ByteSource
6668
byte[] getBytes() throws IOException;
6769
}
6870

71+
public void setLoadOnlyHeaders(FontHeaders loadOnlyHeaders)
72+
{
73+
this.loadOnlyHeaders = loadOnlyHeaders;
74+
}
75+
6976
/**
7077
* Parse CFF font using byte array, also passing in a byte source for future use.
7178
*
@@ -91,17 +98,21 @@ public List<CFFFont> parse(byte[] bytes, ByteSource source) throws IOException
9198
public List<CFFFont> parse(RandomAccessRead randomAccessRead) throws IOException
9299
{
93100
// TODO do we need to store the source data of the font? It isn't used at all
94-
byte[] bytes = new byte[(int) randomAccessRead.length()];
101+
// definitely don't need 'source' in 'loadOnlyHeaders' mode
95102
randomAccessRead.seek(0);
96-
int remainingBytes = bytes.length;
97-
int amountRead;
98-
while ((amountRead = randomAccessRead.read(bytes, bytes.length - remainingBytes,
99-
remainingBytes)) > 0)
103+
if (loadOnlyHeaders == null)
100104
{
101-
remainingBytes -= amountRead;
105+
byte[] bytes = new byte[(int) randomAccessRead.length()];
106+
int remainingBytes = bytes.length;
107+
int amountRead;
108+
while ((amountRead = randomAccessRead.read(bytes, bytes.length - remainingBytes,
109+
remainingBytes)) > 0)
110+
{
111+
remainingBytes -= amountRead;
112+
}
113+
randomAccessRead.seek(0);
114+
this.source = new CFFBytesource(bytes);
102115
}
103-
randomAccessRead.seek(0);
104-
this.source = new CFFBytesource(bytes);
105116
return parse(new DataInputRandomAccessRead(randomAccessRead));
106117
}
107118

@@ -492,6 +503,15 @@ private CFFFont parseFont(DataInput input, String name, byte[] topDictIndex) thr
492503
cffCIDFont.setSupplement(rosEntry.getNumber(2).intValue());
493504

494505
font = cffCIDFont;
506+
if (loadOnlyHeaders != null)
507+
{
508+
loadOnlyHeaders.setOtfROS(
509+
cffCIDFont.getRegistry(),
510+
cffCIDFont.getOrdering(),
511+
cffCIDFont.getSupplement());
512+
// we just read (Registry, Ordering, Supplement) and don't need anything else
513+
return font;
514+
}
495515
}
496516
else
497517
{
@@ -501,6 +521,10 @@ private CFFFont parseFont(DataInput input, String name, byte[] topDictIndex) thr
501521
// name
502522
debugFontName = name;
503523
font.setName(name);
524+
if (loadOnlyHeaders != null)
525+
{
526+
return font; // not a 'CFFCIDFont' => cannot read properties needed by LoadOnlyHeaders anyway
527+
}
504528

505529
// top dict
506530
font.addValueToTopDict("version", getString(topDict, "version"));

fontbox/src/main/java/org/apache/fontbox/ttf/CFFTable.java

+21-2
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
import java.io.IOException;
2121
import org.apache.fontbox.cff.CFFFont;
2222
import org.apache.fontbox.cff.CFFParser;
23+
import org.apache.pdfbox.io.RandomAccessRead;
2324

2425
/**
2526
* PostScript font program (compact font format).
@@ -48,9 +49,27 @@ public class CFFTable extends TTFTable
4849
@Override
4950
void read(TrueTypeFont ttf, TTFDataStream data) throws IOException
5051
{
51-
byte[] bytes = data.read((int)getLength());
52-
52+
// assert data.getCurrentPosition() == getOffset();
5353
CFFParser parser = new CFFParser();
54+
FontHeaders loadOnlyHeaders = ttf.getLoadOnlyHeaders();
55+
parser.setLoadOnlyHeaders(loadOnlyHeaders);
56+
if (loadOnlyHeaders != null)
57+
{
58+
// TODO: measure performance and maybe use createSubView() for non-loadOnlyHeaders case
59+
try (RandomAccessRead subReader = data.createSubView(getLength()))
60+
{
61+
if (subReader != null)
62+
{
63+
cffFont = parser.parse(subReader).get(0);
64+
data.seek(getOffset() + getLength());
65+
initialized = true;
66+
return;
67+
}
68+
assert loadOnlyHeaders != null
69+
: "It is inefficient to read whole CFF table to parse only headers, please use RandomAccessReadUncachedDataStream";
70+
}
71+
}
72+
byte[] bytes = data.read((int)getLength());
5473
cffFont = parser.parse(bytes, new CFFBytesource(ttf)).get(0);
5574

5675
initialized = true;
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,154 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
package org.apache.fontbox.ttf;
18+
19+
import java.io.IOException;
20+
21+
/**
22+
* To improve performance of {@code FileSystemFontProvider.scanFonts(...)},
23+
* this class is used both as a marker (to skip unused data) and as a storage for collected data.
24+
* <p>
25+
* Tables it needs:<ul>
26+
* <li>NamingTable.TAG
27+
* <li>HeaderTable.TAG
28+
* <li>OS2WindowsMetricsTable.TAG
29+
* <li>CFFTable.TAG (for OTF)
30+
* <li>"gcid" (for non-OTF)
31+
* </ul>
32+
*
33+
* @author Mykola Bohdiuk
34+
*/
35+
public final class FontHeaders
36+
{
37+
static final int BYTES_GCID = 142;
38+
39+
private IOException exception;
40+
private String name;
41+
private Integer headerMacStyle;
42+
private OS2WindowsMetricsTable os2Windows;
43+
private String fontFamily;
44+
private String fontSubFamily;
45+
private byte[] nonOtfGcid142;
46+
//
47+
private boolean isOTFAndPostScript;
48+
private String otfRegistry;
49+
private String otfOrdering;
50+
private int otfSupplement;
51+
52+
public IOException getException()
53+
{
54+
return exception;
55+
}
56+
57+
public String getName()
58+
{
59+
return name;
60+
}
61+
62+
/**
63+
* null == no HeaderTable, {@code ttf.getHeader().getMacStyle()}
64+
*/
65+
public Integer getHeaderMacStyle()
66+
{
67+
return headerMacStyle;
68+
}
69+
70+
public OS2WindowsMetricsTable getOS2Windows()
71+
{
72+
return os2Windows;
73+
}
74+
75+
// only when LOGGER(FileSystemFontProvider).isTraceEnabled() tracing: FontFamily, FontSubfamily
76+
public String getFontFamily()
77+
{
78+
return fontFamily;
79+
}
80+
81+
public String getFontSubFamily()
82+
{
83+
return fontSubFamily;
84+
}
85+
86+
public boolean isOpenTypePostScript()
87+
{
88+
return isOTFAndPostScript;
89+
}
90+
91+
public byte[] getNonOtfTableGCID142()
92+
{
93+
return nonOtfGcid142;
94+
}
95+
96+
public String getOtfRegistry()
97+
{
98+
return otfRegistry;
99+
}
100+
101+
public String getOtfOrdering()
102+
{
103+
return otfOrdering;
104+
}
105+
106+
public int getOtfSupplement()
107+
{
108+
return otfSupplement;
109+
}
110+
111+
void setException(IOException exception)
112+
{
113+
this.exception = exception;
114+
}
115+
116+
void setName(String name)
117+
{
118+
this.name = name;
119+
}
120+
121+
void setHeaderMacStyle(Integer headerMacStyle)
122+
{
123+
this.headerMacStyle = headerMacStyle;
124+
}
125+
126+
void setOs2Windows(OS2WindowsMetricsTable os2Windows)
127+
{
128+
this.os2Windows = os2Windows;
129+
}
130+
131+
void setFontFamily(String fontFamily, String fontSubFamily)
132+
{
133+
this.fontFamily = fontFamily;
134+
this.fontSubFamily = fontSubFamily;
135+
}
136+
137+
void setNonOtfGcid142(byte[] nonOtfGcid142)
138+
{
139+
this.nonOtfGcid142 = nonOtfGcid142;
140+
}
141+
142+
void setIsOTFAndPostScript(boolean isOTFAndPostScript)
143+
{
144+
this.isOTFAndPostScript = isOTFAndPostScript;
145+
}
146+
147+
// public because CFFParser is in a different package
148+
public void setOtfROS(String otfRegistry, String otfOrdering, int otfSupplement)
149+
{
150+
this.otfRegistry = otfRegistry;
151+
this.otfOrdering = otfOrdering;
152+
this.otfSupplement = otfSupplement;
153+
}
154+
}

fontbox/src/main/java/org/apache/fontbox/ttf/HeaderTable.java

+10
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,16 @@ public class HeaderTable extends TTFTable
7474
@Override
7575
void read(TrueTypeFont ttf, TTFDataStream data) throws IOException
7676
{
77+
FontHeaders outHeaders = ttf.getLoadOnlyHeaders();
78+
if (outHeaders != null) {
79+
// 44 == 4 + 4 + 4 + 4 + 2 + 2 + 2*8 + 4*2
80+
data.seek(data.getCurrentPosition() + 44);
81+
macStyle = data.readUnsignedShort();
82+
outHeaders.setHeaderMacStyle(macStyle);
83+
initialized = true;
84+
return;
85+
}
86+
7787
version = data.read32Fixed();
7888
fontRevision = data.read32Fixed();
7989
checkSumAdjustment = data.readUnsignedInt();

fontbox/src/main/java/org/apache/fontbox/ttf/NamingTable.java

+28-3
Original file line numberDiff line numberDiff line change
@@ -62,11 +62,15 @@ void read(TrueTypeFont ttf, TTFDataStream data) throws IOException
6262
int numberOfNameRecords = data.readUnsignedShort();
6363
int offsetToStartOfStringStorage = data.readUnsignedShort();
6464
nameRecords = new ArrayList<>(numberOfNameRecords);
65+
FontHeaders onlyHeaders = ttf.getLoadOnlyHeaders();
6566
for (int i=0; i< numberOfNameRecords; i++)
6667
{
6768
NameRecord nr = new NameRecord();
6869
nr.initData(ttf, data);
69-
nameRecords.add(nr);
70+
if (onlyHeaders == null || isUsefulForOnlyHeaders(nr))
71+
{
72+
nameRecords.add(nr);
73+
}
7074
}
7175

7276
for (NameRecord nr : nameRecords)
@@ -86,7 +90,7 @@ void read(TrueTypeFont ttf, TTFDataStream data) throws IOException
8690

8791
lookupTable = new HashMap<>(nameRecords.size());
8892
fillLookupTable();
89-
readInterestingStrings();
93+
readInterestingStrings(onlyHeaders);
9094

9195
initialized = true;
9296
}
@@ -141,7 +145,7 @@ private void fillLookupTable()
141145
}
142146
}
143147

144-
private void readInterestingStrings()
148+
private void readInterestingStrings(FontHeaders onlyHeaders)
145149
{
146150
// extract strings of interest
147151
fontFamily = getEnglishName(NameRecord.NAME_FONT_FAMILY_NAME);
@@ -163,6 +167,27 @@ private void readInterestingStrings()
163167
{
164168
psName = psName.trim();
165169
}
170+
171+
if (onlyHeaders != null)
172+
{
173+
onlyHeaders.setName(psName);
174+
onlyHeaders.setFontFamily(fontFamily, fontSubFamily);
175+
}
176+
}
177+
178+
private static boolean isUsefulForOnlyHeaders(NameRecord nr)
179+
{
180+
int nameId = nr.getNameId();
181+
// see "psName =" and "getEnglishName()"
182+
if (nameId == NameRecord.NAME_POSTSCRIPT_NAME
183+
|| nameId == NameRecord.NAME_FONT_FAMILY_NAME
184+
|| nameId == NameRecord.NAME_FONT_SUB_FAMILY_NAME)
185+
{
186+
int languageId = nr.getLanguageId();
187+
return languageId == NameRecord.LANGUAGE_UNICODE
188+
|| languageId == NameRecord.LANGUAGE_WINDOWS_EN_US;
189+
}
190+
return false;
166191
}
167192

168193
/**

0 commit comments

Comments
 (0)