@@ -343,25 +343,43 @@ def __init__(
343343
344344 # If a font resource was added, get the font character map
345345 if font_resource :
346- font_resource = cast (DictionaryObject , font_resource .get_object ())
347346 font = Font .from_font_resource (font_resource )
348347 else :
349348 logger_warning (f"Font dictionary for { font_name } not found; defaulting to Helvetica." , __name__ )
350349 font_name = "/Helv"
351- font_resource = DictionaryObject ({
352- NameObject ("/Subtype" ): NameObject ("/Type1" ),
353- NameObject ("/Name" ): NameObject ("/Helv" ),
354- NameObject ("/Type" ): NameObject ("/Font" ),
355- NameObject ("/BaseFont" ): NameObject ("/Helvetica" ),
356- NameObject ("/Encoding" ): NameObject ("/WinAnsiEncoding" )
357- })
350+ core_font_metrics = CORE_FONT_METRICS ["Helvetica" ]
358351 font = Font (
359352 name = "Helvetica" ,
360353 character_map = {},
361354 encoding = dict (zip (range (256 ), fill_from_encoding ("cp1252" ))), # WinAnsiEncoding
362355 sub_type = "Type1" ,
363- font_descriptor = CORE_FONT_METRICS ["Helvetica" ].font_descriptor ,
364- character_widths = CORE_FONT_METRICS ["Helvetica" ].character_widths
356+ font_descriptor = core_font_metrics .font_descriptor ,
357+ character_widths = core_font_metrics .character_widths
358+ )
359+ font_resource = font .as_font_resource ()
360+
361+ # Check whether the font resource is able to encode the text value.
362+ encodable = True
363+ try :
364+ if isinstance (font .encoding , str ):
365+ text .encode (font .encoding , "surrogatepass" )
366+ else :
367+ supported_chars = set (font .encoding .values ())
368+ if any (char not in supported_chars for char in text ):
369+ encodable = False
370+ # We should add a final check against the character_map (CMap) of the font,
371+ # but we don't appear to have PDF forms with such fonts, so we skip this for
372+ # now.
373+
374+ except UnicodeEncodeError :
375+ encodable = False
376+
377+ if not encodable :
378+ logger_warning (
379+ f"Text string '{ text } ' contains characters not supported by font encoding. "
380+ "This may result in text corruption. "
381+ "Consider calling writer.update_page_form_field_values with auto_regenerate=True." ,
382+ __name__
365383 )
366384
367385 font_glyph_byte_map : dict [str , bytes ]
@@ -398,6 +416,44 @@ def __init__(
398416 })
399417 })
400418
@staticmethod
def _find_annotation_font_resource(
    font_name: str,
    annotation: DictionaryObject,
    acro_form: DictionaryObject
) -> tuple[str, DictionaryObject]:
    """
    Locate, or construct, the font resource to use for an annotation.

    The "/DR" resources are looked up through ``annotation.get_inherited``,
    with the "/DR" entry of the AcroForm dictionary as the default. The font
    is then searched by name in the "/Font" entry of these resources.

    If no usable font resource is found, a Type1 font resource with
    WinAnsiEncoding is built from ``CORE_FONT_METRICS``. A font name that
    has no entry in ``CORE_FONT_METRICS`` is replaced by "/Helvetica" after
    logging a warning.

    Args:
        font_name: The font name to look for, e.g. as derived from the
            default appearance or provided by the user.
        annotation: The annotation whose resources are examined.
        acro_form: The AcroForm dictionary providing default resources.

    Returns:
        A tuple of the font name that is effectively used and the
        associated font resource.

    """
    default_resources = acro_form.get("/DR", DictionaryObject())
    resources: Any = cast(
        DictionaryObject,
        annotation.get_inherited("/DR", default_resources),
    )
    available_fonts = resources.get("/Font", DictionaryObject())
    located_resource = available_fonts.get(font_name, None)

    # The common case: the font resource is present among the resources.
    if not is_null_or_none(located_resource):
        return font_name, located_resource

    # No font resource was found. This may happen when a user has provided a
    # specific font name, or for one of the 14 Adobe Core fonts, for which we
    # may be expected to construct the font resource ourselves.
    base_font_name = font_name.removeprefix("/")
    if base_font_name not in CORE_FONT_METRICS:
        # We cannot construct this font ourselves, so fall back to Helvetica.
        logger_warning(f"Font dictionary for {font_name} not found; defaulting to Helvetica.", __name__)
        font_name = "/Helvetica"
        base_font_name = font_name.removeprefix("/")

    metrics = CORE_FONT_METRICS[base_font_name]
    win_ansi_encoding = dict(zip(range(256), fill_from_encoding("cp1252")))  # WinAnsiEncoding
    constructed_font = Font(
        name=base_font_name,
        character_map={},
        encoding=win_ansi_encoding,
        sub_type="Type1",
        font_descriptor=metrics.font_descriptor,
        character_widths=metrics.character_widths
    )
    return font_name, constructed_font.as_font_resource()
456+
401457 @classmethod
402458 def from_text_annotation (
403459 cls ,
@@ -443,6 +499,23 @@ def from_text_annotation(
443499 else :
444500 default_appearance = default_appearance .get_object ()
445501
502+ # Retrieve field text and selected values
503+ field_flags = field .get (FieldDictionaryAttributes .Ff , 0 )
504+ if (
505+ field .get (FieldDictionaryAttributes .FT , "/Tx" ) == "/Ch" and
506+ field_flags & FieldDictionaryAttributes .FfBits .Combo == 0
507+ ):
508+ text = "\n " .join (annotation .get_inherited (FieldDictionaryAttributes .Opt , []))
509+ selection = field .get ("/V" , [])
510+ if not isinstance (selection , list ):
511+ selection = [selection ]
512+ else : # /Tx
513+ text = field .get ("/V" , "" )
514+ selection = []
515+
516+ # Escape parentheses (PDF 1.7 reference, table 3.2, Literal Strings)
517+ text = text .replace ("\\ " , "\\ \\ " ).replace ("(" , r"\(" ).replace (")" , r"\)" )
518+
446519 # Derive font name, size and color from the default appearance. Also set
447520 # user-provided font name and font size in the default appearance, if given.
448521 # For a font name, this presumes that we can find an associated font resource
@@ -463,46 +536,7 @@ def from_text_annotation(
463536 if user_font_size > 0 :
464537 font_size = user_font_size
465538
466- # Try to find a resource dictionary for the font
467- document_resources : Any = cast (
468- DictionaryObject ,
469- cast (
470- DictionaryObject ,
471- annotation .get_inherited (
472- "/DR" ,
473- acro_form .get ("/DR" , DictionaryObject ()),
474- ),
475- ).get_object (),
476- )
477- document_font_resources = document_resources .get ("/Font" , DictionaryObject ()).get_object ()
478- # CORE_FONT_METRICS is the dict with Standard font metrics
479- if font_name not in document_font_resources and font_name .removeprefix ("/" ) not in CORE_FONT_METRICS :
480- # ...or AcroForm dictionary
481- document_resources = cast (
482- dict [Any , Any ],
483- acro_form .get ("/DR" , {}),
484- )
485- document_font_resources = document_resources .get_object ().get ("/Font" , DictionaryObject ()).get_object ()
486- font_resource = document_font_resources .get (font_name , None )
487- if not is_null_or_none (font_resource ):
488- font_resource = cast (DictionaryObject , font_resource .get_object ())
489-
490- # Retrieve field text and selected values
491- field_flags = field .get (FieldDictionaryAttributes .Ff , 0 )
492- if (
493- field .get (FieldDictionaryAttributes .FT , "/Tx" ) == "/Ch" and
494- field_flags & FieldDictionaryAttributes .FfBits .Combo == 0
495- ):
496- text = "\n " .join (annotation .get_inherited (FieldDictionaryAttributes .Opt , []))
497- selection = field .get ("/V" , [])
498- if not isinstance (selection , list ):
499- selection = [selection ]
500- else : # /Tx
501- text = field .get ("/V" , "" )
502- selection = []
503-
504- # Escape parentheses (PDF 1.7 reference, table 3.2, Literal Strings)
505- text = text .replace ("\\ " , "\\ \\ " ).replace ("(" , r"\(" ).replace (")" , r"\)" )
539+ font_name , font_resource = cls ._find_annotation_font_resource (font_name , annotation , acro_form )
506540
507541 # Retrieve formatting information
508542 is_comb = False
@@ -535,11 +569,21 @@ def from_text_annotation(
535569 is_comb = is_comb ,
536570 max_length = max_length
537571 )
572+
538573 if AnnotationDictionaryAttributes .AP in annotation :
539574 for key , value in (
540575 cast (DictionaryObject , annotation [AnnotationDictionaryAttributes .AP ]).get ("/N" , {}).items ()
541576 ):
542- if key not in {"/BBox" , "/Length" , "/Subtype" , "/Type" , "/Filter" }:
577+ if key in {"/BBox" , "/Length" , "/Subtype" , "/Type" , "/Filter" }:
578+ continue
579+ # Don't overwrite font resources added by TextAppearanceStream.__init__
580+ if key == "/Resources" :
581+ if "/Font" not in value :
582+ value .get_object ()[NameObject ("/Font" )] = DictionaryObject ()
583+ value ["/Font" ].get_object ()[NameObject (font_name )] = getattr (
584+ font_resource , "indirect_reference" , font_resource
585+ )
586+ else :
543587 new_appearance_stream [key ] = value
544588
545589 return new_appearance_stream
0 commit comments