1 /***************************************************************************/
5 /* Type 1 parser (body). */
7 /* Copyright 1996-2000 by */
8 /* David Turner, Robert Wilhelm, and Werner Lemberg. */
10 /* This file is part of the FreeType project, and may only be used, */
11 /* modified, and distributed under the terms of the FreeType project */
12 /* license, LICENSE.TXT. By continuing to use, modify, or distribute */
13 /* this file you indicate that you have read the license and */
14 /* understand and accept it fully. */
16 /***************************************************************************/
19 /*************************************************************************/
21 /* The tokenizer is in charge of loading and reading a Type1 font file */
22 /* (either in PFB or PFA format), and extracting successive tokens and */
23 /* keywords from its two streams (i.e. the font program, and the private */
26 /* Eexec decryption is performed automatically when entering the private */
27 /* dictionary, or when retrieving char strings. */
29 /*************************************************************************/
32 #include <freetype/internal/ftstream.h>
33 #include <freetype/internal/ftdebug.h>
36 #ifdef FT_FLAT_COMPILE
43 #include <type1/t1tokens.h>
44 #include <type1/t1load.h>
49 #include <string.h> /* for strncmp() */
52 #undef READ_BUFFER_INCREMENT
53 #define READ_BUFFER_INCREMENT 0x400
56 /*************************************************************************/
58 /* The macro FT_COMPONENT is used in trace mode. It is an implicit */
59 /* parameter of the FT_TRACE() and FT_ERROR() macros, used to print/log */
60 /* messages during execution. */
63 #define FT_COMPONENT trace_t1load
66 /* An array of Type1 keywords supported by this engine. This table */
67 /* places the keyword in lexicographical order. It should always */
68 /* correspond to the enums `key_xxx'! */
70 const char* t1_keywords
[key_max
- key_first_
] =
72 "-|", "ExpertEncoding", "ND", "NP", "RD", "StandardEncoding", "array",
73 "begin", "closefile", "currentdict", "currentfile", "def", "dict", "dup",
74 "eexec", "end", "executeonly", "false", "for", "index", "noaccess",
75 "put", "readonly", "true", "userdict", "|", "|-"
79 const char* t1_immediates
[imm_max
- imm_first_
] =
81 "-|", ".notdef", "BlendAxisTypes", "BlueFuzz", "BlueScale", "BlueShift",
82 "BlueValues", "CharStrings", "Encoding", "FamilyBlues", "FamilyName",
83 "FamilyOtherBlues", "FID", "FontBBox", "FontID", "FontInfo", "FontMatrix",
84 "FontName", "FontType", "ForceBold", "FullName", "ItalicAngle",
85 "LanguageGroup", "Metrics", "MinFeature", "ND", "NP", "Notice",
86 "OtherBlues", "OtherSubrs", "PaintType", "Private", "RD", "RndStemUp",
87 "StdHW", "StdVW", "StemSnapH", "StemSnapV", "StrokeWidth", "Subrs",
88 "UnderlinePosition", "UnderlineThickness", "UniqueID", "Weight",
89 "isFixedPitch", "lenIV", "password", "version", "|", "|-"
/* Lexicographic comparison of a length-delimited string `str1'     */
/* (exactly `str1_len' bytes, not necessarily NUL-terminated)       */
/* against the NUL-terminated string `str2'.                        */
/*                                                                  */
/* Returns <0, 0 or >0 in the manner of strcmp().  When `str1' is   */
/* exhausted first, the result is the negated next byte of `str2',  */
/* so a strict prefix compares as "smaller".                        */
static int  lexico_strcmp( const char*  str1,
                           int          str1_len,
                           const char*  str2 )
{
  for ( ; str1_len > 0; str1_len-- )
  {
    /* compare as unsigned bytes; the tokenizer only ever feeds */
    /* ASCII here (Read_Token rejects bytes > 127)              */
    int  diff = (int)(unsigned char)*str1++ - (int)(unsigned char)*str2++;


    if ( diff )
      return diff;
  }

  /* `str1' exhausted: equal only if `str2' ends here too */
  return -*str2;
}
/* Find a given token/name in a sorted table, performing binary search. */
/*                                                                      */
/* `base'/`length' describe the (not NUL-terminated) name to look up;   */
/* `table' is an array of `table_len' NUL-terminated strings sorted in  */
/* lexico_strcmp() order (see t1_keywords/t1_immediates).               */
/*                                                                      */
/* Returns the index of the matching entry, or -1 if none matches.      */
static int  Find_Name( char*         base,
                       int           length,
                       const char**  table,
                       int           table_len )
{
  int  left, right;


  left  = 0;
  right = table_len - 1;

  /* narrow [left, right] until at most two candidates remain */
  while ( right - left > 1 )
  {
    int  middle = left + ( ( right - left ) >> 1 );
    int  cmp;


    cmp = lexico_strcmp( base, length, table[middle] );
    if ( cmp == 0 )
      return middle;

    if ( cmp < 0 )
      right = middle;
    else
      left = middle;
  }

  if ( !lexico_strcmp( base, length, table[left] ) )
    return left;

  if ( !lexico_strcmp( base, length, table[right] ) )
    return right;

  return -1;
}
157 /* read the small PFB section header */
159 FT_Error
Read_PFB_Tag( FT_Stream stream
,
168 FT_TRACE2(( "Read_PFB_Tag: reading\n" ));
170 if ( ACCESS_Frame( 6L ) )
179 *asize
= ( ( size
& 0xFF ) << 24 ) |
180 ( ( ( size
>> 8 ) & 0xFF ) << 16 ) |
181 ( ( ( size
>> 16 ) & 0xFF ) << 8 ) |
182 ( ( ( size
>> 24 ) & 0xFF ) );
184 FT_TRACE2(( " tag = %04x\n", tag
));
185 FT_TRACE4(( " asze = %08x\n", size
));
186 FT_TRACE2(( " size = %08x\n", *asize
));
193 FT_Error
grow( T1_Tokenizer tokzer
)
197 FT_Memory memory
= tokzer
->memory
;
200 left_bytes
= tokzer
->max
- tokzer
->limit
;
202 if ( left_bytes
> 0 )
204 FT_Stream stream
= tokzer
->stream
;
207 if ( left_bytes
> READ_BUFFER_INCREMENT
)
208 left_bytes
= READ_BUFFER_INCREMENT
;
210 FT_TRACE2(( "Growing tokenizer buffer by %d bytes\n", left_bytes
));
212 if ( !REALLOC( tokzer
->base
, tokzer
->limit
,
213 tokzer
->limit
+ left_bytes
) &&
214 !FILE_Read( tokzer
->base
+ tokzer
->limit
, left_bytes
) )
215 tokzer
->limit
+= left_bytes
;
219 FT_ERROR(( "Unexpected end of Type1 fragment!\n" ));
220 error
= T1_Err_Invalid_File_Format
;
223 tokzer
->error
= error
;
228 /*************************************************************************/
234 /* Performs the Type 1 charstring decryption process. */
237 /* buffer :: The base address of the data to decrypt. */
238 /* length :: The number of bytes to decrypt (beginning from the base */
240 /* seed :: The encryption seed (4330 for charstrings). */
243 void t1_decrypt( FT_Byte
* buffer
,
252 plain
= ( *buffer
^ ( seed
>> 8 ) );
253 seed
= ( *buffer
+ seed
) * 52845 + 22719;
260 /*************************************************************************/
266 /* Creates a new tokenizer from a given input stream. This function */
267 /* automatically recognizes `pfa' or `pfb' files. The function */
268 /* Read_Token() can then be used to extract successive tokens from */
272 /* stream :: The input stream. */
275 /* tokenizer :: A handle to a new tokenizer object. */
278 /* FreeType error code. 0 means success. */
281 /* This function copies the stream handle within the object. Callers */
282 /* should not discard `stream'. This is done by the Done_Tokenizer() */
286 FT_Error
New_Tokenizer( FT_Stream stream
,
287 T1_Tokenizer
* tokenizer
)
289 FT_Memory memory
= stream
->memory
;
302 /* allocate object */
303 if ( FILE_Seek( 0L ) ||
304 ALLOC( tokzer
, sizeof ( *tokzer
) ) )
307 tokzer
->stream
= stream
;
308 tokzer
->memory
= stream
->memory
;
311 tokzer
->in_private
= 0;
315 tok_max
= stream
->size
;
317 error
= Read_PFB_Tag( stream
, &tag
, &size
);
323 /* assume that it is a PFA file -- an error will be produced later */
324 /* if a character with value > 127 is encountered */
326 /* rewind to start of file */
327 if ( FILE_Seek( 0L ) )
335 /* if it is a memory-based resource, set up pointer */
338 tok_base
= (FT_Byte
*)stream
->base
+ stream
->pos
;
342 /* check that the `size' field is valid */
343 if ( FILE_Skip( size
) )
346 else if ( tag
== 0x8001 )
348 /* read segment in memory */
349 if ( ALLOC( tok_base
, size
) )
352 if ( FILE_Read( tok_base
, size
) )
362 tokzer
->base
= tok_base
;
363 tokzer
->limit
= tok_limit
;
364 tokzer
->max
= tok_max
;
369 /* now check font format; we must see `%!PS-AdobeFont-1' */
370 /* or `%!FontType' */
372 if ( 16 > tokzer
->limit
)
375 if ( tokzer
->limit
<= 16 ||
376 ( strncmp( (const char*)tokzer
->base
, "%!PS-AdobeFont-1", 16 ) &&
377 strncmp( (const char*)tokzer
->base
, "%!FontType", 10 ) ) )
379 FT_TRACE2(( "[not a Type1 font]\n" ));
380 error
= FT_Err_Unknown_File_Format
;
387 FREE( tokzer
->base
);
/* Return the value of an hexadecimal digit (`0'-`9', `a'-`f',   */
/* `A'-`F'), or -1 when `c' is not a hexadecimal digit.          */
static int  hexa_value( char  c )
{
  unsigned int  d;


  /* the unsigned subtraction makes characters below each range's */
  /* base wrap to a huge value, so they fail the range test too   */
  d = (unsigned int)( c - '0' );
  if ( d <= 9 )
    return (int)d;

  d = (unsigned int)( c - 'a' );
  if ( d <= 5 )
    return (int)( d + 10 );

  d = (unsigned int)( c - 'A' );
  if ( d <= 5 )
    return (int)( d + 10 );

  return -1;
}
416 /*************************************************************************/
422 /* Closes a given tokenizer. This function will also close the */
423 /* stream embedded in the object. */
426 /* tokenizer :: The target tokenizer object. */
429 /* FreeType error code. 0 means success. */
432 FT_Error
Done_Tokenizer( T1_Tokenizer tokenizer
)
434 FT_Memory memory
= tokenizer
->memory
;
437 /* clear read buffer if needed (disk-based resources) */
438 if ( tokenizer
->in_private
|| !tokenizer
->stream
->base
)
439 FREE( tokenizer
->base
);
446 /*************************************************************************/
449 /* Open_PrivateDict */
452 /* This function must be called to set the tokenizer to the private */
453 /* section of the Type1 file. It recognizes automatically the */
454 /* the kind of eexec encryption used (ascii or binary). */
457 /* tokenizer :: The target tokenizer object. */
458 /* lenIV :: The value of the `lenIV' variable. */
461 /* FreeType error code. 0 means success. */
464 FT_Error
Open_PrivateDict( T1_Tokenizer tokenizer
)
466 T1_Tokenizer tokzer
= tokenizer
;
467 FT_Stream stream
= tokzer
->stream
;
468 FT_Memory memory
= tokzer
->memory
;
474 FT_Byte
* private_dict
;
476 /* are we already in the private dictionary ? */
477 if ( tokzer
->in_private
)
480 if ( tokzer
->in_pfb
)
482 /* in the case of the PFB format, the private dictionary can be */
483 /* made of several segments. We thus first read the number of */
484 /* segments to compute the total size of the private dictionary */
485 /* then re-read them into memory. */
486 FT_Long start_pos
= FILE_Pos();
487 FT_ULong private_dict_size
= 0;
492 error
= Read_PFB_Tag( stream
, &tag
, &size
);
493 if ( error
|| tag
!= 0x8002 )
496 private_dict_size
+= size
;
498 if ( FILE_Skip( size
) )
502 /* check that we have a private dictionary there */
503 /* and allocate private dictionary buffer */
504 if ( private_dict_size
== 0 )
506 FT_ERROR(( "Open_PrivateDict:" ));
507 FT_ERROR(( " invalid private dictionary section\n" ));
508 error
= T1_Err_Invalid_File_Format
;
512 if ( ALLOC( private_dict
, private_dict_size
) )
515 /* read all sections into buffer */
516 if ( FILE_Seek( start_pos
) )
519 private_dict_size
= 0;
522 error
= Read_PFB_Tag( stream
, &tag
, &size
);
523 if ( error
|| tag
!= 0x8002 )
529 if ( FILE_Read( private_dict
+ private_dict_size
, size
) )
532 private_dict_size
+= size
;
535 /* we must free the field `tokzer.base' if we are in a disk-based */
538 FREE( tokzer
->base
);
540 tokzer
->base
= private_dict
;
542 tokzer
->limit
= private_dict_size
;
543 tokzer
->max
= private_dict_size
;
550 /* we are in a PFA file; read each token until we find `eexec' */
551 while ( tokzer
->token
.kind2
!= key_eexec
)
553 error
= Read_Token( tokzer
);
558 /* now determine whether the private dictionary is encoded in binary */
559 /* or hexadecimal ASCII format. */
561 /* we need to access the next 4 bytes (after the final \r following */
562 /* the `eexec' keyword); if they all are hexadecimal digits, then */
563 /* we have a case of ASCII storage. */
564 while ( tokzer
->cursor
+ 5 > tokzer
->limit
)
566 error
= grow( tokzer
);
571 /* skip whitespace/line feed after `eexec' */
572 base
= (char*)tokzer
->base
+ tokzer
->cursor
+ 1;
573 if ( ( hexa_value( base
[0] ) | hexa_value( base
[1] ) |
574 hexa_value( base
[2] ) | hexa_value( base
[3] ) ) < 0 )
576 /* binary encoding -- `simply' read the stream */
578 /* if it is a memory-based resource, we need to allocate a new */
579 /* storage buffer for the private dictionary, as it must be */
580 /* decrypted later */
583 size
= stream
->size
- tokzer
->cursor
- 1; /* remaining bytes */
585 if ( ALLOC( private_dict
, size
) ) /* alloc private dict buffer */
588 /* copy eexec-encrypted bytes */
589 MEM_Copy( private_dict
, tokzer
->base
+ tokzer
->cursor
+ 1, size
);
591 /* reset pointers - forget about file mapping */
592 tokzer
->base
= private_dict
;
593 tokzer
->limit
= size
;
597 /* On the opposite, for disk based resources, we simply grow */
598 /* the current buffer until its completion, and decrypt the */
599 /* bytes within it. In all cases, the `base' buffer will be */
600 /* discarded on DoneTokenizer if we are in the private dict. */
603 /* grow the read buffer to the full file */
604 while ( tokzer
->limit
< tokzer
->max
)
606 error
= grow( tokenizer
);
611 /* set up cursor to first encrypted byte */
617 /* ASCII hexadecimal encoding. This sucks... */
624 /* allocate a buffer, read each one byte at a time */
625 count
= stream
->size
- tokzer
->cursor
;
628 if ( ALLOC( private_dict
, size
) ) /* alloc private dict buffer */
631 write
= private_dict
;
632 cur
= tokzer
->base
+ tokzer
->cursor
;
633 limit
= tokzer
->base
+ tokzer
->limit
;
635 /* read each bytes */
638 /* ensure that we can read the next 2 bytes! */
639 while ( cur
+ 2 > limit
)
641 int cursor
= cur
- tokzer
->base
;
644 error
= grow( tokzer
);
647 cur
= tokzer
->base
+ cursor
;
648 limit
= tokzer
->base
+ tokzer
->limit
;
651 /* check for new line */
652 if ( cur
[0] == '\r' || cur
[0] == '\n' )
659 int hex1
= hexa_value(cur
[0]);
662 /* exit if we have a non-hexadecimal digit which isn't */
663 /* a new-line character */
667 /* otherwise, store byte */
668 *write
++ = ( hex1
<< 4 ) | hexa_value( cur
[1] );
674 /* get rid of old buffer in the case of disk-based resources */
676 FREE( tokzer
->base
);
678 /* set up pointers */
679 tokzer
->base
= private_dict
;
680 tokzer
->limit
= size
;
686 /* finally, decrypt the private dictionary - and skip the lenIV bytes */
687 t1_decrypt( tokzer
->base
, tokzer
->limit
, 55665 );
694 FREE( private_dict
);
699 /*************************************************************************/
705 /* Reads a new token from the current input stream. This function */
706 /* extracts a token from the font program until Open_PrivateDict() */
707 /* has been called. After this, it returns tokens from the */
708 /* (eexec-encrypted) private dictionary. */
711 /* tokenizer :: The target tokenizer object. */
714 /* FreeType error code. 0 means success. */
717 /* Use the function Read_CharStrings() to read the binary charstrings */
718 /* from the private dict. */
721 FT_Error
Read_Token( T1_Tokenizer tokenizer
)
723 T1_Tokenizer tok
= tokenizer
;
726 char c
, starter
, ender
;
727 FT_Bool token_started
;
732 tok
->error
= T1_Err_Ok
;
733 tok
->token
.kind
= tok_any
;
751 c
= (char)base
[cur
++];
753 /* check that we have an ASCII character */
754 if ( (FT_Byte
)c
> 127 )
756 FT_ERROR(( "Read_Token:" ));
757 FT_ERROR(( " unexpected binary data in Type1 fragment!\n" ));
758 tok
->error
= T1_Err_Invalid_File_Format
;
767 case '\t': /* skip initial whitespace => skip to next */
770 /* possibly a name, keyword, wathever */
771 tok
->token
.kind
= tok_any
;
772 tok
->token
.len
= cur
-tok
->token
.start
- 1;
775 /* otherwise, skip everything */
778 case '%': /* this is a comment -- skip everything */
781 FT_Int left
= limit
- cur
;
786 c
= (char)base
[cur
++];
787 if ( c
== '\r' || c
== '\n' )
792 if ( grow( tokenizer
) )
798 case '(': /* a Postscript string */
803 if ( !token_started
)
806 tok
->token
.start
= cur
- 1;
810 FT_Int nest_level
= 1;
816 FT_Int left
= limit
- cur
;
821 c
= (char)base
[cur
++];
826 else if ( c
== ender
)
829 if ( nest_level
<= 0 )
831 tok
->token
.kind
= kind
;
832 tok
->token
.len
= cur
- tok
->token
.start
;
846 case '[': /* a Postscript array */
855 case '{': /* a Postscript program */
864 case '<': /* a Postscript hex byte array? */
873 case '0': /* any number */
886 tok
->token
.kind
= tok_number
;
888 tok
->token
.start
= cur
- 1;
893 FT_Int left
= limit
-cur
;
898 c
= (char)base
[cur
++];
913 tok
->token
.len
= cur
- tok
->token
.start
- 1;
928 case '.': /* maybe a number */
935 tok
->token
.start
= cur
- 1;
939 FT_Int left
= limit
- cur
;
944 /* test for any following digit, interpreted as number */
946 tok
->token
.kind
= ( c
>= '0' && c
<= '9' ? tok_number
: tok_any
);
956 case '/': /* maybe an immediate name */
957 if ( !token_started
)
960 tok
->token
.start
= cur
- 1;
964 FT_Int left
= limit
- cur
;
969 /* test for single '/', interpreted as garbage */
971 tok
->token
.kind
= ( c
== ' ' || c
== '\t' ||
972 c
== '\r' || c
== '\n' ) ? tok_any
985 Any_Token
: /* possibly a name or wathever */
987 tok
->token
.len
= cur
- tok
->token
.start
;
992 if ( !token_started
)
995 tok
->token
.start
= cur
- 1;
1008 /* now, tries to match keywords and immediate names */
1012 switch ( tok
->token
.kind
)
1014 case tok_immediate
: /* immediate name */
1015 index
= Find_Name( (char*)( tok
->base
+ tok
->token
.start
+ 1 ),
1018 imm_max
- imm_first_
);
1019 tok
->token
.kind2
= ( index
>= 0 )
1020 ? (T1_TokenType
)( imm_first_
+ index
)
1024 case tok_any
: /* test for keyword */
1025 index
= Find_Name( (char*)( tok
->base
+ tok
->token
.start
),
1028 key_max
- key_first_
);
1031 tok
->token
.kind
= tok_keyword
;
1032 tok
->token
.kind2
= (T1_TokenType
)( key_first_
+ index
);
1035 tok
->token
.kind2
= tok_error
;
1039 tok
->token
.kind2
= tok_error
;
1042 return tokenizer
->error
;
1048 /*************************************************************************/
1051 /* Read_CharStrings */
1054 /* Reads a charstrings element from the current input stream. These */
1055 /* are binary bytes that encode each individual glyph outline. */
1057 /* The caller is responsible for skipping the `lenIV' bytes at the */
1058 /* start of the record. */
1061 /* tokenizer :: The target tokenizer object. */
1062 /* num_chars :: The number of binary bytes to read. */
1065 /* buffer :: The target array of bytes. These are */
1066 /* eexec-decrypted. */
1069 /* FreeType error code. 0 means success. */
1072 /* Use the function Read_CharStrings() to read binary charstrings */
1073 /* from the private dict. */
1076 FT_Error
Read_CharStrings( T1_Tokenizer tokenizer
,
1082 FT_Int left
= tokenizer
->limit
- tokenizer
->cursor
;
1085 if ( left
>= num_chars
)
1087 MEM_Copy( buffer
, tokenizer
->base
+ tokenizer
->cursor
, num_chars
);
1088 t1_decrypt( buffer
, num_chars
, 4330 );
1089 tokenizer
->cursor
+= num_chars
;
1093 if ( grow( tokenizer
) )
1094 return tokenizer
->error
;