home *** CD-ROM | disk | FTP | other *** search
- % Copyright (c) 2001, AccuSoft Corporation.
- % All rights reserved.
- %
- % This is a PostScript library that makes GhostScript write the
- % text it renders to the stdout stream. The library is used
- % by the PDF extension of the ImageGear DLL to extract text
- % from PostScript files.
- %
- % Created: 09/17/2001 AlexR
- % Last modified:
- % Revision:
- %
- % The library redefines the show, ashow, widthshow, awidthshow,
- % kshow, xshow, yshow and xyshow text-rendering operators as
- % procedures that extract the text, translated to ISOLatin1 encoding.
- % The output has the following format:
- %
- % #S <x0> <y0> <len1> <len2> <string> <x1> <y1>
- % This line indicates displaying text
- %
- % #P
- % This line indicates the end of the page
- % <x0>, <y0>, <x1>, <y1> are integer coordinates in 1/100 of a device unit.
- % (<x0>,<y0>) is the start point of the painted text and (<x1>,<y1>) is its end point.
- % These two points specify the direction of the text and its location.
- % <string> is the extracted text; it consists of printable characters in ISOLatin1
- % encoding. <len2> is the length of <string>. <len1> is the number of glyphs represented
- % by <string>. <len2> may differ from <len1> (for example, the character with
- % glyph name /trademark is represented by "^TM").
-
- userdict begin
-
- % IG_redef: name proc IG_redef -
- %
- % Defines <name> as <proc> in the current dictionary, but only when <name>
- % can already be found on the dictionary stack; otherwise both operands
- % are discarded.  The standard "show" operators are redefined with it.
-
- /IG_redef
- {
-     exch dup where          % stack: proc name dict true | proc name false
-     {
-         pop exch def        % drop the dictionary that was found, then: name proc def
-     }
-     {
-         pop pop             % unknown name: discard name and proc
-     }
-     ifelse
- } bind def
-
- /IG_PStoTxtDict 50 dict def % private dictionary of the library, created with room for 50 entries
-
- % Declare all names in our private dictionary: it stays the current dictionary until the matching "end"
-
- //IG_PStoTxtDict begin
-
- % OT1 encoding: the output string for each of the 128 character codes, indexed by code. It is used for self-named glyphs of Type 3 fonts.
-
- /_DvipsOT1
- [
- % 0
- (\\Gamma) (\\Delta) (\\Theta) (\\Lambda) (\\Xi) (\\Pi) (\\Sigma) (\\Upsilon) % 0 - 7
- (\\Phi) (\\Psi) (\\Omega) (ff) (fi) (fl) (ffi) (ffl) (i) (j) (`) % 8 - 18
- (') (\237) (\226) (\257) (\232) (\270) (\337) (ae) (oe) (\370) (AE) (OE) % 19 - 30
- (\330) (/) (!) ('') (#) ($) (%) (&) (') (\() (\)) (*) (+) (,) (\255) (.) (/) % 31 - 47
- (0) (1) (2) (3) (4) (5) (6) (7) (8) (9) (:) (;) (!) (=) (?) (?) (@) % 48 - 64
- % 65
- (A) (B) (C) (D) (E) (F) (G) (H) (I) (J) (K) (L) (M) (N) (O) (P) (Q) (R) (S) % 65 - 83
- (T) (U) (V) (W) (X) (Y) (Z) % 84 - 90
- % 91
- ([) (``) (]) (^) (_) (`) % 91 - 96
- % 97
- (a) (b) (c) (d) (e) (f) (g) (h) (i) (j) (k) (l) (m) (n) (o) (p) (q) (r) (s) % 97 - 115
- (t) (u) (v) (w) (x) (y) (z) % 116 - 122
- % 123
- (--) (---) (\235) (~) (\250) % 123 - 127
- ] def
-
-
- % Glyph names used in most True Type encoding vectors (for Type 42 fonts): 256 names indexed by character code
-
- /_TTGlyphNames1 % names are "G" followed by the code as two hexadecimal digits with upper-case letters
- [
- % 0 gap (codes 0 - 31; _tt_glyph_encode does not emit them)
- /G00 /G01 /G02 /G03 /G04 /G05 /G06 /G07 /G08 /G09 /G0A /G0B /G0C /G0D /G0E /G0F % 0 - 15
- /G10 /G11 /G12 /G13 /G14 /G15 /G16 /G17 /G18 /G19 /G1A /G1B /G1C /G1D /G1E /G1F % 16 - 31
-
- % 32
- /G20 /G21 /G22 /G23 /G24 /G25 /G26 /G27 /G28 /G29 /G2A /G2B /G2C /G2D /G2E /G2F % 32 - 47
- /G30 /G31 /G32 /G33 /G34 /G35 /G36 /G37 /G38 /G39 /G3A /G3B /G3C /G3D /G3E /G3F % 48 - 63
- /G40 /G41 /G42 /G43 /G44 /G45 /G46 /G47 /G48 /G49 /G4A /G4B /G4C /G4D /G4E /G4F % 64 - 79
- /G50 /G51 /G52 /G53 /G54 /G55 /G56 /G57 /G58 /G59 /G5A /G5B /G5C /G5D /G5E /G5F % 80 - 95
- /G60 /G61 /G62 /G63 /G64 /G65 /G66 /G67 /G68 /G69 /G6A /G6B /G6C /G6D /G6E /G6F % 96 - 111
- /G70 /G71 /G72 /G73 /G74 /G75 /G76 /G77 /G78 /G79 /G7A /G7B /G7C /G7D /G7E % 112 - 126
-
- % 127
- /G7F
- /G80 /G81 /G82 /G83 /G84 /G85 /G86 /G87 /G88 /G89 /G8A /G8B /G8C /G8D /G8E /G8F % 128 - 143
-
- % 144
- /G90 /G91 /G92 /G93 /G94 /G95 /G96 /G97 /G98 /G99 /G9A /G9B /G9C /G9D /G9E /G9F % 144 - 159
-
- /GA0 /GA1 /GA2 /GA3 /GA4 /GA5 /GA6 /GA7 /GA8 /GA9 /GAA /GAB /GAC /GAD /GAE /GAF % 160 - 175
- /GB0 /GB1 /GB2 /GB3 /GB4 /GB5 /GB6 /GB7 /GB8 /GB9 /GBA /GBB /GBC /GBD /GBE /GBF % 176 - 191
- /GC0 /GC1 /GC2 /GC3 /GC4 /GC5 /GC6 /GC7 /GC8 /GC9 /GCA /GCB /GCC /GCD /GCE /GCF % 192 - 207
- /GD0 /GD1 /GD2 /GD3 /GD4 /GD5 /GD6 /GD7 /GD8 /GD9 /GDA /GDB /GDC /GDD /GDE /GDF % 208 - 223
- /GE0 /GE1 /GE2 /GE3 /GE4 /GE5 /GE6 /GE7 /GE8 /GE9 /GEA /GEB /GEC /GED /GEE /GEF % 224 - 239
- /GF0 /GF1 /GF2 /GF3 /GF4 /GF5 /GF6 /GF7 /GF8 /GF9 /GFA /GFB /GFC /GFD /GFE /GFF % 240 - 255
- ] def
-
- /_TTGlyphNames2 % second spelling of the 256 names: lower-case hexadecimal letters, and a single digit for codes 0 - 9
- [
- % 0 gap (codes 0 - 31; _tt_glyph_encode does not emit them)
- /G0 /G1 /G2 /G3 /G4 /G5 /G6 /G7 /G8 /G9 /G0a /G0b /G0c /G0d /G0e /G0f % 0 - 15
- /G10 /G11 /G12 /G13 /G14 /G15 /G16 /G17 /G18 /G19 /G1a /G1b /G1c /G1d /G1e /G1f % 16 - 31
-
- % 32
- /G20 /G21 /G22 /G23 /G24 /G25 /G26 /G27 /G28 /G29 /G2a /G2b /G2c /G2d /G2e /G2f % 32 - 47
- /G30 /G31 /G32 /G33 /G34 /G35 /G36 /G37 /G38 /G39 /G3a /G3b /G3c /G3d /G3e /G3f % 48 - 63
- /G40 /G41 /G42 /G43 /G44 /G45 /G46 /G47 /G48 /G49 /G4a /G4b /G4c /G4d /G4e /G4f % 64 - 79
- /G50 /G51 /G52 /G53 /G54 /G55 /G56 /G57 /G58 /G59 /G5a /G5b /G5c /G5d /G5e /G5f % 80 - 95
- /G60 /G61 /G62 /G63 /G64 /G65 /G66 /G67 /G68 /G69 /G6a /G6b /G6c /G6d /G6e /G6f % 96 - 111
- /G70 /G71 /G72 /G73 /G74 /G75 /G76 /G77 /G78 /G79 /G7a /G7b /G7c /G7d /G7e % 112 - 126
-
- % 127
- /G7F % NOTE(review): upper-case unlike the neighbouring entries, so it equals the _TTGlyphNames1 entry and _tt_pair_get skips it; /G7f may have been intended - confirm
- /G80 /G81 /G82 /G83 /G84 /G85 /G86 /G87 /G88 /G89 /G8a /G8b /G8c /G8d /G8e /G8f % 128 - 143
-
- % 144
- /G90 /G91 /G92 /G93 /G94 /G95 /G96 /G97 /G98 /G99 /G9a /G9b /G9c /G9d /G9e /G9f % 144 - 159
-
- /Ga0 /Ga1 /Ga2 /Ga3 /Ga4 /Ga5 /Ga6 /Ga7 /Ga8 /Ga9 /Gaa /Gab /Gac /Gad /Gae /Gaf % 160 - 175
- /Gb0 /Gb1 /Gb2 /Gb3 /Gb4 /Gb5 /Gb6 /Gb7 /Gb8 /Gb9 /Gba /Gbb /Gbc /Gbd /Gbe /Gbf % 176 - 191
- /Gc0 /Gc1 /Gc2 /Gc3 /Gc4 /Gc5 /Gc6 /Gc7 /Gc8 /Gc9 /Gca /Gcb /Gcc /Gcd /Gce /Gcf % 192 - 207
- /Gd0 /Gd1 /Gd2 /Gd3 /Gd4 /Gd5 /Gd6 /Gd7 /Gd8 /Gd9 /Gda /Gdb /Gdc /Gdd /Gde /Gdf % 208 - 223
- /Ge0 /Ge1 /Ge2 /Ge3 /Ge4 /Ge5 /Ge6 /Ge7 /Ge8 /Ge9 /Gea /Geb /Gec /Ged /Gee /Gef % 224 - 239
- /Gf0 /Gf1 /Gf2 /Gf3 /Gf4 /Gf5 /Gf6 /Gf7 /Gf8 /Gf9 /Gfa /Gfb /Gfc /Gfd /Gfe /Gff % 240 - 255
-
- ] def
-
-
- % _char_to_string: int _char_to_string str1
- %
- % Builds a new one-character string whose only character has the
- % character code <int>.
-
- /_char_to_string
- {
-     1 string            % stack: code str
-     dup 3 -1 roll       % stack: str str code
-     0 exch put          % stack: str        (str[0] = code)
- } def
-
-
- % _tt_pair_get: int bool _tt_pair_get name str
- %
- % Pairs the glyph name found at index <int> of a True Type name table
- % with the one-character string for that code.  Used for filling the
- % ISOLatin1CharStrings dictionary.
- %   int  - character code; it is the index of the glyph name
- %   bool - if true the name is taken from _TTGlyphNames1, otherwise it is
- %          taken from _TTGlyphNames2.  In the second case nothing is
- %          pushed when both tables hold the same name at this index.
-
- /_tt_pair_get
- {
-     {
-         //_TTGlyphNames1 1 index get            % stack: int name
-         exch //_char_to_string exec             % stack: name str
-     }
-     {
-         //_TTGlyphNames2 1 index get            % stack: int name2
-         dup //_TTGlyphNames1 3 index get        % stack: int name2 name2 name1
-         ne                                      % stack: int name2 bool
-         {
-             exch //_char_to_string exec         % stack: name2 str
-         }
-         {
-             pop pop                             % same name in both tables: push nothing
-         }
-         ifelse
-     }
-     ifelse
- } def
-
- % _tt_glyph_encode: bool _tt_glyph_encode name1 str1 ... namen strn
- %
- % Pushes the True Type glyph-name/string pairs for the character codes
- % 32 - 127 and 160 - 255.  The first 32 codes are undefined and the codes
- % 128 - 159 are skipped here.
- %   bool - if true the names are taken from _TTGlyphNames1,
- %          otherwise they are taken from _TTGlyphNames2
- % The flag is kept as tt_glyph1 in the current dictionary.
-
- /_tt_glyph_encode
- {
-     /tt_glyph1 exch def
-
-     [ [ 32 127 ] [ 160 255 ] ]          % inclusive code ranges to emit, in order
-     {
-         aload pop 1 exch                % stack: first 1 last
-         {
-             tt_glyph1 //_tt_pair_get exec
-         } for
-     } forall
- } def
-
-
- % Character encoding dictionary.
- % The keys are the most common glyph names; the values are the strings, in
- % ISO Latin1 encoding, that are written to the output for these names.
- % A glyph that has no single ISO Latin1 character is written as its base
- % letter followed by the accent character, or as an ASCII approximation.
- % Glyph names are spelled as fonts spell them (brokenbar, dollar, florin,
- % Acircumflex, divide, threequarters), otherwise the lookup never matches.
-
- /ISOLatin1CharStrings
- <<
- /.notdef ()
-
- % 0 - 9
-
- /zero (0) /one (1) /two (2) /three (3) /four (4) /five (5)
- /six (6) /seven (7) /eight (8) /nine (9)
- %
- /ampersand (&) /at (@) /cent (c) /hyphen (-)
- /space ( ) /comma (,) /asciicircum (^) /asciitilde (~)
- /asterisk (*) /colon (:) /period (.) /periodcentered (\267)
- /minus (-) /endash (--) /emdash (---) /exclam (!)
- /fraction (/) /backslash (\\) /bar (|) /braceleft ({)
- /braceright (}) /bracketleft ([) /bracketright (]) /brokenbar (|)
- /dollar ($) /equal (=) /greater (>) /greaterequal (>=)
- /less (<) /lessequal (<=) /numbersign (#) /parenleft (\()
- /parenright (\)) /percent (%) /perthousand (o/oo) /plus (+)
- /question (?) /quotedbl (") /quotedblbase (,,) /quotedblleft (``)
- /quotedblright ('') /quoteleft (`) /quoteright (')
- /quotesinglbase (') /quotesingle (') /slash (/) /semicolon (;)
- /trademark (^TM) /underscore (_) /ff (ff) /fi (fi)
- /ffi (ffi) /fl (fl) /ffl (ffl) /florin (f)
- /second ('') /minute (') /daggerdbl (#)
-
- % ISO Latin1 special
-
- /acute (\264) /breve (\226) /caron (\237) /circumflex (\223)
- /dieresis (\250) /dotaccent (\227) /dotlessi (\220) /grave (\221)
- /hungarumlaut (\235) /macron (\257) /ogonek (\236) /ring (\232)
- /tilde (\224)
-
- % Other standard latin characters
- % Accented letters without an ISO Latin1 code use the accent strings
- % defined above: acute \264, breve \226, caron \237, dieresis \250,
- % dotaccent \227, hungarumlaut \235, macron \257, ogonek \236, ring \232.
-
- /ae (ae) /AE (AE) /Aacute (\301) /Abreve (A\226)
- /Acircumflex (\302) /Adieresis (\304) /Agrave (\300) /Amacron (A\257)
- /Aogonek (A\236) /Aring (\305) /Atilde (\303) /Cacute (C\264)
- /Ccaron (C\237) /Ccedilla (\307) /Dcaron (D\237) /Dcroat (\320)
- /Eacute (\311) /Ecaron (E\237) /Ecircumflex (\312) /Edieresis (\313)
- /Edotaccent (E\227) /Egrave (\310) /Emacron (E\257) /Eogonek (E\236)
- /Eth (\320) /Gbreve (G\226) /Gcommaaccent (G,) /Iacute (\315)
- /Icircumflex (\316) /Idieresis (\317) /Idotaccent (I\227) /Igrave (\314)
- /Imacron (I\257) /Iogonek (I\236) /Kcommaaccent (K,) /Lacute (L\264)
- /Lcaron (L\237) /Lcommaaccent (L,) /Lslash (L/) /Nacute (N\264)
- /Ncaron (N\237) /Ncommaaccent (N,) /Ntilde (\321) /OE (OE)
- /Oacute (\323) /Ocircumflex (\324) /Odieresis (\326) /Ograve (\322)
- /Ohungarumlaut (O\235) /Omacron (O\257) /Oslash (\330) /Otilde (\325)
- /Racute (R\264) /Rcaron (R\237) /Rcommaaccent (R,) /Sacute (S\264)
- /Scaron (S\237) /Scedilla (S,) /Scommaaccent (S,) /Tcaron (T\237)
- /Tcommaaccent (T,) /Thorn (\336) /Uacute (\332) /Ucircumflex (\333)
- /Udieresis (\334) /Ugrave (\331) /Uhungarumlaut (U\235) /Umacron (U\257)
- /Uogonek (U\236) /Uring (U\232) /Yacute (\335) /Ydieresis (Y\250)
- /Zacute (Z\264) /Zcaron (Z\237) /Zdotaccent (Z\227) /aacute (\341)
- /abreve (a\226) /acircumflex (\342) /adieresis (\344) /agrave (\340)
- /amacron (a\257) /aogonek (a\236) /aring (\345) /atilde (\343)
- /bullet (.) /cacute (c\264) /ccaron (c\237) /ccedilla (\347)
- /commaaccent (,) /copyright (\251) /currency (\244) /dcaron (d\237)
- /dcroat (d) /degree (\260) /divide (\367) /eacute (\351)
- /ecaron (e\237) /ecircumflex (\352) /edieresis (\353) /edotaccent (e\227)
- /egrave (\350) /ellipsis (...) /emacron (e\257) /eogonek (e\236)
- /eth (\360) /exclamdown (!) /dagger (+) /gbreve (g\226)
- /gcommaaccent (g,) /germandbls (\337) /guillemotleft (\253) /guillemotright (\273)
- /guilsinglleft (<) /guilsinglright (>) /iacute (\355) /icircumflex (\356)
- /idieresis (\357) /igrave (\354) /imacron (i\257) /iogonek (i\236)
- /kcommaaccent (k,) /lacute (l\264) /lcaron (l\237) /lcommaaccent (l,)
- /logicalnot (\254) /lslash (l/) /mu (\265) /multiply (\327)
- /nacute (n\264) /ncaron (n\237) /ncommaaccent (n,) /notequal (=)
- /ntilde (\361) /oacute (\363) /ocircumflex (\364) /odieresis (\366)
- /oe (oe) /ograve (\362) /ohungarumlaut (o\235) /omacron (o\257)
- /onehalf (1/2) /onequarter (1/4) /onesuperior (\271) /ordfeminine (\252)
- /ordmasculine (\272) /oslash (\370) /otilde (\365) /paragraph (\266)
- /partialdiff (d) /plusminus (\261) /questiondown (?) /racute (r\264)
- /rcaron (r\237) /rcommaaccent (r,) /registered (\256) /sacute (s\264)
- /scaron (s\237) /scedilla (s,) /scommaaccent (s,) /section (\247)
- /sterling (\243) /tcaron (t\237) /tcommaaccent (t,) /thorn (\376)
- /threequarters (3/4) /threesuperior (\263) /twosuperior (\262) /uacute (\372)
- /ucircumflex (\373) /udieresis (\374) /ugrave (\371) /uhungarumlaut (u\235)
- /umacron (u\257) /uogonek (u\236) /uring (u\232) /yacute (\375)
- /ydieresis (\377) /yen (\245) /zacute (z\264) /zcaron (z\237)
- /zdotaccent (z\227)
-
- % Some characters from Symbol Encoding
- % (a backslash in the output must be written as \\ inside a string)
-
- /Alpha (A) /Beta (B) /Chi (X) /Delta (\\Delta)
- /Epsilon (E) /Eta (H) /Euro (C) /Gamma (\\Gamma)
- /Ifraktur (J) /Iota (I) /Kappa (K) /Lambda (\\Lambda)
- /Mu (M) /Nu (N) /Omega (O) /Omicron (O)
- /Phi (\\Phi) /Pi (\\Pi) /Psi (\\Psi) /Rfraktur (R)
- /Rho (P) /Sigma (\\Sigma) /Tau (T) /Theta (\\Theta)
- /Upsilon (Y) /Upsilon1 (Y) /Xi (\\Xi) /Zeta (Z)
- /aleph (x) /alpha (a) /angleleft (<) /angleright (>)
- /approxequal (=) /asteriskmath (*) /chi (X) /copyrightsans (\(C\))
- /copyrightserif (\(C\)) /delta (\\delta) /dotmath (.) /element (c)
- /equivalence (=) /eta (n) /gamma (\\gamma) /iota (i)
- /kappa (k) /lambda (\\lambda) /logicaland (^) /logicalor (V)
- /omicron (o) /phi (\\phi) /phi1 (\\phi) /pi (\\pi)
- /psi (\\psi) /registersans (\(R\)) /registerserif (\(R\))
- /rho (p) /sigma (o) /sigma1 (c) /tau (t)
- /theta (\\theta) /theta1 (\\theta) /trademarksans (^TM) /trademarkserif (^TM)
-
- % self-named glyphs OT1 encoding including letters A - Z, a - z:
- % the glyph name is the single character with code i, the value is _DvipsOT1[i]
-
- 0 1 127
- {
- dup //_char_to_string exec cvn _DvipsOT1 3 -1 roll get
- } for
-
- % number-named glyph names: the glyph name is the decimal text of i,
- % the value is _DvipsOT1[i]
-
- 0 1 127
- {
- dup 10 3 string cvrs cvn _DvipsOT1 3 -1 roll get
- } for
-
- % True Type encoding (codes 32 - 127 and 160 - 255, both name spellings)
-
- true //_tt_glyph_encode exec
- false //_tt_glyph_encode exec
-
- % True Type from 130 to 159
- /G82 (') /G83 (f) /G84 ('') /G85 (...) /G86 (+) /G87 (#) /G88 (\223)
- /G89 (o/oo) /G8A (S\237) /G8B (<) /G8C (OE) /G91 (`) /G92 (') /G93 (``)
- /G94 ('') /G95 (.) /G96 (--) /G97 (---) /G98 (~) /G99 (^TM) /G9A (s\237)
- /G9B (>) /G9C (oe) /G9F (Y\250)
-
- /G8a (S\237) /G8b (<) /G8c (OE) /G9a (s\237) /G9b (>) /G9c (oe) /G9f (Y\250)
-
- >> def
-
-
- % _int_output: int _int_output -
- %
- % Writes the text form of <int> to stdout.
- /_int_output
- {
-     20 string       % scratch buffer that receives the text
-     cvs
-     print
- } def
-
-
- % _to_output_coord: x y _to_output_coord x' y'
- %
- % Maps the point (x,y) through the current transformation matrix and
- % returns both results as integers in 1/100 of a unit (multiplied by
- % 100, then rounded).
- /_to_output_coord
- {
-     transform               % stack: dx dy
-     exch
-     100 mul round cvi       % stack: dy x'
-     exch
-     100 mul round cvi       % stack: x' y'
- } def
-
-
- % _print_current_location: - _print_current_location -
- %
- % Writes the current point, converted to output coordinates, to stdout
- % as "x' y' " (each number is followed by one space).
- /_print_current_location
- {
-     currentpoint //_to_output_coord exec    % stack: x' y'
-     exch                                    % stack: y' x'
-     2
-     {
-         //_int_output exec                  % x' on the first pass, y' on the second
-         ( ) print
-     } repeat
- } def
-
- % _output_string_get: str _output_string_get str2 str2Length
- %
- % Replaces every character of <str> by its ISO Latin1 equivalent.
- % The character code is mapped to a glyph name through the Encoding of the
- % current font and the name is looked up in ISOLatin1CharStrings.  For a
- % name that is not in the dictionary, a code in the range 32 - 126 is kept
- % as it is and any other code becomes "#".
- % Returns the resulting string and its length.  outString and outWidth
- % (in IG_PStoTxtDict) are used as working variables.
-
- /_output_string_get
- {
-     //userdict /IG_PStoTxtDict get begin
-
-     /outString () def
-     /outWidth 0 def
-
-     {                                               % stack: ch
-         % glyph name of the character in the current font
-         currentfont /Encoding get 1 index get       % stack: ch name
-
-         ISOLatin1CharStrings 1 index known          % stack: ch name bool
-         {
-             ISOLatin1CharStrings exch get           % stack: ch str1
-             exch pop                                % stack: str1
-         }
-         {
-             % the name is not defined in ISOLatin1CharStrings
-             pop                                     % stack: ch
-             dup 32 lt 1 index 126 gt or             % stack: ch bool
-             {
-                 % nothing can be said about this character
-                 pop (#)                             % stack: str1
-             }
-             {
-                 % take the code as an ASCII character; this is wrong
-                 % for some fonts but usually gives the right result
-                 //_char_to_string exec              % stack: str1
-             }
-             ifelse
-         }
-         ifelse
-
-         % append str1 to the result string
-         outString length 1 index length add string  % stack: str1 new
-         dup 0 outString putinterval                 % stack: str1 new
-         dup outString length 4 -1 roll putinterval  % stack: new
-         /outString exch def
-         /outWidth outString length def
-     } forall
-
-     outString
-     outWidth
-
-     end % IG_PStoTxtDict
- } def
-
- % IG_output_make: ... string proc IG_output_make -
- %
- % Writes the #S directive for <string> to stdout and executes <proc>.
- %   proc - the operator that shows the string; it is executed with
- %          "... string" on the operand stack
- % For an empty string nothing is written, but <proc> is still executed.
-
- /IG_output_make
- {
-     //userdict /IG_PStoTxtDict get begin
-
-     1 index () eq                       % stack: ... string proc bool
-     {
-         % nothing to report: only draw the text to change the current point
-         exec
-     }
-     {
-         (#S ) print
-
-         % start location of the string
-         _print_current_location
-
-         % glyph count
-         1 index length _int_output
-         ( ) print
-
-         % the string in ISO Latin1 encoding and its length
-         1 index _output_string_get      % stack: ... string proc str strLength
-         _int_output                     % stack: ... string proc str
-         ( ) print
-         print                           % stack: ... string proc
-         ( ) print
-
-         % draw the text to change the current point
-         exec                            % stack: -
-
-         % end location of the string
-         _print_current_location
-
-         (\n) print
-     }
-     ifelse
-
-     end % IG_PStoTxtDict
- } def
-
- end %IG_PStoTxtDict begin
-
- % Redefine the end-of-page operators.
-
- % copypage: - copypage -
- % Writes the #P end-of-page directive to stdout, with a newline before
- % the directive and an empty line after it.
- /copypage
- {
-     (\n#P\n\n) print
- } bind IG_redef
-
- % showpage: - showpage -
- % Reports the end of the page through the redefined copypage and then
- % resets the graphics state with initgraphics.
- /showpage
- {
-     //userdict /copypage get
-     exec
-     initgraphics
- } bind IG_redef
-
- % Redefine "show" operators to extract text
-
- % show: string show -
- % Passes the string and the original operator to IG_output_make.
- /show
- {
-     //userdict /IG_PStoTxtDict get /IG_output_make get  % stack: string make
-     //systemdict /show get                              % stack: string make operator
-     exch exec                                           % IG_output_make sees: string operator
- } bind IG_redef
-
- % ashow: ax ay string ashow -
- % Passes the operands and the original operator to IG_output_make.
- /ashow
- {
-     //userdict /IG_PStoTxtDict get /IG_output_make get  % stack: ax ay string make
-     //systemdict /ashow get                             % stack: ax ay string make operator
-     exch exec                                           % IG_output_make sees: ax ay string operator
- } bind IG_redef
-
- % widthshow: cx cy char string widthshow -
- % Passes the operands and the original operator to IG_output_make.
- /widthshow
- {
-     //userdict /IG_PStoTxtDict get /IG_output_make get  % stack: cx cy char string make
-     //systemdict /widthshow get                         % stack: cx cy char string make operator
-     exch exec                                           % IG_output_make sees: cx cy char string operator
- } bind IG_redef
-
- % awidthshow: cx cy char ax ay string awidthshow -
- % Passes the operands and the original operator to IG_output_make.
- /awidthshow
- {
-     //userdict /IG_PStoTxtDict get /IG_output_make get  % stack: cx cy char ax ay string make
-     //systemdict /awidthshow get                        % stack: cx cy char ax ay string make operator
-     exch exec                                           % IG_output_make sees: ... string operator
- } bind IG_redef
-
- % kshow: proc string kshow -
- %
- % Reports the text one character at a time through the redefined show.
- % As the standard kshow does, <proc> is executed between every pair of
- % adjacent characters c0 and c1, with the character codes of c0 and c1
- % pushed on the operand stack as integers.  <proc> is not executed after
- % the last character, and never for an empty string.
- % <proc> runs on the caller's dictionary stack.  While it runs, proc,
- % string and the loop index lie below the two codes on the operand stack,
- % so <proc> must consume the two codes and leave the operands below alone.
-
- /kshow
- {
-                                             % stack: proc str
-     0 1 2 index length 1 sub                % stack: proc str 0 1 lastIndex
-     {                                       % stack: proc str i
-         % show the character at index i
-         2 copy 1 getinterval                % stack: proc str i str1
-         //userdict /show get exec           % stack: proc str i
-
-         % is there a character after this one?
-         dup 1 add 2 index length lt         % stack: proc str i bool
-         {
-             2 copy get                      % stack: proc str i c0
-             2 index 2 index 1 add get       % stack: proc str i c0 c1
-             4 index exec                    % stack: proc str i
-         } if
-
-         pop                                 % stack: proc str
-     } for
-
-     pop pop                                 % stack: -
- } bind IG_redef
-
- % xshow: string numarray xshow -
- % The displacements are discarded; the string goes to the redefined show.
- /xshow
- {
-     pop                             % stack: string
-     //userdict /show get
-     exec
- } bind IG_redef
-
- % yshow: string numarray yshow -
- % The displacements are discarded; the string goes to the redefined show.
- /yshow
- {
-     pop                             % stack: string
-     //userdict /show get
-     exec
- } bind IG_redef
-
- % xyshow: string numarray xyshow -
- % The displacements are discarded; the string goes to the redefined show.
- /xyshow
- {
-     pop                             % stack: string
-     //userdict /show get
-     exec
- } bind IG_redef
-
- end % userdict begin
-
-
- systemdict begin .bindoperators end % NOTE(review): .bindoperators is not defined in this file; presumably a Ghostscript internal procedure - confirm
-
- % Re-enable 'bind' for files run after this one (NOBIND and .bind are not defined in this file; presumably Ghostscript's - TODO confirm)
- NOBIND
- {
- /bind /.bind load def % bind becomes whatever .bind currently resolves to
- }
- if
-
- systemdict readonly pop % make systemdict read-only; the dictionary returned by readonly is discarded
-