Demonstration of groff .psbb request handling code, for EPS and PDF input files
リビジョン | b9a8bc4c7cf15ce9fdc8e487d4dd006cbef86d7c (tree) |
---|---|
日時 | 2017-10-08 06:39:47 |
作者 | Keith Marshall <keithmarshall@user...> |
コミッター | Keith Marshall |
Implement an extended .psbb request handling API.
* psbb.h psbb.y psbblex.l: New files; they implement the API.
* t-psbb.cpp: New file; it implements a test program, emulating the
intended gtroff usage of this API.
* GNUmakefile: New file; it facilitates building the test program,
using GNU make.
* README .gitignore .hgignore: New files.
@@ -0,0 +1,9 @@ | ||
1 | +ChangeLog | |
2 | +*.bak | |
3 | +*.orig | |
4 | +*.safe | |
5 | +*.tab.* | |
6 | +*.[ao] | |
7 | +ps*.c | |
8 | +psbb | |
9 | +*~ |
@@ -0,0 +1,10 @@ | ||
1 | +syntax: glob | |
2 | +ChangeLog | |
3 | +**.bak | |
4 | +**.orig | |
5 | +**.safe | |
6 | +**.tab.* | |
7 | +**.[ao] | |
8 | +ps**.c | |
9 | +psbb | |
10 | +**~ |
@@ -0,0 +1,67 @@ | ||
1 | +# GNUmakefile | |
2 | +# | |
3 | +# Simple makefile to build .psbb request handler test/demonstration | |
4 | +# program; with no apology, this may gratuitously require GNU make. | |
5 | +# | |
6 | +# Written by Keith Marshall <keith@users.osdn.me> | |
7 | +# Copyright (C) 2017, Free Software Foundation, Inc. | |
8 | +# | |
9 | +# This file is part of groff. | |
10 | +# | |
11 | +# groff is free software; you can redistribute it and/or modify it under | |
12 | +# the terms of the GNU General Public License as published by the Free | |
13 | +# Software Foundation, either version 3 of the License, or | |
14 | +# (at your option) any later version. | |
15 | +# | |
16 | +# groff is distributed in the hope that it will be useful, but WITHOUT ANY | |
17 | +# WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
18 | +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
19 | +# for more details. | |
20 | +# | |
21 | +# You should have received a copy of the GNU General Public License | |
22 | +# along with this program. If not, see <http://www.gnu.org/licenses/>. | |
23 | +# | |
24 | +psbb: | |
25 | + | |
26 | +# For convenience, we've placed a necessary subset of libgroff sources | |
27 | +# and headers in our local libgroff subdirectory; ensure that make, and | |
28 | +# the C/C++ compilers can see them. | |
29 | +# | |
30 | +vpath %.c ./libgroff | |
31 | +vpath %.cpp ./libgroff | |
32 | +vpath %.h ./libgroff | |
33 | + | |
34 | +CFLAGS = -g -O2 -I./libgroff | |
35 | +CXXFLAGS = -g -O2 -I./libgroff | |
36 | + | |
37 | +# GNU make's default $(YACC) rule doesn't satisfy the dependencies we | |
38 | +# would like, so we specify our own alternative rule. | |
39 | +# | |
40 | +%.tab.c %.tab.h: %.y | |
41 | + $(YACC) $(YFLAGS) -b $* -d $< | |
42 | + | |
43 | +# This is a minimal subset of libgroff.a, (just sufficient to satisfy | |
44 | +# our immediate requirements for our .psbb handler test program). | |
45 | +# | |
46 | +libgroff.a: error.o errarg.o itoa.o fatal.o | |
47 | + $(AR) rcs $@ $^ | |
48 | + | |
49 | +# By default, GNU make uses $(CC) for linking, but we need C++ support, | |
50 | +# (which $(CC) doesn't give us automatically); moreover, we do not want | |
51 | +# psbb to incur a default dependency on psbb.o, so link explicitly. | |
52 | +# | |
53 | +psbb: t-psbb.o psbblex.o psbb.tab.o libgroff.a | |
54 | + $(CXX) $(LDFLAGS) $(TARGET_ARCH) $^ -o $@ | |
55 | + | |
56 | +# Object file dependencies: GCC could generate these automatically, but | |
57 | +# this is simpler, in this trivial instance. | |
58 | +# | |
59 | +psbblex.o: psbblex.c psbb.tab.h psbb.h | |
60 | +t-psbb.o psbblex.o psbb.tab.o error.o errarg.o: error.h errarg.h | |
61 | +t-psbb.o: psbb.h | |
62 | + | |
63 | +# Clean up rules | |
64 | +# | |
65 | +clean:; $(RM) *.o psbb | |
66 | +realclean: clean | |
67 | + $(RM) `echo *.l | sed 's,\.l,.c,g'` *.tab.* *.a |
@@ -0,0 +1,4 @@ | ||
1 | +The code in this directory implements a proposed new API, extending | |
2 | +the capabilities of groff's .psbb request to support extraction of the | |
3 | +bounding box (/MediaBox) properties from PDF files, in addition to the | |
4 | +original support for %%BoundingBox extraction from [E]PS files. |
@@ -0,0 +1,52 @@ | ||
1 | +/* psbb.h | |
2 | + * | |
3 | + * Declaration of .psbb request handling API. | |
4 | + * | |
5 | + * Written by Keith Marshall <keith@users.osdn.me> | |
6 | + * Copyright (C) 2017, Free Software Foundation, Inc. | |
7 | + * | |
8 | + * This file is part of groff. | |
9 | + * | |
10 | + * groff is free software; you can redistribute it and/or modify it under | |
11 | + * the terms of the GNU General Public License as published by the Free | |
12 | + * Software Foundation, either version 3 of the License, or | |
13 | + * (at your option) any later version. | |
14 | + * | |
15 | + * groff is distributed in the hope that it will be useful, but WITHOUT ANY | |
16 | + * WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
17 | + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
18 | + * for more details. | |
19 | + * | |
20 | + * You should have received a copy of the GNU General Public License | |
21 | + * along with this program. If not, see <http://www.gnu.org/licenses/>. | |
22 | + */ | |
23 | +#ifndef YY_PSBB_H_INCLUDED | |
24 | +#define YY_PSBB_H_INCLUDED | |
25 | + | |
26 | +#include <stdio.h> | |
27 | + | |
28 | +#define yylval psbb_lval | |
29 | +#define yyparse psbb_parse | |
30 | +#define yyerror psbb_error | |
31 | + | |
32 | +#define psbb_error errprintf | |
33 | + | |
34 | +#undef EXTERN_C | |
35 | +#ifdef __cplusplus | |
36 | +# define EXTERN_C extern "C" | |
37 | +#else | |
38 | +# define EXTERN_C | |
39 | +#endif | |
40 | + | |
41 | +EXTERN_C void psbb_get_bounding_box (const char *); | |
42 | +EXTERN_C FILE *psbb_open_file_for_parse (const char *); | |
43 | +EXTERN_C void psbb_assign_registers (int, int, int, int); | |
44 | +EXTERN_C void psbb_error (const char *, ...); | |
45 | + | |
46 | +EXTERN_C void psbb_lookup (int, int); | |
47 | +EXTERN_C void psbb_locate (int, int); | |
48 | +EXTERN_C int psbb_chkref (int, int); | |
49 | + | |
50 | +EXTERN_C void psbb_walk (void); | |
51 | + | |
52 | +#endif /* YY_PSBB_H_INCLUDED: end of file */ |
@@ -0,0 +1,62 @@ | ||
1 | +/* psbb.y | |
2 | + * | |
3 | + * Parser grammar to drive the lexical analyser for extraction of bounding | |
4 | + * box properties from EPS, or PDF files, to support groff's .psbb request. | |
5 | + * | |
6 | + * Written by Keith Marshall <keith@users.osdn.me> | |
7 | + * Copyright (C) 2017, Free Software Foundation, Inc. | |
8 | + * | |
9 | + * This file is part of groff. | |
10 | + * | |
11 | + * groff is free software; you can redistribute it and/or modify it under | |
12 | + * the terms of the GNU General Public License as published by the Free | |
13 | + * Software Foundation, either version 3 of the License, or | |
14 | + * (at your option) any later version. | |
15 | + * | |
16 | + * groff is distributed in the hope that it will be useful, but WITHOUT ANY | |
17 | + * WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
18 | + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
19 | + * for more details. | |
20 | + * | |
21 | + * You should have received a copy of the GNU General Public License | |
22 | + * along with this program. If not, see <http://www.gnu.org/licenses/>. | |
23 | + */ | |
24 | +%{ | |
25 | +#include "psbb.h" | |
26 | + | |
27 | +#define psbb_assign psbb_assign_registers | |
28 | +%} | |
29 | + | |
30 | +%name-prefix = "psbb_" | |
31 | + | |
32 | +%token PDFSTART PDFROOT | |
33 | +%token VALUE PDFLOOKUP PDFOBJECT PDFOBJREF PDFENDOBJ | |
34 | + | |
35 | +%% | |
36 | +/* A .psbb related parse of either any [E]PS, or any PDF file, | |
37 | + * MUST conform to this "psbb" grammar. | |
38 | + */ | |
39 | +psbb: /* nothing */ | |
40 | + | psbb root PDFOBJREF { psbb_walk(); } | |
41 | + | psbb PDFLOOKUP VALUE VALUE { psbb_lookup( $3, $4 ); } | |
42 | + | psbb PDFOBJREF VALUE VALUE 'R' { psbb_locate( $3, $4 ); } | |
43 | + | psbb VALUE VALUE PDFOBJECT { psbb_chkref( $2, $3 ); } | |
44 | + | psbb PDFENDOBJ { psbb_walk(); } | |
45 | + | psbb bbox | |
46 | + ; | |
47 | + | |
48 | +/* The "root" rule is specific to parsing of PDF files; it should | |
49 | + * be invoked just once, early in the parse cycle for each file, to | |
50 | + * initiate location and parsing of the PDF /Catalog object. | |
51 | + */ | |
52 | +root: PDFROOT VALUE VALUE 'R' { psbb_locate( $2, $3 ); } | |
53 | + ; | |
54 | + | |
55 | +/* Applicable to either [E]PS or PDF files, at any time when we | |
56 | + * have accumulated four numeric values on the parser stack, we | |
57 | + * assume that they represent bounding box co-ordinates. | |
58 | + */ | |
59 | +bbox: VALUE VALUE VALUE VALUE { psbb_assign( $1, $2, $3, $4 ); } | |
60 | + ; | |
61 | + | |
62 | +/* vim: set cin fo=croqj: */ |
@@ -0,0 +1,719 @@ | ||
1 | +/* psbblex.l | |
2 | + * | |
3 | + * Lexical analyser for extraction of bounding box properties from [E]PS, | |
4 | + * or PDF files, in response to groff's .psbb request. | |
5 | + * | |
6 | + * Written by Keith Marshall <keith@users.osdn.me> | |
7 | + * Copyright (C) 2017, Free Software Foundation, Inc. | |
8 | + * | |
9 | + * This file is part of groff. | |
10 | + * | |
11 | + * groff is free software; you can redistribute it and/or modify it under | |
12 | + * the terms of the GNU General Public License as published by the Free | |
13 | + * Software Foundation, either version 3 of the License, or | |
14 | + * (at your option) any later version. | |
15 | + * | |
16 | + * groff is distributed in the hope that it will be useful, but WITHOUT ANY | |
17 | + * WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
18 | + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
19 | + * for more details. | |
20 | + * | |
21 | + * You should have received a copy of the GNU General Public License | |
22 | + * along with this program. If not, see <http://www.gnu.org/licenses/>. | |
23 | + */ | |
24 | +%{ | |
25 | +#include <ctype.h> | |
26 | +#include <stdio.h> | |
27 | +#include <stdlib.h> | |
28 | +#include <stdarg.h> | |
29 | +#include <errno.h> | |
30 | +#include <math.h> | |
31 | + | |
32 | +#include "psbb.h" | |
33 | +#include "psbb.tab.h" | |
34 | + | |
35 | +#if DEBUGGING | |
36 | +# define DEBUG(FOO) FOO | |
37 | +# define DEBUG_ECHO debug_msg( "%d: %s\n", YYSTATE, yytext ) | |
38 | + | |
39 | +# define DEBUG_MSG(ARGLIST) do { debug_msg ARGLIST; } while(0) | |
40 | + | |
41 | +# define DEBUG_RETURN(TOKEN, NAME) \ | |
42 | + do { debug_msg("%d: return token %s (%d)\n", YYSTATE, NAME, TOKEN); \ | |
43 | + return TOKEN; \ | |
44 | + } while(0) | |
45 | + | |
46 | +static void debug_msg (const char *fmt, ...) | |
47 | +{ va_list av; va_start(av, fmt); vfprintf(stderr, fmt, av); va_end(av); } | |
48 | + | |
49 | +#else | |
50 | +# define DEBUG(FOO) | |
51 | +# define DEBUG_ECHO | |
52 | +# define DEBUG_MSG(ARGLIST) | |
53 | +# define DEBUG_RETURN(TOKEN, NAME) return TOKEN | |
54 | +#endif | |
55 | + | |
56 | +#define RETURN(TOKEN) DEBUG_RETURN(TOKEN, #TOKEN) | |
57 | + | |
58 | +static int ref[2] = { 0, 0 }; | |
59 | +static size_t xrefbase, xrefptr; | |
60 | +enum { PSBB_PHASE_INIT = 0, PSBB_IN_HEADER, PSBB_IN_TRAILER }; | |
61 | +static int psbb_phase, psbb_crescendo_seek( void ), psbb_parse_status; | |
62 | + | |
63 | +#define DEBUG_PDFINVOKE(STATE, TOKEN, NAME) \ | |
64 | + do { yy_push_state( STATE ); DEBUG_RETURN(TOKEN, NAME); \ | |
65 | + } while(0) | |
66 | + | |
67 | +#define PDFINVOKE(STATE, TOKEN) DEBUG_PDFINVOKE(STATE, TOKEN, #TOKEN) | |
68 | + | |
69 | +#define PDFINVOKE_IF(FROM, STATE, TOKEN) \ | |
70 | + if( yy_top_state() == FROM ) DEBUG_PDFINVOKE(STATE, TOKEN, #TOKEN) | |
71 | + | |
72 | +#define PSBB_PARSE_FAILURE ((psbb_parse_status = 1) & 0) | |
73 | +%} | |
74 | + | |
75 | +%option stack noyywrap prefix = "psbb_" | |
76 | + | |
77 | +%x SKIP UNKNOWN | |
78 | +%x PSHDR PSBB PSTRAILER PSATEND PSVOID | |
79 | +%x PDFINIT PDFTRAILER PDFDICT PDFSTARTXREF PDFXREF | |
80 | +%x PDFGETREF PDFGOXREF PDFGETOBJECT PDFSCANOBJECT PDFREFER | |
81 | +%x PDFEVAL PDFDUMP PDFIGNORE PDFOBJTYPE PDFKIDS PDFALLKIDS | |
82 | +%x PDFXREFCONT PDFXREFPREV PDFXREFWALK | |
83 | +%x PDFMEDIABOX PDFMEDIABOXEVAL | |
84 | + | |
85 | +INTVAL ([0-9]+) | |
86 | +FLOATVAL ([0-9]+"."[0-9]*)|("."[0-9]+) | |
87 | +SEP ([\000\t\f\r\n\040]) | |
88 | + | |
89 | +LINEDATA ([^\r\n]*) | |
90 | +EOL (\r?\n)|\r | |
91 | + | |
92 | +READLN {LINEDATA}{EOL} | |
93 | + | |
94 | +PDFNAME ([^][(){}/%<>\000\t\f\r\n\040]+) | |
95 | +PDFSEP ([][(){}/%<>\000\t\f\r\n\040]) | |
96 | + | |
97 | +%{ | |
98 | +static const char *psbb_input_file; | |
99 | + | |
100 | +static int pdfseek( ssize_t offset ) | |
101 | +{ /* PDF files must be scanned in (effectively) random block order; this | |
102 | + * helper discards buffered lexer input, and repositions yyin to resume | |
103 | + * scanning at "offset", returning the fseek() status, (zero on success). | |
104 | + */ | |
105 | + yy_flush_buffer( YY_CURRENT_BUFFER ); | |
106 | + DEBUG_MSG(("%d: pdfseek to offset = %ld\n", YYSTATE, (long)offset)); | |
107 | + return fseek( yyin, (long)offset, SEEK_SET ); | |
108 | +} | |
109 | + | |
110 | +static int pdf_trailer( void ) | |
111 | +{ /* Helper for the INITIAL state rule which identifies PDF input: run a | |
112 | + * crescendo seek for the file trailer, record the outcome as the parse | |
113 | + * status, and diagnose a missing trailer when that outcome is EOF. | |
114 | + */ | |
115 | + const int outcome = psbb_parse_status = psbb_crescendo_seek(); | |
116 | + if( outcome == EOF ) | |
117 | + yyerror( "PDF file '%s' is malformed; no trailer found", psbb_input_file ); | |
118 | + return outcome; | |
119 | +} | |
120 | +%} | |
121 | +%% | |
122 | + /* Pattern rules section: this defines the behaviour of yylex(). The | |
123 | + * initial code block will be placed at the start of yylex() itself; it | |
124 | + * provides a hook whereby the lexer may be forced back to the INITIAL | |
125 | + * state, for each new input file to be scanned in sequence. | |
126 | + */ | |
127 | +%{ if( psbb_phase == PSBB_PHASE_INIT ) | |
128 | + { psbb_phase = PSBB_IN_HEADER; BEGIN INITIAL; | |
129 | + } | |
130 | +%} | |
131 | + /* Unqualified patterns apply in start condition INITIAL only; we use | |
132 | + * this to identify either PostScript or PDF input, or we bail out. | |
133 | + */ | |
134 | +.|\n { yymore(); BEGIN UNKNOWN; } | |
135 | +"%PDF-" { BEGIN PDFINIT; if (pdf_trailer() == EOF) return 0; } | |
136 | +"%!PS-Adobe-" { BEGIN PSHDR; yy_push_state( SKIP ); } | |
137 | + | |
138 | + | |
139 | + /* State: INITIAL | |
140 | + * | |
141 | + * We should have switched out of the INITIAL condition, as soon as any | |
142 | + * input stream content has been scanned; if we reach EOF while still in | |
143 | + * this condition, we were given a zero-length stream. | |
144 | + */ | |
145 | +<INITIAL><<EOF>> { yyerror( "file '%s' is empty", psbb_input_file ); | |
146 | + return PSBB_PARSE_FAILURE; | |
147 | + } | |
148 | + | |
149 | + /* State: SKIP | |
150 | + * | |
151 | + * We use the SKIP condition to swallow all input, after an initially | |
152 | + * matched pattern, up to end of line, before resuming in a specified | |
153 | + * condition for examination of the next line; (the obvious `.*\n' is | |
154 | + * not sufficient here, since we need to be prepared to handle any of | |
155 | + * the CR only, LF only, or CRLF line ending conventions). | |
156 | + */ | |
157 | +<SKIP>{READLN} { yy_pop_state(); } | |
158 | + | |
159 | + | |
160 | + /* State: PSHDR | |
161 | + * | |
162 | + * Scanning state used exclusively while reading the header comments | |
163 | + * within a PostScript file; any `%X', where `X' is any non-whitespace | |
164 | + * character, is a valid comment, but the header must terminate at any | |
165 | + * `%%EndComments' input, or any input line which does not match the | |
166 | + * `%X' start-of-line requirement. | |
167 | + */ | |
168 | +<PSHDR>{ | |
169 | +"%"[^ \t] { yy_push_state( SKIP ); } | |
170 | +"%%EndComments" { BEGIN PSVOID; } | |
171 | +} | |
172 | + | |
173 | + /* States: PSHDR and PSTRAILER | |
174 | + * | |
175 | + * In the case of PostScript input files, our objective is to identify | |
176 | + * a `%%BoundingBox:' specification within header or trailer comments, | |
177 | + * and to interpret its bounding box arguments. This start condition | |
178 | + * is made active when scanning these file sections; it identifies the | |
179 | + * requisite specification, then initiates the PSBB scanning state, to | |
180 | + * interpret the arguments. | |
181 | + */ | |
182 | +<PSHDR,PSTRAILER>{ | |
183 | +"%%BoundingBox:" { BEGIN PSBB; } | |
184 | +} | |
185 | + | |
186 | + /* States: PSHDR and PSVOID | |
187 | + * | |
188 | + * Scanning states provided as a shared resource, to facilitate the | |
189 | + * diagnosis of a missing %%BoundingBox specification, when scanning | |
190 | + * in either of the PSHDR or PSTRAILER contexts. | |
191 | + */ | |
192 | +<PSHDR,PSVOID>{ | |
193 | +<<EOF>> | | |
194 | +. { yyerror( "no '%s' specification found in file '%s'", | |
195 | + "%%BoundingBox", psbb_input_file | |
196 | + ); | |
197 | + return PSBB_PARSE_FAILURE; | |
198 | + } | |
199 | +} | |
200 | + | |
201 | + /* State: PSBB | |
202 | + * | |
203 | + * Scanning state used exclusively to interpret the arguments to a | |
204 | + * `%%BoundingBox:' comment, in either the PostScript file header, or | |
205 | + * the trailer; we expect four space-separated numeric values, each | |
206 | + * optionally signed, (negative co-ordinates are valid), or (in the | |
207 | + * header only) "(atend)". In the former case, we return each value | |
208 | + * separately; in the latter, we redirect to the file trailer. | |
209 | + */ | |
210 | +<PSBB>{ | |
211 | +[ \t]+ | |
212 | +[-+]?{INTVAL}/{SEP} { yylval = atol( yytext ); RETURN(VALUE); } | |
213 | +[-+]?{FLOATVAL}/{SEP} { yylval = lround( atof( yytext )); RETURN(VALUE); } | |
214 | +"(atend)" { if( psbb_phase == PSBB_IN_HEADER ) | |
215 | + { | |
216 | + /* In header comments, `%%BoundingBox: (atend)' | |
217 | + * indicates that the real specification for the | |
218 | + * bounding box will be found in the file trailer; we | |
219 | + * use a crescendo seek, from the end of the input | |
220 | + * file, with recursive invocation of the lexer | |
221 | + * itself, to locate this. | |
222 | + */ | |
223 | + BEGIN PSATEND; psbb_crescendo_seek(); | |
224 | + } | |
225 | + else | |
226 | + { /* We've already been redirected to the trailer, | |
227 | + * and found `%%BoundingBox: (atend)' again. | |
228 | + */ | |
229 | + yyerror( "'%s' is not allowed in trailer of '%s'", | |
230 | + yytext, psbb_input_file | |
231 | + ); | |
232 | + return PSBB_PARSE_FAILURE; | |
233 | + } | |
234 | + } | |
235 | +[^0-9 \t\r\n]+ { yyerror( "psbb: %s", yytext ); } | |
236 | +{EOL} { return 0; } | |
237 | +} | |
238 | + | |
239 | + /* State: PSATEND | |
240 | + * | |
241 | + * This start condition is used exclusively within recursive invocations | |
242 | + * of the lexer, initiated from the PSBB start condition, while performing | |
243 | + * the crescendo seek for the PostScript file trailer. Return is always | |
244 | + * to the calling lexer instance, with non-zero placing the caller in the | |
245 | + * appropriate condition for interpretation of the trailer. | |
246 | + */ | |
247 | +<PSATEND>{ | |
248 | +"%%Trailer" { psbb_phase = PSBB_IN_TRAILER; | |
249 | + BEGIN PSTRAILER; return 1; | |
250 | + } | |
251 | +.|\n | |
252 | +} | |
253 | + | |
254 | + /* State: PSTRAILER | |
255 | + * | |
256 | + * Scanning state used exclusively when scanning the PostScript file | |
257 | + * trailer, after redirection by `%%BoundingBox: (atend)' in the header; | |
258 | + * it looks for a further explicit bounding box specification within the | |
259 | + * trailer, further redirecting to PSVOID if none is present. (Notice | |
260 | + * that there is no `%%BoundingBox:' pattern here; that is specified | |
261 | + * above, in a start condition scope shared with PSHDR). | |
262 | + */ | |
263 | +<PSTRAILER>{ | |
264 | +. { BEGIN PSTRAILER; yy_push_state( SKIP ); } | |
265 | +<<EOF>> { BEGIN PSVOID; } | |
266 | +\n | |
267 | +} | |
268 | + | |
269 | + /* State: PDFINIT | |
270 | + * | |
271 | + * Scanning state used exclusively during crescendo_seek() on a PDF | |
272 | + * file, to locate the trailer section whence the starting offset for | |
273 | + * the primary cross reference index may be obtained. | |
274 | + */ | |
275 | +<PDFINIT>{ | |
276 | +"trailer"/{PDFSEP} { BEGIN PDFTRAILER; return PDFSTART; } | |
277 | +.|\n | |
278 | +} | |
279 | + | |
280 | + /* State: PDFTRAILER | |
281 | + * | |
282 | + * Scanning state initiated on locating a PDF file trailer; it is | |
283 | + * used to subsequently initiate parsing of the trailer dictionary, | |
284 | + * and to establish the starting location for its associated cross | |
285 | + * reference table. | |
286 | + */ | |
287 | +<PDFTRAILER>{ | |
288 | +"<<" { yy_push_state( PDFDICT ); } | |
289 | +"startxref"/{SEP} { BEGIN PDFSTARTXREF; } | |
290 | +{SEP}+ | |
291 | +. | |
292 | +} | |
293 | + | |
294 | + /* State: PDFSTARTXREF | |
295 | + * | |
296 | + * Scanning state initiated after locating a startxref record within | |
297 | + * a PDF file trailer; its purpose is to return the PDF file offset of | |
298 | + * the associated xref data to the parser. | |
299 | + */ | |
300 | +<PDFSTARTXREF>{ | |
301 | +{INTVAL}/{SEP} { xrefbase = atol( yytext ); RETURN(PDFOBJREF); } | |
302 | +{SEP}+ | |
303 | +} | |
304 | + | |
305 | + /* State: PDFDICT | |
306 | + * | |
307 | + * Scanning state initiated on locating the opening "<<" token of any | |
308 | + * PDF dictionary; here, we identify those dictionary entries which are | |
309 | + * of interest, regardless of context, and switch to an appropriate new | |
310 | + * start condition to handle each; (note that this lookup may be made | |
311 | + * dependent on the context whence this start condition was attained, | |
312 | + * by use of PDFINVOKE_IF to initiate the subsequent state switch). | |
313 | + */ | |
314 | +<PDFDICT>{ | |
315 | +"/Root"/{PDFSEP} { PDFINVOKE_IF( PDFTRAILER, PDFREFER, PDFROOT ); } | |
316 | +"/Prev"/{PDFSEP} { if( yy_top_state() == PDFXREFCONT ) BEGIN PDFXREFWALK; | |
317 | + else yy_push_state( PDFIGNORE ); | |
318 | + } | |
319 | +"/Type"/{PDFSEP} { yy_push_state( PDFOBJTYPE ); } | |
320 | +"/Pages"/{PDFSEP} { yy_push_state( PDFREFER ); } | |
321 | +"/Kids"/{PDFSEP} { yy_push_state( PDFALLKIDS ); } | |
322 | +"/MediaBox"/{PDFSEP} { yy_push_state( PDFMEDIABOX ); } | |
323 | +"/"{PDFNAME}/{PDFSEP} { yy_push_state( PDFIGNORE ); } | |
324 | +">>" { yy_pop_state(); } | |
325 | +.|\n | |
326 | +} | |
327 | + | |
328 | + /* State: PDFOBJTYPE | |
329 | + * | |
330 | + * Scanning state initiated on identifying a /Type key within a PDF | |
331 | + * object dictionary; it effectively causes the scanner to swallow the | |
332 | + * object type designation, for those object types which we expect to | |
333 | + * encounter, before reverting to the PDFDICT state, (also returning | |
334 | + * a PDFOBJREF token to the parser, in the specific case when the | |
335 | + * /Catalog object is identified). | |
336 | + * | |
337 | + * FIXME: we may need to add error reporting for detection of any | |
338 | + * object type which we do not expect to encounter. | |
339 | + */ | |
340 | +<PDFOBJTYPE>{ | |
341 | +"/Catalog"/{PDFSEP} { yy_pop_state(); RETURN(PDFOBJREF); } | |
342 | +"/Page"s?/{PDFSEP} { yy_pop_state(); } | |
343 | +{SEP}+ | |
344 | +} | |
345 | + | |
346 | + /* States: PDFKIDS and PDFALLKIDS | |
347 | + * | |
348 | + * Scanning states employed to extract the first object reference from | |
349 | + * a /Kids object dictionary entry. Always entered via the PDFALLKIDS | |
350 | + * state, whence the PDFREFER state is invoked to extract the first of | |
351 | + * the indirect object references within the associated reference list; | |
352 | + * on return, the state degrades to PDFKIDS, so causing any additional | |
353 | + * references present to be ignored, before returning to the PDFDICT | |
354 | + * state. | |
355 | + */ | |
356 | +<PDFALLKIDS>"["{SEP}* { BEGIN PDFKIDS; PDFINVOKE( PDFREFER, PDFOBJREF ); } | |
357 | +<PDFKIDS,PDFALLKIDS>{ | |
358 | +"]" { yy_pop_state(); } | |
359 | +{INTVAL}/{SEP} | |
360 | +"R"/{PDFSEP} | |
361 | +{SEP}+ | |
362 | +} | |
363 | + | |
364 | + /* State: PDFREFER | |
365 | + * | |
366 | + * Scanning state initiated when the anticipated PDF parsing context | |
367 | + * represents a PDF object reference; it extracts the object index and | |
368 | + * object version values, returning them separately to the parser, and | |
369 | + * then expects, and returns the 'R' operator, before reverting to the | |
370 | + * start condition whence this state was attained. | |
371 | + */ | |
372 | +<PDFREFER>{ | |
373 | +"R"/{PDFSEP} { yy_pop_state(); RETURN('R'); } | |
374 | +{INTVAL}/{PDFSEP} { yylval = atol( yytext ); RETURN(VALUE); } | |
375 | +./({EOL}|"/") { yy_pop_state(); } | |
376 | +[ \t\r\n]+ | |
377 | +} | |
378 | + | |
379 | + /* State: PDFMEDIABOX | |
380 | + * | |
381 | + * Scanning state initiated at commencement of parsing a PDF MediaBox | |
382 | + * specification; after locating the opening bracket of the bounding | |
383 | + * box array, control is delegated to the following PDFMEDIABOXEVAL | |
384 | + * state, to capture the array values. | |
385 | + */ | |
386 | +<PDFMEDIABOX>{ | |
387 | +"[" { BEGIN PDFMEDIABOXEVAL; } | |
388 | +{SEP}+ | |
389 | +} | |
390 | + | |
391 | + /* State: PDFMEDIABOXEVAL | |
392 | + * | |
393 | + * Scanning state initiated exclusively from the PDFMEDIABOX state, to | |
394 | + * capture the values from the bounding box array; we require integers, | |
395 | + * but must accept floating point, (which we round), and an optional | |
396 | + * leading sign, since box co-ordinates may legitimately be negative. | |
397 | + */ | |
398 | +<PDFMEDIABOXEVAL>{ | |
399 | +[-+]?{FLOATVAL}/{PDFSEP} { yylval = lround( atof( yytext )); RETURN(VALUE); } | |
400 | +[-+]?{INTVAL}/{PDFSEP} { yylval = atol( yytext ); RETURN(VALUE); } | |
401 | +"]" { yy_pop_state(); } | |
402 | +{SEP}+ | |
403 | +} | |
404 | + | |
405 | + /* State: PDFEVAL | |
406 | + * | |
407 | + * Scanning state initiated when we expect an integer value token in the | |
408 | + * PDF parse stream; swallow leading white space, capture the token, then | |
409 | + * revert to the state whence this condition was invoked. | |
410 | + */ | |
411 | +<PDFEVAL>{ | |
412 | +{INTVAL}/{PDFSEP} { yylval = atol( yytext ); yy_pop_state(); RETURN(VALUE); } | |
413 | +[ \t\r\n]+ | |
414 | +} | |
415 | + | |
416 | + /* State: PDFIGNORE | |
417 | + * | |
418 | + * Scanning state in which all input is ignored, until the next EOL, | |
419 | + * or the next PDF dictionary key, or possible dictionary terminator. | |
420 | + */ | |
421 | +<PDFIGNORE>{ | |
422 | +./({EOL}|[/>]) { DEBUG_ECHO; yy_pop_state(); } | |
423 | +. { yymore(); } | |
424 | +} | |
425 | + | |
426 | + /* State: PDFXREF | |
427 | + * | |
428 | + * Scanning state initiated after we have repositioned the PDF stream to | |
429 | + * a point where we expect to find an "xref" table; confirm this position | |
430 | + * is as expected, then delegate "xref" lookup to the following PDFGETREF | |
431 | + * start condition. | |
432 | + */ | |
433 | +<PDFXREF>{ | |
434 | +"xref"{SEP}+ { xrefptr += yyleng; BEGIN PDFGETREF; return PDFLOOKUP; } | |
435 | +.|\n { yyerror( "in '%s'; expected 'xref', but found '%s'", | |
436 | + psbb_input_file, yytext | |
437 | + ); | |
438 | + return PSBB_PARSE_FAILURE; | |
439 | + } | |
440 | +} | |
441 | + | |
442 | + /* State: PDFGETREF | |
443 | + * | |
444 | + * Scanning state initiated exclusively from the PDFXREF state, after | |
445 | + * verification of the "xref" parse context, to lookup the offset of the | |
446 | + * PDF object with index specified in global variable "ref[0]", and with | |
447 | + * generation count as specified in "ref[1]". We begin by capturing a | |
448 | + * pair of integer values, representing the base index and span for | |
449 | + * the current "xref" table... | |
450 | + */ | |
451 | +<PDFGETREF>{ | |
452 | +{INTVAL}/{SEP} { xrefptr += yyleng; yylval = atol( yytext ); } | |
453 | +{SEP}+ { xrefptr += yyleng; RETURN(VALUE); } | |
454 | +} | |
455 | + | |
456 | + /* State: PDFXREFCONT | |
457 | + * | |
458 | + * Scanning state initiated when a specific object reference is not | |
459 | + * represented within the currently accessible segment of a PDF xref | |
460 | + * table; it first looks for any immediately following segment of the | |
461 | + * xref table, which may include the reference, ultimately falling | |
462 | + * through to the following trailer dictionary, in which case, the | |
463 | + * PDFXREFPREV state is invoked, attempting to follow a /Prev link | |
464 | + * to an earlier generation of the xref table. | |
465 | + */ | |
466 | +<PDFXREFCONT>{ | |
467 | +{INTVAL}/{SEP} { yyless(0); BEGIN PDFGETREF; RETURN(PDFLOOKUP); } | |
468 | +"trailer"/{PDFSEP} { yy_push_state( PDFXREFPREV ); } | |
469 | +{SEP}+ { xrefptr += yyleng; } | |
470 | +} | |
471 | + | |
472 | + /* State: PDFXREFPREV | |
473 | + * | |
474 | + * Scanning state initiated on fall through from the PDFXREFCONT state, | |
475 | + * into the PDF trailer; it looks for the start of the trailer dictionary, | |
476 | + * then switches to a PDFDICT scan to locate the /Prev key, whence the | |
477 | + * PDFXREFWALK state is invoked, to follow the /Prev link. | |
478 | + */ | |
479 | +<PDFXREFPREV>{ | |
480 | +"<<" { BEGIN PDFDICT; } | |
481 | +{SEP}+ | |
482 | +} | |
483 | + | |
484 | + /* State: PDFXREFWALK | |
485 | + * | |
486 | + * Scanning state initiated after identification of the /Prev key in a | |
487 | + * PDF trailer dictionary; it repositions the file input pointer to the | |
488 | + * associated offset value, before restarting the PDFXREF scan. | |
489 | + */ | |
490 | +<PDFXREFWALK>{ | |
491 | +{INTVAL}/{PDFSEP} { pdfseek( xrefptr = atol( yytext )); BEGIN PDFXREF; } | |
492 | +{SEP}+ | |
493 | +} | |
494 | + | |
495 | + /* State: PDFGOXREF | |
496 | + * | |
497 | + * Scanning state initiated after locating a PDF xref table entry for a | |
498 | + * specified object; seek to it, if marked 'n' with the expected generation. | |
499 | + */ | |
500 | +<PDFGOXREF>{READLN} { long offset = 0L, gen = -1L; char disp = '?'; | |
501 | + int fields = sscanf( yytext, "%10ld %5ld %c", &offset, &gen, &disp ); | |
502 | + DEBUG_MSG(("%d: %.18s --> %ld; %ld %c\n", YYSTATE, yytext, offset, gen, disp)); | |
503 | + if( fields == 3 && disp == 'n' && gen == ref[1] ) | |
504 | + { pdfseek( offset ); BEGIN PDFGETOBJECT; | |
505 | + } | |
506 | + else | |
507 | + { yyerror( "index entry '%.18s' unexpected in file '%s'", | |
508 | + yytext, psbb_input_file | |
509 | + ); | |
510 | + return PSBB_PARSE_FAILURE; | |
511 | + } | |
512 | + } | |
513 | + | |
514 | + /* State: PDFGETOBJECT | |
515 | + * | |
516 | + * Scanning state initiated when the PDF input pointer has been set | |
517 | + * to the start of a specific object; it returns the associated object | |
518 | + * identification tokens to the parser, for confirmation of expected | |
519 | + * object identity, before switching to the PDFSCANOBJECT state, to | |
520 | + * scan the associated object data. | |
521 | + */ | |
522 | +<PDFGETOBJECT>{ | |
523 | +"obj"/{PDFSEP} { BEGIN PDFSCANOBJECT; RETURN(PDFOBJECT); } | |
524 | +{INTVAL}/{SEP} { yylval = atol( yytext ); RETURN(VALUE); } | |
525 | +{SEP}+ | |
526 | +} | |
527 | + | |
528 | + /* State: PDFSCANOBJECT | |
529 | + * | |
530 | + * Scanning state initiated when scanning PDF object data; effectively, | |
531 | + * it ignores all content, up to the terminating "endobj" token, except | |
532 | + * for the content of any embedded object dictionary, which is scanned | |
533 | + * in the PDFDICT state. | |
534 | + */ | |
535 | +<PDFSCANOBJECT>{ | |
536 | +{SEP}*"<<" { yy_push_state( PDFDICT ); } | |
537 | +"endobj"/{SEP} { DEBUG_ECHO; RETURN(PDFENDOBJ); } | |
538 | +(.|\n) | |
539 | +} | |
540 | + | |
541 | + /* State: UNKNOWN | |
542 | + * | |
543 | + * Finally, the UNKNOWN scanning state is activated when the INITIAL scan | |
544 | + * of the first input line fails to recognize the file signature; it causes | |
545 | + * the lexer to bail out immediately. | |
546 | + */ | |
547 | +<UNKNOWN>[^\r\n]* { yyerror( "unknown file signature '%s' in file '%s'", | |
548 | + yytext, psbb_input_file | |
549 | + ); | |
550 | + return PSBB_PARSE_FAILURE; | |
551 | + } | |
552 | +%% | |
553 | +/* General code section: this provides the implementation for the | |
554 | + * parser and lexical analyser API, servicing groff's psbb request. | |
555 | + */ | |
556 | +int psbb_parser_status_check; | |
557 | +void psbb_get_bounding_box( const char *source ) | |
558 | +{ | |
559 | + /* This is the primary entry point for the parser/lexer combination; | |
560 | + * it sets up the specified source file as the lexer input, then... | |
561 | + */ | |
562 | + psbb_parser_status_check = EOF; | |
563 | + if( (yyin = psbb_open_file_for_parse( psbb_input_file = source )) != NULL ) | |
564 | + { | |
565 | + /* ...when successful, forces the lexer into its initial state, invokes | |
566 | + * the parser, and records the result in psbb_parser_status_check. | |
567 | + * NOTE(review): yyin is not closed here; confirm it is closed elsewhere. | |
568 | + */ | |
569 | + psbb_parse_status = 0; | |
570 | + psbb_phase = PSBB_PHASE_INIT; | |
571 | + psbb_parser_status_check = yyparse() | psbb_parse_status; | |
572 | + yy_flush_buffer( YY_CURRENT_BUFFER ); | |
573 | + } | |
574 | +} | |
575 | + | |
576 | +static int psbb_crescendo_seek() | |
577 | +{ | |
578 | + /* A helper function to iteratively search for any pattern, | |
579 | + * close to the end of the file, which causes the lexer to | |
580 | + * return a positive token. Initial search is limited to a | |
581 | + * block of 64 bytes, at the bitter end of the file; the block | |
582 | + * size is doubled on each subsequent iteration, finishing with | |
583 | + * one scan of the entire file, once the backward seek fails. | |
584 | + * Returns zero when a match is found, or EOF otherwise. | |
585 | + */ | |
586 | + ssize_t offset; | |
587 | + for( offset = 64L; offset > 0L; offset <<= 1 ) | |
588 | + { | |
589 | + /* In this case, we use a crescendo seek, with | |
590 | + * recursive invocation of the lexer itself, to | |
591 | + * locate the start of the trailer... | |
592 | + */ | |
593 | + int status; | |
594 | + yy_flush_buffer( YY_CURRENT_BUFFER ); | |
595 | + if( (status = fseek( yyin, -offset, SEEK_END )) != 0 ) | |
596 | + /* | |
597 | + * ...with a "last chance" search of the whole | |
598 | + * file, if the crescendo overruns the start of | |
599 | + * the file without finding it... | |
600 | + */ | |
601 | + status = fseek( yyin, offset = 0L, SEEK_SET ); | |
602 | + | |
603 | + if( (status == 0) && (yylex() > 0) ) | |
604 | + /* | |
605 | + * ...breaking out of the crescendo cycle, as | |
606 | + * soon as we find it, (or we've searched the | |
607 | + * entire file without finding it). | |
608 | + */ | |
609 | + return 0; | |
610 | + } | |
611 | + return EOF; | |
612 | +} | |
613 | + | |
614 | +void psbb_locate( int index, int generation ) | |
615 | +{ | |
616 | + /* Record the index and generation of the PDF object to be | |
617 | + * located next, in ref[]; invoked by the parser when | |
618 | + * processing a PDFOBJREF token sequence, i.e. one of:-- | |
619 | + * | |
620 | + * PDFOBJREF VALUE VALUE 'R', or | |
621 | + * VALUE VALUE 'R' PDFOBJREF | |
622 | + */ | |
623 | + ref[0] = index; | |
624 | + ref[1] = generation; | |
625 | +} | |
626 | + | |
627 | +void psbb_walk( void ) | |
628 | +{ | |
629 | + /* Helper function, invoked by the parser when processing | |
630 | + * a root PDFOBJREF token, or PDFENDOBJ token, to walk the | |
631 | + * chain of PDF object references from the document root, | |
632 | + * until the first leaf node, (nominally expected to be | |
633 | + * the first /Page object), has been located. | |
634 | + */ | |
635 | + if( ref[0] > 0 ) | |
636 | + { /* The last object parsed has at least one child object | |
637 | + * reference; reset the scanner context, to locate and | |
638 | + * process the first such object. | |
639 | + */ | |
640 | + BEGIN PDFXREF; pdfseek( xrefptr = xrefbase ); | |
641 | + } | |
642 | + else | |
643 | + { /* The last object parsed is a leaf node object; ensure | |
644 | + * that there is no residual data in the input buffer, | |
645 | + * and force EOF on the next input operation. | |
646 | + */ | |
647 | + yy_flush_buffer( YY_CURRENT_BUFFER ); | |
648 | + fseek( yyin, 0, SEEK_END ); | |
649 | + } | |
650 | +} | |
651 | + | |
652 | +void psbb_lookup( int base, int span ) | |
653 | +{ | |
654 | + /* A helper function, invoked (possibly iteratively) by | |
655 | + * the lexer, as a callback via the parser, during the | |
656 | + * sequence of start conditions initiated from PDFXREF, | |
657 | + * while handling a psbb_locate() request, to retrieve | |
658 | + * a possible xref table entry for the object identified | |
659 | + * by global index ref[0], within a section of the table | |
660 | + * representing span objects, contiguously numbered from | |
661 | + * the specified base index. | |
662 | + */ | |
663 | + if( (ref[0] >= base) && (ref[0] < (base + span)) ) | |
664 | + { | |
665 | + /* The required xref entry lies within the span of the | |
666 | + * xref table section at the current xrefptr offset; with | |
667 | + * 20 bytes per entry, we seek directly to the entry | |
668 | + * required, (xrefptr is not updated), and follow it. | |
669 | + */ | |
670 | + pdfseek( xrefptr + 20 * (ref[0] - base) ); | |
671 | + BEGIN PDFGOXREF; | |
672 | + } | |
673 | + else | |
674 | + { /* The required xref entry is NOT accessible from the | |
675 | + * xref table section at the current xrefptr offset; we | |
676 | + * move the xrefptr just beyond the current section of | |
677 | + * the table, then switch to the transient PDFXREFCONT | |
678 | + * state, to search in any subsequent section of the | |
679 | + * table, or to follow any /Prev link to an earlier | |
680 | + * generation of it. | |
681 | + */ | |
682 | + pdfseek( xrefptr += 20 * span ); | |
683 | + BEGIN PDFXREFCONT; | |
684 | + } | |
685 | + DEBUG_MSG(("%d: lookup object #%d @ %d within %d..%d\n", | |
686 | + YYSTATE, ref[0], xrefptr, base, base + span | |
687 | + )); | |
688 | +} | |
689 | + | |
690 | +static | |
691 | +int pdf_object_lookup_failed( const char *desc, int wanted, int found ) | |
692 | +{ | |
693 | + /* A local helper function, invoked by the following psbb_chkref() | |
694 | + * function, to report a PDF lookup mismatch for either the requested | |
695 | + * object number, or its generation number. | |
696 | + */ | |
697 | + yyerror( "object reference mismatch in '%s': expected %s %d but found %d", | |
698 | + psbb_input_file, desc, wanted, found | |
699 | + ); | |
700 | + return PSBB_PARSE_FAILURE; | |
701 | +} | |
702 | + | |
703 | +int psbb_chkref( int obj, int gen ) | |
704 | +{ | |
705 | + /* A helper function, invoked by the parser, to confirm that a | |
706 | + * PDF object reference lookup has located the correct object; it | |
707 | + * diagnoses any mismatch, else clears ref[0] and returns zero. | |
708 | + */ | |
709 | + if( obj != ref[0] ) | |
710 | + return pdf_object_lookup_failed( "object", ref[0], obj ); | |
711 | + | |
712 | + if( gen != ref[1] ) | |
713 | + return pdf_object_lookup_failed( "generation", ref[1], gen ); | |
714 | + | |
715 | + DEBUG_MSG(("%d: object: %d; generation = %d\n", YYSTATE, obj, gen)); | |
716 | + return ref[0] = 0; | |
717 | +} | |
718 | + | |
719 | +/* vim: set cin fo=croqj: */ |
@@ -0,0 +1,201 @@ | ||
1 | +// t-psbb.cpp -*- C++ -*- | |
2 | +// | |
3 | +// Test the effect of the groff .psbb request handling code. | |
4 | +// | |
5 | +// Written by Keith Marshall <keith@users.osdn.me> | |
6 | +// Copyright (C) 2017, Free Software Foundation, Inc. | |
7 | +// | |
8 | +// This file is part of groff. | |
9 | +// | |
10 | +// groff is free software; you can redistribute it and/or modify it under | |
11 | +// the terms of the GNU General Public License as published by the Free | |
12 | +// Software Foundation, either version 3 of the License, or | |
13 | +// (at your option) any later version. | |
14 | +// | |
15 | +// groff is distributed in the hope that it will be useful, but WITHOUT ANY | |
16 | +// WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
17 | +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
18 | +// for more details. | |
19 | +// | |
20 | +// You should have received a copy of the GNU General Public License | |
21 | +// along with this program. If not, see <http://www.gnu.org/licenses/>. | |
22 | +// | |
23 | +#define GROFF_PSBB_TEST_CODE | |
24 | +#define GROFF_INPUT_CPP_EMULATION | |
25 | + | |
26 | +#ifdef GROFF_INPUT_CPP_EMULATION | |
27 | +// Always defined, within this test module; this serves to demarcate | |
28 | +// code which emulates (not necessarily faithfully) features which are | |
29 | +// required by groff's .psbb request handling code, (which may be found | |
30 | +// in src/roff/troff/input.cpp). | |
31 | +// | |
32 | +#include "psbb.h" | |
33 | +#include "errarg.h" | |
34 | +#include "error.h" | |
35 | + | |
36 | +#include <cstring> | |
37 | +#include <cerrno> | |
38 | + | |
39 | +const char *program_name = "psbb"; | |
40 | +const char *current_filename = "t-psbb"; | |
41 | +const char *current_source_filename = "t-psbb.cpp"; | |
42 | + | |
43 | +int current_lineno = 0; | |
44 | + | |
45 | +class symbol | |
46 | +{ // Minimal replacement for groff's class implementation, sufficient | |
47 | + // to satisfy the usage pattern of the ps_bbox_request() function; it | |
48 | + // can furnish exactly one token, corresponding to each argv element | |
49 | + // supplied by the test program, subsequently reporting both end of | |
50 | + // line and end of file, until reset by a new argv. | |
51 | + public: | |
52 | + symbol():m_contents((const char *)(NULL)){} | |
53 | + const char *contents(){ return m_contents; } | |
54 | + void next(){ m_contents = (const char *)(NULL); } | |
55 | + void set_contents(const char *contents){ m_contents = contents; } | |
56 | + int is_null(){ return (m_contents == (const char *)(NULL)); } | |
57 | + int newline(){ return 1; } | |
58 | + int eof(){ return 1; } | |
59 | + | |
60 | + private: | |
61 | + const char *m_contents; | |
62 | +}; | |
63 | + | |
64 | +// Simplified implementation of get_long_name(); always return this | |
65 | +// fixed symbol class instance. | |
66 | +// | |
67 | +symbol tok; | |
68 | +symbol get_long_name(int unused){ return tok; } | |
69 | + | |
70 | +// Substitute a no-op for skip_line() | |
71 | +// | |
72 | +static void skip_line(void){} | |
73 | + | |
74 | +class search_path | |
75 | +{ // Minimal replacement for groff's class implementation, sufficient | |
76 | + // to emulate its open_file_cautious() method. | |
77 | + public: | |
78 | + FILE *open_file_cautious( const char *filename, int opt, const char *mode ) | |
79 | + { return fopen( filename, mode ); } | |
80 | +}; | |
81 | +#define FOPEN_RB "rb" | |
82 | + | |
83 | +// Not exactly as groff implements it, but sufficient for our needs, | |
84 | +// without requiring anything more than a default constructor for the | |
85 | +// search_path class. | |
86 | +// | |
87 | +search_path include_search_path; | |
88 | + | |
89 | +#endif | |
90 | +// Following the GROFF_INPUT_CPP_EMULATION block, we reproduce content | |
91 | +// from src/roff/troff/input.cpp itself, (as we intend that it would be | |
92 | +// ultimately implemented). | |
93 | + | |
94 | +// .psbb | |
95 | +// | |
96 | +// Extract bounding box limits from an EPS or PDF file, and assign | |
97 | +// them to the following four gtroff registers:-- | |
98 | +// | |
99 | +static int llx_reg_contents = 0; | |
100 | +static int lly_reg_contents = 0; | |
101 | +static int urx_reg_contents = 0; | |
102 | +static int ury_reg_contents = 0; | |
103 | + | |
104 | +// psbb_assign_registers() | |
105 | +// | |
106 | +// An extern "C" callback function, invoked via our yacc parser, | |
107 | +// to perform initialization and/or register assignment. | |
108 | +// | |
109 | +void psbb_assign_registers(int llx, int lly, int urx, int ury) | |
110 | +{ | |
111 | + llx_reg_contents = llx; | |
112 | + lly_reg_contents = lly; | |
113 | + urx_reg_contents = urx; | |
114 | + ury_reg_contents = ury; | |
115 | +} | |
116 | + | |
117 | +// psbb_open_file_for_parse() | |
118 | +// | |
119 | +// A further extern "C" callback function, called by our yacc parser | |
120 | +// start-up routine, psbb_get_bounding_box(), to attach "yyin" to the | |
121 | +// specified file, in preparation for lexical analysis. | |
122 | +// | |
123 | +FILE *psbb_open_file_for_parse(const char *filename) | |
124 | +{ FILE *fp = include_search_path.open_file_cautious(filename, 0, FOPEN_RB); | |
125 | + if (fp == NULL) error("cannot open '%1': %2", filename, strerror(errno)); | |
126 | + return fp; | |
127 | +} | |
128 | + | |
129 | +// ps_bbox_request() | |
130 | +// | |
131 | +// Handle the .psbb request; this is, effectively, a verbatim copy of | |
132 | +// code, as it should ultimately appear, in src/roff/troff/input.cpp | |
133 | +// | |
134 | +void ps_bbox_request() | |
135 | +{ // Parse input line, to extract file name. | |
136 | + // | |
137 | + symbol nm = get_long_name(1); | |
138 | + if (nm.is_null()) | |
139 | + // No file name specified: ignore the entire request. | |
140 | + // | |
141 | + skip_line(); | |
142 | + else { | |
143 | + // File name acquired: swallow the rest of the line. | |
144 | + // | |
145 | + while (!tok.newline() && !tok.eof()) | |
146 | + tok.next(); | |
147 | + errno = 0; | |
148 | + | |
149 | + // Initialize, then update {llx,lly,urx,ury}_reg_contents. | |
150 | + // | |
151 | + psbb_assign_registers(0, 0, 0, 0); | |
152 | + psbb_get_bounding_box(nm.contents()); | |
153 | + | |
154 | + // All done for .psbb; move on, to continue | |
155 | + // input stream processing. | |
156 | + // | |
157 | + tok.next(); | |
158 | + } | |
159 | +} | |
160 | + | |
161 | +#ifdef GROFF_PSBB_TEST_CODE | |
162 | +// Again, always defined; this block implements the test procedure, | |
163 | +// simulating a groff input stream in which each element of argv is | |
164 | +// interpreted as if parsed as file name arguments to a succession | |
165 | +// of .psbb requests, subsequently printing the bounding box range | |
166 | +// co-ordinates extracted from each named file. | |
167 | + | |
168 | +EXTERN_C int psbb_parser_status_check; | |
169 | +static int ps_bbox_request_status(const char *argv) | |
170 | +{ | |
171 | + // Push a single argv element into the emulated groff input | |
172 | + // stream, then invoke actual src/roff/troff/input.cpp code, | |
173 | + // as if this argv has been read as ".psbb argv"; return the | |
174 | + // final internal status code from the underlying parser. | |
175 | + // | |
176 | + tok.set_contents(argv); ps_bbox_request(); | |
177 | + return psbb_parser_status_check; | |
178 | +} | |
179 | + | |
180 | +int main(int argc, char **argv) | |
181 | +{ | |
182 | + // Require at least one command argument... | |
183 | + // | |
184 | + if (argc < 2) | |
185 | + { error("usage: psbb filename ...\n"); | |
186 | + return 2; | |
187 | + } | |
188 | + // ...then push each, in turn, into the simulated groff input | |
189 | + // stream, and interpret it as if read as an argument to .psbb; | |
190 | + // successfully processed, report the bounding box result. | |
191 | + // | |
192 | + while (--argc > 0) | |
193 | + { current_lineno = __LINE__; if (ps_bbox_request_status(*++argv) == 0) | |
194 | + printf("%s: bounding box = (%d,%d)..(%d,%d)\n", *argv, | |
195 | + llx_reg_contents, lly_reg_contents, urx_reg_contents, ury_reg_contents | |
196 | + ); | |
197 | + } | |
198 | + return 0; | |
199 | +} | |
200 | + | |
201 | +#endif /* GROFF_PSBB_TEST_CODE */ |