• R/O
  • HTTP
  • SSH
  • HTTPS

コミット

タグ
未設定

よく使われているワード(クリックで追加)

javac++androidlinuxc#windowsobjective-ccocoa誰得qtpythonphprubygameguibathyscaphec計画中(planning stage)翻訳omegatframeworktwitterdomtestvb.netdirectxゲームエンジンbtronarduinopreviewer

Demonstration of groff .psbb request handling code, for EPS and PDF input files


コミットメタ情報

リビジョンb9a8bc4c7cf15ce9fdc8e487d4dd006cbef86d7c (tree)
日時2017-10-08 06:39:47
作者Keith Marshall <keithmarshall@user...>
コミッターKeith Marshall

ログメッセージ

Implement an extended .psbb request handling API.

* psbb.h psbb.y psbblex.l: New files; they implement the API.
* t-psbb.cpp: New file; it implements a test program, emulating the
intended gtroff usage of this API.

* GNUmakefile: New file; it facilitates building the test program,
using GNU make.

* README .gitignore .hgignore: New files.

変更サマリ

差分

--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,9 @@
1+ChangeLog
2+*.bak
3+*.orig
4+*.safe
5+*.tab.*
6+*.[ao]
7+ps*.c
8+psbb
9+*~
--- /dev/null
+++ b/.hgignore
@@ -0,0 +1,10 @@
1+syntax: glob
2+ChangeLog
3+**.bak
4+**.orig
5+**.safe
6+**.tab.*
7+**.[ao]
8+ps**.c
9+psbb
10+**~
--- /dev/null
+++ b/GNUmakefile
@@ -0,0 +1,67 @@
1+# GNUmakefile
2+#
3+# Simple makefile to build .psbb request handler test/demonstration
4+# program; with no apology, this may gratuitously require GNU make.
5+#
6+# Written by Keith Marshall <keith@users.osdn.me>
7+# Copyright (C) 2017, Free Software Foundation, Inc.
8+#
9+# This file is part of groff.
10+#
11+# groff is free software; you can redistribute it and/or modify it under
12+# the terms of the GNU General Public License as published by the Free
13+# Software Foundation, either version 3 of the License, or
14+# (at your option) any later version.
15+#
16+# groff is distributed in the hope that it will be useful, but WITHOUT ANY
17+# WARRANTY; without even the implied warranty of MERCHANTABILITY or
18+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
19+# for more details.
20+#
21+# You should have received a copy of the GNU General Public License
22+# along with this program. If not, see <http://www.gnu.org/licenses/>.
23+#
24+psbb:
25+
26+# For convenience, we've placed a necessary subset of libgroff sources
27+# and headers in our local libgroff subdirectory; ensure that make, and
28+# the C/C++ compilers can see them.
29+#
30+vpath %.c ./libgroff
31+vpath %.cpp ./libgroff
32+vpath %.h ./libgroff
33+
34+CFLAGS = -g -O2 -I./libgroff
35+CXXFLAGS = -g -O2 -I./libgroff
36+
37+# GNU make's default $(YACC) rule doesn't satisfy the dependencies we
38+# would like, so we specify our own alternative rule.
39+#
40+%.tab.c %.tab.h: %.y
41+ $(YACC) $(YFLAGS) -b $* -d $<
42+
43+# This is a minimal subset of libgroff.a, (just sufficient to satisfy
44+# our immediate requirements for our .psbb handler test program).
45+#
46+libgroff.a: error.o errarg.o itoa.o fatal.o
47+ $(AR) rcs $@ $^
48+
49+# By default, GNU make uses $(CC) for linking, but we need C++ support,
50+# (which $(CC) doesn't give us automatically); moreover, we do not want
51+# psbb to incur a default dependency on psbb.o, so link explicitly.
52+#
53+psbb: t-psbb.o psbblex.o psbb.tab.o libgroff.a
54+ $(CXX) $(LDFLAGS) $(TARGET_ARCH) $^ -o $@
55+
56+# Object file dependencies: GCC could generate these automatically, but
57+# this is simpler, in this trivial instance.
58+#
59+psbblex.o: psbblex.c psbb.tab.h psbb.h
60+t-psbb.o psbblex.o psbb.tab.o error.o errarg.o: error.h errarg.h
61+t-psbb.o: psbb.h
62+
63+# Clean up rules
64+#
65+clean:; $(RM) *.o psbb
66+realclean: clean
67+ $(RM) `echo *.l | sed 's,\.l,.c,g'` *.tab.* *.a
--- /dev/null
+++ b/README
@@ -0,0 +1,4 @@
1+The code in this directory implements a proposed new API, extending
2+the capabilities of groff's .psbb request to support extraction of the
3+bounding box (/MediaBox) properties from PDF files, in addition to the
4+original support for %%BoundingBox extraction from [E]PS files.
--- /dev/null
+++ b/psbb.h
@@ -0,0 +1,52 @@
1+/* psbb.h
2+ *
3+ * Declaration of .psbb request handling API.
4+ *
5+ * Written by Keith Marshall <keith@users.osdn.me>
6+ * Copyright (C) 2017, Free Software Foundation, Inc.
7+ *
8+ * This file is part of groff.
9+ *
10+ * groff is free software; you can redistribute it and/or modify it under
11+ * the terms of the GNU General Public License as published by the Free
12+ * Software Foundation, either version 3 of the License, or
13+ * (at your option) any later version.
14+ *
15+ * groff is distributed in the hope that it will be useful, but WITHOUT ANY
16+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or
17+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18+ * for more details.
19+ *
20+ * You should have received a copy of the GNU General Public License
21+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
22+ */
23+#ifndef YY_PSBB_H_INCLUDED
24+#define YY_PSBB_H_INCLUDED
25+
26+#include <stdio.h>
27+
28+#define yylval psbb_lval
29+#define yyparse psbb_parse
30+#define yyerror psbb_error
31+
32+#define psbb_error errprintf
33+
34+#undef EXTERN_C
35+#ifdef __cplusplus
36+# define EXTERN_C extern "C"
37+#else
38+# define EXTERN_C
39+#endif
40+
41+EXTERN_C void psbb_get_bounding_box (const char *);
42+EXTERN_C FILE *psbb_open_file_for_parse (const char *);
43+EXTERN_C void psbb_assign_registers (int, int, int, int);
44+EXTERN_C void psbb_error (const char *, ...);
45+
46+EXTERN_C void psbb_lookup (int, int);
47+EXTERN_C void psbb_locate (int, int);
48+EXTERN_C int psbb_chkref (int, int);
49+
50+EXTERN_C void psbb_walk (void);
51+
52+#endif /* YY_PSBB_H_INCLUDED: end of file */
--- /dev/null
+++ b/psbb.y
@@ -0,0 +1,62 @@
1+/* psbb.y
2+ *
3+ * Parser grammar to drive the lexical analyser for extraction of bounding
4+ * box properties from EPS, or PDF files, to support groff's .psbb request.
5+ *
6+ * Written by Keith Marshall <keith@users.osdn.me>
7+ * Copyright (C) 2017, Free Software Foundation, Inc.
8+ *
9+ * This file is part of groff.
10+ *
11+ * groff is free software; you can redistribute it and/or modify it under
12+ * the terms of the GNU General Public License as published by the Free
13+ * Software Foundation, either version 3 of the License, or
14+ * (at your option) any later version.
15+ *
16+ * groff is distributed in the hope that it will be useful, but WITHOUT ANY
17+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or
18+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
19+ * for more details.
20+ *
21+ * You should have received a copy of the GNU General Public License
22+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
23+ */
24+%{
25+#include "psbb.h"
26+
27+#define psbb_assign psbb_assign_registers
28+%}
29+
30+%name-prefix = "psbb_"
31+
32+%token PDFSTART PDFROOT
33+%token VALUE PDFLOOKUP PDFOBJECT PDFOBJREF PDFENDOBJ
34+
35+%%
36+/* A .psbb related parse of either any [E]PS, or any PDF file,
37+ * MUST conform to this "psbb" grammar.
38+ */
39+psbb: /* nothing */
40+ | psbb root PDFOBJREF { psbb_walk(); }
41+ | psbb PDFLOOKUP VALUE VALUE { psbb_lookup( $3, $4 ); }
42+ | psbb PDFOBJREF VALUE VALUE 'R' { psbb_locate( $3, $4 ); }
43+ | psbb VALUE VALUE PDFOBJECT { psbb_chkref( $2, $3 ); }
44+ | psbb PDFENDOBJ { psbb_walk(); }
45+ | psbb bbox
46+ ;
47+
48+/* The "root" rule is specific to parsing of PDF files; it should
49+ * be invoked just once, early in the parse cycle for each file, to
50+ * initiate location and parsing of the PDF /Catalog object.
51+ */
52+root: PDFROOT VALUE VALUE 'R' { psbb_locate( $2, $3 ); }
53+ ;
54+
55+/* Applicable to either [E]PS or PDF files, at any time when we
56+ * have accumulated four numeric values on the parser stack, we
57+ * assume that they represent bounding box co-ordinates.
58+ */
59+bbox: VALUE VALUE VALUE VALUE { psbb_assign( $1, $2, $3, $4 ); }
60+ ;
61+
62+/* vim: set cin fo=croqj: */
--- /dev/null
+++ b/psbblex.l
@@ -0,0 +1,719 @@
1+/* psbblex.l
2+ *
3+ * Lexical analyser for extraction of bounding box properties from [E]PS,
4+ * or PDF files, in response to groff's .psbb request.
5+ *
6+ * Written by Keith Marshall <keith@users.osdn.me>
7+ * Copyright (C) 2017, Free Software Foundation, Inc.
8+ *
9+ * This file is part of groff.
10+ *
11+ * groff is free software; you can redistribute it and/or modify it under
12+ * the terms of the GNU General Public License as published by the Free
13+ * Software Foundation, either version 3 of the License, or
14+ * (at your option) any later version.
15+ *
16+ * groff is distributed in the hope that it will be useful, but WITHOUT ANY
17+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or
18+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
19+ * for more details.
20+ *
21+ * You should have received a copy of the GNU General Public License
22+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
23+ */
24+%{
25+#include <ctype.h>
26+#include <stdio.h>
27+#include <stdlib.h>
28+#include <stdarg.h>
29+#include <errno.h>
30+#include <math.h>
31+
32+#include "psbb.h"
33+#include "psbb.tab.h"
34+
35+#if DEBUGGING
36+# define DEBUG(FOO) FOO
37+# define DEBUG_ECHO debug_msg( "%d: %s\n", YYSTATE, yytext )
38+
39+# define DEBUG_MSG(ARGLIST) do { debug_msg ARGLIST; } while(0)
40+
41+# define DEBUG_RETURN(TOKEN, NAME) \
42+ do { debug_msg("%d: return token %s (%d)\n", YYSTATE, NAME, TOKEN); \
43+ return TOKEN; \
44+ } while(0)
45+
46+static void debug_msg (const char *fmt, ...)
47+{ va_list av; va_start(av, fmt); vfprintf(stderr, fmt, av); va_end(av); }
48+
49+#else
50+# define DEBUG(FOO)
51+# define DEBUG_ECHO
52+# define DEBUG_MSG(ARGLIST)
53+# define DEBUG_RETURN(TOKEN, NAME) return TOKEN
54+#endif
55+
56+#define RETURN(TOKEN) DEBUG_RETURN(TOKEN, #TOKEN)
57+
58+static int ref[2] = { 0, 0 };
59+static size_t xrefbase, xrefptr;
60+enum { PSBB_PHASE_INIT = 0, PSBB_IN_HEADER, PSBB_IN_TRAILER };
61+static int psbb_phase, psbb_crescendo_seek( void ), psbb_parse_status;
62+
63+#define DEBUG_PDFINVOKE(STATE, TOKEN, NAME) \
64+ do { yy_push_state( STATE ); DEBUG_RETURN(TOKEN, NAME); \
65+ } while(0)
66+
67+#define PDFINVOKE(STATE, TOKEN) DEBUG_PDFINVOKE(STATE, TOKEN, #TOKEN)
68+
69+#define PDFINVOKE_IF(FROM, STATE, TOKEN) \
70+ if( yy_top_state() == FROM ) DEBUG_PDFINVOKE(STATE, TOKEN, #TOKEN)
71+
72+#define PSBB_PARSE_FAILURE ((psbb_parse_status = 1) & 0)
73+%}
74+
75+%option stack noyywrap prefix = "psbb_"
76+
77+%x SKIP UNKNOWN
78+%x PSHDR PSBB PSTRAILER PSATEND PSVOID
79+%x PDFINIT PDFTRAILER PDFDICT PDFSTARTXREF PDFXREF
80+%x PDFGETREF PDFGOXREF PDFGETOBJECT PDFSCANOBJECT PDFREFER
81+%x PDFEVAL PDFDUMP PDFIGNORE PDFOBJTYPE PDFKIDS PDFALLKIDS
82+%x PDFXREFCONT PDFXREFPREV PDFXREFWALK
83+%x PDFMEDIABOX PDFMEDIABOXEVAL
84+
85+INTVAL ([0-9]+)
86+FLOATVAL ([0-9]+"."[0-9]*)|("."[0-9]+)
87+SEP ([\000\t\f\r\n\040])
88+
89+LINEDATA ([^\r\n]*)
90+EOL (\r?\n)|\r
91+
92+READLN {LINEDATA}{EOL}
93+
94+PDFNAME ([^][(){}/%<>\000\t\f\r\n\040]+)
95+PDFSEP ([][(){}/%<>\000\t\f\r\n\040])
96+
97+%{
98+static const char *psbb_input_file;
99+
100+static int pdfseek( ssize_t offset )
101+{ /* Awkwardly, PDF files are not organized sequentially, and must be
102+ * scanned in (effectively) random block order. This helper flushes the
103+ * lexer buffer, then seeks yyin to offset; it returns fseek()'s status.
104+ */
105+ yy_flush_buffer( YY_CURRENT_BUFFER );
106+ DEBUG_MSG(("%d: pdfseek to offset = %ld\n", YYSTATE, (long)offset));
107+ return fseek( yyin, (long)offset, SEEK_SET );
108+}
109+
110+static int pdf_trailer( void )
111+{ /* A local helper function, invoked from the INITIAL state rule when
112+ * the lexer input has been identified as a PDF file; it attempts to
113+ * locate the PDF file trailer, and reset the input context to its
114+ * starting offset; it returns 0 on success, or EOF if none is found.
115+ */
116+ if( (psbb_parse_status = psbb_crescendo_seek()) == EOF )
117+ yyerror( "PDF file '%s' is malformed; no trailer found", psbb_input_file );
118+ return psbb_parse_status;
119+}
120+%}
121+%%
122+ /* Pattern rules section: this defines the behaviour of yylex(). The
123+ * initial code block will be placed at the start of yylex() itself; it
124+ * provides a hook whereby the lexer may be forced back to the INITIAL
125+ * state, for each new input file to be scanned in sequence.
126+ */
127+%{ if( psbb_phase == PSBB_PHASE_INIT )
128+ { psbb_phase = PSBB_IN_HEADER; BEGIN INITIAL;
129+ }
130+%}
131+ /* Unqualified patterns apply in start condition INITIAL only; we use
132+ * this to identify either PostScript or PDF input, or we bail out.
133+ */
134+.|\n { yymore(); BEGIN UNKNOWN; }
135+"%PDF-" { BEGIN PDFINIT; if (pdf_trailer() == EOF) return 0; }
136+"%!PS-Adobe-" { BEGIN PSHDR; yy_push_state( SKIP ); }
137+
138+
139+ /* State: INITIAL
140+ *
141+ * We should have switched out of the INITIAL condition, as soon as any
142+ * input stream content has been scanned; if we reach EOF while still in
143+ * this condition, we were given a zero-length stream.
144+ */
145+<INITIAL><<EOF>> { yyerror( "file '%s' is empty", psbb_input_file );
146+ return PSBB_PARSE_FAILURE;
147+ }
148+
149+ /* State: SKIP
150+ *
151+ * We use the SKIP condition to swallow all input, after an initially
152+ * matched pattern, up to end of line, before resuming in a specified
153+ * condition for examination of the next line; (the obvious `.*\n' is
154+ * not sufficient here, since we need to be prepared to handle any of
155+ * the CR only, LF only, or CRLF line ending conventions).
156+ */
157+<SKIP>{READLN} { yy_pop_state(); }
158+
159+
160+ /* State: PSHDR
161+ *
162+ * Scanning state used exclusively while reading the header comments
163+ * within a PostScript file; any `%X', where `X' is any non-whitespace
164+ * character, is a valid comment, but the header must terminate at any
165+ * `%%EndComments' input, or any input line which does not match the
166+ * `%X' start-of-line requirement.
167+ */
168+<PSHDR>{
169+"%"[^ \t] { yy_push_state( SKIP ); }
170+"%%EndComments" { BEGIN PSVOID; }
171+}
172+
173+ /* States: PSHDR and PSTRAILER
174+ *
175+ * In the case of PostScript input files, our objective is to identify
176+ * a `%%BoundingBox:' specification within header or trailer comments,
177+ * and to interpret its bounding box arguments. This start condition
178+ * is made active when scanning these file sections; it identifies the
179+ * requisite specification, then initiates the PSBB scanning state, to
180+ * interpret the arguments.
181+ */
182+<PSHDR,PSTRAILER>{
183+"%%BoundingBox:" { BEGIN PSBB; }
184+}
185+
186+ /* States: PSHDR and PSVOID
187+ *
188+ * Scanning states provided as a shared resource, to facilitate the
189+ * diagnosis of a missing %%BoundingBox specification, when scanning
190+ * in either of the PSHDR or PSTRAILER contexts.
191+ */
192+<PSHDR,PSVOID>{
193+<<EOF>> |
194+. { yyerror( "no '%s' specification found in file '%s'",
195+ "%%BoundingBox", psbb_input_file
196+ );
197+ return PSBB_PARSE_FAILURE;
198+ }
199+}
200+
201+ /* State: PSBB
202+ *
203+ * Scanning state used exclusively to interpret the arguments to a
204+ * `%%BoundingBox:' comment, in either the PostScript file header, or
205+ * the trailer; we expect four space-separated numeric values, or (in
206+ * the header only) "(atend)". In the former case, we return each
207+ * value separately; in the latter, we redirect the search to the
208+ * file trailer, where we hope to find four values.
209+ */
210+<PSBB>{
211+[ \t]+
212+{INTVAL}/{SEP} { yylval = atol( yytext ); RETURN(VALUE); }
213+{FLOATVAL}/{SEP} { yylval = lround( atof( yytext )); RETURN(VALUE); }
214+"(atend)" { if( psbb_phase == PSBB_IN_HEADER )
215+ {
216+ /* In header comments, `%%BoundingBox: (atend)'
217+ * indicates that the real specification for the
218+ * bounding box will be found in the file trailer; we
219+ * use a crescendo seek, from the end of the input
220+ * file, with recursive invocation of the lexer
221+ * itself, to locate this.
222+ */
223+ BEGIN PSATEND; psbb_crescendo_seek();
224+ }
225+ else
226+ { /* We've already been redirected to the trailer,
227+ * and found `%%BoundingBox: (atend)' again.
228+ */
229+ yyerror( "'%s' is not allowed in trailer of '%s'",
230+ yytext, psbb_input_file
231+ );
232+ return PSBB_PARSE_FAILURE;
233+ }
234+ }
235+[^0-9 \t\r\n]+ { yyerror( "psbb: %s", yytext ); }
236+{EOL} { return 0; }
237+}
238+
239+ /* State: PSATEND
240+ *
241+ * This start condition is used exclusively within recursive invocations
242+ * of the lexer, initiated from the PSBB start condition, while performing
243+ * the crescendo seek for the PostScript file trailer. Return is always
244+ * to the calling lexer instance, with non-zero placing the caller in the
245+ * appropriate condition for interpretation of the trailer.
246+ */
247+<PSATEND>{
248+"%%Trailer" { psbb_phase = PSBB_IN_TRAILER;
249+ BEGIN PSTRAILER; return 1;
250+ }
251+.|\n
252+}
253+
254+ /* State: PSTRAILER
255+ *
256+ * Scanning state used exclusively when scanning the PostScript file
257+ * trailer, after redirection by `%%BoundingBox: (atend)' in the header;
258+ * it looks for a further explicit bounding box specification within the
259+ * trailer, further redirecting to PSVOID if none is present. (Notice
260+ * that there is no `%%BoundingBox:' pattern here; that is specified
261+ * above, in a start condition scope shared with PSHDR).
262+ */
263+<PSTRAILER>{
264+. { BEGIN PSTRAILER; yy_push_state( SKIP ); }
265+<<EOF>> { BEGIN PSVOID; }
266+\n
267+}
268+
269+ /* State: PDFINIT
270+ *
271+ * Scanning state used exclusively during crescendo_seek() on a PDF
272+ * file, to locate the trailer section whence the starting offset for
273+ * the primary cross reference index may be obtained.
274+ */
275+<PDFINIT>{
276+"trailer"/{PDFSEP} { BEGIN PDFTRAILER; return PDFSTART; }
277+.|\n
278+}
279+
280+ /* State: PDFTRAILER
281+ *
282+ * Scanning state initiated on locating a PDF file trailer; it is
283+ * used to subsequently initiate parsing of the trailer dictionary,
284+ * and to establish the starting location for its associated cross
285+ * reference table.
286+ */
287+<PDFTRAILER>{
288+"<<" { yy_push_state( PDFDICT ); }
289+"startxref"/{SEP} { BEGIN PDFSTARTXREF; }
290+{SEP}+
291+.
292+}
293+
294+ /* State: PDFSTARTXREF
295+ *
296+ * Scanning state initiated after locating a startxref record within
297+ * a PDF file trailer; its purpose is to return the PDF file offset of
298+ * the associated xref data to the parser.
299+ */
300+<PDFSTARTXREF>{
301+{INTVAL}/{SEP} { xrefbase = atol( yytext ); RETURN(PDFOBJREF); }
302+{SEP}+
303+}
304+
305+ /* State: PDFDICT
306+ *
307+ * Scanning state initiated on locating the opening "<<" token of any
308+ * PDF dictionary; here, we identify those dictionary entries which are
309+ * of interest, regardless of context, and switch to an appropriate new
310+ * start condition to handle each; (note that this lookup may be made
311+ * dependent on the context whence this start condition was attained,
312+ * by use of PDFINVOKE_IF to initiate the subsequent state switch).
313+ */
314+<PDFDICT>{
315+"/Root"/{PDFSEP} { PDFINVOKE_IF( PDFTRAILER, PDFREFER, PDFROOT ); }
316+"/Prev"/{PDFSEP} { if( yy_top_state() == PDFXREFCONT ) BEGIN PDFXREFWALK;
317+ else yy_push_state( PDFIGNORE );
318+ }
319+"/Type"/{PDFSEP} { yy_push_state( PDFOBJTYPE ); }
320+"/Pages"/{PDFSEP} { yy_push_state( PDFREFER ); }
321+"/Kids"/{PDFSEP} { yy_push_state( PDFALLKIDS ); }
322+"/MediaBox"/{PDFSEP} { yy_push_state( PDFMEDIABOX ); }
323+"/"{PDFNAME}/{PDFSEP} { yy_push_state( PDFIGNORE ); }
324+">>" { yy_pop_state(); }
325+.|\n
326+}
327+
328+ /* State: PDFOBJTYPE
329+ *
330+ * Scanning state initiated on identifying a /Type key within a PDF
331+ * object dictionary; it effectively causes the scanner to swallow the
332+ * object type designation, for those object types which we expect to
333+ * encounter, before reverting to the PDFDICT state, (also returning
334+ * a PDFOBJREF token to the parser, in the specific case when the
335+ * /Catalog object is identified).
336+ *
337+ * FIXME: we may need to add error reporting for detection of any
338+ * object type which we do not expect to encounter.
339+ */
340+<PDFOBJTYPE>{
341+"/Catalog"/{PDFSEP} { yy_pop_state(); RETURN(PDFOBJREF); }
342+"/Page"s?/{PDFSEP} { yy_pop_state(); }
343+{SEP}+
344+}
345+
346+ /* States: PDFKIDS and PDFALLKIDS
347+ *
348+ * Scanning states employed to extract the first object reference from
349+ * a /Kids object dictionary entry. Always entered via the PDFALLKIDS
350+ * state, whence the PDFREFER state is invoked to extract the first of
351+ * the indirect object references within the associated reference list;
352+ * on return, the state degrades to PDFKIDS, so causing any additional
353+ * references present to be ignored, before returning to the PDFDICT
354+ * state.
355+ */
356+<PDFALLKIDS>"["{SEP}* { BEGIN PDFKIDS; PDFINVOKE( PDFREFER, PDFOBJREF ); }
357+<PDFKIDS,PDFALLKIDS>{
358+"]" { yy_pop_state(); }
359+{INTVAL}/{SEP}
360+"R"/{PDFSEP}
361+{SEP}+
362+}
363+
364+ /* State: PDFREFER
365+ *
366+ * Scanning state initiated when the anticipated PDF parsing context
367+ * represents a PDF object reference; it extracts the object index and
368+ * object version values, returning them separately to the parser, and
369+ * then expects, and returns the 'R' operator, before reverting to the
370+ * start condition whence this state was attained.
371+ */
372+<PDFREFER>{
373+"R"/{PDFSEP} { yy_pop_state(); RETURN('R'); }
374+{INTVAL}/{PDFSEP} { yylval = atol( yytext ); RETURN(VALUE); }
375+./({EOL}|"/") { yy_pop_state(); }
376+[ \t\r\n]+
377+}
378+
379+ /* State: PDFMEDIABOX
380+ *
381+ * Scanning state initiated at commencement of parsing a PDF MediaBox
382+ * specification; after locating the opening bracket of the bounding
383+ * box array, control is delegated to the following PDFMEDIABOXEVAL
384+ * state, to capture the array values.
385+ */
386+<PDFMEDIABOX>{
387+"[" { BEGIN PDFMEDIABOXEVAL; }
388+{SEP}+
389+}
390+
391+ /* State: PDFMEDIABOXEVAL
392+ *
393+ * Scanning state initiated exclusively from the PDFMEDIABOX state, to
394+ * capture the values from the bounding box array; we require these to
395+ * be integers, but some applications specify them as floating point,
396+ * so we must be prepared to interpret either.
397+ */
398+<PDFMEDIABOXEVAL>{
399+{FLOATVAL}/{PDFSEP} { yylval = lround( atof( yytext )); RETURN(VALUE); }
400+{INTVAL}/{PDFSEP} { yylval = atol( yytext ); RETURN(VALUE); }
401+"]" { yy_pop_state(); }
402+{SEP}+
403+}
404+
405+ /* State: PDFEVAL
406+ *
407+ * Scanning state initiated when we expect an integer value token in the
408+ * PDF parse stream; swallow leading white space, capture the token, then
409+ * revert to the state whence this condition was invoked.
410+ */
411+<PDFEVAL>{
412+{INTVAL}/{PDFSEP} { yylval = atol( yytext ); yy_pop_state(); RETURN(VALUE); }
413+[ \t\r\n]+
414+}
415+
416+ /* State: PDFIGNORE
417+ *
418+ * Scanning state in which all input is ignored, until the next EOL,
419+ * or the next PDF dictionary key, or possible dictionary terminator.
420+ */
421+<PDFIGNORE>{
422+./({EOL}|[/>]) { DEBUG_ECHO; yy_pop_state(); }
423+. { yymore(); }
424+}
425+
426+ /* State: PDFXREF
427+ *
428+ * Scanning state initiated after we have repositioned the PDF stream to
429+ * a point where we expect to find an "xref" table; confirm this position
430+ * is as expected, then delegate "xref" lookup to the following PDFGETREF
431+ * start condition.
432+ */
433+<PDFXREF>{
434+"xref"{SEP}+ { xrefptr += yyleng; BEGIN PDFGETREF; return PDFLOOKUP; }
435+.|\n { yyerror( "in '%s'; expected 'xref', but found '%s'",
436+ psbb_input_file, yytext
437+ );
438+ return PSBB_PARSE_FAILURE;
439+ }
440+}
441+
442+ /* State: PDFGETREF
443+ *
444+ * Scanning state initiated exclusively from the PDFXREF state, after
445+ * verification of the "xref" parse context, to lookup the offset of the
446+ * PDF object with index specified in global variable "ref[0]", and with
447+ * generation count as specified in "ref[1]". We begin by capturing a
448+ * a pair of integer values, representing the base index and span for
449+ * the current "xref" table...
450+ */
451+<PDFGETREF>{
452+{INTVAL}/{SEP} { xrefptr += yyleng; yylval = atol( yytext ); }
453+{SEP}+ { xrefptr += yyleng; RETURN(VALUE); }
454+}
455+
456+ /* State: PDFXREFCONT
457+ *
458+ * Scanning state initiated when a specific object reference is not
459+ * represented within the currently accessible segment of a PDF xref
460+ * table; it first looks for any immediately following segment of the
461+ * xref table, which may include the reference, ultimately falling
462+ * through to the following trailer dictionary, in which case, the
463+ * PDFXREFPREV state is invoked, attempting to follow a /Prev link
464+ * to an earlier generation of the xref table.
465+ */
466+<PDFXREFCONT>{
467+{INTVAL}/{SEP} { yyless(0); BEGIN PDFGETREF; RETURN(PDFLOOKUP); }
468+"trailer"/{PDFSEP} { yy_push_state( PDFXREFPREV ); }
469+{SEP}+ { xrefptr += yyleng; }
470+}
471+
472+ /* State: PDFXREFPREV
473+ *
474+ * Scanning state initiated on fall through from the PDFXREFCONT state,
475+ * into the PDF trailer; it looks for the start of the trailer dictionary,
476+ * then switches to a PDFDICT scan to locate the /Prev key, whence the
477+ * PDFXREFWALK state is invoked, to follow the /Prev link.
478+ */
479+<PDFXREFPREV>{
480+"<<" { BEGIN PDFDICT; }
481+{SEP}+
482+}
483+
484+ /* State: PDFXREFWALK
485+ *
486+ * Scanning state initiated after identification of the /Prev key in a
487+ * PDF trailer dictionary; it repositions the file input pointer to the
488+ * associated offset value, before restarting the PDFXREF scan.
489+ */
490+<PDFXREFWALK>{
491+{INTVAL}/{PDFSEP} { pdfseek( xrefptr = atol( yytext )); BEGIN PDFXREF; }
492+{SEP}+
493+}
494+
495+ /* State: PDFGOXREF
496+ *
497+ * Scanning state initiated after locating a PDF xref table entry for a
498+ * specified object;
499+ */
500+<PDFGOXREF>{READLN} { long offset = 0L, gen = -1L; char disp = '\0';
501+ /* Initial values are invalid by design: an entry which sscanf()
502+ * cannot fully convert then falls through to the diagnostic. */
503+ sscanf( yytext, "%10ld %5ld %c", &offset, &gen, &disp );
504+ DEBUG_MSG(("%d: %.18s --> %ld; %ld %c\n", YYSTATE, yytext, offset, gen, disp));
505+ if( disp == 'n' && gen == ref[1] )
506+ { pdfseek( offset ); BEGIN PDFGETOBJECT; }
507+ else
508+ { yyerror( "index entry '%.18s' unexpected in file '%s'",
509+ yytext, psbb_input_file );
510+ return PSBB_PARSE_FAILURE;
511+ }
512+ }
513+
514+ /* State: PDFGETOBJECT
515+ *
516+ * Scanning state initiated when the PDF input pointer has been set
517+ * to the start of a specific object; it returns the associated object
518+ * identification tokens to the parser, for confirmation of expected
519+ * object identity, before switching to the PDFSCANOBJECT state, to
520+ * scan the associated object data.
521+ */
522+<PDFGETOBJECT>{
523+"obj"/{PDFSEP} { BEGIN PDFSCANOBJECT; RETURN(PDFOBJECT); }
524+{INTVAL}/{SEP} { yylval = atol( yytext ); RETURN(VALUE); }
525+{SEP}+
526+}
527+
528+ /* State: PDFSCANOBJECT
529+ *
530+ * Scanning state initiated when scanning PDF object data; effectively,
531+ * it ignores all content, up to the terminating "endobj" token, except
532+ * for the content of any embedded object dictionary, which is scanned
533+ * in the PDFDICT state.
534+ */
535+<PDFSCANOBJECT>{
536+{SEP}*"<<" { yy_push_state( PDFDICT ); }
537+"endobj"/{SEP} { DEBUG_ECHO; RETURN(PDFENDOBJ); }
538+(.|\n)
539+}
540+
541+ /* State: UNKNOWN
542+ *
543+ * Finally, the UNKNOWN scanning state is activated when the INITIAL scan
544+ * of the first input line fails to recognize the file signature; it causes
545+ * the lexer to bail out immediately.
546+ */
547+<UNKNOWN>[^\r\n]* { yyerror( "unknown file signature '%s' in file '%s'",
548+ yytext, psbb_input_file
549+ );
550+ return PSBB_PARSE_FAILURE;
551+ }
552+%%
553+/* General code section: this provides the implementation for the
554+ * parser and lexical analyser API, servicing groff's psbb request.
555+ */
556+int psbb_parser_status_check;
557+void psbb_get_bounding_box( const char *source )
558+{
559+ /* This is the primary entry point for the parser/lexer combination;
560+ * it sets up the specified source file as the lexer input, then...
561+ */
562+ psbb_parser_status_check = EOF; /* retained if the open below fails */
563+ if( (yyin = psbb_open_file_for_parse( psbb_input_file = source )) != NULL )
564+ {
565+ /* ...when successful, forces the lexer to enter its initial state,
566+ * and invokes the parser to process the sequence of tokens which the
567+ * lexer returns.
568+ */
569+ psbb_parse_status = 0;
570+ psbb_phase = PSBB_PHASE_INIT;
571+ psbb_parser_status_check = yyparse() | psbb_parse_status;
572+ yy_flush_buffer( YY_CURRENT_BUFFER ); /* NOTE(review): yyin is not closed here; confirm the owner */
573+ }
574+}
575+
576+static int psbb_crescendo_seek()
577+{
578+ /* A helper function to iteratively search for any pattern,
579+ * close to the end of the file, which causes the lexer to
580+ * return a non-zero token. Initial search is limited to a
581+ * block of 64 bytes, at the bitter end of the file; on each
582+ * subsequent iteration, the block size is doubled, until a
583+ * successful match is found, or the block size grows to
584+ * exceed the size of the file. Returns 0 on a match, else EOF.
585+ */
586+ ssize_t offset;
587+ for( offset = 64L; offset > 0L; offset <<= 1 )
588+ {
589+ /* In this case, we use a crescendo seek, with
590+ * recursive invocation of the lexer itself, to
591+ * locate the start of the trailer...
592+ */
593+ int status;
594+ yy_flush_buffer( YY_CURRENT_BUFFER );
595+ if( (status = fseek( yyin, -offset, SEEK_END )) != 0 )
596+ /*
597+ * ...with a "last chance" search of the whole
598+ * file, if the crescendo overruns the start of
599+ * the file without finding it...
600+ */
601+ status = fseek( yyin, offset = 0L, SEEK_SET ); /* offset = 0 makes this the final pass */
602+
603+ if( (status == 0) && (yylex() > 0) )
604+ /*
605+ * ...breaking out of the crescendo cycle, as
606+ * soon as we find it, (or we've searched the
607+ * entire file without finding it).
608+ */
609+ return 0;
610+ }
611+ return EOF; /* no pass made the lexer return a positive token */
612+}
613+
614+void psbb_locate( int index, int generation )
615+{
616+ /* PDF object location function, invoked by the parser
617+ * when processing an object reference token sequence,
618+ * i.e. one of:--
619+ *
620+ * PDFOBJREF VALUE VALUE 'R', or
621+ * PDFROOT VALUE VALUE 'R' (the grammar's "root" rule)
622+ */
623+ ref[0] = index; /* object number wanted */
624+ ref[1] = generation; /* its generation number */
625+}
626+
627+void psbb_walk( void )
628+{
629+ /* Helper function, invoked by the parser when processing
630+ * a root PDFOBJREF token, or PDFENDOBJ token, to walk the
631+ * chain of PDF object references from the document root,
632+ * until the first leaf node, (nominally expected to be
633+ * the first /Page object), has been located.
634+ */
635+ if( ref[0] > 0 ) /* set by psbb_locate(); zeroed by psbb_chkref() on a match */
636+ { /* The last object parsed has at least one child object
637+ * reference; reset the scanner context, to locate and
638+ * process the first such object.
639+ */
640+ BEGIN PDFXREF; pdfseek( xrefptr = xrefbase );
641+ }
642+ else
643+ { /* The last object parsed is a leaf node object; ensure
644+ * that there is no residual data in the input buffer,
645+ * and force EOF on the next input operation.
646+ */
647+ yy_flush_buffer( YY_CURRENT_BUFFER );
648+ fseek( yyin, 0, SEEK_END );
649+ }
650+}
651+
652+void psbb_lookup( int base, int span )
653+{
654+ /* A helper function, invoked (possibly iteratively) by
655+ * the lexer, as a callback via the parser, during the
656+ * sequence of start conditions initiated from PDFXREF,
657+ * while handling a psbb_locate() request, to retrieve
658+ * a possible xref table entry for the object identified
659+ * by global index ref[0], within a section of the table
660+ * representing span objects, contiguously numbered from
661+ * the specified base index.
662+ */
663+ if( (ref[0] >= base) && (ref[0] < (base + span)) )
664+ {
665+ /* The required xref entry lies within the span of the
666+ * xref table section at the current xrefptr offset; we
667+ * simply adjust the xrefptr to the start of the entry
668+ * required, and follow the reference.
669+ */
670+ pdfseek( xrefptr + 20 * (ref[0] - base) ); /* each xref entry occupies 20 bytes */
671+ BEGIN PDFGOXREF;
672+ }
673+ else
674+ { /* The required xref entry is NOT accessible from the
675+ * xref table section at the current xrefptr offset; we
676+ * move the xrefptr just beyond the current section of
677+ * the table, then switch to the transient PDFXREFCONT
678+ * state, to search in any subsequent section of the
679+ * table, or to follow any /Prev link to an earlier
680+ * generation of it.
681+ */
682+ pdfseek( xrefptr += 20 * span );
683+ BEGIN PDFXREFCONT;
684+ }
685+ DEBUG_MSG(("%d: lookup object #%d @ %lu within %d..%d\n",
686+ YYSTATE, ref[0], (unsigned long)xrefptr, base, base + span
687+ ));
688+}
689+
690+static
691+int pdf_object_lookup_failed( const char *desc, int wanted, int found )
692+{
693+ /* A local helper function, invoked by the following psbb_chkref()
694+ * function, to report a PDF lookup mismatch for either the requested
695+ * object number, or its generation number.
696+ */
697+ yyerror( "object reference mismatch in '%s': expected %s %d but found %d",
698+ psbb_input_file, desc, wanted, found
699+ );
700+ return PSBB_PARSE_FAILURE; /* sets psbb_parse_status = 1; the value returned is 0 */
701+}
702+
703+int psbb_chkref( int obj, int gen )
704+{
705+ /* Invoked by the parser, to confirm that a PDF object reference lookup
706+ * has located the correct object, or diagnose otherwise. NOTE(review):
707+ * a mismatch leaves ref[0] set; confirm that psbb_walk() cannot then
708+ * repeat the same failing lookup. */
709+ if( obj != ref[0] )
710+ return pdf_object_lookup_failed( "object", ref[0], obj );
711+
712+ if( gen != ref[1] )
713+ return pdf_object_lookup_failed( "generation", ref[1], gen );
714+
715+ DEBUG_MSG(("%d: object: %d; generation = %d\n", YYSTATE, obj, gen));
716+ return ref[0] = 0; /* match: psbb_walk() treats ref[0] == 0 as a leaf node */
717+}
718+
719+/* vim: set cin fo=croqj: */
--- /dev/null
+++ b/t-psbb.cpp
@@ -0,0 +1,201 @@
1+// t-psbb.cpp -*- C++ -*-
2+//
3+// Test the effect of the groff .psbb request handling code.
4+//
5+// Written by Keith Marshall <keith@users.osdn.me>
6+// Copyright (C) 2017, Free Software Foundation, Inc.
7+//
8+// This file is part of groff.
9+//
10+// groff is free software; you can redistribute it and/or modify it under
11+// the terms of the GNU General Public License as published by the Free
12+// Software Foundation, either version 3 of the License, or
13+// (at your option) any later version.
14+//
15+// groff is distributed in the hope that it will be useful, but WITHOUT ANY
16+// WARRANTY; without even the implied warranty of MERCHANTABILITY or
17+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18+// for more details.
19+//
20+// You should have received a copy of the GNU General Public License
21+// along with this program. If not, see <http://www.gnu.org/licenses/>.
22+//
23+#define GROFF_PSBB_TEST_CODE
24+#define GROFF_INPUT_CPP_EMULATION
25+
26+#ifdef GROFF_INPUT_CPP_EMULATION
27+// Always defined, within this test module; this serves to demarcate
28+// code which emulates (not necessarily faithfully) features which are
29+// required by groff's .psbb request handling code, (which may be found
30+// in src/roff/troff/input.cpp).
31+//
32+#include "psbb.h"
33+#include "errarg.h"
34+#include "error.h"
35+
36+#include <cstring>
37+#include <cerrno>
38+
39+const char *program_name = "psbb";
40+const char *current_filename = "t-psbb";
41+const char *current_source_filename = "t-psbb.cpp";
42+
43+int current_lineno = 0;
44+
class symbol
{ // Minimal replacement for groff's class implementation, sufficient
  // to satisfy the usage pattern of the ps_bbox_request() function; it
  // can furnish exactly one token, corresponding to each argv element
  // supplied by the test program, subsequently reporting both end of
  // line and end of file, until reset by a new argv.
  //
  // Every accessor which merely inspects the object is declared const,
  // so that it remains usable through a const reference; only next()
  // and set_contents() modify the stored token.
  //
  public:
    // Start out with no token available.
    //
    symbol(): m_contents(NULL) {}

    // The current token text, or NULL when there is none.
    //
    const char *contents() const { return m_contents; }

    // Non-zero when there is no current token.
    //
    int is_null() const { return m_contents == NULL; }

    // Install a new token; the pointer is stored, not copied, so the
    // referenced string must outlive its use here.
    //
    void set_contents(const char *contents) { m_contents = contents; }

    // Discard the current token.
    //
    void next() { m_contents = NULL; }

    // This emulation always claims to be at end of line, and at end
    // of file, so that "swallow the rest of the line" loops terminate
    // immediately.
    //
    int newline() const { return 1; }
    int eof() const { return 1; }

  private:
    const char *m_contents;
};
63+
64+// Simplified implementation of get_long_name(); always return this
65+// fixed symbol class instance.
66+//
67+symbol tok;
68+symbol get_long_name(int unused){ return tok; }
69+
// skip_line() has nothing to skip in this emulation; it is reduced
// to a do-nothing placeholder.
//
static void skip_line()
{
}
73+
class search_path
{ // Minimal replacement for groff's class implementation; only the
  // open_file_cautious() method is emulated.
  //
  public:
    // Open "filename" in the given fopen() "mode", returning the
    // resulting stream, (or NULL on failure); "opt" is accepted, but
    // is not used by this emulation.
    //
    FILE *open_file_cautious( const char *filename, int opt, const char *mode )
    {
      FILE *stream = fopen( filename, mode );
      return stream;
    }
};
81+#define FOPEN_RB "rb"
82+
83+// Not exactly as groff implements it, but sufficient for our needs,
84+// without requiring anything more than a default constructor for the
85+// search_path class.
86+//
87+search_path include_search_path;
88+
89+#endif
90+// Following the GROFF_INPUT_CPP_EMULATION block, we reproduce content
91+// from src/roff/troff/input.cpp itself, (as we intend that it would be
92+// ultimately implemented).
93+
94+// .psbb
95+//
96+// Extract bounding box limits from PostScript file, and assign
97+// them to the following four gtroff registers:--
98+//
static int llx_reg_contents = 0, lly_reg_contents = 0;
static int urx_reg_contents = 0, ury_reg_contents = 0;

// psbb_assign_registers()
//
// An extern "C" callback function, invoked via our yacc parser, (and
// directly by ps_bbox_request(), to zero the registers before each
// parse); it copies each of its four arguments into the like-named
// register variable above.
//
void psbb_assign_registers(int llx, int lly, int urx, int ury)
{
  // The "ll" co-ordinate pair...
  //
  llx_reg_contents = llx; lly_reg_contents = lly;

  // ...followed by the "ur" pair.
  //
  urx_reg_contents = urx; ury_reg_contents = ury;
}
116+
117+// psbb_open_file_for_parse()
118+//
119+// A further extern "C" callback function, called by our yacc parser
120+// start-up routine, psbb_get_bounding_box(), to attach "yyin" to the
121+// specified file, in preparation for lexical analysis.
122+//
123+FILE *psbb_open_file_for_parse(const char *filename)
124+{ FILE *fp = include_search_path.open_file_cautious(filename, 0, FOPEN_RB);
125+ if (fp == NULL) error("cannot open '%1': %2", filename, strerror(errno));
126+ return fp;
127+}
128+
129+// ps_bbox_request()
130+//
131+// Handle the .psbb request; this is, effectively, a verbatim copy of
132+// code, as it should ultimately appear, in src/roff/troff/input.cpp
133+//
134+void ps_bbox_request()
135+{ // Parse input line, to extract file name.
136+ //
137+ symbol nm = get_long_name(1);
138+ if (nm.is_null())
139+ // No file name specified: ignore the entire request.
140+ //
141+ skip_line();
142+ else {
143+ // File name acquired: swallow the rest of the line.
144+ //
145+ while (!tok.newline() && !tok.eof())
146+ tok.next();
147+ errno = 0;
148+
149+ // Initialize, then update {llx,lly,urx,ury}_reg_contents.
150+ //
151+ psbb_assign_registers(0, 0, 0, 0);
152+ psbb_get_bounding_box(nm.contents());
153+
154+ // All done for .psbb; move on, to continue
155+ // input stream processing.
156+ //
157+ tok.next();
158+ }
159+}
160+
161+#ifdef GROFF_PSBB_TEST_CODE
162+// Again, always defined; this block implements the test procedure,
163+// simulating a groff input stream in which each element of argv is
164+// interpreted as if parsed as file name arguments to a succession
165+// of .psbb requests, subsequently printing the bounding box range
166+// co-ordinates extracted from each named file.
167+
168+EXTERN_C int psbb_parser_status_check;
169+static int ps_bbox_request_status(const char *argv)
170+{
171+ // Push a single argv element into the emulated groff input
172+ // stream, then invoke actual src/roff/troff/input.cpp code,
173+ // as if this argv has been read as ".psbb argv"; return the
174+ // final internal status code from the underlying parser.
175+ //
176+ tok.set_contents(argv); ps_bbox_request();
177+ return psbb_parser_status_check;
178+}
179+
180+int main(int argc, char **argv)
181+{
182+ // Require at least one command argument...
183+ //
184+ if (argc < 2)
185+ { error("usage: psbb filename ...\n");
186+ return 2;
187+ }
188+ // ...then push each, in turn, into the simulated groff input
189+ // stream, and interpret it as if read as an argument to .psbb;
190+ // successfully processed, report the bounding box result.
191+ //
192+ while (--argc > 0)
193+ { current_lineno = __LINE__; if (ps_bbox_request_status(*++argv) == 0)
194+ printf("%s: bounding box = (%d,%d)..(%d,%d)\n", *argv,
195+ llx_reg_contents, lly_reg_contents, urx_reg_contents, ury_reg_contents
196+ );
197+ }
198+ return 0;
199+}
200+
201+#endif /* GROFF_PSBB_TEST_CODE */