wiki style document editor
リビジョン | 911ecc9226ff9d2f40452d0b6588da589efa6f9c (tree) |
---|---|
日時 | 2014-09-01 21:26:22 |
作者 | Hiromichi Matsushima <hylom@Hiro...> |
コミッター | Hiromichi Matsushima |
export: separate text parser for export
@@ -4,11 +4,10 @@ | ||
4 | 4 | * node export-single.js <target directory> <output directory> |
5 | 5 | */ |
6 | 6 | |
7 | -var config = require('../../config'); | |
8 | 7 | var path = require('path'); |
9 | 8 | var fs = require('fs'); |
10 | -var child_process = require('child_process'); | |
11 | -var util = require('util'); | |
9 | + | |
10 | +var exportParser = require('./export_parser'); | |
12 | 11 | |
13 | 12 | var HTML_HEADER = 'header.txt'; |
14 | 13 | var HTML_FOOTER = 'footer.txt'; |
@@ -65,7 +64,7 @@ function exportSingle(target, output, chapter, pageStart, pageCount) { | ||
65 | 64 | return; |
66 | 65 | } |
67 | 66 | |
68 | - var result = parseText(data, target); | |
67 | + var result = exportParser.parseText(data, target); | |
69 | 68 | |
70 | 69 | // create output directory |
71 | 70 | if (!fs.existsSync(output)) { |
@@ -98,286 +97,6 @@ function exportSingle(target, output, chapter, pageStart, pageCount) { | ||
98 | 97 | }); |
99 | 98 | } |
100 | 99 | |
101 | -function findFigureFile(caption, imageDir) { | |
102 | - var figId = caption.match(/^☆図([0-9]+-[0-9]+)/); | |
103 | - var figName = 'fig' + figId[1] + '.fw.png'; | |
104 | - if (isExistsFigure(figName, imageDir)) { | |
105 | - return figName; | |
106 | - } | |
107 | - return ''; | |
108 | -} | |
109 | - | |
110 | -function findImageFile(name, targetDir) { | |
111 | - if (fs.existsSync(path.join(targetDir, name))) { | |
112 | - return name; | |
113 | - } | |
114 | - var files = fs.readdirSync(targetDir); | |
115 | - var exts = config.figureFormat; | |
116 | - for (var i = 0; i < files.length; i++) { | |
117 | - for (var j = 0; j < exts.length; j++) { | |
118 | - if (files[i].indexOf(name + exts[j]) != 0) { | |
119 | - return name + exts[j]; | |
120 | - } | |
121 | - } | |
122 | - } | |
123 | - return ''; | |
124 | -} | |
125 | - | |
126 | -// split body-text and others | |
127 | -function splitBody(lines) { | |
128 | - // split honmon and caption | |
129 | - var counter = 0; | |
130 | - var isCaption = false; | |
131 | - | |
132 | - var bodies = []; | |
133 | - var captions = []; | |
134 | - | |
135 | - for (i = 0; i < lines.length; i++) { | |
136 | - if (lines[i].match(/^>>>>/)) { | |
137 | - captions.push(lines[i]); | |
138 | - i++; | |
139 | - while (!lines[i].match(/^>>>>/)) { | |
140 | - captions.push(lines[i]); | |
141 | - i++; | |
142 | - } | |
143 | - continue; | |
144 | - } | |
145 | - | |
146 | - if (i >= lines.length) { | |
147 | - break; | |
148 | - } | |
149 | - | |
150 | - // cleanup blank line | |
151 | - if (lines[i].match(/^\s+$/)) { | |
152 | - lines[i] = ''; | |
153 | - } | |
154 | - | |
155 | - // end | |
156 | - if ( lines[i] === '' | |
157 | - && bodies.length > 0 | |
158 | - && bodies[bodies.length-1] === '' ) { | |
159 | - // do nothing | |
160 | - } else { | |
161 | - // *図、*表を置換 | |
162 | - lines[i] = lines[i].replace(/\*図/g, '★図'); | |
163 | - lines[i] = lines[i].replace(/\*表/g, '★表'); | |
164 | - bodies.push(lines[i]); | |
165 | - } | |
166 | - } | |
167 | - | |
168 | - var result = { | |
169 | - bodies: bodies, | |
170 | - blockItems: captions | |
171 | - } | |
172 | - return result; | |
173 | -} | |
174 | - | |
175 | -function createFigureList(lines, imageDir) { | |
176 | - var figureList = []; | |
177 | - var figMode = false; | |
178 | - var currentFigure; | |
179 | - | |
180 | - for (var i = 0; i < lines.length; i++) { | |
181 | - if (lines[i].match(/^>>>>/)) { | |
182 | - figMode = false; | |
183 | - continue; | |
184 | - } | |
185 | - | |
186 | - if (lines[i].match(/^====/)) { | |
187 | - figMode = false; | |
188 | - continue; | |
189 | - } | |
190 | - | |
191 | - if (lines[i].match(/^☆図/)) { | |
192 | - figMode = true; | |
193 | - currentFigure = findFigureFile(lines[i], imageDir); | |
194 | - if (currentFigure != '') { | |
195 | - figureList.push(currentFigure); | |
196 | - } | |
197 | - continue; | |
198 | - } | |
199 | - | |
200 | - if (figMode) { | |
201 | - if (lines[i].match(/^\s*$/)) { | |
202 | - continue; | |
203 | - } | |
204 | - var fullPath = findImageFile(lines[i], imageDir); | |
205 | - figureList.push(fullPath); | |
206 | - continue; | |
207 | - } | |
208 | - } | |
209 | - return figureList; | |
210 | -} | |
211 | - | |
212 | -function extractCaptions(lines, imageDir) { | |
213 | - var captions = []; | |
214 | - var figMode = false; | |
215 | - var currentFigure; | |
216 | - | |
217 | - for (var i = 0; i < lines.length; i++) { | |
218 | - if (lines[i].match(/^>>>>/)) { | |
219 | - figMode = false; | |
220 | - continue; | |
221 | - } | |
222 | - | |
223 | - if (lines[i].match(/^====/)) { | |
224 | - figMode = false; | |
225 | - captions.push(''); | |
226 | - continue; | |
227 | - } | |
228 | - | |
229 | - if (lines[i].match(/^☆図/)) { | |
230 | - captions.push(''); | |
231 | - captions.push(lines[i]); | |
232 | - figMode = true; | |
233 | - currentFigure = findFigureFile(lines[i], imageDir); | |
234 | - /* | |
235 | - if (currentFigure != '') { | |
236 | - captions.push(currentFigure); | |
237 | - } | |
238 | - */ | |
239 | - continue; | |
240 | - } | |
241 | - | |
242 | - if (figMode) { | |
243 | - if (currentFigure != '') { | |
244 | - continue; | |
245 | - } | |
246 | - if (lines[i].match(/^\s*$/)) { | |
247 | - continue; | |
248 | - } | |
249 | - var fullPath = findImageFile(lines[i], imageDir); | |
250 | - /* | |
251 | - captions.push(fullPath); | |
252 | - */ | |
253 | - continue; | |
254 | - } | |
255 | - captions.push(lines[i]); | |
256 | - } | |
257 | - return captions; | |
258 | -} | |
259 | - | |
260 | -function isExistsFigure(figName, targetDir) { | |
261 | - var fullPath = path.join(targetDir, figName); | |
262 | - return fs.existsSync(fullPath); | |
263 | -} | |
264 | - | |
265 | -function makeHtmls(lines, imageDir) { | |
266 | - var html = []; | |
267 | - var figMode = false; | |
268 | - var figTextMode = false; | |
269 | - var assumedFigName; | |
270 | - var figName | |
271 | - var figId; | |
272 | - for (var i = 0; i < lines.length; i++) { | |
273 | - | |
274 | - if (lines[i].match(/^>>>>/)) { | |
275 | - figMode = false; | |
276 | - continue; | |
277 | - } | |
278 | - | |
279 | - // figure | |
280 | - if (lines[i].match(/^☆図/)) { | |
281 | - figId = lines[i].match(/^☆図([0-9]+-[0-9]+)/); | |
282 | - console.log(lines[i]); | |
283 | - assumedFigName = 'fig' + figId[1] + '.fw.png'; | |
284 | - figMode = true; | |
285 | - if (isExistsFigure(assumedFigName, imageDir)) { | |
286 | - html.push('<span class="caption">' + lines[i] + '</span>'); | |
287 | - html.push('<img src="figures/' + assumedFigName + '">'); | |
288 | - html.push('<span class="filename">' + assumedFigName + '</span>'); | |
289 | - i++; | |
290 | - while((i < lines.length) && (!lines[i].match(/^>>>>/))) { | |
291 | - if (lines[i].match(/^\s*$/)) { | |
292 | - i++; | |
293 | - continue; | |
294 | - } | |
295 | - if (lines[i].match(/^====/)) { | |
296 | - figMode = false; | |
297 | - } | |
298 | - if (figMode) { | |
299 | - figName = findImageFile(lines[i], imageDir); | |
300 | - if ((figName != '') && (figName != assumedFigName)) { | |
301 | - html.push('<span class="filename">' + figName + '</span>'); | |
302 | - } | |
303 | - } | |
304 | - i++; | |
305 | - } | |
306 | - continue; | |
307 | - } | |
308 | - figMode = true; | |
309 | - html.push('<span class="caption">' + lines[i] + '</span>'); | |
310 | - continue; | |
311 | - } | |
312 | - | |
313 | - if (lines[i].match(/^====/)) { | |
314 | - i++; | |
315 | - while(!lines[i].match(/^>>>>/)) { | |
316 | - i++; | |
317 | - } | |
318 | - continue; | |
319 | - } | |
320 | - | |
321 | - // figure mode | |
322 | - if (figMode) { | |
323 | - if (lines[i].match(/^\s*$/)) { | |
324 | - continue; | |
325 | - } | |
326 | - figName = findImageFile(lines[i], imageDir); | |
327 | - if (figName != '') { | |
328 | - html.push('<img src="figures/' + figName + '">'); | |
329 | - html.push('<span class="filename">' + figName + '</span>'); | |
330 | - } | |
331 | - continue; | |
332 | - } | |
333 | - | |
334 | - // table | |
335 | - if (lines[i].match(/^☆表/)) { | |
336 | - html.push('<span class="caption">' + lines[i] + '</span>'); | |
337 | - html.push('<table>'); | |
338 | - i++; | |
339 | - while((i < lines.length) && (!lines[i].match(/^\s*$/))) { | |
340 | - if (lines[i].match(/^>>>>/)) { | |
341 | - break; | |
342 | - } | |
343 | - html.push(lines[i].replace(/^/, '<tr><td>') | |
344 | - .replace(/$/, '</td></tr>') | |
345 | - .replace(/\t/, '</td><td>')); | |
346 | - i++; | |
347 | - } | |
348 | - html.push('</table>'); | |
349 | - continue; | |
350 | - } | |
351 | - | |
352 | - // others | |
353 | - html.push(lines[i]); | |
354 | - } | |
355 | - return html; | |
356 | -} | |
357 | - | |
358 | -// parse main text to separate body text and captions/tables | |
359 | -function parseText(data, target) { | |
360 | - var lines = data.split('\n'); | |
361 | - var targetDir = path.dirname(target); | |
362 | - var imageDir = path.join(targetDir, 'figure'); | |
363 | - | |
364 | - // split body and others | |
365 | - // result.bodies: | |
366 | - // result.blockItems: | |
367 | - var result = splitBody(lines); | |
368 | - | |
369 | - // generate captions | |
370 | - result.captions = extractCaptions(result.blockItems, imageDir) | |
371 | - | |
372 | - // generate htmls | |
373 | - result.htmls = makeHtmls(result.blockItems, imageDir); | |
374 | - | |
375 | - // generate Figure List | |
376 | - result.figures = createFigureList(result.blockItems, imageDir); | |
377 | - | |
378 | - return result; | |
379 | -} | |
380 | - | |
381 | 100 | module.exports = exportSingle; |
382 | 101 | |
383 | 102 | // main action |
@@ -0,0 +1,289 @@ | ||
1 | + | |
2 | +var path = require('path'); | |
3 | +var fs = require('fs'); | |
4 | +var config = require('../../config'); | |
5 | + | |
6 | +function findFigureFile(caption, imageDir) { | |
7 | + var figId = caption.match(/^☆図([0-9]+-[0-9]+)/); | |
8 | + var figName = 'fig' + figId[1] + '.fw.png'; | |
9 | + if (isExistsFigure(figName, imageDir)) { | |
10 | + return figName; | |
11 | + } | |
12 | + return ''; | |
13 | +} | |
14 | + | |
15 | +function findImageFile(name, targetDir) { | |
16 | + if (fs.existsSync(path.join(targetDir, name))) { | |
17 | + return name; | |
18 | + } | |
19 | + var files = fs.readdirSync(targetDir); | |
20 | + var exts = config.figureFormat; | |
21 | + for (var i = 0; i < files.length; i++) { | |
22 | + for (var j = 0; j < exts.length; j++) { | |
23 | + if (files[i].indexOf(name + exts[j]) != 0) { | |
24 | + return name + exts[j]; | |
25 | + } | |
26 | + } | |
27 | + } | |
28 | + return ''; | |
29 | +} | |
30 | + | |
31 | +// split body-text and others | |
32 | +function splitBody(lines) { | |
33 | + // split honmon and caption | |
34 | + var counter = 0; | |
35 | + var isCaption = false; | |
36 | + | |
37 | + var bodies = []; | |
38 | + var captions = []; | |
39 | + | |
40 | + for (i = 0; i < lines.length; i++) { | |
41 | + if (lines[i].match(/^>>>>/)) { | |
42 | + captions.push(lines[i]); | |
43 | + i++; | |
44 | + while (!lines[i].match(/^>>>>/)) { | |
45 | + captions.push(lines[i]); | |
46 | + i++; | |
47 | + } | |
48 | + continue; | |
49 | + } | |
50 | + | |
51 | + if (i >= lines.length) { | |
52 | + break; | |
53 | + } | |
54 | + | |
55 | + // cleanup blank line | |
56 | + if (lines[i].match(/^\s+$/)) { | |
57 | + lines[i] = ''; | |
58 | + } | |
59 | + | |
60 | + // end | |
61 | + if ( lines[i] === '' | |
62 | + && bodies.length > 0 | |
63 | + && bodies[bodies.length-1] === '' ) { | |
64 | + // do nothing | |
65 | + } else { | |
66 | + // *図、*表を置換 | |
67 | + lines[i] = lines[i].replace(/\*図/g, '★図'); | |
68 | + lines[i] = lines[i].replace(/\*表/g, '★表'); | |
69 | + bodies.push(lines[i]); | |
70 | + } | |
71 | + } | |
72 | + | |
73 | + var result = { | |
74 | + bodies: bodies, | |
75 | + blockItems: captions | |
76 | + } | |
77 | + return result; | |
78 | +} | |
79 | + | |
80 | +function createFigureList(lines, imageDir) { | |
81 | + var figureList = []; | |
82 | + var figMode = false; | |
83 | + var currentFigure; | |
84 | + | |
85 | + for (var i = 0; i < lines.length; i++) { | |
86 | + if (lines[i].match(/^>>>>/)) { | |
87 | + figMode = false; | |
88 | + continue; | |
89 | + } | |
90 | + | |
91 | + if (lines[i].match(/^====/)) { | |
92 | + figMode = false; | |
93 | + continue; | |
94 | + } | |
95 | + | |
96 | + if (lines[i].match(/^☆図/)) { | |
97 | + figMode = true; | |
98 | + currentFigure = findFigureFile(lines[i], imageDir); | |
99 | + if (currentFigure != '') { | |
100 | + figureList.push(currentFigure); | |
101 | + } | |
102 | + continue; | |
103 | + } | |
104 | + | |
105 | + if (figMode) { | |
106 | + if (lines[i].match(/^\s*$/)) { | |
107 | + continue; | |
108 | + } | |
109 | + var fullPath = findImageFile(lines[i], imageDir); | |
110 | + figureList.push(fullPath); | |
111 | + continue; | |
112 | + } | |
113 | + } | |
114 | + return figureList; | |
115 | +} | |
116 | + | |
117 | +function extractCaptions(lines, imageDir) { | |
118 | + var captions = []; | |
119 | + var figMode = false; | |
120 | + var currentFigure; | |
121 | + | |
122 | + for (var i = 0; i < lines.length; i++) { | |
123 | + if (lines[i].match(/^>>>>/)) { | |
124 | + figMode = false; | |
125 | + continue; | |
126 | + } | |
127 | + | |
128 | + if (lines[i].match(/^====/)) { | |
129 | + figMode = false; | |
130 | + captions.push(''); | |
131 | + continue; | |
132 | + } | |
133 | + | |
134 | + if (lines[i].match(/^☆図/)) { | |
135 | + captions.push(''); | |
136 | + captions.push(lines[i]); | |
137 | + figMode = true; | |
138 | + currentFigure = findFigureFile(lines[i], imageDir); | |
139 | + /* | |
140 | + if (currentFigure != '') { | |
141 | + captions.push(currentFigure); | |
142 | + } | |
143 | + */ | |
144 | + continue; | |
145 | + } | |
146 | + | |
147 | + if (figMode) { | |
148 | + if (currentFigure != '') { | |
149 | + continue; | |
150 | + } | |
151 | + if (lines[i].match(/^\s*$/)) { | |
152 | + continue; | |
153 | + } | |
154 | + var fullPath = findImageFile(lines[i], imageDir); | |
155 | + /* | |
156 | + captions.push(fullPath); | |
157 | + */ | |
158 | + continue; | |
159 | + } | |
160 | + captions.push(lines[i]); | |
161 | + } | |
162 | + return captions; | |
163 | +} | |
164 | + | |
165 | +function isExistsFigure(figName, targetDir) { | |
166 | + var fullPath = path.join(targetDir, figName); | |
167 | + return fs.existsSync(fullPath); | |
168 | +} | |
169 | + | |
170 | +function makeHtmls(lines, imageDir) { | |
171 | + var html = []; | |
172 | + var figMode = false; | |
173 | + var figTextMode = false; | |
174 | + var assumedFigName; | |
175 | + var figName | |
176 | + var figId; | |
177 | + for (var i = 0; i < lines.length; i++) { | |
178 | + | |
179 | + if (lines[i].match(/^>>>>/)) { | |
180 | + figMode = false; | |
181 | + continue; | |
182 | + } | |
183 | + | |
184 | + // figure | |
185 | + if (lines[i].match(/^☆図/)) { | |
186 | + figId = lines[i].match(/^☆図([0-9]+-[0-9]+)/); | |
187 | + console.log(lines[i]); | |
188 | + assumedFigName = 'fig' + figId[1] + '.fw.png'; | |
189 | + figMode = true; | |
190 | + if (isExistsFigure(assumedFigName, imageDir)) { | |
191 | + html.push('<span class="caption">' + lines[i] + '</span>'); | |
192 | + html.push('<img src="figures/' + assumedFigName + '">'); | |
193 | + html.push('<span class="filename">' + assumedFigName + '</span>'); | |
194 | + i++; | |
195 | + while((i < lines.length) && (!lines[i].match(/^>>>>/))) { | |
196 | + if (lines[i].match(/^\s*$/)) { | |
197 | + i++; | |
198 | + continue; | |
199 | + } | |
200 | + if (lines[i].match(/^====/)) { | |
201 | + figMode = false; | |
202 | + } | |
203 | + if (figMode) { | |
204 | + figName = findImageFile(lines[i], imageDir); | |
205 | + if ((figName != '') && (figName != assumedFigName)) { | |
206 | + html.push('<span class="filename">' + figName + '</span>'); | |
207 | + } | |
208 | + } | |
209 | + i++; | |
210 | + } | |
211 | + continue; | |
212 | + } | |
213 | + figMode = true; | |
214 | + html.push('<span class="caption">' + lines[i] + '</span>'); | |
215 | + continue; | |
216 | + } | |
217 | + | |
218 | + if (lines[i].match(/^====/)) { | |
219 | + i++; | |
220 | + while(!lines[i].match(/^>>>>/)) { | |
221 | + i++; | |
222 | + } | |
223 | + continue; | |
224 | + } | |
225 | + | |
226 | + // figure mode | |
227 | + if (figMode) { | |
228 | + if (lines[i].match(/^\s*$/)) { | |
229 | + continue; | |
230 | + } | |
231 | + figName = findImageFile(lines[i], imageDir); | |
232 | + if (figName != '') { | |
233 | + html.push('<img src="figures/' + figName + '">'); | |
234 | + html.push('<span class="filename">' + figName + '</span>'); | |
235 | + } | |
236 | + continue; | |
237 | + } | |
238 | + | |
239 | + // table | |
240 | + if (lines[i].match(/^☆表/)) { | |
241 | + html.push('<span class="caption">' + lines[i] + '</span>'); | |
242 | + html.push('<table>'); | |
243 | + i++; | |
244 | + while((i < lines.length) && (!lines[i].match(/^\s*$/))) { | |
245 | + if (lines[i].match(/^>>>>/)) { | |
246 | + break; | |
247 | + } | |
248 | + html.push(lines[i].replace(/^/, '<tr><td>') | |
249 | + .replace(/$/, '</td></tr>') | |
250 | + .replace(/\t/, '</td><td>')); | |
251 | + i++; | |
252 | + } | |
253 | + html.push('</table>'); | |
254 | + continue; | |
255 | + } | |
256 | + | |
257 | + // others | |
258 | + html.push(lines[i]); | |
259 | + } | |
260 | + return html; | |
261 | +} | |
262 | + | |
263 | +// parse main text to separate body text and captions/tables | |
264 | +function parseText(data, target) { | |
265 | + var lines = data.split('\n'); | |
266 | + var targetDir = path.dirname(target); | |
267 | + var imageDir = path.join(targetDir, 'figure'); | |
268 | + | |
269 | + // split body and others | |
270 | + // result.bodies: | |
271 | + // result.blockItems: | |
272 | + var result = splitBody(lines); | |
273 | + | |
274 | + // generate captions | |
275 | + result.captions = extractCaptions(result.blockItems, imageDir) | |
276 | + | |
277 | + // generate htmls | |
278 | + result.htmls = makeHtmls(result.blockItems, imageDir); | |
279 | + | |
280 | + // generate Figure List | |
281 | + result.figures = createFigureList(result.blockItems, imageDir); | |
282 | + | |
283 | + return result; | |
284 | +} | |
285 | + | |
286 | +var exportParser = {}; | |
287 | +exportParser.parseText = parseText; | |
288 | +module.exports = exportParser; | |
289 | + |