View Javadoc

1   package org.paneris.bibliomania.pagination;
2   
3   import java.io.BufferedReader;
4   import java.io.BufferedWriter;
5   import java.io.ByteArrayOutputStream;
6   import java.io.File;
7   import java.io.FileInputStream;
8   import java.io.FileNotFoundException;
9   import java.io.FileWriter;
10  import java.io.IOException;
11  import java.io.InputStream;
12  import java.io.InputStreamReader;
13  import java.io.PrintWriter;
14  import java.io.Reader;
15  import java.io.StreamTokenizer;
16  import java.io.StringWriter;
17  import java.io.Writer;
18  import java.net.URL;
19  import java.util.Enumeration;
20  import java.util.Hashtable;
21  import java.util.Stack;
22  import java.util.Vector;
23  
24  import javax.swing.text.AttributeSet;
25  import javax.swing.text.BadLocationException;
26  import javax.swing.text.MutableAttributeSet;
27  import javax.swing.text.html.HTML;
28  import javax.swing.text.html.HTMLEditorKit;
29  
30  import org.melati.Melati;
31  import org.melati.MelatiConfig;
32  import org.melati.template.webmacro.MelatiFastWriter;
33  import org.paneris.bibliomania.util.FictionalNotifiableParserCallback;
34  import org.paneris.bibliomania.util.FictionalNotifyingDocumentParser;
35  import org.melati.util.FtellPrintWriter;
36  import org.melati.util.FtellWriter;
37  import org.melati.util.HTMLUtils;
38  import org.melati.util.IoUtils;
39  import org.melati.util.MelatiRuntimeException;
40  import org.melati.poem.util.StringUtils;
41  import org.melati.util.UnexpectedExceptionException;
42  import org.paneris.bibliomania.fti.DbUtils;
43  import org.paneris.bibliomania.fti.FivePacker;
44  import org.paneris.bibliomania.fti.FourPacker;
45  import org.paneris.bibliomania.fti.Text;
46  import org.webmacro.Context;
47  import org.webmacro.WM;
48  import org.webmacro.WebMacro;
49  import org.webmacro.engine.FileTemplate;
50  
51  import com.sleepycat.db.Database;
52  import com.sleepycat.db.DatabaseException;
53  import com.sleepycat.db.DatabaseEntry;
54  import com.sleepycat.db.LockMode;
55  import com.sleepycat.db.OperationStatus;
56  
57  public class Pagination {
  /**
   * Prefix of the anchor name written into the paginated HTML at each page
   * break; the running page number is appended to it.
   */
  public static final String pageAnchorPrefix = "__page__";

  /**
   * Directory in which TeX is run. Its path is pasted unquoted into a
   * <code>/bin/sh -c</code> command line, so:
   * if you change this, no shell metacharacters please.
   */

  public static final File tempDir = new File("/tmp");

  /**
   * Name of the external program that is run (through the shell) on a DVI
   * file to report its line and page break positions.
   */
  public static final String dvi2ttyWithBreakinfo = "dvi2tty_breakinfo";

  /**
   * Database opened by the constructor under the name
   * <code>pageBreaksOfTextID</code>; page breaks are looked up in it by text
   * id.
   */
  private Database pageBreaksOfTextID;
69  
70    public Pagination(File dbHome) throws DatabaseException, IOException {
71      pageBreaksOfTextID = DbUtils.openOrCreateBTreeDb(dbHome, "pageBreaksOfTextID", 0);
72      //pageBreaksOfTextID.open(null, new File(dbHome, "pageBreaksOfTextID.db")
73      //    .getPath(), null, DbConstants.DB_BTREE, DbConstants.DB_CREATE, 0644);
74      
75      // provoke exception now rather than later:
76      HTMLUtils.dtdForHTMLParser();
77    }
78  
79    public class PageSpan {
80      public int number;
81  
82      public int startOffset, endOffset;
83  
84      public int totalPages;
85  
86      public PageSpan(int number, int startOffset, int endOffset, int totalPages) {
87        this.number = number;
88        this.startOffset = startOffset;
89        this.endOffset = endOffset;
90        this.totalPages = totalPages;
91      }
92  
93      public String toString() {
94        return number + "/" + totalPages + ":" + startOffset + "-" + endOffset;
95      }
96    }
97  
98    public static class TagInTexException extends RuntimeException {
99      private static final long serialVersionUID = 1L;
100 
101     public HTML.Tag tag;
102 
103     public AttributeSet atts;
104 
105     public TagInTexException(HTML.Tag tag, AttributeSet atts) {
106       this.tag = tag;
107       this.atts = atts;
108     }
109 
110     public String getMessage() {
111       return "The element `"
112           + (atts == null ? tag.toString() : HTMLUtils.stringOf(tag, atts))
113           + "' occurs inside <span class=tex>. "
114           + "If you really want to put < or > in the literal TeX code, you "
115           + "should use &lt; and &gt;.";
116     }
117   }
118 
119   public static class FootnoteSequence {
120     public int lastNumber = 0;
121 
122     private int spansOnStack = 0;
123 
124     public static class NestingException extends RuntimeException {
125       private static final long serialVersionUID = 1L;
126 
127       public HTML.Tag tag;
128 
129       public AttributeSet atts;
130 
131       public NestingException(HTML.Tag tag, AttributeSet atts) {
132         this.tag = tag;
133         this.atts = atts;
134       }
135 
136       public String getMessage() {
137         return "The element " + HTMLUtils.stringOf(tag, atts) + " "
138             + "constitutes a footnote-within-a-footnote, which is not allowed";
139       }
140     }
141 
142     public int number(HTML.Tag tag, AttributeSet atts) {
143       boolean isSpan = tag.toString().equalsIgnoreCase("span");
144       boolean isFootnote = isSpan
145           && atts.isDefined(HTML.Attribute.CLASS)
146           && atts.getAttribute(HTML.Attribute.CLASS).toString()
147               .equalsIgnoreCase("footnote");
148 
149       if (spansOnStack > 0) {
150         if (isSpan)
151           ++spansOnStack;
152 
153         if (isFootnote)
154           throw new NestingException(tag, atts);
155       } else {
156         if (isFootnote) {
157           ++spansOnStack;
158           if (atts.isDefined("number")) {
159             try {
160               lastNumber = Integer.parseInt(atts.getAttribute("number")
161                   .toString());
162             } catch (Exception e) {
163               ++lastNumber;
164             }
165           } else
166             ++lastNumber;
167 
168           return lastNumber;
169         }
170       }
171 
172       return -1;
173     }
174 
175     public boolean inFootnote() {
176       return spansOnStack > 0;
177     }
178 
179     public boolean footnoteEnded(HTML.Tag tag) {
180       if (spansOnStack != 0 && tag.toString().equalsIgnoreCase("span")
181           && --spansOnStack == 0) {
182         return true;
183       } else
184         return false;
185     }
186   }
187 
188   public class PageFinder {
189     private DatabaseEntry textID = DbUtils.userMemDatabaseEntry(5);
190 
191     private DatabaseEntry pageBreaks = DbUtils.userMemDatabaseEntry(1000);
192 
193     private long theTextID = -1;
194 
195     private byte[] thePageBreaks = null;
196 
197     private synchronized byte[] pageBreaks(long textIdP) throws DatabaseException {
198       if (theTextID != textIdP) {
199         FivePacker.set_(this.textID.getData(), 0, textIdP);
200         theTextID = textIdP;
201         if (pageBreaksOfTextID.get(null, this.textID, pageBreaks, LockMode.DEFAULT) == OperationStatus.SUCCESS) {
202           thePageBreaks = new byte[pageBreaks.getSize() + 4];
203           System.arraycopy(pageBreaks.getData(), 0, thePageBreaks, 4,
204               thePageBreaks.length - 4);
205         } else
206           thePageBreaks = null;
207       }
208 
209       return thePageBreaks;
210     }
211 
212     public int totalPages(long textIdP) throws DatabaseException {
213       byte[] pageBreaksL = pageBreaks(textIdP);
214       return pageBreaksL == null ? 0 : pageBreaksL.length / 4 - 1;
215     }
216 
217     public PageSpan pageOfNumber(long textIdP, int number) throws DatabaseException {
218       byte[] pageBreaksL = pageBreaks(textIdP);
219       if (pageBreaksL == null || pageBreaksL.length < (number + 2) * 4)
220         return null;
221       else
222         return new PageSpan(number,
223             FourPacker.number_(pageBreaksL, number * 4), FourPacker.number_(
224                 pageBreaksL, (number + 1) * 4), pageBreaksL.length / 4 - 1);
225     }
226 
227     public PageSpan pageOfOffset(long textIdP, int offset) throws DatabaseException {
228       byte[] pageBreaksL = pageBreaks(textIdP);
229 
230       if (pageBreaksL == null || pageBreaksL.length == 0)
231         return null;
232 
233       int lastBreakOffset = FourPacker.number_(pageBreaksL,
234           pageBreaksL.length - 4);
235       if (offset >= lastBreakOffset)
236         return null;
237 
238       int totalPages = pageBreaksL.length / 4 - 1;
239 
240       int p = (int)((long)offset * pageBreaksL.length / lastBreakOffset);
241       if ((p + 1) * 4 >= pageBreaksL.length)
242         p = (pageBreaksL.length / 4) - 1;
243 
244       int offP = FourPacker.number_(pageBreaksL, p * 4);
245       if (offP == offset)
246         return (p + 2) * 4 >= pageBreaksL.length ? null : new PageSpan(p, offP,
247             FourPacker.number_(pageBreaksL, (p + 1) * 4), totalPages);
248       else if (offP < offset) {
249         for (;;) {
250           int offP1 = FourPacker.number_(pageBreaksL, (p + 1) * 4);
251           if (offP1 > offset)
252             return new PageSpan(p, offP, offP1, totalPages);
253           ++p;
254           offP = offP1;
255         }
256       } else {
257         int offP1;
258         do {
259           --p;
260           if (p < 0)
261             return new PageSpan(0, 0, offP, totalPages);
262           offP1 = offP;
263           offP = FourPacker.number_(pageBreaksL, p * 4);
264         } while (offP > offset);
265         return new PageSpan(p, offP, offP1, totalPages);
266       }
267     }
268   }
269 
270   private static class TexGeneratingCallback extends
271       HTMLEditorKit.ParserCallback {
272 
273     private static final int NONE = 0, SPACE = 1, NOSPACE = 2;
274 
275     private int wordBreak = SPACE;
276 
277     private FtellPrintWriter tex;
278 
279     private int pos = 0;
280 
281     //private int inFootnote = 0;
282 
283     private FootnoteSequence footnoteSequence = new FootnoteSequence();
284 
285     private boolean passthrough = false;
286 
287     private long lastTexLineBreak = 0;
288 
289     public TexGeneratingCallback(FtellPrintWriter tex) {
290       this.tex = tex;
291     }
292 
293     private void writeTexSpace() {
294       long here = tex.ftell();
295       if (here - lastTexLineBreak > 1000) {
296         tex.write('\n');
297         lastTexLineBreak = here;
298       } else
299         tex.write(' ');
300     }
301 
302     public void handleText(char[] text, int _pos) {
303       if (passthrough) {
304         tex.write("in passthough:");
305         tex.write(text);
306         tex.write(":");
307       } else if (!footnoteSequence.inFootnote()) {
308         tex.write('{');
309         for (int i = 0; i < text.length; ++i) {
310           char c = text[i];
311           if (Character.isWhitespace(c)) {
312             writeTexSpace();
313             wordBreak = SPACE;
314           } else if (c == '-' || c == 173) {
315             tex.write(c);
316             wordBreak = NOSPACE;
317           } else {
318             if (wordBreak == SPACE)
319               writeTexSpace();
320 
321             switch (c) {
322             case '!':
323             case '@':
324             case '*':
325             case '(':
326             case ')':
327             case '.':
328             case ',':
329             case '/':
330             case '?':
331             case '[':
332             case ']':
333             case '|':
334             case '=':
335             case '+':
336               tex.write(c);
337               break;
338             case 151:
339               tex.write("---");
340               break;
341             case 145:
342               tex.write('`');
343               break;
344             case 146:
345               tex.write('\'');
346               break;
347             case 147:
348               tex.write("``");
349               break;
350             case 148:
351               tex.write("''");
352               break;
353             default:
354               if ('a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || '0' <= c
355                   && c <= '9')
356                 tex.write(c);
357               else if (32 <= c && c < 128) {
358                 tex.write("\\char");
359                 tex.write(Integer.toString(c));
360                 writeTexSpace();
361               }
362             }
363 
364             if (wordBreak != NONE) {
365               tex.write("\\special{.=");
366               tex.write("" + (pos + i));
367               tex.write("}");
368               wordBreak = NONE;
369             }
370           }
371         }
372         tex.write('}');
373       }
374       else tex.write("Seem to be in footnote:" + text);
375     }
376 
377     public void handleSimpleTag(HTML.Tag tag, MutableAttributeSet a, int posP) {
378       if (passthrough)
379         throw new TagInTexException(tag, a);
380 
381       if (!footnoteSequence.inFootnote())
382         this.pos = posP;
383     }
384 
385     private static class TagTranslation {
386       String begin, end;
387 
388       TagTranslation(String begin, String end) {
389         this.begin = begin;
390         this.end = end;
391       }
392 
393       public void begin(PrintWriter tex, HTML.Tag tag, MutableAttributeSet a) {
394         tex.write(begin);
395       }
396 
397       public void end(PrintWriter tex, HTML.Tag tag) {
398         tex.write(end);
399       }
400     }
401 
402     private static final Hashtable<HTML.Tag, TagTranslation> tagTranslations = new Hashtable<HTML.Tag, TagTranslation>();
403 
404     static {
405       tagTranslations.put(HTML.Tag.P, new TagTranslation("\n\n", ""));
406       tagTranslations.put(HTML.Tag.H1, new TagTranslation("\\H{I}{", "}"));
407       tagTranslations.put(HTML.Tag.H2, new TagTranslation("\\H{II}{", "}"));
408       tagTranslations.put(HTML.Tag.H3, new TagTranslation("\\H{III}{", "}"));
409       tagTranslations.put(HTML.Tag.H4, new TagTranslation("\\H{IIII}{", "}"));
410       TagTranslation i = new TagTranslation("{\\it ", "}");
411       tagTranslations.put(HTML.Tag.I, i);
412       tagTranslations.put(HTML.Tag.EM, i);
413       TagTranslation b = new TagTranslation("{\\bf ", "}");
414       tagTranslations.put(HTML.Tag.B, b);
415       tagTranslations.put(HTML.Tag.STRONG, b);
416       tagTranslations.put(HTML.Tag.OL, new TagTranslation("\\orderedlist ",
417           "\\endorderedlist "));
418       tagTranslations.put(HTML.Tag.UL, new TagTranslation("\\unorderedlist ",
419           "\\endunorderedlist "));
420       tagTranslations.put(HTML.Tag.LI, new TagTranslation("\\li ", ""));
421     }
422 
423     public void handleStartTag(HTML.Tag tag, MutableAttributeSet a, int posP) {
424 
425       if (passthrough)
426         throw new TagInTexException(tag, a);
427 
428       if (tag.toString().equalsIgnoreCase("span")
429           && a.isDefined(HTML.Attribute.CLASS)
430           && a.getAttribute(HTML.Attribute.CLASS).toString().equalsIgnoreCase(
431               "tex")) {
432         passthrough = true;
433       } else {
434         int footnum = footnoteSequence.number(tag, a);
435         if (footnum != -1) {
436           tex.write("\\footnotenumber{" + footnum + "}");
437           this.pos = posP;
438         } else if (!footnoteSequence.inFootnote()) {
439           TagTranslation translation = (TagTranslation)tagTranslations.get(tag);
440           if (translation != null)
441             translation.begin(tex, tag, a);
442 
443           if (tag.breaksFlow())
444             wordBreak = SPACE;
445 
446           this.pos = posP;
447         }
448       }
449     }
450 
451     public void handleEndTag(HTML.Tag tag, int posL) {
452       if (passthrough) {
453         if (tag.toString().equalsIgnoreCase("span"))
454           passthrough = false;
455         else
456           throw new TagInTexException(tag, null);
457       } else if (footnoteSequence.footnoteEnded(tag)) {
458         wordBreak = NOSPACE;
459         this.pos = posL;
460       } else if (!footnoteSequence.inFootnote()) {
461         TagTranslation translation = (TagTranslation)tagTranslations.get(tag);
462         if (translation != null)
463           translation.end(tex, tag);
464 
465         if (tag.breaksFlow())
466           wordBreak = SPACE;
467 
468         this.pos = posL;
469       }
470     }
471 
472     public void handleError(String msg, int posP) {
473       if (msg.startsWith("start") || msg.startsWith("end")
474           || msg.startsWith("unmatched") || msg.startsWith("invalid")) {
475       } else
476         System.err.println("Bibliomania pagination: " + posP + ": " + msg);
477     }
478 
479     public void flush() throws BadLocationException {
480       // ready for the next time
481       wordBreak = SPACE;
482     }
483   }
484 
  /**
   * Name of the class-path resource, resolved relative to this class, that
   * holds the TeX header used when the caller does not supply one.
   */
  private static final String defaultTexHeaderName = "header.tex";
486 
487   public static class DefaultTexHeaderNotFoundException extends
488       MelatiRuntimeException {
489 
490     private static final long serialVersionUID = 1L;
491 
492     public DefaultTexHeaderNotFoundException(Exception problem) {
493       super(problem);
494     }
495 
496     public String getMessage() {
497       return "Strangely, a problem arose opening the default TeX header for "
498           + "pagination; it ought to be in the software source tree as `"
499           + defaultTexHeaderName + "'!?\n" + subException.getMessage();
500     }
501   }
502 
503   public String defaultTexHeader() {
504     try {
505       URL it = getClass().getResource(defaultTexHeaderName);
506       if (it == null)
507         throw new FileNotFoundException("Resource `" + defaultTexHeaderName
508             + "' not found by " + getClass());
509       return new String(IoUtils.slurp(new InputStreamReader(it.openStream()),
510           5000));
511     } catch (Exception e) {
512       throw new DefaultTexHeaderNotFoundException(e);
513     }
514   }
515 
516   private void htmlToTEX(Reader html, String texHeader, Writer tex)
517       throws IOException {
518     tex.write(texHeader == null ? defaultTexHeader() : texHeader);
519     tex.write('\n');
520 
521     FtellPrintWriter texP = new FtellPrintWriter(tex);
522 
523     newDocumentParser().parse(html, new TexGeneratingCallback(texP),
524         true);
525 
526     if (texP.checkError())
527       throw new IOException("Error writing to TeX buffer");
528     tex.write("\n\\bye\n");
529   }
530 
531   /**
532    * @return a new parser  
533    */  
534   public static FictionalNotifyingDocumentParser newDocumentParser() {   
535     return new FictionalNotifyingDocumentParser(HTMLUtils.dtdForHTMLParser());   
536   }
537 
538   public static class TexFailedException extends IOException {
539     private static final long serialVersionUID = 1L;
540 
541     public String[] command;
542 
543     public File texFile;
544 
545     public TexFailedException(String[] command, File texFile) {
546       this.command = command;
547       this.texFile = texFile;
548     }
549 
550     public String getLogFile() {
551       String p = texFile.getPath();
552       return (p.endsWith(".tex") ? p.substring(0, p.length() - 4) : p) + ".log";
553     }
554 
555     public String getMessage() {
556       return "Nonzero exit status from [["
557           + StringUtils.concatenated(" ", command) + "]]; "
558           + "look for errors in " + getLogFile();
559     }
560   }
561 
562   public void texToDVI(File texTemp) throws IOException, InterruptedException {
563     String[] command = {
564         "/bin/sh",
565         "-c",
566         "cd " + tempDir + " && " + "tex " + texTemp
567             + " </dev/null >/dev/null 2>&1" };
568 
569     if (Runtime.getRuntime().exec(command).waitFor() != 0)
570       throw new TexFailedException(command, texTemp);
571   }
572 
573   public static class BreakInfoOfDVI {
574     public static final int LINE = 0, PAGE = 1;
575 
576     private Process dvi2tty;
577 
578     private StreamTokenizer tokens;
579 
580     private int kind = -1, position = -1;
581 
582     public BreakInfoOfDVI(File dviTemp) throws IOException {
583       String[] command = {
584           "/bin/sh",
585           "-c",
586           dvi2ttyWithBreakinfo + " -b " + dviTemp.getPath() + "| "
587               + "sort -n -k2" };
588       dvi2tty = Runtime.getRuntime().exec(command);
589       tokens = new StreamTokenizer(new BufferedReader(new InputStreamReader(
590           dvi2tty.getInputStream())));
591       if (tokens.nextToken() == StreamTokenizer.TT_EOF)
592         throw new IOException("[[" + StringUtils.concatenated(" ", command)
593             + "]] produced no output");
594       tokens.pushBack();
595     }
596 
597     public boolean next() throws IOException {
598       if (tokens.nextToken() == StreamTokenizer.TT_EOF) {
599         close();
600         kind = -1;
601         position = -1;
602         return false;
603       }
604 
605       if (tokens.ttype != StreamTokenizer.TT_WORD)
606         throw new IOException("Unexpected nonword " + tokens + " from "
607             + dvi2ttyWithBreakinfo);
608       if (tokens.sval.equals("line"))
609         kind = LINE;
610       else if (tokens.sval.equals("page"))
611         kind = PAGE;
612       else
613         throw new IOException("Unexpected " + tokens + " from "
614             + dvi2ttyWithBreakinfo + ": " + "expected `line' or `page'");
615 
616       if (tokens.nextToken() != StreamTokenizer.TT_NUMBER)
617         throw new IOException("Unexpected nonnumber " + tokens + " from "
618             + dvi2ttyWithBreakinfo);
619 
620       position = (int)tokens.nval;
621 
622       return true;
623     }
624 
625     public int kind() {
626       return kind;
627     }
628 
629     public int position() {
630       return position;
631     }
632 
633     public void close() {
634       dvi2tty.destroy();
635     }
636 
637     protected void finalize() throws Throwable {
638       close();
639     }
640   }
641 
642   private class LineFixatingCallback extends FictionalNotifiableParserCallback {
643     private FtellWriter linedHTML;
644 
645     private StringWriter footnote = null;
646 
647     private Vector pages;
648 
649     private BreakInfoOfDVI breaks;
650 
651     private boolean inWord = false, inLine = true, hadBreak = false;
652 
653     private int pos = 0;
654 
655     private Stack openTags = new Stack();
656 
657     private Vector pendingStartTags = new Vector();
658 
659     private int pageNum = 0;
660 
661     private FootnoteSequence footnoteSequence = new FootnoteSequence();
662 
663     private org.webmacro.Template footnoteTemplate;
664 
665     private WebMacro footnoteWebmacro;
666 
667     private String contentEncoding;
668 
669     private boolean currentIsFictional = false;
670 
671     private boolean literalTeX = false;
672 
673     private int footnoteSeqInDoc = 0;
674 
675     public LineFixatingCallback(FtellWriter linedHTML, BreakInfoOfDVI breaks,
676         Vector pages, org.webmacro.Template footnoteTemplate,
677         WebMacro footnoteWebmacro, String contentEncoding) throws IOException {
678       this.linedHTML = linedHTML;
679       this.breaks = breaks;
680       this.pages = pages;
681       this.footnoteTemplate = footnoteTemplate;
682       this.footnoteWebmacro = footnoteWebmacro;
683       this.contentEncoding = contentEncoding;
684       breaks.next();
685     }
686 
687     public void notifyCurrentIsFictional(boolean is) {
688       currentIsFictional = is;
689     }
690 
691     /**
692      * Just added to remove warning.
693      * @return whether current is fictional 
694      */
695     protected boolean getCurrentIsFictional() {
696       return currentIsFictional;
697     }
698 
699     public void checkBreaks(int posP) {
700       try {
701         while (breaks.position() != -1 && breaks.position() <= posP) {
702           if (breaks.kind() == BreakInfoOfDVI.PAGE) {
703 
704             for (int e = openTags.size() - 1; e >= 0; --e) {
705               linedHTML.write("</");
706               linedHTML
707                   .write(((HTMLUtils.TagInstance)openTags.elementAt(e)).tag
708                       .toString());
709               linedHTML.write(">");
710             }
711 
712             linedHTML
713                 .write("<a name=" + pageAnchorPrefix + ++pageNum + "></a>");
714 
715             pages.addElement(new Integer((int)linedHTML.ftell()));
716 
717             for (int e = 0; e < openTags.size(); ++e)
718               emitTag((HTMLUtils.TagInstance)openTags.elementAt(e));
719 
720             emitPendingStartTags();
721           }
722 
723           inLine = false;
724 
725           breaks.next();
726         }
727       } catch (IOException e) {
728         throw new UnexpectedExceptionException(e);
729       }
730     }
731 
732     public void handleText(char[] text, int _pos) {
733       if (!literalTeX)
734         try {
735           if (footnote != null) {
736             for (int i = 0; i < text.length; ++i) {
737               char c = text[i];
738               if (c < 32 || 128 <= c)
739                 footnote.write("&#" + (int)c + ';');
740               else
741                 footnote.write(c);
742             }
743           } else {
744             checkBreaks(pos);
745             emitPendingStartTags();
746             for (int i = 0; i < text.length; ++i) {
747               char c = text[i];
748               if (Character.isWhitespace(c)) {
749                 if (c == '\n')
750                   inLine = false;
751                 inWord = false;
752               } else {
753                 checkBreaks(pos + i);
754 
755                 if (!inLine) {
756                   if (!hadBreak)
757                     linedHTML.write('\n');
758                   inLine = true;
759                 } else if (!inWord)
760                   linedHTML.write(' ');
761 
762                 if (c < 32 || 128 <= c)
763                   linedHTML.write("&#" + (int)c + ';');
764                 else
765                   linedHTML.write(c);
766 
767                 inWord = true;
768                 hadBreak = false;
769               }
770             }
771           }
772         } catch (IOException e) {
773           throw new UnexpectedExceptionException(e);
774         }
775     }
776 
777     private void emitFootnote() {
778       try {
779         Context context = footnoteWebmacro.getContext();
780         context.put("seqInDoc", new Integer(++footnoteSeqInDoc));
781         context.put("number", new Integer(footnoteSequence.lastNumber));
782         context.put("text", footnote.toString());
783 
784         ByteArrayOutputStream buf = new ByteArrayOutputStream();
785         MelatiFastWriter fmw = new MelatiFastWriter(footnoteWebmacro
786             .getBroker(), buf, contentEncoding);
787         Melati m = new Melati(new MelatiConfig(), fmw);
788         context.put("melati", m);
789         footnoteTemplate.write(fmw.getFastWriter().getOutputStream(), context);
790         fmw.flush();
791 
792         /*
793          * Melati m = new Melati( new MelatiConfig(), new SimpleMelatiWriter(new
794          * OutputStreamWriter(buf))); 
795          * m.setBufferingOff(false);
796          * context.put("melati", m); 
797          * footnoteTemplate.write(new FastWriter(buf,
798          * contentEncoding), context);
799          */
800         linedHTML.write(buf.toString().trim());
801       } catch (Exception e) {
802         throw new UnexpectedExceptionException(e);
803       } finally {
804         footnote = null;
805       }
806     }
807 
808     private void emitTag(HTML.Tag tag, AttributeSet attributes)
809         throws IOException {
810       if (!inWord) {
811         linedHTML.write(' ');
812         inWord = true;
813       }
814 
815       if (tag.breaksFlow()) {
816         inLine = true;
817         inWord = true;
818         hadBreak = true;
819       }
820 
821       if (tag == HTML.Tag.P || tag == HTML.Tag.LI)
822         linedHTML.write("     " + preStart); // Spaces are hack to replace
823                                               // <pre> without upsetting
824                                               // pagination
825 
826       // following is
827       // HTMLUtils.write(linedHTML, tag, attributes);
828       // except that we don't write P CLASS=footnote
829       // as a workaround for a bug in Sun's HTML parser :(
830 
831       linedHTML.write('<');
832       linedHTML.write(tag.toString());
833       for (Enumeration<?> a = attributes.getAttributeNames(); a.hasMoreElements();) {
834         Object n = a.nextElement();
835         if (attributes.isDefined(n)) {
836           linedHTML.write(' ');
837           String name = n.toString();
838           String value = attributes.getAttribute(n).toString();
839           if (!(tag == HTML.Tag.P && name.equalsIgnoreCase("class") && value
840               .equalsIgnoreCase("footnote"))) {
841             linedHTML.write(name);
842             linedHTML.write("=\"");
843             linedHTML.write(HTMLUtils.entitied(value));
844             linedHTML.write('"');
845           }
846         }
847       }
848       linedHTML.write('>');
849     }
850 
851     private void emitTag(HTMLUtils.TagInstance tag) throws IOException {
852       emitTag(tag.tag, tag.attributes);
853     }
854 
855     public static final String preStart = "";
856 
857     public static final String preEnd = "";
858 
859     private void emitPendingStartTags() throws IOException {
860       for (int i = 0; i < pendingStartTags.size(); ++i) {
861         HTMLUtils.TagInstance tag = (HTMLUtils.TagInstance)pendingStartTags
862             .elementAt(i);
863         emitTag(tag);
864         openTags.push(tag);
865       }
866 
867       pendingStartTags.clear();
868     }
869 
870     public void handleSimpleTag(HTML.Tag tag, MutableAttributeSet attributes,
871         int posP) {
872       if (literalTeX)
873         throw new TagInTexException(tag, null);
874 
875       if (!tag.toString().equals("__EndOfLineTag__"))
876         try {
877           if (footnote != null)
878             HTMLUtils.write(footnote, tag, attributes);
879           else {
880             emitPendingStartTags();
881             // if (tag != HTML.Tag.IMG)
882             emitTag(tag, attributes);
883             this.pos = posP;
884           }
885         } catch (IOException e) {
886           throw new UnexpectedExceptionException(e);
887         }
888     }
889 
890     public void handleStartTag(HTML.Tag tag, MutableAttributeSet attributes,
891         int posP) {
892       if (literalTeX)
893         throw new TagInTexException(tag, attributes);
894 
895       if (tag.toString().equalsIgnoreCase("span")
896           && attributes.isDefined(HTML.Attribute.CLASS)
897           && attributes.getAttribute(HTML.Attribute.CLASS).toString()
898               .equalsIgnoreCase("tex")) {
899         literalTeX = true;
900       } else {
901         int footnoteNumber = footnoteSequence.number(tag, attributes);
902         if (footnoteNumber != -1) {
903           footnote = new StringWriter();
904           this.pos = posP;
905         } else if (footnote != null) {
906           try {
907             HTMLUtils.write(footnote, tag, attributes);
908           } catch (IOException e) {
909             throw new UnexpectedExceptionException(e);
910           }
911         } else {
912           if (tag != HTML.Tag.HTML && tag != HTML.Tag.HEAD
913               && tag != HTML.Tag.BODY)
914             pendingStartTags.addElement(new HTMLUtils.TagInstance(tag,
915                 attributes.copyAttributes()));
916           this.pos = posP;
917         }
918       }
919     }
920 
921     public void handleEndTag(HTML.Tag tag, int posP) {
922       if (literalTeX) {
923         if (tag.toString().equalsIgnoreCase("span"))
924           literalTeX = false;
925         else
926           throw new TagInTexException(tag, null);
927       } else
928         try {
929           if (footnoteSequence.footnoteEnded(tag)) {
930             emitFootnote();
931             this.pos = posP;
932           } else if (footnote != null)
933             footnote.write("</" + tag + ">");
934           else {
935             emitPendingStartTags();
936 
937             if (tag.breaksFlow()) {
938               inLine = false;
939               inWord = true;
940               hadBreak = true;
941             }
942 
943             if (tag != HTML.Tag.HTML && tag != HTML.Tag.HEAD
944                 && tag != HTML.Tag.BODY) {
945               linedHTML.write("</" + tag + ">");
946 
947               if (tag == HTML.Tag.P || tag == HTML.Tag.LI)
948                 linedHTML.write(preEnd + "     \n"); // Spaces are hack to
949                                                       // replace <pre> without
950                                                       // upsetting pagination
951 
952               while (!openTags.empty()
953                   && ((HTMLUtils.TagInstance)openTags.pop()).tag != tag)
954                 ;
955             }
956 
957             this.pos = posP;
958           }
959         } catch (IOException e) {
960           throw new UnexpectedExceptionException(e);
961         }
962     }
963 
964     public void handleError(String msg, int posP) {
965       if (msg.startsWith("start") || msg.startsWith("end")
966           || msg.startsWith("unmatched") || msg.startsWith("invalid")) {
967       } else
968         System.err.println("Bibliomania pagination: " + posP + ": " + msg);
969     }
970   }
971 
972   private String tempPrefix() {
973     return getClass().getName();
974   }
975 
976   public boolean isTempFile(File file) {
977     return file.getPath().startsWith(new File(tempDir, tempPrefix()).getPath());
978   }
979 
980   public void paginate(Text text, String texHeader, Writer linedHTML,
981       org.webmacro.Template footnoteTemplate, WebMacro footnoteWebmacro,
982       String contentEncoding) throws IOException, DatabaseException {
983     Reader html = new InputStreamReader(text.body());
984     File texTemp = File.createTempFile(tempPrefix(), ".tex", tempDir);
985     String base = texTemp.getPath()
986         .substring(0, texTemp.getPath().length() - 4);
987     File dviTemp = new File(base + ".dvi");
988 
989     Writer toTEXTemp = new BufferedWriter(new FileWriter(texTemp));
990     BreakInfoOfDVI breaks = null;
991     try {
992       htmlToTEX(html, texHeader, toTEXTemp);
993       html.close();
994       toTEXTemp.close();
995 
996       try {
997         texToDVI(texTemp);
998       } catch (InterruptedException e) {
999         throw new IOException("Interrupted while waiting for tex");
1000       }
1001 
1002       html = new InputStreamReader(text.body());
1003       breaks = new BreakInfoOfDVI(dviTemp);
1004 
1005       Vector pages = new Vector();
1006 
1007       FtellWriter linedF = new FtellWriter(linedHTML);
1008 
1009       try {
1010         newDocumentParser().parse(
1011             html,
1012             new LineFixatingCallback(linedF, breaks, pages, footnoteTemplate,
1013                 footnoteWebmacro, contentEncoding), true);
1014       } catch (UnexpectedExceptionException e) {
1015         if (e.subException instanceof IOException)
1016           throw (IOException)e.subException;
1017         else
1018           throw e;
1019       }
1020 
1021       linedF.flush();
1022 
1023       pages.addElement(new Integer((int)linedF.ftell()));
1024       DatabaseEntry pageBreaks = DbUtils.userMemDatabaseEntry(pages.size() * 4);
1025       for (int i = 0; i < pages.size(); ++i)
1026         FourPacker.set_(pageBreaks.getData(), i * 4, ((Integer)pages
1027             .elementAt(i)).intValue());
1028 
1029       DatabaseEntry textID = DbUtils.userMemDatabaseEntry(5);
1030       FivePacker.set_(textID.getData(), 0, text.ftiTextID());
1031 
1032       pageBreaksOfTextID.put(null, textID, pageBreaks);
1033 
1034       // we deliberately don't delete these if there was a problem
1035 
1036       if (!new File(tempDir, "bibliomania.Pagination.leaveTempFiles").exists()) {
1037         texTemp.delete();
1038         dviTemp.delete();
1039         new File(base + ".log").delete();
1040       }
1041     } finally {
1042       try {
1043         if (breaks != null) 
1044           breaks.close();
1045       } catch (Exception e) {
1046       }
1047       try {
1048         html.close();
1049       } catch (Exception e) {
1050       }
1051       try {
1052         toTEXTemp.close();
1053       } catch (Exception e) {
1054       }
1055     }
1056   }
1057 
1058   public void flush() throws DatabaseException {
1059     pageBreaksOfTextID.sync();
1060   }
1061 
1062   public void close() {
1063     try {
1064       pageBreaksOfTextID.close();
1065     } catch (Exception e) {
1066       throw new RuntimeException();
1067     }
1068   }
1069 
1070   protected void finalize() throws Throwable {
1071     close();
1072   }
1073 
1074   public static void main(final String[] args) throws Exception {
1075     WebMacro wm = new WM();
1076 
1077     Pagination pagination = new Pagination(new File("/tmp"));
1078 
1079     if (args[0].equals("-page"))
1080       System.out.println(pagination.new PageFinder().pageOfNumber(Integer
1081           .parseInt(args[1]), Integer.parseInt(args[2])));
1082     else if (args[0].equals("-offset"))
1083       System.out.println(pagination.new PageFinder().pageOfOffset(Integer
1084           .parseInt(args[1]), Integer.parseInt(args[2])));
1085     else if (args[0].equals("-paginate")) {
1086       // FIXME
1087       File templateFile = new File(
1088           "/dist/Bibliomania/src/org/paneris/bibliomania/html/pagination/footnote.wm");
1089       org.webmacro.Template fnt = new FileTemplate(wm.getBroker(),
1090           templateFile, "UTF8");
1091       fnt.parse();
1092 
1093       pagination.paginate(new Text() {
1094         public InputStream body() throws IOException {
1095           return new FileInputStream(args[1]);
1096         }
1097 
1098         public InputStream bodyForFragment() throws IOException {
1099           return body();
1100         }
1101 
1102         public long ftiTextID() {
1103           return Long.parseLong(args[2]);
1104         }
1105       }, null, new BufferedWriter(new FileWriter("/tmp/lined.html")), fnt, wm,
1106           "UTF8");
1107     }
1108 
1109     pagination.close();
1110   }
1111 }