1 package org.paneris.bibliomania.fti;
2
3 import java.io.BufferedInputStream;
4 import java.io.File;
5 import java.io.FileInputStream;
6 import java.io.IOException;
7 import java.io.InputStream;
8
9 import org.melati.poem.NoSuchRowPoemException;
10
11 import com.sleepycat.db.DatabaseException;
12
13 public class ContextSearchResults implements SearchResults {
14
15 private static byte[] skipBuffer = new byte[5000];
16
17 public static final String contextUnavailable = "(context not available)";
18
19 public static final int contextWordsAfterHit = 5;
20
21 public static final int wordsGapBetweenAreas = 6;
22
23 private Library library;
24
25 private Text currentText = null;
26
27 private SearchResults results;
28
29 private AnchorFinder blockmarks;
30
31
32
33
34 private InputStream body = null;
35
36 private long bodyPosition;
37
38 public ContextSearchResults(Library library, SearchResults results,
39 IndexOther fti) {
40 this.library = library;
41 this.results = results;
42 blockmarks = new AnchorFinder(fti, true);
43 }
44
45 public int frequency() {
46 return results.frequency();
47 }
48
49 public int hitWordsCount() {
50 return results.hitWordsCount();
51 }
52
53 public void init() {
54 closeBody();
55 results.init();
56 }
57
58 public void skipToNextHit() {
59 results.skipToNextHit();
60 }
61
62 public void skipToWordIndex(int wordIndex) {
63 results.skipToWordIndex(wordIndex);
64 }
65
66 public int currentWordIndex() {
67 return results.currentWordIndex();
68 }
69
70 public int currentOffset() {
71 return results.currentOffset();
72 }
73
74 public void gotoText(long textID) throws DatabaseException {
75 closeBody();
76 results.gotoText(textID);
77 long it = results.currentTextID();
78 currentText = it == -1 ? null : library.text(it);
79 }
80
81 public void gotoPosition(long position) throws DatabaseException {
82 gotoText(position >> 32);
83 if (currentTextID() != -1)
84 skipToWordIndex((int) position);
85 }
86
87 public long getPosition() {
88 int wi = currentWordIndex();
89 return wi == -1 ? -1 : (currentTextID() << 32 | wi);
90 }
91
92 public long currentTextID() {
93 return results.currentTextID();
94 }
95
96 public Text currentText() {
97 return currentText;
98 }
99
100 public String currentAnchor() throws DatabaseException {
101
102
103 int index = currentWordIndex();
104 return index == -1 ? null : blockmarks
105 .anchorOfIndex(currentTextID(), index);
106 }
107
108 private synchronized void closeBody() {
109 if (body != null) {
110 try {
111 body.close();
112 } catch (IOException e) {
113 }
114 body = null;
115 }
116 }
117
118 public synchronized String nextArea() {
119 try {
120 long target = results.currentOffset();
121 if (target == -1) {
122 closeBody();
123 return null;
124 }
125
126 if (body == null) {
127 if (currentText == null)
128 return null;
129 try {
130 body = new BufferedInputStream(currentText.bodyForFragment());
131 bodyPosition = 0;
132 } catch (NoSuchRowPoemException e) {
133
134
135 return null;
136 }
137 }
138
139 if (bodyPosition > target)
140 throw new IOException("tried to skip backwards");
141
142
143
144
145
146 if (bodyPosition == 0)
147 bodyPosition += body.skip(target - bodyPosition);
148 else {
149 long read;
150 do {
151 bodyPosition += (read = body.read(skipBuffer, 0, (int) Math.min(
152 target - bodyPosition, skipBuffer.length)));
153 } while (read > 0);
154 }
155
156 if (bodyPosition < target)
157 throw new IOException("skipped only to " + bodyPosition + " not "
158 + target);
159
160 StringBuffer buf = new StringBuffer();
161 IndexTokenizer words = new IndexTokenizer(body, true);
162 int baseIndex = Math.max(results.currentWordIndex()
163 - IndexOther.contextWordsBeforeHit, 0);
164
165 do {
166 int limitIndex = results.currentWordIndex() + results.hitWordsCount()
167 + contextWordsAfterHit - baseIndex;
168
169 while (words.hasMoreWords() && words.wordIndex() < limitIndex) {
170 String word = words.nextWord();
171 if (!word.startsWith("#")) {
172 if (words.hadBreak() && buf.length() > 0)
173 buf.append(' ');
174 buf.append(word);
175 }
176 }
177
178
179
180 results.skipToNextHit();
181 } while (results.currentWordIndex() != -1
182 && (results.currentWordIndex() - baseIndex) - words.wordIndex() < wordsGapBetweenAreas);
183
184 bodyPosition += words.bytesReadFromUnderlyingStream();
185
186 return buf.toString();
187 } catch (IOException e) {
188 System.err.println(e);
189
190 try {
191 body.close();
192 } catch (Exception ee) {
193 }
194
195 return null;
196 }
197 }
198
199 public static void main(String[] args) throws Exception {
200 IndexOther fti = new IndexOther(new File("/tmp"));
201
202 SearchResults rawResults;
203 if (args[0].charAt(0) == '_') {
204 args[0] = args[0].substring(1);
205 rawResults = fti.groupSearchResults(args);
206 System.out.println("phrase");
207 } else if (args[0].equals("-query")) {
208 rawResults = fti.querySearchResults(args[1]);
209 } else {
210 rawResults = fti.andSearchResults(args);
211 System.out.println("and");
212 }
213
214 ContextSearchResults results = new ContextSearchResults(new Library() {
215 public Text text(final long textID) {
216 return new Text() {
217 public InputStream body() throws IOException {
218 return new FileInputStream(
219 "/usr/doc/HOWTO/other-formats/html/CDROM-HOWTO-3.html");
220 }
221
222 public InputStream bodyForFragment() throws IOException {
223 return body();
224 }
225
226 public long ftiTextID() {
227 return textID;
228 }
229 };
230 }
231 }, rawResults, fti);
232
233 for (results.gotoText(0); results.currentTextID() != -1;
234
235 results.gotoText(results.currentTextID() + 1)) {
236 System.out.println("== " + results.currentTextID());
237 for (;;) {
238 String anchor = results.currentAnchor();
239 String area = results.nextArea();
240 if (area == null)
241 break;
242 System.out.println("-- A HREF=http://doc#" + anchor);
243 System.out.println(area);
244 }
245 }
246 }
247
248 public void close() {
249 results.close();
250 blockmarks.close();
251 closeBody();
252 }
253 }