001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements. See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License. You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018
019 package org.apache.commons.net.nntp;
020
021 /**
022 * This is an implementation of a message threading algorithm, as originally devised by Zamie Zawinski.
023 * See <a href="http://www.jwz.org/doc/threading.html">http://www.jwz.org/doc/threading.html</a> for details.
024 * For his Java implementation, see <a href="http://lxr.mozilla.org/mozilla/source/grendel/sources/grendel/view/Threader.java">http://lxr.mozilla.org/mozilla/source/grendel/sources/grendel/view/Threader.java</a>
025 *
026 * @author rwinston <rwinston@checkfree.com>
027 *
028 */
029
030 import java.util.HashMap;
031 import java.util.Iterator;
032 import java.util.List;
033
034 public class Threader {
035 private ThreadContainer root;
036 private HashMap<String,ThreadContainer> idTable;
037 private int bogusIdCount = 0;
038
039 /**
040 * The client passes in a list of Threadable objects, and
041 * the Threader constructs a connected 'graph' of messages
042 * @param messages list of messages to thread
043 * @return null if messages == null or root.child == null
044 * @since 2.2
045 */
046 public Threadable thread(List<? extends Threadable> messages) {
047 return thread((Iterable<? extends Threadable>)messages);
048 }
049
050 /**
051 * The client passes in a list of Iterable objects, and
052 * the Threader constructs a connected 'graph' of messages
053 * @param messages iterable of messages to thread
054 * @return null if messages == null or root.child == null
055 * @since 3.0
056 */
057 public Threadable thread(Iterable<? extends Threadable> messages) {
058 if (messages == null) {
059 return null;
060 }
061
062 idTable = new HashMap<String,ThreadContainer>();
063
064 // walk through each Threadable element
065 for (Threadable t : messages) {
066 if (!t.isDummy()) {
067 buildContainer(t);
068 }
069 }
070
071 root = findRootSet();
072 idTable.clear();
073 idTable = null;
074
075 pruneEmptyContainers(root);
076
077 root.reverseChildren();
078 gatherSubjects();
079
080 if (root.next != null) {
081 throw new RuntimeException("root node has a next:" + root);
082 }
083
084 for (ThreadContainer r = root.child; r != null; r = r.next) {
085 if (r.threadable == null) {
086 r.threadable = r.child.threadable.makeDummy();
087 }
088 }
089
090 Threadable result = (root.child == null ? null : root.child.threadable);
091 root.flush();
092 root = null;
093
094 return result;
095 }
096
097 /**
098 *
099 * @param threadable
100 */
101 private void buildContainer(Threadable threadable) {
102 String id = threadable.messageThreadId();
103 ThreadContainer container = idTable.get(id);
104
105 // A ThreadContainer exists for this id already. This should be a forward reference, but may
106 // be a duplicate id, in which case we will need to generate a bogus placeholder id
107 if (container != null) {
108 if (container.threadable != null) { // oops! duplicate ids...
109 id = "<Bogus-id:" + (bogusIdCount++) + ">";
110 container = null;
111 } else {
112 // The container just contained a forward reference to this message, so let's
113 // fill in the threadable field of the container with this message
114 container.threadable = threadable;
115 }
116 }
117
118 // No container exists for that message Id. Create one and insert it into the hash table.
119 if (container == null) {
120 container = new ThreadContainer();
121 container.threadable = threadable;
122 idTable.put(id, container);
123 }
124
125 // Iterate through all of the references and create ThreadContainers for any references that
126 // don't have them.
127 ThreadContainer parentRef = null;
128 {
129 String[] references = threadable.messageThreadReferences();
130 for (String refString : references)
131 {
132 ThreadContainer ref = idTable.get(refString);
133
134 // if this id doesnt have a container, create one
135 if (ref == null) {
136 ref = new ThreadContainer();
137 idTable.put(refString, ref);
138 }
139
140 // Link references together in the order they appear in the References: header,
141 // IF they dont have a have a parent already &&
142 // IF it will not cause a circular reference
143 if ((parentRef != null)
144 && (ref.parent == null)
145 && (parentRef != ref)
146 && !(ref.findChild(parentRef))) {
147 // Link ref into the parent's child list
148 ref.parent = parentRef;
149 ref.next = parentRef.child;
150 parentRef.child = ref;
151 }
152 parentRef = ref;
153 }
154 }
155
156 // parentRef is now set to the container of the last element in the references field. make that
157 // be the parent of this container, unless doing so causes a circular reference
158 if (parentRef != null
159 && (parentRef == container || container.findChild(parentRef)))
160 {
161 parentRef = null;
162 }
163
164 // if it has a parent already, its because we saw this message in a References: field, and presumed
165 // a parent based on the other entries in that field. Now that we have the actual message, we can
166 // throw away the old parent and use this new one
167 if (container.parent != null) {
168 ThreadContainer rest, prev;
169
170 for (prev = null, rest = container.parent.child;
171 rest != null;
172 prev = rest, rest = rest.next) {
173 if (rest == container) {
174 break;
175 }
176 }
177
178 if (rest == null) {
179 throw new RuntimeException(
180 "Didnt find "
181 + container
182 + " in parent"
183 + container.parent);
184 }
185
186 // Unlink this container from the parent's child list
187 if (prev == null) {
188 container.parent.child = container.next;
189 } else {
190 prev.next = container.next;
191 }
192
193 container.next = null;
194 container.parent = null;
195 }
196
197 // If we have a parent, link container into the parents child list
198 if (parentRef != null) {
199 container.parent = parentRef;
200 container.next = parentRef.child;
201 parentRef.child = container;
202 }
203 }
204
205 /**
206 * Find the root set of all existing ThreadContainers
207 * @return root the ThreadContainer representing the root node
208 */
209 private ThreadContainer findRootSet() {
210 ThreadContainer root = new ThreadContainer();
211 Iterator<String> iter = idTable.keySet().iterator();
212
213 while (iter.hasNext()) {
214 Object key = iter.next();
215 ThreadContainer c = idTable.get(key);
216 if (c.parent == null) {
217 if (c.next != null) {
218 throw new RuntimeException(
219 "c.next is " + c.next.toString());
220 }
221 c.next = root.child;
222 root.child = c;
223 }
224 }
225 return root;
226 }
227
228 /**
229 * Delete any empty or dummy ThreadContainers
230 * @param parent
231 */
232 private void pruneEmptyContainers(ThreadContainer parent) {
233 ThreadContainer container, prev, next;
234 for (prev = null, container = parent.child, next = container.next;
235 container != null;
236 prev = container,
237 container = next,
238 next = (container == null ? null : container.next)) {
239
240 // Is it empty and without any children? If so,delete it
241 if (container.threadable == null && container.child == null) {
242 if (prev == null) {
243 parent.child = container.next;
244 } else {
245 prev.next = container.next;
246 }
247
248 // Set container to prev so that prev keeps its same value the next time through the loop
249 container = prev;
250 }
251
252 // Else if empty, with kids, and (not at root or only one kid)
253 else if (
254 container.threadable == null
255 && container.child != null
256 && (container.parent != null
257 || container.child.next == null)) {
258 // We have an invalid/expired message with kids. Promote the kids to this level.
259 ThreadContainer tail;
260 ThreadContainer kids = container.child;
261
262 // Remove this container and replace with 'kids'.
263 if (prev == null) {
264 parent.child = kids;
265 } else {
266 prev.next = kids;
267 }
268
269 // Make each child's parent be this level's parent -> i.e. promote the children. Make the last child's next point to this container's next
270 // i.e. splice kids into the list in place of container
271 for (tail = kids; tail.next != null; tail = tail.next) {
272 tail.parent = container.parent;
273 }
274
275 tail.parent = container.parent;
276 tail.next = container.next;
277
278 // next currently points to the item after the inserted items in the chain - reset that so we process the newly
279 // promoted items next time round
280 next = kids;
281
282 // Set container to prev so that prev keeps its same value the next time through the loop
283 container = prev;
284 } else if (container.child != null) {
285 // A real message , with kids
286 // Iterate over the children
287 pruneEmptyContainers(container);
288 }
289 }
290 }
291
292 /**
293 * If any two members of the root set have the same subject, merge them. This is to attempt to accomodate messages without References: headers.
294 */
295 private void gatherSubjects() {
296
297 int count = 0;
298
299 for (ThreadContainer c = root.child; c != null; c = c.next) {
300 count++;
301 }
302
303 // TODO verify this will avoid rehashing
304 HashMap<String, ThreadContainer> subjectTable = new HashMap<String, ThreadContainer>((int) (count * 1.2), (float) 0.9);
305 count = 0;
306
307 for (ThreadContainer c = root.child; c != null; c = c.next) {
308 Threadable threadable = c.threadable;
309
310 // No threadable? If so, it is a dummy node in the root set.
311 // Only root set members may be dummies, and they alway have at least 2 kids
312 // Take the first kid as representative of the subject
313 if (threadable == null) {
314 threadable = c.child.threadable;
315 }
316
317 String subj = threadable.simplifiedSubject();
318
319 if (subj == null || subj == "") {
320 continue;
321 }
322
323 ThreadContainer old = subjectTable.get(subj);
324
325 // Add this container to the table iff:
326 // - There exists no container with this subject
327 // - or this is a dummy container and the old one is not - the dummy one is
328 // more interesting as a root, so put it in the table instead
329 // - The container in the table has a "Re:" version of this subject, and
330 // this container has a non-"Re:" version of this subject. The non-"Re:" version
331 // is the more interesting of the two.
332 if (old == null
333 || (c.threadable == null && old.threadable != null)
334 || (old.threadable != null
335 && old.threadable.subjectIsReply()
336 && c.threadable != null
337 && !c.threadable.subjectIsReply())) {
338 subjectTable.put(subj, c);
339 count++;
340 }
341 }
342
343 // If the table is empty, we're done
344 if (count == 0) {
345 return;
346 }
347
348 // subjectTable is now populated with one entry for each subject which occurs in the
349 // root set. Iterate over the root set, and gather together the difference.
350 ThreadContainer prev, c, rest;
351 for (prev = null, c = root.child, rest = c.next;
352 c != null;
353 prev = c, c = rest, rest = (rest == null ? null : rest.next)) {
354 Threadable threadable = c.threadable;
355
356 // is it a dummy node?
357 if (threadable == null) {
358 threadable = c.child.threadable;
359 }
360
361 String subj = threadable.simplifiedSubject();
362
363 // Dont thread together all subjectless messages
364 if (subj == null || subj == "") {
365 continue;
366 }
367
368 ThreadContainer old = subjectTable.get(subj);
369
370 if (old == c) { // That's us
371 continue;
372 }
373
374 // We have now found another container in the root set with the same subject
375 // Remove the "second" message from the root set
376 if (prev == null) {
377 root.child = c.next;
378 } else {
379 prev.next = c.next;
380 }
381 c.next = null;
382
383 if (old.threadable == null && c.threadable == null) {
384 // both dummies - merge them
385 ThreadContainer tail;
386 for (tail = old.child;
387 tail != null && tail.next != null;
388 tail = tail.next) {
389 // do nothing
390 }
391
392 if (tail != null) { // protect against possible NPE
393 tail.next = c.child;
394 }
395
396 for (tail = c.child; tail != null; tail = tail.next) {
397 tail.parent = old;
398 }
399
400 c.child = null;
401 } else if (
402 old.threadable == null
403 || (c.threadable != null
404 && c.threadable.subjectIsReply()
405 && !old.threadable.subjectIsReply())) {
406 // Else if old is empty, or c has "Re:" and old does not ==> make this message a child of old
407 c.parent = old;
408 c.next = old.child;
409 old.child = c;
410 } else {
411 // else make the old and new messages be children of a new dummy container.
412 // We create a new container object for old.msg and empty the old container
413 ThreadContainer newc = new ThreadContainer();
414 newc.threadable = old.threadable;
415 newc.child = old.child;
416
417 for (ThreadContainer tail = newc.child;
418 tail != null;
419 tail = tail.next)
420 {
421 tail.parent = newc;
422 }
423
424 old.threadable = null;
425 old.child = null;
426
427 c.parent = old;
428 newc.parent = old;
429
430 // Old is now a dummy- give it 2 kids , c and newc
431 old.child = c;
432 c.next = newc;
433 }
434 // We've done a merge, so keep the same prev
435 c = prev;
436 }
437
438 subjectTable.clear();
439 subjectTable = null;
440
441 }
442
443
444 // DEPRECATED METHODS - for API compatibility only - DO NOT USE
445
446 /**
447 * The client passes in an array of Threadable objects, and
448 * the Threader constructs a connected 'graph' of messages
449 * @param messages array of messages to thread
450 * @return null if messages == null or root.child == null
451 * @deprecated (2.2) prefer {@link #thread(List)}
452 */
453 @Deprecated
454 public Threadable thread(Threadable[] messages) {
455 return thread(java.util.Arrays.asList(messages));
456 }
457
458 }