001 /* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 018 019 package org.apache.commons.net.nntp; 020 021 /** 022 * This is an implementation of a message threading algorithm, as originally devised by Zamie Zawinski. 023 * See <a href="http://www.jwz.org/doc/threading.html">http://www.jwz.org/doc/threading.html</a> for details. 024 * For his Java implementation, see <a href="http://lxr.mozilla.org/mozilla/source/grendel/sources/grendel/view/Threader.java">http://lxr.mozilla.org/mozilla/source/grendel/sources/grendel/view/Threader.java</a> 025 * 026 * @author rwinston <rwinston@checkfree.com> 027 * 028 */ 029 030 import java.util.HashMap; 031 import java.util.Iterator; 032 import java.util.List; 033 034 public class Threader { 035 private ThreadContainer root; 036 private HashMap<String,ThreadContainer> idTable; 037 private int bogusIdCount = 0; 038 039 /** 040 * The client passes in a list of Threadable objects, and 041 * the Threader constructs a connected 'graph' of messages 042 * @param messages list of messages to thread 043 * @return null if messages == null or root.child == null 044 * @since 2.2 045 */ 046 public Threadable thread(List<? extends Threadable> messages) { 047 return thread((Iterable<? extends Threadable>)messages); 048 } 049 050 /** 051 * The client passes in a list of Iterable objects, and 052 * the Threader constructs a connected 'graph' of messages 053 * @param messages iterable of messages to thread 054 * @return null if messages == null or root.child == null 055 * @since 3.0 056 */ 057 public Threadable thread(Iterable<? extends Threadable> messages) { 058 if (messages == null) { 059 return null; 060 } 061 062 idTable = new HashMap<String,ThreadContainer>(); 063 064 // walk through each Threadable element 065 for (Threadable t : messages) { 066 if (!t.isDummy()) { 067 buildContainer(t); 068 } 069 } 070 071 root = findRootSet(); 072 idTable.clear(); 073 idTable = null; 074 075 pruneEmptyContainers(root); 076 077 root.reverseChildren(); 078 gatherSubjects(); 079 080 if (root.next != null) { 081 throw new RuntimeException("root node has a next:" + root); 082 } 083 084 for (ThreadContainer r = root.child; r != null; r = r.next) { 085 if (r.threadable == null) { 086 r.threadable = r.child.threadable.makeDummy(); 087 } 088 } 089 090 Threadable result = (root.child == null ? null : root.child.threadable); 091 root.flush(); 092 root = null; 093 094 return result; 095 } 096 097 /** 098 * 099 * @param threadable 100 */ 101 private void buildContainer(Threadable threadable) { 102 String id = threadable.messageThreadId(); 103 ThreadContainer container = idTable.get(id); 104 105 // A ThreadContainer exists for this id already. This should be a forward reference, but may 106 // be a duplicate id, in which case we will need to generate a bogus placeholder id 107 if (container != null) { 108 if (container.threadable != null) { // oops! duplicate ids... 109 id = "<Bogus-id:" + (bogusIdCount++) + ">"; 110 container = null; 111 } else { 112 // The container just contained a forward reference to this message, so let's 113 // fill in the threadable field of the container with this message 114 container.threadable = threadable; 115 } 116 } 117 118 // No container exists for that message Id. Create one and insert it into the hash table. 119 if (container == null) { 120 container = new ThreadContainer(); 121 container.threadable = threadable; 122 idTable.put(id, container); 123 } 124 125 // Iterate through all of the references and create ThreadContainers for any references that 126 // don't have them. 127 ThreadContainer parentRef = null; 128 { 129 String[] references = threadable.messageThreadReferences(); 130 for (String refString : references) 131 { 132 ThreadContainer ref = idTable.get(refString); 133 134 // if this id doesnt have a container, create one 135 if (ref == null) { 136 ref = new ThreadContainer(); 137 idTable.put(refString, ref); 138 } 139 140 // Link references together in the order they appear in the References: header, 141 // IF they dont have a have a parent already && 142 // IF it will not cause a circular reference 143 if ((parentRef != null) 144 && (ref.parent == null) 145 && (parentRef != ref) 146 && !(ref.findChild(parentRef))) { 147 // Link ref into the parent's child list 148 ref.parent = parentRef; 149 ref.next = parentRef.child; 150 parentRef.child = ref; 151 } 152 parentRef = ref; 153 } 154 } 155 156 // parentRef is now set to the container of the last element in the references field. make that 157 // be the parent of this container, unless doing so causes a circular reference 158 if (parentRef != null 159 && (parentRef == container || container.findChild(parentRef))) 160 { 161 parentRef = null; 162 } 163 164 // if it has a parent already, its because we saw this message in a References: field, and presumed 165 // a parent based on the other entries in that field. Now that we have the actual message, we can 166 // throw away the old parent and use this new one 167 if (container.parent != null) { 168 ThreadContainer rest, prev; 169 170 for (prev = null, rest = container.parent.child; 171 rest != null; 172 prev = rest, rest = rest.next) { 173 if (rest == container) { 174 break; 175 } 176 } 177 178 if (rest == null) { 179 throw new RuntimeException( 180 "Didnt find " 181 + container 182 + " in parent" 183 + container.parent); 184 } 185 186 // Unlink this container from the parent's child list 187 if (prev == null) { 188 container.parent.child = container.next; 189 } else { 190 prev.next = container.next; 191 } 192 193 container.next = null; 194 container.parent = null; 195 } 196 197 // If we have a parent, link container into the parents child list 198 if (parentRef != null) { 199 container.parent = parentRef; 200 container.next = parentRef.child; 201 parentRef.child = container; 202 } 203 } 204 205 /** 206 * Find the root set of all existing ThreadContainers 207 * @return root the ThreadContainer representing the root node 208 */ 209 private ThreadContainer findRootSet() { 210 ThreadContainer root = new ThreadContainer(); 211 Iterator<String> iter = idTable.keySet().iterator(); 212 213 while (iter.hasNext()) { 214 Object key = iter.next(); 215 ThreadContainer c = idTable.get(key); 216 if (c.parent == null) { 217 if (c.next != null) { 218 throw new RuntimeException( 219 "c.next is " + c.next.toString()); 220 } 221 c.next = root.child; 222 root.child = c; 223 } 224 } 225 return root; 226 } 227 228 /** 229 * Delete any empty or dummy ThreadContainers 230 * @param parent 231 */ 232 private void pruneEmptyContainers(ThreadContainer parent) { 233 ThreadContainer container, prev, next; 234 for (prev = null, container = parent.child, next = container.next; 235 container != null; 236 prev = container, 237 container = next, 238 next = (container == null ? null : container.next)) { 239 240 // Is it empty and without any children? If so,delete it 241 if (container.threadable == null && container.child == null) { 242 if (prev == null) { 243 parent.child = container.next; 244 } else { 245 prev.next = container.next; 246 } 247 248 // Set container to prev so that prev keeps its same value the next time through the loop 249 container = prev; 250 } 251 252 // Else if empty, with kids, and (not at root or only one kid) 253 else if ( 254 container.threadable == null 255 && container.child != null 256 && (container.parent != null 257 || container.child.next == null)) { 258 // We have an invalid/expired message with kids. Promote the kids to this level. 259 ThreadContainer tail; 260 ThreadContainer kids = container.child; 261 262 // Remove this container and replace with 'kids'. 263 if (prev == null) { 264 parent.child = kids; 265 } else { 266 prev.next = kids; 267 } 268 269 // Make each child's parent be this level's parent -> i.e. promote the children. Make the last child's next point to this container's next 270 // i.e. splice kids into the list in place of container 271 for (tail = kids; tail.next != null; tail = tail.next) { 272 tail.parent = container.parent; 273 } 274 275 tail.parent = container.parent; 276 tail.next = container.next; 277 278 // next currently points to the item after the inserted items in the chain - reset that so we process the newly 279 // promoted items next time round 280 next = kids; 281 282 // Set container to prev so that prev keeps its same value the next time through the loop 283 container = prev; 284 } else if (container.child != null) { 285 // A real message , with kids 286 // Iterate over the children 287 pruneEmptyContainers(container); 288 } 289 } 290 } 291 292 /** 293 * If any two members of the root set have the same subject, merge them. This is to attempt to accomodate messages without References: headers. 294 */ 295 private void gatherSubjects() { 296 297 int count = 0; 298 299 for (ThreadContainer c = root.child; c != null; c = c.next) { 300 count++; 301 } 302 303 // TODO verify this will avoid rehashing 304 HashMap<String, ThreadContainer> subjectTable = new HashMap<String, ThreadContainer>((int) (count * 1.2), (float) 0.9); 305 count = 0; 306 307 for (ThreadContainer c = root.child; c != null; c = c.next) { 308 Threadable threadable = c.threadable; 309 310 // No threadable? If so, it is a dummy node in the root set. 311 // Only root set members may be dummies, and they alway have at least 2 kids 312 // Take the first kid as representative of the subject 313 if (threadable == null) { 314 threadable = c.child.threadable; 315 } 316 317 String subj = threadable.simplifiedSubject(); 318 319 if (subj == null || subj == "") { 320 continue; 321 } 322 323 ThreadContainer old = subjectTable.get(subj); 324 325 // Add this container to the table iff: 326 // - There exists no container with this subject 327 // - or this is a dummy container and the old one is not - the dummy one is 328 // more interesting as a root, so put it in the table instead 329 // - The container in the table has a "Re:" version of this subject, and 330 // this container has a non-"Re:" version of this subject. The non-"Re:" version 331 // is the more interesting of the two. 332 if (old == null 333 || (c.threadable == null && old.threadable != null) 334 || (old.threadable != null 335 && old.threadable.subjectIsReply() 336 && c.threadable != null 337 && !c.threadable.subjectIsReply())) { 338 subjectTable.put(subj, c); 339 count++; 340 } 341 } 342 343 // If the table is empty, we're done 344 if (count == 0) { 345 return; 346 } 347 348 // subjectTable is now populated with one entry for each subject which occurs in the 349 // root set. Iterate over the root set, and gather together the difference. 350 ThreadContainer prev, c, rest; 351 for (prev = null, c = root.child, rest = c.next; 352 c != null; 353 prev = c, c = rest, rest = (rest == null ? null : rest.next)) { 354 Threadable threadable = c.threadable; 355 356 // is it a dummy node? 357 if (threadable == null) { 358 threadable = c.child.threadable; 359 } 360 361 String subj = threadable.simplifiedSubject(); 362 363 // Dont thread together all subjectless messages 364 if (subj == null || subj == "") { 365 continue; 366 } 367 368 ThreadContainer old = subjectTable.get(subj); 369 370 if (old == c) { // That's us 371 continue; 372 } 373 374 // We have now found another container in the root set with the same subject 375 // Remove the "second" message from the root set 376 if (prev == null) { 377 root.child = c.next; 378 } else { 379 prev.next = c.next; 380 } 381 c.next = null; 382 383 if (old.threadable == null && c.threadable == null) { 384 // both dummies - merge them 385 ThreadContainer tail; 386 for (tail = old.child; 387 tail != null && tail.next != null; 388 tail = tail.next) { 389 // do nothing 390 } 391 392 if (tail != null) { // protect against possible NPE 393 tail.next = c.child; 394 } 395 396 for (tail = c.child; tail != null; tail = tail.next) { 397 tail.parent = old; 398 } 399 400 c.child = null; 401 } else if ( 402 old.threadable == null 403 || (c.threadable != null 404 && c.threadable.subjectIsReply() 405 && !old.threadable.subjectIsReply())) { 406 // Else if old is empty, or c has "Re:" and old does not ==> make this message a child of old 407 c.parent = old; 408 c.next = old.child; 409 old.child = c; 410 } else { 411 // else make the old and new messages be children of a new dummy container. 412 // We create a new container object for old.msg and empty the old container 413 ThreadContainer newc = new ThreadContainer(); 414 newc.threadable = old.threadable; 415 newc.child = old.child; 416 417 for (ThreadContainer tail = newc.child; 418 tail != null; 419 tail = tail.next) 420 { 421 tail.parent = newc; 422 } 423 424 old.threadable = null; 425 old.child = null; 426 427 c.parent = old; 428 newc.parent = old; 429 430 // Old is now a dummy- give it 2 kids , c and newc 431 old.child = c; 432 c.next = newc; 433 } 434 // We've done a merge, so keep the same prev 435 c = prev; 436 } 437 438 subjectTable.clear(); 439 subjectTable = null; 440 441 } 442 443 444 // DEPRECATED METHODS - for API compatibility only - DO NOT USE 445 446 /** 447 * The client passes in an array of Threadable objects, and 448 * the Threader constructs a connected 'graph' of messages 449 * @param messages array of messages to thread 450 * @return null if messages == null or root.child == null 451 * @deprecated (2.2) prefer {@link #thread(List)} 452 */ 453 @Deprecated 454 public Threadable thread(Threadable[] messages) { 455 return thread(java.util.Arrays.asList(messages)); 456 } 457 458 }