001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     *
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     *
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    
018    
019    package org.apache.commons.net.nntp;
020    
021    /**
022     * This is an implementation of a message threading algorithm, as originally devised by Zamie Zawinski.
023     * See <a href="http://www.jwz.org/doc/threading.html">http://www.jwz.org/doc/threading.html</a> for details.
024     * For his Java implementation, see <a href="http://lxr.mozilla.org/mozilla/source/grendel/sources/grendel/view/Threader.java">http://lxr.mozilla.org/mozilla/source/grendel/sources/grendel/view/Threader.java</a>
025     *
026     * @author rwinston <rwinston@checkfree.com>
027     *
028     */
029    
030    import java.util.HashMap;
031    import java.util.Iterator;
032    import java.util.List;
033    
034    public class Threader {
035        private ThreadContainer root;
036        private HashMap<String,ThreadContainer> idTable;
037        private int bogusIdCount = 0;
038    
039        /**
040         * The client passes in a list of Threadable objects, and
041         * the Threader constructs a connected 'graph' of messages
042         * @param messages list of messages to thread
043         * @return null if messages == null or root.child == null
044         * @since 2.2
045         */
046        public Threadable thread(List<? extends Threadable> messages) {
047            return thread((Iterable<? extends Threadable>)messages);
048        }
049    
050        /**
051         * The client passes in a list of Iterable objects, and
052         * the Threader constructs a connected 'graph' of messages
053         * @param messages iterable of messages to thread
054         * @return null if messages == null or root.child == null
055         * @since 3.0
056         */
057        public Threadable thread(Iterable<? extends Threadable> messages) {
058            if (messages == null) {
059                return null;
060            }
061    
062            idTable = new HashMap<String,ThreadContainer>();
063    
064            // walk through each Threadable element
065            for (Threadable t : messages) {
066                if (!t.isDummy()) {
067                    buildContainer(t);
068                }
069            }
070    
071            root = findRootSet();
072            idTable.clear();
073            idTable = null;
074    
075            pruneEmptyContainers(root);
076    
077            root.reverseChildren();
078            gatherSubjects();
079    
080            if (root.next != null) {
081                throw new RuntimeException("root node has a next:" + root);
082            }
083    
084            for (ThreadContainer r = root.child; r != null; r = r.next) {
085                if (r.threadable == null) {
086                    r.threadable = r.child.threadable.makeDummy();
087                }
088            }
089    
090            Threadable result = (root.child == null ? null : root.child.threadable);
091            root.flush();
092            root = null;
093    
094            return result;
095        }
096    
097        /**
098         *
099         * @param threadable
100         */
101        private void buildContainer(Threadable threadable) {
102            String id = threadable.messageThreadId();
103            ThreadContainer container = idTable.get(id);
104    
105            // A ThreadContainer exists for this id already. This should be a forward reference, but may
106            // be a duplicate id, in which case we will need to generate a bogus placeholder id
107            if (container != null) {
108                if (container.threadable != null) { // oops! duplicate ids...
109                    id = "<Bogus-id:" + (bogusIdCount++) + ">";
110                    container = null;
111                } else {
112                    // The container just contained a forward reference to this message, so let's
113                    // fill in the threadable field of the container with this message
114                    container.threadable = threadable;
115                }
116            }
117    
118            // No container exists for that message Id. Create one and insert it into the hash table.
119            if (container == null) {
120                container = new ThreadContainer();
121                container.threadable = threadable;
122                idTable.put(id, container);
123            }
124    
125            // Iterate through all of the references and create ThreadContainers for any references that
126            // don't have them.
127            ThreadContainer parentRef = null;
128            {
129                String[] references = threadable.messageThreadReferences();
130                for (String refString : references)
131                {
132                    ThreadContainer ref = idTable.get(refString);
133    
134                    // if this id doesnt have a container, create one
135                    if (ref == null) {
136                        ref = new ThreadContainer();
137                        idTable.put(refString, ref);
138                    }
139    
140                    // Link references together in the order they appear in the References: header,
141                    // IF they dont have a have a parent already &&
142                    // IF it will not cause a circular reference
143                    if ((parentRef != null)
144                        && (ref.parent == null)
145                        && (parentRef != ref)
146                        && !(ref.findChild(parentRef))) {
147                        // Link ref into the parent's child list
148                        ref.parent = parentRef;
149                        ref.next = parentRef.child;
150                        parentRef.child = ref;
151                    }
152                    parentRef = ref;
153                }
154            }
155    
156            // parentRef is now set to the container of the last element in the references field. make that
157            // be the parent of this container, unless doing so causes a circular reference
158            if (parentRef != null
159                && (parentRef == container || container.findChild(parentRef)))
160            {
161                parentRef = null;
162            }
163    
164            // if it has a parent already, its because we saw this message in a References: field, and presumed
165            // a parent based on the other entries in that field. Now that we have the actual message, we can
166            // throw away the old parent and use this new one
167            if (container.parent != null) {
168                ThreadContainer rest, prev;
169    
170                for (prev = null, rest = container.parent.child;
171                    rest != null;
172                    prev = rest, rest = rest.next) {
173                    if (rest == container) {
174                        break;
175                    }
176                }
177    
178                if (rest == null) {
179                    throw new RuntimeException(
180                        "Didnt find "
181                            + container
182                            + " in parent"
183                            + container.parent);
184                }
185    
186                // Unlink this container from the parent's child list
187                if (prev == null) {
188                    container.parent.child = container.next;
189                } else {
190                    prev.next = container.next;
191                }
192    
193                container.next = null;
194                container.parent = null;
195            }
196    
197            // If we have a parent, link container into the parents child list
198            if (parentRef != null) {
199                container.parent = parentRef;
200                container.next = parentRef.child;
201                parentRef.child = container;
202            }
203        }
204    
205        /**
206         * Find the root set of all existing ThreadContainers
207         * @return root the ThreadContainer representing the root node
208         */
209        private ThreadContainer findRootSet() {
210            ThreadContainer root = new ThreadContainer();
211            Iterator<String> iter = idTable.keySet().iterator();
212    
213            while (iter.hasNext()) {
214                Object key = iter.next();
215                ThreadContainer c = idTable.get(key);
216                if (c.parent == null) {
217                    if (c.next != null) {
218                        throw new RuntimeException(
219                                "c.next is " + c.next.toString());
220                    }
221                    c.next = root.child;
222                    root.child = c;
223                }
224            }
225            return root;
226        }
227    
228        /**
229         * Delete any empty or dummy ThreadContainers
230         * @param parent
231         */
232        private void pruneEmptyContainers(ThreadContainer parent) {
233            ThreadContainer container, prev, next;
234            for (prev = null, container = parent.child, next = container.next;
235                container != null;
236                prev = container,
237                    container = next,
238                    next = (container == null ? null : container.next)) {
239    
240                // Is it empty and without any children? If so,delete it
241                if (container.threadable == null && container.child == null) {
242                    if (prev == null) {
243                        parent.child = container.next;
244                    } else {
245                        prev.next = container.next;
246                    }
247    
248                    // Set container to prev so that prev keeps its same value the next time through the loop
249                    container = prev;
250                }
251    
252                // Else if empty, with kids, and (not at root or only one kid)
253                else if (
254                    container.threadable == null
255                        && container.child != null
256                        && (container.parent != null
257                            || container.child.next == null)) {
258                    // We have an invalid/expired message with kids. Promote the kids to this level.
259                    ThreadContainer tail;
260                    ThreadContainer kids = container.child;
261    
262                    // Remove this container and replace with 'kids'.
263                    if (prev == null) {
264                        parent.child = kids;
265                    } else {
266                        prev.next = kids;
267                    }
268    
269                    // Make each child's parent be this level's parent -> i.e. promote the children. Make the last child's next point to this container's next
270                    // i.e. splice kids into the list in place of container
271                    for (tail = kids; tail.next != null; tail = tail.next) {
272                        tail.parent = container.parent;
273                    }
274    
275                    tail.parent = container.parent;
276                    tail.next = container.next;
277    
278                    // next currently points to the item after the inserted items in the chain - reset that so we process the newly
279                    // promoted items next time round
280                    next = kids;
281    
282                    // Set container to prev so that prev keeps its same value the next time through the loop
283                    container = prev;
284                } else if (container.child != null) {
285                    // A real message , with kids
286                    // Iterate over the children
287                    pruneEmptyContainers(container);
288                }
289            }
290        }
291    
292        /**
293         *  If any two members of the root set have the same subject, merge them. This is to attempt to accomodate messages without References: headers.
294         */
295        private void gatherSubjects() {
296    
297            int count = 0;
298    
299            for (ThreadContainer c = root.child; c != null; c = c.next) {
300                count++;
301            }
302    
303            // TODO verify this will avoid rehashing
304            HashMap<String, ThreadContainer> subjectTable = new HashMap<String, ThreadContainer>((int) (count * 1.2), (float) 0.9);
305            count = 0;
306    
307            for (ThreadContainer c = root.child; c != null; c = c.next) {
308                Threadable threadable = c.threadable;
309    
310                // No threadable? If so, it is a dummy node in the root set.
311                // Only root set members may be dummies, and they alway have at least 2 kids
312                // Take the first kid as representative of the subject
313                if (threadable == null) {
314                    threadable = c.child.threadable;
315                }
316    
317                String subj = threadable.simplifiedSubject();
318    
319                if (subj == null || subj == "") {
320                    continue;
321                }
322    
323                ThreadContainer old = subjectTable.get(subj);
324    
325                // Add this container to the table iff:
326                // - There exists no container with this subject
327                // - or this is a dummy container and the old one is not - the dummy one is
328                // more interesting as a root, so put it in the table instead
329                // - The container in the table has a "Re:" version of this subject, and
330                // this container has a non-"Re:" version of this subject. The non-"Re:" version
331                // is the more interesting of the two.
332                if (old == null
333                    || (c.threadable == null && old.threadable != null)
334                    || (old.threadable != null
335                        && old.threadable.subjectIsReply()
336                        && c.threadable != null
337                        && !c.threadable.subjectIsReply())) {
338                    subjectTable.put(subj, c);
339                    count++;
340                }
341            }
342    
343            // If the table is empty, we're done
344            if (count == 0) {
345                return;
346            }
347    
348            // subjectTable is now populated with one entry for each subject which occurs in the
349            // root set. Iterate over the root set, and gather together the difference.
350            ThreadContainer prev, c, rest;
351            for (prev = null, c = root.child, rest = c.next;
352                c != null;
353                prev = c, c = rest, rest = (rest == null ? null : rest.next)) {
354                Threadable threadable = c.threadable;
355    
356                // is it a dummy node?
357                if (threadable == null) {
358                    threadable = c.child.threadable;
359                }
360    
361                String subj = threadable.simplifiedSubject();
362    
363                // Dont thread together all subjectless messages
364                if (subj == null || subj == "") {
365                    continue;
366                }
367    
368                ThreadContainer old = subjectTable.get(subj);
369    
370                if (old == c) { // That's us
371                    continue;
372                }
373    
374                // We have now found another container in the root set with the same subject
375                // Remove the "second" message from the root set
376                if (prev == null) {
377                    root.child = c.next;
378                } else {
379                    prev.next = c.next;
380                }
381                c.next = null;
382    
383                if (old.threadable == null && c.threadable == null) {
384                    // both dummies - merge them
385                    ThreadContainer tail;
386                    for (tail = old.child;
387                        tail != null && tail.next != null;
388                        tail = tail.next) {
389                        // do nothing
390                    }
391    
392                    if (tail != null) { // protect against possible NPE
393                        tail.next = c.child;
394                    }
395    
396                    for (tail = c.child; tail != null; tail = tail.next) {
397                        tail.parent = old;
398                    }
399    
400                    c.child = null;
401                } else if (
402                    old.threadable == null
403                        || (c.threadable != null
404                            && c.threadable.subjectIsReply()
405                            && !old.threadable.subjectIsReply())) {
406                    // Else if old is empty, or c has "Re:" and old does not  ==> make this message a child of old
407                    c.parent = old;
408                    c.next = old.child;
409                    old.child = c;
410                } else {
411                    // else make the old and new messages be children of a new dummy container.
412                    // We create a new container object for old.msg and empty the old container
413                    ThreadContainer newc = new ThreadContainer();
414                    newc.threadable = old.threadable;
415                    newc.child = old.child;
416    
417                    for (ThreadContainer tail = newc.child;
418                        tail != null;
419                        tail = tail.next) 
420                    {
421                        tail.parent = newc;
422                    }
423    
424                    old.threadable = null;
425                    old.child = null;
426    
427                    c.parent = old;
428                    newc.parent = old;
429    
430                    // Old is now a dummy- give it 2 kids , c and newc
431                    old.child = c;
432                    c.next = newc;
433                }
434                // We've done a merge, so keep the same prev
435                c = prev;
436            }
437    
438            subjectTable.clear();
439            subjectTable = null;
440    
441        }
442    
443    
444        // DEPRECATED METHODS - for API compatibility only - DO NOT USE
445    
446        /**
447         * The client passes in an array of Threadable objects, and
448         * the Threader constructs a connected 'graph' of messages
449         * @param messages array of messages to thread
450         * @return null if messages == null or root.child == null
451         * @deprecated (2.2) prefer {@link #thread(List)}
452         */
453        @Deprecated
454        public Threadable thread(Threadable[] messages) {
455            return thread(java.util.Arrays.asList(messages));
456        }
457    
458    }