package edu.vt.marian.search; import java.util.*; import java.io.*; import edu.vt.marian.common.*; /** * A PriQueueSequencer is a Sequencer designed for large numbers of * component sets. It is presumed that the sets are all proper * WtdObjSets (that they all have varying weights among their * elements) and that they are added with similar scaling weights, so * that we are unlikely to be able to ignore any one set for long. * (If this consequence is violated, we should be using a * WtdObjSetSequencer (q.v.)). *

* Since the number of sets is large, we need an O(k) search for the * next highest element. Back in the old days, somebody would build a * heap and use it to implement a priority queue of component sets * where the key is the (optinally weighted) top unused element in the * set. But this is Java. Is there a PirorityQueue class in the * library? *

* By the way, the reasoning below indicates that we want to keep all * the sets but the current one on the PriQueue until elements from * the current set fall below the top weight in thePriQueue. *

* We expect the weights in any WtdObjSet to drop off, not smoothly, * but as a stairstep function. Thus instead of searching for the * top element among the (scaled) component sets, we begin be * searching for the top two sets, and search only when the * top weight in the second set exceeds that in the top set. * *

* All the HEAP algorithms used in this class are implementations of * those presented in Cormen, Lieserson, and Rivest, _Introduction * to Algorithms_, Chapter 7. Note that in the algorithms presented * in the above book, heaps are implemented using one-dimensional * arrays indexed from 1..heapSize. However, Java Vectors are * indexed from 0..heapSize-1. So, to simplify implementation, and * closer track the algorithms presented in the book, we shall * address the heap Vector from 1..heapSize, and simply leave element * 0 of that vector unused. * * @author Paul Mather * @author Robert France * @version $Id: PriQueueSequencer.java,v 1.2 2000/02/15 16:59:46 paul Exp $ * @see Sequencer * @see java.util.Enumeration */ public class PriQueueSequencer implements SetCollectionSequencer // and therefore, // Sequencer and Enumeration. { /** * Ye olde debug object. */ protected Debug debug; /** * The size of the universe of which all the sets are subsets. Needed * to calculate the expected number of elements. */ protected long clSize; /** * The vector containing the heap elements. */ protected Vector heap; private class PriQueueWtdObjSet implements Sortable { /** * The weighted object at the head of this weighted object * set. This object is used when ordering this weighted * object set relative to others. */ protected WtdObj head; /** * The rest of the weighted objects in this weighted object set. */ protected WtdObjSetEnumeration tail; /** * Create a new PriQueueWtdObjSet given a WtdObjSetEnumeration. * @param s enumeration of a weighted object set */ protected PriQueueWtdObjSet(WtdObjSet s) { WtdObjSetEnumeration enum; if (s == null) { head = null; tail = null; } else { // Convert the weighted object set into an enumeration enum = (WtdObjSetEnumeration) s.elements(); // Get the head element of the enumeration try { head = (WtdObj) enum.nextElement(); } catch( NoSuchElementException e) { head = null; } // Save the tail tail = enum; } } /** * Compare this PriQueueWtdObjSet either to another * PriQueueWtdObjSet or to a WtdObj. * * @param obj object to which we will compare ourself * @return Sortable.{LESS,EQUAL,GREATER} depending upon * how we fare compared to the object supplied, or * Sortable.INCOMPARABLE if the given object is not * compatible for comparison purposes. */ public int compare(Object obj) { WtdObj topElt = null; int returnValue; if (obj instanceof PriQueueWtdObjSet) { if (obj != null) topElt = ((PriQueueWtdObjSet) obj).head; else topElt = null; } if (obj instanceof WtdObj) { topElt = (WtdObj) obj; } if (obj instanceof PriQueueWtdObjSet || obj instanceof WtdObj) { if (head == null || topElt == null) returnValue = Sortable.INCOMPARABLE; else { returnValue = head.compare(topElt); } } else return Sortable.INCOMPARABLE; return returnValue; } /** * Pop the head WtdObj off a PriQueueWtdObjSet and move the * next WtdObj from the tail into the head. * @return WtdObj from the head. * @exception NoSuchElementException if the PriQueueWtdObjSet * is empty (head is null) */ protected WtdObj popElement() throws NoSuchElementException { WtdObj returnValue; if (head == null) throw new NoSuchElementException("PriQueueWtdObjSet empty"); else { returnValue = head; head = null; if (tail != null) { try { head = (WtdObj) tail.nextElement(); } catch( NoSuchElementException e) { head = null; } } } return returnValue; } /** * Return the maximum number of elements contained in this * weighted object set. * @return maximum number of weighted objects left */ protected int maxNumRemaining() { int maxNumLeft = 0; if (tail != null) maxNumLeft = tail.maxNumRemaining(); if (head != null) maxNumLeft++; return maxNumLeft; } } /** * Create a priority queue sequencer for an unknown number of sets. * @param classSize the size of the universe of which all the * sets in this sequencer are subsets * @param d debug object */ public PriQueueSequencer(long classSize, Debug d) { clSize = classSize; debug = d; heap = new Vector(); // Add "unused" element, to adjust Vector addressing to // match that of heap addressing heap.addElement(null); } /** * Create a merging sequencer for an expected number of sets. * @param expectedNumSets As many sets as we expect to add. * Does not need to be perfectly accurate. * @param classSize the size of the universe of which all the * sets in this sequencer are subsets * @param d debug object */ public PriQueueSequencer(int expectedNumSets, long classSize, Debug d) { clSize = classSize; debug = d; heap = new Vector(expectedNumSets); // Add "unused" element, to adjust Vector addressing to // match that of heap addressing heap.addElement(null); } /** * Return the index of the parent of the index supplied. * * @param i index of the heap element whose parent we are to determine * @return the index of the parent */ private static final int parent(int i) { return (i >> 1); } /** * Return the left child of the given heap node. * * @param i heap node whose left child's index we are to compute * @return the index of the left child */ private static final int left(int i) { return (i << 1); } /** * Return the right child of the given heap node. * * @param i heap node whose right child's index we are to compute * @return the index of the right child */ private static final int right(int i) { return ((i << 1) | 1); } /** * Return the weighted object stored at the given element of the * given heap Vector. This is really just a shorthand. It would * be nice to think that Java could "inline" calls to this * method * @param h heap Vector from which element is to be returned * @param i index into heap Vector from which element is to be returned * @return PriQueueWtdObjSet from the given heap */ private static final PriQueueWtdObjSet heapElt(Vector h, int i) { return (PriQueueWtdObjSet) h.elementAt( i ); } /** * Return the size of the given heap. This will be the size * of the given Vector - 1, because we assume element zero * of the Vector given to us is "unused," to adjust addressing * of the heap from 1 onwards. * * @param h the Vector representing the heap * @return the size of the heap represented by the given Vector * @exception IndexOutOfBoundsException if the given heap size * indicates the underlying heap Vector is malformed. */ private static int heapSize(Vector h) throws IndexOutOfBoundsException { int s = h.size()-1; if (s < 0) throw new IndexOutOfBoundsException("Invalid heap"); else return s; } /** * Heapify the given heap at the given node. * @param A the heap to be heapified * @param i the point at which to heapify */ private void heapify(Vector A, int i) { int l, r, largest;; l = left(i); r = right(i); if (l <= heapSize(A) && heapElt(A,l).compare(heapElt(A,i)) == Sortable.GREATER) largest = l; else largest = i; if (r <= heapSize(A) && heapElt(A,r).compare(heapElt(A,largest)) == Sortable.GREATER) largest = r; if (largest != i) { // Exchange A[i] and A[largest] Object obj = A.elementAt(i); A.setElementAt(A.elementAt(largest), i); A.setElementAt(obj, largest); // heapify again... heapify(A,largest); } } /** * Extract the next element from the priority queue. Here we must * deviate from the algorithm in Cormen, Lieserson, and Rivest, * _Introduction to Algorithms_ a little because our priority * queue heap actually has a WtdObjSetEnumeration as each heap * element. Thus, removing the max element from the heap does * not necessarily shorten the heap by one element: that only * happens if we remove the last element of the set behind the * element. Otherwise, we simply heapify again. * @param A heap from which we are to extract the max WtdObj * @return highest weighted WtdObj from this sequencer * @exception NoSuchElementException if the heap is empty */ private WtdObj heapExtractMax(Vector A) throws NoSuchElementException { WtdObj max; int sizeOfHeap = heapSize(A); // Check for heap underflow if (sizeOfHeap < 1) throw new NoSuchElementException("Heap underflow"); // Pop off the max heap element max = ((PriQueueWtdObjSet) A.elementAt(1)).popElement(); // Peek at the new top element to check to see if this // PriQueueWtdObjSet is now empty and thus needs to be removed // from the heap Vector if (((PriQueueWtdObjSet) A.elementAt(1)).head == null) { // The top PriQueueWtdObjSet is empty, so we need // to shrink the heap by one set and... A.setElementAt(A.elementAt(sizeOfHeap), 1); A.removeElementAt(sizeOfHeap); } // Re-heapify heapify(A,1); return max; } /** * Add a new component set to the Sequencer with no scaling constant. * * @param set a component weighted object set. * @return ReturnCodes.OK -- set was added without problems; * anything else -- problems. */ public int addSet(WtdObjSet set) { PriQueueWtdObjSet key; int i; // Convert the weighted object set into an element that // the heap stores key = new PriQueueWtdObjSet( set ); // Extend the heap by one element (initially empty) heap.addElement(null); // Perform Heap-Insert procedure from CLR i = heapSize(heap); while (i > 1 && heapElt(heap,parent(i)).compare(key) == Sortable.LESS) { heap.setElementAt((PriQueueWtdObjSet) heapElt(heap,parent(i)), i); i = parent(i); } heap.setElementAt((PriQueueWtdObjSet) key, i); return ReturnCodes.OK; } /** * Return the expected number of unique elements in the result * sequence. * * NOTE: The statistics here are drawn from the hypergeometric * distribution. Follow code and documentation in the C++ file * "search/woset_coll.cc." * @return the expected number of unique elements left in this sequencer */ public int expectedNumElts() { int numSets = heapSize(heap); int i, j; long numTotalItems, sizeOverlap; int numRemaining[] = new int[numSets]; PriQueueWtdObjSet sequencedSet; numRemaining[0] = 0; for (i=0; i < numSets; i++) { // Figure out the approximate number of remaining elements // for this weighted object set in the sequencer sequencedSet = (PriQueueWtdObjSet) heapElt(heap,i+1); if (sequencedSet.tail == null) numRemaining[i] = 0; else numRemaining[i] = sequencedSet.tail.approxNumRemaining(); // Adjust total for elements in the "head" if (sequencedSet.head != null) numRemaining[i]++; } numTotalItems = numRemaining[0]; sizeOverlap = 0; for (i=1; i < numSets; i++) { numTotalItems += numRemaining[i]; for (j=i; j < numSets; j++) { sizeOverlap += numRemaining[i-1] * numRemaining[j]; } } return (int) (numTotalItems - sizeOverlap/clSize); } /** * Return the maximum number of unique elements in the result * sequence. (This is just the sum of all the elements in each * component set,) * @return the maximum number of elements left in this sequencer */ public int maxNumElts() { int numElts = 0; int numSets = heapSize(heap); PriQueueWtdObjSet elt; for (int i=1; i <= numSets; i++) { elt = heapElt(heap,i); if (elt != null) numElts += elt.maxNumRemaining(); } return numElts; } /** * Return the next weighted object (of highest weight) in this sequencer. * @return top-weighted object from this sequencer */ public Object nextElement() throws NoSuchElementException { return heapExtractMax(heap); } /** * Determine if there are any objects left in this sequencer. * @return true if there are weighted objects left to be returned * by this sequencer; false if there are none */ public boolean hasMoreElements() { return (heapSize(heap) > 0 && heapElt(heap,1) != null && heapElt(heap,1).head != null); } /** * Return a string representation of this sequencer and its contents. * @return string representation of the sequencer. */ public String toString() { return this.profile(); } /** * Return a string that summarises the main features of this sequencer. * @return string giving the gist of what this sequencer contains */ public String profile() { int numSets = heapSize(heap); StringBuffer str = new StringBuffer(numSets*20 + 50); // Rough estimate boolean first = true; int numLeft; PriQueueWtdObjSet topSet; WtdObj top; str.append("{WtdObjSets: "); str.append(numSets); for (int i=1; i <= numSets; i++) { // Alter separator text according to whether this is the // first item output if (first) { str.append("; top items (max remaining): "); first = false; } else { str.append(", "); } // Add weighted object set i to profile topSet = heapElt(heap, i); if (topSet == null) { top = null; numLeft = 0; } else { top = topSet.head; numLeft = topSet.tail.maxNumRemaining(); } if (top == null) str.append(""); else str.append(top.toString()); str.append(" ("); str.append(numLeft); str.append(')'); } str.append('}'); return str.toString(); } }