package edu.vt.marian.search; import java.util.*; import java.io.*; import edu.vt.marian.common.*; /** * A PriQueueSequencer is a Sequencer designed for large numbers of * component sets. It is presumed that the sets are all proper * WtdObjSets (that they all have varying weights among their * elements) and that they are added with similar scaling weights, so * that we are unlikely to be able to ignore any one set for long. * (If this consequence is violated, we should be using a * WtdObjSetSequencer (q.v.)). *
* Since the number of sets is large, we need an O(k) search for the * next highest element. Back in the old days, somebody would build a * heap and use it to implement a priority queue of component sets * where the key is the (optinally weighted) top unused element in the * set. But this is Java. Is there a PirorityQueue class in the * library? *
* By the way, the reasoning below indicates that we want to keep all * the sets but the current one on the PriQueue until elements from * the current set fall below the top weight in thePriQueue. *
* We expect the weights in any WtdObjSet to drop off, not smoothly, * but as a stairstep function. Thus instead of searching for the * top element among the (scaled) component sets, we begin be * searching for the top two sets, and search only when the * top weight in the second set exceeds that in the top set. * *
* All the HEAP algorithms used in this class are implementations of
* those presented in Cormen, Lieserson, and Rivest, _Introduction
* to Algorithms_, Chapter 7. Note that in the algorithms presented
* in the above book, heaps are implemented using one-dimensional
* arrays indexed from 1..heapSize. However, Java Vectors are
* indexed from 0..heapSize-1. So, to simplify implementation, and
* closer track the algorithms presented in the book, we shall
* address the heap Vector from 1..heapSize, and simply leave element
* 0 of that vector unused.
*
* @author Paul Mather
* @author Robert France
* @version $Id: PriQueueSequencer.java,v 1.2 2000/02/15 16:59:46 paul Exp $
* @see Sequencer
* @see java.util.Enumeration */
public class PriQueueSequencer implements SetCollectionSequencer
// and therefore,
// Sequencer and Enumeration.
{
/**
* Ye olde debug object.
*/
protected Debug debug;
/**
* The size of the universe of which all the sets are subsets. Needed
* to calculate the expected number of elements.
*/
protected long clSize;
/**
* The vector containing the heap elements.
*/
protected Vector heap;
private class PriQueueWtdObjSet implements Sortable
{
/**
* The weighted object at the head of this weighted object
* set. This object is used when ordering this weighted
* object set relative to others.
*/
protected WtdObj head;
/**
* The rest of the weighted objects in this weighted object set.
*/
protected WtdObjSetEnumeration tail;
/**
* Create a new PriQueueWtdObjSet given a WtdObjSetEnumeration.
* @param s enumeration of a weighted object set
*/
protected PriQueueWtdObjSet(WtdObjSet s)
{
WtdObjSetEnumeration enum;
if (s == null)
{
head = null;
tail = null;
}
else
{
// Convert the weighted object set into an enumeration
enum = (WtdObjSetEnumeration) s.elements();
// Get the head element of the enumeration
try
{
head = (WtdObj) enum.nextElement();
}
catch( NoSuchElementException e)
{
head = null;
}
// Save the tail
tail = enum;
}
}
/**
* Compare this PriQueueWtdObjSet either to another
* PriQueueWtdObjSet or to a WtdObj.
*
* @param obj object to which we will compare ourself
* @return Sortable.{LESS,EQUAL,GREATER} depending upon
* how we fare compared to the object supplied, or
* Sortable.INCOMPARABLE if the given object is not
* compatible for comparison purposes.
*/
public int compare(Object obj)
{
WtdObj topElt = null;
int returnValue;
if (obj instanceof PriQueueWtdObjSet)
{
if (obj != null)
topElt = ((PriQueueWtdObjSet) obj).head;
else
topElt = null;
}
if (obj instanceof WtdObj)
{
topElt = (WtdObj) obj;
}
if (obj instanceof PriQueueWtdObjSet || obj instanceof WtdObj)
{
if (head == null || topElt == null)
returnValue = Sortable.INCOMPARABLE;
else
{
returnValue = head.compare(topElt);
}
}
else
return Sortable.INCOMPARABLE;
return returnValue;
}
/**
* Pop the head WtdObj off a PriQueueWtdObjSet and move the
* next WtdObj from the tail into the head.
* @return WtdObj from the head.
* @exception NoSuchElementException if the PriQueueWtdObjSet
* is empty (head is null)
*/
protected WtdObj popElement() throws NoSuchElementException
{
WtdObj returnValue;
if (head == null)
throw new NoSuchElementException("PriQueueWtdObjSet empty");
else
{
returnValue = head;
head = null;
if (tail != null)
{
try
{
head = (WtdObj) tail.nextElement();
}
catch( NoSuchElementException e)
{
head = null;
}
}
}
return returnValue;
}
/**
* Return the maximum number of elements contained in this
* weighted object set.
* @return maximum number of weighted objects left
*/
protected int maxNumRemaining()
{
int maxNumLeft = 0;
if (tail != null)
maxNumLeft = tail.maxNumRemaining();
if (head != null)
maxNumLeft++;
return maxNumLeft;
}
}
/**
* Create a priority queue sequencer for an unknown number of sets.
* @param classSize the size of the universe of which all the
* sets in this sequencer are subsets
* @param d debug object
*/
public PriQueueSequencer(long classSize, Debug d)
{
clSize = classSize;
debug = d;
heap = new Vector();
// Add "unused" element, to adjust Vector addressing to
// match that of heap addressing
heap.addElement(null);
}
/**
* Create a merging sequencer for an expected number of sets.
* @param expectedNumSets As many sets as we expect to add.
* Does not need to be perfectly accurate.
* @param classSize the size of the universe of which all the
* sets in this sequencer are subsets
* @param d debug object
*/
public PriQueueSequencer(int expectedNumSets, long classSize, Debug d)
{
clSize = classSize;
debug = d;
heap = new Vector(expectedNumSets);
// Add "unused" element, to adjust Vector addressing to
// match that of heap addressing
heap.addElement(null);
}
/**
* Return the index of the parent of the index supplied.
*
* @param i index of the heap element whose parent we are to determine
* @return the index of the parent
*/
private static final int parent(int i)
{
return (i >> 1);
}
/**
* Return the left child of the given heap node.
*
* @param i heap node whose left child's index we are to compute
* @return the index of the left child
*/
private static final int left(int i)
{
return (i << 1);
}
/**
* Return the right child of the given heap node.
*
* @param i heap node whose right child's index we are to compute
* @return the index of the right child
*/
private static final int right(int i)
{
return ((i << 1) | 1);
}
/**
* Return the weighted object stored at the given element of the
* given heap Vector. This is really just a shorthand. It would
* be nice to think that Java could "inline" calls to this
* method
* @param h heap Vector from which element is to be returned
* @param i index into heap Vector from which element is to be returned
* @return PriQueueWtdObjSet from the given heap
*/
private static final PriQueueWtdObjSet heapElt(Vector h, int i)
{
return (PriQueueWtdObjSet) h.elementAt( i );
}
/**
* Return the size of the given heap. This will be the size
* of the given Vector - 1, because we assume element zero
* of the Vector given to us is "unused," to adjust addressing
* of the heap from 1 onwards.
*
* @param h the Vector representing the heap
* @return the size of the heap represented by the given Vector
* @exception IndexOutOfBoundsException if the given heap size
* indicates the underlying heap Vector is malformed.
*/
private static int heapSize(Vector h) throws IndexOutOfBoundsException
{
int s = h.size()-1;
if (s < 0)
throw new IndexOutOfBoundsException("Invalid heap");
else
return s;
}
/**
* Heapify the given heap at the given node.
* @param A the heap to be heapified
* @param i the point at which to heapify
*/
private void heapify(Vector A, int i)
{
int l, r, largest;;
l = left(i);
r = right(i);
if (l <= heapSize(A) && heapElt(A,l).compare(heapElt(A,i)) == Sortable.GREATER)
largest = l;
else
largest = i;
if (r <= heapSize(A) && heapElt(A,r).compare(heapElt(A,largest)) == Sortable.GREATER)
largest = r;
if (largest != i)
{
// Exchange A[i] and A[largest]
Object obj = A.elementAt(i);
A.setElementAt(A.elementAt(largest), i);
A.setElementAt(obj, largest);
// heapify again...
heapify(A,largest);
}
}
/**
* Extract the next element from the priority queue. Here we must
* deviate from the algorithm in Cormen, Lieserson, and Rivest,
* _Introduction to Algorithms_ a little because our priority
* queue heap actually has a WtdObjSetEnumeration as each heap
* element. Thus, removing the max element from the heap does
* not necessarily shorten the heap by one element: that only
* happens if we remove the last element of the set behind the
* element. Otherwise, we simply heapify again.
* @param A heap from which we are to extract the max WtdObj
* @return highest weighted WtdObj from this sequencer
* @exception NoSuchElementException if the heap is empty
*/
private WtdObj heapExtractMax(Vector A) throws NoSuchElementException
{
WtdObj max;
int sizeOfHeap = heapSize(A);
// Check for heap underflow
if (sizeOfHeap < 1)
throw new NoSuchElementException("Heap underflow");
// Pop off the max heap element
max = ((PriQueueWtdObjSet) A.elementAt(1)).popElement();
// Peek at the new top element to check to see if this
// PriQueueWtdObjSet is now empty and thus needs to be removed
// from the heap Vector
if (((PriQueueWtdObjSet) A.elementAt(1)).head == null)
{
// The top PriQueueWtdObjSet is empty, so we need
// to shrink the heap by one set and...
A.setElementAt(A.elementAt(sizeOfHeap), 1);
A.removeElementAt(sizeOfHeap);
}
// Re-heapify
heapify(A,1);
return max;
}
/**
* Add a new component set to the Sequencer with no scaling constant.
*
* @param set a component weighted object set.
* @return ReturnCodes.OK -- set was added without problems;
* anything else -- problems.
*/
public int addSet(WtdObjSet set)
{
PriQueueWtdObjSet key;
int i;
// Convert the weighted object set into an element that
// the heap stores
key = new PriQueueWtdObjSet( set );
// Extend the heap by one element (initially empty)
heap.addElement(null);
// Perform Heap-Insert procedure from CLR
i = heapSize(heap);
while (i > 1 && heapElt(heap,parent(i)).compare(key) == Sortable.LESS)
{
heap.setElementAt((PriQueueWtdObjSet) heapElt(heap,parent(i)), i);
i = parent(i);
}
heap.setElementAt((PriQueueWtdObjSet) key, i);
return ReturnCodes.OK;
}
/**
* Return the expected number of unique elements in the result
* sequence.
*
* NOTE: The statistics here are drawn from the hypergeometric
* distribution. Follow code and documentation in the C++ file
* "search/woset_coll.cc."
* @return the expected number of unique elements left in this sequencer
*/
public int expectedNumElts()
{
int numSets = heapSize(heap);
int i, j;
long numTotalItems, sizeOverlap;
int numRemaining[] = new int[numSets];
PriQueueWtdObjSet sequencedSet;
numRemaining[0] = 0;
for (i=0; i < numSets; i++)
{
// Figure out the approximate number of remaining elements
// for this weighted object set in the sequencer
sequencedSet = (PriQueueWtdObjSet) heapElt(heap,i+1);
if (sequencedSet.tail == null)
numRemaining[i] = 0;
else
numRemaining[i] = sequencedSet.tail.approxNumRemaining();
// Adjust total for elements in the "head"
if (sequencedSet.head != null)
numRemaining[i]++;
}
numTotalItems = numRemaining[0];
sizeOverlap = 0;
for (i=1; i < numSets; i++)
{
numTotalItems += numRemaining[i];
for (j=i; j < numSets; j++)
{
sizeOverlap += numRemaining[i-1] * numRemaining[j];
}
}
return (int) (numTotalItems - sizeOverlap/clSize);
}
/**
* Return the maximum number of unique elements in the result
* sequence. (This is just the sum of all the elements in each
* component set,)
* @return the maximum number of elements left in this sequencer
*/
public int maxNumElts()
{
int numElts = 0;
int numSets = heapSize(heap);
PriQueueWtdObjSet elt;
for (int i=1; i <= numSets; i++)
{
elt = heapElt(heap,i);
if (elt != null)
numElts += elt.maxNumRemaining();
}
return numElts;
}
/**
* Return the next weighted object (of highest weight) in this sequencer.
* @return top-weighted object from this sequencer
*/
public Object nextElement() throws NoSuchElementException
{
return heapExtractMax(heap);
}
/**
* Determine if there are any objects left in this sequencer.
* @return true if there are weighted objects left to be returned
* by this sequencer; false if there are none
*/
public boolean hasMoreElements()
{
return (heapSize(heap) > 0 && heapElt(heap,1) != null &&
heapElt(heap,1).head != null);
}
/**
* Return a string representation of this sequencer and its contents.
* @return string representation of the sequencer.
*/
public String toString()
{
return this.profile();
}
/**
* Return a string that summarises the main features of this sequencer.
* @return string giving the gist of what this sequencer contains
*/
public String profile()
{
int numSets = heapSize(heap);
StringBuffer str = new StringBuffer(numSets*20 + 50); // Rough estimate
boolean first = true;
int numLeft;
PriQueueWtdObjSet topSet;
WtdObj top;
str.append("{WtdObjSets: ");
str.append(numSets);
for (int i=1; i <= numSets; i++)
{
// Alter separator text according to whether this is the
// first item output
if (first)
{
str.append("; top items (max remaining): ");
first = false;
}
else
{
str.append(", ");
}
// Add weighted object set i to profile
topSet = heapElt(heap, i);
if (topSet == null)
{
top = null;
numLeft = 0;
}
else
{
top = topSet.head;
numLeft = topSet.tail.maxNumRemaining();
}
if (top == null)
str.append("