package edu.vt.marian.search; import java.util.*; import java.io.*; import edu.vt.marian.common.*; /** * A MergeSequencer is a Sequencer designed to work on a few component * sets. Since the number of sets is small, we can perform a linear * search for the set with the next highest element more efficiently * than any fancier search. *
* We expect the weights in any WtdObjSet to drop off, not smoothly,
* but as a stairstep function. Thus instead of searching for the
* top element among the (scaled) component sets, we begin be
* searching for the top two sets, and search only when the top
* weight in the second set exceeds that in the top set.
*
* @author Paul Mather
* @author Robert France
* @version $Id: MergeSequencer.java,v 1.2 2000/02/15 16:58:52 paul Exp $
* @see Sequencer
* @see java.util.Enumeration
*/
public class MergeSequencer implements Sequencer // and therefore, Enumeration.
{
/** The omnipresent debug object */
protected Debug debug;
/**
* The total size of the universe of objects over which this
* sequencer works. This is for use with expectedNumElts()
*/
protected long classSize;
/**
* A vector containing all the (enumerated) weighted object sets
* added to this merge sequencer
*/
protected Vector wtdObjSets;
/** A vector containing the top elements of each weighted object set */
protected Vector topElements;
/** The index of the set with the highest current weight */
protected int highestWeightSet;
/** The weight below which we change to another highest set */
protected Weight weightChangeTrigger;
/** The index of the set to which we change upon a weight trigger */
protected int weightChangeSet;
/** Value (negative) to indicate the set that triggered the weight
* change has not been defined yet.
*/
private final static int UNDEFINED = -1;
/**
* Create an empty merge sequencer.
* @param debug the omnipresent debug object
*/
public MergeSequencer(long classSize, Debug debug)
{
wtdObjSets = new Vector();
topElements = new Vector();
weightChangeTrigger = Weight.bottomWt;
weightChangeSet = UNDEFINED;
highestWeightSet = UNDEFINED;
this.classSize = classSize;
this.debug = debug;
}
/**
* Create a merge sequencer given an initial weighted object set.
* @param set an initial weighted object set
* @param debug the omnipresent debug object
*/
public MergeSequencer(long classSize, WtdObjSet set, Debug debug)
{
// Make a new vector ready to accept the weighted object
// sets to be sequenced
wtdObjSets = new Vector();
topElements = new Vector();
weightChangeTrigger = Weight.bottomWt;
weightChangeSet = UNDEFINED;
highestWeightSet = UNDEFINED;
this.classSize = classSize;
this.debug = debug;
// Add the initial weighted object set
this.addSet( set );
}
/**
* Add a new component set to the Sequencer with no scaling constant.
*
* @param set a component weighted object set.
* @return ReturnValues.OK -- set was added without problems;
* anything else -- problems.
*/
public int addSet(WtdObjSet set)
{
return actuallyAddSet( set, new Weight(1.0, debug) );
}
/**
* Add a new component set to the Sequencer with no scaling constant.
*
* @param set a component weighted object set.
* @param wt A number 0.0 .. 1.0 to scale by.
* @return ReturnValues.OK -- set was added without problems;
* anything else -- problems.
*/
public int addSet(WtdObjSet set, double wt)
{
if (wt >= 0.0 && wt <= 1.0)
return actuallyAddSet( set, new Weight(wt, debug) );
else
return ReturnCodes.BAD_PARAMS;
}
/**
* Add a new component set to the Sequencer with no scaling constant.
*
* @param set a component weighted object set.
* @param wt A Weight to scale by.
* @return ReturnValues.OK -- set was added without problems;
* anything else -- problems.
*/
public int addSet(WtdObjSet set, Weight wt)
{
return actuallyAddSet( set, wt );
}
/**
* Add a new weighted object set to the sequencer with the
* given weight as a scaling factor. We assume the weighted
* object set is not empty, and that the weight is non-null.
* @param set weighted object set to be added to the sequencer
* @param wt the Weight by which all elements in this weighted
* object set must be scaled
* @return ReturnCodes.OK if the set was added without problems;
* anything else if problems were encountered
*/
private int actuallyAddSet(WtdObjSet set, Weight wt)
{
// Validate the arguments: return an error if any of them are bad
if (set.isEmpty() || !wt.isValid())
return ReturnCodes.BAD_PARAMS;
// Turn the WtdObjSet into a ScaledWtdObjSet
ScaledWtdObjSet scaledSet = new ScaledWtdObjSet(set, wt, debug);
// Get an enumeration of the objects in the scaled weighted object set
WtdObjSetEnumeration wtdObjs = scaledSet.elements();
// Get the first element of this weighted object set
WtdObj top;
try
{
top = (WtdObj) wtdObjs.nextElement();
}
catch( NoSuchElementException e )
{
// This should not happen, because set is non-empty
// (as checked above), yet its enumeration didn't
// return an element!
return ReturnCodes.BAD_PARAMS;
}
// Add the enumeration of the objects in the weighted object
// set to the collection of sets being sequenced
wtdObjSets.addElement( wtdObjs );
// Add the first element from this weighted object set
// to the merge list, scaled by the scaling factor
topElements.addElement( top );
// Remember where we added the new set
int newSet = wtdObjSets.size()-1;
// Figure out if the addition of this set causes any change
// in the top-weight set; trigger value & set; etc.
if (highestWeightSet == UNDEFINED)
{
// This is the first set added
highestWeightSet = newSet;
}
else
{
// See if the top value of the newly-added set exceeds
// the current top value
WtdObj currentTop = (WtdObj) topElements.elementAt(highestWeightSet);
Weight newSetTopWeight = top.getWeight();
if (top.compare( currentTop ) == Sortable.GREATER)
{
// The new set is bigger: make it the top weighted
// set and the previous set now has to play second-fiddle
weightChangeSet = highestWeightSet;
weightChangeTrigger = currentTop.getWeight();
highestWeightSet = newSet;
}
else
{
// The new set might muscle out the second-top weighted set
if (weightChangeSet == UNDEFINED)
{
weightChangeSet = newSet;
weightChangeTrigger = newSetTopWeight;
}
else
{
// See if top element of new set exceeds current
// second-top weighted set
if (top.compare( weightChangeTrigger ) == Sortable.GREATER)
{
// Make the newly-added set the second-top
// weighted set
weightChangeSet = newSet;
weightChangeTrigger = newSetTopWeight;
}
}
}
}
return ReturnCodes.OK;
}
/**
* Return the expected number of unique elements in the result
* sequence.
*
* NOTE: The statistics here are drawn from the hypergeometric
* distribution. Follow code and documentation in the C++ file
* "search/woset_coll.cc."
* @return the expected number of unique elements left in the sequencer
*/
public int expectedNumElts()
{
int numSets = wtdObjSets.size();
int i, j;
long numTotalItems, sizeOverlap;
int numRemaining[] = new int[numSets];
WtdObjSetEnumeration sequencedSet;
numRemaining[0] = 0;
for (i=0; i < numSets; i++)
{
// Figure out the approximate number of remaining elements
// for this weighted object set in the sequencer
sequencedSet = (WtdObjSetEnumeration) wtdObjSets.elementAt(i);
if (sequencedSet == null)
numRemaining[i] = 0;
else
numRemaining[i] = sequencedSet.approxNumRemaining();
// Adjust total for the elements popped off onto the "top" list
if ((WtdObj) topElements.elementAt(i) != null)
numRemaining[i]++;
}
numTotalItems = numRemaining[0];
sizeOverlap = 0;
for (i=1; i < numSets; i++)
{
numTotalItems += numRemaining[i];
for (j=i; j < numSets; j++)
{
sizeOverlap += numRemaining[i-1] * numRemaining[j];
}
}
return (int) (numTotalItems - sizeOverlap/classSize);
}
/**
* Return the maximum number of unique elements in the result
* sequence. (This is just the sum of all the elements in each
* component set,)
* @return the (maximum) number of elements left in the sequencer
* @see WtdObjSetEnumeration#maxNumRemaining
*/
public int maxNumElts()
{
int numElts = 0;
int numSets = wtdObjSets.size();
for (int i=0; i < numSets; i++)
{
numElts += ((WtdObjSetEnumeration) wtdObjSets.elementAt(i)).maxNumRemaining();
if (topElements.elementAt(i) != null)
numElts++;
}
return numElts;
}
public Object nextElement() throws NoSuchElementException
{
Object returnValue;
WtdObj top;
// Is there currently a top-weighted set?
if (highestWeightSet == UNDEFINED)
{
// No, so there must be no sets in this sequencer
throw new NoSuchElementException("No WtdObjs in MergeSequencer");
}
else
{
// Return the item from the top-weighted set
returnValue = topElements.elementAt( highestWeightSet );
if (returnValue == null)
{
// There was actually nothing there, so throw
// a NoSuchElementException instead
throw new NoSuchElementException("No more WtdObjs in MergeSequencer");
}
// Pop off the next element in the top-weighted set
try
{
top = (WtdObj)((WtdObjSetEnumeration) wtdObjSets.elementAt( highestWeightSet )).nextElement();
}
catch( NoSuchElementException e )
{
// Nothing left in this set
top = null;
}
// Store the popped-off element
topElements.setElementAt(top, highestWeightSet);
// See if we need to switch to another weighted object set
if (top == null)
{
// Yes---the top weighted set is now empty!
shuffleTopSets();
}
else
{
if (top.compare(weightChangeTrigger) == Sortable.LESS)
{
// The top-weighted set is now below the trigger
// threshold, so make the trigger set the new
// top-weighted set
shuffleTopSets();
}
}
}
return returnValue;
}
/*
* Make the second-highest weighted set the top-weighted set
* and search for a new weight-change trigger value and associated
* set.
*
*/
private void shuffleTopSets()
{
Weight highWeight = Weight.bottomWt;
WtdObj wtdObj;
int numSets = topElements.size();
// Pass on the torch to the weight-change set
highestWeightSet = weightChangeSet;
// Now look for a worthy successor...
weightChangeSet = UNDEFINED;
weightChangeTrigger = Weight.bottomWt;
for (int i=0; i < numSets; i++)
{
wtdObj = (WtdObj) topElements.elementAt(i);
if (i != highestWeightSet && wtdObj != null &&
wtdObj.compare(highWeight) == Sortable.GREATER)
{
weightChangeSet = i;
weightChangeTrigger = wtdObj.getWeight();
highWeight = weightChangeTrigger;
}
}
}
/*
* Determine if there are more weighted objects left in this sequencer.
* It does this by checking to see if there is any weighted object
* at the front of any of the weighted object sets in this sequencer.
* @return true if there are elements left in the sequencer; false
* otherwise.
*/
public boolean hasMoreElements()
{
int numSets = topElements.size();
// We have no more elements if all the entries in the
// topElements vector are null
for (int i = 0; i < numSets; i++)
if (topElements.elementAt( i ) != null)
return true;
return false;
}
public String toString()
{
return this.profile();
}
/*
* Output a rough representation of the components of this sequencer.
* @return string containing "profile" of sequencer contents
*/
public String profile()
{
int numSets = topElements.size();
StringBuffer str = new StringBuffer(numSets*20 + 50); // Rough estimate
boolean first = true;
WtdObj top;
str.append("{WtdObjSets: ");
str.append(numSets);
for (int i=0; i < numSets; i++)
{
// Alter separator text according to whether this is the
// first item output
if (first)
{
str.append("; top items (max remaining): ");
first = false;
}
else
{
str.append(", ");
}
// Add weighted object set i to profile
top = (WtdObj) topElements.elementAt(i);
if (top == null)
str.append("