package edu.vt.marian.search; import java.util.*; import java.io.*; import edu.vt.marian.common.*; /** * A MergeSequencer is a Sequencer designed to work on a few component * sets. Since the number of sets is small, we can perform a linear * search for the set with the next highest element more efficiently * than any fancier search. *

* We expect the weights in any WtdObjSet to drop off, not smoothly, * but as a stairstep function. Thus instead of searching for the * top element among the (scaled) component sets, we begin be * searching for the top two sets, and search only when the top * weight in the second set exceeds that in the top set. * * @author Paul Mather * @author Robert France * @version $Id: MergeSequencer.java,v 1.2 2000/02/15 16:58:52 paul Exp $ * @see Sequencer * @see java.util.Enumeration */ public class MergeSequencer implements Sequencer // and therefore, Enumeration. { /** The omnipresent debug object */ protected Debug debug; /** * The total size of the universe of objects over which this * sequencer works. This is for use with expectedNumElts() */ protected long classSize; /** * A vector containing all the (enumerated) weighted object sets * added to this merge sequencer */ protected Vector wtdObjSets; /** A vector containing the top elements of each weighted object set */ protected Vector topElements; /** The index of the set with the highest current weight */ protected int highestWeightSet; /** The weight below which we change to another highest set */ protected Weight weightChangeTrigger; /** The index of the set to which we change upon a weight trigger */ protected int weightChangeSet; /** Value (negative) to indicate the set that triggered the weight * change has not been defined yet. */ private final static int UNDEFINED = -1; /** * Create an empty merge sequencer. * @param debug the omnipresent debug object */ public MergeSequencer(long classSize, Debug debug) { wtdObjSets = new Vector(); topElements = new Vector(); weightChangeTrigger = Weight.bottomWt; weightChangeSet = UNDEFINED; highestWeightSet = UNDEFINED; this.classSize = classSize; this.debug = debug; } /** * Create a merge sequencer given an initial weighted object set. * @param set an initial weighted object set * @param debug the omnipresent debug object */ public MergeSequencer(long classSize, WtdObjSet set, Debug debug) { // Make a new vector ready to accept the weighted object // sets to be sequenced wtdObjSets = new Vector(); topElements = new Vector(); weightChangeTrigger = Weight.bottomWt; weightChangeSet = UNDEFINED; highestWeightSet = UNDEFINED; this.classSize = classSize; this.debug = debug; // Add the initial weighted object set this.addSet( set ); } /** * Add a new component set to the Sequencer with no scaling constant. * * @param set a component weighted object set. * @return ReturnValues.OK -- set was added without problems; * anything else -- problems. */ public int addSet(WtdObjSet set) { return actuallyAddSet( set, new Weight(1.0, debug) ); } /** * Add a new component set to the Sequencer with no scaling constant. * * @param set a component weighted object set. * @param wt A number 0.0 .. 1.0 to scale by. * @return ReturnValues.OK -- set was added without problems; * anything else -- problems. */ public int addSet(WtdObjSet set, double wt) { if (wt >= 0.0 && wt <= 1.0) return actuallyAddSet( set, new Weight(wt, debug) ); else return ReturnCodes.BAD_PARAMS; } /** * Add a new component set to the Sequencer with no scaling constant. * * @param set a component weighted object set. * @param wt A Weight to scale by. * @return ReturnValues.OK -- set was added without problems; * anything else -- problems. */ public int addSet(WtdObjSet set, Weight wt) { return actuallyAddSet( set, wt ); } /** * Add a new weighted object set to the sequencer with the * given weight as a scaling factor. We assume the weighted * object set is not empty, and that the weight is non-null. * @param set weighted object set to be added to the sequencer * @param wt the Weight by which all elements in this weighted * object set must be scaled * @return ReturnCodes.OK if the set was added without problems; * anything else if problems were encountered */ private int actuallyAddSet(WtdObjSet set, Weight wt) { // Validate the arguments: return an error if any of them are bad if (set.isEmpty() || !wt.isValid()) return ReturnCodes.BAD_PARAMS; // Turn the WtdObjSet into a ScaledWtdObjSet ScaledWtdObjSet scaledSet = new ScaledWtdObjSet(set, wt, debug); // Get an enumeration of the objects in the scaled weighted object set WtdObjSetEnumeration wtdObjs = scaledSet.elements(); // Get the first element of this weighted object set WtdObj top; try { top = (WtdObj) wtdObjs.nextElement(); } catch( NoSuchElementException e ) { // This should not happen, because set is non-empty // (as checked above), yet its enumeration didn't // return an element! return ReturnCodes.BAD_PARAMS; } // Add the enumeration of the objects in the weighted object // set to the collection of sets being sequenced wtdObjSets.addElement( wtdObjs ); // Add the first element from this weighted object set // to the merge list, scaled by the scaling factor topElements.addElement( top ); // Remember where we added the new set int newSet = wtdObjSets.size()-1; // Figure out if the addition of this set causes any change // in the top-weight set; trigger value & set; etc. if (highestWeightSet == UNDEFINED) { // This is the first set added highestWeightSet = newSet; } else { // See if the top value of the newly-added set exceeds // the current top value WtdObj currentTop = (WtdObj) topElements.elementAt(highestWeightSet); Weight newSetTopWeight = top.getWeight(); if (top.compare( currentTop ) == Sortable.GREATER) { // The new set is bigger: make it the top weighted // set and the previous set now has to play second-fiddle weightChangeSet = highestWeightSet; weightChangeTrigger = currentTop.getWeight(); highestWeightSet = newSet; } else { // The new set might muscle out the second-top weighted set if (weightChangeSet == UNDEFINED) { weightChangeSet = newSet; weightChangeTrigger = newSetTopWeight; } else { // See if top element of new set exceeds current // second-top weighted set if (top.compare( weightChangeTrigger ) == Sortable.GREATER) { // Make the newly-added set the second-top // weighted set weightChangeSet = newSet; weightChangeTrigger = newSetTopWeight; } } } } return ReturnCodes.OK; } /** * Return the expected number of unique elements in the result * sequence. * * NOTE: The statistics here are drawn from the hypergeometric * distribution. Follow code and documentation in the C++ file * "search/woset_coll.cc." * @return the expected number of unique elements left in the sequencer */ public int expectedNumElts() { int numSets = wtdObjSets.size(); int i, j; long numTotalItems, sizeOverlap; int numRemaining[] = new int[numSets]; WtdObjSetEnumeration sequencedSet; numRemaining[0] = 0; for (i=0; i < numSets; i++) { // Figure out the approximate number of remaining elements // for this weighted object set in the sequencer sequencedSet = (WtdObjSetEnumeration) wtdObjSets.elementAt(i); if (sequencedSet == null) numRemaining[i] = 0; else numRemaining[i] = sequencedSet.approxNumRemaining(); // Adjust total for the elements popped off onto the "top" list if ((WtdObj) topElements.elementAt(i) != null) numRemaining[i]++; } numTotalItems = numRemaining[0]; sizeOverlap = 0; for (i=1; i < numSets; i++) { numTotalItems += numRemaining[i]; for (j=i; j < numSets; j++) { sizeOverlap += numRemaining[i-1] * numRemaining[j]; } } return (int) (numTotalItems - sizeOverlap/classSize); } /** * Return the maximum number of unique elements in the result * sequence. (This is just the sum of all the elements in each * component set,) * @return the (maximum) number of elements left in the sequencer * @see WtdObjSetEnumeration#maxNumRemaining */ public int maxNumElts() { int numElts = 0; int numSets = wtdObjSets.size(); for (int i=0; i < numSets; i++) { numElts += ((WtdObjSetEnumeration) wtdObjSets.elementAt(i)).maxNumRemaining(); if (topElements.elementAt(i) != null) numElts++; } return numElts; } public Object nextElement() throws NoSuchElementException { Object returnValue; WtdObj top; // Is there currently a top-weighted set? if (highestWeightSet == UNDEFINED) { // No, so there must be no sets in this sequencer throw new NoSuchElementException("No WtdObjs in MergeSequencer"); } else { // Return the item from the top-weighted set returnValue = topElements.elementAt( highestWeightSet ); if (returnValue == null) { // There was actually nothing there, so throw // a NoSuchElementException instead throw new NoSuchElementException("No more WtdObjs in MergeSequencer"); } // Pop off the next element in the top-weighted set try { top = (WtdObj)((WtdObjSetEnumeration) wtdObjSets.elementAt( highestWeightSet )).nextElement(); } catch( NoSuchElementException e ) { // Nothing left in this set top = null; } // Store the popped-off element topElements.setElementAt(top, highestWeightSet); // See if we need to switch to another weighted object set if (top == null) { // Yes---the top weighted set is now empty! shuffleTopSets(); } else { if (top.compare(weightChangeTrigger) == Sortable.LESS) { // The top-weighted set is now below the trigger // threshold, so make the trigger set the new // top-weighted set shuffleTopSets(); } } } return returnValue; } /* * Make the second-highest weighted set the top-weighted set * and search for a new weight-change trigger value and associated * set. * */ private void shuffleTopSets() { Weight highWeight = Weight.bottomWt; WtdObj wtdObj; int numSets = topElements.size(); // Pass on the torch to the weight-change set highestWeightSet = weightChangeSet; // Now look for a worthy successor... weightChangeSet = UNDEFINED; weightChangeTrigger = Weight.bottomWt; for (int i=0; i < numSets; i++) { wtdObj = (WtdObj) topElements.elementAt(i); if (i != highestWeightSet && wtdObj != null && wtdObj.compare(highWeight) == Sortable.GREATER) { weightChangeSet = i; weightChangeTrigger = wtdObj.getWeight(); highWeight = weightChangeTrigger; } } } /* * Determine if there are more weighted objects left in this sequencer. * It does this by checking to see if there is any weighted object * at the front of any of the weighted object sets in this sequencer. * @return true if there are elements left in the sequencer; false * otherwise. */ public boolean hasMoreElements() { int numSets = topElements.size(); // We have no more elements if all the entries in the // topElements vector are null for (int i = 0; i < numSets; i++) if (topElements.elementAt( i ) != null) return true; return false; } public String toString() { return this.profile(); } /* * Output a rough representation of the components of this sequencer. * @return string containing "profile" of sequencer contents */ public String profile() { int numSets = topElements.size(); StringBuffer str = new StringBuffer(numSets*20 + 50); // Rough estimate boolean first = true; WtdObj top; str.append("{WtdObjSets: "); str.append(numSets); for (int i=0; i < numSets; i++) { // Alter separator text according to whether this is the // first item output if (first) { str.append("; top items (max remaining): "); first = false; } else { str.append(", "); } // Add weighted object set i to profile top = (WtdObj) topElements.elementAt(i); if (top == null) str.append(""); else str.append(top.toString()); str.append(" ("); str.append(((WtdObjSetEnumeration) wtdObjSets.elementAt(i)).maxNumRemaining()); str.append(')'); } str.append('}'); return str.toString(); } }