package edu.vt.marian.search; import java.util.*; import java.io.*; import java.util.*; import edu.vt.marian.common.*; /** * Provides hash table functionality for (doubly-linked) weighted * objects keyed on FullIDs, and "threaded" into multiple linked lists. * Specifically, the Table is to be used to maintain "images" of a collection * of weighted object sets. Since each set is inserted into the table in * non-increasing weight order, the images are also in that order. Objects * may be removed from each image, rejoining the list around the removed * element, the order being maintained. *
* When this Table is used in a Searcher, each set image is composed of only * those objects that occur in only one component set. Objects that occur in * more than one set are excised from the set image and added to another list * of "multiply hit" elements. Note that the weighted object in question does * not leave the table, and does not move within the hash order. The only * change to the object (other than accumulating weight) is that its links * to next and previous elements are switched from elements in a set image to * elements in the multiHit list. *
* This class is essentially a wrapper around * WtdObjTable to impose the set image semantics. *
* This structure implicitly relies upon the hashCode method to be * defined in the object that is going to be used as a key object. *
* NOTE: Since this Table maintains many separate Enumerations (one for * each set image pkus one for multiHit) it cannot be itself an Enumeration. * * @author Paul Mather * @version 0.9 * @see edu.vt.marian.search.WtdObjTable * @see edu.vt.marian.common.WtdObjSingleLink * @see edu.vt.marian.common.WtdObj * @see java.util.Hashtable * @see edu.vt.marian.common.Debug */ public class SetImageWtdObjTable extends WtdObjTable { /** * Constant indicating no set image has been selected */ protected static final int NO_SET_IMAGE_SELECTED = -1; /** * The index of the multi-hit list. */ protected static final int MULTI_HIT_LIST = 0; /** * Vector containing the previous element inserted at the end * of each set image. Note, element 0 represents the multi-hit * list; elements 1 onwards represent the set images */ protected Vector prevKeysInserted = null; /** * Vector containing the FullID key of the first element in * each setImage. Note, element 0 represents the multi-hit * list; elements 1 onwards represent the set images */ protected Vector firstKeys = null; /** * The currently selected set image */ protected int currentSetImage = NO_SET_IMAGE_SELECTED; /** * The total number of set images currently created */ protected int numSetImages = MULTI_HIT_LIST; /** * Initialise the vectors to track insertions for the multi-hit * list and the set images. */ private void initialiseVectors() { // Initialise vectors for at least the multi-hit list prevKeysInserted = new Vector(); prevKeysInserted.addElement(null); firstKeys = new Vector(); firstKeys.addElement(null); } /** * Create an empty hash table for (singly-linked) weighted objects */ public SetImageWtdObjTable(Debug debug) { super(debug); initialiseVectors(); } /** * Create an empty hash table for (singly-linked) weighted objects * specifying initial capacity and load factor. * @param initialCapacity initial size of hash table * @param loadFactor occupancy threshold before growth needed * @param debug debugging object */ public SetImageWtdObjTable(int initialCapacity, float loadFactor, Debug debug) { super(initialCapacity, loadFactor, debug); initialiseVectors(); } /** * Create an empty hash table for (singly-linked) weighted objects * specifying initial capacity, but with default load factor. * @param initialCapacity initial size of hash table * @param debug debugging object */ public SetImageWtdObjTable(int initialCapacity, Debug debug) { super(initialCapacity, debug); initialiseVectors(); } /** * This class is an "extension" of WtdObjSingleLink denoting the * doubly-linked WtdObj is a member of a set image or the multi-hit * list. An integer is used to denote to which set image the WtdObj * belongs; zero (0) denotes membership of the multi-hit list. We do * this to avoid lots of searching to determine whether a given item * in a SetImageWtdObjTable is already in the multi-hit list, or * whether it is in a set image and thus needs to be unlinked and * moved into the multi-hit list. * * @author Paul Mather * @version 0.9 * @see edu.vt.marian.common.WtdObjSingleLink * @see edu.vt.marian.search.SetImageWtdObjTable * @see edu.vt.marian.common.FullID * @see edu.vt.marian.common.Debug */ private class WtdObjMultiThreaded extends WtdObjSingleLink { /** * Debug object, because we cannot see that defined in WtdObj * across packages. :-( */ protected Debug debug; /** "Pointer" to the previous "list" entry */ protected FullID previous; /** * The index of the set image or the multi-hit list to which * this doubly-linked WtdObj belongs. */ protected int image = NO_SET_IMAGE_SELECTED; /** * Create a doubly-linked weighted object belonging to a * multi-threaded weighted object table given a weighted object, a * debug object, and a membership class. * @param w weighted object * @param identity set image or multi-hit membership of WtdObj * @param debug debug object */ public WtdObjMultiThreaded( WtdObj w, int identity, Debug debug ) { super(w, debug); next = null; previous = null; image = identity; } /** * Return the FullID key of the previous item pointed to in this * ordering. * @return key of the next weighted object in this ordering */ public FullID getPrevious() { return this.previous; } /** * Set the FullID key of the previous item pointed to in this * ordering. * @param nextElt the "key" of the next element "pointed" to * by this one * @return key of the next weighted object in this ordering */ public void setPrevious(FullID prevElt) { this.previous = prevElt; } /** * Set the membership class of this doubly-linked weighted object. * @param membership the set image or multi-hit list membership * of this weighted object * @return number of set image/multi-hit list selected, or * ReturnCodes.BAD_PARAMS if the supplied value was outside the * legal range */ public int setMembership( int membership ) { int returnValue; // Check that the supplied membership is potentially valid if (membership < MULTI_HIT_LIST || membership > numSetImages) { debug.dumpTrace("WtdObjMultiThreaded.setMembership: Illegal " + "thread/multi-hit value supplied"); returnValue = ReturnCodes.BAD_PARAMS; } else { image = membership; returnValue = image; } return returnValue; } /** * Get the membership class of this doubly-linked weighted object. * @return the set image or multi-hit list membership of this * weighted object */ public int getMembership() { return image; } /** * Convert this WtdObjMultiThreaded to a string representation. * The printable form is "WtdObj(FullID)(FullID)[image]", where WtdObj * is the string representation of the weighted object portion * of this object, the FullIDs are the string representations of * the FullID "next" and "previous" pointers of this object * respectively, and "image" is the "thread" to which this object * belongs. * @return string containing printable representation of object data * @see edu.vt.marian.common.WtdObj#toString * @see edu.vt.marian.common.FullID#toString */ public String toString() { if (previous == null) return super.toString() + "(null)[" + image + ']' ; else return super.toString() + '(' + previous.toString() + ')' + '[' + image + ']'; } } /** * Add a new set image. * @return An integer that will serve as identifier for this image * in future calls to selectImage() and imageElements(), or a * negative integer on failure [[ if failure is possible. Paul: * is it? ]] */ public int newImage() { int returnValue; // Append an element to the prevKeysInserted and firstKeys // vectors to track the insertions into this new set image prevKeysInserted.addElement(null); firstKeys.addElement(null); // Increment total number of set images created so far numSetImages++; // Be paranoid and do some consistency checks: // 1) both vectors should be the same length, and // 2) their length should be 1 more than the value of // numSetImages if (prevKeysInserted.size() != firstKeys.size()) { debug.dumpTrace("SetImageWtdObjTable.newImage: Internal " + "inconsistency---sets are differing sizes!"); returnValue = ReturnCodes.IMPOSSIBLE_ERROR; } else { if (prevKeysInserted.size() == numSetImages+1) returnValue = numSetImages; else { debug.dumpTrace("SetImageWtdObjTable.newImage: Internal " + "inconsistency---set sizes not tracking " + "numSetImages value properly"); returnValue = ReturnCodes.IMPOSSIBLE_ERROR; } } return returnValue; } /** * Determine if the integer supplied is currently a valid set image id. * Encapsulate this into a function so we always have a consistent * definition of what criteria we are using to determine a valid * set image id. * @param image id of set image * @return true if "image" is currently in the valid range for set * image ids; false otherwise */ protected boolean isValidSetImageID(int image) { return (image > MULTI_HIT_LIST && image <= numSetImages); } /** * Change to a previously defined set image. * @param image An image ID previously returned by newImage(). * @return OK, or some error code on failure. [[ if failure not possible, * change method to void ]] */ public int setImage(int image) { int returnValue; // Check image is in the range 1..numSetImages if (isValidSetImageID(image)) { currentSetImage = image; returnValue = image; } else { // Attempt made to switch to non-existant setImage! debug.dumpTrace("SetImageWtdObjTable.setImage: Supplied image " + "to switch to (" + image + ") is out of bounds"); returnValue = ReturnCodes.BAD_PARAMS; } return returnValue; } /** * Check to see if membership indicates object is part of the * multi-hit list. * @param identity thread to which weighted object currently belongs * @return true if weighted object is a member of the multi-hit list */ public boolean isInMultiHitList( int identity ) { return (identity == MULTI_HIT_LIST); } /** * Add a weighted object to the table in the set image last selected. * This method both adds the weighted object into the table and connects * it to the end of the last list defined by a newImage() or selected by * a selectImage().
OK, PAUL, DESIGN QESTION TIME: Is it better for the Table to maintain multiHit and do both the weight computations and the moving between lists as part of insert(), or should the searcher be responsible for testing isInTable(), snipping the object out of the set image, adding in the new wieght and attaching it to multiHit? My best thinking at the moment is that the Table should maintain multiHit and do the moving, that the WtdObjDoubleLink should handle Weight accumulation, and that the Searcher does nothing but call insert() as many times as it needs. But I'm not completely convinced yet. * @param wtdObj weighted object to be inserted * @return null if the weighted object was not already present, * otherwise return the Object that was already there. */ public Object insert( WtdObj wtdObj ) { Object returnValue = null; WtdObjMultiThreaded hashedItem = null; int membership; FullID wtdObjKey; // Check WtdObj is valid for insertion if (wtdObj.isValid()) { // Get (copy of) FullID of weighted object wtdObjKey = wtdObj.getID(); // Is this WtdObj already in the table?? if (containsKey(wtdObjKey)) { // Yes, WtdObj is in the table, so we need to take // special action: do we move it to the multi-hit list // or update the weight already in the multi-hit list? hashedItem = (WtdObjMultiThreaded) get(wtdObjKey); membership = hashedItem.getMembership(); if (isInMultiHitList(membership)) { // WtdObj is already in multi-hit list. Add the // weight of the WtdObj we're inserting to that // already in the multi-hit list hashedItem.accum( wtdObj.getWeight() ); } else { if (isValidSetImageID(membership)) { // WtdObj is in a set image, so remove it from // there and add it to the multi-hit list moveToMultiHitList(wtdObjKey, wtdObj, hashedItem); } else { // Meep! This should not happen!! } } } else { // WtdObj is not in the table, so append it to the // current set image if (isValidSetImageID( currentSetImage )) { appendToList(wtdObjKey, wtdObj, currentSetImage); } } } else { // We weren't given a valid WtdObj so complain debug.dumpTrace("SetImageWtdObjTable.insert: Invalid WtdObj passed"); } return returnValue; } /** * Move a WtdObj from its current position in a set image into * the multi-hit list. * @param hashedItem a hashed WtdObj * @param wtdObjKey the key of the hashed WtdObj * @param wtdObj the WtdObj being hashed */ private void moveToMultiHitList(FullID wtdObjKey, WtdObj wtdObj, WtdObjMultiThreaded hashedItem) { WtdObjMultiThreaded lastItem; // Unlink the hashed item from its current "thread" unlinkFromList( hashedItem ); // Mark it as part of the multi-hit list now hashedItem.setMembership( MULTI_HIT_LIST ); // Now we have "unlinked" hashedItem from its set image, and // flagged it as a tentative member of the multi-hit list, we // need to append it to the multi-hit list. appendToList( wtdObjKey, wtdObj, MULTI_HIT_LIST ); } /** * Unlink an item from the doubly-linked list it is currently part of. * Unlinking the item may cause the "first" and "previous insertion" * keys for a "thread" to be updated if the unlinked item is the first * or the last item of its list respectively. After unlinking, the * "thread identity" of the item is set to an invalid identity, to * denote this object is now part of no "thread." * @param hashedItem the item to be unlinked from its current list * @return the object just unlinked, or null if an error occurred. */ private WtdObjMultiThreaded unlinkFromList(WtdObjMultiThreaded hashedItem) { WtdObjMultiThreaded prevElt, nextElt; FullID prevEltKey, nextEltKey; int setImage; if (hashedItem != null) { setImage = hashedItem.getMembership(); // Get the "previous" and "next" elements in the given thread // "pointed" to by hashedItem to be removed prevEltKey = hashedItem.getPrevious(); nextEltKey = hashedItem.getNext(); // Swap "pointers" to unlink hashedItem from the // doubly-linked list: set prevElt.next -> nextEltKey and // nextElt.previous -> prevEltKey if (prevEltKey != null) { prevElt = (WtdObjMultiThreaded) get( prevEltKey ); prevElt.setNext( nextEltKey ); } else { // itemHashed must have been "first" in its set image // if its previous "pointer" was set to null (i.e., // no WtdObj preceded it in the doubly-linked list) // Accordingly, we must alter that set image's // "first" entry to point to nextEltKey instead. firstKeys.setElementAt(nextEltKey, setImage); } if (nextEltKey != null) { nextElt = (WtdObjMultiThreaded) get( nextEltKey ); nextElt.setPrevious( prevEltKey ); } else { // itemHashed must have been the last item appended // to some set image if it has no nextElt (i.e., its // next element "pointer" is null). Accordingly, we // must alter that set image's "prevInserted" entry // to point to prevEltKey instead. prevKeysInserted.setElementAt(prevEltKey, setImage); } // Now we've unlinked the item, signify it now belongs // to no valid "thread" in this table. Also, unset its // "next" and "previous" "pointers" to be null hashedItem.setMembership(NO_SET_IMAGE_SELECTED); hashedItem.setNext(null); hashedItem.setPrevious(null); } return hashedItem; } /** * Add a WtdObj not currently in the table to the table as part * of the currently selected set image. * @param wtdObjKey FullID key of the supplied WtdObj * @param wtdObj actual WtdObj being inserted into the table * @param listID the list to which to append this object */ private void appendToList(FullID wtdObjKey, WtdObj wtdObj, int listID) { WtdObjMultiThreaded elt, prevInsertion; FullID prevKeyInserted, first; // Make new WtdObjMultiThreaded comprising: // (null)<-(#key#weight#thread)->(null) elt = new WtdObjMultiThreaded(wtdObj, listID, debug); // Set the "prev" "pointer" of the item we are going // to append to point to prevKeyInserted prevKeyInserted = (FullID)prevKeysInserted.elementAt(listID); elt.setPrevious( prevKeyInserted ); // Now get the item we inserted last time this method was // invoked (if applicable) and update its "next" pointer if (prevKeyInserted != null) { prevInsertion = (WtdObjMultiThreaded) get(prevKeyInserted); if (prevInsertion == null) { // This should not happen!! debug.dumpTrace("SetImageWtdObjTable.appendToCurrentSetImage: " + "Previously inserted object has vanished " + "from the table!!"); } else { // Set the "next" "pointer" to be that of our wtdObjKey prevInsertion.setNext( wtdObjKey ); } } else { // This was the first object inserted, so make a copy // of its key in the current set image firstKeys.setElementAt(new FullID( wtdObjKey, debug ), listID); } // Insert this new element into the table put(wtdObjKey, elt); // Set previous insertion key to that of the new // item inserted this time around prevKeysInserted.setElementAt(wtdObjKey, listID); } /** * Return a WtdObjSetEnumeration for the weighted objects in some * set image. * @param image An image ID previously returned by newImage(). * @return A WtdObjSetEnumeration containing weighted object elements * in the image selected, or null if the parameter is bad. * @see WtdObjSetEnumeration */ public WtdObjSetEnumeration imageElements(int image) { if (isValidSetImageID(image)) return threadElements(image); else return null; } /** * Return an Enumeration for the weighted objects in the multiHit list. * @return Enumeration containing weighted object elements in the list. *
* NOTE: Unlike the set images, multiHit is not in Weight order, so * the ordering of elements in the Enumeration returned is arbitrary. * @see nextElement * @see hasMoreElements */ public WtdObjSetEnumeration multiHitElements() { return threadElements( MULTI_HIT_LIST ); } /** * Return an Enumeration for the weighted objects in the multiHit list * such that the elements of the multi-hit list are in non-increasing * order of weight. * @return Enumeration containing weighted object elements in the list * in non-increasing order of weight */ public Enumeration sortedMultiHitElements() { WtdObjQuickSort sorter; sorter = new WtdObjQuickSort( threadElements( MULTI_HIT_LIST ), Sortable.DECREASING, debug ); return sorter.sortedWtdObjSetElements(); } /** * Return a weighted object set enumeration over the weighted * objects in this thread of the weighted object table. * @param thread thread id (set image/multi-hit list) * @return enumeration over this thread of the weighted object * table */ private WtdObjSetEnumeration threadElements( int thread ) { /** * Provides an implementation of WtdObjSetEnumeration for * SetImageWtdObjTable in that it will return an object which * then can be used as an iterator over a specified thread in * the SetImageWtdObjTable. The iterator starts from the * first item in the selected thread (the multi-hit list is * the first thread of the table). *
* This method implicitly * relies upon the hashCode method to be defined in the object * that is going to be used as a key object. * @author Paul Mather * @version 0.9 * @see java.util.Hashtable */ class SetImageWtdObjTableEnum implements WtdObjSetEnumeration { /** * nextElement item for use in Enumerations */ private WtdObjMultiThreaded enum = null; /** * Number of elements iterated over so far */ private int numIteratedOver = 0; /** * Create an enumeration over an InsertionOrderWtdObjTable * initialised to the first entry in the table. * @param thread id of the set image (or multi-hit list) * we are to iterate over. */ public SetImageWtdObjTableEnum(int thread) { FullID first; // Set the starting point of our enumeration to the // first item of the desired thread first = (FullID) firstKeys.elementAt(thread); if (first != null) { enum = (WtdObjMultiThreaded) get(first); if (enum == null) { // This should not happen!! debug.dumpTrace("SetImageWtdObjTableEnum: " + "Previously inserted element has " + "vanished from the table!!"); } } else { // Nothing has been inserted yet enum = null; } // Reset count of number of elements returned so far numIteratedOver = 0; } /** * Return true if this table enumeration has more * elements, else return false. * @return boolean denoting whether there are elements * remaining in this table's enumeration */ public boolean hasMoreElements() { return (enum != null); } /** * Return the next weighted object element in this thread's * enumeration. Also, set up the enum to be the next * weighted object in the threaded sequence. * @return Object that is the next in the sequence * @see java.util.Enumeration#nextElement * @exception java.util.NoSuchElementException if is no * next element in the ordering */ public synchronized Object nextElement() throws NoSuchElementException { Object nextElt; FullID nextItem; if (enum == null) throw new NoSuchElementException("Attempt to iterate past end of SetImageWtdObjTable"); else { // Save a copy of the nextElement nextElt = new WtdObj(enum.getID(),enum.getWeight(),debug); // Register that we've supplied another item from // this enumeration numIteratedOver++; // Move pointer on to next item in enumeration nextItem = enum.getNext(); if (nextItem != null) { enum = (WtdObjMultiThreaded) get(nextItem); if (enum == null) // This should not happen!! debug.dumpTrace("SetImageWtdObjTableEnum.nextElement: " + "Previously inserted elements has " + "vanished from the table!!"); } else // End of the list... enum = null; } return nextElt; } /** * Skip forward a given number of elements in this * enumeration. * @param k number of elements to skip over * @exception java.util.NoSuchElementException if we skip * past the end of the enumeration */ public void skip( int k ) throws NoSuchElementException { // Make sure a valid value for k was supplied if (k < 0) debug.dumpTrace("SetImageWtdObjTableEnum.skip: " + "attempt to skip backward in enumeration"); else { for (int i=0; i < k; i++) { // If we iterate past the end, the statement // below should throw NoSuchElementException, // which we do not catch here, but let // propagate back up the calling chain to be // caught elsewhere this.nextElement(); } } } /** * Copy a certain number of elements into a bag. * @param num How many elements to copy. * @param sampleBag The WtdObjBag to add elements to. * @return OK: there were Num elements left, and they all were * copied well; anything else: problems. */ public int sample(int num, WtdObjBag sampleBag) { WtdObj threadElt; int returnValue = ReturnCodes.OK; for (int i=0; i < num; i++) { // Try and add another element from the // enumeration to this bag try { threadElt = (WtdObj) this.nextElement(); } catch( NoSuchElementException blammo) { // Alas, there wasn't one, so let's quit // the loop and report the bad news returnValue = ReturnCodes.NOT_FOUND; break; } // Add the weighted object we got from the // enumeration to the bag sampleBag.add(threadElt); } return returnValue; } /** * Copy into a bag all the members of this set with * weights >= some weight. Because we have a * nextElement() but not a previousElement(), we don't * have a clean way to back up if we reach an element * of the enumeration that has weight < minWt. So, * instead, we peek at the weight of the enumeration * element and only do a nextElement if its weight * is >= minWt. * @param minWt The lowest weight to copy. * @param sampleBag The WtdObjBag to add elements to. * @return OK There were Num elements left, and they all * were copied well; anything else: problems. * @exception NoSuchElementException */ public int sampleToWt(Weight minWt, WtdObjBag sampleBag) { int returnValue = ReturnCodes.OK; // Loop until we get to the end of the list while(this.hasMoreElements() && returnValue == ReturnCodes.OK) { // Peek at the weight of enum if (minWt.compare(enum.getWeight()) == Weight.HIGHER ) { break; } else { // The weight of the next element is >= minWt // so add the next element to the sample bag returnValue = sampleBag.add((WtdObj)this.nextElement()); } } return returnValue; } /** * Return the number of elements still to be iterated * over in this thread. We do this by counting from * the current point in the enumeration until we * reach a "next" pointer which is null. * @return the exact number of elements in this thread * still to be iterated over */ public int exactNumRemaining() { WtdObjMultiThreaded nextElt = enum; int count = 0; while (nextElt != null) { // Increment tally count++; // Bump onto the next item in this thread nextElt = (WtdObjMultiThreaded) get(nextElt.getNext()); } return count; } /** * Return the approximate number of elements still to be * iterated over. This is a (VERY!) rough estimate * which naively assumes all the threads in the table * (including the multi-hit list) are of equal length. * @return the approximate number of elements still to be * iterated over */ public int approxNumRemaining() { int approxNum; approxNum = size()/(numSetImages+1) - numIteratedOver; return (approxNum < 0)?0:approxNum; } /** * Return the maximum number of elements still to be * iterated over. This is the total number of items * in the table less the number we have already iterated * over in this thread. This will, needless to say, * be a gross overestimation, but an upper bound * nontheless. * @return the maximum number of elements still to be * iterated over */ public int maxNumRemaining() { return size() - numIteratedOver; } } return new SetImageWtdObjTableEnum( thread ); } /** * Clear hash table. This basically just calls the superclass * clear() method, but also resets local list variables * to null to reflect initial table status. * @see java.util.Hashtable */ public synchronized void clear() { // Clear the hash table super.clear(); // Blow away current set image information initialiseVectors(); numSetImages = MULTI_HIT_LIST; // "Unset" currentSetImage currentSetImage = NO_SET_IMAGE_SELECTED; } }