package edu.vt.marian.search; import java.io.*; import java.net.*; import java.util.*; import edu.vt.marian.common.*; /** DienstClassManager extends the capability of the Marian System by allowing searches to be carried out on the Dienst System. It also allows for the retrieval of documents from the Dienst database. All these operations are transparent, i.e. the end user does not have to format queries to suit the Dienst System. In fact, the user does not even know that searches are being made on the Dienst System.
@author Nilesh Phadke
@see edu.vt.marian.common.FullID
@see edu.vt.marian.common.WtdObj
@see edu.vt.marian.search.VectorWtdObjSet
*/
public class DienstClassManager implements NodeClassManager {
// Debug object handed to the constructor.
protected Debug debug;
// Set to "/text/html" by the constructor.
// NOTE(review): presumably the document format requested from Dienst -- confirm against the retrieval code.
protected String DocFormat;
// Class ID supplied to the constructor.
protected int ClassID;
// NOTE(review): presumably maps Marian IDs (as Integer keys) to Dienst document identifiers;
// the code that fills this table is not visible here -- confirm.
protected Hashtable MarianDienstIDs=new Hashtable();
// FullID on which the constructor calls setClassID(ClassID).
// NOTE(review): declared without an initializer.
protected FullID F;
// NOTE(review): not used in the visible part of the file; presumably scratch space for building results -- confirm.
protected WtdObj weightedobj;
// Result set returned by match().
protected VectorWtdObjSet VObjSet;
// NOTE(review): not used in the visible part of the file; presumably the instance ID of the document being processed -- confirm.
protected int InstanceID;
// Search terms collected by match(), one array per kind of term.
// NOTE(review): these arrays are declared without an initializer.
protected String[] authors;
protected String[] titles;
protected String[] keywords;
protected String[] abstracts;
// For each array above, the index of the next free slot (i.e. the number of terms stored so far).
protected int authorsArrayIndex=0,titlesArrayIndex=0,keywordsArrayIndex=0,abstractsArrayIndex=0;
/**
* Constructor for DienstClassManager. Accepts the class ID and the Debug object as input. */
public DienstClassManager ( int Clid, Debug d )
{
    // Remember the debug object and the class ID this manager serves.
    debug = d;
    ClassID = Clid;

    // Stamp the class ID onto the FullID held by this manager.
    // NOTE(review): F is declared without an initializer and is not assigned
    // before this call, so unless FullID.setClassID is static this dereference
    // throws NullPointerException. F probably needs to be constructed first;
    // FullID's constructors are not visible from here -- confirm and fix.
    int result = F.setClassID(ClassID);
    if (result != ReturnCodes.OK)
    {
        // Report what failed instead of a bare "ERROR".
        System.out.println("ERROR: DienstClassManager could not set class ID "
                           + ClassID + " (return code " + result + ")");
    }

    // A string literal is sufficient; wrapping it in new String(...) only
    // allocates a redundant copy.
    DocFormat = "/text/html";
}
/**
The following function (i.e. polishstring(String str)) is required because
the syntax rules for URLs restrict a few characters to special roles, and require that if these characters are used in any other way they be written as an escape
sequence: a percent sign followed by the character code in hexadecimal. The reserved characters are:
/ - separates components in the URL.
? - separates optional arguments from the rest of the URL.
# - indicates a reference to a named anchor within a document.
= - separates name from value in an argument list.
& - separates multiple arguments after a ?.
Note that the slash character used in handles must be encoded when expressed in a URL. (The encoding is %2F, by the way.)
Finally, the space character may not appear anywhere. It must be written as a "+" (or as a percent-sign escape sequence).
*/
protected String polishstring(String str)
{
    // Builds a copy of str in which every '/' is written as the escape
    // sequence "%2f" and every space is written as '+'. All other characters
    // (including '?', '#', '=', '&' and '%') are copied through unchanged.
    final int length = str.length();
    StringBuilder encoded = new StringBuilder(length + 16);
    for (int pos = 0; pos < length; pos++)
    {
        char ch = str.charAt(pos);
        if (ch == '/')
        {
            encoded.append("%2f");
        }
        else if (ch == ' ')
        {
            encoded.append('+');
        }
        else
        {
            encoded.append(ch);
        }
    }
    return encoded.toString();
}
/* The match function has three main parts:
-> extract the search terms from the information passed to it (InfoDesc) and convert them to a form useful for searching the Dienst System
-> using the search string formed in the previous step, carry out the actual search on the Dienst System
-> capture the reply from the Dienst Server and, for every unique document retrieved, create a FullID and store the data in a table.
*/
/**
 * Collects the search terms carried by the link descriptions of
 * <code>description</code> into the authors / keywords / abstracts / titles
 * arrays and, if at least one term of a recognized class was found, calls
 * search().
 *
 * @param description the query; each of its link descriptions contributes
 *        one term, classified by the link's class ID. Links of any other
 *        class are ignored.
 * @return the VObjSet field, or null if search() threw an exception
 */
public WtdObjSet match(InfoDesc description)
{
    // Number of link descriptions whose class maps onto a searchable field.
    int thingstosearch = 0;

    // NOTE(review): the *ArrayIndex counters are not reset here, so terms from
    // an earlier call stay in the arrays unless other code clears them -- confirm.

    // extracting info from infodesc
    Enumeration linkDescsEnum = description.enumLinkDescs();
    while (linkDescsEnum.hasMoreElements())
    {
        LinkDesc currentLink = (LinkDesc) linkDescsEnum.nextElement();
        int currentClassID = currentLink.getClassID();
        InfoDesc stringDesc = currentLink.getKeyDesc();
        String currentString = (String) stringDesc.getNodeDesc();

        // Each store first makes sure the target array exists and has a free
        // slot: the arrays are declared without an initializer, so an
        // unguarded store would throw NullPointerException (or
        // ArrayIndexOutOfBoundsException once a fixed-size array filled up).
        switch (currentClassID) {
        case ClassIDs.CLASS_HAS_AUTHOR :
        case ClassIDs.CLASS_HAS_CONF_AUTHOR :
        case ClassIDs.CLASS_HAS_CORP_AUTHOR :
            authors = ensureTermCapacity(authors, authorsArrayIndex);
            authors[authorsArrayIndex++] = currentString;
            thingstosearch++;
            break;
        case ClassIDs.CLASS_HAS_KEYWORD :
        case ClassIDs.CLASS_HAS_SUBJECT : // a subject is treated as a keyword search in Dienst
            keywords = ensureTermCapacity(keywords, keywordsArrayIndex);
            keywords[keywordsArrayIndex++] = currentString;
            thingstosearch++;
            break;
        case ClassIDs.CLASS_HAS_ABSTRACT :
            abstracts = ensureTermCapacity(abstracts, abstractsArrayIndex);
            abstracts[abstractsArrayIndex++] = currentString;
            thingstosearch++;
            break;
        case ClassIDs.CLASS_HAS_TITLE :
            titles = ensureTermCapacity(titles, titlesArrayIndex);
            titles[titlesArrayIndex++] = currentString;
            thingstosearch++;
            break;
        default :
            // Not a class this manager searches on; nothing to record.
            break;
        } // end of switch
    } // end of while

    if (thingstosearch != 0)
    {
        try
        {
            search();
        } catch (Exception e)
        {
            System.out.println("DienstClassManager.match(): exception " + e.toString() +
            " while contacting remote server.");
            return( null );
        }
    }
    // return WtdObjSet
    return( VObjSet );
} // end of match function

/**
 * Returns an array that has room for a store at index <code>used</code>:
 * <code>terms</code> itself when it is large enough, otherwise a larger copy
 * holding the first <code>used</code> entries.
 */
private static String[] ensureTermCapacity(String[] terms, int used)
{
    if (terms != null && used < terms.length)
    {
        return terms;
    }
    int grownLength = (terms == null || terms.length == 0) ? 8 : terms.length * 2;
    String[] grown = new String[Math.max(grownLength, used + 1)];
    if (terms != null)
    {
        System.arraycopy(terms, 0, grown, 0, Math.min(used, terms.length));
    }
    return grown;
}
/*
The following function takes an int and converts it to an Integer. This is required because the put
function of Hashtable requires that the key be of type java.lang.Object and does not allow an int
to be used directly as a key for storing in the hashtable.
*/
/**
 * Boxes an int so that it can be used where an Object is required.
 *
 * @param val the value to box
 * @return an Integer whose value is <code>val</code>
 */
protected Integer intTOInteger( int val) {
    // Integer.valueOf replaces the deprecated Integer(int) constructor; the
    // result has the same equals()/hashCode() behaviour.
    return Integer.valueOf(val);
}
protected void search() throws Exception {
String authorstring="",titlestring="",keywordstring="",abstractstring="",booleansearchstring="" ;
int i;
// forming the search string
if(authorsArrayIndex>=1)
{
authorstring="author="+authors[0];
for (i=1;i