package edu.vt.marian.Document;

import java.io.*;
import java.net.*;
import java.util.*;

import edu.vt.marian.common.*;


/**
    Utility routines for dealing with XML documents.

    The routines here implement a small, permissive tag scanner: they read
    one tag from a character stream and return its name and attributes as a
    flat list of strings.  Names (tag and attribute) and unquoted attribute
    values are made up of letters, digits and underscores only; quoted
    values may contain any characters other than the enclosing quote.

    @author	Robert France
*/
public class XmlDoc
{
    /**
        Reads the next tag from <code>in</code>, skipping any leading white space.

        @param in	the stream to read from.
        @return	the bindings produced by {@link #acceptPreppedTag}, or
    		<code>null</code> if the first non-white-space character is
    		not '&lt;' (that character has been consumed).
        @throws EOFException	if the stream ends before a tag is complete.
        @throws IOException	if the underlying reader fails.
    */
    public static Vector acceptTag(BufferedReader in) throws IOException
    {
        int c = in.read();
        while ( isWhitespace(c) )	// Eat any white space before the tag.
            c = in.read();
        if ( c == -1 )
            throw new EOFException();
        if ( c != '<' )
            return null;
        return acceptPreppedTag(in);
    }

    /**
        Reads the remainder of a tag whose opening '&lt;' has already been consumed.

        The returned vector holds <code>String</code>s: element 0 is the tag
        name (with a leading '/' for a closing tag), followed by alternating
        attribute names and attribute values.  Values may be single-quoted,
        double-quoted or unquoted; the quotes are not part of the value.
        Reading stops after the closing '&gt;'.

        @param in	the stream to read from, positioned just after '&lt;'.
        @return	the bindings, or <code>null</code> if there is no tag name
    		or an attribute name is not immediately followed by '='.
        @throws EOFException	if the stream ends before the closing '&gt;'
    		(including inside a quoted value).
        @throws IOException	if the underlying reader fails.
    */
    public static Vector acceptPreppedTag(BufferedReader in) throws IOException
    {
        Vector bindings = new Vector();
        // Growable buffer: tokens of any length are accepted.
        StringBuffer token = new StringBuffer();

        // Get tag name; add as first binding.
        int c = in.read();
        if ( c == '/' )
        {
            token.append((char) c);
            c = in.read();
        }
        c = readName(in, c, token);
        if ( token.length() == 0 )	// No tag name.
            return null;
        bindings.addElement(token.toString());

        while ( true )
        {
            // Skip anything that cannot start an attribute name; a '>' here
            //  ends the tag.
            while ( ! isNameChar(c) )
            {
                if ( c == -1 )
                    throw new EOFException();
                if ( c == '>' )
                    return bindings;
                c = in.read();
            }

            // Attribute name.
            token.setLength(0);
            c = readName(in, c, token);
            bindings.addElement(token.toString());

            // The name must be followed directly by '='.
            if ( c != '=' )
                return null;

            // Find the start of the value: an opening quote or the first
            //  name character of an unquoted value.
            token.setLength(0);
            int quote = 0;		// Opening quote character; 0 if unquoted.
            c = in.read();
            while ( ! isNameChar(c) )
            {
                if ( c == -1 )
                    throw new EOFException();
                if ( c == '\'' || c == '\"' )
                {
                    quote = c;
                    break;
                }
                if ( c == '>' )
                {
                    // Tag closed before any value appeared: bind an empty
                    //  value rather than reading past the end of the tag.
                    bindings.addElement("");
                    return bindings;
                }
                c = in.read();
            }

            if ( quote != 0 )
            {
                // Quoted value: everything up to the matching quote.
                while ( (c = in.read()) != quote )
                {
                    if ( c == -1 )	// Unterminated quoted value.
                        throw new EOFException();
                    token.append((char) c);
                }
                c = in.read();
            }
            else
            {
                c = readName(in, c, token);
            }
            bindings.addElement(token.toString());

            while ( isWhitespace(c) )	// Eat any white space before next
                c = in.read();		//  attribute or close character.
        }
    }

    /** Tells whether c is a real character (not end of stream) that is white space. */
    private static boolean isWhitespace(int c)
    {
        return ( c != -1 && Character.isWhitespace((char) c) );
    }

    /** Tells whether c may appear in a name: a letter, a digit or an underscore. */
    private static boolean isNameChar(int c)
    {
        return ( c != -1 && (Character.isLetterOrDigit((char) c) || c == '_') );
    }

    /**
        Appends c and any following name characters to out.
        Returns the first character read that is not a name character (-1 at end of stream).
    */
    private static int readName(BufferedReader in, int c, StringBuffer out) throws IOException
    {
        while ( isNameChar(c) )
        {
            out.append((char) c);
            c = in.read();
        }
        return c;
    }
}

