/*
* LookAheadSet.cs
*
* This work is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published
* by the Free Software Foundation; either version 2 of the License,
* or (at your option) any later version.
*
* This work is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
* USA
*
* As a special exception, the copyright holders of this library give
* you permission to link this library with independent modules to
* produce an executable, regardless of the license terms of these
* independent modules, and to copy and distribute the resulting
* executable under terms of your choice, provided that you also meet,
* for each linked independent module, the terms and conditions of the
* license of that module. An independent module is a module which is
* not derived from or based on this library. If you modify this
* library, you may extend this exception to your version of the
* library, but you are not obligated to do so. If you do not wish to
* do so, delete this exception statement from your version.
*
* Copyright (c) 2003 Per Cederberg. All rights reserved.
*/
using System.Collections;
using System.Text;
namespace PerCederberg.Grammatica.Parser {
/**
* A token look-ahead set. This class contains a set of token id
* sequences. All sequences in the set are limited in length, so
* that no single sequence is longer than a maximum value. This
* class also filters out duplicates. Each token sequence also
* contains a repeat flag, allowing the look-ahead set to contain
* information about possible infinite repetitions of certain
* sequences. That information is important when conflicts arise
* between two look-ahead sets, as such a conflict cannot be
* resolved if the conflicting sequences can be repeated (would
* cause infinite loop).
*
* @author Per Cederberg, <per at percederberg dot net>
* @version 1.1
*/
internal class LookAheadSet {
/**
 * The token look-ahead sequences in this set. Every entry is a
 * Sequence instance (all readers in this class cast the entries
 * to Sequence), which in turn holds the token id:s.
 */
private readonly ArrayList elements = new ArrayList();
/**
 * The maximum length of any look-ahead sequence. The value is
 * assigned once by the constructor and never changed afterwards.
 */
private readonly int maxLength;
/**
 * Creates a new, initially empty look-ahead set with the
 * specified maximum length. Sequences longer than this length
 * are truncated when they are added to the set.
 *
 * @param maxLength the maximum token sequence length
 */
public LookAheadSet(int maxLength) {
this.maxLength = maxLength;
}
/**
 * Creates a look-ahead set populated with the token sequences
 * of another set. The new set may use a different maximum
 * length; sequences that are too long are truncated while being
 * added, and duplicates that result are dropped.
 *
 * @param maxLength the maximum token sequence length
 * @param set the look-ahead set to copy
 */
public LookAheadSet(int maxLength, LookAheadSet set)
    : this(maxLength) {
    this.AddAll(set);
}
/**
 * Returns the number of token sequences held by this set.
 *
 * @return the number of token sequences in the set
 */
public int Size() {
    return this.elements.Count;
}
/**
 * Returns the length of the shortest token sequence in this
 * set. An empty set yields zero (0).
 *
 * @return the length of the shortest token sequence
 */
public int GetMinLength() {
    if (elements.Count == 0) {
        return 0;
    }
    // Seed with the first sequence, then look for anything shorter
    int shortest = ((Sequence) elements[0]).Length();
    for (int i = 1; i < elements.Count; i++) {
        int current = ((Sequence) elements[i]).Length();
        if (current < shortest) {
            shortest = current;
        }
    }
    return shortest;
}
/**
 * Returns the length of the longest token sequence in this
 * set. An empty set yields zero (0).
 *
 * @return the length of the longest token sequence
 */
public int GetMaxLength() {
    int longest = 0;
    foreach (Sequence candidate in elements) {
        int current = candidate.Length();
        if (current > longest) {
            longest = current;
        }
    }
    return longest;
}
/**
 * Returns the initial token id:s of the sequences in this
 * look-ahead set. Empty sequences contribute nothing, and each
 * token id appears at most once, in order of first occurrence.
 *
 * @return an array of the initial token id:s in this look-ahead set
 */
public int[] GetInitialTokens() {
    ArrayList firsts = new ArrayList();
    foreach (Sequence candidate in elements) {
        object first = candidate.GetToken(0);
        // GetToken returns null for an empty sequence
        if (first == null || firsts.Contains(first)) {
            continue;
        }
        firsts.Add(first);
    }
    // The list only holds boxed ints, so it converts to int[]
    return (int[]) firsts.ToArray(typeof(int));
}
/**
 * Checks if any token sequence in this look-ahead set has its
 * repeat flag set.
 *
 * @return true if at least one token sequence is repetitive, or
 * false otherwise
 */
public bool IsRepetitive() {
    foreach (Sequence candidate in elements) {
        if (candidate.IsRepetitive()) {
            return true;
        }
    }
    return false;
}
/**
 * Checks if the upcoming token(s) in the parser match at least
 * one token sequence in this set.
 *
 * @param parser the parser to check
 *
 * @return true if the next tokens are in the set, or
 * false otherwise
 */
public bool IsNext(Parser parser) {
    foreach (Sequence candidate in elements) {
        if (candidate.IsNext(parser)) {
            return true;
        }
    }
    return false;
}
/**
 * Checks if the upcoming token(s) in the parser match at least
 * one token sequence in this set, comparing no more than a
 * given number of tokens per sequence.
 *
 * @param parser the parser to check
 * @param length the maximum number of tokens to check
 *
 * @return true if the next tokens are in the set, or
 * false otherwise
 */
public bool IsNext(Parser parser, int length) {
    foreach (Sequence candidate in elements) {
        if (candidate.IsNext(parser, length)) {
            return true;
        }
    }
    return false;
}
/**
 * Checks if any token sequence in this set overlaps with a
 * sequence in another look-ahead set. Two sequences overlap
 * when one of them is a prefix of the other (identical
 * sequences included).
 *
 * @param set the look-ahead set to check
 *
 * @return true if there is some token sequence that overlaps, or
 * false otherwise
 */
public bool IsOverlap(LookAheadSet set) {
    foreach (Sequence mine in elements) {
        if (set.IsOverlap(mine)) {
            return true;
        }
    }
    return false;
}
/**
 * Checks if a token sequence overlaps with any sequence in this
 * set. Two sequences overlap when one of them is a prefix of
 * the other (identical sequences included).
 *
 * @param seq the token sequence to check
 *
 * @return true if there is some token sequence that overlaps, or
 * false otherwise
 */
private bool IsOverlap(Sequence seq) {
    foreach (Sequence stored in elements) {
        bool storedIsPrefix = seq.StartsWith(stored);
        if (storedIsPrefix || stored.StartsWith(seq)) {
            return true;
        }
    }
    return false;
}
/**
 * Checks if a token sequence equal to the specified one is
 * present in this set.
 *
 * @param elem the token sequence to check
 *
 * @return true if the sequence is present in this set, or
 * false otherwise
 */
private bool Contains(Sequence elem) {
    Sequence match = FindSequence(elem);
    return match != null;
}
/**
 * Checks if at least one token sequence in this set is also
 * present in the specified set.
 *
 * @param set the look-ahead set to compare with
 *
 * @return true if the look-ahead sets intersect, or
 * false otherwise
 */
public bool Intersects(LookAheadSet set) {
    foreach (Sequence mine in elements) {
        if (set.Contains(mine)) {
            return true;
        }
    }
    return false;
}
/**
 * Finds the first stored token sequence that is equal to the
 * specified one.
 *
 * @param elem the token sequence to search for
 *
 * @return the identical token sequence if found, or
 * null if not found
 */
private Sequence FindSequence(Sequence elem) {
    // Iterate as object so that the Equals(object) override is
    // the one being called
    foreach (object stored in elements) {
        if (stored.Equals(elem)) {
            return (Sequence) stored;
        }
    }
    return null;
}
/**
 * Adds a token sequence to this set. A sequence longer than the
 * maximum length of this set is replaced by a truncated copy
 * first. The (possibly truncated) sequence is only stored if no
 * equal sequence is already present.
 *
 * @param seq the token sequence to add
 */
private void Add(Sequence seq) {
    Sequence candidate = seq;
    if (candidate.Length() > maxLength) {
        candidate = new Sequence(maxLength, candidate);
    }
    if (Contains(candidate)) {
        return;
    }
    elements.Add(candidate);
}
/**
 * Adds a non-repetitive token sequence consisting of a single
 * token to this set. Nothing is added if an equal sequence is
 * already present.
 *
 * @param token the token to add
 */
public void Add(int token) {
    Sequence single = new Sequence(false, token);
    Add(single);
}
/**
 * Adds every token sequence from the specified set to this set.
 * Sequences already present in this set are skipped.
 *
 * @param set the set to add from
 */
public void AddAll(LookAheadSet set) {
    ArrayList source = set.elements;
    // Index-based loop on purpose: the source may be this very set
    int index = 0;
    while (index < source.Count) {
        Add((Sequence) source[index]);
        index++;
    }
}
/**
 * Adds an empty token sequence to this set, unless an empty
 * sequence is already present.
 */
public void AddEmpty() {
    Sequence empty = new Sequence();
    Add(empty);
}
/**
 * Removes a token sequence from this set. The removal is
 * delegated to the underlying list, which removes the first
 * entry that equals the specified sequence.
 *
 * @param seq the token sequence to remove
 */
private void Remove(Sequence seq) {
    this.elements.Remove(seq);
}
/**
 * Removes from this set every token sequence found in the
 * specified set. Sequences not present in this set are ignored.
 *
 * @param set the set holding the sequences to remove
 */
public void RemoveAll(LookAheadSet set) {
    ArrayList source = set.elements;
    // Index-based loop on purpose: the source may be this very set
    int index = 0;
    while (index < source.Count) {
        Remove((Sequence) source[index]);
        index++;
    }
}
/**
 * Creates the look-ahead set that results from reading the
 * specified token. For every sequence in this set that starts
 * with the token, the remainder of that sequence (everything
 * after the first token) is added to the new set. The new set
 * has a maximum length one less than this set.
 *
 * @param token the token to read
 *
 * @return a new look-ahead set containing the remaining tokens
 */
public LookAheadSet CreateNextSet(int token) {
    LookAheadSet next = new LookAheadSet(maxLength - 1);
    foreach (Sequence candidate in elements) {
        object first = candidate.GetToken(0);
        // Empty sequences have no first token
        if (first == null) {
            continue;
        }
        if ((int) first == token) {
            next.Add(candidate.Subsequence(1));
        }
    }
    return next;
}
/**
 * Creates a new look-ahead set that is the intersection of
 * this set with another set. A token sequence in the new set
 * only has the repeat flag set if it was set in both of the
 * identical token sequences.
 *
 * @param set the set to intersect with
 *
 * @return a new look-ahead set containing the intersection
 */
public LookAheadSet CreateIntersection(LookAheadSet set) {
    LookAheadSet common = new LookAheadSet(maxLength);
    foreach (Sequence mine in elements) {
        Sequence theirs = set.FindSequence(mine);
        if (theirs == null) {
            continue;
        }
        // If ours is repetitive, their flag decides the outcome;
        // otherwise ours is non-repetitive and is the right pick
        common.Add(mine.IsRepetitive() ? theirs : mine);
    }
    return common;
}
/**
 * Creates a look-ahead set that is the combination of this set
 * with another set. The combination consists of token sequences
 * built by appending each sequence in the specified set onto
 * each sequence in this set, limited to the maximum length of
 * this set. This is sometimes referred to as the cartesian
 * product.
 *
 * Note that when this set is empty the specified set itself is
 * returned, and when the specified set is empty this set itself
 * is returned; no copy is made in those two cases.
 *
 * @param set the set to combine with
 *
 * @return a look-ahead set containing the combination
 */
public LookAheadSet CreateCombination(LookAheadSet set) {
    // Handle special cases
    if (this.Size() <= 0) {
        return set;
    }
    if (set.Size() <= 0) {
        return this;
    }

    // Create combinations
    LookAheadSet combined = new LookAheadSet(maxLength);
    foreach (Sequence head in elements) {
        int headLength = head.Length();
        if (headLength >= maxLength) {
            // Already full, nothing can be appended
            combined.Add(head);
            continue;
        }
        if (headLength <= 0) {
            // Empty prefix, the result is the other set's sequences
            combined.AddAll(set);
            continue;
        }
        foreach (Sequence tail in set.elements) {
            combined.Add(head.Concat(maxLength, tail));
        }
    }
    return combined;
}
/**
 * Creates a new look-ahead set holding the overlaps with another
 * set. Every token sequence in this set that overlaps with some
 * sequence in the other set is added to the new set.
 *
 * @param set the look-ahead set to check with
 *
 * @return a new look-ahead set containing the overlaps
 */
public LookAheadSet CreateOverlaps(LookAheadSet set) {
    LookAheadSet overlaps = new LookAheadSet(maxLength);
    foreach (Sequence mine in elements) {
        if (!set.IsOverlap(mine)) {
            continue;
        }
        overlaps.Add(mine);
    }
    return overlaps;
}
/**
 * Creates a look-ahead set filter. For each sequence in this
 * set and each sequence in the specified set that is a prefix
 * of it, the remainder of the former (with the prefix trimmed
 * off from the left) is added to the filter.
 *
 * Note that this set itself is returned, without copying, when
 * either this set or the specified set is empty.
 *
 * @param set the look-ahead set to trim with
 *
 * @return a look-ahead set filter
 */
public LookAheadSet CreateFilter(LookAheadSet set) {
    // Handle special cases
    bool nothingToTrim = this.Size() <= 0 || set.Size() <= 0;
    if (nothingToTrim) {
        return this;
    }

    // Create combinations
    LookAheadSet filter = new LookAheadSet(maxLength);
    foreach (Sequence full in elements) {
        foreach (Sequence prefix in set.elements) {
            if (full.StartsWith(prefix)) {
                filter.Add(full.Subsequence(prefix.Length()));
            }
        }
    }
    return filter;
}
/**
 * Creates a new look-ahead set with the same token sequences
 * as this one, but with the repeat flag set in every sequence.
 * Sequences that are already repetitive are reused as they are.
 *
 * @return a new repetitive look-ahead set
 */
public LookAheadSet CreateRepetitive() {
    LookAheadSet repetitive = new LookAheadSet(maxLength);
    foreach (Sequence original in elements) {
        Sequence flagged = original.IsRepetitive()
                         ? original
                         : new Sequence(true, original);
        repetitive.Add(flagged);
    }
    return repetitive;
}
/**
 * Returns a string representation of this set, produced without
 * a tokenizer.
 *
 * @return a string representation of this object
 */
public override string ToString() {
    return this.ToString(null);
}
/**
 * Returns a string representation of this set. The token
 * sequences are listed one per line between curly braces.
 *
 * @param tokenizer the tokenizer containing the tokens
 *
 * @return a string representation of this object
 */
public string ToString(Tokenizer tokenizer) {
    StringBuilder text = new StringBuilder("{");
    foreach (Sequence entry in elements) {
        text.Append("\n ").Append(entry.ToString(tokenizer));
    }
    return text.Append("\n}").ToString();
}
/**
* A token sequence. This class contains a list of token ids.
* It is immutable after creation, meaning that no changes
* will be made to an instance after creation.
*
* @author Per Cederberg, <per at percederberg dot net>
* @version 1.0
*/
private class Sequence {
/**
 * The repeat flag. If this flag is set, the token
 * sequence or some part of it may be repeated infinitely.
 * Defaults to false until a constructor assigns it.
 */
private bool repeat;
/**
 * The list of token ids in this sequence. The reference is
 * assigned by every constructor and never replaced afterwards.
 */
private readonly ArrayList tokens;
/**
 * Creates a new token sequence without any tokens and with
 * the repeat flag cleared.
 */
public Sequence() {
    tokens = new ArrayList(0);
    repeat = false;
}
/**
 * Creates a new token sequence with a single token.
 *
 * @param repeat the repeat flag value
 * @param token the token to add
 */
public Sequence(bool repeat, int token) {
    // Honor the caller's flag; it was previously ignored and the
    // flag hard-coded to false
    this.repeat = repeat;
    this.tokens = new ArrayList(1);
    this.tokens.Add(token);
}
/**
 * Creates a new token sequence as a (possibly truncated) copy
 * of another sequence. At most the specified number of tokens
 * is copied, starting from the beginning of the original. The
 * repeat flag is taken over from the original.
 *
 * @param length the maximum number of tokens to copy
 * @param seq the sequence to copy
 */
public Sequence(int length, Sequence seq) {
    this.repeat = seq.repeat;
    this.tokens = new ArrayList(length);
    int available = seq.Length();
    int count = (available < length) ? available : length;
    for (int pos = 0; pos < count; pos++) {
        this.tokens.Add(seq.tokens[pos]);
    }
}
/**
 * Creates a new token sequence with the same tokens as another
 * sequence, but with the specified repeat flag. The token list
 * is shared with the original sequence, not copied.
 *
 * @param repeat the new repeat flag value
 * @param seq the sequence to copy
 */
public Sequence(bool repeat, Sequence seq) {
    tokens = seq.tokens;
    this.repeat = repeat;
}
/**
 * Returns the number of tokens in this sequence.
 *
 * @return the number of tokens in the sequence
 */
public int Length() {
    return this.tokens.Count;
}
/**
 * Returns the token at a specified position in the sequence.
 *
 * @param pos the sequence position
 *
 * @return the token id found (as a boxed int), or
 * null if the position is outside the sequence
 */
public object GetToken(int pos) {
    if (pos < 0 || pos >= tokens.Count) {
        return null;
    }
    return tokens[pos];
}
/**
 * Checks if this sequence is equal to another object.
 * Only token sequences with the same tokens in the same
 * order will be considered equal. The repeat flag will be
 * disregarded.
 *
 * @param obj the object to compare with
 *
 * @return true if the objects are equal, or
 * false otherwise
 */
public override bool Equals(object obj) {
    Sequence other = obj as Sequence;
    // Null and foreign types are never equal to a sequence
    return other != null && Equals(other);
}
/**
 * Returns a hash code for this sequence. The hash code is
 * computed from the tokens only, so that sequences considered
 * equal (the repeat flag being disregarded) always get the
 * same hash code.
 *
 * @return the hash code for this sequence
 */
public override int GetHashCode() {
    int hash = 17;
    for (int i = 0; i < tokens.Count; i++) {
        // Overflow is expected and harmless when mixing hashes
        hash = unchecked(hash * 31 + tokens[i].GetHashCode());
    }
    return hash;
}
/**
 * Checks if this sequence is equal to another sequence. Two
 * sequences are equal when they hold the same tokens in the
 * same order. The repeat flag is disregarded.
 *
 * @param seq the sequence to compare with
 *
 * @return true if the sequences are equal, or
 * false otherwise
 */
public bool Equals(Sequence seq) {
    // With equal lengths, being a prefix means being identical
    return tokens.Count == seq.tokens.Count && StartsWith(seq);
}
/**
 * Checks if this token sequence begins with all the tokens of
 * another sequence, in the same order. A sequence longer than
 * this one can never be a prefix, so false is returned in that
 * case. An empty sequence is a prefix of every sequence.
 *
 * @param seq the token sequence to check
 *
 * @return true if this sequence starts with the other, or
 * false otherwise
 */
public bool StartsWith(Sequence seq) {
    int prefixLength = seq.Length();
    if (prefixLength > Length()) {
        return false;
    }
    for (int pos = 0; pos < prefixLength; pos++) {
        object mine = tokens[pos];
        if (!mine.Equals(seq.tokens[pos])) {
            return false;
        }
    }
    return true;
}
/**
 * Checks if this token sequence is repetitive, i.e. if its
 * repeat flag is set.
 *
 * @return true if this token sequence is repetitive, or
 * false otherwise
 */
public bool IsRepetitive() {
    return this.repeat;
}
/**
 * Checks if the upcoming token(s) in the parser match this
 * token sequence. Each token in the sequence is compared with
 * the parser token peeked at the same offset. An empty
 * sequence always matches.
 *
 * @param parser the parser to check
 *
 * @return true if the next tokens are in the sequence, or
 * false otherwise
 */
public bool IsNext(Parser parser) {
    for (int pos = 0; pos < tokens.Count; pos++) {
        Token upcoming = parser.PeekToken(pos);
        if (upcoming == null) {
            return false;
        }
        if (upcoming.GetId() != (int) tokens[pos]) {
            return false;
        }
    }
    return true;
}
/**
 * Checks if the upcoming token(s) in the parser match the
 * beginning of this token sequence. No more than the specified
 * number of tokens is compared, and never more than the
 * sequence contains.
 *
 * @param parser the parser to check
 * @param length the maximum number of tokens to check
 *
 * @return true if the next tokens are in the sequence, or
 * false otherwise
 */
public bool IsNext(Parser parser, int length) {
    int limit = (length > tokens.Count) ? tokens.Count : length;
    for (int pos = 0; pos < limit; pos++) {
        Token upcoming = parser.PeekToken(pos);
        if (upcoming == null) {
            return false;
        }
        if (upcoming.GetId() != (int) tokens[pos]) {
            return false;
        }
    }
    return true;
}
/**
 * Returns a string representation of this sequence, produced
 * without a tokenizer.
 *
 * @return a string representation of this object
 */
public override string ToString() {
    return this.ToString(null);
}
/**
 * Returns a string representation of this object. With a
 * tokenizer, the tokens are shown by their pattern descriptions
 * separated by spaces. Without one (null), the numeric token
 * id:s are listed separated by commas. A repetitive sequence is
 * marked with a trailing " *".
 *
 * @param tokenizer the tokenizer containing the tokens, or
 * null to print the numeric token id:s
 *
 * @return a string representation of this object
 */
public string ToString(Tokenizer tokenizer) {
    StringBuilder buffer = new StringBuilder();
    string separator = (tokenizer == null) ? ", " : " ";

    buffer.Append("[");
    for (int i = 0; i < tokens.Count; i++) {
        if (i > 0) {
            buffer.Append(separator);
        }
        if (tokenizer == null) {
            // ArrayList.ToString() would only yield the type name
            // in .NET, so the token id:s are written one by one
            buffer.Append((int) tokens[i]);
        } else {
            buffer.Append(tokenizer.GetPatternDescription((int) tokens[i]));
        }
    }
    buffer.Append("]");
    if (repeat) {
        buffer.Append(" *");
    }
    return buffer.ToString();
}
/**
 * Creates a new token sequence that is the concatenation of
 * this sequence and another, limited to a maximum length. The
 * result starts out as a copy of this sequence (truncated to
 * the maximum length if needed), followed by as many tokens
 * from the other sequence as still fit. The result is
 * repetitive if either of the two sequences is.
 *
 * @param length the maximum length of the result
 * @param seq the other sequence
 *
 * @return the concatenated token sequence
 */
public Sequence Concat(int length, Sequence seq) {
    Sequence joined = new Sequence(length, this);
    joined.repeat = joined.repeat || seq.repeat;

    // Number of tokens that may still be appended
    int room = length - this.Length();
    if (room > seq.Length()) {
        joined.tokens.AddRange(seq.tokens);
    } else {
        for (int pos = 0; pos < room; pos++) {
            joined.tokens.Add(seq.tokens[pos]);
        }
    }
    return joined;
}
/**
 * Creates a new token sequence holding the tokens of this
 * sequence from a start position onwards. The repeat flag is
 * kept. A start position of zero or less yields a full copy,
 * and a start position beyond the end yields an empty sequence.
 *
 * @param start the subsequence start position
 *
 * @return the new token subsequence
 */
public Sequence Subsequence(int start) {
    Sequence rest = new Sequence(Length(), this);
    int available = rest.tokens.Count;
    int drop = (start > available) ? available : start;
    if (drop > 0) {
        // Drop the leading tokens in one go
        rest.tokens.RemoveRange(0, drop);
    }
    return rest;
}
}
}
}