/*
* RepeatElement.cs
*
* This work is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published
* by the Free Software Foundation; either version 2 of the License,
* or (at your option) any later version.
*
* This work is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
* USA
*
* As a special exception, the copyright holders of this library give
* you permission to link this library with independent modules to
* produce an executable, regardless of the license terms of these
* independent modules, and to copy and distribute the resulting
* executable under terms of your choice, provided that you also meet,
* for each linked independent module, the terms and conditions of the
* license of that module. An independent module is a module which is
* not derived from or based on this library. If you modify this
* library, you may extend this exception to your version of the
* library, but you are not obligated to do so. If you do not wish to
* do so, delete this exception statement from your version.
*
* Copyright (c) 2003 Per Cederberg. All rights reserved.
*/
using System;
using System.Collections;
using System.IO;
namespace PerCederberg.Grammatica.Parser.RE {
/**
* A regular expression element repeater. The element repeats the
* matches from a specified element, attempting to reach the
* maximum repetition count.
*
* @author Per Cederberg, <per at percederberg dot net>
* @version 1.1
*/
internal class RepeatElement : Element {
/**
* The repeat type constants.
*/
public enum RepeatType {
/*
* The greedy repeat type constant.
*/
GREEDY = 1,
/*
* The reluctant repeat type constant.
*/
RELUCTANT = 2,
/*
* The possesive repeat type constant.
*/
POSSESSIVE = 3
}
/**
* The element to repeat.
*/
private Element elem;
/**
* The minimum number of repetitions.
*/
private int min;
/**
* The maximum number of repetitions.
*/
private int max;
/**
* The repeat type.
*/
private RepeatType type;
/**
* The start position of the last set of matches.
*/
private int matchStart;
/**
* A set with all matches starting at matchStart. A match with
* a specific length is reported by a non-zero bit in the bit
* array.
*/
private BitArray matches;
/**
* Creats a new element repeater.
*
* @param elem the element to repeat
* @param min the minimum count
* @param max the maximum count
* @param type the repeat type constant
*/
public RepeatElement(Element elem,
int min,
int max,
RepeatType type) {
this.elem = elem;
this.min = min;
if (max <= 0) {
this.max = Int32.MaxValue;
} else {
this.max = max;
}
this.type = type;
this.matchStart = -1;
this.matches = null;
}
/**
* Creates a copy of this element. The copy will be an
* instance of the same class matching the same strings.
* Copies of elements are necessary to allow elements to cache
* intermediate results while matching strings without
* interfering with other threads.
*
* @return a copy of this element
*/
public override object Clone() {
return new RepeatElement((Element) elem.Clone(),
min,
max,
type);
}
/**
* Returns the length of a matching string starting at the
* specified position. The number of matches to skip can also be
* specified.
*
* @param m the matcher being used
* @param str the string to match
* @param start the starting position
* @param skip the number of matches to skip
*
* @return the length of the matching string, or
* -1 if no match was found
*/
public override int Match(Matcher m,
string str,
int start,
int skip) {
if (skip == 0) {
matchStart = -1;
matches = null;
}
switch (type) {
case RepeatType.GREEDY:
return MatchGreedy(m, str, start, skip);
case RepeatType.RELUCTANT:
return MatchReluctant(m, str, start, skip);
case RepeatType.POSSESSIVE:
if (skip == 0) {
return MatchPossessive(m, str, start, 0);
}
break;
}
return -1;
}
/**
* Returns the length of the longest possible matching string
* starting at the specified position. The number of matches
* to skip can also be specified.
*
* @param m the matcher being used
* @param str the string to match
* @param start the starting position
* @param skip the number of matches to skip
*
* @return the length of the longest matching string, or
* -1 if no match was found
*/
private int MatchGreedy(Matcher m,
string str,
int start,
int skip) {
// Check for simple case
if (skip == 0) {
return MatchPossessive(m, str, start, 0);
}
// Find all matches
if (matchStart != start) {
matchStart = start;
matches = new BitArray(10);
FindMatches(m, str, start, 0, 0, 0);
}
// Find first non-skipped match
for (int i = matches.Count -1; i >= 0; i--) {
if (matches[i]) {
if (skip == 0) {
return i;
}
skip--;
}
}
return -1;
}
/**
* Returns the length of the shortest possible matchine string
* starting at the specified position. The number of matches to
* skip can also be specified.
*
* @param m the matcher being used
* @param str the string to match
* @param start the starting position
* @param skip the number of matches to skip
*
* @return the length of the shortest matching string, or
* -1 if no match was found
*/
private int MatchReluctant(Matcher m,
string str,
int start,
int skip) {
// Find all matches
if (matchStart != start) {
matchStart = start;
matches = new BitArray(10);
FindMatches(m, str, start, 0, 0, 0);
}
// Find first non-skipped match
for (int i = 0; i < matches.Count; i++) {
if (matches[i]) {
if (skip == 0) {
return i;
}
skip--;
}
}
return -1;
}
/**
* Returns the length of the maximum number of elements matching
* the string starting at the specified position. This method
* allows no backtracking, i.e. no skips..
*
* @param m the matcher being used
* @param str the string to match
* @param start the starting position
* @param count the start count, normally zero (0)
*
* @return the length of the longest matching string, or
* -1 if no match was found
*/
private int MatchPossessive(Matcher m,
string str,
int start,
int count) {
int length = 0;
int subLength = 1;
// Match as many elements as possible
while (subLength > 0 && count < max) {
subLength = elem.Match(m, str, start + length, 0);
if (subLength >= 0) {
count++;
length += subLength;
}
}
// Return result
if (min <= count && count <= max) {
return length;
} else {
return -1;
}
}
/**
* Finds all matches and adds the lengths to the matches set.
*
* @param m the matcher being used
* @param str the string to match
* @param start the starting position
* @param length the match length at the start position
* @param count the number of sub-elements matched
* @param attempt the number of match attempts here
*/
private void FindMatches(Matcher m,
string str,
int start,
int length,
int count,
int attempt) {
int subLength;
// Check match ending here
if (count > max) {
return;
}
if (min <= count && attempt == 0) {
if (matches.Length <= length) {
matches.Length = length + 10;
}
matches[length] = true;
}
// Check element match
subLength = elem.Match(m, str, start, attempt);
if (subLength < 0) {
return;
} else if (subLength == 0) {
if (min == count + 1) {
if (matches.Length <= length) {
matches.Length = length + 10;
}
matches[length] = true;
}
return;
}
// Find alternative and subsequent matches
FindMatches(m, str, start, length, count, attempt + 1);
FindMatches(m,
str,
start + subLength,
length + subLength,
count + 1,
0);
}
/**
* Prints this element to the specified output stream.
*
* @param output the output stream to use
* @param indent the current indentation
*/
public override void PrintTo(TextWriter output, string indent) {
output.Write(indent + "Repeat (" + min + "," + max + ")");
if (type == RepeatType.RELUCTANT) {
output.Write("?");
} else if (type == RepeatType.POSSESSIVE) {
output.Write("+");
}
output.WriteLine();
elem.PrintTo(output, indent + " ");
}
}
}