<?php
/*
Inspired by an awk BibTeX parser written by Nelson H. F. Beebe over 20 years ago although little of that 
remains other than a highly edited braceCount().

Released through http://bibliophile.sourceforge.net under the GPL licence.
Do whatever you like with this -- some credit to the author(s) would be appreciated.

A collection of PHP classes to manipulate bibtex files.

If you make improvements, please consider contacting the administrators at bibliophile.sourceforge.net so that your improvements can be added to the release package.

Mark Grimshaw 2004
http://bibliophile.sourceforge.net

(Amendments to file reading by Daniel Pozzi for v1.1)

21/08/2004 v1.4 Guillaume Gardey, Added string parsing and expand macro features.
 Fix bug with comments, strings macro.
    expandMacro = FALSE/TRUE to expand string macros.
    loadStringMacro($bibtex_string) to load a string. (array of lines)
22/08/2004 v1.4 Mark Grimshaw - a few adjustments to Guillaume's code.
*/

// For a quick command-line test (php -f PARSEENTRIES.php) after installation, uncomment these lines:

/***********************
// Parse a file
	$parse = NEW PARSEENTRIES();
	$parse->expandMacro = TRUE;
//	$parse->removeDelimit = FALSE;
//	$parse->fieldExtract = FALSE;
	$parse->openBib("gaertler.bib");
	$parse->extractEntries();
	$parse->closeBib();
	list($preamble, $strings, $entries) = $parse->returnArrays();
	print_r($preamble);
	print "\n";
	print_r($strings);
	print "\n";
	print_r($entries);
	print "\n\n";
***********************/
/***********************
// Parse a bibtex string
	$bibtex_data = file("bib.bib");
	$parse = NEW PARSEENTRIES();
//	$parse->expandMacro = TRUE;
//	$parse->removeDelimit = FALSE;
//	$parse->fieldExtract = FALSE;
	$array = array("RMP = Rev., Mod. Phys.");
	$parse->loadStringMacro($array);
	$parse->loadBibtexString($bibtex_data);
	$parse->extractEntries();
	$parse->closeBib();
	list($preamble, $strings, $entries) = $parse->returnArrays();
	print_r($preamble);
	print "\n";
	print_r($strings);
	print "\n";
	print_r($entries);
	print "\n\n";
***********************/

/**
 * Minimal BibTeX parser.
 *
 * Typical use:
 *   $parse = new PARSEENTRIES();
 *   $parse->openBib($file);            // or $parse->loadBibtexString($text)
 *   $parse->extractEntries();
 *   $parse->closeBib();
 *   list($preamble, $strings, $entries) = $parse->returnArrays();
 *
 * Behaviour switches (public properties, set before returnArrays()):
 *   fieldExtract  - split each entry into fields (TRUE) or keep the raw entry text (FALSE).
 *   removeDelimit - strip the enclosing quotes/braces from field values.
 *   expandMacro   - replace @string macro names used as field values by their text.
 *
 * The code is written to run unchanged on PHP 5 through PHP 8.
 */
class PARSEENTRIES
{
    // Raw @preamble entries while parsing; array('bibtexPreamble' => text) after returnArrays().
    public $preamble = array();
    // Raw @string entries while parsing; name => text map after returnArrays().
    public $strings = array();
    // Parsed entries (arrays of fields) or raw entry strings when fieldExtract is FALSE.
    public $entries = array();
    // Index of the entry currently being filled in $entries.
    public $count = 0;
    public $fieldExtract = TRUE;
    public $removeDelimit = TRUE;
    public $expandMacro = FALSE;
    // TRUE: read from the file handle $fid. FALSE: read from $bibtexString.
    public $parseFile = TRUE;
    public $fid = NULL;
    public $bibtexString = array();
    public $currentLine = 0;

    /**
     * Initialise parser state and default options.
     */
    public function __construct()
    {
        $this->preamble = $this->strings = $this->entries = array();
        $this->count = 0;
        $this->fieldExtract = TRUE;
        $this->removeDelimit = TRUE;
        $this->expandMacro = FALSE;
        $this->parseFile = TRUE;
    }

    /**
     * Legacy PHP 4 style constructor name, kept so that subclasses calling
     * $this->PARSEENTRIES() keep working. PHP 8 no longer treats it as a constructor.
     */
    public function PARSEENTRIES()
    {
        $this->__construct();
    }

    /**
     * Open a bib file for parsing.
     *
     * As in earlier versions the script is terminated when the file cannot be used,
     * but the reason is now written to the error log, and a failed fopen() is treated
     * the same way as a missing file instead of leaving an invalid handle behind.
     *
     * @param string $file Path of the BibTeX file.
     */
    public function openBib($file)
    {
        $handle = is_file($file) ? fopen($file, 'r') : FALSE;
        if ($handle === FALSE) {
            error_log('PARSEENTRIES: unable to open BibTeX file "' . $file . '"');
            die;
        }
        $this->fid = $handle;
        // 22/08/2004 Mark Grimshaw - parseFile is deliberately not touched here; it is set in the constructor.
    }

    /**
     * Load BibTeX source to parse from memory instead of from a file.
     *
     * @param string|array $bibtex_string Either the whole source as one string or an array of lines.
     */
    public function loadBibtexString($bibtex_string)
    {
        if (is_array($bibtex_string)) {
            // Re-index so that getLine() can walk the lines by position.
            $this->bibtexString = array_values($bibtex_string);
        } else {
            // Split on real line breaks (any platform convention).
            $this->bibtexString = preg_split('/\r\n|\r|\n/', (string) $bibtex_string);
        }
        $this->parseFile = FALSE;
        $this->currentLine = 0;
    }

    /**
     * Preload string macros.
     *
     * Accepted element forms (resolved in returnArrays()):
     *   - a raw entry:            '@string{RMP = "Rev. Mod. Phys."}'
     *   - a plain definition:     'RMP = Rev. Mod. Phys.'
     *   - an already keyed pair:  'RMP' => 'Rev. Mod. Phys.'
     *
     * @param array|string $macro_array Macro definitions.
     */
    public function loadStringMacro($macro_array)
    {
        $this->strings = is_array($macro_array) ? $macro_array : array($macro_array);
    }

    /**
     * Close the bib file. Safe to call when no file was opened (string parsing).
     */
    public function closeBib()
    {
        if (is_resource($this->fid)) {
            fclose($this->fid);
        }
        $this->fid = NULL;
    }

    /**
     * Fetch the next non-comment line (trimmed) from the current input.
     *
     * Lines whose first non-blank character is '%' are skipped.
     *
     * @return string|false The line, or FALSE when the input is exhausted.
     */
    public function getLine()
    {
        if ($this->parseFile) {
            if (!is_resource($this->fid)) {
                return FALSE;
            }
            while (($raw = fgets($this->fid)) !== FALSE) {
                $line = trim($raw);
                if ($line === '' || $line[0] !== '%') {
                    return $line;
                }
            }
            return FALSE;
        }

        $total = count($this->bibtexString);
        while ($this->currentLine < $total) {
            $line = trim((string) $this->bibtexString[$this->currentLine]);
            $this->currentLine++;
            if ($line === '' || $line[0] !== '%') {
                return $line;
            }
        }
        return FALSE;
    }

    /**
     * Net number of opening minus closing entry delimiters on a line.
     *
     * @param string $line         Text to inspect.
     * @param string $delimitStart '{' to count braces; anything else counts parentheses.
     * @return int
     */
    public function braceCount($line, $delimitStart)
    {
        if ($delimitStart == '{') {
            $delimitEnd = '}';
        } else {
            $delimitStart = '(';
            $delimitEnd = ')';
        }
        return substr_count($line, $delimitStart) - substr_count($line, $delimitEnd);
    }

    /**
     * Extract the text of an @string definition from the part following the '='.
     *
     * Handles '"text"' (up to the last double quote, so trailing material on the same
     * line is ignored), '{text}' (balanced braces) and bare values; the amount of
     * whitespace in front of the value does not matter.
     *
     * @param string $string Text after the '=' of the definition, e.g. ' "Rev. Mod. Phys."}'.
     * @return string The undelimited text.
     */
    public function extractStringValue($string)
    {
        $string = trim((string) $string);
        if ($string === '') {
            return '';
        }
        if ($string[0] === '{') {
            $depth = 0;
            $length = strlen($string);
            for ($i = 0; $i < $length; $i++) {
                if ($string[$i] === '{') {
                    $depth++;
                } elseif ($string[$i] === '}' && --$depth === 0) {
                    return (string) substr($string, 1, $i - 1);
                }
            }
            // Unbalanced braces: return everything after the opening brace.
            return (string) substr($string, 1);
        }
        $first = strpos($string, '"');
        $last = strrpos($string, '"');
        if ($first !== FALSE && $last > $first) {
            return (string) substr($string, $first + 1, $last - $first - 1);
        }
        // Bare value (e.g. a number): only the entry's closing delimiter has to go.
        return trim(rtrim($string, " \t})"));
    }

    /**
     * Legacy helper: split off the first value of a 'value, key = value, ...' segment.
     *
     * Kept for backward compatibility; reduceFields() no longer relies on it because the
     * pattern is not aware of braces or quotes. The limit of 2 is now passed explicitly
     * (it used to be supplied by accident through a flag constant in the limit position).
     *
     * @param string $seg Segment starting with a field value.
     * @return array array(first value, remainder after the next "key =" or FALSE).
     */
    public function fieldSplit($seg)
    {
        $parts = preg_split("/,\s*(\w+)\s*={1}\s*/U", $seg, 2);
        return array($parts[0], array_key_exists(1, $parts) ? $parts[1] : FALSE);
    }

    /**
     * Split the body of an entry into fields and store them in the current entry.
     *
     * The body is scanned character by character so that commas, '=' signs and
     * parentheses inside braces or quotes do not break a value. Keys are stored
     * lower-cased; values keep their delimiters (returnArrays() needs them to tell
     * literals from macro names). Empty values are skipped; the value "0" is kept.
     *
     * @param string $oldString Entry text following the citation key and its comma.
     */
    public function reduceFields($oldString)
    {
        $body = (string) $oldString;
        $length = strlen($body);
        $pos = 0;
        while ($pos < $length) {
            // Skip field separators and white space in front of the next key.
            $pos += strspn($body, ", \t\r\n", $pos);
            if ($pos >= $length || $body[$pos] === '}' || $body[$pos] === ')') {
                break; // end of input or closing delimiter of the entry
            }
            $equals = strpos($body, '=', $pos);
            if ($equals === FALSE) {
                break;
            }
            $key = strtolower(trim(substr($body, $pos, $equals - $pos)));
            $pos = $equals + 1;

            // Read the value up to the next top-level comma or the entry's closing delimiter.
            $start = $pos;
            $depth = 0;
            $inQuotes = FALSE;
            while ($pos < $length) {
                $char = $body[$pos];
                if ($char === '{') {
                    $depth++;
                } elseif ($char === '}') {
                    if ($depth > 0) {
                        $depth--;
                    } elseif (!$inQuotes) {
                        break;
                    }
                } elseif ($char === '"') {
                    // A quote only delimits a value outside of braces.
                    if ($depth === 0) {
                        $inQuotes = !$inQuotes;
                    }
                } elseif (!$inQuotes && $depth === 0 && ($char === ',' || $char === ')')) {
                    break;
                }
                $pos++;
            }
            $value = trim((string) substr($body, $start, $pos - $start));
            if ($key !== '' && $value !== '') {
                $this->entries[$this->count][$key] = $value;
            }
        }
    }

    /**
     * Start splitting a bibtex entry into component fields.
     * Stores the entry type ('___type', lower-cased) and the citation key ('id').
     *
     * @param string $entry Complete text of one entry.
     */
    public function fullSplit($entry)
    {
        $type = '';
        $id = '';
        $fields = '';
        if (preg_match('/@\s*([^\s{(]*)\s*[{(]\s*([^,]*),(.*)$/s', $entry, $matches)) {
            $type = $matches[1];
            $id = $matches[2];
            $fields = $matches[3];
        } elseif (preg_match('/@\s*([^\s{(]*)\s*[{(](.*)$/s', $entry, $matches)) {
            // Entry without any field, e.g. "@misc{key}".
            $type = $matches[1];
            $id = rtrim(trim($matches[2]), '})');
        }
        $this->entries[$this->count]['___type'] = strtolower($type);
        $this->entries[$this->count]['id'] = trim($id);
        $this->reduceFields($fields);
    }

    /**
     * Grab a complete bibtex entry starting at $line and file it as string macro,
     * preamble or regular entry.
     *
     * Lines are collected until the entry delimiters balance; this reads one line
     * beyond the entry, which is handed back to the caller.
     *
     * @param string $line First line of the entry (starting with '@').
     * @return string|false The next unprocessed line, or FALSE at end of input.
     */
    public function getEntry($line)
    {
        // The opening delimiter may be on a following line: join lines until it shows up.
        // Only the first delimiter after the entry type counts, so braces or parentheses
        // further along the line cannot be mistaken for it.
        while (!preg_match('/@\s*([^\s{(]*)\s*([{(])/', $line, $matches)) {
            $next = $this->getLine();
            if ($next === FALSE || ($next !== '' && $next[0] === '@')) {
                // Input exhausted, or a new entry starts: the pending text is not an entry.
                return $next;
            }
            $line .= $next;
        }

        $entry = '';
        $count = 0;
        do {
            $count += $this->braceCount($line, $matches[2]);
            $entry .= ' ' . $line;
            $line = $this->getLine();
        } while ($count && $line !== FALSE);

        $type = strtolower($matches[1]);
        if ($type === 'string') {
            if (!is_array($this->strings)) {
                $this->strings = array();
            }
            $this->strings[] = $entry;
        } elseif ($type === 'preamble') {
            if (!is_array($this->preamble)) {
                $this->preamble = array();
            }
            $this->preamble[] = $entry;
        } else {
            if ($this->fieldExtract) {
                $this->fullSplit($entry);
            } else {
                $this->entries[$this->count] = $entry;
            }
            $this->count++;
        }
        return $line;
    }

    /**
     * Remove enclosures around an entry field value. Additionally, expand macros if flag set.
     *
     * A value starting with '"' or '{' is a literal: its enclosing pair is removed when
     * removeDelimit is set. Any other value is a macro expression and is expanded when
     * expandMacro is set and macros are known.
     *
     * @param string $string Raw field value.
     * @return string
     */
    public function removeDelimiters($string)
    {
        $string = (string) $string;
        if ($string === '') {
            return $string;
        }
        if ($this->removeDelimit && ($string[0] === '"' || $string[0] === '{')) {
            return $this->stripEnclosure($string);
        }
        if ($this->expandMacro && is_array($this->strings) && !empty($this->strings)) {
            return $this->expandMacros($string);
        }
        return $string;
    }

    /**
     * Strip a leading '"' or '{' and, if present, the matching trailing character.
     */
    private function stripEnclosure($string)
    {
        $string = (string) $string;
        if ($string === '' || ($string[0] !== '"' && $string[0] !== '{')) {
            return $string;
        }
        $close = ($string[0] === '"') ? '"' : '}';
        $string = (string) substr($string, 1);
        // Only drop the last character when it really is the closing delimiter.
        if ($string !== '' && substr($string, -1) === $close) {
            $string = (string) substr($string, 0, -1);
        }
        return $string;
    }

    /**
     * Expand macro names in a '#'-concatenated expression.
     *
     * Each piece that equals a macro name (case-insensitively) is replaced by the macro
     * text; pieces are then joined with a single space, as before.
     */
    private function expandMacros($string)
    {
        $macros = array();
        foreach ($this->strings as $name => $text) {
            // Integer keys are raw, not yet parsed definitions - they carry no name.
            if (is_string($name)) {
                $macros[strtolower($name)] = $text;
            }
        }
        $pieces = preg_split('/\s*#\s*/', $string);
        foreach ($pieces as $index => $piece) {
            $name = strtolower(trim($piece));
            if ($name !== '' && isset($macros[$name])) {
                $pieces[$index] = $macros[$name];
            }
        }
        return implode(' ', $pieces);
    }

    /**
     * Turn one macro definition into array(name, text), or FALSE if it has no '='.
     */
    private function parseStringMacro($definition)
    {
        $definition = trim((string) $definition);
        $isEntry = (bool) preg_match('/@string\s*[{(](.*)$/is', $definition, $matches);
        $parts = explode('=', $isEntry ? $matches[1] : $definition, 2);
        if (count($parts) < 2) {
            return FALSE;
        }
        $name = trim($parts[0]);
        if ($name === '') {
            return FALSE;
        }
        $raw = trim($parts[1]);
        if ($isEntry || ($raw !== '' && ($raw[0] === '"' || $raw[0] === '{'))) {
            $raw = trim($this->extractStringValue($raw));
        }
        return array($name, $raw);
    }

    /**
     * This method starts the whole process: it reads the input line by line and hands
     * every line starting with '@' to getEntry().
     */
    public function extractEntries()
    {
        $line = $this->getLine();
        while ($line !== FALSE) {
            if (preg_match('/^@/', $line)) {
                // getEntry() returns the line it read beyond the entry (or FALSE at the end),
                // which must be examined too - it may start the next entry.
                $line = $this->getEntry($line);
            } else {
                $line = $this->getLine();
            }
        }
    }

    /**
     * Return arrays of entries etc. to the calling process.
     *
     * @return array array($preamble, $strings, $entries); each element is FALSE when empty.
     *               $preamble is array('bibtexPreamble' => text) (the last @preamble found),
     *               $strings maps macro names to their text (when fieldExtract is set),
     *               $entries is a list of field arrays, or raw strings when fieldExtract is FALSE.
     */
    public function returnArrays()
    {
        // Resolve the string macros first so that they are available for expansion below.
        if ($this->fieldExtract && is_array($this->strings)) {
            $strings = array();
            foreach ($this->strings as $key => $value) {
                if (is_string($key)) {
                    $strings[$key] = $value; // already a name => text pair
                    continue;
                }
                $parsed = $this->parseStringMacro($value);
                if ($parsed !== FALSE) {
                    $strings[$parsed[0]] = $parsed[1];
                }
            }
            $this->strings = $strings;
        }

        if (is_array($this->preamble)) {
            foreach ($this->preamble as $value) {
                if (!preg_match('/@preamble\s*[{(](.*)$/is', (string) $value, $matches)) {
                    continue;
                }
                $text = rtrim($matches[1]);
                $last = substr($text, -1);
                if ($last === '}' || $last === ')') {
                    $text = (string) substr($text, 0, -1); // closing delimiter of the entry
                }
                $preambles['bibtexPreamble'] = trim($this->removeDelimiters(trim($text)));
            }
            if (isset($preambles)) {
                $this->preamble = $preambles;
            }
        }

        // removeDelimit and expandMacro have NO effect if !$this->fieldExtract
        if (($this->removeDelimit || $this->expandMacro) && $this->fieldExtract && is_array($this->entries)) {
            foreach ($this->entries as $index => $fields) {
                if (!is_array($fields)) {
                    continue;
                }
                foreach ($fields as $key => $value) {
                    if ($key === '___type' || $key === 'id') {
                        // The entry type and citation key are never macro expressions.
                        $clean = $this->removeDelimit ? $this->stripEnclosure($value) : (string) $value;
                    } else {
                        $clean = $this->removeDelimiters($value);
                    }
                    $this->entries[$index][$key] = trim($clean);
                }
            }
        }

        if (empty($this->preamble)) {
            $this->preamble = FALSE;
        }
        if (empty($this->strings)) {
            $this->strings = FALSE;
        }
        if (empty($this->entries)) {
            $this->entries = FALSE;
        }

        return array($this->preamble, $this->strings, $this->entries);
    }
}
?>
