Créer un parseur ll

Contenu du snippet

Les parseurs, vous les utilisez mais vous ne savez pas forcement comment ils marchent.

Voici un exemple de parseur ll simple sans lexer :

Son fonctionnement est très simple, il commence par consommer la plus petite entité du flux : le caractère avec la fonction readChar, et a partir de là suivent un lot de fonctions qui permettent au fur et a mesure des entités plus complexes comme les nombres, les chaines etc. et ainsi définir les règles de grammaire du parsing.

Ce parseur contient deux buffers : le buffer principal qui contient le flux à parser et le buffer de lecture qui contient ce qui a été lu.

Il reste extrêmement basique bien entendu ^^

Source / Exemple :


<?php
define('BR', '<br />');

define('NO_ERROR', -1);
define('EMPTY_BUFFER', 0);
define('INVALID_DIGIT', 1);
define('INVALID_VARNAME', 2);
define('VAR_TAG_EXPECTED', 3);
define('CLOSE_TAG_EXPECTED', 4);
/*

  • Specifics chars
  • /
define('VAR_TAG', '$'); define('CLOSE_TAG', ';'); define('FLOAT_COMMA', '.'); abstract class LLP { /*
  • Main bufer
  • /
private static $buffer = ''; private static $bSize = 0; /*
  • Read buffer
  • /
private static $readBuffer = ''; private static $rbSize = 0; /*
  • Read cursors
  • /
private static $cursor = 0; # Current private static $index = 0; # Global /*
  • Prepare will skip following chars
  • /
private static $skip = array ( ' ', "\n", "\t", "\r" ); /*
  • Errors
  • /
private static $errorType = -1; public static function getError() { switch (self::$errorType) { case 0 : $str = 'EMPTY_BUFFER'; break; case 1 : $str = 'INVALID_DIGIT'; break; case 2 : $str = 'INVALID_VARNAME'; break; case 3 : $str = 'VAR_TAG_EXPECTED'; break; case 4 : $str = 'CLOSE_TAG_EXPECTED'; break; default : $str = 'NO_ERROR'; } return 'PARSE_ERROR >> '.$str; } /*
  • Returns global buffer index
  • /
public static function getIndex() { return self::$index; } /*
  • Returns char at index self::$index;
  • /
public static function getCharAtCurrentIndex() { return self::$buffer[self::$index]; } /*
  • return current buffer index
  • /
public static function getCursor() { return self::$cursor; } /*
  • Clean main buffer
  • /
public static function clean() { self::$buffer = ''; self::$bSize = 0; self::$readBuffer = ''; self::$rbSize = 0; self::$index = 0; self::$cursor = 0; } /*
  • Rturn main buffer
  • /
public static function buffer() { return self::$buffer; } /*
  • Clean Read buffer
  • /
public static function flush() { $readBuffer = self::$readBuffer; self::$readBuffer = ''; return $readBuffer; } /*
  • Print buffers
  • /
public static function cout() { $out = ''; $out .= '<pre>'; $out .= 'MAIN_BUFFER >> '; $out .= (strlen(self::$buffer) > 0 ? self::$buffer : 'EMPTY').BR; $out .= 'READ_BUFFER >> '; $out .= (strlen(self::$readBuffer) > 0 ? self::$readBuffer : 'EMPTY').BR; $out .= '</pre>'; echo $out; } /*
  • Prepare for read
  • /
protected static function prepare() { if (self::$bSize === 0) { self::$errorType = EMPTY_BUFFER; return false; } self::$cursor = 0; while (in_array(self::$buffer[0], self::$skip)) self::unbufferize(); return true; } /*
  • Add text to main buffer
  • /
public static function bufferize($str = '') { if (is_string($str)) { self::$buffer = trim($str.self::$buffer); self::$bSize += strlen($str); return true; } return false; } /*
  • Eat $size left chars in main buffer
  • /
protected static function unbufferize($size = 1) { if ($size > 0) { if ($size <= self::$bSize) { self::$buffer = substr(self::$buffer, $size, self::$bSize); self::$bSize -= strlen($size); self::$cursor += $size; self::$index += $size; return true; } return false; } return false; } /*
  • Private read functions
  • /
/*
  • Read $size chars in main buffer and unbuferize them
  • /
protected static function read($size = 1) { if ($size > 0) { if ($size > self::$bSize) { $size = self::$bSize; self::$bSize = 0; } self::$readBuffer .= substr(self::$buffer, 0, $size); self::$rbSize += strlen($size); return self::unbufferize($size); } return false; } /*
  • Try to read any char in main buffer
  • /
protected static function readChar($char = NULL) { if (!is_null($char)) { if (self::$buffer[0] !== $char) return false; } return self::read(); } /*
  • Try to read specified digit in main buffer
  • /
protected static function readDigit($digit = NULL) { if (ctype_digit(self::$buffer[0])) { if (ctype_digit($digit)) { if (self::$buffer[0] !== $digit) return false; } return self::read(); } return false; } /*
  • Try to read a int in main buffer
  • /
protected static function readInteger($int) { if (!ctype_digit($int)) return false; $int = strval($int); $numSize = strlen($int); $i = 0; while ($i < $numSize) { if (self::readDigit($int[$i])) $i++; else return false; } return true; } /*
  • Try to read a float in main buffer
  • /
protected static function readFloat($float = '', $separator = FLOAT_COMMA) { $float = explode($separator, $float); return self::readInteger($float[0]) && self::readChar($separator) && self::readInteger($float[1]); } /*
  • Public basic read functions
  • /
/*
  • Try to read specified string
  • /
public static function readString($str = '') { if (!is_string($str)) return false; $strSize = strlen($str); $i = 0; while ($i < $strSize) { if (self::readChar($str[$i]) || self::readDigit($str[$i])) $i++; else return false; } return true; } /*
  • Read until next pattern
  • /
public static function readNext($str) { if (!is_string($str)) return false; while (!self::readString($str)) ; if (self::$bSize == 0) return false; return true; } /*
  • Read buffer until buffer contain alpha chars
  • /
public static function readAlpha() { self::prepare(); $count = 0; while ((ctype_alpha(self::$buffer[0]) && self::readChar())) $count++; if ($count > 0) return true; return false; } /*
  • Read buffer until buffer contain alphanum chars
  • /
public static function readAlphaNum() { self::prepare(); $count = 0; while ((ctype_alnum(self::$buffer[0]) && self::readChar())) $count++; if ($count > 0) return true; return false; } /*
  • Try to read a number (int or float)
  • /
public static function readNumber($number) { self::prepare(); if (strstr($number, FLOAT_COMMA)) return self::readFloat($number); return self::readInteger($number); } /*
  • Try to read a word in main buffer
  • /
public static function readWord($str) { self::prepare(); return self::readString($str); } /*
  • Test patterned read functions
  • /
public static function readVar() { self::prepare(); if (self::readString(VAR_TAG)) { self::prepare(); if (self::readAlpha() && !self::readAlphaNum()) { if (self::readChar(CLOSE_TAG)) return true; else self::$errorType = CLOSE_TAG_EXPECTED; } else { self::$errorType = INVALID_VARNAME; } } else self::$errorType = VAR_TAG_EXPECTED; return false; } } define('XML_OPEN_NODE', '<'); define('XML_CLOSE_NODE', '>'); define('XML_TERMINAL_NODE', '</'); class NodeParser extends LLP { private static $currentNodeName = ''; private static $nodeContent = ''; public static function nodeName() { return self::$currentNodeName; } public static function nodeContent() { return self::$nodeContent; } public static function openNode() { if (parent::readString(XML_OPEN_NODE)) parent::flush(); if (parent::readAlphaNum()) self::$currentNodeName = parent::flush(); if (parent::readString(XML_CLOSE_NODE)) { parent::flush(); return true; } return false; } public static function closeNode() { if (parent::readString(XML_TERMINAL_NODE)) parent::flush(); if (parent::readString(self::$currentNodeName)) parent::flush(); if (parent::readString(XML_CLOSE_NODE)) { parent::flush(); return true; } return false; } public static function node() { if (self::openNode()) { parent::readAlphaNum(); self::$nodeContent = parent::flush(); if (self::closeNode()) { return true; } else { echo 'CLOSE_NODE_ERROR'.BR; } } else { echo 'OPEN_NODE_ERROR'.BR; } return false; } } LLP::bufferize('public private 4242 42.42 toto42 $titi;'); LLP::cout(); if (LLP::readWord('public')) { echo 'Read String : '.LLP::flush().BR; }else{ echo 'Read String : FAIL at index '.LLP::getIndex().BR; } if (LLP::readWord('private')) { echo 'Read String : '.LLP::flush().BR; }else{ echo 'Read String : FAIL at index '.LLP::getIndex().BR; } if (LLP::readNumber(4242)) { echo 'Read Int : '.LLP::flush().BR; } else{ echo 'Read Int : FAIL at index '.LLP::getIndex().BR; LLP::cout(); } if (LLP::readNumber(42.42)) { echo 'Read Float : '.LLP::flush().BR; } else{ echo 'Read Float : FAIL at index '.LLP::getIndex().BR; LLP::cout(); } if (LLP::readAlphaNum()) { echo 'Read AlphaNum : '.LLP::flush().BR; }else{ echo 'Read AlphaNum : FAIL at index '.LLP::getIndex().BR; } if (LLP::readVar()) { echo 'Read Var : '.LLP::flush().BR; }else{ echo 'Read Var : FAIL at index '.LLP::getIndex().' : '.LLP::getError().BR; } echo 'Done'; NodeParser::clean(); NodeParser::bufferize('<toto>titi</toto>'); NodeParser::cout(); if (NodeParser::node()) { echo 'Read Node : '.XMLParser::nodeName().' with content '.XMLParser::nodeContent().BR; }else{ echo 'Read Node : FAIL at index '.XMLParser::getIndex().' : '.XMLParser::getError().BR; } ?>

Conclusion :


Cette source est purement didactique ^^

A voir également

Vous n'êtes pas encore membre ?

inscrivez-vous, c'est gratuit et ça prend moins d'une minute !

Les membres obtiennent plus de réponses que les utilisateurs anonymes.

Le fait d'être membre vous permet d'avoir un suivi détaillé de vos demandes et codes sources.

Le fait d'être membre vous permet d'avoir des options supplémentaires.