Parse WordPress-like Shortcodes

Here's a utility class that we used on our project. It matches all shortcodes in a string (including HTML) and returns, for each one, an associative array containing its name, attributes and content.

/**
 * Finds WordPress-style shortcodes such as [name attr="value"] or
 * [name attr="value"]content[/name] inside a string and returns them as arrays.
 */
final class Parser {

    // Regex101 reference: https://regex101.com/r/pJ7lO1
    // Named groups:
    //   shortcode - the whole match (may start with one whitespace character, see the leading \s?)
    //   name      - three or more word characters or hyphens
    //   attrs     - raw attribute text between the name and the closing "]" (optional)
    //   content   - text before a closing [/tag] (optional); the closing tag name is not
    //               compared with the opening name
    const SHORTOCODE_REGEXP = "/(?P<shortcode>(?:(?:\\s?\\[))(?P<name>[\\w\\-]{3,})(?:\\s(?P<attrs>[\\w\\d,\\s=\\\"\\'\\-\\+\\#\\%\\!\\~\\`\\&\\.\\s\\:\\/\\?\\|]+))?(?:\\])(?:(?P<content>[\\w\\d\\,\\!\\@\\#\\$\\%\\^\\&\\*\\(\\\\)\\s\\=\\\"\\'\\-\\+\\&\\.\\s\\:\\/\\?\\|\\<\\>]+)(?:\\[\\/[\\w\\-\\_]+\\]))?)/u";

    // Originally based on https://regex101.com/r/sZ7wP0, rewritten because that pattern
    // kept the opening quote on one-character values (day="4" gave "4), dropped
    // one-character unquoted values, and let the attribute name swallow part of a value
    // that contains "=" (href="a?b=cd").
    //
    // Matches one name=value pair. The value is one of three alternatives sharing the
    // group name "value" through a branch-reset group (?|...):
    //   "double quoted"  - anything up to the next double quote (closing quote optional)
    //   'single quoted'  - anything up to the next single quote (closing quote optional)
    //   unquoted         - whitespace-separated words, stopping before the next name=
    const ATTRIBUTE_REGEXP = '/(?<name>[^\s="\']+)=(?|"(?<value>[^"]*)"?|\'(?<value>[^\']*)\'?|(?<value>[^\s"\'>]+(?:\s+(?![^\s="\']+=)[^\s"\'>]+)*))/u';

    /**
     * Returns every shortcode found in $text, in order of appearance.
     *
     * Each entry has the keys 'shortcode' (full matched text) and 'name'; 'attrs'
     * (list of single-pair arrays, see parse_attrs) and 'content' are present only
     * when the corresponding regex group was captured.
     *
     * @param string $text Text (plain or HTML) to scan.
     * @return array List of shortcode descriptions; empty when nothing matches.
     */
    public static function parse_shortcodes($text) {
        // preg_match_all returns 0 for "no match" and false on a PCRE error;
        // both cases yield an empty result.
        if (!preg_match_all(self::SHORTOCODE_REGEXP, $text, $matches, PREG_SET_ORDER)) {
            return array();
        }

        $shortcodes = array();
        foreach ($matches as $i => $match) {
            $shortcode = array(
                'shortcode' => $match['shortcode'],
                'name' => $match['name'],
            );
            // With PREG_SET_ORDER trailing groups that did not participate are absent,
            // hence the isset() checks.
            if (isset($match['attrs'])) {
                $shortcode['attrs'] = self::parse_attrs($match['attrs']);
            }
            if (isset($match['content'])) {
                $shortcode['content'] = $match['content'];
            }
            $shortcodes[$i] = $shortcode;
        }

        return $shortcodes;
    }

    /**
     * Splits a raw attribute string into name/value pairs.
     *
     * @param string $attrs Raw attribute text, e.g. file="header.html" id="3".
     * @return array List of one-element arrays (name => value), in source order.
     */
    private static function parse_attrs($attrs) {
        $attributes = array();
        if (!preg_match_all(self::ATTRIBUTE_REGEXP, $attrs, $matches, PREG_SET_ORDER)) {
            return $attributes;
        }
        foreach ($matches as $match) {
            $attributes[] = array($match['name'] => $match['value']);
        }
        return $attributes;
    }
}

// Dump the parsed structure for a single shortcode in human-readable form.
echo print_r(Parser::parse_shortcodes('[include file="header.html"]'), true);

Output:

Array
(
    [0] => Array
        (
            [shortcode] => [include file="header.html"]
            [name] => include
            [attrs] => Array
                (
                    [0] => Array
                        (
                            [file] => header.html
                        )
                )
        )
)

I also needed this functionality in my PHP framework. This is what I've written; it works pretty well. It uses anonymous functions, which I really like (they are a bit like callback functions in JavaScript).

<?php
//Sample markup whose shortcodes will be expanded below
$content = '<p>Hello, my name is John an my age is [calc-age day="4" month="10" year="1991"].</p>'
    . '<p>Hello, my name is Carol an my age is [calc-age day="26" month="11" year="1996"].</p>';

//Registry of shortcode handlers: a plain associative array mapping a shortcode name to an anonymous function. Each function receives the shortcode's attributes as an associative array and returns the replacement value.
$shortcodes = array(
    "calc-age" => function($data){
        //Without a complete birth date there is nothing to compute
        if(!isset($data["day"], $data["month"], $data["year"])){
            return "";
        }
        $thisMonth = date("m");
        $years = date("Y") - $data["year"];
        //The birthday is still ahead this year when the birth month has not been reached,
        //or when it is the birth month but the birth day has not been reached
        $birthdayPending = $thisMonth < $data["month"]
            || ($thisMonth == $data["month"] && date("d") < $data["day"]);
        if($birthdayPending){
            $years--;
        }
        return $years;
    }
);
//http://stackoverflow.com/questions/18196159/regex-extract-variables-from-shortcode
/**
 * Replaces every registered shortcode in $content with the output of its handler.
 *
 * A shortcode has the form [name key="value" other=value]; the name must be followed
 * by a single space and at least one character before the closing bracket. Each
 * handler is called with an associative array of the attributes (double quotes
 * around a value are stripped; a token without "=" becomes a key with an empty
 * string value) and its return value is substituted for the shortcode.
 *
 * @param string $content    Text that may contain shortcodes.
 * @param array  $shortcodes Map of shortcode name => callable(array $params).
 * @return string The text with all matching shortcodes replaced.
 */
function handleShortcodes($content, $shortcodes){
    //Loop through all shortcodes
    foreach($shortcodes as $key => $function){
        //preg_quote keeps regex metacharacters in the name (".", "+", "/", ...) literal
        $pattern = "/\[".preg_quote($key, "/")." (.+?)\]/";
        $replaced = preg_replace_callback($pattern, function($match) use ($function){
            $params = array();
            //Tokenise name=value pairs; a double-quoted value may contain spaces and "="
            preg_match_all('/([^\s=]+)(?:=("[^"]*"|\S*))?/', $match[1], $pairs, PREG_SET_ORDER);
            foreach($pairs as $pair){
                $params[$pair[1]] = isset($pair[2]) ? trim($pair[2], '"') : "";
            }
            return $function($params);
        }, $content);
        //preg_replace_callback returns null on a PCRE error; keep the text unchanged then
        if($replaced !== null){
            $content = $replaced;
        }
    }
    return $content;
}
//Expand the shortcodes in the sample content and output the result
print handleShortcodes($content, $shortcodes);
?>

The result:
Hello, my name is John an my age is 22.
Hello, my name is Carol an my age is 17.


Using this function (`GetBetween`, whose definition is not shown here):

$code = '[include file="header.html"]';
//GetBetween() is not defined in this snippet; presumably it returns the text between the two delimiters - TODO confirm
$innerCode = GetBetween($code, '[', ']');
$innerCodeParts = explode(' ', $innerCode);

//The first space-separated token is the command name
$command = $innerCodeParts[0];

//Only the first attribute is read; fall back to an empty string when the shortcode has none
$attributeAndValue = isset($innerCodeParts[1]) ? $innerCodeParts[1] : '';
//Split on the first "=" only so a value that itself contains "=" stays whole,
//and pad so both parts exist even when there is no "=" at all
$attributeParts = array_pad(explode('=', $attributeAndValue, 2), 2, '');
$attribute = $attributeParts[0];
$attributeValue = str_replace('"', '', $attributeParts[1]);

echo $command . ' ' . $attribute . '=' . $attributeValue;
//this will result in include file=header.html

$command will be "include"

$attribute will be "file"

$attributeValue will be "header.html"

Tags:

Php