php正则表达式压缩与格式化html,css,js

注意事项

只支持压缩含一个<script></script>的html,且变量内多个空格也会被压缩为一个

格式化html时,除meta、link、input、img、hr、br外的单标签需要以/>结束(例如<source src="a.mp4"/>),否则会被当作双标签的开始处理

压缩html

去除<!-- -->内的全部内容

多个空白符变为一个空格

去除> <内的空白符

压缩css

去除/* */内的全部内容

多个空白符变为一个空格

去除【'】【"】【{】【}】【:】【;】【,】【<style>】【</style>】两端的空白符

压缩js

去除/* */内的全部内容
去除以//开头的整行注释(写在代码行尾的//注释不会被去除)
多个空白符变为一个空格
去除【(】【)】【{】【}】【=】【||】【&&】【+】【:】【;】【,】【<script>】【</script>】两端的空白符

调用 

<?php

// HtmlFormat is declared inside the ppt\core namespace, so it has to be
// imported (or fully qualified) before it can be referenced from here.
use ppt\core\HtmlFormat;

include 'HtmlFormat.php';

$html = file_get_contents('https://www.baidu.com/');

// file_get_contents() returns false on failure; stop instead of formatting a boolean.
if ($html === false) {
    exit('Failed to download the page.');
}

// Minify

$min_html = HtmlFormat::miniHtml($html);

highlight_string($min_html);

// Pretty-print

$max_html = HtmlFormat::fomatHtml($html);

highlight_string($max_html);

代码

压缩与格式化

HtmlFormat.php

<?php
namespace ppt\core;
/**
 * Regex-based minifier and pretty-printer for an HTML page with inline CSS and JavaScript.
 *
 * Known limits (inherent to the regex approach used here):
 *  - Only one `<style>` block and one attribute-less `<script>` block are handled; the
 *    patterns are greedy, so everything between the first opening tag and the last
 *    closing tag is treated as a single block.
 *  - Whitespace inside CSS/JS string literals is collapsed like any other whitespace.
 *  - Only whole-line `//` comments are removed from JavaScript.
 */
class HtmlFormat
{
    /** Greedy match from the first `<style>` to the last `</style>`. */
    const STYLE_BLOCK = '/\<style\>[\s\S]*\<\/style\>/';

    /** Greedy match from the first attribute-less `<script>` to the last `</script>`. */
    const SCRIPT_BLOCK = '/\<script\>[\s\S]*\<\/script\>/';

    /**
     * Minify an HTML document together with its inline CSS and JavaScript.
     *
     * HTML: comments are removed, whitespace runs become one space, whitespace between
     * `>` and `<` is dropped. CSS and JS are extracted, minified separately and put back.
     *
     * @param string $s Raw HTML.
     * @return string Minified HTML.
     */
    public static function miniHtml($s)
    {
        // Normalise `<style ...attributes>` to a bare `<style>` so the block pattern matches.
        // `<script ...attributes>` is intentionally left alone and therefore not minified.
        $s = preg_replace('/\<style[\s\S]*\>/U', '<style>', $s);

        // Pull CSS and JS out so the HTML rules below cannot touch them.
        $style = self::firstMatch(self::STYLE_BLOCK, $s);
        $script = self::firstMatch(self::SCRIPT_BLOCK, $s);

        // --- HTML: leave empty placeholders where the CSS/JS used to be.
        $html = preg_replace(self::STYLE_BLOCK, '<style></style>', $s);
        $html = preg_replace(self::SCRIPT_BLOCK, '<script></script>', $html);
        $html = preg_replace('/\<\!\-\-[\s\S]*\-\-\>/U', '', $html);
        $html = preg_replace('/[\s]+/', ' ', $html);
        $html = preg_replace('/>[\s]+</', '><', $html);

        // --- CSS: strip /* */ comments, collapse whitespace, tighten around punctuation.
        $style = preg_replace('/\/\*[\s\S]*\*\//U', '', $style);
        $style = preg_replace('/[\s]+/', ' ', $style);
        $style = self::stripSpaceAround(
            $style,
            ["'", '"', '{', '}', ':', ';', ',', '<style>', '</style>']
        );

        // --- JS: strip /* */ comments and whole-line // comments, then collapse whitespace.
        // NOTE(review): a trailing `// comment` after code on the same line is NOT removed,
        // and once newlines are collapsed it comments out the rest of the script.
        $script = preg_replace('/\/\*[\s\S]*\*\//U', '', $script);
        $script = preg_replace('/^[\s]*\/\/.*$\n/m', '', $script);
        $script = preg_replace('/[\s]+/', ' ', $script);
        $script = self::stripSpaceAround(
            $script,
            ['(', ')', '{', '}', '=', '||', '&&', '+', ':', ';', ',', '<script>', '</script>']
        );

        // --- Put CSS and JS back. The text must be inserted literally: passing it as a
        // preg_replace() replacement would expand `$1` / `\1` / `\\` found in the CSS/JS.
        $html = self::replaceLiteral(self::STYLE_BLOCK, $style, $html);
        $html = self::replaceLiteral(self::SCRIPT_BLOCK, $script, $html);

        return $html;
    }

    /**
     * Pretty-print an HTML document: minify it first, then re-indent tags, CSS and JS.
     *
     * Void elements other than meta/link/input/img/hr/br must be written with a
     * trailing `/>` to be recognised as single tags.
     *
     * @param string $content Raw HTML.
     * @return string Indented HTML (tag lines end with "\r\n", CSS/JS lines with "\n").
     */
    public static function fomatHtml($content)
    {
        $content = self::miniHtml($content);

        $style = self::firstMatch(self::STYLE_BLOCK, $content);
        $script = self::firstMatch(self::SCRIPT_BLOCK, $content);

        // CSS: `@#`, `@@` and `$$$` are temporary markers for `;}}`, `;}` and `}` so the
        // later `;` / `{` expansions do not re-touch braces that were already handled.
        // str_replace() applies the pairs strictly in the order listed.
        $style = str_replace(
            ['<style>', ';}}', ';}', '}', ';', '@#', '@@', '$$$', '{', "\n", "\t</style>"],
            [
                "<style>\n",
                '@#',
                '@@',
                '$$$',
                ";\n\t",
                ";\n\t}\n}\n\n",
                ";\n}\n",
                "\n}\n\n",
                "{\n\t",
                "\n\t\t",
                '</style>',
            ],
            $style
        );

        $script = self::format_javascript($script);

        // Empty placeholders keep the CSS/JS out of the tag splitter below.
        $content = preg_replace(self::STYLE_BLOCK, '<style></style>', $content);
        $content = preg_replace(self::SCRIPT_BLOCK, '<script></script>', $content);

        // --- Split the markup into a flat list of tags and text nodes.
        $tmp = explode('><', $content);
        $lastIndex = count($tmp) - 1;
        $new = [];
        foreach ($tmp as $key => $item) {
            if ($lastIndex === 0) {
                // No `><` at all: nothing was cut off, so nothing needs to be restored.
                $new[] = $item;
            } elseif ($key === 0) {
                $new[] = $item . '>';
            } elseif ($key === $lastIndex) {
                $new[] = '<' . $item;
            } elseif (self::isText($item)) {
                // A piece without any bracket is the inside of a single tag.
                $new[] = '<' . $item . '>';
            } else {
                // `tag>text</tag` style piece: even slots are tag bodies, odd slots are text.
                // Splitting on the brackets directly keeps `@` in text intact and keeps
                // every segment, however many there are.
                foreach (preg_split('/[<>]/', $item) as $slot => $piece) {
                    if ($slot % 2 === 0) {
                        $new[] = '<' . $piece . '>';
                    } else {
                        // NOTE(review): this removes ALL spaces from the text node, so
                        // "Hello World" becomes "HelloWorld" — confirm this is intended.
                        $new[] = str_replace(" ", '', $piece);
                    }
                }
            }
        }

        // --- Emit the list with one tab per nesting level.
        $i = 0;
        $str = '';
        $total = count($new);
        foreach ($new as $key => $item) {
            // <html> and <body> (and the first child of <body>) always sit at column 0.
            if (substr($item, 0, strlen('<html')) === '<html' || substr($item, 0, strlen('<body')) === '<body') {
                $i = 0;
            }
            if ($key > 0 && substr($new[$key - 1], 0, strlen('<body')) === '<body') {
                $i = 0;
            }

            if (self::isText($item)) {
                // Text node: printed inline, right after its opening tag.
                $str .= $item;
                continue;
            }

            if (substr($item, -2) === '/>' || self::isVoidTag($item)) {
                // Single tag: own line, depth unchanged.
                $str .= self::indent($i) . $item . "\r\n";
                continue;
            }

            if (substr($item, 0, 2) !== '</') {
                // Opening tag: stay on the same line when a text node follows.
                $next = isset($new[$key + 1]) ? $new[$key + 1] : '';
                $textFollows = $key !== ($total - 2) && self::isText($next);

                $str .= self::indent($i) . $item;
                if (!$textFollows) {
                    $str .= "\r\n";
                }
                $i++;
                continue;
            }

            // Closing tag: indent only when it does not directly follow a text node.
            $i--;
            if (!($key > 0 && self::isText($new[$key - 1]))) {
                $str .= self::indent($i);
            }
            $str .= $item . "\r\n";
        }

        // Re-insert CSS/JS literally (see miniHtml for why preg_replace() is unsafe here).
        $str = self::replaceLiteral(self::STYLE_BLOCK, $style, $str);
        $str = self::replaceLiteral(self::SCRIPT_BLOCK, $script, $str);

        // Squash blank lines.
        $str = preg_replace('/\n[\s]*\n/m', "\n", $str);

        return $str;
    }

    /**
     * Re-indent minified JavaScript: one statement per line, braces on their own lines,
     * one tab per brace depth. The result is wrapped in `<script>` ... `</script>`.
     *
     * Splitting is purely textual, so `;`, `{` and `}` inside strings or `for (;;)`
     * headers also start new lines.
     *
     * @param string $js_code Minified JS, optionally still wrapped in script tags.
     * @return string Formatted `<script>` block.
     */
    private static function format_javascript($js_code)
    {
        $js_code = str_replace(['<script>', '</script>'], '', $js_code);
        $js_code = str_replace(';', ";\n", $js_code);
        $js_code = str_replace('{', "\n{\n", $js_code);
        $js_code = str_replace('}', "\n}\n", $js_code);

        $lines = '';
        $depth = 0;
        foreach (explode("\n", $js_code) as $line) {
            // Skip empty lines only; a plain truthiness test would also drop a line "0".
            if ($line === '') {
                continue;
            }
            // A closing brace is printed one level shallower than its contents.
            if (strpos($line, '}') !== false) {
                $depth--;
            }
            $lines .= "\n" . self::indent($depth) . $line;
            if (strpos($line, '{') !== false) {
                $depth++;
            }
        }

        $lines = str_replace("\nfunction", "\n\nfunction", $lines); // blank line before functions
        $lines = str_replace("\n;", ";", $lines);                    // `}` + newline + `;` => `};`
        $lines = preg_replace('/\}[\s]+\)/', "})", $lines);          // keep `})` together
        $lines = preg_replace('/\)[\s]+\{/', "){", $lines);          // keep `){` together

        return "<script>\n" . $lines . "\n</script>";
    }

    /** Return the whole first match of $pattern in $subject, or '' when there is none. */
    private static function firstMatch($pattern, $subject)
    {
        preg_match($pattern, $subject, $found);

        return empty($found) ? '' : $found[0];
    }

    /** Replace every match of $pattern with $replacement taken literally (no backreferences). */
    private static function replaceLiteral($pattern, $replacement, $subject)
    {
        return preg_replace_callback(
            $pattern,
            static function () use ($replacement) {
                return $replacement;
            },
            $subject
        );
    }

    /** Remove at most one whitespace character on each side of every token, in the given order. */
    private static function stripSpaceAround($text, array $tokens)
    {
        foreach ($tokens as $token) {
            $text = preg_replace('/[\s]?' . preg_quote($token, '/') . '[\s]?/', $token, $text);
        }

        return $text;
    }

    /** True when $item contains neither `<` nor `>`, i.e. it is a text node. */
    private static function isText($item)
    {
        return strpos($item, '<') === false && strpos($item, '>') === false;
    }

    /** True when $item contains one of the tags that are always printed as single tags. */
    private static function isVoidTag($item)
    {
        foreach (['<meta', '<link', '<input', '<img', '<hr', '<br', '<html'] as $needle) {
            if (strpos($item, $needle) !== false) {
                return true;
            }
        }

        return false;
    }

    /** Tabs for the given nesting depth; zero or negative depths give an empty string. */
    private static function indent($depth)
    {
        return str_repeat("\t", max(0, $depth));
    }
}
//压缩html