php正则表达式压缩与格式化html,css,js
注意事项
只支持压缩仅含一个不带属性的<script></script>块的html;字符串(变量值)内的多个连续空白符也会被压缩为一个空格
格式化html时,除meta、link、input、img、hr、br外的单标签需要以/>结束(例如<area/>)
压缩html
去除<!-- -->内的全部内容
多个空白符变为一个空格
去除> <内的空白符
压缩css
去除/* */内的全部内容
多个空白符变为一个空格
去除【'】【"】【{】【}】【:】【;】【,】【<style>】【</style>】两端的空白符
压缩js
去除/* */内的全部内容
去除整行的//注释(行尾的//注释不会被去除)
多个空白符变为一个空格
去除【(】【)】【{】【}】【=】【||】【&&】【+】【:】【;】【,】【<script>】【</script>】两端的空白符
调用
<?php
// HtmlFormat is declared inside the ppt\core namespace, so it has to be
// imported (or fully qualified) before it can be referenced by its short name.
use ppt\core\HtmlFormat;

// Fail immediately if the class file is missing instead of continuing
// and dying later with "class not found".
require_once 'HtmlFormat.php';

// Download the page to process; file_get_contents() returns false on failure.
$html = file_get_contents('https://www.baidu.com/');
if ($html === false) {
    exit('Unable to download the page.');
}

// Minify and print the result with syntax highlighting.
$min_html = HtmlFormat::miniHtml($html);
highlight_string($min_html);

// Pretty-print and print the result with syntax highlighting.
$max_html = HtmlFormat::fomatHtml($html);
highlight_string($max_html);
代码
压缩与格式化
HtmlFormat.php
<?php
namespace ppt\core;
/**
 * Regex-based minifier and pretty-printer for an HTML page with inline CSS and JavaScript.
 *
 * Known limitations (they follow directly from the patterns used below):
 *  - Only attribute-less `<script>` blocks are treated as JavaScript, and the block
 *    patterns are greedy, so a page should contain at most one `<style>` block and
 *    one `<script>` block.
 *  - Whitespace is collapsed everywhere, including inside string literals.
 *  - Only whole-line `//` comments are removed from JavaScript; a trailing `//`
 *    comment stays in place and will swallow the code that follows it once the
 *    line breaks are collapsed.
 *  - Whitespace around `+` is always removed, so `a + +b` becomes `a++b`.
 */
class HtmlFormat
{
    /** Matches everything from the first `<style>` to the last `</style>`. */
    private static $styleBlock = '/\<style\>[\s\S]*\<\/style\>/';

    /** Matches everything from the first `<script>` to the last `</script>`. */
    private static $scriptBlock = '/\<script\>[\s\S]*\<\/script\>/';

    /**
     * Minify an HTML document together with its inline CSS and JavaScript.
     *
     * @param string $s Raw HTML.
     * @return string Minified HTML.
     */
    public static function miniHtml($s)
    {
        // Drop the attributes of every <style ...> tag so the block pattern can find it.
        $s = preg_replace('/\<style[\s\S]*\>/U', '<style>', $s);

        // Pull the CSS and JavaScript out so each can be processed with its own rules.
        preg_match(self::$styleBlock, $s, $styleMatch);
        preg_match(self::$scriptBlock, $s, $scriptMatch);
        $style = empty($styleMatch) ? '' : $styleMatch[0];
        $script = empty($scriptMatch) ? '' : $scriptMatch[0];

        // HTML: leave empty placeholders, strip comments, collapse whitespace.
        $html = preg_replace(self::$styleBlock, '<style></style>', $s);
        $html = preg_replace(self::$scriptBlock, '<script></script>', $html);
        $html = preg_replace('/\<\!\-\-[\s\S]*\-\-\>/U', '', $html);
        $html = preg_replace('/[\s]+/', ' ', $html);
        $html = preg_replace('/>[\s]+</', '><', $html);

        // CSS: strip /* */ comments, collapse whitespace, tighten around punctuation.
        $style = preg_replace('/\/\*[\s\S]*\*\//U', '', $style);
        $style = preg_replace('/[\s]+/', ' ', $style);
        $style = self::tighten(
            $style,
            ['\'', '"', '{', '}', ':', ';', ',', '<style>', '</style>']
        );

        // JavaScript: strip /* */ and whole-line // comments, collapse whitespace,
        // tighten around punctuation and operators.
        $script = preg_replace('/\/\*[\s\S]*\*\//U', '', $script);
        $script = preg_replace('/^[\s]*\/\/.*$\n/m', '', $script);
        $script = preg_replace('/[\s]+/', ' ', $script);
        $script = self::tighten(
            $script,
            ['(', ')', '{', '}', '=', '||', '&&', '+', ':', ';', ',', '<script>', '</script>']
        );

        // Put the processed blocks back in place of the placeholders.
        $html = self::replaceLiteral(self::$styleBlock, $style, $html);
        $html = self::replaceLiteral(self::$scriptBlock, $script, $html);

        return $html;
    }

    /**
     * Pretty-print an HTML document: one tag per line, tab-indented by nesting depth,
     * with the inline CSS and JavaScript re-indented as well.
     *
     * The input is minified first, so the notes on miniHtml() apply here too.
     * Single tags other than meta/link/input/img/hr/br must be self-closed (`/>`),
     * otherwise they are treated as opening tags and increase the indentation.
     * Text between tags is emitted unchanged.
     *
     * @param string $content Raw HTML.
     * @return string Formatted HTML using "\r\n" after each tag line.
     */
    public static function formatHtml($content)
    {
        $content = self::miniHtml($content);

        preg_match(self::$styleBlock, $content, $styleMatch);
        preg_match(self::$scriptBlock, $content, $scriptMatch);
        $style = self::formatStyle(empty($styleMatch) ? '' : $styleMatch[0]);
        $script = self::format_javascript(empty($scriptMatch) ? '' : $scriptMatch[0]);

        // Keep CSS/JS out of the tag tokenizer; they are re-inserted at the end.
        $content = preg_replace(self::$styleBlock, '<style></style>', $content);
        $content = preg_replace(self::$scriptBlock, '<script></script>', $content);

        // Split into an alternating list of tags and text nodes. Nothing is discarded,
        // so mixed content such as "<p>a<b>c</b>d</p>" survives intact.
        $tokens = preg_split(
            '/(<[^>]*>)/',
            $content,
            -1,
            PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY
        );
        if ($tokens === false) {
            $tokens = [];
        }

        $depth = 0;
        $out = '';
        $last = count($tokens) - 1;
        foreach ($tokens as $idx => $token) {
            $prev = $idx > 0 ? $tokens[$idx - 1] : null;
            $next = $idx < $last ? $tokens[$idx + 1] : null;

            // <html>, <body> and the direct children of <body> always start at column 0.
            if (strncmp($token, '<html', 5) === 0 || strncmp($token, '<body', 5) === 0) {
                $depth = 0;
            }
            if ($prev !== null && strncmp($prev, '<body', 5) === 0) {
                $depth = 0;
            }

            if (!self::isTag($token)) {
                // Text node: stays on the line of the tag that precedes it.
                $out .= $token;
                continue;
            }

            if (substr($token, -2) === '/>' || self::isSingleTag($token)) {
                // Single tag: own line, no change in depth.
                $out .= self::indent($depth) . $token . "\r\n";
                continue;
            }

            if (strncmp($token, '</', 2) !== 0) {
                // Opening tag: break the line unless text follows directly.
                $out .= self::indent($depth) . $token;
                if ($next === null || self::isTag($next)) {
                    $out .= "\r\n";
                }
                $depth++;
                continue;
            }

            // Closing tag: indent only when it starts its own line.
            $depth--;
            if ($prev === null || self::isTag($prev)) {
                $out .= self::indent($depth);
            }
            $out .= $token . "\r\n";
        }

        $out = self::replaceLiteral(self::$styleBlock, $style, $out);
        $out = self::replaceLiteral(self::$scriptBlock, $script, $out);

        // Remove blank lines.
        return preg_replace('/\n[\s]*\n/m', "\n", $out);
    }

    /**
     * Historical, misspelled name of formatHtml(); kept so existing callers keep working.
     *
     * @deprecated Use formatHtml().
     * @param string $content Raw HTML.
     * @return string Formatted HTML.
     */
    public static function fomatHtml($content)
    {
        return self::formatHtml($content);
    }

    /**
     * Re-indent a minified `<script>…</script>` block.
     *
     * A line break is inserted after every `;` and around every `{` / `}`; each line
     * is then indented by the number of braces currently open.
     *
     * @param string $js_code Minified script block (with or without the script tags).
     * @return string The code wrapped in `<script>` … `</script>`.
     */
    private static function format_javascript($js_code)
    {
        $js_code = str_replace(['<script>', '</script>'], '', $js_code);
        $js_code = str_replace(';', ";\n", $js_code);
        $js_code = str_replace('{', "\n{\n", $js_code);
        $js_code = str_replace('}', "\n}\n", $js_code);

        $lines = '';
        $depth = 0;
        foreach (explode("\n", $js_code) as $line) {
            // Skip empty lines only; a bare "0" is real code and must be kept.
            if ($line === '') {
                continue;
            }
            if (strpos($line, '}') !== false) {
                $depth--;
            }
            $lines .= "\n" . self::indent($depth) . $line;
            if (strpos($line, '{') !== false) {
                $depth++;
            }
        }

        $lines = str_replace("\nfunction", "\n\nfunction", $lines);
        // Re-attach a ";" that ended up alone on its line (e.g. after "}").
        $lines = str_replace("\n;", ';', $lines);
        // Pull "})" and "){" back together.
        $lines = preg_replace('/\}[\s]+\)/', '})', $lines);
        $lines = preg_replace('/\)[\s]+\{/', '){', $lines);

        return "<script>\n" . $lines . "\n</script>";
    }

    /**
     * Re-indent a minified `<style>…</style>` block.
     *
     * Closing braces are first swapped for temporary markers so that the generic
     * ";" rule does not touch the ";" directly in front of them.
     *
     * @param string $style Minified style block, or '' when the page has none.
     * @return string Formatted style block.
     */
    private static function formatStyle($style)
    {
        $style = str_replace('<style>', "<style>\n", $style);
        $style = str_replace(';}}', '@#', $style);   // end of a rule nested in a block
        $style = str_replace(';}', '@@', $style);    // end of a rule with trailing ";"
        $style = str_replace('}', '$$$', $style);    // any other closing brace
        $style = str_replace(';', ";\n\t", $style);
        $style = str_replace('@#', ";\n\t}\n}\n\n", $style);
        $style = str_replace('@@', ";\n}\n", $style);
        $style = str_replace('$$$', "\n}\n\n", $style);
        $style = str_replace('{', "{\n\t", $style);
        // Shift the whole block two levels in, then pull the closing tag back out.
        $style = str_replace("\n", "\n\t\t", $style);
        $style = str_replace("\t</style>", '</style>', $style);

        return $style;
    }

    /**
     * Remove one optional whitespace character on each side of every given token.
     *
     * @param string   $code   Code whose whitespace runs are already collapsed.
     * @param string[] $tokens Literal tokens, processed in the given order.
     * @return string
     */
    private static function tighten($code, array $tokens)
    {
        foreach ($tokens as $token) {
            $code = preg_replace('/[\s]?' . preg_quote($token, '/') . '[\s]?/', $token, $code);
        }

        return $code;
    }

    /**
     * Replace every match of $pattern with $replacement taken literally.
     *
     * preg_replace() would interpret "$1", "\1" or "\\" inside the replacement as
     * references, which corrupts CSS/JS containing such sequences; a callback
     * inserts the text untouched.
     *
     * @param string $pattern     PCRE pattern.
     * @param string $replacement Literal replacement text.
     * @param string $subject     Text to search.
     * @return string|null
     */
    private static function replaceLiteral($pattern, $replacement, $subject)
    {
        return preg_replace_callback(
            $pattern,
            function () use ($replacement) {
                return $replacement;
            },
            $subject
        );
    }

    /**
     * Whether a token produced by the tokenizer is a tag (as opposed to text).
     *
     * @param string $token
     * @return bool
     */
    private static function isTag($token)
    {
        return $token !== '' && $token[0] === '<' && substr($token, -1) === '>';
    }

    /**
     * Whether a tag is one of the single tags recognised without a closing "/>".
     * `<html` is in the list so that it does not indent its children.
     *
     * @param string $tag
     * @return bool
     */
    private static function isSingleTag($tag)
    {
        foreach (['<meta', '<link', '<input', '<img', '<hr', '<br', '<html'] as $prefix) {
            if (strncmp($tag, $prefix, strlen($prefix)) === 0) {
                return true;
            }
        }

        return false;
    }

    /**
     * Indentation for the given nesting depth; negative depths yield no indentation.
     *
     * @param int $depth
     * @return string
     */
    private static function indent($depth)
    {
        return str_repeat("\t", max(0, $depth));
    }
}
//压缩html
上一篇: 89. html网页
下一篇: html接入百度地图