'html'에 해당되는 글 2건
- 2008.11.17 PHP Convert HTML to text
- 2008.11.17 vb - HTML Tag 삭제
Example 1. Convert HTML to text
<?php
// $document should contain an HTML document.
// This removes HTML tags and javascript sections, and collapses the white
// space that follows a line break. It also converts some common HTML
// entities to their text equivalent. The result is stored in $text.

// Guarded so that including this snippet more than once does not trigger a
// "cannot redeclare function" fatal error.
if (!function_exists('convertHtmlToText')) {
    /**
     * Convert an HTML document to plain text.
     *
     * Steps, in order:
     *  1. remove <script> elements together with their content;
     *  2. remove every remaining tag;
     *  3. drop the white space that follows a CR or LF, keeping that CR/LF;
     *  4. decode a small set of named entities and decimal numeric entities.
     *
     * Decoded characters are emitted as single bytes via chr() (for example
     * &copy; becomes the byte 0xA9). Numeric entities above 255 do not fit in
     * one byte and are left untouched.
     *
     * @param string $document HTML source to convert.
     *
     * @return string|null Plain text, or null when the PCRE engine reports an
     *                     error (same convention as preg_replace()).
     */
    function convertHtmlToText(string $document): ?string
    {
        // Structural clean-up runs before entity decoding, so entity-encoded
        // markup such as "&lt;b&gt;" ends up as literal text instead of being
        // stripped as a tag.
        $patterns = [
            '@<script[^>]*?>.*?</script>@si', // strip out javascript
            '@<[\/\!]*?[^<>]*?>@si',          // strip out HTML tags
            '@([\r\n])[\s]+@',                // strip white space after a line break
        ];
        $stripped = preg_replace($patterns, ['', '', '\1'], $document);
        if ($stripped === null) {
            return null;
        }

        $named = [
            'quot'  => '"',
            'amp'   => '&',
            'lt'    => '<',
            'gt'    => '>',
            'nbsp'  => ' ',
            'iexcl' => chr(161),
            'cent'  => chr(162),
            'pound' => chr(163),
            'copy'  => chr(169),
        ];

        // All entities are decoded in ONE pass. Decoding them one pattern after
        // another would decode twice: "&amp;lt;" -> "&lt;" -> "<".
        // A callback replaces the former /e (eval) modifier, which was removed
        // in PHP 7.0 and also parsed zero-padded numbers as octal literals.
        return preg_replace_callback(
            '@&(quot|amp|lt|gt|nbsp|iexcl|cent|pound|copy|#\d+);@i',
            static function (array $match) use ($named): string {
                $entity = strtolower($match[1]);
                if ($entity[0] !== '#') {
                    return $named[$entity];
                }

                $code = (int) substr($entity, 1);
                if ($code === 160) {
                    // Non-breaking space is mapped to a plain space, like &nbsp;.
                    return ' ';
                }

                // chr() only covers 0-255; keep larger entities as written
                // rather than emitting a wrapped, unrelated byte.
                return $code <= 255 ? chr($code) : $match[0];
            },
            $stripped
        );
    }
}

$text = convertHtmlToText((string) ($document ?? ''));
' --------------------------------------------------------
' strip_tag: removes HTML tags from atcText and returns the result.
'
'   atcText - the HTML text to clean.
'   Returns - the text after all replacements below have been applied.
'
' NOTE(review): eregi_replace is not defined in this snippet. It is called
' as (pattern, replacement, subject) with "$1" back-references, so it is
' presumably a case-insensitive regex-replace helper defined elsewhere -
' confirm.
' NOTE(review): atcText is reassigned in place; if the argument is passed
' ByRef (the VBScript/VB6 default) the caller's variable changes too -
' confirm that callers expect this.
' --------------------------------------------------------
Function strip_tag(atcText)
    ' Decode entity-encoded angle brackets first so that encoded tags are
    ' caught by the tag patterns below. The previous patterns replaced "<"
    ' with "<" and ">" with ">", which did nothing.
    atcText= eregi_replace("&lt;", "<", atcText)
    atcText= eregi_replace("&gt;", ">", atcText)

    ' Drop everything from <html ...> up to and including the <body ...> tag,
    ' and everything from </body ...> onwards.
    atcText= eregi_replace("<html(.*|)<body([^>]*)>","",atcText)
    atcText= eregi_replace("</body(.*)</html>(.*)","",atcText)

    ' Remove opening and closing layout/form tags (their content is kept).
    atcText= eregi_replace("<[/]*(div|layer|body|html|head|meta|form|input|select|textarea|base)[^>]*>","",atcText)

    ' Remove style/script/title/link elements together with their content,
    ' then any leftover bare opening or closing tags of that kind.
    atcText= eregi_replace("<(style|script|title|link)(.*)</(style|script|title)>","",atcText)
    atcText= eregi_replace("<[/]*(script|style|title|xmp)>","",atcText)

    ' Defuse "javascript:"-style URL schemes by prefixing them with "deny_".
    atcText= eregi_replace("([a-z0-9]*script:)","deny_$1",atcText)

    ' Neutralise server-side code delimiters (<? ?> and <% %>) by encoding
    ' their angle bracket. The previous replacements re-emitted the same
    ' characters and therefore changed nothing.
    atcText= eregi_replace("<(\?|%)","&lt;$1",atcText)
    atcText= eregi_replace("(\?|%)>","$1&gt;",atcText)

    ' Return the string with the tags removed.
    strip_tag = atcText
End Function