1: <?php
2:
3: 4: 5: 6: 7: 8: 9: 10: 11: 12: 13:
/**
 * Wraps loose text and inline markup in <p> elements and turns single
 * newlines into <br /> elements, using newlines in the input as the signal
 * for line and paragraph breaks.
 *
 * The markup is parsed with DOMDocument, temporary <autop> elements are
 * inserted around runs of inline content, and the result is post-processed
 * as a string to turn the <autop> elements into paragraphs.
 */
class ElggAutoP {

    /**
     * @var string Character set announced to the HTML parser in the wrapper document.
     */
    public $encoding = 'UTF-8';

    /**
     * @var DOMDocument Document currently being processed.
     */
    protected $_doc = null;

    /**
     * @var DOMXPath XPath helper bound to $_doc.
     */
    protected $_xpath = null;

    /**
     * Element names treated as block-level: they end the current paragraph
     * and are never moved into one. Split into an array by the constructor.
     *
     * @var string|array
     */
    protected $_blocks = 'address article area aside blockquote caption col colgroup dd
        details div dl dt fieldset figure figcaption footer form h1 h2 h3 h4 h5 h6 header
        hr hgroup legend map math menu nav noscript p pre section select style summary
        table tbody td tfoot th thead tr ul ol option li';

    /**
     * Element names considered inline. Split into an array by the constructor.
     *
     * @var string|array
     */
    protected $_inlines = 'a abbr audio b button canvas caption cite code command datalist
        del dfn em embed i iframe img input ins kbd keygen label map mark meter object
        output progress q rp rt ruby s samp script select small source span strong style
        sub sup textarea time var video wbr';

    /**
     * Block elements whose children are also scanned for content that needs
     * paragraphs. Split into an array by the constructor.
     *
     * @var string|array
     */
    protected $_descendList = 'article aside blockquote body details div footer form
        header section';

    /**
     * Elements whose direct inline/text children get wrapped in <autop>
     * elements. Split into an array by the constructor.
     *
     * @var string|array
     */
    protected $_alterList = 'article aside blockquote body details div footer header
        section';

    /**
     * @var string Prefix for the temporary placeholder tokens (AMP, NL, BR) placed in the markup.
     */
    protected $_unique = '';

    /**
     * @var ElggAutoP Shared instance returned by getInstance().
     */
    private static $instance;

    /**
     * Splits the whitespace-separated element lists into arrays and derives
     * the placeholder prefix from this file's path.
     */
    public function __construct() {
        $this->_blocks = preg_split('@\\s+@', $this->_blocks);
        $this->_descendList = preg_split('@\\s+@', $this->_descendList);
        $this->_alterList = preg_split('@\\s+@', $this->_alterList);
        $this->_inlines = preg_split('@\\s+@', $this->_inlines);
        $this->_unique = md5(__FILE__);
    }

    /**
     * Returns a shared instance of this class, creating it on first use.
     *
     * @return ElggAutoP
     */
    public static function getInstance() {
        if (!(self::$instance instanceof self)) {
            self::$instance = new self();
        }
        return self::$instance;
    }

    /**
     * Adds paragraph and line-break markup to an HTML fragment based on the
     * newlines it contains.
     *
     * libxml's internal error handling is enabled while the markup is parsed
     * and the previous setting is restored before returning. Parse errors
     * collected during the call are discarded only when this call was the
     * one that enabled internal error handling, so a caller that manages the
     * libxml error buffer itself keeps its entries.
     *
     * @param string $html HTML fragment (body content only).
     *
     * @return string|false The converted fragment, or false if the markup could not be loaded.
     */
    public function process($html) {
        $previousErrorMode = libxml_use_internal_errors(true);

        $result = $this->convertToParagraphs($html);

        if (!$previousErrorMode) {
            // we own the error buffer: do not let it grow with every call
            libxml_clear_errors();
        }
        libxml_use_internal_errors($previousErrorMode);

        return $result;
    }

    /**
     * Performs the conversion for process(); expects libxml internal error
     * handling to be enabled by the caller.
     *
     * @param string $html HTML fragment (body content only).
     *
     * @return string|false The converted fragment, or false if the markup could not be loaded.
     */
    protected function convertToParagraphs($html) {
        // normalize line endings to \n
        $html = str_replace(array("\r\n", "\r"), "\n", $html);

        // replace ampersands with a placeholder so the parser and serializer
        // leave entities exactly as the author wrote them
        $html = str_replace('&', $this->_unique . 'AMP', $html);

        $this->_doc = new DOMDocument();

        // wrap the fragment in a document that declares the character set
        if (!$this->_doc->loadHTML("<html><meta http-equiv='content-type' "
                . "content='text/html; charset={$this->encoding}'><body>{$html}</body>"
                . "</html>")) {
            return false;
        }

        $this->_xpath = new DOMXPath($this->_doc);

        // start processing recursively at the BODY element
        $nodeList = $this->_xpath->query('//body[1]');
        $this->addParagraphs($nodeList->item(0));

        // serialize back to a string for the placeholder replacements
        $html = $this->_doc->saveHTML();

        // two or more consecutive newline placeholders split a paragraph;
        // remaining NL/BR placeholders become line breaks
        $newline = preg_quote($this->_unique . 'NL', '/');
        $html = preg_replace('/(' . $newline . '){2,}/', '</autop><autop>', $html);
        $html = str_replace(array($this->_unique . 'BR', $this->_unique . 'NL', '<br>'),
                '<br />',
                $html);
        // a line break right before the end of a paragraph is redundant
        $html = str_replace('<br /></autop>', '</autop>', $html);

        // re-parse so the new AUTOP boundaries exist as real elements
        if (!$this->_doc->loadHTML($html)) {
            return false;
        }

        $this->_xpath = new DOMXPath($this->_doc);

        // mark AUTOPs without any text or element content for removal
        foreach ($this->_xpath->query('//autop') as $autop) {
            $hasContent = false;
            if (trim($autop->textContent) !== '') {
                $hasContent = true;
            } else {
                foreach ($autop->childNodes as $node) {
                    if ($node->nodeType === XML_ELEMENT_NODE) {
                        $hasContent = true;
                        break;
                    }
                }
            }
            if (!$hasContent) {
                // the "r" attribute flags the element to be unwrapped below
                $autop->setAttribute("r", "1");
            }
        }

        // a DIV with exactly one AUTOP child keeps its content unwrapped
        foreach ($this->_xpath->query('//div') as $el) {
            $autops = $this->_xpath->query('./autop', $el);
            if ($autops->length === 1) {
                $firstAutop = $autops->item(0);
                $firstAutop->setAttribute("r", "1");
            }
        }

        $html = $this->_doc->saveHTML();

        // keep only the content between <body> and </body>
        $bodyStart = strpos($html, '<body>');
        $bodyEnd = strpos($html, '</body>', $bodyStart + 6);
        $html = substr($html, $bodyStart + 6, $bodyEnd - $bodyStart - 6);

        // unwrap the flagged AUTOPs; the "s" modifier lets the content span
        // several lines (e.g. a multi-line comment inside the element)
        $html = preg_replace('@<autop r="1">(.*?)</autop>@s', '\\1', $html);

        // the remaining AUTOPs become paragraphs
        $html = str_replace('<autop>', "\n<p>", $html);
        $html = str_replace('</autop>', "</p>\n", $html);

        $html = str_replace('<br>', '<br />', $html);
        $html = str_replace($this->_unique . 'AMP', '&', $html);
        return $html;
    }

    /**
     * Moves runs of text and non-block nodes into temporary AUTOP elements
     * and replaces newlines in text with placeholder tokens.
     *
     * Works through a queue instead of recursing: block children that are
     * in the descend list are appended to the queue and handled the same way.
     *
     * @param DOMElement $el Element to start from (process() passes BODY).
     *
     * @return void
     */
    protected function addParagraphs(DOMElement $el) {
        $elsToProcess = array($el);
        $inlinesToProcess = array();
        while ($el = array_shift($elsToProcess)) {
            // only elements in the alter list get their children wrapped;
            // others are merely scanned for blocks to descend into
            $alterInline = in_array($el->nodeName, $this->_alterList, true);

            // the first text node of each paragraph is left-trimmed
            $ltrimFirstTextNode = true;

            // whether the next node needs a fresh AUTOP to be opened
            $openP = true;
            $autop = null;

            // whether the most recently seen element was a BR
            $isFollowingBr = false;

            $node = $el->firstChild;
            while (null !== $node) {
                if ($alterInline) {
                    if ($openP) {
                        $openP = false;
                        // create a paragraph and place it before this node
                        $autop = $el->insertBefore($this->_doc->createElement('autop'), $node);
                    }
                }

                $isElement = ($node->nodeType === XML_ELEMENT_NODE);
                if ($isElement) {
                    $isBlock = in_array($node->nodeName, $this->_blocks, true);
                } else {
                    $isBlock = false;
                }

                if ($alterInline) {
                    $isText = ($node->nodeType === XML_TEXT_NODE);
                    // last inline: nothing follows, or a block element follows
                    $isLastInline = (! $node->nextSibling
                            || ($node->nextSibling->nodeType === XML_ELEMENT_NODE
                                && in_array($node->nextSibling->nodeName, $this->_blocks, true)));
                    if ($isElement) {
                        $isFollowingBr = ($node->nodeName === 'br');
                    }

                    if ($isText) {
                        $nodeText = $node->nodeValue;
                        if ($ltrimFirstTextNode) {
                            $nodeText = ltrim($nodeText);
                            $ltrimFirstTextNode = false;
                        }
                        if ($isFollowingBr && preg_match('@^[ \\t]*\\n[ \\t]*@', $nodeText, $m)) {
                            // the author already placed a BR: drop the newline after it
                            $nodeText = (string) substr($nodeText, strlen($m[0]));
                        }
                        if ($isLastInline) {
                            $nodeText = rtrim($nodeText);
                        }
                        $nodeText = str_replace("\n", $this->_unique . 'NL', $nodeText);
                        $tmpNode = $node;
                        // advance before moving the node, which changes its siblings
                        $node = $node->nextSibling;

                        $tmpNode->nodeValue = $nodeText;
                        $autop->appendChild($tmpNode);

                        continue;
                    }
                }
                if ($isBlock || ! $node->nextSibling) {
                    if ($isBlock) {
                        if (in_array($node->nodeName, $this->_descendList, true)) {
                            $elsToProcess[] = $node;
                        }
                    }
                    // whatever follows starts a new paragraph
                    $openP = true;
                    $ltrimFirstTextNode = true;
                }
                if ($alterInline) {
                    if (! $isBlock) {
                        $tmpNode = $node;
                        if ($isElement && false !== strpos($tmpNode->textContent, "\n")) {
                            // newlines inside this inline element are handled below
                            $inlinesToProcess[] = $tmpNode;
                        }
                        // advance before moving the node, which changes its siblings
                        $node = $node->nextSibling;
                        $autop->appendChild($tmpNode);
                        continue;
                    }
                }

                $node = $node->nextSibling;
            }
        }

        // replace newlines inside the queued inline elements with BR
        // placeholders; nested elements containing newlines are queued too
        while ($el = array_shift($inlinesToProcess)) {
            $ignoreLeadingNewline = false;
            foreach ($el->childNodes as $node) {
                if ($node->nodeType === XML_ELEMENT_NODE) {
                    // the HTML parser reports element names in lower case
                    if ($node->nodeName === 'br') {
                        $ignoreLeadingNewline = true;
                    } else {
                        $ignoreLeadingNewline = false;
                        if (false !== strpos($node->textContent, "\n")) {
                            $inlinesToProcess[] = $node;
                        }
                    }
                    continue;
                } elseif ($node->nodeType === XML_TEXT_NODE) {
                    $text = $node->nodeValue;
                    if ($ignoreLeadingNewline && $text !== '' && $text[0] === "\n") {
                        // the BR before this text already provides the line break
                        $text = (string) substr($text, 1);
                    }
                    // only text directly after a BR may lose its leading newline
                    $ignoreLeadingNewline = false;
                    $node->nodeValue = str_replace("\n", $this->_unique . 'BR', $text);
                }
            }
        }
    }
}
323: