Overview

Packages

  • ClipIt
    • clipit
      • api
    • urjc
      • backend
  • Elgg
    • Core
      • Access
      • Authentication
      • Cache
      • Caches
      • Core
      • DataMode
        • Site
      • DataModel
        • Annotations
        • Entities
        • Extender
        • File
        • Importable
        • Loggable
        • Notable
        • Object
        • User
      • DataStorage
      • Exception
      • Exceptions
        • Stub
      • FileStore
        • Disk
      • Groups
      • Helpers
      • HMAC
      • Memcache
      • Metadata
      • Navigation
      • ODD
      • Output
      • Plugins
        • Settings
      • Sessions
      • SocialModel
        • Friendable
        • Locatable
      • WebServicesAPI
      • Widgets
      • XML
      • XMLRPC
    • Exceptions
      • Stub
  • None
  • PHP

Classes

  • ElggAutoP
  • Overview
  • Package
  • Class
  • Tree
  1: <?php
  2: 
  3: /**
  4:  * Create wrapper P and BR elements in HTML depending on newlines. Useful when
  5:  * users use newlines to signal line and paragraph breaks. In all cases output
  6:  * should be well-formed markup.
  7:  *
  8:  * In DIV elements, Ps are only added when there would be at
  9:  * least two of them.
 10:  * 
 11:  * @package    Elgg.Core
 12:  * @subpackage Output
 13:  */
 14: class ElggAutoP {
 15: 
 16:     public $encoding = 'UTF-8';
 17: 
 18:     /**
 19:      * @var DOMDocument
 20:      */
 21:     protected $_doc = null;
 22: 
 23:     /**
 24:      * @var DOMXPath
 25:      */
 26:     protected $_xpath = null;
 27: 
 28:     protected $_blocks = 'address article area aside blockquote caption col colgroup dd 
 29:         details div dl dt fieldset figure figcaption footer form h1 h2 h3 h4 h5 h6 header 
 30:         hr hgroup legend map math menu nav noscript p pre section select style summary
 31:         table tbody td tfoot th thead tr ul ol option li';
 32: 
 33:     /**
 34:      * @var array
 35:      */
 36:     protected $_inlines = 'a abbr audio b button canvas caption cite code command datalist
 37:         del dfn em embed i iframe img input ins kbd keygen label map mark meter object
 38:         output progress q rp rt ruby s samp script select small source span strong style
 39:         sub sup textarea time var video wbr';
 40: 
 41:     /**
 42:      * Descend into these elements to add Ps
 43:      *
 44:      * @var array
 45:      */
 46:     protected $_descendList = 'article aside blockquote body details div footer form
 47:         header section';
 48: 
 49:     /**
 50:      * Add Ps inside these elements
 51:      *
 52:      * @var array
 53:      */
 54:     protected $_alterList = 'article aside blockquote body details div footer header
 55:         section';
 56: 
 57:     /** @var string */
 58:     protected $_unique = '';
 59: 
 60:     /**
 61:      * Constructor
 62:      */
 63:     public function __construct() {
 64:         $this->_blocks = preg_split('@\\s+@', $this->_blocks);
 65:         $this->_descendList = preg_split('@\\s+@', $this->_descendList);
 66:         $this->_alterList = preg_split('@\\s+@', $this->_alterList);
 67:         $this->_inlines = preg_split('@\\s+@', $this->_inlines);
 68:         $this->_unique = md5(__FILE__);
 69:     }
 70: 
 71:     /**
 72:      * Intance of class for singleton pattern.
 73:      * @var ElggAutoP
 74:      */
 75:     private static $instance;
 76:     
 77:     /**
 78:      * Singleton pattern.
 79:      * @return ElggAutoP
 80:      */
 81:     public static function getInstance() {
 82:         $className = __CLASS__;
 83:         if (!(self::$instance instanceof $className)) {
 84:             self::$instance = new $className();
 85:         }
 86:         return self::$instance;
 87:     }
 88:     
 89:     /**
 90:      * Create wrapper P and BR elements in HTML depending on newlines. Useful when
 91:      * users use newlines to signal line and paragraph breaks. In all cases output
 92:      * should be well-formed markup.
 93:      *
 94:      * In DIV, LI, TD, and TH elements, Ps are only added when their would be at
 95:      * least two of them.
 96:      *
 97:      * @param string $html snippet
 98:      * @return string|false output or false if parse error occurred
 99:      */
100:     public function process($html) {
101:         // normalize whitespace
102:         $html = str_replace(array("\r\n", "\r"), "\n", $html);
103: 
104:         // allows preserving entities untouched
105:         $html = str_replace('&', $this->_unique . 'AMP', $html);
106: 
107:         $this->_doc = new DOMDocument();
108: 
109:         // parse to DOM, suppressing loadHTML warnings
110:         // http://www.php.net/manual/en/domdocument.loadhtml.php#95463
111:         libxml_use_internal_errors(true);
112: 
113:         if (!$this->_doc->loadHTML("<html><meta http-equiv='content-type' " 
114:                 . "content='text/html; charset={$this->encoding}'><body>{$html}</body>"
115:                 . "</html>")) {
116:             return false;
117:         }
118: 
119:         $this->_xpath = new DOMXPath($this->_doc);
120:         // start processing recursively at the BODY element
121:         $nodeList = $this->_xpath->query('//body[1]');
122:         $this->addParagraphs($nodeList->item(0));
123: 
124:         // serialize back to HTML
125:         $html = $this->_doc->saveHTML();
126: 
127:         // Note: we create <autop> elements, which will later be converted to paragraphs
128: 
129:         // split AUTOPs into multiples at /\n\n+/
130:         $html = preg_replace('/(' . $this->_unique . 'NL){2,}/', '</autop><autop>', $html);
131:         $html = str_replace(array($this->_unique . 'BR', $this->_unique . 'NL', '<br>'), 
132:                 '<br />',
133:                 $html);
134:         $html = str_replace('<br /></autop>', '</autop>', $html);
135: 
136:         // re-parse so we can handle new AUTOP elements
137: 
138:         if (!$this->_doc->loadHTML($html)) {
139:             return false;
140:         }
141:         // must re-create XPath object after DOM load
142:         $this->_xpath = new DOMXPath($this->_doc);
143: 
144:         // strip AUTOPs that only have comments/whitespace
145:         foreach ($this->_xpath->query('//autop') as $autop) {
146:             /* @var DOMElement $autop */
147:             $hasContent = false;
148:             if (trim($autop->textContent) !== '') {
149:                 $hasContent = true;
150:             } else {
151:                 foreach ($autop->childNodes as $node) {
152:                     if ($node->nodeType === XML_ELEMENT_NODE) {
153:                         $hasContent = true;
154:                         break;
155:                     }
156:                 }
157:             }
158:             if (!$hasContent) {
159:                 // mark to be later replaced w/ preg_replace (faster than moving nodes out)
160:                 $autop->setAttribute("r", "1");
161:             }
162:         }
163: 
164:         // If a DIV contains a single AUTOP, remove it
165:         foreach ($this->_xpath->query('//div') as $el) {
166:             /* @var DOMElement $el */
167:             $autops = $this->_xpath->query('./autop', $el);
168:             if ($autops->length === 1) {
169:                 $firstAutop = $autops->item(0);
170:                 /* @var DOMElement $firstAutop */
171:                 $firstAutop->setAttribute("r", "1");
172:             }
173:         }
174: 
175:         $html = $this->_doc->saveHTML();
176: 
177:         // trim to the contents of BODY
178:         $bodyStart = strpos($html, '<body>');
179:         $bodyEnd = strpos($html, '</body>', $bodyStart + 6);
180:         $html = substr($html, $bodyStart + 6, $bodyEnd - $bodyStart - 6);
181:         
182:         // strip AUTOPs that should be removed
183:         $html = preg_replace('@<autop r="1">(.*?)</autop>@', '\\1', $html);
184: 
185:         // commit to converting AUTOPs to Ps
186:         $html = str_replace('<autop>', "\n<p>", $html);
187:         $html = str_replace('</autop>', "</p>\n", $html);
188:         
189:         $html = str_replace('<br>', '<br />', $html);
190:         $html = str_replace($this->_unique . 'AMP', '&', $html);
191:         return $html;
192:     }
193: 
194:     /**
195:      * Add P and BR elements as necessary
196:      *
197:      * @param DOMElement $el DOM element
198:      * @return void
199:      */
200:     protected function addParagraphs(DOMElement $el) {
201:         // no need to call recursively, just queue up
202:         $elsToProcess = array($el);
203:         $inlinesToProcess = array();
204:         while ($el = array_shift($elsToProcess)) {
205:             // if true, we can alter all child nodes, if not, we'll just call
206:             // addParagraphs on each element in the descendInto list
207:             $alterInline = in_array($el->nodeName, $this->_alterList);
208: 
209:             // inside affected elements, we want to trim leading whitespace from
210:             // the first text node
211:             $ltrimFirstTextNode = true;
212: 
213:             // should we open a new AUTOP element to move inline elements into?
214:             $openP = true;
215:             $autop = null;
216: 
217:             // after BR, ignore a newline
218:             $isFollowingBr = false;
219: 
220:             $node = $el->firstChild;
221:             while (null !== $node) {
222:                 if ($alterInline) {
223:                     if ($openP) {
224:                         $openP = false;
225:                         // create a P to move inline content into (this may be removed later)
226:                         $autop = $el->insertBefore($this->_doc->createElement('autop'), $node);
227:                     }
228:                 }
229: 
230:                 $isElement = ($node->nodeType === XML_ELEMENT_NODE);
231:                 if ($isElement) {
232:                     $isBlock = in_array($node->nodeName, $this->_blocks);
233:                 } else {
234:                     $isBlock = false;
235:                 }
236: 
237:                 if ($alterInline) {
238:                     $isText = ($node->nodeType === XML_TEXT_NODE);
239:                     $isLastInline = (! $node->nextSibling
240:                             || ($node->nextSibling->nodeType === XML_ELEMENT_NODE
241:                                 && in_array($node->nextSibling->nodeName, $this->_blocks)));
242:                     if ($isElement) {
243:                         $isFollowingBr = ($node->nodeName === 'br');
244:                     }
245: 
246:                     if ($isText) {
247:                         $nodeText = $node->nodeValue;
248:                         if ($ltrimFirstTextNode) {
249:                             $nodeText = ltrim($nodeText);
250:                             $ltrimFirstTextNode = false;
251:                         }
252:                         if ($isFollowingBr && preg_match('@^[ \\t]*\\n[ \\t]*@', $nodeText, $m)) {
253:                             // if a user ends a line with <br>, don't add a second BR
254:                             $nodeText = substr($nodeText, strlen($m[0]));
255:                         }
256:                         if ($isLastInline) {
257:                             $nodeText = rtrim($nodeText);
258:                         }
259:                         $nodeText = str_replace("\n", $this->_unique . 'NL', $nodeText);
260:                         $tmpNode = $node;
261:                         $node = $node->nextSibling; // move loop to next node
262: 
263:                         // alter node in place, then move into AUTOP
264:                         $tmpNode->nodeValue = $nodeText;
265:                         $autop->appendChild($tmpNode);
266: 
267:                         continue;
268:                     }
269:                 }
270:                 if ($isBlock || ! $node->nextSibling) {
271:                     if ($isBlock) {
272:                         if (in_array($node->nodeName, $this->_descendList)) {
273:                             $elsToProcess[] = $node;
274:                             //$this->addParagraphs($node);
275:                         }
276:                     }
277:                     $openP = true;
278:                     $ltrimFirstTextNode = true;
279:                 }
280:                 if ($alterInline) {
281:                     if (! $isBlock) {
282:                         $tmpNode = $node;
283:                         if ($isElement && false !== strpos($tmpNode->textContent, "\n")) {
284:                             $inlinesToProcess[] = $tmpNode;
285:                         }
286:                         $node = $node->nextSibling;
287:                         $autop->appendChild($tmpNode);
288:                         continue;
289:                     }
290:                 }
291: 
292:                 $node = $node->nextSibling;
293:             }
294:         }
295: 
296:         // handle inline nodes
297:         // no need to recurse, just queue up
298:         while ($el = array_shift($inlinesToProcess)) {
299:             $ignoreLeadingNewline = false;
300:             foreach ($el->childNodes as $node) {
301:                 if ($node->nodeType === XML_ELEMENT_NODE) {
302:                     if ($node->nodeValue === 'BR') {
303:                         $ignoreLeadingNewline = true;
304:                     } else {
305:                         $ignoreLeadingNewline = false;
306:                         if (false !== strpos($node->textContent, "\n")) {
307:                             $inlinesToProcess[] = $node;
308:                         }
309:                     }
310:                     continue;
311:                 } elseif ($node->nodeType === XML_TEXT_NODE) {
312:                     $text = $node->nodeValue;
313:                     if ($text[0] === "\n" && $ignoreLeadingNewline) {
314:                         $text = substr($text, 1);
315:                         $ignoreLeadingNewline = false;
316:                     }
317:                     $node->nodeValue = str_replace("\n", $this->_unique . 'BR', $text);
318:                 }
319:             }
320:         }
321:     }
322: }
323: 
API documentation generated by ApiGen 2.8.0