1: <?php
/**
 * Efficiently run operations on batches of results for any function
 * that supports an options array.
 *
 * This is usually used with elgg_get_entities() and friends,
 * elgg_get_annotations(), and elgg_get_metadata().
 *
 * If you pass a valid PHP callback, all results will be run through that
 * callback. You can still foreach() through the result set after. Valid
 * PHP callbacks can be a string, an array, or a closure.
 * {@link http://php.net/manual/en/language.pseudo-types.php}
 *
 * The callback function must accept 3 arguments: an entity, the getter
 * used, and the options used.
 *
 * Results from the callback are stored in callbackResult. If the callback
 * returns only booleans, callbackResult will be the combined (AND-ed) result
 * of all calls. If no entities are processed, callbackResult will be null.
 *
 * If the callback returns anything else, callbackResult will be an indexed
 * array of whatever the callback returns. If returning error handling
 * information, you should include enough information to determine which
 * result you're referring to.
 *
 * Don't combine returning bools and returning something else.
 *
 * Note that returning false will not stop the foreach.
 *
 * @warning If your callback or foreach loop deletes or disables entities
 * you MUST call setIncrementOffset(false) or set that when instantiating.
 * This forces the offset to stay what it was in the $options array.
 *
 * @example
 * <code>
 * // using foreach
 * $batch = new ElggBatch('elgg_get_entities', array());
 * $batch->setIncrementOffset(false);
 *
 * foreach ($batch as $entity) {
 *     $entity->disable();
 * }
 *
 * // using a callback
 * $callback = function($result, $getter, $options) {
 *     var_dump("Looking at annotation id: $result->id");
 *     return true;
 * };
 *
 * $batch = new ElggBatch('elgg_get_annotations', array('guid' => 2), $callback);
 * </code>
 *
 * @package    Elgg.Core
 * @subpackage DataModel
 * @link       http://docs.elgg.org/DataModel/ElggBatch
 * @since      1.8
 */
class ElggBatch
    implements Iterator {

    /**
     * The objects to iterate over (the current chunk only).
     *
     * @var array
     */
    private $results = array();

    /**
     * The function used to get results.
     *
     * @var mixed A string, array, closure, or lambda function
     */
    private $getter = null;

    /**
     * The options array handed to the getter on every fetch
     * (limit and offset are overridden per chunk).
     *
     * @var array
     */
    private $options = array();

    /**
     * The number of results to grab at a time.
     *
     * @var int
     */
    private $chunkSize = 25;

    /**
     * A callback function to pass results through.
     *
     * @var mixed A string, array, closure, or lambda function
     */
    private $callback = null;

    /**
     * Start after this many results.
     *
     * @var int
     */
    private $offset = 0;

    /**
     * Stop after this many results. 0 means no limit.
     *
     * @var int
     */
    private $limit = 0;

    /**
     * Number of rows fetched so far (complete results plus incompletes).
     *
     * @var int
     */
    private $retrievedResults = 0;

    /**
     * The index of the current result within the current chunk
     *
     * @var int
     */
    private $resultIndex = 0;

    /**
     * The index of the current chunk
     *
     * @var int
     */
    private $chunkIndex = 0;

    /**
     * The number of results iterated through
     *
     * @var int
     */
    private $processedResults = 0;

    /**
     * Is the getter a valid callback? Null until first checked.
     *
     * @var bool
     */
    private $validGetter = null;

    /**
     * The result of running all entities through the callback function.
     *
     * @var mixed
     */
    public $callbackResult = null;

    /**
     * If false, offset will not be incremented. This is used for callbacks/loops that delete.
     *
     * @var bool
     */
    private $incrementOffset = true;

    /**
     * Entities that could not be instantiated during the most recent fetch
     *
     * @var stdClass[]
     */
    private $incompleteEntities = array();

    /**
     * Total number of incomplete entities fetched
     *
     * @var int
     */
    private $totalIncompletes = 0;

    /**
     * Batches operations on any elgg_get_*() or compatible function that supports
     * an options array.
     *
     * Instead of returning all objects in memory, it goes through $chunk_size
     * objects, then requests more from the server. This avoids OOM errors.
     *
     * @param mixed $getter     The function used to get objects. Usually
     *                          an elgg_get_*() function, but can be any valid PHP callback.
     * @param array $options    The options array to pass to the getter function. If limit is
     *                          not set, 10 is used as the default. In most cases that is not
     *                          what you want.
     * @param mixed $callback   An optional callback function that all results will be passed
     *                          to upon load. The callback needs to accept $result, $getter,
     *                          $options.
     * @param int   $chunk_size The number of entities to pull in before requesting more.
     *                          You have to balance this between running out of memory in PHP
     *                          and hitting the db server too often. Values <= 0 fall back to 25.
     * @param bool  $inc_offset Increment the offset on each fetch. This must be false for
     *                          callbacks that delete rows. You can set this after the
     *                          object is created with {@see ElggBatch::setIncrementOffset()}.
     */
    public function __construct($getter, $options, $callback = null, $chunk_size = 25,
            $inc_offset = true) {

        $this->getter = $getter;
        $this->options = $options;
        $this->callback = $callback;
        $this->chunkSize = $chunk_size;
        $this->setIncrementOffset($inc_offset);

        if ($this->chunkSize <= 0) {
            $this->chunkSize = 25;
        }

        // store these so we can compare later
        $this->offset = elgg_extract('offset', $options, 0);
        $this->limit = elgg_extract('limit', $options, 10);

        // if passed a callback, create a new ElggBatch with the same options
        // and pass each result to the callback.
        if ($callback && is_callable($callback)) {
            $batch = new ElggBatch($getter, $options, null, $chunk_size, $inc_offset);

            $all_results = null;

            foreach ($batch as $result) {
                // string callbacks are invoked directly so that functions
                // declaring by-reference parameters keep working
                if (is_string($callback)) {
                    $result = $callback($result, $getter, $options);
                } else {
                    $result = call_user_func_array($callback, array($result, $getter, $options));
                }

                $is_flag = ($result === true || $result === false || $result === null);

                // the first (non-null) return value decides whether we are
                // folding booleans or collecting a list
                if (!isset($all_results)) {
                    $all_results = $is_flag ? $result : array();
                }

                if ($is_flag && !is_array($all_results)) {
                    $all_results = $result && $all_results;
                } else {
                    $all_results[] = $result;
                }
            }

            $this->callbackResult = $all_results;
        }
    }

    /**
     * Tell the process that an entity was incomplete during a fetch
     *
     * Getters receive this batch under the '__ElggBatch' option key and call
     * this for every row they had to drop.
     *
     * @param stdClass $row The raw row that could not be turned into a result
     *
     * @return void
     * @access private
     */
    public function reportIncompleteEntity(stdClass $row) {
        $this->incompleteEntities[] = $row;
    }

    /**
     * Fetches the next chunk of results
     *
     * @return bool True if there is at least one result to iterate over
     */
    private function getNextResultsChunk() {

        // always reset results.
        $this->results = array();

        if (!isset($this->validGetter)) {
            $this->validGetter = is_callable($this->getter);
        }

        if (!$this->validGetter) {
            return false;
        }

        $limit = $this->chunkSize;

        // if someone passed limit = 0 they want everything.
        if ($this->limit != 0) {
            $remaining = $this->limit - $this->retrievedResults;
            if ($remaining <= 0) {
                return false;
            }

            // never ask for more rows than are left in the original limit
            $limit = min($this->chunkSize, $remaining);
        }

        if ($this->incrementOffset) {
            $offset = $this->offset + $this->retrievedResults;
        } else {
            // rows are disappearing underneath us, so only skip the ones
            // that can never be instantiated
            $offset = $this->offset + $this->totalIncompletes;
        }

        $current_options = array(
            'limit' => $limit,
            'offset' => $offset,
            '__ElggBatch' => $this,
        );

        $options = array_merge($this->options, $current_options);

        $this->incompleteEntities = array();
        $fetched = call_user_func_array($this->getter, array($options));

        // getters may return false (or a count) instead of a list; treat
        // anything that is not an array as "no results"
        $this->results = is_array($fetched) ? $fetched : array();

        $num_results = count($this->results);
        $num_incomplete = count($this->incompleteEntities);

        $this->totalIncompletes += $num_incomplete;

        if ($num_incomplete > 0) {
            // pad the front of the results with nulls representing the incompletes
            array_splice($this->results, 0, 0, array_pad(array(), $num_incomplete, null));
            // ...and skip past them
            reset($this->results);
            for ($i = 0; $i < $num_incomplete; $i++) {
                next($this->results);
            }
        }

        if (!$this->results) {
            return false;
        }

        $this->chunkIndex++;

        // let the system know we've jumped past the nulls
        $this->resultIndex = $num_incomplete;

        $this->retrievedResults += ($num_results + $num_incomplete);

        if ($num_results == 0) {
            // This fetch was *all* incompletes! We need to fetch until we can either
            // offer at least one row to iterate over, or give up.
            return $this->getNextResultsChunk();
        }

        return true;
    }

    /**
     * Increment the offset from the original options array? Setting to
     * false is required for callbacks that delete rows.
     *
     * @param bool $increment Set to false when deleting data
     *
     * @return void
     */
    public function setIncrementOffset($increment = true) {
        $this->incrementOffset = (bool) $increment;
    }

    /**
     * Implements Iterator
     */

    /**
     * PHP Iterator Interface
     *
     * Restarts iteration by resetting all counters and fetching the first
     * chunk again, so the batch can be looped over more than once.
     *
     * @see Iterator::rewind()
     * @return void
     */
    #[\ReturnTypeWillChange]
    public function rewind() {
        $this->resultIndex = 0;
        $this->retrievedResults = 0;
        $this->processedResults = 0;

        // Always refetch: after a full pass the chunk is empty, and after a
        // partial pass the array pointer is mid-chunk, so the cached results
        // can never be replayed safely.
        $this->chunkIndex = 0;
        $this->getNextResultsChunk();
    }

    /**
     * PHP Iterator Interface
     *
     * @see Iterator::current()
     * @return mixed
     */
    #[\ReturnTypeWillChange]
    public function current() {
        return current($this->results);
    }

    /**
     * PHP Iterator Interface
     *
     * @see Iterator::key()
     * @return int The zero-based position across all chunks
     */
    #[\ReturnTypeWillChange]
    public function key() {
        return $this->processedResults;
    }

    /**
     * PHP Iterator Interface
     *
     * @see Iterator::next()
     * @return mixed The new current result, or false when there are no more
     */
    #[\ReturnTypeWillChange]
    public function next() {
        // if we'll be at the end.
        if (($this->processedResults + 1) >= $this->limit && $this->limit > 0) {
            $this->results = array();
            return false;
        }

        // if we'll need new results.
        if (($this->resultIndex + 1) >= $this->chunkSize) {
            if (!$this->getNextResultsChunk()) {
                $this->results = array();
                return false;
            }

            $result = current($this->results);
        } else {
            // the function above resets the indexes, so only inc if not
            // getting new set
            $this->resultIndex++;
            $result = next($this->results);
        }

        $this->processedResults++;
        return $result;
    }

    /**
     * PHP Iterator Interface
     *
     * @see Iterator::valid()
     * @return bool
     */
    #[\ReturnTypeWillChange]
    public function valid() {
        if (!is_array($this->results)) {
            return false;
        }
        // key() is null once the internal pointer has run off the end
        $key = key($this->results);
        return ($key !== null && $key !== false);
    }
}
434: