Skip to content

Commit b0bf490

Browse files
committed
Merge remote-tracking branch 'upstream/master'
2 parents bac1942 + 5f97615 commit b0bf490

File tree

2 files changed

+54
-27
lines changed

2 files changed

+54
-27
lines changed

README.md

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
Update: I tested this library with PHP 7.0.1 - 7.0.5 as well as PHP 5.3 and up; everything seems to be working.
2+
13
The goal of this project is to be a DOM-based drop-in replacement for PHP's simple html dom library.
24

35
*How To Use* - The same way as simple. If you use file/str_get_html then you don't need to change anything. If you are instantiating with `new simple_html_dom()` then you will need to change that to `new AdvancedHtmlDom()`
@@ -14,4 +16,6 @@ Features
1416
* Use with css or xpath: $doc->find('h3 a'), $doc->find('//h3//a')
1517
* jQuery-style functions: replace, wrap, unwrap, before, after
1618
* Nodeset math: $doc->find('a')->minus($doc->find('.skip_me'))
17-
* Lots more features that haven't been documented yet.
19+
* Lots more features that haven't been documented yet.
20+
21+
If you love Advanced HTML Dom please [vote for it](http://stackoverflow.com/questions/3577641/how-do-you-parse-and-process-html-xml-in-php) here!

advanced_html_dom.php

Lines changed: 49 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,15 @@ public function remove(){
3737
return $this;
3838
}
3939

40-
public function str($str){ return new Str($str); }
40+
public function str(){ return new Str($this->text); }
41+
public function match($re){
42+
$str = new Str($this->text);
43+
return $str->match($re);
44+
}
45+
public function scan($re){
46+
$str = new Str($this->text);
47+
return $str->scan($re);
48+
}
4149
public function clean($str){ return trim(preg_replace('/\s+/', ' ', $str)); }
4250
public function trim($str){ return trim($str); }
4351

@@ -76,7 +84,7 @@ public function __call($key, $args){
7684
case 'index': return $this->search('./preceding-sibling::*')->length + 1;
7785

7886
/*
79-
DOMNode::insertBefore — Adds a new child
87+
DOMNode::insertBefore — Adds a new child
8088
*/
8189

8290
// simple-html-dom junk methods
@@ -97,10 +105,14 @@ public function __call($key, $args){
97105

98106
// attributes
99107
case 'hasattribute': return !$this->is_text && $this->node->getAttribute($args[0]);
100-
case 'getattribute': return $this->$args[0];
108+
case 'getattribute': $arg = $args[0]; return $this->$arg;
109+
case 'setattribute': $arg0 = $args[0]; $arg1 = $args[1]; return $this->$arg0 = $arg1;
110+
case 'removeattribute': $arg = $args[0]; return $this->$arg = null;
111+
case 'getattribute': return $this->node->getAttribute($args[0]);
101112
case 'setattribute': return $this->$args[0] = $args[1];
102113
case 'removeattribute': return $this->$args[0] = null;
103114

115+
104116
// wrap
105117
case 'wrap':
106118
return $this->replace('<' . $args[0] . '>' . $this . '</' . $args[0] . '>');
@@ -129,9 +141,11 @@ public function __call($key, $args){
129141
// $doc->spans[x]
130142
if(preg_match(TAGS_REGEX, $key, $m)) return $this->find($m[1]);
131143
if(preg_match(TAG_REGEX, $key, $m)) return $this->find($m[1], 0);
132-
133-
if(preg_match('/(clean|trim|str)(.*)/', $key, $m)){
134-
return $this->$m[1]($this->$m[2]);
144+
145+
if(preg_match('/(clean|trim|str)(.*)/', $key, $m) && isset($m[2])){
146+
$arg1 = $m[1];
147+
$arg2 = $m[2];
148+
return $this->$arg1($this->$arg2);
135149
}
136150

137151
if(!preg_match(ATTRIBUTE_REGEX, $key, $m)) trigger_error('Unknown method or property: ' . $key, E_USER_WARNING);
@@ -159,7 +173,7 @@ public function load($html, $is_xml = false){
159173
@$this->dom->loadXML(preg_replace('/xmlns=".*?"/ ', '', $html));
160174
} else {
161175
@$this->dom->loadHTML($html);
162-
}
176+
}
163177
$this->xpath = new DOMXPath($this->dom);
164178
//$this->root = new AHTMLNode($this->dom->documentElement, $this->doc);
165179
$this->root = $this->at('body');
@@ -189,7 +203,7 @@ function __construct($nodeList, $doc){
189203
abstract public void offsetUnset ( mixed $offset )
190204
*/
191205

192-
public function offsetExists($offset){ return 0 <= $offset && $offset < $this->nodeList->length(); }
206+
public function offsetExists($offset){ return 0 <= $offset && $offset < $this->nodeList->length; }
193207
public function offsetGet($offset){ return new AHTMLNode($this->nodeList->item($offset), $this->doc); }
194208
public function offsetSet($offset, $value){ trigger_error('offsetSet not implemented', E_USER_WARNING); }
195209
public function offsetUnset($offset){ trigger_error('offsetUnset not implemented', E_USER_WARNING); }
@@ -201,23 +215,23 @@ public function count(){
201215
public function rewind(){
202216
$this->counter = 0;
203217
}
204-
218+
205219
public function current(){
206220
return new AHTMLNode($this->nodeList->item($this->counter), $this->doc);
207221
}
208-
222+
209223
public function key(){
210224
return $this->counter;
211225
}
212-
226+
213227
public function next(){
214228
$this->counter++;
215229
}
216-
230+
217231
public function valid(){
218232
return $this->counter < $this->nodeList->length;
219233
}
220-
234+
221235
public function last(){
222236
return ($this->nodeList->length > 0) ? new AHTMLNode($this->nodeList->item($this->nodeList->length - 1), $this->doc) : null;
223237
}
@@ -279,12 +293,18 @@ public function __call($key, $values){
279293
*/
280294

281295
if(preg_match(ATTRIBUTES_REGEX, $key, $m) || preg_match('/^((clean|trim|str).*)s$/', $key, $m)){
282-
foreach($this as $node){$retval[] = $node->$m[1];}
296+
foreach($this as $node){
297+
$arg = $m[1];
298+
$retval[] = $node->$arg;
299+
}
283300
return $retval;
284301
}
285302

286303
if(preg_match(ATTRIBUTE_REGEX, $key, $m)){
287-
foreach($this as $node){$retval[] = $node->$m[1];}
304+
foreach($this as $node){
305+
$arg = $m[1];
306+
$retval[] = $node->$arg;
307+
}
288308
return implode('', $retval);
289309
}
290310

@@ -304,7 +324,7 @@ public function length(){ return $this->nodeList->length; }
304324

305325
class AHTMLNode extends AdvancedHtmlBase implements ArrayAccess{
306326
private $_path;
307-
327+
308328
function __construct($node, $doc){
309329
$this->node = $node;
310330
$this->_path = $node->getNodePath();
@@ -321,13 +341,13 @@ private function get_fragment($html){
321341

322342
function replace($html){
323343
$node = empty($html) ? null : $this->before($html);
324-
$this->remove();
344+
$this->remove();
325345
return $node;
326346
}
327347

328348
function before($html){
329349
$fragment = $this->get_fragment($html);
330-
$this->node->parentNode->insertBefore($fragment, $this->node);
350+
$this->node->parentNode->insertBefore($fragment, $this->node);
331351
return new AHTMLNode($this->node->previousSibling, $this->doc);
332352
}
333353

@@ -341,7 +361,7 @@ function after($html){
341361
}
342362

343363
/**
 * Convert a camelCase / StudlyCaps identifier to lower-case snake_case.
 *
 * An upper-case letter that directly follows a lower-case letter gets an
 * underscore inserted before it; an upper-case letter at the very start of
 * the string is only lower-cased. Afterwards the whole string is lower-cased
 * and every space is replaced by an underscore.
 *
 * Examples: "innerText" -> "inner_text", "FooBar" -> "foo_bar".
 *
 * @param string $str Identifier to convert.
 * @return string The snake_case form of $str.
 */
function decamelize($str){
    // The original used preg_replace() with the /e (eval) modifier, which was
    // deprecated in PHP 5.5 and removed in PHP 7.0 (the call then warns and
    // returns NULL). preg_replace_callback() performs the same substitution
    // without evaluating code and works on PHP 5.3+.
    $str = preg_replace_callback(
        '/(^|[a-z])([A-Z])/',
        function ($m) {
            // $m[1] is empty when the capital sits at the start of the string:
            // no underscore is inserted in that case.
            return strtolower(strlen($m[1]) ? $m[1] . '_' . $m[2] : $m[2]);
        },
        $str
    );
    return preg_replace('/ /', '_', strtolower($str));
}
347367

@@ -409,7 +429,7 @@ public function offsetSet($key, $value){
409429
} else {
410430
$this->node->removeAttribute($key);
411431
}
412-
//trigger_error('offsetSet not implemented', E_USER_WARNING);
432+
//trigger_error('offsetSet not implemented', E_USER_WARNING);
413433
}
414434
public function offsetUnset($offset){ trigger_error('offsetUnset not implemented', E_USER_WARNING); }
415435

@@ -490,6 +510,8 @@ private static function not($str){
490510
switch(true){
491511
case preg_match('/^\.(\w+)$/', $str, $m): return self::do_class($str);
492512
case preg_match('/^\#(\w+)$/', $str, $m): return self::do_id($str);
513+
case preg_match('/^(\w+)$/', $str, $m): return "self::" . $str;
514+
case preg_match('/^\[(.*)\]$/', $str, $m): return substr(self::do_braces($str), 1, -1);
493515
default: return self::translate($str);
494516
}
495517
}
@@ -551,7 +573,7 @@ static function do_braces($str){
551573
$tokens = preg_split($re, substr($str, 1, strlen($str) - 2), 0, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY);
552574
// var_dump($tokens);
553575
$attr = trim(array_shift($tokens));
554-
// && )
576+
// && )
555577
if(!$op = @trim(array_shift($tokens))){
556578
switch(true){
557579
case preg_match('/^\d+$/', $attr): return "[count(preceding-sibling::*) = " . ($attr - 1) . "]"; // [2] -> [count(preceding-sibling::*) = 1]
@@ -610,7 +632,7 @@ static function translate($str){
610632
$retval = array();
611633
$re = '/(\((?>[^()]|(?R))*\)|\[(?>[^\[\]]|(?R))*\]|\s*[+~>]\s*| \s*)/';
612634
$item = '';
613-
635+
614636
$last_nav = null;
615637
//echo "\n!" . $str . "!\n";
616638
//var_dump(preg_split($re, $str, 0, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY));
@@ -621,7 +643,7 @@ static function translate($str){
621643
case '>':
622644
case '~':
623645
case '+':
624-
case '':
646+
case '':
625647
if(!empty($item)) $retval[] = self::translate_part(trim($item), $last_nav);
626648
$item = '';
627649
$last_nav = $token;
@@ -682,7 +704,7 @@ public static function xpath_for($str){
682704
* Str
683705
*/
684706

685-
class Str{
707+
class Str{
686708
var $text;
687709

688710
function __construct($str){
@@ -692,7 +714,7 @@ function __construct($str){
692714
function match($regex, $group_number = 0){
693715
if(!preg_match($regex, $this->text, $m)) return false;
694716
$val = $m[$group_number];
695-
return new Str($val);
717+
return $val; // new Str($val);
696718
}
697719

698720
function scan($regex, $group_number = 0){
@@ -728,4 +750,5 @@ function file_get_html($url){ return str_get_html(file_get_contents($url)); }
728750
function str_get_xml($html){ return new AdvancedHtmlDom($html, true); }
729751
function file_get_xml($url){ return str_get_xml(file_get_contents($url)); }
730752
}
731-
?>
753+
754+
?>

0 commit comments

Comments
 (0)