Skip to content

Commit 710fc8e

Browse files
committed
Tidy up examples in eg. Fixes issue #13
* perltidy * use strict/warnings * add comments to each example giving a brief explanation of what it does * print out UTF-8 bytes where it makes sense in the example files * get the changes log up-to-date * Change CI to use 5.32 on windows now that there's a portable for it
1 parent 58b5dbc commit 710fc8e

File tree

13 files changed

+284
-229
lines changed

13 files changed

+284
-229
lines changed

.github/workflows/windows.yml

+2-1
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,8 @@ jobs:
1313
fail-fast: true
1414
matrix:
1515
perl-version:
16-
- '5.30.2.1'
16+
- '5.32.0.1'
17+
# - '5.30.3.1'
1718
# - '5.28.2.1'
1819
# - '5.26.3.1'
1920
# - '5.24.4.1'

Changes

+12
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,19 @@ Change history for HTML-Parser
44
* Fix the order of date and version in this change log. (Thanks, haarg)
55
* Convert to Dist::Zilla
66
* Build all prereqs from our cpanfile
7+
* Go through all test files and:
8+
* perltidy
9+
* Use strict/warnings
10+
* Get rid of two-arg open
11+
* Get rid of BAREWORD filehandles
12+
* Fix the eval pattern used
13+
* Only use -w where we catch $SIG{__WARN__}
14+
* Fix encoding problems
15+
* use utf8 where we have unicode in the source
716
* Fix a typo here and there
17+
* perltidy all of the example apps in eg/
18+
* Add comments explaining the apps in eg/ (GH#13 Thanks, Salvatore Bonaccorso)
19+
* Print out UTF-8 encoded data where sensible in eg/
820

921
3.73 2020-08-24
1022
* Cleaned up this changes log.

eg/hanchors

+20-20
Original file line numberDiff line numberDiff line change
@@ -1,48 +1,48 @@
1-
#!/usr/bin/perl -w
1+
#!/usr/bin/perl
2+
3+
use strict;
4+
use warnings;
25

36
# This program will print out all <a href=".."> links in a
47
# document together with the text that goes with it.
58
#
69
# See also HTML::LinkExtor
7-
10+
use Encode;
811
use HTML::Parser;
912

10-
my $p = HTML::Parser->new(api_version => 3,
11-
start_h => [\&a_start_handler, "self,tagname,attr"],
12-
report_tags => [qw(a img)],
13-
);
13+
my $p = HTML::Parser->new(
14+
api_version => 3,
15+
start_h => [\&a_start_handler, "self,tagname,attr"],
16+
report_tags => [qw(a img)],
17+
);
1418
$p->parse_file(shift || die) || die $!;
1519

16-
sub a_start_handler
17-
{
18-
my($self, $tag, $attr) = @_;
20+
sub a_start_handler {
21+
my ($self, $tag, $attr) = @_;
1922
return unless $tag eq "a";
2023
return unless exists $attr->{href};
2124
print "A $attr->{href}\n";
2225

23-
$self->handler(text => [], '@{dtext}' );
26+
$self->handler(text => [], '@{dtext}');
2427
$self->handler(start => \&img_handler);
2528
$self->handler(end => \&a_end_handler, "self,tagname");
2629
}
2730

28-
sub img_handler
29-
{
30-
my($self, $tag, $attr) = @_;
31+
sub img_handler {
32+
my ($self, $tag, $attr) = @_;
3133
return unless $tag eq "img";
3234
push(@{$self->handler("text")}, $attr->{alt} || "[IMG]");
3335
}
3436

35-
sub a_end_handler
36-
{
37-
my($self, $tag) = @_;
38-
my $text = join("", @{$self->handler("text")});
37+
sub a_end_handler {
38+
my ($self, $tag) = @_;
39+
my $text = encode('utf8', join("", @{$self->handler("text")}));
3940
$text =~ s/^\s+//;
4041
$text =~ s/\s+$//;
4142
$text =~ s/\s+/ /g;
4243
print "T $text\n";
4344

44-
$self->handler("text", undef);
45+
$self->handler("text", undef);
4546
$self->handler("start", \&a_start_handler);
46-
$self->handler("end", undef);
47+
$self->handler("end", undef);
4748
}
48-

eg/hbody

+18-7
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,11 @@
1-
$doc = <<'EOT';
1+
#!/usr/bin/perl
2+
3+
use strict;
4+
use warnings;
5+
6+
use HTML::Parser ();
7+
8+
my $doc = <<'EOT';
29
310
<!-- This is not where <BODY> starts -->
411
<title>foo</title>
@@ -20,14 +27,18 @@ Howdy!
2027
2128
EOT
2229

23-
use HTML::Parser 3;
2430

2531
my $body_offset;
26-
HTML::Parser->new(start_h => [sub {
27-
return unless shift eq "body";
28-
$body_offset = shift;
29-
shift->eof; # tell the parser to stop
30-
}, "tagname,offset,self"])->parse($doc);
32+
HTML::Parser->new(
33+
start_h => [
34+
sub {
35+
return unless shift eq "body";
36+
$body_offset = shift;
37+
shift->eof; # tell the parser to stop
38+
},
39+
"tagname,offset,self"
40+
]
41+
)->parse($doc);
3142

3243
die "No <body> found" unless defined $body_offset;
3344

eg/hdisable

+17-15
Original file line numberDiff line numberDiff line change
@@ -1,27 +1,29 @@
1-
#!/usr/bin/perl -w
1+
#!/usr/bin/perl
22

3-
use HTML::Parser;
3+
use strict;
4+
use warnings;
5+
6+
use HTML::Parser ();
47
use HTML::Entities qw(encode_entities);
58

69
sub disable_tags_but {
7-
my($text, $allowed_tags) = @_;
10+
my ($text, $allowed_tags) = @_;
811

912
my @text;
10-
my %allowed_tag = map {$_ => 1} @{$allowed_tags || []};
13+
my %allowed_tag = map { $_ => 1 } @{$allowed_tags || []};
1114

1215
my $tag_h = sub {
13-
my($tag, $text) = @_;
14-
$text = encode_entities($text, "<")
15-
unless $allowed_tag{$tag};
16-
push(@text, $text);
16+
my ($tag, $text) = @_;
17+
$text = encode_entities($text, "<") unless $allowed_tag{$tag};
18+
push(@text, $text);
1719
};
18-
19-
HTML::Parser->new(start_h => [$tag_h, 'tagname, text'],
20-
end_h => [$tag_h, 'tagname, text'],
21-
default_h => [\@text, '@{text}'],
22-
)
23-
->parse($text)->eof;
24-
20+
21+
HTML::Parser->new(
22+
start_h => [$tag_h, 'tagname, text'],
23+
end_h => [$tag_h, 'tagname, text'],
24+
default_h => [\@text, '@{text}'],
25+
)->parse($text)->eof;
26+
2527
return join("", @text);
2628
}
2729

eg/hdump

+11-7
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,27 @@
1-
#!/usr/bin/perl -w
1+
#!/usr/bin/perl
2+
3+
# This script will output event information as it parses the HTML document.
4+
# This gives the user a "Parser's eye view" of an HTML document.
5+
6+
use strict;
7+
use warnings;
28

39
use HTML::Parser ();
4-
use Data::Dump ();
10+
use Data::Dumper qw(Dumper);
511

612
sub h {
7-
my($event, $line, $column, $text, $tagname, $attr) = @_;
13+
my ($event, $line, $column, $text, $tagname, $attr) = @_;
814

9-
my @d = (uc(substr($event,0,1)) . " L$line C$column");
15+
my @d = (uc(substr($event, 0, 1)) . " L$line C$column");
1016
substr($text, 40) = "..." if length($text) > 40;
1117
push(@d, $text);
1218
push(@d, $tagname) if defined $tagname;
1319
push(@d, $attr) if $attr;
1420

15-
print Data::Dump::dump(@d), "\n";
21+
print Dumper(@d), "\n";
1622
}
1723

1824
my $p = HTML::Parser->new(api_version => 3);
1925
$p->handler(default => \&h, "event, line, column, text, tagname, attr");
2026

2127
$p->parse_file(@ARGV ? shift : *STDIN);
22-
23-

eg/hform

+64-58
Original file line numberDiff line numberDiff line change
@@ -1,83 +1,89 @@
1-
#!/usr/bin/perl -w
1+
#!/usr/bin/perl
22

3+
# Print information about forms and their controls present in the HTML.
34
# See also HTML::Form module
45

6+
use strict;
7+
use warnings;
8+
59
use HTML::PullParser ();
610
use HTML::Entities qw(decode_entities);
7-
use Data::Dump qw(dump);
11+
use Data::Dumper qw(Dumper);
812

913
my @FORM_TAGS = qw(form input textarea button select option);
1014

11-
my $p = HTML::PullParser->new(file => shift || "xxx.html",
12-
start => 'tag, attr',
13-
end => 'tag',
14-
text => '@{text}',
15-
report_tags => \@FORM_TAGS,
16-
) || die "$!";
15+
my $p = HTML::PullParser->new(
16+
file => shift || "xxx.html",
17+
start => 'tag, attr',
18+
end => 'tag',
19+
text => '@{text}',
20+
report_tags => \@FORM_TAGS,
21+
) || die "$!";
1722

1823
# a little helper function
1924
sub get_text {
20-
my($p, $stop) = @_;
25+
my ($p, $stop) = @_;
2126
my $text;
2227
while (defined(my $t = $p->get_token)) {
23-
if (ref $t) {
24-
$p->unget_token($t) unless $t->[0] eq $stop;
25-
last;
26-
}
27-
else {
28-
$text .= $t;
29-
}
28+
if (ref $t) {
29+
$p->unget_token($t) unless $t->[0] eq $stop;
30+
last;
31+
}
32+
else {
33+
$text .= $t;
34+
}
3035
}
3136
return $text;
3237
}
3338

3439
my @forms;
3540
while (defined(my $t = $p->get_token)) {
36-
next unless ref $t; # skip text
41+
next unless ref $t; # skip text
3742
if ($t->[0] eq "form") {
38-
shift @$t;
39-
push(@forms, $t);
40-
while (defined(my $t = $p->get_token)) {
41-
next unless ref $t; # skip text
42-
last if $t->[0] eq "/form";
43-
if ($t->[0] eq "select") {
44-
my $sel = $t;
45-
push(@{$forms[-1]}, $t);
46-
while (defined(my $t = $p->get_token)) {
47-
next unless ref $t; # skip text
48-
last if $t->[0] eq "/select";
49-
#print "select ", dump($t), "\n";
50-
if ($t->[0] eq "option") {
51-
my $value = $t->[1]->{value};
52-
my $text = get_text($p, "/option");
53-
unless (defined $value) {
54-
$value = decode_entities($text);
55-
}
56-
push(@$sel, $value);
57-
}
58-
else {
59-
warn "$t->[0] inside select";
60-
}
61-
}
62-
}
63-
elsif ($t->[0] =~ /^\/?option$/) {
64-
warn "option tag outside select";
65-
}
66-
elsif ($t->[0] eq "textarea") {
67-
push(@{$forms[-1]}, $t);
68-
$t->[1]{value} = get_text($p, "/textarea");
69-
}
70-
elsif ($t->[0] =~ m,^/,) {
71-
warn "stray $t->[0] tag";
72-
}
73-
else {
74-
push(@{$forms[-1]}, $t);
75-
}
76-
}
43+
shift @$t;
44+
push(@forms, $t);
45+
while (defined(my $t = $p->get_token)) {
46+
next unless ref $t; # skip text
47+
last if $t->[0] eq "/form";
48+
if ($t->[0] eq "select") {
49+
my $sel = $t;
50+
push(@{$forms[-1]}, $t);
51+
while (defined(my $t = $p->get_token)) {
52+
next unless ref $t; # skip text
53+
last if $t->[0] eq "/select";
54+
55+
#print "select ", Dumper($t), "\n";
56+
if ($t->[0] eq "option") {
57+
my $value = $t->[1]->{value};
58+
my $text = get_text($p, "/option");
59+
unless (defined $value) {
60+
$value = decode_entities($text);
61+
}
62+
push(@$sel, $value);
63+
}
64+
else {
65+
warn "$t->[0] inside select";
66+
}
67+
}
68+
}
69+
elsif ($t->[0] =~ /^\/?option$/) {
70+
warn "option tag outside select";
71+
}
72+
elsif ($t->[0] eq "textarea") {
73+
push(@{$forms[-1]}, $t);
74+
$t->[1]{value} = get_text($p, "/textarea");
75+
}
76+
elsif ($t->[0] =~ m,^/,) {
77+
warn "stray $t->[0] tag";
78+
}
79+
else {
80+
push(@{$forms[-1]}, $t);
81+
}
82+
}
7783
}
7884
else {
79-
warn "form tag $t->[0] outside form";
85+
warn "form tag $t->[0] outside form";
8086
}
8187
}
8288

83-
print dump(\@forms), "\n";
89+
print Dumper(\@forms), "\n";

0 commit comments

Comments
 (0)