@@ -80,7 +80,7 @@ public function __construct(array $config = [])
80
80
*/
81
81
public function fetch(UriInterface $uri, array $options = []): array
{
    // Pass the URI through as well, so fetchFromDocument() receives it together
    // with the crawler and the options.
    $this->fetchFromDocument($this->getCrawler($uri), $options, $uri);

    // The return value of fetchFromDocument() is not used here; the snippets
    // property is returned directly.
    return $this->snippets;
}
@@ -90,19 +90,20 @@ public function fetch(UriInterface $uri, array $options = []): array
90
90
*
91
91
* @param string|Symfony\Component\DomCrawler\Crawler $document
92
92
* @param array $options
93
+ * @param Psr\Http\Message\UriInterface $uri
93
94
* @return Snippetify\SnippetSniffer\Common\Snippet[]
94
95
*/
95
- public function fetchFromDocument ($ document , array $ options = []): array
96
+ public function fetchFromDocument ($ document , array $ options = [], ? UriInterface $ uri = null ): array
96
97
{
97
- $ crawler = $ document instanceof Crawler ? $ document : new Crawler ($ document );
98
+ $ crawler = $ document instanceof Crawler ? $ document : new Crawler ($ document, $ uri );
98
99
99
100
try {
100
101
101
102
$ htmlTags = explode (', ' , $ this ->config ['html_tags ' ]['snippet ' ]);
102
103
103
104
foreach ($ htmlTags as $ value ) {
104
- $ crawler ->filter ($ value )->each (function ($ node ) use ($ crawler ) {
105
- $ this ->hydrateSnippets ($ node , $ crawler );
105
+ $ crawler ->filter ($ value )->each (function ($ node ) use ($ crawler, $ options ) {
106
+ $ this ->hydrateSnippets ($ node , $ crawler, $ options );
106
107
});
107
108
}
108
109
@@ -134,24 +135,25 @@ protected function getCrawler(UriInterface $uri): Crawler
134
135
*/
135
136
protected function hydrateSnippets(Crawler $node, Crawler $crawler, array $meta = []): void
{
    // Skip nodes already reported by containsSnippet() as collected.
    if ($this->containsSnippet($node)) {
        return;
    }

    // Stop collecting once the per-page limit carried in $meta has been reached.
    if ($this->hasMoreSnippetsPerPage($crawler, $meta)) {
        return;
    }

    // fetchSnippet() may yield a falsy value; only truthy results are stored.
    $snippet = $this->fetchSnippet($node, $crawler, $meta);

    if ($snippet) {
        $this->snippets[] = $snippet;
    }
}
141
144
142
145
/**
143
146
* Contains snippet.
144
147
*
145
- * @param Snippetify\SnippetSniffer\Common\Snippet[] $snippets
146
148
* @param Symfony\Component\DomCrawler\Crawler $node
147
149
* @return bool
148
150
*/
149
- protected function containsSnippet (array $ snippets , Crawler $ node ): bool
151
+ protected function containsSnippet (Crawler $ node ): bool
150
152
{
151
153
$ has = false ;
152
154
153
155
try {
154
- foreach ($ snippets as $ snippet ) {
156
+ foreach ($ this -> snippets as $ snippet ) {
155
157
if ($ snippet ->code == $ node ->text ()) {
156
158
$ has = true ;
157
159
break ;
@@ -164,6 +166,37 @@ protected function containsSnippet(array $snippets, Crawler $node): bool
164
166
return $ has ;
165
167
}
166
168
169
/**
 * Has more snippets per page.
 *
 * Tells whether the number of snippets already retrieved for the crawled
 * page has reached the 'snippets_per_page' limit given in $meta. When the
 * limit is missing or empty, no limit applies and false is returned.
 *
 * @param  Symfony\Component\DomCrawler\Crawler  $crawler
 * @param  array  $meta  May carry a 'snippets_per_page' entry.
 * @return bool  True when the limit is reached or exceeded.
 */
protected function hasMoreSnippetsPerPage(Crawler $crawler, array $meta): bool
{
    // empty() also covers a missing key, null, 0 and '' — all mean "no limit".
    if (empty($meta['snippets_per_page'])) {
        return false;
    }

    return $meta['snippets_per_page'] <= $this->countRetrievedSnippetsPerPage($crawler);
}
182
+
183
/**
 * Count retrieved snippets per page.
 *
 * Counts the snippets held in $this->snippets whose meta 'url' is strictly
 * equal to the URI reported by the given crawler.
 *
 * @param  Symfony\Component\DomCrawler\Crawler  $crawler
 * @return int
 */
protected function countRetrievedSnippetsPerPage(Crawler $crawler): int
{
    // The crawler URI does not change while iterating, so read it once.
    $pageUri = $crawler->getUri();
    $count = 0;

    foreach ($this->snippets as $snippet) {
        // A snippet without a 'url' entry is compared as null instead of
        // raising an undefined-index warning.
        if (($snippet->meta['url'] ?? null) === $pageUri) {
            $count++;
        }
    }

    return $count;
}
199
+
167
200
/**
168
201
* Fetch snippet.
169
202
*
@@ -224,6 +257,7 @@ protected function fetchTags(Crawler $node): array
224
257
*/
225
258
protected function fetchWebsiteMetadata (Crawler $ crawler ): array
226
259
{
260
+ $ url = new Uri ($ crawler ->getUri ());
227
261
$ title = $ crawler ->filter ('title ' )->text ();
228
262
$ siteIcon = $ crawler ->filter ('link[rel="icon"] ' );
229
263
$ ogImage = $ crawler ->filter ('meta[property="og:image"] ' );
@@ -251,7 +285,7 @@ protected function fetchWebsiteMetadata(Crawler $crawler): array
251
285
return [
252
286
'name ' => $ name ,
253
287
'brand ' => $ brand ,
254
- 'url ' => ( new Uri ( $ crawler -> getUri ())) ->getHost (),
288
+ 'url ' => $ url -> getScheme () . ' :// ' . $ url ->getHost (),
255
289
];
256
290
}
257
291
0 commit comments