5
5
use Rowbot \URL \URL ;
6
6
use WordPress \ByteStream \ReadStream \FileReadStream ;
7
7
use WordPress \DataLiberation \BlockMarkup \BlockMarkupUrlProcessor ;
8
+ use WordPress \DataLiberation \DataLiberationException ;
8
9
use WordPress \DataLiberation \EntityReader \EntityReaderIterator ;
9
10
use WordPress \DataLiberation \EntityReader \WXREntityReader ;
10
11
use WordPress \DataLiberation \URL \WPURL ;
@@ -50,6 +51,10 @@ class StreamImporter {
50
51
* in the imported content.
51
52
*/
52
53
protected $ site_url_mapping = array ();
54
+ /**
55
+ * A list of source media root URLs. IMG src URLs located under any of them are treated as media assets to frontload.
56
+ */
57
+ protected $ source_media_root_urls = array ();
53
58
/**
54
59
* A list of candidate base URLs that have been spotted in the WXR file.
55
60
*
@@ -73,15 +78,15 @@ class StreamImporter {
73
78
* it and how.
74
79
*
75
80
* Once the API consumer decides on the mapping, it can call
76
- * add_site_url_mapping () to tell the importer what to map that domain to.
81
+ * add_url_mapping() to tell the importer what to map that domain to.
77
82
*/
78
83
protected $ site_url_mapping_candidates = array ();
79
84
protected $ entity_reader_factory ;
80
85
/**
81
86
* @param array|string|null $query {
82
87
* @type string $uploads_path The directory to download the media attachments to.
83
88
* E.g. WP_CONTENT_DIR . '/uploads'
84
- * @type string $uploads_url The URL where the media attachments will be accessible
89
+ * @type string $new_media_root_url The URL where the media attachments will be accessible
85
90
* after the import. E.g. http://127.0.0.1:9400/wp-content/uploads/
86
91
* }
87
92
*/
@@ -202,7 +207,7 @@ protected function initialize_from_cursor( $cursor ) {
202
207
}
203
208
if ( ! empty ( $ cursor ['site_url_mapping ' ] ) ) {
204
209
foreach ( $ cursor ['site_url_mapping ' ] as $ pair ) {
205
- $ this ->add_site_url_mapping ( $ pair ['from ' ], $ pair ['to ' ] );
210
+ $ this ->add_url_mapping ( $ pair ['from ' ], $ pair ['to ' ] );
206
211
}
207
212
}
208
213
if ( ! empty ( $ cursor ['site_url_mapping_candidates ' ] ) ) {
@@ -218,14 +223,10 @@ protected function set_source_site_url( $source_site_url ) {
218
223
// override that mapping.
219
224
$ this ->site_url_mapping [-1 ] = array (
220
225
'from ' => WPURL ::parse ( $ source_site_url ),
221
- 'to ' => WPURL ::parse ( $ this ->options ['new_site_url ' ] ),
226
+ 'to ' => WPURL ::parse ( $ this ->options ['new_site_content_root_url ' ] ),
222
227
);
223
228
}
224
229
225
- protected function get_source_site_url () {
226
- return $ this ->site_url_mapping [-1 ]['from ' ];
227
- }
228
-
229
230
public function get_site_url_mapping_candidates () {
230
231
// Only return the candidates that have been spotted in the last index_entities() call.
231
232
if ( self ::STAGE_INDEX_ENTITIES !== $ this ->stage ) {
@@ -240,13 +241,17 @@ public function get_site_url_mapping_candidates() {
240
241
return $ new_candidates ;
241
242
}
242
243
243
- public function add_site_url_mapping ( $ from , $ to ) {
244
+ public function add_url_mapping ( $ from_url , $ to_url ) {
244
245
$ this ->site_url_mapping [] = array (
245
- 'from ' => WPURL ::parse ( $ from ),
246
- 'to ' => WPURL ::parse ( $ to ),
246
+ 'from ' => WPURL ::parse ( $ from_url ),
247
+ 'to ' => WPURL ::parse ( $ to_url ),
247
248
);
248
249
}
249
250
251
+ public function add_source_media_root_url ( $ source_media_root_url ) {
252
+ $ this ->source_media_root_urls [] = WPURL ::parse ( $ source_media_root_url );
253
+ }
254
+
250
255
public function get_reentrancy_cursor () {
251
256
$ serialized_site_url_mapping = array ();
252
257
foreach ( $ this ->site_url_mapping as $ pair ) {
@@ -272,8 +277,11 @@ public function get_reentrancy_cursor() {
272
277
}
273
278
274
279
protected static function parse_options ( $ options ) {
275
- if ( ! isset ( $ options ['new_site_url ' ] ) ) {
276
- $ options ['new_site_url ' ] = get_site_url ();
280
+ if ( ! isset ( $ options ['source_site_url ' ] ) ) {
281
+ throw new DataLiberationException ( 'The "source_site_url" option is required ' );
282
+ }
283
+ if ( ! isset ( $ options ['new_site_content_root_url ' ] ) ) {
284
+ $ options ['new_site_content_root_url ' ] = get_site_url ();
277
285
}
278
286
279
287
if ( ! isset ( $ options ['uploads_path ' ] ) ) {
@@ -282,11 +290,11 @@ protected static function parse_options( $options ) {
282
290
// Remove the trailing slash to make concatenation easier later.
283
291
$ options ['uploads_path ' ] = rtrim ( $ options ['uploads_path ' ], '/ ' );
284
292
285
- if ( ! isset ( $ options ['uploads_url ' ] ) ) {
286
- $ options ['uploads_url ' ] = rtrim ( $ options [ ' new_site_url ' ], ' / ' ) . '/wp-content/uploads ' ;
293
+ if ( ! isset ( $ options ['new_media_root_url ' ] ) ) {
294
+ $ options ['new_media_root_url ' ] = get_site_url ( ) . '/wp-content/uploads ' ;
287
295
}
288
296
// Remove the trailing slash to make concatenation easier later.
289
- $ options ['uploads_url ' ] = rtrim ( $ options ['uploads_url ' ], '/ ' );
297
+ $ options ['new_media_root_url ' ] = rtrim ( $ options ['new_media_root_url ' ], '/ ' );
290
298
291
299
return $ options ;
292
300
}
@@ -297,8 +305,18 @@ protected function __construct(
297
305
) {
298
306
$ this ->entity_reader_factory = $ entity_reader_factory ;
299
307
$ this ->options = $ options ;
300
- if ( isset ( $ options ['default_source_site_url ' ] ) ) {
301
- $ this ->set_source_site_url ( $ options ['default_source_site_url ' ] );
308
+ $ this ->set_source_site_url ( $ options ['source_site_url ' ] );
309
+
310
+ if ( isset ( $ options ['source_media_root_urls ' ] ) ) {
311
+ foreach ( $ options ['source_media_root_urls ' ] as $ source_media_root_url ) {
312
+ $ this ->add_source_media_root_url ( $ source_media_root_url );
313
+ }
314
+ }
315
+
316
+ if ( isset ( $ options ['additional_url_mappings ' ] ) ) {
317
+ foreach ( $ options ['additional_url_mappings ' ] as $ additional_url_mapping ) {
318
+ $ this ->add_url_mapping ( $ additional_url_mapping ['from ' ], $ additional_url_mapping ['to ' ] );
319
+ }
302
320
}
303
321
}
304
322
@@ -477,7 +495,7 @@ protected function index_next_entities( $count = 10000 ) {
477
495
return true ;
478
496
}
479
497
480
- public function get_new_site_url_mapping_candidates () {
498
+ public function get_new_site_content_root_url_mapping_candidates () {
481
499
$ candidates = array ();
482
500
foreach ( $ this ->site_url_mapping_candidates as $ base_url => $ status ) {
483
501
if ( false === $ status ) {
@@ -737,7 +755,7 @@ protected function import_next_entity() {
737
755
$ data ['local_file_path ' ] ?? $ data ['slug ' ] ?? null
738
756
);
739
757
if ( file_exists ( $ this ->options ['uploads_path ' ] . '/ ' . $ asset_filename ) ) {
740
- $ raw_url = $ this ->options ['uploads_url ' ] . '/ ' . $ asset_filename ;
758
+ $ raw_url = $ this ->options ['new_media_root_url ' ] . '/ ' . $ asset_filename ;
741
759
$ p ->set_url (
742
760
$ raw_url ,
743
761
WPURL ::parse ( $ raw_url )
@@ -766,7 +784,7 @@ protected function import_next_entity() {
766
784
$ p ->replace_base_url ( $ mapping_pair ['to ' ], $ mapping_pair ['from ' ] );
767
785
}
768
786
do_action ( 'data_liberation.stream_importer.rewrite_url ' , $ p , [
769
- 'base_url_mapping ' => $ mapping_pair ,
787
+ 'applied_base_url_mapping ' => $ mapping_pair ,
770
788
'raw_url_before ' => $ raw_url_before ,
771
789
'entity ' => $ entity ,
772
790
]);
@@ -912,11 +930,18 @@ protected function rewrite_attachment_url( string $raw_url, $context_path = null
912
930
* @TODO: What other asset types are there?
913
931
*/
914
932
protected function url_processor_matched_asset_url ( BlockMarkupUrlProcessor $ p ) {
915
- return (
916
- $ p ->get_tag () === 'IMG ' &&
917
- $ p ->get_inspected_attribute_name () === 'src ' &&
918
- $ this ->is_child_of_a_mapped_url ( $ p ->get_parsed_url () )
919
- );
933
+ if ( $ p ->get_tag () !== 'IMG ' ) {
934
+ return false ;
935
+ }
936
+ if ( $ p ->get_inspected_attribute_name () !== 'src ' ) {
937
+ return false ;
938
+ }
939
+ foreach ( $ this ->source_media_root_urls as $ source_media_root_url ) {
940
+ if ( is_child_url_of ( $ p ->get_parsed_url (), $ source_media_root_url ) ) {
941
+ return true ;
942
+ }
943
+ }
944
+ return false ;
920
945
}
921
946
922
947
protected function is_child_of_a_mapped_url ( $ url_detected_in_content ) {
0 commit comments