2
2
3
3
namespace WordPress \DataLiberation \Importer ;
4
4
5
+ use Rowbot \URL \URL ;
5
6
use WordPress \ByteStream \ReadStream \FileReadStream ;
6
7
use WordPress \DataLiberation \BlockMarkup \BlockMarkupUrlProcessor ;
7
8
use WordPress \DataLiberation \EntityReader \EntityReaderIterator ;
@@ -201,7 +202,7 @@ protected function initialize_from_cursor( $cursor ) {
201
202
}
202
203
if ( ! empty ( $ cursor ['site_url_mapping ' ] ) ) {
203
204
foreach ( $ cursor ['site_url_mapping ' ] as $ pair ) {
204
- $ this ->add_site_url_mapping ( $ pair [0 ], $ pair [1 ] );
205
+ $ this ->add_site_url_mapping ( $ pair [' from ' ], $ pair [' to ' ] );
205
206
}
206
207
}
207
208
if ( ! empty ( $ cursor ['site_url_mapping_candidates ' ] ) ) {
@@ -216,11 +217,15 @@ protected function set_source_site_url( $source_site_url ) {
216
217
// Every subsequent call to set_source_site_url() will
217
218
// override that mapping.
218
219
$ this ->site_url_mapping [-1 ] = array (
219
- WPURL ::parse ( $ source_site_url ),
220
- WPURL ::parse ( $ this ->options ['new_site_url ' ] ),
220
+ ' from ' => WPURL ::parse ( $ source_site_url ),
221
+ ' to ' => WPURL ::parse ( $ this ->options ['new_site_url ' ] ),
221
222
);
222
223
}
223
224
225
+ protected function get_source_site_url () {
226
+ return $ this ->site_url_mapping [-1 ]['from ' ];
227
+ }
228
+
224
229
public function get_site_url_mapping_candidates () {
225
230
// Only return the candidates that have been spotted in the last index_entities() call.
226
231
if ( self ::STAGE_INDEX_ENTITIES !== $ this ->stage ) {
@@ -236,15 +241,18 @@ public function get_site_url_mapping_candidates() {
236
241
}
237
242
238
243
public function add_site_url_mapping ( $ from , $ to ) {
239
- $ this ->site_url_mapping [] = array ( WPURL ::parse ( $ from ), WPURL ::parse ( $ to ) );
244
+ $ this ->site_url_mapping [] = array (
245
+ 'from ' => WPURL ::parse ( $ from ),
246
+ 'to ' => WPURL ::parse ( $ to ),
247
+ );
240
248
}
241
249
242
250
public function get_reentrancy_cursor () {
243
251
$ serialized_site_url_mapping = array ();
244
252
foreach ( $ this ->site_url_mapping as $ pair ) {
245
253
$ serialized_site_url_mapping [] = array (
246
- (string ) $ pair [0 ],
247
- (string ) $ pair [1 ],
254
+ ' from ' => (string ) $ pair [' from ' ],
255
+ ' to ' => (string ) $ pair [' to ' ],
248
256
);
249
257
}
250
258
return json_encode (
@@ -699,7 +707,13 @@ protected function import_next_entity() {
699
707
unset( $ data ['attachment_url ' ] );
700
708
$ data ['local_file_path ' ] = $ this ->options ['uploads_path ' ] . '/ ' . $ asset_filename ;
701
709
} else {
702
- // @TODO: Consider rewriting the guid, too.
710
+ /**
711
+ * @TODO: Think through guid rewriting. It may matter for WXR posts,
712
+ * where the guid often describes the page URL, but it may
713
+ * get in the way for Markdown imports, where the guid is
714
+ * either inferred from the static file path or fixed and
715
+ * sourced from metadata.
716
+ */
703
717
foreach ( array ( 'post_content ' , 'post_excerpt ' ) as $ key ) {
704
718
if ( ! isset ( $ data [ $ key ] ) ) {
705
719
continue ;
@@ -746,21 +760,14 @@ protected function import_next_entity() {
746
760
}
747
761
748
762
$ raw_url_before = $ p ->get_raw_url ();
749
- $ parsed_url_before = clone $ p ->get_parsed_url ();
750
-
751
763
$ mapping_pair = $ this ->get_url_mapping_pair ( $ p ->get_parsed_url () );
752
764
$ should_rewrite_base_url = false !== $ mapping_pair ;
753
765
if ( $ should_rewrite_base_url ) {
754
- $ p ->replace_base_url ( $ mapping_pair [1 ], $ mapping_pair [0 ] );
766
+ $ p ->replace_base_url ( $ mapping_pair [' to ' ], $ mapping_pair [' from ' ] );
755
767
}
756
768
do_action ( 'data_liberation.stream_importer.rewrite_url ' , $ p , [
757
- 'base_url_rewritten ' => $ should_rewrite_base_url ,
758
- 'new_base_url ' => $ mapping_pair [1 ],
759
- 'old_base_url ' => $ mapping_pair [0 ],
769
+ 'base_url_mapping ' => $ mapping_pair ,
760
770
'raw_url_before ' => $ raw_url_before ,
761
- 'parsed_url_before ' => $ parsed_url_before ,
762
- 'raw_url_after ' => $ p ->get_raw_url (),
763
- 'parsed_url_after ' => $ p ->get_parsed_url (),
764
771
'entity ' => $ entity ,
765
772
]);
766
773
}
@@ -918,8 +925,7 @@ protected function is_child_of_a_mapped_url( $url_detected_in_content ) {
918
925
919
926
protected function get_url_mapping_pair ( $ url_detected_in_content ) {
920
927
foreach ( $ this ->site_url_mapping as $ pair ) {
921
- $ parsed_base_url = $ pair [0 ];
922
- if ( is_child_url_of ( $ url_detected_in_content , $ parsed_base_url ) ) {
928
+ if ( is_child_url_of ( $ url_detected_in_content , $ pair ['from ' ] ) ) {
923
929
return $ pair ;
924
930
}
925
931
}
0 commit comments