Skip to content

Commit facd0ea

Browse files
committed
Polish the method and variable names
1 parent 9f88ad0 commit facd0ea

File tree

3 files changed, with 46 additions and 34 deletions.

components/DataLiberation/BlockMarkup/BlockMarkupUrlProcessor.php

Lines changed: 17 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -277,17 +277,7 @@ public function replace_base_url( URL $to_url, ?URL $base_url=null ) {
277277
return false;
278278
}
279279

280-
$is_relative = (
281-
// The URL-rewriting specific logic. We make an assumption that only
282-
// absolute URLs are detected in text nodes.
283-
// @TODO: Verify this assumption, evaluate whether this is the right
284-
// place to place this logic. Perhaps this *method* could be
285-
// decoupled into two separate *functions*?
286-
$this->get_token_type() !== '#text' &&
287-
! str_starts_with( $this->get_raw_url(), 'http://' ) &&
288-
! str_starts_with( $this->get_raw_url(), 'https://' )
289-
);
290-
if ( ! $is_relative ) {
280+
if ( ! $this->is_url_relative() ) {
291281
$this->set_url( $new_raw_url, $updated_url );
292282
return true;
293283
}
@@ -304,6 +294,22 @@ public function replace_base_url( URL $to_url, ?URL $base_url=null ) {
304294
return true;
305295
}
306296

297+
/**
298+
* Returns true if the currently matched URL is relative.
*
* A URL is reported as relative only when the current token is not a
* '#text' node and the raw URL does not start with 'http://',
* 'https://', or '//'. The prefix comparison is case-sensitive, and a
* raw URL with any other prefix is reported as relative.
299+
*
300+
* @return bool Whether the currently matched URL is relative.
301+
*/
302+
public function is_url_relative() {
303+
// Assumption:
304+
// - only absolute URLs are detected in text nodes.
//   That is why a '#text' token is never reported as relative.
305+
return (
306+
$this->get_token_type() !== '#text' &&
307+
! str_starts_with( $this->get_raw_url(), 'http://' ) &&
308+
! str_starts_with( $this->get_raw_url(), 'https://' ) &&
309+
! str_starts_with( $this->get_raw_url(), '//' )
310+
);
311+
}
312+
307313
public function get_inspected_attribute_name() {
308314
if ( '#tag' !== $this->get_token_type() ) {
309315
return false;

components/DataLiberation/Importer/StreamImporter.php

Lines changed: 24 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,7 @@
22

33
namespace WordPress\DataLiberation\Importer;
44

5+
use Rowbot\URL\URL;
56
use WordPress\ByteStream\ReadStream\FileReadStream;
67
use WordPress\DataLiberation\BlockMarkup\BlockMarkupUrlProcessor;
78
use WordPress\DataLiberation\EntityReader\EntityReaderIterator;
@@ -201,7 +202,7 @@ protected function initialize_from_cursor( $cursor ) {
201202
}
202203
if ( ! empty( $cursor['site_url_mapping'] ) ) {
203204
foreach ( $cursor['site_url_mapping'] as $pair ) {
204-
$this->add_site_url_mapping( $pair[0], $pair[1] );
205+
$this->add_site_url_mapping( $pair['from'], $pair['to'] );
205206
}
206207
}
207208
if ( ! empty( $cursor['site_url_mapping_candidates'] ) ) {
@@ -216,11 +217,15 @@ protected function set_source_site_url( $source_site_url ) {
216217
// Every subsequent call to set_source_site_url() will
217218
// override that mapping.
218219
$this->site_url_mapping[-1] = array(
219-
WPURL::parse( $source_site_url ),
220-
WPURL::parse( $this->options['new_site_url'] ),
220+
'from' => WPURL::parse( $source_site_url ),
221+
'to' => WPURL::parse( $this->options['new_site_url'] ),
221222
);
222223
}
223224

225+
/**
* Returns the 'from' entry of the site URL mapping stored under key -1.
*
* NOTE(review): key -1 is the slot set_source_site_url() writes to, so
* this presumably expects that method to have run first; confirm with
* the callers.
*
* @return mixed The value stored as 'from' in $this->site_url_mapping[-1].
*/
protected function get_source_site_url() {
226+
return $this->site_url_mapping[-1]['from'];
227+
}
228+
224229
public function get_site_url_mapping_candidates() {
225230
// Only return the candidates that have been spotted in the last index_entities() call.
226231
if ( self::STAGE_INDEX_ENTITIES !== $this->stage ) {
@@ -236,15 +241,18 @@ public function get_site_url_mapping_candidates() {
236241
}
237242

238243
/**
* Registers one site URL mapping.
*
* Both arguments are passed through WPURL::parse() and the results are
* appended to $this->site_url_mapping as a single entry. The removed row
* stored them as a positional pair; the added rows store them under the
* 'from' and 'to' keys.
*
* @param mixed $from URL to map from. Presumably a URL string; confirm
*                    what WPURL::parse() accepts.
* @param mixed $to   URL to map to. Presumably a URL string; confirm
*                    what WPURL::parse() accepts.
*/
public function add_site_url_mapping( $from, $to ) {
239-
$this->site_url_mapping[] = array( WPURL::parse( $from ), WPURL::parse( $to ) );
244+
$this->site_url_mapping[] = array(
245+
'from' => WPURL::parse( $from ),
246+
'to' => WPURL::parse( $to ),
247+
);
240248
}
241249

242250
public function get_reentrancy_cursor() {
243251
$serialized_site_url_mapping = array();
244252
foreach ( $this->site_url_mapping as $pair ) {
245253
$serialized_site_url_mapping[] = array(
246-
(string) $pair[0],
247-
(string) $pair[1],
254+
'from' => (string) $pair['from'],
255+
'to' => (string) $pair['to'],
248256
);
249257
}
250258
return json_encode(
@@ -699,7 +707,13 @@ protected function import_next_entity() {
699707
unset( $data['attachment_url'] );
700708
$data['local_file_path'] = $this->options['uploads_path'] . '/' . $asset_filename;
701709
} else {
702-
// @TODO: Consider rewriting the guid, too.
710+
/**
711+
* @TODO: Think through guid rewriting. It may matter for WXR posts,
712+
* where the guid often describes the page URL, but it may
713+
* get in the way for Markdown imports, where the guid is
714+
* either inferred from the static file path or fixed and
715+
* sourced from metadata.
716+
*/
703717
foreach ( array( 'post_content', 'post_excerpt' ) as $key ) {
704718
if ( ! isset( $data[ $key ] ) ) {
705719
continue;
@@ -746,21 +760,14 @@ protected function import_next_entity() {
746760
}
747761

748762
$raw_url_before = $p->get_raw_url();
749-
$parsed_url_before = clone $p->get_parsed_url();
750-
751763
$mapping_pair = $this->get_url_mapping_pair( $p->get_parsed_url() );
752764
$should_rewrite_base_url = false !== $mapping_pair;
753765
if ( $should_rewrite_base_url ) {
754-
$p->replace_base_url( $mapping_pair[1], $mapping_pair[0] );
766+
$p->replace_base_url( $mapping_pair['to'], $mapping_pair['from'] );
755767
}
756768
do_action( 'data_liberation.stream_importer.rewrite_url', $p, [
757-
'base_url_rewritten' => $should_rewrite_base_url,
758-
'new_base_url' => $mapping_pair[1],
759-
'old_base_url' => $mapping_pair[0],
769+
'base_url_mapping' => $mapping_pair,
760770
'raw_url_before' => $raw_url_before,
761-
'parsed_url_before' => $parsed_url_before,
762-
'raw_url_after' => $p->get_raw_url(),
763-
'parsed_url_after' => $p->get_parsed_url(),
764771
'entity' => $entity,
765772
]);
766773
}
@@ -918,8 +925,7 @@ protected function is_child_of_a_mapped_url( $url_detected_in_content ) {
918925

919926
protected function get_url_mapping_pair( $url_detected_in_content ) {
920927
foreach ( $this->site_url_mapping as $pair ) {
921-
$parsed_base_url = $pair[0];
922-
if ( is_child_url_of( $url_detected_in_content, $parsed_base_url ) ) {
928+
if ( is_child_url_of( $url_detected_in_content, $pair['from'] ) ) {
923929
return $pair;
924930
}
925931
}

examples/import-static-files/import-markdown-directory.php

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -138,19 +138,19 @@ function map_file_path_to_wordpress_url( $path ) {
138138
}
139139

140140
add_action(
141-
// Link in the post content
141+
// Rewrite URLs in the post content
142142
'data_liberation.stream_importer.rewrite_url',
143143
function ( $processor, $context ) use ( $console_writer, $chrooted_fs ) {
144-
if(!$context['base_url_rewritten']) {
144+
if(!$context['base_url_mapping']) {
145145
return;
146146
}
147147

148148
$path = $processor->get_parsed_url()->pathname;
149149
$path_before_rewriting = $path;
150150
$site_url_path_prefix = '';
151-
if($context['new_base_url']) {
152-
if(str_starts_with($path, $context['new_base_url']->pathname)) {
153-
$site_url_path_prefix = rtrim($context['new_base_url']->pathname, '/');
151+
if($context['base_url_mapping']) {
152+
if(str_starts_with($path, $context['base_url_mapping']['to']->pathname)) {
153+
$site_url_path_prefix = rtrim($context['base_url_mapping']['to']->pathname, '/');
154154
$path = substr($path, strlen($site_url_path_prefix));
155155
}
156156
}

0 commit comments

Comments
 (0)