Skip to content

Commit b774cf8

Browse files
committed
Logging and consistency improvements
1 parent 2558754 commit b774cf8

File tree

1 file changed

+27
-20
lines changed

1 file changed

+27
-20
lines changed

pgsql2osm.py

+27-20
Original file line numberDiff line numberDiff line change
@@ -334,6 +334,7 @@ def main(self) :
334334
t=asyncio.run(stream_osm_xml(self))
335335
except ZeroDivisionError :
336336
print('\nError: boundary is empty or database has no data within',file=sys.stderr)
337+
sys.stderr.flush()
337338

338339
async def test(self) :
339340
""" Test: checks if get_lonlat exists, is executable.
@@ -375,7 +376,11 @@ def __init__(self) :
375376
#os.get_terminal_size() will error out when .isatty() is false.
376377
# instead of calling isatty() on each line, save it once at program start
377378
# (because it does not change)
378-
self.isatty=sys.stderr.isatty()
379+
try :
380+
#sys.stderr.isatty() seems to return True when <prog>|tail -f /dev/stdin
381+
self.isatty=os.get_terminal_size().columns>0
382+
except OSError :
383+
self.isatty=False
379384

380385
def check_ready(self) :
381386
assert self._ready, 'Need to run .set_phases first'
@@ -394,9 +399,10 @@ def next_phase(self) :
394399

395400
def save_clearedline(self) :
396401
''' Simply write a newline at the end of the previous clearline: save it.
397-
Warning, will garbe output if not preceded by a clearline=True log call.
402+
Warning, will garble output if not preceded by a clearline=True log call.
398403
'''
399404
print(end='\n',file=sys.stderr)
405+
sys.stderr.flush()
400406
#same behaviour whether followed by a clearline or not
401407
self.previous_clearline=False
402408

@@ -493,23 +499,26 @@ def ratefmt(self,r:float) :
493499
''' Return str(r) with 3 sigfigs
494500
'''
495501
for i,letter in ((1e12,'T'),(1e9,'G'),(1e6,'M'),(1e3,'K'),(1,'')) :
496-
if r>i :
502+
if r>i or i==1: #give up at <1.0 point
497503
tgt=r/i
498504
# ljust for 3 -> 3.00
499-
if tgt<10.0 :
505+
if tgt<1.0 :
506+
return str(round(tgt,3)).ljust(5,'0')+letter
507+
elif tgt<10.0 :
500508
return str(round(tgt,2)).ljust(4,'0')+letter
501509
elif tgt<100.0 :
502510
return str(round(tgt,1)).ljust(4,'0')+letter
503511
else :
504512
return str(round(tgt)).ljust(3,'0')+letter
505-
#else give up
506-
return n(r)
507513

508514
def simplerate(self,count:int,msg:str,tot:int,lastline=False) :
509515
""" Show a rate progress bar on count from tot items in format:
510516
'{count} ({count_rate}/s) / {tot} {msg} {percent:count/tot}%'
511517
"""
512518
self.is_simplerate=True
519+
if count>1e6 :
520+
#set higher for a smoother rate display
521+
self.sample_length=100_000
513522
if not lastline :
514523
self.samples_append((count,))
515524
self.prev_args=(count,msg,tot)
@@ -552,6 +561,8 @@ def multirate(self,ns:typing.Tuple[int],msgs:typing.Tuple[str],count:int,total:i
552561
how they will be printed out. Not .simplerate() though, it is separate.
553562
"""
554563
self.is_simplerate=False
564+
if ns[0]>1e6 :
565+
self.sample_length=100_000
555566
if not lastline :
556567
self.samples_append(ns)
557568
self.prev_args=(ns,msgs,count,total)
@@ -565,10 +576,7 @@ def multirate(self,ns:typing.Tuple[int],msgs:typing.Tuple[str],count:int,total:i
565576
r_ss=['(0/s)' for ix in enumerate(ns)]
566577
rates=[j for ix,i in enumerate(ns) for j in (n(i),r_ss[ix],msgs[ix])]
567578
l=(*rates,n(count)+' / '+n(total),' ',self.percent(count,total),)
568-
if lastline :
569-
self.log(*l,clearline=True)
570-
else :
571-
self.log(*l,clearline=True)
579+
self.log(*l,clearline=True)
572580

573581
def finishrate(self,lastline=True) :
574582
""" Any currently running rate printer (simplerate,rate,doublerate,triplerate)
@@ -581,11 +589,13 @@ def finishrate(self,lastline=True) :
581589
self.simplerate(*self.prev_args,lastline=True)
582590
else :
583591
self.multirate(*self.prev_args,lastline=True)
592+
self.save_clearedline()
584593
#reset rate measurement
585594
self.samples=[]
586595
self.times=[]
587596
self.prev_print_t=0
588597
self.prev_args=None
598+
self.sample_length=10_000
589599

590600
def percent(self,numer:int,denom:int)->str :
591601
''' Return the str(float(numer/denom)*100) with 3 sigfigs,
@@ -612,7 +622,6 @@ async def chain(*generators:typing.Iterator)->typing.Iterator:
612622

613623
async def get_latlon_str_from_flatnodes(osm_ids:typing.Collection[int],
614624
s:Settings)->typing.Iterator :
615-
#beware, need to exchange lonlat -> latlon
616625
a=await asyncio.create_subprocess_exec(s.get_lonlat_binary,s.nodes_file,
617626
stdout=asyncio.subprocess.PIPE,stdin=asyncio.subprocess.PIPE)
618627
# some osm_ids may error out. in that case get_lonlat just ignores them.
@@ -628,6 +637,7 @@ async def get_latlon_str_from_flatnodes(osm_ids:typing.Collection[int],
628637
while (line:=(await a.stdout.readline()).strip().decode()) :
629638
#l.log('read line',line)
630639
x,y,osm_id=line.split(';')
640+
#beware, need to exchange lonlat -> latlon
631641
yield (osm_id,y,x)
632642

633643
def all_nwr_within(s:Settings,a:Accumulator) :
@@ -638,7 +648,7 @@ def all_nwr_within(s:Settings,a:Accumulator) :
638648
s.c.execute(f'SELECT osm_id FROM {tbl_name} WHERE {constr};')
639649
for row in g_from_cursor(s.c,verbose=True,prefix_msg=tbl_name+' ') :
640650
a.add('nodes',row['osm_id'])
641-
l.log(a.len('nodes'),'nodes within bounds')
651+
l.log(n(a.len('nodes')),'nodes within bounds')
642652

643653
# 1b) select all ways,rels FROM planet_osm_polygon WHERE way ST_Within(bbox);
644654
constr,tbl_name=s.make_bounds_constr('_polygon')
@@ -650,7 +660,7 @@ def all_nwr_within(s:Settings,a:Accumulator) :
650660
a.add('ways',id)
651661
else :
652662
a.add('rels',-id)
653-
l.log(a.len('ways'),'ways,',a.len('rels'),'rels from',tbl_name)
663+
l.log(n(a.len('ways')),'ways,',n(a.len('rels')),'rels from',tbl_name)
654664

655665
# 1c) select all ways,rels FROM planet_osm_line WHERE way ST_Within(bbox);
656666
# planet_osm_roads is not needed in that fashion, because it is a strict subset
@@ -664,16 +674,14 @@ def all_nwr_within(s:Settings,a:Accumulator) :
664674
a.add('ways',id)
665675
else :
666676
a.add('rels',-id)
667-
l.log(a.len('ways'),'ways,',a.len('rels'),'rels within bounds')
677+
l.log(n(a.len('ways')),'ways,',n(a.len('rels')),'rels within bounds')
668678

669-
670679
def nodes_parent_wr(s:Settings,a:Accumulator,only_nodes_within=False) :
671680
# 2a) foreach node_id :
672681
# 2b) select all ways WHERE ARRAY[node_id]::bigint[] <@ nodes;
673682
# 2c) select all rels WHERE ARRAY[node_id]::bigint[] <@ parts;
674683
nodes_name='nodes_within' if only_nodes_within else 'nodes'
675684
a_len=a.len
676-
l.log('checking parent ways of',a_len(nodes_name),'nodes')
677685
way_count=0
678686
rel_count=0
679687
node_count=0
@@ -714,7 +722,7 @@ def nodes_parent_wr(s:Settings,a:Accumulator,only_nodes_within=False) :
714722
for rel in rel_ids:
715723
rel_count+=1
716724
a.add('rels',rel['id'])
717-
l.finishrate(lastline=False)
725+
l.finishrate()
718726
l.log(n(a_len('ways')),'ways,',n(a_len('rels')),'rels forward from nodes')
719727

720728
def ways_parent_r(s:Settings,a:Accumulator) :
@@ -885,7 +893,7 @@ async def stream_osm_xml(s:Settings) :
885893

886894
l.next_phase() #write
887895
counts=[a.len(i)for i in ('nodes','ways','rels')]
888-
l.log('dumping',counts[0],'nodes,',counts[1],'ways,',counts[2],'rels in total')
896+
l.log('dumping',n(counts[0]),'nodes,',n(counts[1]),'ways,',n(counts[2]),'rels in total')
889897
# we now have: [~3.3M nodes, ~400K ways, ~8K rels] with_parents=True
890898

891899
# ONLY after all ids have been resolved, do we actually query the data,
@@ -905,7 +913,6 @@ async def stream_osm_xml(s:Settings) :
905913
create_relations(s,a),
906914
) :
907915
xml_out.write(el)
908-
print(file=sys.stderr)
909916

910917
def rel_to_xml(row_dict:dict,tags:dict,new_jsonb_schema:bool)->ET.Element :
911918
# separate tags and row_dict, see way_to_xml()
@@ -1040,7 +1047,7 @@ def create_relations(s:Settings,a:Accumulator)->typing.Iterator[ET.Element] :
10401047
for row_dict in g_query_ids(s.c,query,g_negate(a.all_subtract('rels','done_ids')),'osm_id',step=250) :
10411048
if first :
10421049
start_t=time.time()
1043-
l.log('rels _line output start',start_t)
1050+
#l.log('rels _line output start',start_t)
10441051
first=False
10451052

10461053
if a.is_in('done_ids',row_dict['id']) :

0 commit comments

Comments
 (0)