forked from MaZderMind/osm-history-splitter
-
Notifications
You must be signed in to change notification settings - Fork 0
/
split-all-clipbounds.py
executable file
·168 lines (133 loc) · 5.02 KB
/
split-all-clipbounds.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
#!/usr/bin/python
import sys, os, tempfile
from datetime import datetime
import Queue, threading
import random, time
# just simulate the splitting by sleeping for a random number of seconds,
# used to test the multiprocess handling
simulate = True
# the directory to scan for clipbounds-files
clipDir = "clipbounds"
# the type of clipbounds to use (OSM or POLY)
clipType = "POLY" # OSM
clipExtension = ".poly" # .osm
# the clipbounds are read from the directory and created in a hierarchy, so
# clipbounds/europe.poly is read from the planetfile
# clipbounds/asia.poly is read from the planetfile
# clipbounds/europe/germany.poly is read from the generated europe.osm.pbf
# clipbounds/europe/italy.poly is read from the generated europe.osm.pbf
# clipbounds/foo/bar.poly is read from the planetfile, because there was no foo.poly
# the desired result datatype (.osm.pbf, .osh.pbf, .osm, .osh, ...)
dataType = ".osm.pbf"
# the maximum number of parallel running extracts
# this is ( <your systems memory in GB> - 1) * 1024 / <size per extract>
# where <size per extract> is 190 MB for Hardcut and 350 MB for Softcut
#
# to achieve best results, set maxProcesses = 1 and start with a low value
# for maxParallel, then run the tool a little. increase maxParallel,
# re-run the tool for some seconds, ...
# when creating the last bit-vector takes more time than creating the
# first vectors, the os starts to swap the bit-vectors out. reduce the
# number by one and try again
maxParallel = 8
# the number of parallel extracts is determined by the available memory.
# when all bit-vectors fit into the RAM, runtime is mostly a matter of CPU.
# if you have multiple cores, you can split quicker by distributing the
# point-in-polygon tests across your cores. This increases the number of
# disk-seeks, because multiple processes try to access the same file,
# but in most cases this should not hit the performance much.
maxProcesses = 4
# on my PC (4 GB, 4 Cores) i achieved best results when doing 8 extracts
# in parallel with 4 processes.
# the source file
inputFile = "/home/peter/osm-data/planet-latest.osm.pbf"
# the directory to place the generated extracts into
outputDir = "o"
# path to the compiled splitter
splitterCommand = "osm-history-splitter"
# --plan is a dry-run: process() only prints what it would do (see below);
# collapse to a single worker, presumably so the plan prints in a stable
# order -- NOTE(review): intent inferred, confirm
if(sys.argv.count("--plan") > 0):
    maxParallel = maxParallel / maxProcesses
    maxProcesses = 1
# serializes console output (and directory creation) across worker threads
printlock = threading.Lock()
# unbounded queue of task-groups waiting for a worker thread
q = Queue.Queue(0)
# names of clipbounds (relative, extension-less) whose extract has already
# been generated; process() polls this before cutting a nested extract
finished = []
def process(tasks):
(source, foo) = os.path.split(tasks[0])
if(source == ""):
source = inputFile
else:
if finished.count(source) == 0:
printlock.acquire()
print "trying to split from", source, "which is not finished yet, re-queuing and sleeping 5 seconds"
printlock.release()
q.put(tasks)
time.sleep(5)
return
source = outputDir + "/" + source + dataType
if not simulate and not os.path.exists(source):
source = inputFile
printlock.acquire()
print "splitting", source, "to", tasks
printlock.release()
if(sys.argv.count("--plan") > 0):
return
(fp, configfile) = tempfile.mkstemp()
os.write(fp, "# auto-generated\n")
for task in tasks:
dest = os.path.join(outputDir, task + dataType)
dirname = os.path.dirname(dest)
if not os.path.exists(dirname):
printlock.acquire()
print "Creating", dirname
os.mkdir(dirname)
printlock.release()
os.write(fp, dest)
os.write(fp, "\t")
os.write(fp, clipType)
os.write(fp, "\t")
os.write(fp, clipDir + "/" + task + clipExtension)
os.write(fp, "\n")
os.close(fp)
start = datetime.now()
if(simulate):
time.sleep(random.randint(1, 10))
else:
os.spawnl(os.P_WAIT, splitterCommand, splitterCommand, "--softcut", source, configfile)
printlock.acquire()
print "finished splitting to", tasks
print "runtime:", datetime.now() - start
printlock.release()
for task in tasks:
finished.append(task)
os.unlink(configfile)
def worker():
    # Daemon-thread main loop: forever take one group of tasks off the
    # queue, run the splitter on it, and acknowledge it so that q.join()
    # in the main thread can eventually return.
    while True:
        batch = q.get()
        process(batch)
        q.task_done()
print "starting", maxProcesses, "threads"
for i in range(maxProcesses):
t = threading.Thread(target=worker)
t.daemon = True
t.start()
tasksPerProc = maxParallel / maxProcesses
tasks = []
lastdir = "";
stack = [clipDir]
while stack:
directory = stack.pop(0)
for base in sorted(os.listdir(directory)):
name = os.path.join(directory, base)
if os.path.isdir(name):
if not os.path.islink(name):
stack.append(name)
else:
if name.endswith(clipExtension):
name = os.path.relpath(name, clipDir)
(name, ext) = os.path.splitext(name)
if len(tasks) > 0 and (lastdir != directory or len(tasks) == tasksPerProc):
q.put(tasks)
tasks = []
lastdir = directory
tasks.append(name)
q.join()