Skip to content

Commit 5f9ce7b

Browse files
committed
- Adding missing files, which the flaky GitHub .gitignore file prevented from being added the first time.
1 parent 0705f4d commit 5f9ce7b

7 files changed

+1474
-0
lines changed

LICENSE

Lines changed: 674 additions & 0 deletions
Large diffs are not rendered by default.

run-parse-html.bat

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
c:\Python27\python.exe run-parse-html.py

run-parse-html.py

Lines changed: 369 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,369 @@
1+
"""
2+
LICENSE

pysrd - Python scripts for working with the DND35 OGL SRD.
Copyright (C) 2012, 2013 Richard Tew

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.

OVERVIEW

The SQLite database provided by highmage, and available here (http://www.andargor.com/) lacks
some data from the SRD. This script parses additional SRD information from Josh Ritter's
OpenSRD (http://sourceforge.net/projects/opensrd) HTML files.

You will need to ensure the following variables have the correct values:
    DATABASE_FILENAME: Name of a local file containing the SQLite database created by highmage.
    HTML_DIR_NAME: Name of local directory immediately containing the OpenSRD html files.
29+
"""
30+
31+
import bs4 # c:\python27\Scripts\pip.exe install beautifulsoup4
32+
import os
33+
import re
34+
import sys
35+
import StringIO
36+
import sqlite3
37+
38+
39+
# Name of the local file containing the SQLite database created by highmage.
DATABASE_FILENAME = "dnd35.sqlite"
# Name of the local directory immediately containing the OpenSRD html files.
HTML_DIR_NAME = "SRD-html"
41+
42+
43+
# Taken from the Python wiki.
# Maps a single character to the entity that html_escape() substitutes for it.
# The ampersand entry is left disabled, so "&" passes through unchanged and
# escaping text that already contains entities does not alter it.
html_escape_table = {
    # "&": "&amp;",
    '"': "&quot;",
    "'": "&apos;",
    ">": "&gt;",
    "<": "&lt;",
}


def html_escape(text):
    """Produce entities within text.

    Each character of text that has an entry in html_escape_table is
    replaced by that entry; every other character is copied as-is.
    Returns the resulting string.
    """
    return "".join(html_escape_table.get(c, c) for c in text)
54+
55+
56+
def escape_children(v):
    """Escape the text found underneath tag v, modifying the tree in place.

    Every non-tag child is replaced by the result of html_escape() on it.
    An <a> tag whose .string is None is replaced by an empty string; any
    other <a> tag has its string replaced by the escaped string.  Tags are
    then processed recursively.
    """
    # Walk a snapshot of the children: the loop body replaces entries of
    # v.contents, and mutating a list while iterating it directly is fragile.
    for child in list(v.contents):
        if not isinstance(child, bs4.Tag):
            child.replace_with(html_escape(child))
            continue

        if child.name == "a":
            if child.string is None:
                child.replace_with("")
                continue
            child.string = html_escape(child.string)
        escape_children(child)
67+
68+
69+
#####
70+
71+
def parse_special_abilities(cb):
    """Extract the special ability entries from abilitiesAndConditions.html.

    Scanning starts at the first <h5> in the document body and follows its
    siblings.  Each <h5> opens a new entry named after the heading text; the
    tags that follow (with any "class" attribute removed) are accumulated as
    the entry's text.  Scanning stops at the first <h3> sibling.

    A heading that names several abilities joined by the word "and" is
    reported once per ability, each time with the same text.

    cb is called as cb(name=..., fulltext=...) for every ability.  The HTML
    directory is read from the module-level html_path.
    """
    def commit(heading, fulltext):
        # Split on the whole word "and" only; a plain substring split would
        # also cut apart any name that merely contains those three letters.
        for part in re.split(r"\band\b", heading):
            part = part.strip()
            if part:
                cb(name=part.capitalize(), fulltext=fulltext)

    file_path = os.path.join(html_path, "abilitiesAndConditions.html")
    with open(file_path, "r") as f:
        soup = bs4.BeautifulSoup(f)
        v = soup.body.h5

        heading = ""
        fulltext = ""
        # Test against None explicitly so that a falsy sibling (for example
        # an empty text node) cannot end the scan early.
        while v is not None:
            if isinstance(v, bs4.Tag):
                if v.name == "h5":
                    # Commit any current entry.
                    if heading:
                        commit(heading, fulltext)
                    # Start the next entry.
                    heading = v.get_text().lower()
                    fulltext = ""
                elif v.name == "h3":
                    break
                else:
                    if "class" in v.attrs:
                        del v.attrs["class"]
                    fulltext += v.prettify()
            v = v.next_sibling
        # Commit any current entry.
        if heading:
            commit(heading, fulltext)
102+
103+
104+
def parse_conditions(cb):
    """Extract the condition entries from abilitiesAndConditions.html.

    The second <h3> of the document must read "CONDITIONS".  From there every
    following <p> without a "class" attribute is examined: a paragraph that
    contains a <b> tag starts a new condition named after the bold text, and
    a paragraph without one is appended to the current condition's text.

    cb is called as cb(name=..., fulltext=...) for every condition.  The HTML
    directory is read from the module-level html_path.

    Raises Exception when the CONDITIONS heading cannot be found.
    """
    file_path = os.path.join(html_path, "abilitiesAndConditions.html")
    with open(file_path, "r") as f:
        soup = bs4.BeautifulSoup(f)
        first_h3 = soup.body.h3
        v = first_h3.find_next("h3") if first_h3 is not None else None
        # A missing heading is reported the same way as a mismatched one,
        # rather than surfacing as an AttributeError on None.
        if v is None or v.get_text() != "CONDITIONS":
            raise Exception("unable to find CONDITIONS H3 tag")
        v = v.find_next("p")

        name = ""
        fulltext = ""
        while v is not None:
            if "class" not in v.attrs:
                b = v.find("b")
                if b is None:
                    # NOTE(review): continuation paragraphs are appended
                    # without going through escape_children(); confirm that
                    # this is intended.
                    fulltext += v.prettify()
                else:
                    # Commit any current entry.
                    if name:
                        cb(name=name, fulltext=fulltext)
                    # Start the next entry.
                    name = b.get_text().lower().capitalize()
                    escape_children(v)
                    fulltext = v.prettify()
            v = v.find_next("p")
        # Commit any current entry.
        if name:
            cb(name=name, fulltext=fulltext)
132+
133+
def parse_abilities(cb):
    """Extract the ability descriptions from basics.html.

    The <h3> headings after the first one are searched for "THE ABILITIES".
    Starting at the next <h5>, each <h5> sibling opens an entry whose heading
    has the form "Name (Short)"; the tags that follow (with any "class"
    attribute removed) are accumulated as the entry's text.  Scanning stops
    at the first <h3> sibling.

    cb is called as cb(name=..., shortname=..., fulltext=...) for every
    ability.  The name is capitalised and the short name is lower case.  The
    HTML directory is read from the module-level html_path.

    Raises Exception when the heading is missing or an entry title does not
    have the expected form.
    """
    # Raw string: the pattern contains backslash escapes meant for re.
    title_re = re.compile(r"([a-zA-Z]+)[ ]+\(([a-zA-Z]+)\)")

    file_path = os.path.join(html_path, "basics.html")
    with open(file_path, "r") as f:
        soup = bs4.BeautifulSoup(f)
        first_h3 = soup.body.h3
        v = first_h3.find_next("h3") if first_h3 is not None else None
        while v is not None and v.get_text() != "THE ABILITIES":
            v = v.find_next("h3")
        if v is None:
            raise Exception("unable to find THE ABILITIES H3 tag")

        v = v.find_next("h5")
        name = ""
        shortname = ""
        fulltext = ""
        while v is not None:
            if isinstance(v, bs4.Tag):
                if v.name == "h5":
                    # Commit any current entry.
                    if name:
                        cb(name=name, shortname=shortname, fulltext=fulltext)
                    # Start the next entry.
                    title = v.get_text()
                    m = title_re.match(title.lower())
                    if m is None:
                        raise Exception("unable to parse ability title %r" % title)
                    name = m.group(1).capitalize()
                    shortname = m.group(2)
                    fulltext = ""
                elif v.name == "h3":
                    break
                else:
                    if "class" in v.attrs:
                        del v.attrs["class"]
                    fulltext += v.prettify()
            v = v.next_sibling
        # Commit any current entry.
        if name:
            cb(name=name, shortname=shortname, fulltext=fulltext)
170+
171+
def _sibling_tags(first):
    """Yield first and each of its following siblings that is a bs4.Tag."""
    node = first
    while node is not None:
        if isinstance(node, bs4.Tag):
            yield node
        node = node.next_sibling


def _header_names(first_th):
    """Return the lower-cased names of a header row, "?" for each spanned column."""
    names = []
    for th in _sibling_tags(first_th):
        if "colspan" in th.attrs:
            names.extend("?" for _ in range(int(th.attrs["colspan"])))
        else:
            names.append(th.get_text().lower())
    return names


def _row_values(first_td, column_count):
    """Return the cell values of a data row, converted to int where possible."""
    values = []
    for td in _sibling_tags(first_td):
        if "colspan" in td.attrs:
            colspan = int(td.attrs["colspan"])
            # A cell spanning the whole table adds nothing, which leaves the
            # row short so that the caller discards it.
            if colspan != column_count:
                values.extend("NULL" for _ in range(colspan))
            continue
        text = td.get_text()
        if text == u"\u2014":
            # A lone em dash (U+2014) is stored as 0.
            value = 0
        else:
            try:
                value = int(text)
            except ValueError:
                value = text
        values.append(value)
    return values


def _db_columns(tr_column_names):
    """Translate table header names into (db_column_names, column_types_list)."""
    db_column_names = []
    for i, column_name in enumerate(tr_column_names):
        if i == 0:
            # The first column holds a score range and becomes two columns.
            db_column_names.append(column_name + "_min")
            db_column_names.append(column_name + "_max")
            continue
        # Slicing keeps an empty header name from raising IndexError.
        first_char = column_name[:1]
        try:
            int(first_char)
        except ValueError:
            db_column_names.append(column_name)
        else:
            # A header starting with a digit becomes "level_<digit>".
            db_column_names.append("level_" + first_char)
    return db_column_names, [(n, "INTEGER") for n in db_column_names]


def _split_score_range(score_range):
    """Return (min, max) for a single int score or a "low-high" string."""
    if isinstance(score_range, int):
        return score_range, score_range
    # Accept an en dash (U+2013) as well as a plain hyphen between the bounds.
    score_min, score_max = [int(part) for part in re.split(u"[-\u2013]", score_range)]
    return score_min, score_max


def parse_abilities_table(cb):
    """Extract the rows of the ABILITY MODIFIERS table from basics.html.

    The table is located through the <h5> heading "ABILITY MODIFIERS"; the
    <tr> siblings starting at the next <tr> are then read.  The last row
    containing header cells supplies the column names, and only data rows
    with exactly that many values are kept.  The first column is split into
    "<name>_min" and "<name>_max".

    cb is called once per row with one keyword argument per database column,
    plus column_types_list, a list of (column name, "INTEGER") pairs.  The
    HTML directory is read from the module-level html_path.

    Raises Exception when the heading cannot be found.
    """
    file_path = os.path.join(html_path, "basics.html")
    with open(file_path, "r") as f:
        soup = bs4.BeautifulSoup(f)

        v = soup.body.h5
        while v is not None and v.get_text() != "ABILITY MODIFIERS":
            v = v.find_next("h5")
        if v is None:
            raise Exception("unable to find ABILITY MODIFIERS H5 tag")

        tr_column_names = []
        tr_lines = []
        for e in _sibling_tags(v.find_next("tr")):
            if e.name != "tr":
                continue
            if e.th is not None:
                # The last row with header cells is considered the right one.
                tr_column_names = _header_names(e.th)
            elif e.td is not None:
                line = _row_values(e.td, len(tr_column_names))
                if line and len(line) == len(tr_column_names):
                    tr_lines.append(line)

    # Translate the table column names to database column names.
    db_column_names, column_types_list = _db_columns(tr_column_names)

    # Fix the first column.  All rows are converted before any is reported,
    # so a malformed row aborts the run before cb has been called.
    db_lines = []
    for tr_line in tr_lines:
        score_min, score_max = _split_score_range(tr_line[0])
        db_lines.append([score_min, score_max] + tr_line[1:])

    for db_line in db_lines:
        kwargs = dict(zip(db_column_names, db_line))
        kwargs["column_types_list"] = column_types_list
        cb(**kwargs)
262+
263+
264+
# This list is used to preserve column ordering.
# Each entry is (column name, SQL column definition); these are the columns
# known to every table.  A caller can supply further entries per call.
column_types_list = [
    ("name", "TEXT NOT NULL UNIQUE"),
    ("shortname", "TEXT NOT NULL UNIQUE"),
    ("fulltext", "TEXT NOT NULL"),
]


def create_callback(table_name, statements):
    """Return a callback that turns keyword arguments into SQL statements.

    Every call of the returned cb(**kwargs) describes one row of table_name
    and appends an INSERT statement to the statements list.  On the first
    call (while statements is still empty) a DROP TABLE IF EXISTS and a
    CREATE TABLE statement are appended first; the table gets an integer
    primary key "id" followed by the supplied columns, in the order given by
    the known column types.

    A "column_types_list" keyword argument is not treated as a column: it is
    a list of (column name, SQL type) pairs added to the module-level
    column_types_list for that call.

    cb raises RuntimeError when a TEXT value contains a single quote or when
    a column has a type other than TEXT or INTEGER, and KeyError when a
    column has no known type.
    """
    def cb(**kwargs):
        # Build the complete list of known column types for this table.
        local_column_types_list = column_types_list[:]
        if "column_types_list" in kwargs:
            local_column_types_list.extend(kwargs.pop("column_types_list"))
        local_column_types = dict(local_column_types_list)
        input_column_names = list(kwargs)

        # Validate text has sane values.  The statements are assembled as
        # plain strings, so a quote inside a value would break out of the
        # SQL string literal.
        for column_name in input_column_names:
            column_type = local_column_types[column_name]
            if "TEXT" in column_type and "'" in kwargs[column_name]:
                raise RuntimeError("text contains SQL quoting character")

        # Build the table definition on receiving the first row to insert.
        if not statements:
            # Drop the table if it already exists, to start fresh.
            statements.append("DROP TABLE IF EXISTS %s;" % table_name)

            # Recreate the table, preserving ideal column ordering.
            column_defs = [
                "%s %s" % entry
                for entry in local_column_types_list
                if entry[0] in input_column_names
            ]
            statements.append(
                "CREATE TABLE %s (id INTEGER PRIMARY KEY, %s);"
                % (table_name, ", ".join(column_defs))
            )

        # NOTE(review): INTEGER values are written into the statement as-is;
        # a non-numeric value would end up unquoted in the SQL text.
        values = []
        for column_name in input_column_names:
            column_type = local_column_types[column_name]
            if column_type.startswith("TEXT"):
                values.append("'%s'" % kwargs[column_name])
            elif column_type.startswith("INTEGER"):
                values.append(str(kwargs[column_name]))
            else:
                raise RuntimeError("Data-type '%s' needs handling" % column_type)

        statements.append(
            "INSERT INTO %s (%s) VALUES (%s)"
            % (table_name, ", ".join(input_column_names), ", ".join(values))
        )
    return cb
331+
332+
def run():
    """Parse every supported table from the HTML files into the database.

    For each (table name, parser) pair the parser is run with a callback
    from create_callback(), and the SQL statements it collected are then
    executed against the SQLite database named by DATABASE_FILENAME.  One
    progress line per table is written to stdout, with a dot per statement.
    The work is committed once all tables have been processed.
    """
    tables = (
        ("conditions", parse_conditions),
        ("special_abilities", parse_special_abilities),
        ("abilities", parse_abilities),
        ("abilities_table", parse_abilities_table),
    )

    conn = sqlite3.connect(DATABASE_FILENAME)
    try:
        for table_name, func in tables:
            statements = []
            cb = create_callback(table_name, statements)
            func(cb)

            c = conn.cursor()
            try:
                sys.stdout.write("%s %d [" % (table_name, len(statements)))
                for s in statements:
                    sys.stdout.write(".")
                    c.execute(s)
                # stdout is a text stream, so write "\n" and let the stream
                # translate it; writing os.linesep gives "\r\r\n" on Windows.
                sys.stdout.write("]\n")
            finally:
                # Close every cursor, not only the last one created.
                c.close()

        conn.commit()
    finally:
        # Release the database file even when parsing or execution fails.
        conn.close()
361+
362+
363+
if __name__ == "__main__":
    # sys.path[0] is normally the directory of the script being run; the
    # HTML files are looked up relative to it.  html_path is read as a
    # module-level global by the parse_* functions.
    current_path = sys.path[0]
    html_path = os.path.join(current_path, HTML_DIR_NAME)

    run()

    # Wait for a key press so the output stays visible before exiting.
    raw_input("Press enter to continue..")

run-parse-html.sh

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
#!/bin/bash
# Use /c/python27/python.exe when that file exists, otherwise whatever
# "python" resolves to on PATH.
if [ -f /c/python27/python.exe ]; then
    export PYTHON=/c/python27/python.exe
else
    export PYTHON=python
fi
# Quote "$@" so that arguments containing spaces are forwarded intact.
"$PYTHON" python/run-parse-html.py "$@"

run-webserver.bat

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
REM Useful for double-clicking in explorer to start the web server in a DOS window.
2+
c:\Python27\python.exe run-webserver.py

0 commit comments

Comments
 (0)