Skip to content

Commit a71a15f

Browse files
authored
Web Application Firewall country blacklist heuristic. (#8)
* Initial WIP country blacklist structure. * Add request de-serialization protections. * Improved hinting, now storing packed IPs. * Correct missing import, pre-trigger cached de-serialization operations which may fail. * Additional optional installation USE flags. * Document addition of WAF extension. * Add API definition for persistent blacklists. * Permit persistence of the blacklist and exemptions. * Deserialization errors are better handled in core at time of collect callback execution. * Initial WIP country blacklist structure. * Sorting of example countries. * Register the WAF heuristics as plugins. * Use correct code, we are not a client ourselves. * Pass client IP down to heuristics, prime query string arguments. * Heuristics are now passed the client IP. * IP2Location utilization. * Ban-by-country implementation. * Hosting combined heuristic default extensions. * Who needs a temporary variable? * Additional example country, short and long name for logs. * Adjustments to logging levels and extras. * Blacklist serialization and deserialization. * Correction for escaping within a quoted string, additional example geographic exclusions.
1 parent 64ada32 commit a71a15f

File tree

5 files changed

+129
-27
lines changed

5 files changed

+129
-27
lines changed

setup.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,5 +102,13 @@
102102
'matches = web.security.predicate:ContextMatch',
103103
'contains = web.security.predicate:ContextContains',
104104
],
105+
'web.security.heuristic': [
106+
'dns = web.security.waf:ClientDNSHeuristic',
107+
'path = web.security.waf:PathHeuristic',
108+
'php = web.security.waf:PHPHeuristic',
109+
'wordpress = web.security.waf:WordpressHeuristic',
110+
'hosting = web.security.waf:HostingCombinedHeuristic',
111+
'country = web.security.waf:GeoCountryHeuristic',
112+
]
105113
},
106114
)

web/ext/acl.py

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -249,7 +249,7 @@ def __init__(self, *_policy, default=None, policy=None):
249249
def prepare(self, context):
250250
"""Called to prepare the request context by adding an `acl` attribute."""
251251

252-
if __debug__: log.debug("Populating request context with ACL.", extra=dict(request=id(context)))
252+
if __debug__: log.trace("Populating request context with ACL.", extra=context.extra)
253253

254254
context.acl = ACL(context=context, policy=self.policy)
255255

@@ -262,24 +262,24 @@ def dispatch(self, context, crumb):
262262
acl = getattr(crumb.handler, '__acl__', ())
263263
inherit = getattr(crumb.handler, '__acl_inherit__', True)
264264

265-
if __debug__: log.debug(f"Handling dispatch event: {crumb.handler!r} {acl!r}", extra=dict(
266-
request = id(context),
267-
consumed = crumb.path,
268-
handler = safe_name(crumb.handler),
269-
endpoint = crumb.endpoint,
270-
acl = [repr(i) for i in acl],
271-
inherit = inherit,
272-
))
265+
if __debug__: log.trace(f"Handling dispatch event: {crumb.handler!r} {acl!r}", extra={
266+
'consumed': crumb.path,
267+
'handler': safe_name(crumb.handler),
268+
'endpoint': crumb.endpoint,
269+
'acl': [repr(i) for i in acl],
270+
'inherit': inherit,
271+
**context.extra
272+
})
273273

274274
if not inherit:
275-
if __debug__: log.info("Clearing collected access control list.")
275+
if __debug__: log.warn("Clearing collected access control list.")
276276
del context.acl[:]
277277

278278
context.acl.extend((Path(context.request.path), i, handler) for i in acl)
279279

280280
def collect(self, context, handler, args, kw):
281281
if not context.acl:
282-
if __debug__: log.debug("Skipping validation of empty ACL.", extra=dict(request=id(context)))
282+
if __debug__: log.debug("Skipping validation of empty ACL.", extra=context.extra)
283283
return
284284

285285
grant = context.acl.is_authorized

web/ext/waf.py

Lines changed: 39 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212

1313
from abc import ABCMeta, abstractmethod
1414
from html import escape
15+
from pathlib import Path
1516
from re import compile as re
1617
from socket import inet_aton
1718

@@ -34,26 +35,56 @@
3435
ClientSet = MutableSet[bytes]
3536

3637
class PersistentClientSet(ClientSet, metaclass=ABCMeta):
    """Abstract base for a mutable client set whose contents can be saved and later reloaded.

    Only the two-method contract is fixed here; the storage mechanism is left entirely to subclasses.
    """

    @abstractmethod
    def persist(self, context:Context) -> None:
        """Save the current members of the set.

        Each implementation decides how: commonly by writing a serialized form to disk, or by handing the data
        to a data store such as SQLite, PostgreSQL, or MongoDB.
        """

        raise NotImplementedError()

    @abstractmethod
    def restore(self, context:Context) -> None:
        """Reload the members of the set.

        Each implementation decides how: commonly by reading a serialized form back from disk, or by querying a
        data store such as SQLite, PostgreSQL, or MongoDB.
        """

        raise NotImplementedError()
59+
60+
61+
class LineSerializedSet(set, PersistentClientSet):
    """A built-in `set` that persists its members to a text file, one Python literal per line.

    Members are written using their `repr()` and read back with `ast.literal_eval`, so both integers and the
    packed `bytes` addresses described by `ClientSet` survive a persist/restore round trip. Files containing only
    plain decimal integers (the form the previous `int()`-based reader expected) remain readable.
    """

    location:Path  # The target path to read and write data from/to.

    def __init__(self, *args, location:Union[str,Path]):
        """Create the set, optionally pre-populated, bound to a backing file.

        Positional arguments are forwarded to `set` (typically a single iterable of initial members); `location`
        is the path of the file used by `persist` and `restore`.
        """

        super().__init__(*args)  # Without this call any initial members passed positionally were discarded.
        self.location = Path(location)

    def persist(self, context:Context) -> None:
        """Write every member to `location`, sorted, one literal per line, replacing any prior file content."""

        with self.location.open('w', encoding='utf-8') as fh:
            fh.writelines(repr(element) + "\n" for element in sorted(self))

    def restore(self, context:Context) -> None:
        """Replace the current members with those stored at `location`.

        A missing file is treated as an empty store (e.g. first run, before anything was persisted), leaving the
        set empty. Blank lines are ignored. A line that is not a valid Python literal raises the error produced by
        `ast.literal_eval` (`ValueError` or `SyntaxError`).
        """

        from ast import literal_eval  # Local import: parses literals only, never evaluates arbitrary code.

        self.clear()

        try:
            fh = self.location.open('r', encoding='utf-8')
        except FileNotFoundError:
            return  # Nothing has been persisted yet.

        with fh:
            for line in fh:  # Stream the file rather than materializing every line up front.
                line = line.strip()

                if line:
                    self.add(literal_eval(line))
4678

4779

4880
class WebApplicationFirewallExtension:
4981
"""A basic rules-based Web Application Firewall implementation."""
5082

83+
uses:ClassVar[Tags] = {'timing.prefix'} # We want our execution time to be counted.
5184
provides:ClassVar[Tags] = {'waf'} # A set of keywords usable in `uses` and `needs` declarations.
5285
first:ClassVar[bool] = True # Always try to be first: if truthy, become a dependency for all non-first extensions.
5386
extensions:ClassVar[Tags] = {'waf.rule'} # A set of entry_point namespaces to search for related plugin registrations.
5487

55-
uses:ClassVar[Tags] = {'timing.prefix'} # We want our execution time to be counted.
56-
5788
heuristics:Iterable[WAFHeuristic] # The prepared heuristic instances.
5889
blacklist:ClientSet # The current blacklist. Can theoretically be swapped for any mutable set-like object.
5990
exempt:ClientSet # IP addresses exempt from blacklisting.
@@ -73,7 +104,7 @@ def __init__(self, *heuristics, blacklist:Optional[ClientSet]=None, exempt:Optio
73104
self.heuristics = heuristics
74105

75106
# Permit custom backing stores to be passed in; we optimize by storing packed binary values, not strings.
76-
self.blacklist = set() if blacklist is None else set(inet_aton(i) for i in blacklist)
107+
self.blacklist = set() if blacklist is None else blacklist.__class__(inet_aton(i) for i in blacklist)
77108

78109
# Permit custom backing stores to be passed in for the exemptions, as well.
79110
self.exempt = set() if exempt is None else exempt
@@ -87,6 +118,7 @@ def inner(environ:WSGIEnvironment, start_response:WSGIStartResponse):
87118
try:
88119
request: Request = Request(environ) # This will be remembered and re-used as a singleton later.
89120
uri: URI = URI(request.url)
121+
request.GET # As will this; accessing the query string arguments up front also detects malformed input.
90122

91123
except Exception as e: # Protect against de-serialization errors.
92124
return HTTPBadRequest(f"Encountered error de-serializing the request: {e!r}")(environ, start_response)
@@ -103,7 +135,7 @@ def inner(environ:WSGIEnvironment, start_response:WSGIStartResponse):
103135
# Validate the heuristic rules.
104136
for heuristic in self.heuristics:
105137
try:
106-
heuristic(environ, uri)
138+
heuristic(environ, uri, client)
107139
except HTTPClose as e:
108140
log.error(f"{heuristic} {e.args[0].lower()}")
109141
raise

web/security/exc.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,6 @@
44
class HTTPClose(HTTPClientError):
    """Signal the front-end load balancer (FELB) to hang up on the client rather than answer it."""

    # NOTE(review): 444 is not an IANA-registered status; presumably it mirrors the load balancer's
    # "close the connection without a response" convention -- confirm against the FELB configuration.
    code = 444
    title = "Connection Closed Without Response"
    explanation = "The server did not accept your request."

web/security/waf.py

Lines changed: 69 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,14 @@
1111
from .util import DNS
1212
from .exc import HTTPClose
1313

14+
try:
15+
from IP2Location import IP2Location
16+
except ImportError:
17+
IP2Location = None
18+
1419

1520
class WAFHeuristic:
16-
def __call__(self, environ:WSGIEnvironment, uri:URI) -> Optional[bool]:
21+
def __call__(self, environ:WSGIEnvironment, uri:URI, client:str) -> Optional[bool]:
1722
"""Perform the heuristic check.
1823
1924
May return True to indicate processing should stop, raise an HTTPException to propagate to the client, or may
@@ -78,7 +83,7 @@ def __repr__(self, *extra:str) -> str:
7883
*extra
7984
)
8085

81-
def __call__(self, environ:WSGIEnvironment, uri:URI) -> Optional[bool]:
86+
def __call__(self, environ:WSGIEnvironment, uri:URI, client:str) -> Optional[bool]:
8287
assert check_argument_types()
8388

8489
addr:str = environ.get(self.origin, '') # Attempt to retrieve the client IP from the WSGI environment.
@@ -139,7 +144,7 @@ class PathHeuristic(WAFHeuristic):
139144
140145
One can also deny any request targeting a PHP script:
141146
142-
PathHeuristic(re.compile(r'\.phps?($|/)'))
147+
PathHeuristic(re.compile(r'\\.phps?($|/)'))
143148
144149
It's important to note that regular expression flags (such as case insensitivity) will be ignored; the search is
145150
always case sensitive. (phpMyAdmin != phpmyadmin; these are legitimately separate resources.)
@@ -170,7 +175,7 @@ def __repr__(self, *extra:str) -> str:
170175
*extra
171176
)
172177

173-
def __call__(self, environ:dict, uri:URI) -> None:
178+
def __call__(self, environ:dict, uri:URI, client:str) -> None:
174179
assert check_argument_types()
175180

176181
if self.forbidden & set(uri.path.parts): # This is ~a third faster than the simplest regex use.
@@ -209,16 +214,73 @@ def __init__(self) -> None:
209214
class HostingCombinedHeuristic(PathHeuristic):
    """A combined set of suspicious URI fragments and general patterns matching commonly exploited tools.

    This is the result of casually browsing through around ten years of error logs on an active hosting service and
    combines a number of the other PathHeuristic rules into one for convenience. (The WAF already optimizes these down
    into a single regex for runtime checking; this is an import optimization.)

    Several filename extensions which ought to be delivered by a front-end load balancer are included in this list;
    DO NOT INCLUDE THIS HEURISTIC AT DEVELOPMENT TIME if you are delivering static content via an endpoint within your
    application. A critical message will be emitted if used at development time.
    """

    def __init__(self, *extensions:str) -> None:
        """Prepare a 'combined hosting experience' heuristic.

        You can pass in additional extensions to block beyond the basic set included as stringy regular expression
        fragments via positional arguments. Each fragment is joined, unescaped, into a single alternation, so it
        must already be a valid regular expression fragment.
        """

        if __debug__:
            # The two literals are concatenated by the compiler; the trailing space keeps "will" and "likely" apart.
            log.critical("Use of this heuristic if delivering statics from the application at development time will "
                    "likely blacklist you.")

        extensions = set(extensions) | {'html?', 'phps?', 'py', 'js', 'css', 'swf', 'txt', 'md'}

        super().__init__(
                re(r'\.(' + '|'.join(sorted(extensions)) + r')($|/)'),  # Forbidden filename extensions.
                re(r'((web)?mail)|(round|cube|roundcube)((web)?mail)?2?(-[0-9\.]+)?'),  # Webmail service, in general.
                'wm', 'rc', 'rms', 'mss', 'mss2',  # More common webmail containers.
                'FlexDataServices', 'amfphp', 'soapCaller.bs',  # Adobe Flex AMF and RPC services.
                'wordpress', 'wp', 'wp-admin', 'wp-includes', 'wlwmanifest.xml',  # WordPress-related.
                'admin', 'mysql', 'phpMyAdmin', 'pma', 'dbadmin', 'MyAdmin', 'phppgadmin',  # Common administrative access.
                'crossdomain.xml', 'README', 'LICENSE', 'webdav', re(r'w00tw00t'),  # Generic probes.
            )
248+
249+
250+
class GeoCountryHeuristic(WAFHeuristic):
    """A heuristic that rejects requests whose client address resolves to a blocked country of origin.

    Country codes are given positionally and compared case-insensitively (they are stored upper-cased).

    Example usage:

        GeoCountryHeuristic(
                'cn', 'kp',  # China, take that, "Great Firewall", and North Korea.
                'ae', 'ir', 'iq', 'sa', 'tr',  # Middle-eastern nations.
                'by', 'ru', 'ua',  # Russia and nearby former bloc states.
                'am', 'az', 'ee', 'ge', 'kg', 'kz', 'lt', 'lv', 'md', 'tj', 'tm', 'uz',  # Additional former states.
                'af', 'mr', 'ng', 'ph', 'pl', 'sd', 'ye',  # LGBTQ and human rights violators, others included above.
            )
    """

    countries: Set[str]  # The set of blocked ISO 3166 country codes.
    resolver: IP2Location  # The geographic database used to map a client address to a country.

    def __init__(self, *countries:str, db:str='IP2LOCATION-LITE-DB1.IPV6.BIN') -> None:
        """Open the geographic database at `db` and record the blocked country codes.

        Raises `ImportError` if the optional IP2Location library is not installed.
        """

        assert check_argument_types()

        if IP2Location is None:
            raise ImportError("You must have the IP2Location library installed.")

        self.countries = {code.upper() for code in countries}
        self.resolver = IP2Location(db)

    def __repr__(self, *extra:str) -> str:
        """Describe the heuristic, listing the blocked country codes in sorted order."""

        quoted = "'{}'".format("', '".join(sorted(self.countries)))
        return super().__repr__(quoted, *extra)

    def __call__(self, environ:dict, uri:URI, client:str) -> None:
        """Raise `HTTPClose` when `client` resolves to one of the blocked countries; otherwise do nothing."""

        assert check_argument_types()

        short = self.resolver.get_country_short(client)

        if short not in self.countries:
            return

        # The long name is only looked up once a block is certain; it is used purely for the message.
        raise HTTPClose(f"Access from {short} ({self.resolver.get_country_long(client)}) forbidden.")

0 commit comments

Comments
 (0)