|
11 | 11 | from .util import DNS
|
12 | 12 | from .exc import HTTPClose
|
13 | 13 |
|
| 14 | +try: |
| 15 | + from IP2Location import IP2Location |
| 16 | +except ImportError: |
| 17 | + IP2Location = None |
| 18 | + |
14 | 19 |
|
15 | 20 | class WAFHeuristic:
|
16 |
| - def __call__(self, environ:WSGIEnvironment, uri:URI) -> Optional[bool]: |
| 21 | + def __call__(self, environ:WSGIEnvironment, uri:URI, client:str) -> Optional[bool]: |
17 | 22 | """Perform the heuristic check.
|
18 | 23 |
|
19 | 24 | May return True to indicate processing should stop, raise an HTTPException to propagate to the client, or may
|
@@ -78,7 +83,7 @@ def __repr__(self, *extra:str) -> str:
|
78 | 83 | *extra
|
79 | 84 | )
|
80 | 85 |
|
81 |
| - def __call__(self, environ:WSGIEnvironment, uri:URI) -> Optional[bool]: |
| 86 | + def __call__(self, environ:WSGIEnvironment, uri:URI, client:str) -> Optional[bool]: |
82 | 87 | assert check_argument_types()
|
83 | 88 |
|
84 | 89 | addr:str = environ.get(self.origin, '') # Attempt to retrieve the client IP from the WSGI environment.
|
@@ -139,7 +144,7 @@ class PathHeuristic(WAFHeuristic):
|
139 | 144 |
|
140 | 145 | One can also deny any request targeting a PHP script:
|
141 | 146 |
|
142 |
| - PathHeuristic(re.compile(r'\.phps?($|/)')) |
| 147 | + PathHeuristic(re.compile(r'\\.phps?($|/)')) |
143 | 148 |
|
144 | 149 | It's important to note that regular expression flags (such as case insensitivity) will be ignored; the search is
|
145 | 150 | always case sensitive. (phpMyAdmin != phpmyadmin; these are legitimately separate resources.)
|
@@ -170,7 +175,7 @@ def __repr__(self, *extra:str) -> str:
|
170 | 175 | *extra
|
171 | 176 | )
|
172 | 177 |
|
173 |
| - def __call__(self, environ:dict, uri:URI) -> None: |
| 178 | + def __call__(self, environ:dict, uri:URI, client:str) -> None: |
174 | 179 | assert check_argument_types()
|
175 | 180 |
|
176 | 181 | if self.forbidden & set(uri.path.parts): # This is ~a third faster than the simplest regex use.
|
@@ -209,16 +214,73 @@ def __init__(self) -> None:
|
class HostingCombinedHeuristic(PathHeuristic):
    """A combined set of suspicious URI fragments and general patterns matching commonly exploited tools.

    This is the result of casually browsing through around ten years of error logs on an active hosting service and
    combines a number of the other PathHeuristic rules into one for convenience. (The WAF already optimizes these down
    into a single regex for runtime checking; this is an import optimization.)

    Several filename extensions which ought to be delivered by a front-end load balancer are included in this list;
    DO NOT INCLUDE THIS HEURISTIC AT DEVELOPMENT TIME if you are delivering static content via an endpoint within your
    application. A critical message is logged on construction whenever `__debug__` is true (i.e. Python is running
    without `-O`).
    """

    def __init__(self, *extensions:str) -> None:
        """Prepare a 'combined hosting experience' heuristic.

        Positional arguments are additional filename extensions to forbid, beyond the built-in set. Each is inserted
        verbatim as a regular expression fragment into a single alternation (e.g. `'phps?'`), so it must be valid
        regex syntax; no escaping is applied.
        """

        if __debug__:
            # Adjacent string literals are concatenated as-is, so the space separating the two halves of the message
            # has to be written explicitly at the end of the first literal.
            log.critical(
                "Use of this heuristic if delivering statics from the application at development time will "
                "likely blacklist you."
            )

        # Merge caller-supplied fragments with the defaults; a set removes duplicates and sorting below keeps the
        # generated pattern deterministic regardless of argument order.
        blocked = set(extensions) | {'html?', 'phps?', 'py', 'js', 'css', 'swf', 'txt', 'md'}

        super().__init__(
            re(r'\.(' + '|'.join(sorted(blocked)) + r')($|/)'),  # Forbidden filename extensions.
            re(r'((web)?mail)|(round|cube|roundcube)((web)?mail)?2?(-[0-9\.]+)?'),  # Webmail service, in general.
            'wm', 'rc', 'rms', 'mss', 'mss2',  # More common webmail containers.
            'FlexDataServices', 'amfphp', 'soapCaller.bs',  # Adobe Flex AMF and RPC services.
            'wordpress', 'wp', 'wp-admin', 'wp-includes', 'wlwmanifest.xml',  # WordPress-related.
            'admin', 'mysql', 'phpMyAdmin', 'pma', 'dbadmin', 'MyAdmin', 'phppgadmin',  # Common administrative access.
            'crossdomain.xml', 'README', 'LICENSE', 'webdav', re(r'w00tw00t'),  # Generic probes.
        )
|
| 248 | + |
| 249 | + |
class GeoCountryHeuristic(WAFHeuristic):
    """A rule which preemptively blocks attempted access from specific countries of origin.

    Country codes are given as positional arguments and are upper-cased on construction, so they may be written in
    either case. Each request's client address is looked up in the geographic database and, if its short country code
    is in the blocked set, the request is rejected by raising HTTPClose.

    Example usage:

        GeoCountryHeuristic('cn', 'kp', db='IP2LOCATION-LITE-DB1.IPV6.BIN')
    """

    countries: Set[str]  # The set of blocked country codes, stored upper-case.
    resolver: IP2Location  # Geographic database handle used to map a client address to its country.

    def __init__(self, *countries:str, db:str='IP2LOCATION-LITE-DB1.IPV6.BIN') -> None:
        """Initialize the country heuristic's geographic database and blacklist.

        The `db` keyword argument is handed directly to the IP2Location constructor. An ImportError is raised if the
        optional IP2Location library could not be imported.
        """

        assert check_argument_types()

        if IP2Location is None:
            raise ImportError("You must have the IP2Location library installed.")

        self.countries = set(code.upper() for code in countries)
        self.resolver = IP2Location(db)

    def __repr__(self, *extra:str) -> str:
        """Delegate to the parent representation, passing the sorted, quoted country codes as the first extra."""

        quoted = "'{}'".format("', '".join(sorted(self.countries)))
        return super().__repr__(quoted, *extra)

    def __call__(self, environ:dict, uri:URI, client:str) -> None:
        """Raise HTTPClose if the client address resolves to a blocked country; otherwise return None."""

        assert check_argument_types()

        short = self.resolver.get_country_short(client)

        if short not in self.countries:
            return

        # The long country name is only resolved on the rejection path, for the error message.
        raise HTTPClose(f"Access from {short} ({self.resolver.get_country_long(client)}) forbidden.")
0 commit comments