Skip to content

Commit 3b8bd5c

Browse files
authored
feat: Prevent zipbomb files (#231)
* feat: Prevent zipbomb files - Added a limit of the zip file content to be 5MB max - Added a unittest - Added to the pytest ignore directories to node_modules directory
1 parent f9b53f0 commit 3b8bd5c

File tree

7 files changed

+54
-9
lines changed

7 files changed

+54
-9
lines changed

lms/extractors/ziparchive.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
from zipfile import BadZipFile, ZipFile
66

77
from lms.extractors.base import Extractor, File
8+
from lms.models.errors import FileSizeError
9+
from lms.utils.consts import MAX_ZIP_CONTENT_SIZE
810
from lms.utils.log import log
911

1012

@@ -29,6 +31,11 @@ def __init__(self, **kwargs):
2931
def can_extract(self) -> bool:
3032
return self.is_zipfile
3133

34+
def check_files_size(self) -> bool:
35+
return sum(
36+
f.file_size for f in self.archive.infolist()
37+
) <= MAX_ZIP_CONTENT_SIZE
38+
3239
@staticmethod
3340
def _extract(archive: ZipFile, filename: str, dirname: str = '') -> File:
3441
with archive.open(filename) as current_file:
@@ -57,7 +64,7 @@ def get_exercises_by_dirs(
5764
) -> Iterator[Tuple[int, List[File]]]:
5865
for dirname in filenames:
5966
if len(dirname.strip(os.path.sep).split(os.path.sep)) == 1:
60-
# Checking if the dirname is in the first dir in the zipfile
67+
# Checking if the dirname is in the first dir of the zipfile
6168
parent_name, _ = os.path.split(dirname)
6269
exercise_id, _ = self._clean(parent_name)
6370
if exercise_id:
@@ -79,6 +86,11 @@ def get_exercise(self, file: ZipFile) -> Iterator[Tuple[int, List[File]]]:
7986
yield from self.get_exercises_by_dirs(archive, filenames)
8087

8188
def get_exercises(self) -> Iterator[Tuple[int, List[File]]]:
89+
if not self.check_files_size():
90+
raise FileSizeError(
91+
'File content is too big. '
92+
f'{MAX_ZIP_CONTENT_SIZE // 1000000}MB allowed.',
93+
)
8294
for exercise_id, files in self.get_exercise(self.archive):
8395
if exercise_id and files and any(file.code for file in files):
8496
yield (exercise_id, files)

lms/lmsweb/views.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
MAX_REQUEST_SIZE, PERMISSIVE_CORS, get_next_url, login_manager,
2727
)
2828
from lms.models import comments, notifications, share_link, solutions, upload
29-
from lms.models.errors import LmsError, UploadError, fail
29+
from lms.models.errors import FileSizeError, LmsError, UploadError, fail
3030
from lms.utils.consts import RTL_LANGUAGES
3131
from lms.utils.files import get_language_name_by_extension
3232
from lms.utils.log import log
@@ -292,7 +292,7 @@ def upload_page():
292292
return fail(404, 'User not found.')
293293
if request.content_length > MAX_REQUEST_SIZE:
294294
return fail(
295-
413, f'File is too big. {MAX_REQUEST_SIZE // 1000000}MB allowed',
295+
413, f'File is too big. {MAX_REQUEST_SIZE // 1000000}MB allowed.',
296296
)
297297

298298
file: Optional[FileStorage] = request.files.get('file')
@@ -304,6 +304,9 @@ def upload_page():
304304
except UploadError as e:
305305
log.debug(e)
306306
return fail(400, str(e))
307+
except FileSizeError as e:
308+
log.debug(e)
309+
return fail(413, str(e))
307310

308311
return jsonify({
309312
'exercise_matches': matches,

lms/models/errors.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,10 @@ class BadUploadFile(LmsError):
1717
pass
1818

1919

20+
class FileSizeError(LmsError):
21+
pass
22+
23+
2024
def fail(status_code: int, error_msg: str):
2125
data = {
2226
'status': 'failed',

lms/tests/samples/zipbomb.zip

39 KB
Binary file not shown.

lms/tests/test_extractor.py

Lines changed: 28 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
from io import BufferedReader, BytesIO
22
from tempfile import SpooledTemporaryFile
3-
from typing import Iterator, Tuple
3+
from typing import Iterator, List, Tuple
44
from zipfile import ZipFile
55

66
from flask import json
@@ -18,6 +18,7 @@ class TestExtractor:
1818
IGNORE_FILES_ZIP_NAME = 'Upload_123.zip'
1919
PY_NAMES = ('code1.py', 'code2.py')
2020
ZIP_FILES = ('Upload_1.zip', 'zipfiletest.zip')
21+
ZIP_BOMB_FILE = 'zipbomb.zip'
2122

2223
def setup(self):
2324
self.ipynb_file = self.ipynb_file()
@@ -32,11 +33,18 @@ def setup(self):
3233
self.zipfile_file, self.IGNORE_FILES_ZIP_NAME,
3334
)
3435
self.zipfiles_extractor_files = list(self.zip_files(self.ZIP_FILES))
35-
self.zipfiles_extractors_bytes_io = list(self.get_bytes_io_zip_files())
36+
self.zipfiles_extractors_bytes_io = list(self.get_bytes_io_zip_files(
37+
self.zipfiles_extractor_files, self.ZIP_FILES,
38+
))
39+
self.zipbomb_file_list = list(self.zip_files((self.ZIP_BOMB_FILE,)))
40+
self.zipbomb_bytes_io = next(self.get_bytes_io_zip_files(
41+
self.zipbomb_file_list, (self.ZIP_BOMB_FILE,),
42+
))
3643

3744
def teardown(self):
3845
self.ipynb_file.close()
3946
self.zipfile_file.close()
47+
self.zipbomb_file_list[0].close()
4048
for py_file in self.pyfiles_files:
4149
py_file.close()
4250
for zip_file in self.zipfiles_extractor_files:
@@ -67,6 +75,13 @@ def create_zipfile_storage(
6775
opened_file.seek(0)
6876
return zip_file_storage
6977

78+
@staticmethod
79+
def get_bytes_io_zip_files(
80+
files: List[BufferedReader], filesnames: Tuple[str, ...],
81+
) -> Iterator[Tuple[BytesIO, str]]:
82+
for file, name in zip(files, filesnames):
83+
yield BytesIO(file.read()), name
84+
7085
def get_zip_filenames(self):
7186
the_zip = ZipFile(f'{SAMPLES_DIR}/{self.IGNORE_FILES_ZIP_NAME}')
7287
return the_zip.namelist()
@@ -106,10 +121,6 @@ def test_zip_ignore_files(self):
106121
for filename in original_zip_filenames
107122
)
108123

109-
def get_bytes_io_zip_files(self) -> Iterator[Tuple[BytesIO, str]]:
110-
for file, name in zip(self.zipfiles_extractor_files, self.ZIP_FILES):
111-
yield BytesIO(file.read()), name
112-
113124
def test_zip(self, student_user: User):
114125
conftest.create_exercise()
115126
conftest.create_exercise()
@@ -134,3 +145,14 @@ def test_zip(self, student_user: User):
134145
file=self.zipfiles_extractors_bytes_io[0],
135146
))
136147
assert second_upload_response.status_code == 400
148+
149+
def test_zip_bomb(self, student_user: User):
150+
conftest.create_exercise()
151+
152+
client = conftest.get_logged_user(username=student_user.username)
153+
154+
# Trying to upload a zipbomb file
155+
upload_response = client.post('/upload', data={
156+
'file': self.zipbomb_bytes_io,
157+
})
158+
assert upload_response.status_code == 413

lms/utils/consts.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,3 +2,6 @@
22
'he', 'ar', 'arc', 'dv', 'fa', 'ha',
33
'khw', 'ks', 'ku', 'ps', 'ur', 'yi',
44
}
5+
6+
7+
MAX_ZIP_CONTENT_SIZE = 5_000_000 # 5MB (in bytes)

pytest.ini

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11
[pytest]
22
env =
33
LOCAL_SETUP=true
4+
norecursedirs = node_modules/*

0 commit comments

Comments
 (0)