Skip to content

Commit 70a62ab

Browse files
authored
CM-44524 - Increase project chunk size for SAST (#279)
1 parent ac54f89 commit 70a62ab

File tree

12 files changed

+60
-56
lines changed

12 files changed

+60
-56
lines changed

cycode/cli/commands/scan/code_scanner.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -301,6 +301,7 @@ def scan_documents(
301301
if not scan_parameters:
302302
scan_parameters = get_default_scan_parameters(context)
303303

304+
scan_type = context.obj['scan_type']
304305
progress_bar = context.obj['progress_bar']
305306

306307
if not documents_to_scan:
@@ -318,13 +319,13 @@ def scan_documents(
318319
context, is_git_diff, is_commit_range, scan_parameters
319320
)
320321
errors, local_scan_results = run_parallel_batched_scan(
321-
scan_batch_thread_func, documents_to_scan, progress_bar=progress_bar
322+
scan_batch_thread_func, scan_type, documents_to_scan, progress_bar=progress_bar
322323
)
323324

324325
if len(local_scan_results) > 1:
325326
# if we used more than one batch, we need to fetch aggregate report url
326327
aggregation_report_url = _try_get_aggregation_report_url_if_needed(
327-
scan_parameters, context.obj['client'], context.obj['scan_type']
328+
scan_parameters, context.obj['client'], scan_type
328329
)
329330
set_aggregation_report_url(context, aggregation_report_url)
330331

cycode/cli/commands/scan/scan_command.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
import click
55

6+
from cycode.cli import consts
67
from cycode.cli.commands.scan.commit_history.commit_history_command import commit_history_command
78
from cycode.cli.commands.scan.path.path_command import path_command
89
from cycode.cli.commands.scan.pre_commit.pre_commit_command import pre_commit_command
@@ -34,7 +35,7 @@
3435
@click.option(
3536
'--scan-type',
3637
'-t',
37-
default='secret',
38+
default=consts.SECRET_SCAN_TYPE,
3839
help='Specify the type of scan you wish to execute (the default is Secrets).',
3940
type=click.Choice(config['scans']['supported_scans']),
4041
)

cycode/cli/consts.py

+8-6
Original file line numberDiff line numberDiff line change
@@ -136,14 +136,16 @@
136136
# 5MB in bytes (in decimal)
137137
FILE_MAX_SIZE_LIMIT_IN_BYTES = 5000000
138138

139-
# 20MB in bytes (in binary)
140-
ZIP_MAX_SIZE_LIMIT_IN_BYTES = 20971520
141-
# 200MB in bytes (in binary)
142-
SCA_ZIP_MAX_SIZE_LIMIT_IN_BYTES = 209715200
139+
DEFAULT_ZIP_MAX_SIZE_LIMIT_IN_BYTES = 20 * 1024 * 1024
140+
ZIP_MAX_SIZE_LIMIT_IN_BYTES = {
141+
SCA_SCAN_TYPE: 200 * 1024 * 1024,
142+
SAST_SCAN_TYPE: 50 * 1024 * 1024,
143+
}
143144

144145
# scan in batches
145-
SCAN_BATCH_MAX_SIZE_IN_BYTES = 9 * 1024 * 1024
146-
SCAN_BATCH_MAX_FILES_COUNT = 1000
146+
DEFAULT_SCAN_BATCH_MAX_SIZE_IN_BYTES = 9 * 1024 * 1024
147+
SCAN_BATCH_MAX_SIZE_IN_BYTES = {SAST_SCAN_TYPE: 50 * 1024 * 1024}
148+
DEFAULT_SCAN_BATCH_MAX_FILES_COUNT = 1000
147149
# if we increase these values, the server doesn't allow connecting (ConnectionError)
148150
SCAN_BATCH_MAX_PARALLEL_SCANS = 5
149151
SCAN_BATCH_SCANS_PER_CPU = 1

cycode/cli/files_collector/zip_documents.py

+3-6
Original file line numberDiff line numberDiff line change
@@ -10,12 +10,9 @@
1010

1111

1212
def _validate_zip_file_size(scan_type: str, zip_file_size: int) -> None:
13-
if scan_type == consts.SCA_SCAN_TYPE:
14-
if zip_file_size > consts.SCA_ZIP_MAX_SIZE_LIMIT_IN_BYTES:
15-
raise custom_exceptions.ZipTooLargeError(consts.SCA_ZIP_MAX_SIZE_LIMIT_IN_BYTES)
16-
else:
17-
if zip_file_size > consts.ZIP_MAX_SIZE_LIMIT_IN_BYTES:
18-
raise custom_exceptions.ZipTooLargeError(consts.ZIP_MAX_SIZE_LIMIT_IN_BYTES)
13+
max_size_limit = consts.ZIP_MAX_SIZE_LIMIT_IN_BYTES.get(scan_type, consts.DEFAULT_ZIP_MAX_SIZE_LIMIT_IN_BYTES)
14+
if zip_file_size > max_size_limit:
15+
raise custom_exceptions.ZipTooLargeError(max_size_limit)
1916

2017

2118
def zip_documents(scan_type: str, documents: List[Document], zip_file: Optional[InMemoryZip] = None) -> InMemoryZip:

cycode/cli/utils/scan_batch.py

+9-13
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,7 @@
22
from multiprocessing.pool import ThreadPool
33
from typing import TYPE_CHECKING, Callable, Dict, List, Tuple
44

5-
from cycode.cli.consts import (
6-
SCAN_BATCH_MAX_FILES_COUNT,
7-
SCAN_BATCH_MAX_PARALLEL_SCANS,
8-
SCAN_BATCH_MAX_SIZE_IN_BYTES,
9-
SCAN_BATCH_SCANS_PER_CPU,
10-
)
5+
from cycode.cli import consts
116
from cycode.cli.models import Document
127
from cycode.cli.utils.progress_bar import ScanProgressBarSection
138

@@ -18,8 +13,8 @@
1813

1914
def split_documents_into_batches(
2015
documents: List[Document],
21-
max_size_mb: int = SCAN_BATCH_MAX_SIZE_IN_BYTES,
22-
max_files_count: int = SCAN_BATCH_MAX_FILES_COUNT,
16+
max_size: int = consts.DEFAULT_SCAN_BATCH_MAX_SIZE_IN_BYTES,
17+
max_files_count: int = consts.DEFAULT_SCAN_BATCH_MAX_FILES_COUNT,
2318
) -> List[List[Document]]:
2419
batches = []
2520

@@ -28,7 +23,7 @@ def split_documents_into_batches(
2823
for document in documents:
2924
document_size = len(document.content.encode('UTF-8'))
3025

31-
if (current_size + document_size > max_size_mb) or (len(current_batch) >= max_files_count):
26+
if (current_size + document_size > max_size) or (len(current_batch) >= max_files_count):
3227
batches.append(current_batch)
3328

3429
current_batch = [document]
@@ -45,17 +40,18 @@ def split_documents_into_batches(
4540

4641
def _get_threads_count() -> int:
4742
cpu_count = os.cpu_count() or 1
48-
return min(cpu_count * SCAN_BATCH_SCANS_PER_CPU, SCAN_BATCH_MAX_PARALLEL_SCANS)
43+
return min(cpu_count * consts.SCAN_BATCH_SCANS_PER_CPU, consts.SCAN_BATCH_MAX_PARALLEL_SCANS)
4944

5045

5146
def run_parallel_batched_scan(
5247
scan_function: Callable[[List[Document]], Tuple[str, 'CliError', 'LocalScanResult']],
48+
scan_type: str,
5349
documents: List[Document],
5450
progress_bar: 'BaseProgressBar',
55-
max_size_mb: int = SCAN_BATCH_MAX_SIZE_IN_BYTES,
56-
max_files_count: int = SCAN_BATCH_MAX_FILES_COUNT,
5751
) -> Tuple[Dict[str, 'CliError'], List['LocalScanResult']]:
58-
batches = split_documents_into_batches(documents, max_size_mb, max_files_count)
52+
max_size = consts.SCAN_BATCH_MAX_SIZE_IN_BYTES.get(scan_type, consts.DEFAULT_SCAN_BATCH_MAX_SIZE_IN_BYTES)
53+
batches = split_documents_into_batches(documents, max_size)
54+
5955
progress_bar.set_section_length(ScanProgressBarSection.SCAN, len(batches)) # * 3
6056
# TODO(MarshalX): we should multiply the count of batches in SCAN section because each batch has 3 steps:
6157
# 1. scan creation

cycode/cyclient/scan_client.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -328,11 +328,11 @@ def parse_zipped_file_scan_response(response: Response) -> models.ZippedFileScan
328328
@staticmethod
329329
def get_service_name(scan_type: str) -> Optional[str]:
330330
# TODO(MarshalX): get_service_name should be removed from ScanClient? Because it exists in ScanConfig
331-
if scan_type == 'secret':
331+
if scan_type == consts.SECRET_SCAN_TYPE:
332332
return 'secret'
333-
if scan_type == 'iac':
333+
if scan_type == consts.INFRA_CONFIGURATION_SCAN_TYPE:
334334
return 'iac'
335-
if scan_type == 'sca' or scan_type == 'sast':
335+
if scan_type == consts.SCA_SCAN_TYPE or scan_type == consts.SAST_SCAN_TYPE:
336336
return 'scans'
337337

338338
return None

cycode/cyclient/scan_config_base.py

+6-6
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,9 @@ def get_service_name(self, scan_type: str, should_use_scan_service: bool = False
99

1010
@staticmethod
1111
def get_async_scan_type(scan_type: str) -> str:
12-
if scan_type == 'secret':
12+
if scan_type == consts.SECRET_SCAN_TYPE:
1313
return 'Secrets'
14-
if scan_type == 'iac':
14+
if scan_type == consts.INFRA_CONFIGURATION_SCAN_TYPE:
1515
return 'InfraConfiguration'
1616

1717
return scan_type.upper()
@@ -31,9 +31,9 @@ class DevScanConfig(ScanConfigBase):
3131
def get_service_name(self, scan_type: str, should_use_scan_service: bool = False) -> str:
3232
if should_use_scan_service:
3333
return '5004'
34-
if scan_type == 'secret':
34+
if scan_type == consts.SECRET_SCAN_TYPE:
3535
return '5025'
36-
if scan_type == 'iac':
36+
if scan_type == consts.INFRA_CONFIGURATION_SCAN_TYPE:
3737
return '5026'
3838

3939
# sca and sast
@@ -47,9 +47,9 @@ class DefaultScanConfig(ScanConfigBase):
4747
def get_service_name(self, scan_type: str, should_use_scan_service: bool = False) -> str:
4848
if should_use_scan_service:
4949
return 'scans'
50-
if scan_type == 'secret':
50+
if scan_type == consts.SECRET_SCAN_TYPE:
5151
return 'secret'
52-
if scan_type == 'iac':
52+
if scan_type == consts.INFRA_CONFIGURATION_SCAN_TYPE:
5353
return 'iac'
5454

5555
# sca and sast

tests/cli/commands/test_main_command.py

+6-3
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import responses
77
from click.testing import CliRunner
88

9+
from cycode.cli import consts
910
from cycode.cli.commands.main_cli import main_cli
1011
from cycode.cli.utils.git_proxy import git_proxy
1112
from tests.conftest import CLI_ENV_VARS, TEST_FILES_PATH, ZIP_CONTENT_PATH
@@ -29,7 +30,7 @@ def _is_json(plain: str) -> bool:
2930
@responses.activate
3031
@pytest.mark.parametrize('output', ['text', 'json'])
3132
def test_passing_output_option(output: str, scan_client: 'ScanClient', api_token_response: responses.Response) -> None:
32-
scan_type = 'secret'
33+
scan_type = consts.SECRET_SCAN_TYPE
3334
scan_id = uuid4()
3435

3536
mock_scan_responses(responses, scan_type, scan_client, scan_id, ZIP_CONTENT_PATH)
@@ -52,8 +53,10 @@ def test_passing_output_option(output: str, scan_client: 'ScanClient', api_token
5253

5354
@responses.activate
5455
def test_optional_git_with_path_scan(scan_client: 'ScanClient', api_token_response: responses.Response) -> None:
55-
mock_scan_responses(responses, 'secret', scan_client, uuid4(), ZIP_CONTENT_PATH)
56-
responses.add(get_zipped_file_scan_response(get_zipped_file_scan_url('secret', scan_client), ZIP_CONTENT_PATH))
56+
mock_scan_responses(responses, consts.SECRET_SCAN_TYPE, scan_client, uuid4(), ZIP_CONTENT_PATH)
57+
responses.add(
58+
get_zipped_file_scan_response(get_zipped_file_scan_url(consts.SECRET_SCAN_TYPE, scan_client), ZIP_CONTENT_PATH)
59+
)
5760
responses.add(api_token_response)
5861

5962
# fake env without Git executable

tests/cyclient/scan_config/test_default_scan_config.py

+6-5
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,15 @@
1+
from cycode.cli import consts
12
from cycode.cyclient.scan_config_base import DefaultScanConfig
23

34

45
def test_get_service_name() -> None:
56
default_scan_config = DefaultScanConfig()
67

7-
assert default_scan_config.get_service_name('secret') == 'secret'
8-
assert default_scan_config.get_service_name('iac') == 'iac'
9-
assert default_scan_config.get_service_name('sca') == 'scans'
10-
assert default_scan_config.get_service_name('sast') == 'scans'
11-
assert default_scan_config.get_service_name('secret', True) == 'scans'
8+
assert default_scan_config.get_service_name(consts.SECRET_SCAN_TYPE) == 'secret'
9+
assert default_scan_config.get_service_name(consts.INFRA_CONFIGURATION_SCAN_TYPE) == 'iac'
10+
assert default_scan_config.get_service_name(consts.SCA_SCAN_TYPE) == 'scans'
11+
assert default_scan_config.get_service_name(consts.SAST_SCAN_TYPE) == 'scans'
12+
assert default_scan_config.get_service_name(consts.SECRET_SCAN_TYPE, True) == 'scans'
1213

1314

1415
def test_get_detections_prefix() -> None:

tests/cyclient/scan_config/test_dev_scan_config.py

+6-5
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,15 @@
1+
from cycode.cli import consts
12
from cycode.cyclient.scan_config_base import DevScanConfig
23

34

45
def test_get_service_name() -> None:
56
dev_scan_config = DevScanConfig()
67

7-
assert dev_scan_config.get_service_name('secret') == '5025'
8-
assert dev_scan_config.get_service_name('iac') == '5026'
9-
assert dev_scan_config.get_service_name('sca') == '5004'
10-
assert dev_scan_config.get_service_name('sast') == '5004'
11-
assert dev_scan_config.get_service_name('secret', should_use_scan_service=True) == '5004'
8+
assert dev_scan_config.get_service_name(consts.SECRET_SCAN_TYPE) == '5025'
9+
assert dev_scan_config.get_service_name(consts.INFRA_CONFIGURATION_SCAN_TYPE) == '5026'
10+
assert dev_scan_config.get_service_name(consts.SCA_SCAN_TYPE) == '5004'
11+
assert dev_scan_config.get_service_name(consts.SAST_SCAN_TYPE) == '5004'
12+
assert dev_scan_config.get_service_name(consts.SECRET_SCAN_TYPE, should_use_scan_service=True) == '5004'
1213

1314

1415
def test_get_detections_prefix() -> None:

tests/cyclient/test_scan_client.py

+5-4
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from requests import Timeout
99
from requests.exceptions import ProxyError
1010

11+
from cycode.cli import consts
1112
from cycode.cli.config import config
1213
from cycode.cli.exceptions.custom_exceptions import (
1314
CycodeError,
@@ -49,10 +50,10 @@ def get_test_zip_file(scan_type: str) -> InMemoryZip:
4950

5051
def test_get_service_name(scan_client: ScanClient) -> None:
5152
# TODO(MarshalX): get_service_name should be removed from ScanClient? Because it exists in ScanConfig
52-
assert scan_client.get_service_name('secret') == 'secret'
53-
assert scan_client.get_service_name('iac') == 'iac'
54-
assert scan_client.get_service_name('sca') == 'scans'
55-
assert scan_client.get_service_name('sast') == 'scans'
53+
assert scan_client.get_service_name(consts.SECRET_SCAN_TYPE) == 'secret'
54+
assert scan_client.get_service_name(consts.INFRA_CONFIGURATION_SCAN_TYPE) == 'iac'
55+
assert scan_client.get_service_name(consts.SCA_SCAN_TYPE) == 'scans'
56+
assert scan_client.get_service_name(consts.SAST_SCAN_TYPE) == 'scans'
5657

5758

5859
@pytest.mark.parametrize('scan_type', config['scans']['supported_scans'])

tests/test_code_scanner.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import pytest
55
import responses
66

7+
from cycode.cli import consts
78
from cycode.cli.commands.scan.code_scanner import (
89
_try_get_aggregation_report_url_if_needed,
910
_try_get_report_url_if_needed,
@@ -22,13 +23,13 @@
2223

2324
def test_is_relevant_file_to_scan_sca() -> None:
2425
path = os.path.join(TEST_FILES_PATH, 'package.json')
25-
assert _is_relevant_file_to_scan('sca', path) is True
26+
assert _is_relevant_file_to_scan(consts.SCA_SCAN_TYPE, path) is True
2627

2728

2829
@pytest.mark.parametrize('scan_type', config['scans']['supported_scans'])
2930
def test_try_get_report_url_if_needed_return_none(scan_type: str, scan_client: ScanClient) -> None:
3031
scan_id = uuid4().hex
31-
result = _try_get_report_url_if_needed(scan_client, False, scan_id, 'secret')
32+
result = _try_get_report_url_if_needed(scan_client, False, scan_id, consts.SECRET_SCAN_TYPE)
3233
assert result is None
3334

3435

0 commit comments

Comments
 (0)