Skip to content

Support remote inference on Triton Inference Server with ease of use #536

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 12 commits into from
Apr 21, 2025
Merged
1 change: 1 addition & 0 deletions docs/source/getting_started/examples.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,4 @@
- dicom_series_to_image_app
- breast_density_classifer_app
- cchmc_ped_abd_ct_seg_app
- ai_remote_infer_app
15 changes: 15 additions & 0 deletions examples/apps/ai_remote_infer_app/__main__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Copyright 2021-2025 MONAI Consortium
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Entry-point module so the app directory can be run directly, e.g. `python <app_dir>`.
from app import AIRemoteInferSpleenSegApp

if __name__ == "__main__":
    # Creates the application and runs it; input/output/model paths come from
    # environment variables (HOLOSCAN_*) or command-line options parsed by the base class.
    AIRemoteInferSpleenSegApp().run()
143 changes: 143 additions & 0 deletions examples/apps/ai_remote_infer_app/app.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
# Copyright 2021-2025 MONAI Consortium
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
from pathlib import Path

from pydicom.sr.codedict import codes # Required for setting SegmentDescription attributes.
from spleen_seg_operator import SpleenSegOperator

from monai.deploy.conditions import CountCondition
from monai.deploy.core import Application
from monai.deploy.operators.dicom_data_loader_operator import DICOMDataLoaderOperator
from monai.deploy.operators.dicom_seg_writer_operator import DICOMSegmentationWriterOperator, SegmentDescription
from monai.deploy.operators.dicom_series_selector_operator import DICOMSeriesSelectorOperator
from monai.deploy.operators.dicom_series_to_volume_operator import DICOMSeriesToVolumeOperator
from monai.deploy.operators.stl_conversion_operator import STLConversionOperator


class AIRemoteInferSpleenSegApp(Application):
    """Spleen segmentation application whose model inference is served remotely.

    The DAG loads a DICOM CT study, selects a CT series, converts it to a volume,
    runs spleen segmentation (inference delegated to a Triton server by the
    operator), and writes a DICOM SEG plus a surface-mesh STL.
    """

    def __init__(self, *args, **kwargs):
        """Creates an application instance."""

        super().__init__(*args, **kwargs)
        self._logger = logging.getLogger(f"{__name__}.{type(self).__name__}")

    def run(self, *args, **kwargs):
        """Runs the app via the base class, logging entry and exit."""
        self._logger.info(f"Begin {self.run.__name__}")
        super().run(*args, **kwargs)
        self._logger.info(f"End {self.run.__name__}")

    def compose(self):
        """Creates the app specific operators and chains them up in the processing DAG."""

        # Command-line options take precedence over environment variables when
        # initializing the application context (paths, model, etc.).
        app_context = Application.init_app_context(self.argv)
        self._logger.debug(f"Begin {self.compose.__name__}")
        input_dir = Path(app_context.input_path)
        output_dir = Path(app_context.output_path)
        model_dir = Path(app_context.model_path)

        self._logger.info(f"App input and output path: {input_dir}, {output_dir}")

        # SDK built-in operators for loading, selecting, and converting DICOM input.
        loader_op = DICOMDataLoaderOperator(
            self, CountCondition(self, 1), input_folder=input_dir, name="dcm_loader_op"
        )
        selector_op = DICOMSeriesSelectorOperator(self, rules=Sample_Rules_Text, name="series_selector_op")
        to_volume_op = DICOMSeriesToVolumeOperator(self, name="series_to_vol_op")

        # Model specific inference operator, supporting MONAI transforms.
        seg_op = SpleenSegOperator(
            self, app_context=app_context, model_name="spleen_ct", model_path=model_dir, name="seg_op"
        )

        # DICOM SEG writer needs one SegmentDescription per segment, identifying the
        # algorithm and the segmented organ/tissue.
        # The segment_label, algorithm_name, and algorithm_version are limited to 64 chars.
        # https://dicom.nema.org/medical/dicom/current/output/chtml/part05/sect_6.2.html
        # SNOMED CT codes can be looked up at, e.g.
        # https://bioportal.bioontology.org/ontologies/SNOMEDCT
        seg_descriptions = [
            SegmentDescription(
                segment_label="Spleen",
                segmented_property_category=codes.SCT.Organ,
                segmented_property_type=codes.SCT.Spleen,
                algorithm_name="3D segmentation of the Spleen from a CT series",
                algorithm_family=codes.DCM.ArtificialIntelligence,
                algorithm_version="0.1.0",
            ),
        ]

        seg_writer_op = DICOMSegmentationWriterOperator(
            self,
            segment_descriptions=seg_descriptions,
            custom_tags={"SeriesDescription": "AI generated Seg, not for clinical use."},
            output_folder=output_dir,
            name="dcm_seg_writer_op",
        )

        # Wire up the pipeline: output port names/types of each source must match the
        # input port names/types of its destination.
        self.add_flow(loader_op, selector_op, {("dicom_study_list", "dicom_study_list")})
        self.add_flow(
            selector_op, to_volume_op, {("study_selected_series_list", "study_selected_series_list")}
        )
        self.add_flow(to_volume_op, seg_op, {("image", "image")})

        # The SEG writer takes two inputs, each coming from a different source operator.
        self.add_flow(
            selector_op, seg_writer_op, {("study_selected_series_list", "study_selected_series_list")}
        )
        self.add_flow(seg_op, seg_writer_op, {("seg_image", "seg_image")})

        # Surface mesh STL conversion of the segmentation; remove these two statements
        # if the STL output is not needed.
        stl_op = STLConversionOperator(
            self, output_file=output_dir / "stl/spleen.stl", name="stl_conversion_op"
        )
        self.add_flow(seg_op, stl_op, {("pred", "image")})

        self._logger.debug(f"End {self.compose.__name__}")


# This is a sample series selection rule in JSON, simply selecting CT series.
# If the study has more than 1 CT series, then all of them will be selected.
# Please see more detail in DICOMSeriesSelectorOperator.
# For a list of string values, e.g. "ImageType": ["PRIMARY", "ORIGINAL"], it is a match
# only if every listed element is present in the multi-value attribute of the DICOM series.

Sample_Rules_Text = """
{
    "selections": [
        {
            "name": "CT Series",
            "conditions": {
                "StudyDescription": "(.*?)",
                "Modality": "(?i)CT",
                "SeriesDescription": "(.*?)",
                "ImageType": ["PRIMARY", "ORIGINAL"]
            }
        }
    ]
}
"""

if __name__ == "__main__":
    # Creates the app and test it standalone.
    AIRemoteInferSpleenSegApp().run()
6 changes: 6 additions & 0 deletions examples/apps/ai_remote_infer_app/env_settings_example.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#!/bin/bash
# Example environment settings for ai_remote_infer_app; source this file before launching the app.
export HOLOSCAN_INPUT_PATH="inputs/spleen_ct_tcia"  # Folder containing the input DICOM CT series
export HOLOSCAN_MODEL_PATH="examples/apps/ai_remote_infer_app/models_client_side"  # Client-side model config folder
export HOLOSCAN_OUTPUT_PATH="output_spleen"  # Folder where DICOM SEG, STL, and saved images are written
export HOLOSCAN_LOG_LEVEL=DEBUG  # TRACE can be used for verbose low-level logging
export TRITON_SERVER_NETLOC="localhost:8000"  # Triton server network location, host:port
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
platform: "pytorch_libtorch"  # Backend for serving TorchScript models

max_batch_size: 16 # The maximum batch size; 0 means no batching, with the full shape given in dims

default_model_filename: "model_spleen_ct_segmentation_v1.ts" # The name of the TorchScript model file

input [
{
name: "INPUT_0" # The name of the input tensor; must match the input tensor name in your model if named
data_type: TYPE_FP32 # Data type is FP32
dims: [ 1, 96, 96, 96 ] # Per-request input dims (batch dim excluded): [channels, width, height, depth]
}
]

output [
{
name: "OUTPUT_0" # The name of the output tensor; match this with your TorchScript model's output name
data_type: TYPE_FP32 # Output is FP32
dims: [ 2, 96, 96, 96 ] # Per-request output dims (batch dim excluded): [channels, width, height, depth]
}
]

version_policy: { latest: { num_versions: 1}} # Only serve the latest version, which is the default

instance_group [
{
kind: KIND_GPU # Run model instances on GPU
count: 1 # Number of instances created for each GPU listed in 'gpus' (adjust based on your resources)
}
]

dynamic_batching {
preferred_batch_size: [ 4, 8, 16 ] # Preferred batch size(s) for dynamic batching; matches max_batch_size for sync calls
max_queue_delay_microseconds: 1000 # Max delay before processing a partially filled batch
}

# TorchScript models can be very slow on their first few inference calls while the
# JIT performs optimization passes (warmup). Disabling optimized execution trades
# peak throughput for fast startup, avoiding that warmup cost.
parameters: {
key: "DISABLE_OPTIMIZED_EXECUTION"
value: {
string_value: "true"
}
}
165 changes: 165 additions & 0 deletions examples/apps/ai_remote_infer_app/spleen_seg_operator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
# Copyright 2021-2025 MONAI Consortium
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import logging
from pathlib import Path

from numpy import uint8

from monai.deploy.core import AppContext, ConditionType, Fragment, Operator, OperatorSpec
from monai.deploy.operators.monai_seg_inference_operator import InfererType, InMemImageReader, MonaiSegInferenceOperator
from monai.transforms import (
Activationsd,
AsDiscreted,
Compose,
EnsureChannelFirstd,
EnsureTyped,
Invertd,
LoadImaged,
Orientationd,
SaveImaged,
ScaleIntensityRanged,
Spacingd,
)


class SpleenSegOperator(Operator):
    """Performs Spleen segmentation with a 3D image converted from a DICOM CT series.

    Wraps the SDK's MonaiSegInferenceOperator, composing the MONAI pre/post
    transforms and delegating the actual (remote) inference to it.
    """

    # Default folder for the intermediate NIfTI images saved by the transforms.
    DEFAULT_OUTPUT_FOLDER = Path.cwd() / "output/saved_images_folder"

    def __init__(
        self,
        fragment: Fragment,
        *args,
        app_context: AppContext,
        model_path: Path,
        model_name: str,
        output_folder: Path = DEFAULT_OUTPUT_FOLDER,
        **kwargs,
    ):
        """Creates the operator.

        Args:
            fragment: The fragment (application) this operator belongs to.
            app_context: App context carrying runtime settings for the inference operator.
            model_path: Path to the model (client-side config for remote inference).
            model_name: Name of the model to request, e.g. "spleen_ct".
            output_folder: Folder for images saved by the pre/post transforms.
        """

        self.logger = logging.getLogger("{}.{}".format(__name__, type(self).__name__))
        # Dictionary keys used by the dict-based MONAI transforms below.
        self._input_dataset_key = "image"
        self._pred_dataset_key = "pred"

        self.model_path = model_path
        self.model_name = model_name
        self.output_folder = output_folder
        self.output_folder.mkdir(parents=True, exist_ok=True)
        self.app_context = app_context
        # Port names used in setup() and compute().
        self.input_name_image = "image"
        self.output_name_seg = "seg_image"
        self.output_name_saved_images_folder = "saved_images_folder"

        # The base class has an attribute called fragment to hold the reference to the fragment object
        super().__init__(fragment, *args, **kwargs)

    def setup(self, spec: OperatorSpec):
        """Declares one image input and two outputs (segmentation, saved-images folder)."""
        spec.input(self.input_name_image)
        spec.output(self.output_name_seg)
        spec.output(self.output_name_saved_images_folder).condition(
            ConditionType.NONE
        )  # Output not requiring a receiver

    def compute(self, op_input, op_output, context):
        """Receives the in-memory image, runs inference via the SDK operator, and emits results."""
        input_image = op_input.receive(self.input_name_image)
        if not input_image:
            raise ValueError("Input image is not found.")

        # This operator gets an in-memory Image object, so a specialized ImageReader is needed.
        _reader = InMemImageReader(input_image)

        pre_transforms = self.pre_process(_reader, str(self.output_folder))
        post_transforms = self.post_process(pre_transforms, str(self.output_folder))

        # Delegates inference and saving output to the built-in operator.
        # NOTE(review): constructed on every compute() call; presumably acceptable for
        # this single-shot app — confirm if reused in a streaming context.
        infer_operator = MonaiSegInferenceOperator(
            self.fragment,
            roi_size=(
                96,
                96,
                96,
            ),
            pre_transforms=pre_transforms,
            post_transforms=post_transforms,
            overlap=0.6,
            app_context=self.app_context,
            model_name=self.model_name,
            inferer=InfererType.SLIDING_WINDOW,
            sw_batch_size=4,
            model_path=self.model_path,
            name="monai_seg_remote_inference_op",
        )

        # Setting the keys used in the dictionary based transforms may change.
        infer_operator.input_dataset_key = self._input_dataset_key
        infer_operator.pred_dataset_key = self._pred_dataset_key

        # Now emit data to the output ports of this operator
        op_output.emit(infer_operator.compute_impl(input_image, context), self.output_name_seg)
        op_output.emit(self.output_folder, self.output_name_saved_images_folder)

    def pre_process(self, img_reader, out_dir: str = "./input_images") -> Compose:
        """Composes transforms for preprocessing input before predicting on a model."""

        Path(out_dir).mkdir(parents=True, exist_ok=True)
        my_key = self._input_dataset_key

        return Compose(
            [
                LoadImaged(keys=my_key, reader=img_reader),
                EnsureChannelFirstd(keys=my_key),
                # The SaveImaged transform can be commented out to save 5 seconds.
                # Uncompress NIfTI file, nii, is used favoring speed over size, but can be changed to nii.gz
                SaveImaged(
                    keys=my_key,
                    output_dir=out_dir,
                    output_postfix="",
                    resample=False,
                    output_ext=".nii",
                ),
                Orientationd(keys=my_key, axcodes="RAS"),
                Spacingd(keys=my_key, pixdim=[1.5, 1.5, 2.9], mode=["bilinear"]),
                ScaleIntensityRanged(keys=my_key, a_min=-57, a_max=164, b_min=0.0, b_max=1.0, clip=True),
                EnsureTyped(keys=my_key),
            ]
        )

    def post_process(self, pre_transforms: Compose, out_dir: str = "./prediction_output") -> Compose:
        """Composes transforms for postprocessing the prediction results."""

        Path(out_dir).mkdir(parents=True, exist_ok=True)
        pred_key = self._pred_dataset_key

        return Compose(
            [
                Activationsd(keys=pred_key, softmax=True),
                # Invert the spatial pre-transforms so the prediction maps back to the input space.
                Invertd(
                    keys=pred_key,
                    transform=pre_transforms,
                    orig_keys=self._input_dataset_key,
                    nearest_interp=False,
                    to_tensor=True,
                ),
                AsDiscreted(keys=pred_key, argmax=True),
                # The SaveImaged transform can be commented out to save 5 seconds.
                # Uncompress NIfTI file, nii, is used favoring speed over size, but can be changed to nii.gz
                SaveImaged(
                    keys=pred_key,
                    output_dir=out_dir,
                    output_postfix="seg",
                    output_dtype=uint8,
                    resample=False,
                    output_ext=".nii",
                ),
            ]
        )
Loading