Skip to content

RF-DETR Seg XLarge

Bases: RFDETRSeg

Source code in src/rfdetr/detr.py
class RFDETRSegXLarge(RFDETRSeg):
    """RF-DETR segmentation model, XLarge variant.

    Thin size-selection subclass: it binds the ``"rfdetr-seg-xlarge"`` size
    identifier to its matching model configuration class and inherits all
    behavior from ``RFDETRSeg``.
    """

    # Size identifier; used e.g. as the default model_type when deploying to Roboflow.
    size = "rfdetr-seg-xlarge"
    # Configuration class instantiated by get_model_config().
    _model_config_class = RFDETRSegXLargeConfig

Attributes

class_names property

Retrieve the class names supported by the loaded model.

Returns:

Type Description
list[str]

A list of class name strings, 0-indexed. When no custom class names are embedded in the checkpoint, returns the standard 80 COCO class names.

Functions

deploy_to_roboflow(workspace, project_id, version, api_key=None, size=None)

Deploy the trained RF-DETR model to Roboflow.

Deploying with Roboflow will create a Serverless API to which you can make requests.

You can also download weights into a Roboflow Inference deployment for use in Roboflow Workflows and on-device deployment.

Parameters:

Name Type Description Default

workspace

str

The name of the Roboflow workspace to deploy to.

required

project_id

str

The project ID to which the model will be deployed.

required

version

int | str

The project version to which the model will be deployed.

required

api_key

str | None

Your Roboflow API key. If not provided, it will be read from the environment variable ROBOFLOW_API_KEY.

None

size

str | None

The size of the model to deploy. If not provided, it will default to the size of the model being trained (e.g., "rfdetr-base", "rfdetr-large", etc.).

None

Raises:

Type Description
ValueError

If the api_key is not provided and not found in the environment variable ROBOFLOW_API_KEY, or if the size is not set for custom architectures.

Source code in src/rfdetr/detr.py
def deploy_to_roboflow(
    self,
    workspace: str,
    project_id: str,
    version: int | str,
    api_key: str | None = None,
    size: str | None = None,
) -> None:
    """Deploy the trained RF-DETR model to Roboflow.

    Deploying with Roboflow will create a Serverless API to which you can make requests.

    You can also download weights into a Roboflow Inference deployment for use in
    Roboflow Workflows and on-device deployment.

    Args:
        workspace: The name of the Roboflow workspace to deploy to.
        project_id: The project ID to which the model will be deployed.
        version: The project version to which the model will be deployed.
        api_key: Your Roboflow API key. If not provided,
            it will be read from the environment variable `ROBOFLOW_API_KEY`.
        size: The size of the model to deploy. If not provided,
            it will default to the size of the model being trained (e.g., "rfdetr-base", "rfdetr-large", etc.).

    Raises:
        ValueError: If the `api_key` is not provided and not found in the
            environment variable `ROBOFLOW_API_KEY`, or if the `size` is
            not set for custom architectures.

    """
    import shutil

    from roboflow import Roboflow

    if api_key is None:
        api_key = os.getenv("ROBOFLOW_API_KEY")
        if api_key is None:
            raise ValueError("Set api_key=<KEY> in deploy_to_roboflow or export ROBOFLOW_API_KEY=<KEY>")

    rf = Roboflow(api_key=api_key)
    # Use a distinct local name: rebinding `workspace` would shadow the string
    # parameter with the Roboflow workspace object.
    rf_workspace = rf.workspace(workspace)

    if self.size is None and size is None:
        raise ValueError("Must set size for custom architectures")

    # BUGFIX: an explicitly passed `size` must take precedence, falling back to
    # the model's own size only when the caller did not provide one (matching
    # the documented behavior above). The previous `self.size or size` silently
    # ignored the caller's argument whenever the model had a built-in size.
    size = size or self.size
    tmp_out_dir = ".roboflow_temp_upload"
    os.makedirs(tmp_out_dir, exist_ok=True)
    try:
        # Write class_names.txt so the Roboflow upload pipeline can discover
        # the class labels without relying on args.class_names in the checkpoint.
        class_names_path = os.path.join(tmp_out_dir, "class_names.txt")
        with open(class_names_path, "w", encoding="utf-8", newline="\n") as f:
            f.write("\n".join(self.class_names))

        # Also embed class_names in the args namespace so that any code path
        # that loads the checkpoint directly (e.g. roboflow-python's second
        # fallback) can find them.  Mutating the shared SimpleNamespace is
        # intentional here: this mirrors reinitialize_detection_head(), which
        # already mutates args.num_classes in-place.
        args = self.model.args
        if getattr(args, "class_names", None) is None:
            args.class_names = self.class_names

        outpath = os.path.join(tmp_out_dir, "weights.pt")
        torch.save({"model": self.model.model.state_dict(), "args": args}, outpath)
        project = rf_workspace.project(project_id)
        project_version = project.version(version)
        project_version.deploy(model_type=size, model_path=tmp_out_dir, filename="weights.pt")
    finally:
        shutil.rmtree(tmp_out_dir, ignore_errors=True)

export(output_dir='output', infer_dir=None, simplify=False, backbone_only=False, opset_version=17, verbose=True, force=False, shape=None, batch_size=1, dynamic_batch=False, patch_size=None, **kwargs)

Export the trained model to ONNX format.

See the ONNX export documentation at https://rfdetr.roboflow.com/learn/export/ for more information.

Parameters:

Name Type Description Default

output_dir

str

Directory to write the ONNX file to.

'output'

infer_dir

str

Optional directory of sample images for dynamic-axes inference.

None

simplify

bool

Deprecated and ignored. Simplification is no longer run.

False

backbone_only

bool

Export only the backbone (feature extractor).

False

opset_version

int

ONNX opset version to target.

17

verbose

bool

Print export progress information.

True

force

bool

Deprecated and ignored.

False

shape

tuple[int, int] | None

(height, width) tuple; defaults to square at model resolution. Both dimensions must be divisible by patch_size * num_windows.

None

batch_size

int

Static batch size to bake into the ONNX graph.

1

dynamic_batch

bool

If True, export with a dynamic batch dimension so the ONNX model accepts variable batch sizes at runtime.

False

patch_size

int | None

Backbone patch size. Defaults to the value stored in model_config.patch_size (typically 14 or 16). When provided explicitly it must match the instantiated model's patch size. Shape divisibility is validated against patch_size * num_windows.

None

**kwargs

Additional keyword arguments forwarded to export_onnx.

{}
Source code in src/rfdetr/detr.py
@deprecated(
    target=True,
    # `simplify` / `force` are retained for API compatibility and treated as no-op.
    args_mapping={
        "simplify": False,
        "force": False,
    },
    deprecated_in="1.6",
    remove_in="1.8",
    num_warns=1,
    stream=functools.partial(warnings.warn, category=DeprecationWarning, stacklevel=2),
)
def export(
    self,
    output_dir: str = "output",
    infer_dir: str | None = None,
    simplify: bool = False,
    backbone_only: bool = False,
    opset_version: int = 17,
    verbose: bool = True,
    force: bool = False,
    shape: tuple[int, int] | None = None,
    batch_size: int = 1,
    dynamic_batch: bool = False,
    patch_size: int | None = None,
    **kwargs,
) -> None:
    """Export the trained model to ONNX format.

    See the `ONNX export documentation <https://rfdetr.roboflow.com/learn/export/>`_
    for more information.

    Args:
        output_dir: Directory to write the ONNX file to.
        infer_dir: Optional directory of sample images for dynamic-axes inference.
        simplify: Deprecated and ignored. Simplification is no longer run.
        backbone_only: Export only the backbone (feature extractor).
        opset_version: ONNX opset version to target.
        verbose: Print export progress information.
        force: Deprecated and ignored.
        shape: ``(height, width)`` tuple; defaults to square at model resolution.
            Both dimensions must be divisible by ``patch_size * num_windows``.
        batch_size: Static batch size to bake into the ONNX graph.
        dynamic_batch: If True, export with a dynamic batch dimension
            so the ONNX model accepts variable batch sizes at runtime.
        patch_size: Backbone patch size. Defaults to the value stored in
            ``model_config.patch_size`` (typically 14 or 16). When provided
            explicitly it must match the instantiated model's patch size.
            Shape divisibility is validated against ``patch_size * num_windows``.
        **kwargs: Additional keyword arguments forwarded to export_onnx.

    """
    logger.info("Exporting model to ONNX format")
    # ONNX export dependencies live in the `rfdetr[onnx]` extra; surface a
    # clear remediation message if they are missing, then re-raise.
    try:
        from rfdetr.export.main import export_onnx, make_infer_image
    except ImportError:
        logger.error(
            "It seems some dependencies for ONNX export are missing."
            " Please run `pip install rfdetr[onnx]` and try again.",
        )
        raise

    device = self.model.device
    # NOTE(review): the live model is moved to CPU *before* deepcopy —
    # presumably so two full copies of the weights never coexist on the GPU;
    # the original model is moved back to `device` at the end of this method.
    model = deepcopy(self.model.model.to("cpu"))
    model.to(device)

    os.makedirs(output_dir, exist_ok=True)
    output_dir_path = Path(output_dir)
    patch_size = _resolve_patch_size(patch_size, self.model_config, "export")
    num_windows = getattr(self.model_config, "num_windows", 1)
    # Explicit bool check: `True` is an instance of int and must be rejected.
    if isinstance(num_windows, bool) or not isinstance(num_windows, int) or num_windows <= 0:
        raise ValueError(f"num_windows must be a positive integer, got {num_windows!r}")
    block_size = patch_size * num_windows
    if shape is None:
        shape = (self.model.resolution, self.model.resolution)
        if shape[0] % block_size != 0:
            raise ValueError(
                f"Model's default resolution ({self.model.resolution}) is not divisible by "
                f"block_size={block_size} (patch_size={patch_size} * num_windows={num_windows}). "
                f"Provide an explicit shape divisible by {block_size}.",
            )
    else:
        shape = _validate_shape_dims(shape, block_size, patch_size, num_windows)

    input_tensors = make_infer_image(infer_dir, shape, batch_size, device).to(device)
    input_names = ["input"]
    if backbone_only:
        output_names = ["features"]
    elif self.model_config.segmentation_head:
        output_names = ["dets", "labels", "masks"]
    else:
        output_names = ["dets", "labels"]

    if dynamic_batch:
        dynamic_axes = {name: {0: "batch"} for name in input_names + output_names}
    else:
        dynamic_axes = None
    # Sanity-check a forward pass before tracing, logging the output shapes.
    model.eval()
    with torch.no_grad():
        if backbone_only:
            features = model(input_tensors)
            logger.debug(f"PyTorch inference output shape: {features.shape}")
        elif self.model_config.segmentation_head:
            outputs = model(input_tensors)
            dets = outputs["pred_boxes"]
            labels = outputs["pred_logits"]
            masks = outputs["pred_masks"]
            if isinstance(masks, torch.Tensor):
                logger.debug(
                    f"PyTorch inference output shapes - Boxes: {dets.shape}, Labels: {labels.shape}, "
                    f"Masks: {masks.shape}",
                )
            else:
                logger.debug(f"PyTorch inference output shapes - Boxes: {dets.shape}, Labels: {labels.shape}")
        else:
            outputs = model(input_tensors)
            dets = outputs["pred_boxes"]
            labels = outputs["pred_logits"]
            logger.debug(f"PyTorch inference output shapes - Boxes: {dets.shape}, Labels: {labels.shape}")

    # The ONNX tracing itself is performed on CPU tensors.
    model.cpu()
    input_tensors = input_tensors.cpu()

    output_file = export_onnx(
        output_dir=str(output_dir_path),
        model=model,
        input_names=input_names,
        input_tensors=input_tensors,
        output_names=output_names,
        dynamic_axes=dynamic_axes,
        backbone_only=backbone_only,
        verbose=verbose,
        opset_version=opset_version,
    )

    logger.info(f"Successfully exported ONNX model to: {output_file}")

    logger.info("ONNX export completed successfully")
    # Restore the original model to its device; it was moved to CPU above as
    # part of the deepcopy step.
    self.model.model = self.model.model.to(device)

get_model(config)

Retrieve a model context from the provided architecture configuration.

Parameters:

Name Type Description Default

config

ModelConfig

Architecture configuration.

required

Returns:

Type Description
'ModelContext'

ModelContext with model, postprocess, device, resolution, args, and class_names attributes.

Source code in src/rfdetr/detr.py
def get_model(self, config: ModelConfig) -> "ModelContext":
    """Build and return the model context for a given architecture configuration.

    Args:
        config: The architecture configuration to instantiate from.

    Returns:
        A ModelContext exposing the model, postprocess, device, resolution,
        args, and class_names attributes.

    """
    context = _build_model_context(config)
    return context

get_model_config(**kwargs)

Retrieve the configuration parameters used by the model.

Source code in src/rfdetr/detr.py
def get_model_config(self, **kwargs) -> ModelConfig:
    """Retrieve the configuration parameters used by the model."""
    return self._model_config_class(**kwargs)

get_train_config(**kwargs)

Retrieve the configuration parameters that will be used for training.

Source code in src/rfdetr/detr.py
def get_train_config(self, **kwargs) -> TrainConfig:
    """Retrieve the configuration parameters that will be used for training."""
    return self._train_config_class(**kwargs)

maybe_download_pretrain_weights()

Download pre-trained weights if they are not already downloaded.

Source code in src/rfdetr/detr.py
def maybe_download_pretrain_weights(self):
    """Download pre-trained weights if they are not already downloaded."""
    pretrain_weights = self.model_config.pretrain_weights
    if pretrain_weights is None:
        return
    download_pretrain_weights(pretrain_weights)

optimize_for_inference(compile=True, batch_size=1, dtype=torch.float32)

Optimize the model for inference with optional JIT compilation and dtype casting.

Operations are wrapped in the correct CUDA device context to prevent context leaks on multi-GPU setups. When compile=True the model is traced with torch.jit.trace using a dummy input of batch_size images at the model's current resolution.

Parameters:

Name Type Description Default

compile

bool

If True, trace the model with torch.jit.trace to obtain a JIT-compiled ScriptModule. Set to False for broader compatibility (e.g. models with dynamic control flow).

True

batch_size

int

Number of images the traced model will be optimized for. Ignored when compile=False.

1

dtype

dtype | str

Target floating-point dtype for the inference model. Accepts a torch.dtype directly (e.g. torch.float16) or its string name (e.g. "float16"). Defaults to torch.float32.

float32

Raises:

Type Description
TypeError

If dtype is not a torch.dtype, or if dtype is a string that does not correspond to a valid torch.dtype attribute.

Examples:

>>> model = RFDETRNano()
>>> model.optimize_for_inference(compile=False, dtype="float16", batch_size=4)
Source code in src/rfdetr/detr.py
def optimize_for_inference(
    self, compile: bool = True, batch_size: int = 1, dtype: torch.dtype | str = torch.float32
) -> None:
    """Optimize the model for inference with optional JIT compilation and dtype casting.

    Operations are wrapped in the correct CUDA device context to prevent context
    leaks on multi-GPU setups. When ``compile=True`` the model is traced with
    ``torch.jit.trace`` using a dummy input of ``batch_size`` images at the
    model's current resolution.

    Args:
        compile: If ``True``, trace the model with ``torch.jit.trace`` to obtain
            a JIT-compiled ``ScriptModule``. Set to ``False`` for broader
            compatibility (e.g. models with dynamic control flow).
        batch_size: Number of images the traced model will be optimized for.
            Ignored when ``compile=False``.
        dtype: Target floating-point dtype for the inference model. Accepts a
            ``torch.dtype`` directly (e.g. ``torch.float16``) or its string name
            (e.g. ``"float16"``). Defaults to ``torch.float32``.

    Raises:
        TypeError: If ``dtype`` is not a ``torch.dtype``, or if ``dtype`` is a
            string that does not correspond to a valid ``torch.dtype`` attribute.

    Examples:
        >>> model = RFDETRNano()
        >>> model.optimize_for_inference(compile=False, dtype="float16", batch_size=4)
    """
    # Normalize/validate dtype first so we fail before touching any model state.
    if isinstance(dtype, str):
        try:
            dtype = getattr(torch, dtype)
        except AttributeError:
            raise TypeError(f"dtype must be a torch.dtype or a string name of a dtype, got {dtype!r}") from None
    if not isinstance(dtype, torch.dtype):
        raise TypeError(f"dtype must be a torch.dtype or a string name of a dtype, got {type(dtype)!r}")

    # Clear any previously optimized state before starting a new optimization run.
    # FIX: this reset (and the `device` lookup below) was previously duplicated
    # back-to-back; once is sufficient.
    self.remove_optimized_model()

    device = self.model.device
    cuda_ctx = torch.cuda.device(device) if device.type == "cuda" else contextlib.nullcontext()

    try:
        with cuda_ctx:
            # Work on a deep copy so the training/eval model is left untouched.
            self.model.inference_model = deepcopy(self.model.model)
            self.model.inference_model.eval()
            self.model.inference_model.export()

            self._optimized_resolution = self.model.resolution
            self._is_optimized_for_inference = True

            self.model.inference_model = self.model.inference_model.to(dtype=dtype)
            self._optimized_dtype = dtype

            if compile:
                # Trace with a dummy batch at the model's current square resolution.
                self.model.inference_model = torch.jit.trace(
                    self.model.inference_model,
                    torch.randn(
                        batch_size,
                        3,
                        self.model.resolution,
                        self.model.resolution,
                        device=self.model.device,
                        dtype=dtype,
                    ),
                )
                self._optimized_has_been_compiled = True
                self._optimized_batch_size = batch_size
    except Exception:
        # Ensure the object is left in a consistent, unoptimized state if optimization fails.
        with contextlib.suppress(Exception):
            self.remove_optimized_model()
        raise

predict(images, threshold=0.5, shape=None, patch_size=None, **kwargs)

Performs object detection on the input images and returns bounding box predictions.

This method accepts a single image or a list of images in various formats (file path, image url, PIL Image, NumPy array, or torch.Tensor). The images should be in RGB channel order. If a torch.Tensor is provided, it must already be normalized to values in the [0, 1] range and have the shape (C, H, W).

Parameters:

Name Type Description Default

images

str | Image | ndarray | Tensor | list[str | ndarray | Image | Tensor]

A single image or a list of images to process. Images can be provided as file paths, PIL Images, NumPy arrays, or torch.Tensors.

required

threshold

float

The minimum confidence score needed to consider a detected bounding box valid.

0.5

shape

tuple[int, int] | None

Optional (height, width) tuple to resize images to before inference. When provided, overrides the model's default inference resolution. The tuple should match the resolution used when exporting the model (typically a square shape). Both dimensions must be positive integers divisible by patch_size * num_windows. Defaults to (model.resolution, model.resolution) when not set.

None

patch_size

int | None

Backbone patch size used for shape divisibility validation. Defaults to model_config.patch_size (typically 14 for large models, 16 for smaller ones). Divisibility is checked against patch_size * num_windows.

None

**kwargs

Additional keyword arguments.

{}

Returns:

Type Description
Detections | list[Detections]

A single or multiple Detections objects, each containing bounding box coordinates, confidence scores, and class IDs.

Raises:

Type Description
ValueError

If shape cannot be unpacked as a two-element sequence, if either dimension does not support the __index__ protocol (e.g. float) or is a bool, if either dimension is zero or negative, if either dimension is not divisible by patch_size * num_windows, or if patch_size is not a positive integer.

Source code in src/rfdetr/detr.py
def predict(
    self,
    images: str | Image.Image | np.ndarray | torch.Tensor | list[str | np.ndarray | Image.Image | torch.Tensor],
    threshold: float = 0.5,
    shape: tuple[int, int] | None = None,
    patch_size: int | None = None,
    **kwargs,
) -> sv.Detections | list[sv.Detections]:
    """Performs object detection on the input images and returns bounding box
    predictions.

    This method accepts a single image or a list of images in various formats
    (file path, image url, PIL Image, NumPy array, or torch.Tensor). The images should be in
    RGB channel order. If a torch.Tensor is provided, it must already be normalized
    to values in the [0, 1] range and have the shape (C, H, W).

    Args:
        images:
            A single image or a list of images to process. Images can be provided
            as file paths, PIL Images, NumPy arrays, or torch.Tensors.
        threshold:
            The minimum confidence score needed to consider a detected bounding box valid.
        shape:
            Optional ``(height, width)`` tuple to resize images to before inference.
            When provided, overrides the model's default inference resolution. The
            tuple should match the resolution used when exporting the model
            (typically a square shape). Both dimensions must be positive integers
            divisible by ``patch_size * num_windows``. Defaults to
            ``(model.resolution, model.resolution)`` when not set.
        patch_size:
            Backbone patch size used for shape divisibility validation. Defaults
            to ``model_config.patch_size`` (typically 14 for large models, 16 for
            smaller ones). Divisibility is checked against
            ``patch_size * num_windows``.
        **kwargs:
            Additional keyword arguments.

    Returns:
        A single or multiple Detections objects, each containing bounding box
        coordinates, confidence scores, and class IDs.

    Raises:
        ValueError: If ``shape`` cannot be unpacked as a two-element sequence,
            if either dimension does not support the ``__index__`` protocol
            (e.g. ``float``) or is a ``bool``, if either dimension is zero or
            negative, if either dimension is not divisible by
            ``patch_size * num_windows``, or if ``patch_size`` is not a positive
            integer.

    """
    import supervision as sv

    patch_size = _resolve_patch_size(patch_size, self.model_config, "predict")
    num_windows = getattr(self.model_config, "num_windows", 1)
    # Explicit bool check: `True` is an instance of int and must be rejected.
    if isinstance(num_windows, bool) or not isinstance(num_windows, int) or num_windows <= 0:
        raise ValueError(f"model_config.num_windows must be a positive integer, got {num_windows!r}")
    block_size = patch_size * num_windows

    if shape is None:
        default_res = self.model.resolution
        if default_res % block_size != 0:
            raise ValueError(
                f"Model's default resolution ({default_res}) is not divisible by "
                f"block_size={block_size} (patch_size={patch_size} * num_windows={num_windows}). "
                f"Provide an explicit shape divisible by {block_size}.",
            )
    else:
        shape = _validate_shape_dims(shape, block_size, patch_size, num_windows)

    if not self._is_optimized_for_inference:
        if not self._has_warned_about_not_being_optimized_for_inference:
            logger.warning(
                "Model is not optimized for inference. Latency may be higher than expected."
                " You can optimize the model for inference by calling model.optimize_for_inference().",
            )
            self._has_warned_about_not_being_optimized_for_inference = True

        # BUGFIX: eval() was previously called only on the first unoptimized
        # call (inside the warn-once branch), so a model put back into train
        # mode afterwards would be queried with training-mode behavior.
        # Ensure eval mode on every unoptimized prediction; eval() is idempotent.
        self.model.model.eval()

    if not isinstance(images, list):
        images = [images]

    orig_sizes = []
    processed_images = []
    source_images = []

    for img in images:
        if isinstance(img, str):
            if img.startswith("http"):
                img = requests.get(img, stream=True).raw
            img = Image.open(img)

        if not isinstance(img, torch.Tensor):
            # Keep a uint8 copy of the original pixels for detections.data.
            src = np.array(img)
            if src.dtype != np.uint8:
                src = (src * 255).clip(0, 255).astype(np.uint8)
            source_images.append(src)
            img = F.to_tensor(img)
        else:
            source_images.append((img.permute(1, 2, 0).cpu().numpy() * 255).astype(np.uint8))

        if (img > 1).any():
            raise ValueError(
                "Image has pixel values above 1. Please ensure the image is normalized (scaled to [0, 1]).",
            )
        if (img < 0).any():
            raise ValueError(
                "Image has pixel values below 0. Please ensure the image is normalized (scaled to [0, 1]).",
            )
        if img.shape[0] != 3:
            raise ValueError(f"Invalid image shape. Expected 3 channels (RGB), but got {img.shape[0]} channels.")
        img_tensor = img

        h, w = img_tensor.shape[1:]
        orig_sizes.append((h, w))

        img_tensor = img_tensor.to(self.model.device)
        resize_to = list(shape) if shape is not None else [self.model.resolution, self.model.resolution]
        img_tensor = F.resize(img_tensor, resize_to)
        img_tensor = F.normalize(img_tensor, self.means, self.stds)

        processed_images.append(img_tensor)

    batch_tensor = torch.stack(processed_images)

    if self._is_optimized_for_inference:
        if (
            self._optimized_resolution != batch_tensor.shape[2]
            or self._optimized_resolution != batch_tensor.shape[3]
        ):
            # this could happen if someone manually changes self.model.resolution after optimizing the model,
            # or if predict(shape=...) is used with a shape that doesn't match the compiled square resolution.
            raise ValueError(
                f"Resolution mismatch. "
                f"Model was optimized for resolution {self._optimized_resolution}x{self._optimized_resolution}, "
                f"but got {batch_tensor.shape[2]}x{batch_tensor.shape[3]}."
                " You can explicitly remove the optimized model by calling model.remove_optimized_model().",
            )
        if self._optimized_has_been_compiled:
            if self._optimized_batch_size != batch_tensor.shape[0]:
                raise ValueError(
                    f"Batch size mismatch. "
                    f"Optimized model was compiled for batch size {self._optimized_batch_size}, "
                    f"but got {batch_tensor.shape[0]}."
                    " You can explicitly remove the optimized model by calling model.remove_optimized_model()."
                    " Alternatively, you can recompile the optimized model for a different batch size"
                    " by calling model.optimize_for_inference(batch_size=<new_batch_size>).",
                )

    with torch.no_grad():
        if self._is_optimized_for_inference:
            predictions = self.model.inference_model(batch_tensor.to(dtype=self._optimized_dtype))
        else:
            predictions = self.model.model(batch_tensor)
        # Traced/exported models return a tuple; normalize to the dict layout
        # the postprocessor expects.
        if isinstance(predictions, tuple):
            return_predictions = {
                "pred_logits": predictions[1],
                "pred_boxes": predictions[0],
            }
            if len(predictions) == 3:
                return_predictions["pred_masks"] = predictions[2]
            predictions = return_predictions
        target_sizes = torch.tensor(orig_sizes, device=self.model.device)
        results = self.model.postprocess(predictions, target_sizes=target_sizes)

    detections_list = []
    for i, result in enumerate(results):
        scores = result["scores"]
        labels = result["labels"]
        boxes = result["boxes"]

        # Confidence filtering.
        keep = scores > threshold
        scores = scores[keep]
        labels = labels[keep]
        boxes = boxes[keep]

        if "masks" in result:
            masks = result["masks"]
            masks = masks[keep]

            detections = sv.Detections(
                xyxy=boxes.float().cpu().numpy(),
                confidence=scores.float().cpu().numpy(),
                class_id=labels.cpu().numpy(),
                mask=masks.squeeze(1).cpu().numpy(),
            )
        else:
            detections = sv.Detections(
                xyxy=boxes.float().cpu().numpy(),
                confidence=scores.float().cpu().numpy(),
                class_id=labels.cpu().numpy(),
            )

        detections.data["source_image"] = source_images[i]
        detections.data["source_shape"] = orig_sizes[i]

        detections_list.append(detections)

    return detections_list if len(detections_list) > 1 else detections_list[0]

remove_optimized_model()

Remove the optimized inference model and reset all optimization flags.

Clears model.inference_model and resets all internal state set by :meth:optimize_for_inference. Safe to call even if the model has not been optimized.

Examples:

>>> model = RFDETRSmall()
>>> model.optimize_for_inference(compile=False)
>>> model.remove_optimized_model()
>>> assert not model._is_optimized_for_inference
Source code in src/rfdetr/detr.py
def remove_optimized_model(self) -> None:
    """Remove the optimized inference model and reset all optimization flags.

    Clears ``model.inference_model`` and resets all internal state set by
    :meth:`optimize_for_inference`. Safe to call even if the model has not
    been optimized.

    Examples:
        >>> model = RFDETRSmall()
        >>> model.optimize_for_inference(compile=False)
        >>> model.remove_optimized_model()
        >>> assert not model._is_optimized_for_inference
    """
    self.model.inference_model = None
    self._is_optimized_for_inference = False
    self._optimized_has_been_compiled = False
    self._optimized_batch_size = None
    self._optimized_resolution = None
    self._optimized_dtype = None

train(**kwargs)

Train an RF-DETR model via the PyTorch Lightning stack.

All keyword arguments are forwarded to :meth:get_train_config to build a :class:~rfdetr.config.TrainConfig. Several legacy kwargs are absorbed so existing call-sites do not break:

  • device — normalized via :class:torch.device and mapped to PyTorch Lightning trainer arguments. "cpu" becomes accelerator="cpu"; "cuda" and "cuda:N" become accelerator="gpu" and optionally devices=[N]; "mps" becomes accelerator="mps". Other valid torch device types fall back to PTL auto-detection and emit a :class:UserWarning.
  • callbacks — if the dict contains any non-empty lists a :class:DeprecationWarning is emitted; the dict is then discarded. Use PTL :class:~pytorch_lightning.Callback objects passed via :func:~rfdetr.training.build_trainer instead.
  • start_epoch — emits :class:DeprecationWarning and is dropped.
  • do_benchmark — emits :class:DeprecationWarning and is dropped.

After training completes the underlying nn.Module is synced back onto self.model.model so that :meth:predict and :meth:export continue to work without reloading the checkpoint.

Raises:

Type Description
ImportError

If training dependencies are not installed. Install with pip install "rfdetr[train,loggers]".

Source code in src/rfdetr/detr.py
def train(self, **kwargs):
    """Run RF-DETR training through the PyTorch Lightning stack.

    Keyword arguments are handed to :meth:`get_train_config`, which builds
    the :class:`~rfdetr.config.TrainConfig` driving the run. A handful of
    legacy kwargs are intercepted first so older call-sites keep working:

    * ``device`` — parsed with :class:`torch.device` and translated into
      PTL trainer arguments: ``"cpu"`` → ``accelerator="cpu"``;
      ``"cuda"``/``"cuda:N"`` → ``accelerator="gpu"`` (plus ``devices=[N]``
      when an index is given); ``"mps"`` → ``accelerator="mps"``. Any other
      valid torch device type falls back to PTL auto-detection and emits a
      :class:`UserWarning`.
    * ``callbacks`` — legacy dict; a :class:`DeprecationWarning` fires when
      it holds any non-empty lists, then the dict is discarded. Pass PTL
      :class:`~pytorch_lightning.Callback` objects through
      :func:`~rfdetr.training.build_trainer` instead.
    * ``start_epoch`` — deprecated; a :class:`DeprecationWarning` is
      emitted and the value is dropped.
    * ``do_benchmark`` — deprecated; a :class:`DeprecationWarning` is
      emitted and the value is dropped.

    Once the fit loop finishes, the trained ``nn.Module`` is written back
    to ``self.model.model`` so :meth:`predict` and :meth:`export` keep
    working without reloading a checkpoint.

    Raises:
        ImportError: If training dependencies are not installed. Install
            with ``pip install "rfdetr[train,loggers]"``.

    """
    # The training stack lives in the `rfdetr[train]` extras group, so a
    # missing optional dependency (e.g. pytorch_lightning) can break either
    # import below — the remediation is identical for both, hence a single
    # try block: `pip install "rfdetr[train,loggers]"`.
    try:
        from rfdetr.training import RFDETRDataModule, RFDETRModelModule, build_trainer
        from rfdetr.training.auto_batch import resolve_auto_batch_config
    except ModuleNotFoundError as exc:
        # Re-raise failures that originate inside rfdetr.* untouched, so
        # real packaging/regression bugs are not disguised as missing extras.
        if exc.name and exc.name.startswith("rfdetr."):
            raise
        raise ImportError(
            "RF-DETR training dependencies are missing. "
            'Install them with `pip install "rfdetr[train,loggers]"` and try again.',
        ) from exc

    # Legacy `callbacks` dict: warn only when it actually carries entries,
    # then discard it either way.
    legacy_callbacks = kwargs.pop("callbacks", None)
    if legacy_callbacks and any(legacy_callbacks.values()):
        warnings.warn(
            "Custom callbacks dict is not forwarded to PTL. Use PTL Callback objects instead.",
            DeprecationWarning,
            stacklevel=2,
        )

    # Translate the legacy `device` kwarg (torch-style string or
    # torch.device, e.g. "cuda:1") into PTL accelerator/devices settings.
    accelerator, devices = RFDETR._resolve_trainer_device_kwargs(kwargs.pop("device", None))

    # Legacy `start_epoch`: PTL resumes on its own via ckpt_path, so the
    # value is dropped. Sentinel keeps the "key present at all" semantics.
    _missing = object()
    if kwargs.pop("start_epoch", _missing) is not _missing:
        warnings.warn(
            "`start_epoch` is deprecated and ignored; PTL resumes automatically via `resume`.",
            DeprecationWarning,
            stacklevel=2,
        )

    # Legacy `do_benchmark`: benchmarking moved to the `rfdetr benchmark` CLI.
    if kwargs.pop("do_benchmark", False):
        warnings.warn(
            "`do_benchmark` in `.train()` is deprecated; use `rfdetr benchmark`.",
            DeprecationWarning,
            stacklevel=2,
        )

    config = self.get_train_config(**kwargs)
    if config.batch_size == "auto":
        resolved = resolve_auto_batch_config(
            model_context=self.model,
            model_config=self.model_config,
            train_config=config,
        )
        config.batch_size = resolved.safe_micro_batch
        config.grad_accum_steps = resolved.recommended_grad_accum_steps
        logger.info(
            "[auto-batch] resolved train config: batch_size=%s grad_accum_steps=%s effective_batch_size=%s",
            config.batch_size,
            config.grad_accum_steps,
            resolved.effective_batch_size,
        )

    # Align model_config's num_classes with the training dataset before the
    # model module is constructed, so weight loading inside the module uses
    # the dataset-derived class count.
    dataset_dir = getattr(config, "dataset_dir", None)
    if dataset_dir:
        self._align_num_classes_from_dataset(dataset_dir)

    module = RFDETRModelModule(self.model_config, config)
    datamodule = RFDETRDataModule(self.model_config, config)
    trainer_kwargs = {"accelerator": accelerator}
    if devices is not None:
        trainer_kwargs["devices"] = devices
    trainer = build_trainer(config, self.model_config, **trainer_kwargs)
    trainer.fit(module, datamodule, ckpt_path=config.resume or None)

    # Make predict()/export() see the freshly trained weights without a
    # checkpoint reload.
    self.model.model = module.model
    # Class-name sync: an explicit config.class_names wins; otherwise fall
    # back to whatever the datamodule discovered in the dataset (#509).
    explicit_names = getattr(config, "class_names", None)
    if explicit_names is not None:
        self.model.class_names = explicit_names
    else:
        fallback_names = getattr(datamodule, "class_names", None)
        if fallback_names is not None:
            self.model.class_names = fallback_names