
zamba.object_detection.yolox

Modules

megadetector_lite_yolox

LOCAL_MD_LITE_MODEL

Classes

FillModeEnum (str, Enum)

Enum for frame filtering fill modes

Attributes:

    repeat: Randomly resample qualifying frames to get to n_frames
    score_sorted: Take up to n_frames in sort order (even if some have zero probability)
    weighted_euclidean: Sample the remaining frames weighted by their Euclidean distance in time to the frames over the threshold
    weighted_prob: Sample the remaining frames weighted by their predicted probability

Source code in zamba/object_detection/yolox/megadetector_lite_yolox.py
class FillModeEnum(str, Enum):
    """Enum for frame filtering fill modes

    Attributes:
        repeat: Randomly resample qualifying frames to get to n_frames
        score_sorted: Take up to n_frames in sort order (even if some have zero probability)
        weighted_euclidean: Sample the remaining frames weighted by their euclidean distance in
            time to the frames over the threshold
        weighted_prob: Sample the remaining frames weighted by their predicted probability
    """

    repeat = "repeat"
    score_sorted = "score_sorted"
    weighted_euclidean = "weighted_euclidean"
    weighted_prob = "weighted_prob"
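A hedged illustration (not part of the zamba source): because FillModeEnum subclasses str, the fill mode can be passed to MegadetectorLiteYoloXConfig either as the enum member or as its string value.

from zamba.object_detection.yolox.megadetector_lite_yolox import (
    FillModeEnum,
    MegadetectorLiteYoloXConfig,
)

# equivalent ways to request weighted_prob filling for a 16-frame budget
config = MegadetectorLiteYoloXConfig(n_frames=16, fill_mode=FillModeEnum.weighted_prob)
config = MegadetectorLiteYoloXConfig(n_frames=16, fill_mode="weighted_prob")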
MegadetectorLiteYoloX
Source code in zamba/object_detection/yolox/megadetector_lite_yolox.py
class MegadetectorLiteYoloX:
    def __init__(
        self,
        path: os.PathLike = LOCAL_MD_LITE_MODEL,
        config: Optional[Union[MegadetectorLiteYoloXConfig, dict]] = None,
    ):
        """MegadetectorLite based on YOLOX.

        Args:
            path (pathlike): Path to trained YoloX model checkpoint (.pth extension)
            config (MegadetectorLiteYoloXConfig): YoloX configuration
        """
        if config is None:
            config = MegadetectorLiteYoloXConfig()
        elif isinstance(config, dict):
            config = MegadetectorLiteYoloXConfig.parse_obj(config)

        checkpoint = torch.load(path, map_location=config.device)
        num_classes = checkpoint["model"]["head.cls_preds.0.weight"].shape[0]

        yolox = YoloXNano(num_classes=num_classes)
        model = yolox.get_model()
        model.load_state_dict(checkpoint["model"])
        model = model.eval().to(config.device)

        self.model = model
        self.yolox = yolox
        self.config = config
        self.num_classes = num_classes

    @staticmethod
    def scale_and_pad_array(
        image_array: np.ndarray, output_width: int, output_height: int
    ) -> np.ndarray:
        return np.array(
            ImageOps.pad(
                Image.fromarray(image_array),
                (output_width, output_height),
                method=Image.BICUBIC,
                color=None,
                centering=(0, 0),
            )
        )

    def _preprocess(self, frame: np.ndarray) -> np.ndarray:
        """Process an image for the model, including scaling/padding the image, transposing from
        (height, width, channel) to (channel, height, width) and casting to float.
        """
        return np.ascontiguousarray(
            self.scale_and_pad_array(
                frame, self.config.image_width, self.config.image_height
            ).transpose(2, 0, 1),
            dtype=np.float32,
        )

    def detect_video(self, frames: np.ndarray, pbar: bool = False):
        """Run detect_image on each frame of a video, optionally showing a progress bar."""
        progress = tqdm if pbar else (lambda x: x)

        detections = []
        for frame in progress(frames):
            detections.append(self.detect_image(frame))
        return detections

    def detect_image(self, img_arr: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
        """Runs object detection on an image.

        Args:
            img_arr (np.ndarray): An image array with dimensions (height, width, channels).

        Returns:
            np.ndarray: An array of bounding box detections with dimensions (object, 4) where
                object is the number of objects detected and the 4 values are
                (x1, y1, x2, y2).

            np.ndarray: An array of object detection confidence scores of length (object) where
                object is the number of objects detected.
        """
        with torch.no_grad():
            outputs = self.model(
                torch.from_numpy(self._preprocess(img_arr)).unsqueeze(0).to(self.config.device)
            )

        output = postprocess(
            outputs, self.num_classes, self.config.confidence, self.config.nms_threshold
        )[0]
        if output is None:
            return np.array([]), np.array([])
        else:
            detections = pd.DataFrame(
                output.cpu().numpy(),
                columns=["x1", "y1", "x2", "y2", "score1", "score2", "class_num"],
            ).assign(score=lambda df: df.score1 * df.score2)  # combined score = objectness * class confidence

            # Transform bounding box to be in terms of the original image dimensions
            original_height, original_width = img_arr.shape[:2]
            ratio = min(
                self.config.image_width / original_width,
                self.config.image_height / original_height,
            )
            detections[["x1", "y1", "x2", "y2"]] /= ratio

            # Express bounding boxes in terms of proportions of original image dimensions
            detections[["x1", "x2"]] /= original_width
            detections[["y1", "y2"]] /= original_height

            return detections[["x1", "y1", "x2", "y2"]].values, detections.score.values

    def filter_frames(
        self, frames: np.ndarray, detections: List[Tuple[np.ndarray, np.ndarray]]
    ) -> np.ndarray:
        """Filter video frames using megadetector lite.

        Which frames are returned depends on n_frames, fill_mode, and how many frames score
        above the confidence threshold. If n_frames is None, all frames above the threshold
        are returned. If at least n_frames are above the threshold, the top-scoring n_frames
        are returned. Otherwise, the frames above the threshold are supplemented according to
        fill_mode.

        Args:
            frames (np.ndarray): Array of video frames to filter with dimensions (frames, height,
                width, channels)
            detections (list of tuples): List of detection results for each frame. Each element
                is a tuple of an array of bounding boxes (x1, y1, x2, y2) and an array of
                detection probabilities.

        Returns:
            np.ndarray: An array of video frames of length n_frames or shorter
        """

        frame_scores = pd.Series(
            [(np.max(score) if (len(score) > 0) else 0) for _, score in detections]
        ).sort_values(
            ascending=False
        )  # reduce to one score per frame

        selected_indices = frame_scores.loc[frame_scores > self.config.confidence].index

        if self.config.n_frames is None:
            # no minimum n_frames provided, just select all the frames with scores > threshold
            pass

        elif len(selected_indices) >= self.config.n_frames:
            # num. frames with scores > threshold is at least the requested number of frames
            selected_indices = (
                frame_scores[selected_indices]
                .sort_values(ascending=False)
                .iloc[: self.config.n_frames]
                .index
            )

        elif len(selected_indices) < self.config.n_frames:
            # num. frames with scores > threshold is less than the requested number of frames
            # fill the remaining slots according to fill_mode
            rng = np.random.RandomState(self.config.seed)

            if self.config.fill_mode == "repeat":
                repeated_indices = rng.choice(
                    selected_indices,
                    self.config.n_frames - len(selected_indices),
                    replace=True,
                )
                selected_indices = np.concatenate((selected_indices, repeated_indices))

            # take frames in sorted order up to n_frames, even if score is zero
            elif self.config.fill_mode == "score_sorted":
                selected_indices = (
                    frame_scores.sort_values(ascending=False).iloc[: self.config.n_frames].index
                )

            # sample up to n_frames, prefer points closer to frames with detection
            elif self.config.fill_mode == "weighted_euclidean":
                sample_from = frame_scores.loc[~frame_scores.index.isin(selected_indices)].index
                # weight is one over the Euclidean distance to all frames with a detection
                weights = [1 / np.linalg.norm(selected_indices - sample) for sample in sample_from]
                # normalize weights
                weights /= np.sum(weights)
                sampled = rng.choice(
                    sample_from,
                    self.config.n_frames - len(selected_indices),
                    replace=False,
                    p=weights,
                )

                selected_indices = np.concatenate((selected_indices, sampled))

            # sample up to n_frames, weight by predicted probability - only if some frames have nonzero prob
            elif (self.config.fill_mode == "weighted_prob") and (len(selected_indices) > 0):
                sample_from = frame_scores.loc[~frame_scores.index.isin(selected_indices)].index
                weights = frame_scores[sample_from] / np.sum(frame_scores[sample_from])
                sampled = rng.choice(
                    sample_from,
                    self.config.n_frames - len(selected_indices),
                    replace=False,
                    p=weights,
                )

                selected_indices = np.concatenate((selected_indices, sampled))

        # sort the selected images back into their original order
        if self.config.sort_by_time:
            selected_indices = sorted(selected_indices)

        return frames[selected_indices]
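A minimal end-to-end sketch (hedged; the frames array and config values are placeholders, not from the zamba source):

import numpy as np
from zamba.object_detection.yolox.megadetector_lite_yolox import (
    MegadetectorLiteYoloX,
    MegadetectorLiteYoloXConfig,
)

# hypothetical input: 100 RGB frames from a 720p video
frames = np.zeros((100, 720, 1280, 3), dtype=np.uint8)

mdlite = MegadetectorLiteYoloX(config=MegadetectorLiteYoloXConfig(n_frames=16))
detections = mdlite.detect_video(frames, pbar=True)  # one (boxes, scores) tuple per frame
selected = mdlite.filter_frames(frames, detections)  # at most 16 frames, in time order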
Methods
__init__(self, path: PathLike = LOCAL_MD_LITE_MODEL, config: Union[zamba.object_detection.yolox.megadetector_lite_yolox.MegadetectorLiteYoloXConfig, dict] = None) special

MegadetectorLite based on YOLOX.

Parameters:

    path (pathlike): Path to trained YoloX model checkpoint (.pth extension). Defaults to LOCAL_MD_LITE_MODEL.
    config (MegadetectorLiteYoloXConfig or dict, optional): YoloX configuration. Defaults to None, in which case the default MegadetectorLiteYoloXConfig is used.
Source code in zamba/object_detection/yolox/megadetector_lite_yolox.py
def __init__(
    self,
    path: os.PathLike = LOCAL_MD_LITE_MODEL,
    config: Optional[Union[MegadetectorLiteYoloXConfig, dict]] = None,
):
    """MegadetectorLite based on YOLOX.

    Args:
        path (pathlike): Path to trained YoloX model checkpoint (.pth extension)
        config (MegadetectorLiteYoloXConfig): YoloX configuration
    """
    if config is None:
        config = MegadetectorLiteYoloXConfig()
    elif isinstance(config, dict):
        config = MegadetectorLiteYoloXConfig.parse_obj(config)

    checkpoint = torch.load(path, map_location=config.device)
    num_classes = checkpoint["model"]["head.cls_preds.0.weight"].shape[0]

    yolox = YoloXNano(num_classes=num_classes)
    model = yolox.get_model()
    model.load_state_dict(checkpoint["model"])
    model = model.eval().to(config.device)

    self.model = model
    self.yolox = yolox
    self.config = config
    self.num_classes = num_classes
detect_image(self, img_arr: ndarray) -> Tuple[numpy.ndarray, numpy.ndarray]

Runs object detection on an image.

Parameters:

    img_arr (np.ndarray): An image array with dimensions (height, width, channels). Required.

Returns:

    np.ndarray: An array of bounding box detections with dimensions (object, 4), where object is the number of objects detected and the 4 values are (x1, y1, x2, y2).

    np.ndarray: An array of object detection confidence scores of length (object), where object is the number of objects detected.

Source code in zamba/object_detection/yolox/megadetector_lite_yolox.py
def detect_image(self, img_arr: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
    """Runs object detection on an image.

    Args:
        img_arr (np.ndarray): An image array with dimensions (height, width, channels).

    Returns:
        np.ndarray: An array of bounding box detections with dimensions (object, 4) where
            object is the number of objects detected and the 4 values are
            (x1, y1, x2, y2).

        np.ndarray: An array of object detection confidence scores of length (object) where
            object is the number of objects detected.
    """
    with torch.no_grad():
        outputs = self.model(
            torch.from_numpy(self._preprocess(img_arr)).unsqueeze(0).to(self.config.device)
        )

    output = postprocess(
        outputs, self.num_classes, self.config.confidence, self.config.nms_threshold
    )[0]
    if output is None:
        return np.array([]), np.array([])
    else:
        detections = pd.DataFrame(
            output.cpu().numpy(),
            columns=["x1", "y1", "x2", "y2", "score1", "score2", "class_num"],
        ).assign(score=lambda df: df.score1 * df.score2)  # combined score = objectness * class confidence

        # Transform bounding box to be in terms of the original image dimensions
        original_height, original_width = img_arr.shape[:2]
        ratio = min(
            self.config.image_width / original_width,
            self.config.image_height / original_height,
        )
        detections[["x1", "y1", "x2", "y2"]] /= ratio

        # Express bounding boxes in terms of proportions of original image dimensions
        detections[["x1", "x2"]] /= original_width
        detections[["y1", "y2"]] /= original_height

        return detections[["x1", "y1", "x2", "y2"]].values, detections.score.values
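Since the returned boxes are proportions of the original image, converting a detection back to pixel coordinates is a single multiplication. A hedged sketch, assuming mdlite is a MegadetectorLiteYoloX instance and img_arr is the array passed to detect_image (the 0.5 display threshold is arbitrary):

boxes, scores = mdlite.detect_image(img_arr)
height, width = img_arr.shape[:2]
for (x1, y1, x2, y2), score in zip(boxes, scores):
    if score >= 0.5:
        print(int(x1 * width), int(y1 * height), int(x2 * width), int(y2 * height), score)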
detect_video(self, frames: ndarray, pbar: bool = False)
Source code in zamba/object_detection/yolox/megadetector_lite_yolox.py
def detect_video(self, frames: np.ndarray, pbar: bool = False):
    """Run detect_image on each frame of a video, optionally showing a progress bar."""
    progress = tqdm if pbar else (lambda x: x)

    detections = []
    for frame in progress(frames):
        detections.append(self.detect_image(frame))
    return detections
filter_frames(self, frames: ndarray, detections: List[Tuple[ndarray, ndarray]]) -> ndarray

Filter video frames using megadetector lite.

Which frames are returned depends on n_frames, fill_mode, and how many frames score above the confidence threshold. If n_frames is None, all frames above the threshold are returned. If at least n_frames are above the threshold, the top-scoring n_frames are returned. Otherwise, the frames above the threshold are supplemented according to fill_mode.

Parameters:

    frames (np.ndarray): Array of video frames to filter with dimensions (frames, height, width, channels). Required.
    detections (list of tuples): List of detection results for each frame. Each element is a tuple of an array of bounding boxes (x1, y1, x2, y2) and an array of detection probabilities. Required.

Returns:

    np.ndarray: An array of video frames of length n_frames or shorter

Source code in zamba/object_detection/yolox/megadetector_lite_yolox.py
def filter_frames(
    self, frames: np.ndarray, detections: List[Tuple[np.ndarray, np.ndarray]]
) -> np.ndarray:
    """Filter video frames using megadetector lite.

    Which frames are returned depends on n_frames, fill_mode, and how many frames score
    above the confidence threshold. If n_frames is None, all frames above the threshold
    are returned. If at least n_frames are above the threshold, the top-scoring n_frames
    are returned. Otherwise, the frames above the threshold are supplemented according to
    fill_mode.

    Args:
        frames (np.ndarray): Array of video frames to filter with dimensions (frames, height,
            width, channels)
        detections (list of tuples): List of detection results for each frame. Each element
            is a tuple of an array of bounding boxes (x1, y1, x2, y2) and an array of
            detection probabilities.

    Returns:
        np.ndarray: An array of video frames of length n_frames or shorter
    """

    frame_scores = pd.Series(
        [(np.max(score) if (len(score) > 0) else 0) for _, score in detections]
    ).sort_values(
        ascending=False
    )  # reduce to one score per frame

    selected_indices = frame_scores.loc[frame_scores > self.config.confidence].index

    if self.config.n_frames is None:
        # no minimum n_frames provided, just select all the frames with scores > threshold
        pass

    elif len(selected_indices) >= self.config.n_frames:
        # num. frames with scores > threshold is at least the requested number of frames
        selected_indices = (
            frame_scores[selected_indices]
            .sort_values(ascending=False)
            .iloc[: self.config.n_frames]
            .index
        )

    elif len(selected_indices) < self.config.n_frames:
        # num. frames with scores > threshold is less than the requested number of frames
        # fill the remaining slots according to fill_mode
        rng = np.random.RandomState(self.config.seed)

        if self.config.fill_mode == "repeat":
            repeated_indices = rng.choice(
                selected_indices,
                self.config.n_frames - len(selected_indices),
                replace=True,
            )
            selected_indices = np.concatenate((selected_indices, repeated_indices))

        # take frames in sorted order up to n_frames, even if score is zero
        elif self.config.fill_mode == "score_sorted":
            selected_indices = (
                frame_scores.sort_values(ascending=False).iloc[: self.config.n_frames].index
            )

        # sample up to n_frames, prefer points closer to frames with detection
        elif self.config.fill_mode == "weighted_euclidean":
            sample_from = frame_scores.loc[~frame_scores.index.isin(selected_indices)].index
            # weight is one over the Euclidean distance to all frames with a detection
            weights = [1 / np.linalg.norm(selected_indices - sample) for sample in sample_from]
            # normalize weights
            weights /= np.sum(weights)
            sampled = rng.choice(
                sample_from,
                self.config.n_frames - len(selected_indices),
                replace=False,
                p=weights,
            )

            selected_indices = np.concatenate((selected_indices, sampled))

        # sample up to n_frames, weight by predicted probability - only if some frames have nonzero prob
        elif (self.config.fill_mode == "weighted_prob") and (len(selected_indices) > 0):
            sample_from = frame_scores.loc[~frame_scores.index.isin(selected_indices)].index
            weights = frame_scores[sample_from] / np.sum(frame_scores[sample_from])
            sampled = rng.choice(
                sample_from,
                self.config.n_frames - len(selected_indices),
                replace=False,
                p=weights,
            )

            selected_indices = np.concatenate((selected_indices, sampled))

    # sort the selected images back into their original order
    if self.config.sort_by_time:
        selected_indices = sorted(selected_indices)

    return frames[selected_indices]
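To make the weighted_euclidean branch concrete, here is a small standalone computation mirroring the weighting above (the frame indices are illustrative, not from the zamba source): with six frames and only frame 2 over the threshold, candidates are weighted by one over their distance in time to frame 2.

import numpy as np

selected_indices = np.array([2])         # only frame 2 scored above the threshold
sample_from = np.array([0, 1, 3, 4, 5])  # candidates for the remaining slots

weights = np.array([1 / np.linalg.norm(selected_indices - s) for s in sample_from])
weights /= weights.sum()
# weights == [0.15, 0.30, 0.30, 0.15, 0.10]: frames 1 and 3 are most likely to be drawn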
scale_and_pad_array(image_array: ndarray, output_width: int, output_height: int) -> ndarray staticmethod
Source code in zamba/object_detection/yolox/megadetector_lite_yolox.py
@staticmethod
def scale_and_pad_array(
    image_array: np.ndarray, output_width: int, output_height: int
) -> np.ndarray:
    return np.array(
        ImageOps.pad(
            Image.fromarray(image_array),
            (output_width, output_height),
            method=Image.BICUBIC,
            color=None,
            centering=(0, 0),
        )
    )
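ImageOps.pad with centering=(0, 0) letterboxes: the image is resized to fit within the target size with its aspect ratio preserved, then padded on the right and bottom. A hedged sketch of the resulting shape, using a hypothetical 720p frame:

import numpy as np

frame = np.zeros((720, 1280, 3), dtype=np.uint8)
out = MegadetectorLiteYoloX.scale_and_pad_array(frame, 416, 416)
# content is scaled by 416/1280 to 416x234, then padded to the full target size
print(out.shape)  # (416, 416, 3)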
MegadetectorLiteYoloXConfig (BaseModel) pydantic-model

Configuration for a MegadetectorLiteYoloX frame selection model

Attributes:

    confidence (float): Only consider object detections with this confidence or greater.
    nms_threshold (float): Non-maximum suppression is a method for filtering the many bounding boxes around the same object down to a single bounding box. This constant determines how much to suppress similar bounding boxes.
    image_width (int): Scale image to this width before sending to the object detection model.
    image_height (int): Scale image to this height before sending to the object detection model.
    device (str): Where to run the object detection model, "cpu" or "cuda".
    n_frames (int, optional): Max number of frames to return. If None, returns all frames above the threshold. Defaults to None.
    fill_mode (str, optional): Mode for upsampling if the number of frames above the threshold is less than n_frames. Defaults to "score_sorted".
    sort_by_time (bool, optional): Whether to sort the selected frames by time (original order) before returning. If False, returns frames sorted by score (descending). Defaults to True.
    seed (int, optional): Random state for the random number generator. Defaults to 55.

Source code in zamba/object_detection/yolox/megadetector_lite_yolox.py
class MegadetectorLiteYoloXConfig(BaseModel):
    """Configuration for a MegadetectorLiteYoloX frame selection model

    Attributes:
        confidence (float): Only consider object detections with this confidence or greater
        nms_threshold (float): Non-maximum suppression is a method for filtering many bounding
            boxes around the same object to a single bounding box. This is a constant that
            determines how much to suppress similar bounding boxes.
        image_width (int): Scale image to this width before sending to object detection model.
        image_height (int): Scale image to this height before sending to object detection model.
        device (str): Where to run the object detection model, "cpu" or "cuda".
        n_frames (int, optional): Max number of frames to return. If None returns all frames above
            the threshold. Defaults to None.
        fill_mode (str, optional): Mode for upsampling if the number of frames above the threshold
            is less than n_frames. Defaults to "score_sorted".
        sort_by_time (bool, optional): Whether to sort the selected frames by time (original order)
            before returning. If False, returns frames sorted by score (descending). Defaults to
            True.
        seed (int, optional): Random state for random number generator. Defaults to 55.
    """

    confidence: float = 0.25
    nms_threshold: float = 0.45
    image_width: int = 416
    image_height: int = 416
    device: str = "cuda" if torch.cuda.is_available() else "cpu"
    n_frames: Optional[int] = None
    fill_mode: Optional[FillModeEnum] = FillModeEnum.score_sorted
    sort_by_time: bool = True
    seed: Optional[int] = 55

    class Config:
        extra = "forbid"
Config
Source code in zamba/object_detection/yolox/megadetector_lite_yolox.py
class Config:
    extra = "forbid"

yolox_base

Classes

YoloXBase (Exp)

Modified from https://github.com/Megvii-BaseDetection/YOLOX/blob/main/yolox/exp/yolox_base.py

Source code in zamba/object_detection/yolox/yolox_base.py
class YoloXBase(Exp):
    """Modified from https://github.com/Megvii-BaseDetection/YOLOX/blob/main/yolox/exp/yolox_base.py"""

    def get_data_loader(self, batch_size, is_distributed, no_aug=False, cache_img=False):
        from yolox.data import (
            COCODataset,
            TrainTransform,
            YoloBatchSampler,
            DataLoader,
            InfiniteSampler,
            MosaicDetection,
            worker_init_reset_seed,
        )
        from yolox.utils import (
            wait_for_the_master,
            get_local_rank,
        )

        local_rank = get_local_rank()

        with wait_for_the_master(local_rank):
            dataset = COCODataset(
                data_dir=self.data_dir,
                name="data",
                json_file=self.train_ann,
                img_size=self.input_size,
                preproc=TrainTransform(max_labels=50),
                cache=cache_img,
            )

        dataset = MosaicDetection(
            dataset,
            mosaic=not no_aug,
            img_size=self.input_size,
            preproc=TrainTransform(max_labels=120),
            degrees=self.degrees,
            translate=self.translate,
            mosaic_scale=self.mosaic_scale,
            mixup_scale=self.mixup_scale,
            shear=self.shear,
            perspective=self.perspective,
            enable_mixup=self.enable_mixup,
            mosaic_prob=self.mosaic_prob,
            mixup_prob=self.mixup_prob,
        )

        self.dataset = dataset

        if is_distributed:
            batch_size = batch_size // dist.get_world_size()

        sampler = InfiniteSampler(len(self.dataset), seed=self.seed if self.seed else 0)

        batch_sampler = YoloBatchSampler(
            sampler=sampler,
            batch_size=batch_size,
            drop_last=False,
            mosaic=not no_aug,
        )

        dataloader_kwargs = {"num_workers": self.data_num_workers, "pin_memory": True}
        dataloader_kwargs["batch_sampler"] = batch_sampler

        # Make sure each process has different random seed, especially for 'fork' method.
        # Check https://github.com/pytorch/pytorch/issues/63311 for more details.
        dataloader_kwargs["worker_init_fn"] = worker_init_reset_seed

        train_loader = DataLoader(self.dataset, **dataloader_kwargs)

        return train_loader

    def get_eval_loader(self, batch_size, is_distributed, testdev=False, legacy=False):
        from yolox.data import COCODataset, ValTransform

        valdataset = COCODataset(
            data_dir=self.data_dir,
            name="data",
            json_file=self.val_ann,
            img_size=self.test_size,
            preproc=ValTransform(legacy=legacy),
        )

        if is_distributed:
            batch_size = batch_size // dist.get_world_size()
            sampler = torch.utils.data.distributed.DistributedSampler(valdataset, shuffle=False)
        else:
            sampler = torch.utils.data.SequentialSampler(valdataset)

        dataloader_kwargs = {
            "num_workers": self.data_num_workers,
            "pin_memory": True,
            "sampler": sampler,
        }
        dataloader_kwargs["batch_size"] = batch_size
        val_loader = torch.utils.data.DataLoader(valdataset, **dataloader_kwargs)

        return val_loader
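A hedged sketch of pointing a YoloXBase-derived experiment at a COCO-format dataset before building the train loader (the data_dir path and annotation filename are assumptions, not from the zamba source; COCODataset(name="data") above implies images live under <data_dir>/data):

exp = YoloXNano(num_classes=1)
exp.data_dir = "/path/to/dataset"         # hypothetical dataset root
exp.train_ann = "train_annotations.json"  # hypothetical COCO-format annotation file
exp.seed = 0                              # used by InfiniteSampler

train_loader = exp.get_data_loader(batch_size=8, is_distributed=False)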
__init__(self) inherited special
Source code in zamba/object_detection/yolox/yolox_base.py
def __init__(self):
    super().__init__()

    # ---------------- model config ---------------- #
    self.num_classes = 80
    self.depth = 1.00
    self.width = 1.00
    self.act = 'silu'

    # ---------------- dataloader config ---------------- #
    # set worker to 4 for shorter dataloader init time
    self.data_num_workers = 4
    self.input_size = (640, 640)  # (height, width)
    # Actual multiscale ranges: [640-5*32, 640+5*32].
    # To disable multiscale training, set the
    # self.multiscale_range to 0.
    self.multiscale_range = 5
    # You can uncomment this line to specify a multiscale range
    # self.random_size = (14, 26)
    self.data_dir = None
    self.train_ann = "instances_train2017.json"
    self.val_ann = "instances_val2017.json"

    # --------------- transform config ----------------- #
    self.mosaic_prob = 1.0
    self.mixup_prob = 1.0
    self.hsv_prob = 1.0
    self.flip_prob = 0.5
    self.degrees = 10.0
    self.translate = 0.1
    self.mosaic_scale = (0.1, 2)
    self.mixup_scale = (0.5, 1.5)
    self.shear = 2.0
    self.enable_mixup = True

    # --------------  training config --------------------- #
    self.warmup_epochs = 5
    self.max_epoch = 300
    self.warmup_lr = 0
    self.basic_lr_per_img = 0.01 / 64.0
    self.scheduler = "yoloxwarmcos"
    self.no_aug_epochs = 15
    self.min_lr_ratio = 0.05
    self.ema = True

    self.weight_decay = 5e-4
    self.momentum = 0.9
    self.print_interval = 10
    self.eval_interval = 10
    self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0]

    # -----------------  testing config ------------------ #
    self.test_size = (640, 640)
    self.test_conf = 0.01
    self.nmsthre = 0.65
eval(self, model, evaluator, is_distributed, half = False) inherited
Source code in zamba/object_detection/yolox/yolox_base.py
def eval(self, model, evaluator, is_distributed, half=False):
    return evaluator.evaluate(model, is_distributed, half)
get_data_loader(self, batch_size, is_distributed, no_aug = False, cache_img = False)
Source code in zamba/object_detection/yolox/yolox_base.py
def get_data_loader(self, batch_size, is_distributed, no_aug=False, cache_img=False):
    from yolox.data import (
        COCODataset,
        TrainTransform,
        YoloBatchSampler,
        DataLoader,
        InfiniteSampler,
        MosaicDetection,
        worker_init_reset_seed,
    )
    from yolox.utils import (
        wait_for_the_master,
        get_local_rank,
    )

    local_rank = get_local_rank()

    with wait_for_the_master(local_rank):
        dataset = COCODataset(
            data_dir=self.data_dir,
            name="data",
            json_file=self.train_ann,
            img_size=self.input_size,
            preproc=TrainTransform(max_labels=50),
            cache=cache_img,
        )

    dataset = MosaicDetection(
        dataset,
        mosaic=not no_aug,
        img_size=self.input_size,
        preproc=TrainTransform(max_labels=120),
        degrees=self.degrees,
        translate=self.translate,
        mosaic_scale=self.mosaic_scale,
        mixup_scale=self.mixup_scale,
        shear=self.shear,
        perspective=self.perspective,
        enable_mixup=self.enable_mixup,
        mosaic_prob=self.mosaic_prob,
        mixup_prob=self.mixup_prob,
    )

    self.dataset = dataset

    if is_distributed:
        batch_size = batch_size // dist.get_world_size()

    sampler = InfiniteSampler(len(self.dataset), seed=self.seed if self.seed else 0)

    batch_sampler = YoloBatchSampler(
        sampler=sampler,
        batch_size=batch_size,
        drop_last=False,
        mosaic=not no_aug,
    )

    dataloader_kwargs = {"num_workers": self.data_num_workers, "pin_memory": True}
    dataloader_kwargs["batch_sampler"] = batch_sampler

    # Make sure each process has different random seed, especially for 'fork' method.
    # Check https://github.com/pytorch/pytorch/issues/63311 for more details.
    dataloader_kwargs["worker_init_fn"] = worker_init_reset_seed

    train_loader = DataLoader(self.dataset, **dataloader_kwargs)

    return train_loader
get_eval_loader(self, batch_size, is_distributed, testdev = False, legacy = False)
Source code in zamba/object_detection/yolox/yolox_base.py
def get_eval_loader(self, batch_size, is_distributed, testdev=False, legacy=False):
    from yolox.data import COCODataset, ValTransform

    valdataset = COCODataset(
        data_dir=self.data_dir,
        name="data",
        json_file=self.val_ann,
        img_size=self.test_size,
        preproc=ValTransform(legacy=legacy),
    )

    if is_distributed:
        batch_size = batch_size // dist.get_world_size()
        sampler = torch.utils.data.distributed.DistributedSampler(valdataset, shuffle=False)
    else:
        sampler = torch.utils.data.SequentialSampler(valdataset)

    dataloader_kwargs = {
        "num_workers": self.data_num_workers,
        "pin_memory": True,
        "sampler": sampler,
    }
    dataloader_kwargs["batch_size"] = batch_size
    val_loader = torch.utils.data.DataLoader(valdataset, **dataloader_kwargs)

    return val_loader
get_evaluator(self, batch_size, is_distributed, testdev = False, legacy = False) inherited
Source code in zamba/object_detection/yolox/yolox_base.py
def get_evaluator(self, batch_size, is_distributed, testdev=False, legacy=False):
    from yolox.evaluators import COCOEvaluator

    val_loader = self.get_eval_loader(batch_size, is_distributed, testdev, legacy)
    evaluator = COCOEvaluator(
        dataloader=val_loader,
        img_size=self.test_size,
        confthre=self.test_conf,
        nmsthre=self.nmsthre,
        num_classes=self.num_classes,
        testdev=testdev,
    )
    return evaluator
get_lr_scheduler(self, lr, iters_per_epoch) inherited
Source code in zamba/object_detection/yolox/yolox_base.py
def get_lr_scheduler(self, lr, iters_per_epoch):
    from yolox.utils import LRScheduler

    scheduler = LRScheduler(
        self.scheduler,
        lr,
        iters_per_epoch,
        self.max_epoch,
        warmup_epochs=self.warmup_epochs,
        warmup_lr_start=self.warmup_lr,
        no_aug_epochs=self.no_aug_epochs,
        min_lr_ratio=self.min_lr_ratio,
    )
    return scheduler
get_model(self) inherited
Source code in zamba/object_detection/yolox/yolox_base.py
def get_model(self):
    from yolox.models import YOLOX, YOLOPAFPN, YOLOXHead

    def init_yolo(M):
        for m in M.modules():
            if isinstance(m, nn.BatchNorm2d):
                m.eps = 1e-3
                m.momentum = 0.03

    if getattr(self, "model", None) is None:
        in_channels = [256, 512, 1024]
        backbone = YOLOPAFPN(self.depth, self.width, in_channels=in_channels, act=self.act)
        head = YOLOXHead(self.num_classes, self.width, in_channels=in_channels, act=self.act)
        self.model = YOLOX(backbone, head)

    self.model.apply(init_yolo)
    self.model.head.initialize_biases(1e-2)
    return self.model
get_optimizer(self, batch_size) inherited
Source code in zamba/object_detection/yolox/yolox_base.py
def get_optimizer(self, batch_size):
    if "optimizer" not in self.__dict__:
        if self.warmup_epochs > 0:
            lr = self.warmup_lr
        else:
            lr = self.basic_lr_per_img * batch_size

        pg0, pg1, pg2 = [], [], []  # optimizer parameter groups

        for k, v in self.model.named_modules():
            if hasattr(v, "bias") and isinstance(v.bias, nn.Parameter):
                pg2.append(v.bias)  # biases
            if isinstance(v, nn.BatchNorm2d) or "bn" in k:
                pg0.append(v.weight)  # no decay
            elif hasattr(v, "weight") and isinstance(v.weight, nn.Parameter):
                pg1.append(v.weight)  # apply decay

        optimizer = torch.optim.SGD(
            pg0, lr=lr, momentum=self.momentum, nesterov=True
        )
        optimizer.add_param_group(
            {"params": pg1, "weight_decay": self.weight_decay}
        )  # add pg1 with weight_decay
        optimizer.add_param_group({"params": pg2})
        self.optimizer = optimizer

    return self.optimizer
merge(self, cfg_list) inherited
Source code in zamba/object_detection/yolox/yolox_base.py
def merge(self, cfg_list):
    assert len(cfg_list) % 2 == 0
    for k, v in zip(cfg_list[0::2], cfg_list[1::2]):
        # only update value with same key
        if hasattr(self, k):
            src_value = getattr(self, k)
            src_type = type(src_value)
            if src_value is not None and src_type != type(v):
                try:
                    v = src_type(v)
                except Exception:
                    v = ast.literal_eval(v)
            setattr(self, k, v)
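merge consumes a flat key/value list and casts each value to the type of the existing attribute. A hedged usage sketch:

exp = YoloXNano(num_classes=1)
exp.merge(["max_epoch", "50", "data_dir", "/path/to/dataset"])
# "50" is cast to int to match the existing max_epoch; data_dir is None,
# so the string is assigned as-is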
preprocess(self, inputs, targets, tsize) inherited
Source code in zamba/object_detection/yolox/yolox_base.py
def preprocess(self, inputs, targets, tsize):
    scale_y = tsize[0] / self.input_size[0]
    scale_x = tsize[1] / self.input_size[1]
    if scale_x != 1 or scale_y != 1:
        inputs = nn.functional.interpolate(
            inputs, size=tsize, mode="bilinear", align_corners=False
        )
        targets[..., 1::2] = targets[..., 1::2] * scale_x
        targets[..., 2::2] = targets[..., 2::2] * scale_y
    return inputs, targets
random_resize(self, data_loader, epoch, rank, is_distributed) inherited
Source code in zamba/object_detection/yolox/yolox_base.py
def random_resize(self, data_loader, epoch, rank, is_distributed):
    tensor = torch.LongTensor(2).cuda()

    if rank == 0:
        size_factor = self.input_size[1] * 1.0 / self.input_size[0]
        if not hasattr(self, 'random_size'):
            min_size = int(self.input_size[0] / 32) - self.multiscale_range
            max_size = int(self.input_size[0] / 32) + self.multiscale_range
            self.random_size = (min_size, max_size)
        size = random.randint(*self.random_size)
        size = (int(32 * size), 32 * int(size * size_factor))
        tensor[0] = size[0]
        tensor[1] = size[1]

    if is_distributed:
        dist.barrier()
        dist.broadcast(tensor, 0)

    input_size = (tensor[0].item(), tensor[1].item())
    return input_size

yolox_nano

Classes

YoloXNano (YoloXBase)

Copied from https://github.com/Megvii-BaseDetection/YOLOX/blob/main/exps/default/nano.py

Source code in zamba/object_detection/yolox/yolox_nano.py
class YoloXNano(YoloXBase):
    """Copied from https://github.com/Megvii-BaseDetection/YOLOX/blob/main/exps/default/nano.py"""

    def __init__(self, num_classes: int):
        super().__init__()
        self.depth = 0.33
        self.width = 0.25
        self.input_size = (416, 416)
        self.random_size = (10, 20)
        self.mosaic_scale = (0.5, 1.5)
        self.test_size = (416, 416)
        self.mosaic_prob = 0.5
        self.enable_mixup = False
        self.num_classes = num_classes
        self.exp_name = Path(__file__).stem

    def get_model(self, sublinear=False):
        def init_yolo(M):
            for m in M.modules():
                if isinstance(m, torch.nn.BatchNorm2d):
                    m.eps = 1e-3
                    m.momentum = 0.03

        if "model" not in self.__dict__:
            from yolox.models import YOLOX, YOLOPAFPN, YOLOXHead

            in_channels = [256, 512, 1024]
            # The NANO model uses depthwise=True, which is the main difference.
            backbone = YOLOPAFPN(self.depth, self.width, in_channels=in_channels, depthwise=True)
            head = YOLOXHead(self.num_classes, self.width, in_channels=in_channels, depthwise=True)
            self.model = YOLOX(backbone, head)

        self.model.apply(init_yolo)
        self.model.head.initialize_biases(1e-2)
        return self.model
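A hedged sketch mirroring how MegadetectorLiteYoloX uses this class in its __init__ above:

yolox = YoloXNano(num_classes=1)  # e.g. a single detection class
model = yolox.get_model().eval()  # depthwise YOLOX nano, ready for load_state_dict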
__init__(self, num_classes: int) special
Source code in zamba/object_detection/yolox/yolox_nano.py
def __init__(self, num_classes: int):
    super().__init__()
    self.depth = 0.33
    self.width = 0.25
    self.input_size = (416, 416)
    self.random_size = (10, 20)
    self.mosaic_scale = (0.5, 1.5)
    self.test_size = (416, 416)
    self.mosaic_prob = 0.5
    self.enable_mixup = False
    self.num_classes = num_classes
    self.exp_name = Path(__file__).stem
eval(self, model, evaluator, is_distributed, half = False) inherited
Source code in zamba/object_detection/yolox/yolox_nano.py
def eval(self, model, evaluator, is_distributed, half=False):
    return evaluator.evaluate(model, is_distributed, half)
get_data_loader(self, batch_size, is_distributed, no_aug = False, cache_img = False) inherited
Source code in zamba/object_detection/yolox/yolox_nano.py
def get_data_loader(self, batch_size, is_distributed, no_aug=False, cache_img=False):
    from yolox.data import (
        COCODataset,
        TrainTransform,
        YoloBatchSampler,
        DataLoader,
        InfiniteSampler,
        MosaicDetection,
        worker_init_reset_seed,
    )
    from yolox.utils import (
        wait_for_the_master,
        get_local_rank,
    )

    local_rank = get_local_rank()

    with wait_for_the_master(local_rank):
        dataset = COCODataset(
            data_dir=self.data_dir,
            name="data",
            json_file=self.train_ann,
            img_size=self.input_size,
            preproc=TrainTransform(max_labels=50),
            cache=cache_img,
        )

    dataset = MosaicDetection(
        dataset,
        mosaic=not no_aug,
        img_size=self.input_size,
        preproc=TrainTransform(max_labels=120),
        degrees=self.degrees,
        translate=self.translate,
        mosaic_scale=self.mosaic_scale,
        mixup_scale=self.mixup_scale,
        shear=self.shear,
        perspective=self.perspective,
        enable_mixup=self.enable_mixup,
        mosaic_prob=self.mosaic_prob,
        mixup_prob=self.mixup_prob,
    )

    self.dataset = dataset

    if is_distributed:
        batch_size = batch_size // dist.get_world_size()

    sampler = InfiniteSampler(len(self.dataset), seed=self.seed if self.seed else 0)

    batch_sampler = YoloBatchSampler(
        sampler=sampler,
        batch_size=batch_size,
        drop_last=False,
        mosaic=not no_aug,
    )

    dataloader_kwargs = {"num_workers": self.data_num_workers, "pin_memory": True}
    dataloader_kwargs["batch_sampler"] = batch_sampler

    # Make sure each process has different random seed, especially for 'fork' method.
    # Check https://github.com/pytorch/pytorch/issues/63311 for more details.
    dataloader_kwargs["worker_init_fn"] = worker_init_reset_seed

    train_loader = DataLoader(self.dataset, **dataloader_kwargs)

    return train_loader
get_eval_loader(self, batch_size, is_distributed, testdev = False, legacy = False) inherited
Source code in zamba/object_detection/yolox/yolox_nano.py
def get_eval_loader(self, batch_size, is_distributed, testdev=False, legacy=False):
    from yolox.data import COCODataset, ValTransform

    valdataset = COCODataset(
        data_dir=self.data_dir,
        name="data",
        json_file=self.val_ann,
        img_size=self.test_size,
        preproc=ValTransform(legacy=legacy),
    )

    if is_distributed:
        batch_size = batch_size // dist.get_world_size()
        sampler = torch.utils.data.distributed.DistributedSampler(valdataset, shuffle=False)
    else:
        sampler = torch.utils.data.SequentialSampler(valdataset)

    dataloader_kwargs = {
        "num_workers": self.data_num_workers,
        "pin_memory": True,
        "sampler": sampler,
    }
    dataloader_kwargs["batch_size"] = batch_size
    val_loader = torch.utils.data.DataLoader(valdataset, **dataloader_kwargs)

    return val_loader
get_evaluator(self, batch_size, is_distributed, testdev = False, legacy = False) inherited
Source code in zamba/object_detection/yolox/yolox_nano.py
def get_evaluator(self, batch_size, is_distributed, testdev=False, legacy=False):
    from yolox.evaluators import COCOEvaluator

    val_loader = self.get_eval_loader(batch_size, is_distributed, testdev, legacy)
    evaluator = COCOEvaluator(
        dataloader=val_loader,
        img_size=self.test_size,
        confthre=self.test_conf,
        nmsthre=self.nmsthre,
        num_classes=self.num_classes,
        testdev=testdev,
    )
    return evaluator
get_lr_scheduler(self, lr, iters_per_epoch) inherited
Source code in zamba/object_detection/yolox/yolox_nano.py
def get_lr_scheduler(self, lr, iters_per_epoch):
    from yolox.utils import LRScheduler

    scheduler = LRScheduler(
        self.scheduler,
        lr,
        iters_per_epoch,
        self.max_epoch,
        warmup_epochs=self.warmup_epochs,
        warmup_lr_start=self.warmup_lr,
        no_aug_epochs=self.no_aug_epochs,
        min_lr_ratio=self.min_lr_ratio,
    )
    return scheduler
get_model(self, sublinear = False)
Source code in zamba/object_detection/yolox/yolox_nano.py
def get_model(self, sublinear=False):
    def init_yolo(M):
        for m in M.modules():
            if isinstance(m, torch.nn.BatchNorm2d):
                m.eps = 1e-3
                m.momentum = 0.03

    if "model" not in self.__dict__:
        from yolox.models import YOLOX, YOLOPAFPN, YOLOXHead

        in_channels = [256, 512, 1024]
        # The NANO model uses depthwise=True, which is the main difference.
        backbone = YOLOPAFPN(self.depth, self.width, in_channels=in_channels, depthwise=True)
        head = YOLOXHead(self.num_classes, self.width, in_channels=in_channels, depthwise=True)
        self.model = YOLOX(backbone, head)

    self.model.apply(init_yolo)
    self.model.head.initialize_biases(1e-2)
    return self.model
get_optimizer(self, batch_size) inherited
Source code in zamba/object_detection/yolox/yolox_nano.py
def get_optimizer(self, batch_size):
    if "optimizer" not in self.__dict__:
        if self.warmup_epochs > 0:
            lr = self.warmup_lr
        else:
            lr = self.basic_lr_per_img * batch_size

        pg0, pg1, pg2 = [], [], []  # optimizer parameter groups

        for k, v in self.model.named_modules():
            if hasattr(v, "bias") and isinstance(v.bias, nn.Parameter):
                pg2.append(v.bias)  # biases
            if isinstance(v, nn.BatchNorm2d) or "bn" in k:
                pg0.append(v.weight)  # no decay
            elif hasattr(v, "weight") and isinstance(v.weight, nn.Parameter):
                pg1.append(v.weight)  # apply decay

        optimizer = torch.optim.SGD(
            pg0, lr=lr, momentum=self.momentum, nesterov=True
        )
        optimizer.add_param_group(
            {"params": pg1, "weight_decay": self.weight_decay}
        )  # add pg1 with weight_decay
        optimizer.add_param_group({"params": pg2})
        self.optimizer = optimizer

    return self.optimizer
merge(self, cfg_list) inherited
Source code in zamba/object_detection/yolox/yolox_nano.py
def merge(self, cfg_list):
    assert len(cfg_list) % 2 == 0
    for k, v in zip(cfg_list[0::2], cfg_list[1::2]):
        # only update value with same key
        if hasattr(self, k):
            src_value = getattr(self, k)
            src_type = type(src_value)
            if src_value is not None and src_type != type(v):
                try:
                    v = src_type(v)
                except Exception:
                    v = ast.literal_eval(v)
            setattr(self, k, v)
preprocess(self, inputs, targets, tsize) inherited
Source code in zamba/object_detection/yolox/yolox_nano.py
def preprocess(self, inputs, targets, tsize):
    scale_y = tsize[0] / self.input_size[0]
    scale_x = tsize[1] / self.input_size[1]
    if scale_x != 1 or scale_y != 1:
        inputs = nn.functional.interpolate(
            inputs, size=tsize, mode="bilinear", align_corners=False
        )
        targets[..., 1::2] = targets[..., 1::2] * scale_x
        targets[..., 2::2] = targets[..., 2::2] * scale_y
    return inputs, targets
random_resize(self, data_loader, epoch, rank, is_distributed) inherited
Source code in zamba/object_detection/yolox/yolox_nano.py
def random_resize(self, data_loader, epoch, rank, is_distributed):
    tensor = torch.LongTensor(2).cuda()

    if rank == 0:
        size_factor = self.input_size[1] * 1.0 / self.input_size[0]
        if not hasattr(self, 'random_size'):
            min_size = int(self.input_size[0] / 32) - self.multiscale_range
            max_size = int(self.input_size[0] / 32) + self.multiscale_range
            self.random_size = (min_size, max_size)
        size = random.randint(*self.random_size)
        size = (int(32 * size), 32 * int(size * size_factor))
        tensor[0] = size[0]
        tensor[1] = size[1]

    if is_distributed:
        dist.barrier()
        dist.broadcast(tensor, 0)

    input_size = (tensor[0].item(), tensor[1].item())
    return input_size