zamba.object_detection.yolox¶
Modules¶
megadetector_lite_yolox
¶
LOCAL_MD_LITE_MODEL
¶
Classes¶
FillModeEnum (str, Enum)
¶
Enum for frame filtering fill modes
Attributes:
| Name | Type | Description |
|---|---|---|
| repeat | | Randomly resample qualifying frames to get to n_frames |
| score_sorted | | Take up to n_frames in sort order (even if some have zero probability) |
| weighted_euclidean | | Sample the remaining frames weighted by their euclidean distance in time to the frames over the threshold |
| weighted_prob | | Sample the remaining frames weighted by their predicted probability |
Source code in zamba/object_detection/yolox/megadetector_lite_yolox.py
class FillModeEnum(str, Enum):
"""Enum for frame filtering fill modes
Attributes:
repeat: Randomly resample qualifying frames to get to n_frames
score_sorted: Take up to n_frames in sort order (even if some have zero probability)
weighted_euclidean: Sample the remaining frames weighted by their euclidean distance in
time to the frames over the threshold
weighted_prob: Sample the remaining frames weighted by their predicted probability
"""
repeat = "repeat"
score_sorted = "score_sorted"
weighted_euclidean = "weighted_euclidean"
weighted_prob = "weighted_prob"
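Because FillModeEnum subclasses str, fill modes can be passed to a config either as enum members or as their plain string values; a minimal sketch (not part of the source above):

```python
from zamba.object_detection.yolox.megadetector_lite_yolox import (
    FillModeEnum,
    MegadetectorLiteYoloXConfig,
)

# Enum members and their string values are interchangeable for pydantic validation.
config = MegadetectorLiteYoloXConfig(n_frames=16, fill_mode=FillModeEnum.weighted_prob)
same = MegadetectorLiteYoloXConfig(n_frames=16, fill_mode="weighted_prob")
assert config.fill_mode == same.fill_mode == "weighted_prob"  # str-valued enum compares to str
```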
MegadetectorLiteYoloX
¶
Source code in zamba/object_detection/yolox/megadetector_lite_yolox.py
class MegadetectorLiteYoloX:
def __init__(
self,
path: os.PathLike = LOCAL_MD_LITE_MODEL,
config: Optional[Union[MegadetectorLiteYoloXConfig, dict]] = None,
):
"""MegadetectorLite based on YOLOX.
Args:
path (pathlike): Path to trained YoloX model checkpoint (.pth extension)
config (MegadetectorLiteYoloXConfig): YoloX configuration
"""
if config is None:
config = MegadetectorLiteYoloXConfig()
elif isinstance(config, dict):
config = MegadetectorLiteYoloXConfig.parse_obj(config)
checkpoint = torch.load(path, map_location=config.device)
num_classes = checkpoint["model"]["head.cls_preds.0.weight"].shape[0]
yolox = YoloXNano(num_classes=num_classes)
model = yolox.get_model()
model.load_state_dict(checkpoint["model"])
model = model.eval().to(config.device)
self.model = model
self.yolox = yolox
self.config = config
self.num_classes = num_classes
@staticmethod
def scale_and_pad_array(
image_array: np.ndarray, output_width: int, output_height: int
) -> np.ndarray:
return np.array(
ImageOps.pad(
Image.fromarray(image_array),
(output_width, output_height),
method=Image.BICUBIC,
color=None,
centering=(0, 0),
)
)
def _preprocess(self, frame: np.ndarray) -> np.ndarray:
"""Process an image for the model, including scaling/padding the image, transposing from
(height, width, channel) to (channel, height, width) and casting to float.
"""
return np.ascontiguousarray(
self.scale_and_pad_array(
frame, self.config.image_width, self.config.image_height
).transpose(2, 0, 1),
dtype=np.float32,
)
def detect_video(self, frames: np.ndarray, pbar: bool = False):
pbar = tqdm if pbar else lambda x: x
detections = []
for frame in pbar(frames):
detections.append(self.detect_image(frame))
return detections
def detect_image(self, img_arr: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
"""Runs object detection on an image.
Args:
img_arr (np.ndarray): An image array with dimensions (height, width, channels).
Returns:
np.ndarray: An array of bounding box detections with dimensions (object, 4) where
object is the number of objects detected and the other 4 dimensions are
(x1, y1, x2, y2).
np.ndarray: An array of object detection confidence scores of length (object) where
object is the number of objects detected.
"""
with torch.no_grad():
outputs = self.model(
torch.from_numpy(self._preprocess(img_arr)).unsqueeze(0).to(self.config.device)
)
output = postprocess(
outputs, self.num_classes, self.config.confidence, self.config.nms_threshold
)[0]
if output is None:
return np.array([]), np.array([])
else:
detections = pd.DataFrame(
output.cpu().numpy(),
columns=["x1", "y1", "x2", "y2", "score1", "score2", "class_num"],
).assign(score=lambda row: row.score1 * row.score2)
# Transform bounding box to be in terms of the original image dimensions
original_height, original_width = img_arr.shape[:2]
ratio = min(
self.config.image_width / original_width,
self.config.image_height / original_height,
)
detections[["x1", "y1", "x2", "y2"]] /= ratio
# Express bounding boxes in terms of proportions of original image dimensions
detections[["x1", "x2"]] /= original_width
detections[["y1", "y2"]] /= original_height
return detections[["x1", "y1", "x2", "y2"]].values, detections.score.values
def filter_frames(
self, frames: np.ndarray, detections: List[Tuple[float, float, float, float]]
) -> np.ndarray:
"""Filter video frames using megadetector lite.
Which frames are returned depends on fill_mode and on how many frames are above the
confidence threshold. If n_frames is None, all frames above the threshold are returned.
If at least n_frames are above the threshold, the top n_frames by score are returned.
Otherwise, the frames above the threshold are supplemented according to fill_mode.
Args:
frames (np.ndarray): Array of video frames to filter with dimensions (frames, height,
width, channels)
detections (list of tuples): List of detection results for each frame. Each element is
a tuple of the list of bounding boxes [array(x1, y1, x2, y2)] and the detection
probabilities, both as float
Returns:
np.ndarray: An array of video frames of length n_frames or shorter
"""
frame_scores = pd.Series(
[(np.max(score) if (len(score) > 0) else 0) for _, score in detections]
).sort_values(
ascending=False
) # reduce to one score per frame
selected_indices = frame_scores.loc[frame_scores > self.config.confidence].index
if self.config.n_frames is None:
# no minimum n_frames provided, just select all the frames with scores > threshold
pass
elif len(selected_indices) >= self.config.n_frames:
# num. frames with scores > threshold is greater than the requested number of frames
selected_indices = (
frame_scores[selected_indices]
.sort_values(ascending=False)
.iloc[: self.config.n_frames]
.index
)
elif len(selected_indices) < self.config.n_frames:
# num. frames with scores > threshold is less than the requested number of frames
# repeat frames that are above threshold to get to n_frames
rng = np.random.RandomState(self.config.seed)
if self.config.fill_mode == "repeat":
repeated_indices = rng.choice(
selected_indices,
self.config.n_frames - len(selected_indices),
replace=True,
)
selected_indices = np.concatenate((selected_indices, repeated_indices))
# take frames in sorted order up to n_frames, even if score is zero
elif self.config.fill_mode == "score_sorted":
selected_indices = (
frame_scores.sort_values(ascending=False).iloc[: self.config.n_frames].index
)
# sample up to n_frames, prefer points closer to frames with detection
elif self.config.fill_mode == "weighted_euclidean":
sample_from = frame_scores.loc[~frame_scores.index.isin(selected_indices)].index
# take one over euclidean distance to all points with detection
weights = [1 / np.linalg.norm(selected_indices - sample) for sample in sample_from]
# normalize weights
weights /= np.sum(weights)
sampled = rng.choice(
sample_from,
self.config.n_frames - len(selected_indices),
replace=False,
p=weights,
)
selected_indices = np.concatenate((selected_indices, sampled))
# sample up to n_frames, weight by predicted probability - only if some frames have nonzero prob
elif (self.config.fill_mode == "weighted_prob") and (len(selected_indices) > 0):
sample_from = frame_scores.loc[~frame_scores.index.isin(selected_indices)].index
weights = frame_scores[sample_from] / np.sum(frame_scores[sample_from])
sampled = rng.choice(
sample_from,
self.config.n_frames - len(selected_indices),
replace=False,
p=weights,
)
selected_indices = np.concatenate((selected_indices, sampled))
# sort the selected images back into their original order
if self.config.sort_by_time:
selected_indices = sorted(selected_indices)
return frames[selected_indices]
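Taken together, a hedged end-to-end sketch of the class: construct a detector (this assumes the bundled checkpoint at LOCAL_MD_LITE_MODEL loads), score every frame, then reduce the video to the most promising frames. The random array below is a hypothetical stand-in for decoded video frames.

```python
import numpy as np
from zamba.object_detection.yolox.megadetector_lite_yolox import (
    MegadetectorLiteYoloX,
    MegadetectorLiteYoloXConfig,
)

detector = MegadetectorLiteYoloX(
    config=MegadetectorLiteYoloXConfig(n_frames=8, fill_mode="score_sorted", device="cpu")
)

# Stand-in for decoded video frames: (frames, height, width, channels), uint8.
frames = np.random.randint(0, 255, size=(30, 360, 640, 3), dtype=np.uint8)

detections = detector.detect_video(frames)             # one (boxes, scores) tuple per frame
selected = detector.filter_frames(frames, detections)  # reduce to the best frames
print(selected.shape)  # (8, 360, 640, 3): score_sorted always tops up to n_frames
```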
Methods¶
__init__(self, path: PathLike = PosixPath('/home/runner/work/zamba/zamba/zamba/object_detection/yolox/assets/yolox_nano_20210901.pth'), config: Union[zamba.object_detection.yolox.megadetector_lite_yolox.MegadetectorLiteYoloXConfig, dict] = None)
special
¶
MegadetectorLite based on YOLOX.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
| path | pathlike | Path to trained YoloX model checkpoint (.pth extension) | PosixPath('/home/runner/work/zamba/zamba/zamba/object_detection/yolox/assets/yolox_nano_20210901.pth') |
| config | MegadetectorLiteYoloXConfig | YoloX configuration | None |
Source code in zamba/object_detection/yolox/megadetector_lite_yolox.py
def __init__(
self,
path: os.PathLike = LOCAL_MD_LITE_MODEL,
config: Optional[Union[MegadetectorLiteYoloXConfig, dict]] = None,
):
"""MegadetectorLite based on YOLOX.
Args:
path (pathlike): Path to trained YoloX model checkpoint (.pth extension)
config (MegadetectorLiteYoloXConfig): YoloX configuration
"""
if config is None:
config = MegadetectorLiteYoloXConfig()
elif isinstance(config, dict):
config = MegadetectorLiteYoloXConfig.parse_obj(config)
checkpoint = torch.load(path, map_location=config.device)
num_classes = checkpoint["model"]["head.cls_preds.0.weight"].shape[0]
yolox = YoloXNano(num_classes=num_classes)
model = yolox.get_model()
model.load_state_dict(checkpoint["model"])
model = model.eval().to(config.device)
self.model = model
self.yolox = yolox
self.config = config
self.num_classes = num_classes
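Since config may be a MegadetectorLiteYoloXConfig, a plain dict (validated via parse_obj), or None (all defaults), construction is flexible; a minimal sketch, again assuming the bundled checkpoint is present:

```python
from zamba.object_detection.yolox.megadetector_lite_yolox import MegadetectorLiteYoloX

# A dict is validated into a MegadetectorLiteYoloXConfig via parse_obj under the hood.
detector = MegadetectorLiteYoloX(config={"confidence": 0.4, "device": "cpu"})
print(detector.config.confidence)  # 0.4
print(detector.num_classes)        # inferred from the checkpoint's head.cls_preds weights
```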
detect_image(self, img_arr: ndarray) -> Tuple[numpy.ndarray, numpy.ndarray]
¶
Runs object detection on an image.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
| img_arr | np.ndarray | An image array with dimensions (height, width, channels). | required |
Returns:
| Type | Description |
|---|---|
| np.ndarray | An array of bounding box detections with dimensions (object, 4) where object is the number of objects detected and the other 4 dimensions are (x1, y1, x2, y2). |
| np.ndarray | An array of object detection confidence scores of length (object) where object is the number of objects detected. |
Source code in zamba/object_detection/yolox/megadetector_lite_yolox.py
def detect_image(self, img_arr: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
"""Runs object detection on an image.
Args:
img_arr (np.ndarray): An image array with dimensions (height, width, channels).
Returns:
np.ndarray: An array of bounding box detections with dimensions (object, 4) where
object is the number of objects detected and the other 4 dimensions are
(x1, y1, x2, y2).
np.ndarray: An array of object detection confidence scores of length (object) where
object is the number of objects detected.
"""
with torch.no_grad():
outputs = self.model(
torch.from_numpy(self._preprocess(img_arr)).unsqueeze(0).to(self.config.device)
)
output = postprocess(
outputs, self.num_classes, self.config.confidence, self.config.nms_threshold
)[0]
if output is None:
return np.array([]), np.array([])
else:
detections = pd.DataFrame(
output.cpu().numpy(),
columns=["x1", "y1", "x2", "y2", "score1", "score2", "class_num"],
).assign(score=lambda row: row.score1 * row.score2)
# Transform bounding box to be in terms of the original image dimensions
original_height, original_width = img_arr.shape[:2]
ratio = min(
self.config.image_width / original_width,
self.config.image_height / original_height,
)
detections[["x1", "y1", "x2", "y2"]] /= ratio
# Express bounding boxes in terms of proportions of original image dimensions
detections[["x1", "x2"]] /= original_width
detections[["y1", "y2"]] /= original_height
return detections[["x1", "y1", "x2", "y2"]].values, detections.score.values
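The returned boxes are proportions of the original image, so recovering pixel coordinates means scaling x by the image width and y by the image height; a short sketch, assuming a detector built as in the sketches above:

```python
import numpy as np

frame = np.zeros((720, 1280, 3), dtype=np.uint8)  # (height, width, channels)
boxes, scores = detector.detect_image(frame)

height, width = frame.shape[:2]
if len(boxes):  # boxes is empty when nothing clears the confidence threshold
    pixel_boxes = boxes * np.array([width, height, width, height])
    for (x1, y1, x2, y2), score in zip(pixel_boxes, scores):
        print(f"({x1:.0f}, {y1:.0f}, {x2:.0f}, {y2:.0f}) score={score:.3f}")
```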
detect_video(self, frames: ndarray, pbar: bool = False)
¶Source code in zamba/object_detection/yolox/megadetector_lite_yolox.py
def detect_video(self, frames: np.ndarray, pbar: bool = False):
pbar = tqdm if pbar else lambda x: x
detections = []
for frame in pbar(frames):
detections.append(self.detect_image(frame))
return detections
filter_frames(self, frames: ndarray, detections: List[Tuple[float, float, float, float]]) -> ndarray
¶
Filter video frames using MegadetectorLite.
Which frames are returned depends on fill_mode and on how many frames score above the confidence threshold. If n_frames is None, all frames above the threshold are returned. If at least n_frames are above the threshold, the top n_frames by score are returned. Otherwise, the frames above the threshold are supplemented according to fill_mode until up to n_frames are selected.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
| frames | np.ndarray | Array of video frames to filter with dimensions (frames, height, width, channels) | required |
| detections | list of tuples | List of detection results for each frame. Each element is a tuple of the array of bounding boxes [array(x1, y1, x2, y2)] and the array of detection probabilities. | required |
Returns:
| Type | Description |
|---|---|
| np.ndarray | An array of video frames of length n_frames or shorter |
Source code in zamba/object_detection/yolox/megadetector_lite_yolox.py
def filter_frames(
self, frames: np.ndarray, detections: List[Tuple[float, float, float, float]]
) -> np.ndarray:
"""Filter video frames using megadetector lite.
Which frames are returned depends on fill_mode and on how many frames are above the
confidence threshold. If n_frames is None, all frames above the threshold are returned.
If at least n_frames are above the threshold, the top n_frames by score are returned.
Otherwise, the frames above the threshold are supplemented according to fill_mode.
Args:
frames (np.ndarray): Array of video frames to filter with dimensions (frames, height,
width, channels)
detections (list of tuples): List of detection results for each frame. Each element is
a tuple of the list of bounding boxes [array(x1, y1, x2, y2)] and the detection
probabilities, both as float
Returns:
np.ndarray: An array of video frames of length n_frames or shorter
"""
frame_scores = pd.Series(
[(np.max(score) if (len(score) > 0) else 0) for _, score in detections]
).sort_values(
ascending=False
) # reduce to one score per frame
selected_indices = frame_scores.loc[frame_scores > self.config.confidence].index
if self.config.n_frames is None:
# no minimum n_frames provided, just select all the frames with scores > threshold
pass
elif len(selected_indices) >= self.config.n_frames:
# num. frames with scores > threshold is greater than the requested number of frames
selected_indices = (
frame_scores[selected_indices]
.sort_values(ascending=False)
.iloc[: self.config.n_frames]
.index
)
elif len(selected_indices) < self.config.n_frames:
# num. frames with scores > threshold is less than the requested number of frames
# repeat frames that are above threshold to get to n_frames
rng = np.random.RandomState(self.config.seed)
if self.config.fill_mode == "repeat":
repeated_indices = rng.choice(
selected_indices,
self.config.n_frames - len(selected_indices),
replace=True,
)
selected_indices = np.concatenate((selected_indices, repeated_indices))
# take frames in sorted order up to n_frames, even if score is zero
elif self.config.fill_mode == "score_sorted":
selected_indices = (
frame_scores.sort_values(ascending=False).iloc[: self.config.n_frames].index
)
# sample up to n_frames, prefer points closer to frames with detection
elif self.config.fill_mode == "weighted_euclidean":
sample_from = frame_scores.loc[~frame_scores.index.isin(selected_indices)].index
# take one over euclidean distance to all points with detection
weights = [1 / np.linalg.norm(selected_indices - sample) for sample in sample_from]
# normalize weights
weights /= np.sum(weights)
sampled = rng.choice(
sample_from,
self.config.n_frames - len(selected_indices),
replace=False,
p=weights,
)
selected_indices = np.concatenate((selected_indices, sampled))
# sample up to n_frames, weight by predicted probability - only if some frames have nonzero prob
elif (self.config.fill_mode == "weighted_prob") and (len(selected_indices) > 0):
sample_from = frame_scores.loc[~frame_scores.index.isin(selected_indices)].index
weights = frame_scores[sample_from] / np.sum(frame_scores[sample_from])
sampled = rng.choice(
sample_from,
self.config.n_frames - len(selected_indices),
replace=False,
p=weights,
)
selected_indices = np.concatenate((selected_indices, sampled))
# sort the selected images back into their original order
if self.config.sort_by_time:
selected_indices = sorted(selected_indices)
return frames[selected_indices]
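The detections argument has the same shape as detect_video's output: one (boxes, scores) tuple per frame. A synthetic sketch of the selection logic with hypothetical scores (assumes the bundled checkpoint so the detector can be constructed):

```python
import numpy as np
from zamba.object_detection.yolox.megadetector_lite_yolox import (
    MegadetectorLiteYoloX,
    MegadetectorLiteYoloXConfig,
)

detector = MegadetectorLiteYoloX(
    config=MegadetectorLiteYoloXConfig(n_frames=4, fill_mode="score_sorted", device="cpu")
)

# Five frames; only frames 1 and 3 have a detection above the default 0.25 threshold.
frames = np.stack([np.full((4, 4, 3), i, dtype=np.uint8) for i in range(5)])
detections = [
    (np.array([]), np.array([])),                         # frame 0: nothing detected
    (np.array([[0.1, 0.1, 0.5, 0.5]]), np.array([0.9])),  # frame 1
    (np.array([]), np.array([])),                         # frame 2
    (np.array([[0.2, 0.2, 0.6, 0.6]]), np.array([0.7])),  # frame 3
    (np.array([]), np.array([])),                         # frame 4
]

selected = detector.filter_frames(frames, detections)
print(selected.shape)  # (4, 4, 4, 3): frames 1 and 3 plus two zero-score frames, in time order
```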
scale_and_pad_array(image_array: ndarray, output_width: int, output_height: int) -> ndarray
staticmethod
¶Source code in zamba/object_detection/yolox/megadetector_lite_yolox.py
@staticmethod
def scale_and_pad_array(
image_array: np.ndarray, output_width: int, output_height: int
) -> np.ndarray:
return np.array(
ImageOps.pad(
Image.fromarray(image_array),
(output_width, output_height),
method=Image.BICUBIC,
color=None,
centering=(0, 0),
)
)
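scale_and_pad_array letterboxes rather than stretching: the image is resized to fit inside the target while preserving aspect ratio, and with centering=(0, 0) the padding goes on the right/bottom. A quick sketch:

```python
import numpy as np
from zamba.object_detection.yolox.megadetector_lite_yolox import MegadetectorLiteYoloX

arr = np.full((100, 200, 3), 255, dtype=np.uint8)  # 200 wide x 100 tall, all white
padded = MegadetectorLiteYoloX.scale_and_pad_array(arr, output_width=416, output_height=416)

print(padded.shape)  # (416, 416, 3)
# The scale factor is min(416/200, 416/100) = 2.08, so content fills the top 208 rows;
# the rows below are black padding because centering=(0, 0) anchors the image top-left.
print(padded[:208].mean(), padded[208:].mean())  # ~255.0, 0.0
```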
MegadetectorLiteYoloXConfig (BaseModel)
pydantic-model
¶
Configuration for a MegadetectorLiteYoloX frame selection model
Attributes:
| Name | Type | Description |
|---|---|---|
| confidence | float | Only consider object detections with this confidence or greater |
| nms_threshold | float | Non-maximum suppression is a method for filtering many bounding boxes around the same object to a single bounding box. This is a constant that determines how much to suppress similar bounding boxes. |
| image_width | int | Scale image to this width before sending to object detection model. |
| image_height | int | Scale image to this height before sending to object detection model. |
| device | str | Where to run the object detection model, "cpu" or "cuda". |
| n_frames | int | Max number of frames to return. If None, returns all frames above the threshold. Defaults to None. |
| fill_mode | str | Mode for upsampling if the number of frames above the threshold is less than n_frames. Defaults to "score_sorted". |
| sort_by_time | bool | Whether to sort the selected frames by time (original order) before returning. If False, returns frames sorted by score (descending). Defaults to True. |
| seed | int | Random state for random number generator. Defaults to 55. |
Source code in zamba/object_detection/yolox/megadetector_lite_yolox.py
class MegadetectorLiteYoloXConfig(BaseModel):
"""Configuration for a MegadetectorLiteYoloX frame selection model
Attributes:
confidence (float): Only consider object detections with this confidence or greater
nms_threshold (float): Non-maximum suppression is a method for filtering many bounding
boxes around the same object to a single bounding box. This is a constant that
determines how much to suppress similar bounding boxes.
image_width (int): Scale image to this width before sending to object detection model.
image_height (int): Scale image to this height before sending to object detection model.
device (str): Where to run the object detection model, "cpu" or "cuda".
n_frames (int, optional): Max number of frames to return. If None returns all frames above
the threshold. Defaults to None.
fill_mode (str, optional): Mode for upsampling if the number of frames above the threshold
is less than n_frames. Defaults to "score_sorted".
sort_by_time (bool, optional): Whether to sort the selected frames by time (original order)
before returning. If False, returns frames sorted by score (descending). Defaults to
True.
seed (int, optional): Random state for random number generator. Defaults to 55.
"""
confidence: float = 0.25
nms_threshold: float = 0.45
image_width: int = 416
image_height: int = 416
device: str = "cuda" if torch.cuda.is_available() else "cpu"
n_frames: Optional[int] = None
fill_mode: Optional[FillModeEnum] = FillModeEnum.score_sorted
sort_by_time: bool = True
seed: Optional[int] = 55
class Config:
extra = "forbid"
confidence: float
pydantic-field
¶
device: str
pydantic-field
¶
fill_mode: FillModeEnum
pydantic-field
¶
image_height: int
pydantic-field
¶
image_width: int
pydantic-field
¶
n_frames: int
pydantic-field
¶
nms_threshold: float
pydantic-field
¶
seed: int
pydantic-field
¶
sort_by_time: bool
pydantic-field
¶
Config
¶Source code in zamba/object_detection/yolox/megadetector_lite_yolox.py
class Config:
extra = "forbid"
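Because the inner Config sets extra = "forbid", misspelled or unknown keys fail validation loudly instead of being silently dropped; a minimal sketch:

```python
from pydantic import ValidationError
from zamba.object_detection.yolox.megadetector_lite_yolox import MegadetectorLiteYoloXConfig

try:
    MegadetectorLiteYoloXConfig(confidense=0.5)  # misspelled field name
except ValidationError as err:
    print(err)  # pydantic reports the extra field is not permitted
```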
yolox_base
¶
Classes¶
YoloXBase (Exp)
¶
Modified from https://github.com/Megvii-BaseDetection/YOLOX/blob/main/yolox/exp/yolox_base.py
Source code in zamba/object_detection/yolox/yolox_base.py
class YoloXBase(Exp):
"""Modified from https://github.com/Megvii-BaseDetection/YOLOX/blob/main/yolox/exp/yolox_base.py"""
def get_data_loader(self, batch_size, is_distributed, no_aug=False, cache_img=False):
from yolox.data import (
COCODataset,
TrainTransform,
YoloBatchSampler,
DataLoader,
InfiniteSampler,
MosaicDetection,
worker_init_reset_seed,
)
from yolox.utils import (
wait_for_the_master,
get_local_rank,
)
local_rank = get_local_rank()
with wait_for_the_master(local_rank):
dataset = COCODataset(
data_dir=self.data_dir,
name="data",
json_file=self.train_ann,
img_size=self.input_size,
preproc=TrainTransform(max_labels=50),
cache=cache_img,
)
dataset = MosaicDetection(
dataset,
mosaic=not no_aug,
img_size=self.input_size,
preproc=TrainTransform(max_labels=120),
degrees=self.degrees,
translate=self.translate,
mosaic_scale=self.mosaic_scale,
mixup_scale=self.mixup_scale,
shear=self.shear,
perspective=self.perspective,
enable_mixup=self.enable_mixup,
mosaic_prob=self.mosaic_prob,
mixup_prob=self.mixup_prob,
)
self.dataset = dataset
if is_distributed:
batch_size = batch_size // dist.get_world_size()
sampler = InfiniteSampler(len(self.dataset), seed=self.seed if self.seed else 0)
batch_sampler = YoloBatchSampler(
sampler=sampler,
batch_size=batch_size,
drop_last=False,
mosaic=not no_aug,
)
dataloader_kwargs = {"num_workers": self.data_num_workers, "pin_memory": True}
dataloader_kwargs["batch_sampler"] = batch_sampler
# Make sure each process has different random seed, especially for 'fork' method.
# Check https://github.com/pytorch/pytorch/issues/63311 for more details.
dataloader_kwargs["worker_init_fn"] = worker_init_reset_seed
train_loader = DataLoader(self.dataset, **dataloader_kwargs)
return train_loader
def get_eval_loader(self, batch_size, is_distributed, testdev=False, legacy=False):
from yolox.data import COCODataset, ValTransform
valdataset = COCODataset(
data_dir=self.data_dir,
name="data",
json_file=self.val_ann,
img_size=self.test_size,
preproc=ValTransform(legacy=legacy),
)
if is_distributed:
batch_size = batch_size // dist.get_world_size()
sampler = torch.utils.data.distributed.DistributedSampler(valdataset, shuffle=False)
else:
sampler = torch.utils.data.SequentialSampler(valdataset)
dataloader_kwargs = {
"num_workers": self.data_num_workers,
"pin_memory": True,
"sampler": sampler,
}
dataloader_kwargs["batch_size"] = batch_size
val_loader = torch.utils.data.DataLoader(valdataset, **dataloader_kwargs)
return val_loader
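Both loaders read COCO-format annotation JSONs relative to self.data_dir and look for images in a folder named "data" (the name= argument above). A hedged sketch of pointing an experiment at a custom dataset; the paths here are hypothetical:

```python
from zamba.object_detection.yolox.yolox_base import YoloXBase

class CustomExp(YoloXBase):
    def __init__(self):
        super().__init__()
        self.num_classes = 1                       # e.g. a single "animal" class
        self.data_dir = "/path/to/dataset"         # must contain a "data" image folder
        self.train_ann = "train_annotations.json"  # COCO-format, relative to data_dir
        self.val_ann = "val_annotations.json"

exp = CustomExp()
model = exp.get_model()  # inherited; builds a YOLOPAFPN backbone plus YOLOXHead
# exp.get_data_loader(batch_size=8, is_distributed=False) would then read the
# COCO annotations from data_dir once the dataset exists on disk.
```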
__init__(self)
inherited
special
¶Source code in zamba/object_detection/yolox/yolox_base.py
def __init__(self):
super().__init__()
# ---------------- model config ---------------- #
self.num_classes = 80
self.depth = 1.00
self.width = 1.00
self.act = 'silu'
# ---------------- dataloader config ---------------- #
# set worker to 4 for shorter dataloader init time
self.data_num_workers = 4
self.input_size = (640, 640) # (height, width)
# Actual multiscale ranges: [640-5*32, 640+5*32].
# To disable multiscale training, set the
# self.multiscale_range to 0.
self.multiscale_range = 5
# You can uncomment this line to specify a multiscale range
# self.random_size = (14, 26)
self.data_dir = None
self.train_ann = "instances_train2017.json"
self.val_ann = "instances_val2017.json"
# --------------- transform config ----------------- #
self.mosaic_prob = 1.0
self.mixup_prob = 1.0
self.hsv_prob = 1.0
self.flip_prob = 0.5
self.degrees = 10.0
self.translate = 0.1
self.mosaic_scale = (0.1, 2)
self.mixup_scale = (0.5, 1.5)
self.shear = 2.0
self.enable_mixup = True
# -------------- training config --------------------- #
self.warmup_epochs = 5
self.max_epoch = 300
self.warmup_lr = 0
self.basic_lr_per_img = 0.01 / 64.0
self.scheduler = "yoloxwarmcos"
self.no_aug_epochs = 15
self.min_lr_ratio = 0.05
self.ema = True
self.weight_decay = 5e-4
self.momentum = 0.9
self.print_interval = 10
self.eval_interval = 10
self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0]
# ----------------- testing config ------------------ #
self.test_size = (640, 640)
self.test_conf = 0.01
self.nmsthre = 0.65
eval(self, model, evaluator, is_distributed, half = False)
inherited
¶Source code in zamba/object_detection/yolox/yolox_base.py
def eval(self, model, evaluator, is_distributed, half=False):
return evaluator.evaluate(model, is_distributed, half)
get_data_loader(self, batch_size, is_distributed, no_aug = False, cache_img = False)
¶Source code in zamba/object_detection/yolox/yolox_base.py
def get_data_loader(self, batch_size, is_distributed, no_aug=False, cache_img=False):
from yolox.data import (
COCODataset,
TrainTransform,
YoloBatchSampler,
DataLoader,
InfiniteSampler,
MosaicDetection,
worker_init_reset_seed,
)
from yolox.utils import (
wait_for_the_master,
get_local_rank,
)
local_rank = get_local_rank()
with wait_for_the_master(local_rank):
dataset = COCODataset(
data_dir=self.data_dir,
name="data",
json_file=self.train_ann,
img_size=self.input_size,
preproc=TrainTransform(max_labels=50),
cache=cache_img,
)
dataset = MosaicDetection(
dataset,
mosaic=not no_aug,
img_size=self.input_size,
preproc=TrainTransform(max_labels=120),
degrees=self.degrees,
translate=self.translate,
mosaic_scale=self.mosaic_scale,
mixup_scale=self.mixup_scale,
shear=self.shear,
perspective=self.perspective,
enable_mixup=self.enable_mixup,
mosaic_prob=self.mosaic_prob,
mixup_prob=self.mixup_prob,
)
self.dataset = dataset
if is_distributed:
batch_size = batch_size // dist.get_world_size()
sampler = InfiniteSampler(len(self.dataset), seed=self.seed if self.seed else 0)
batch_sampler = YoloBatchSampler(
sampler=sampler,
batch_size=batch_size,
drop_last=False,
mosaic=not no_aug,
)
dataloader_kwargs = {"num_workers": self.data_num_workers, "pin_memory": True}
dataloader_kwargs["batch_sampler"] = batch_sampler
# Make sure each process has different random seed, especially for 'fork' method.
# Check https://github.com/pytorch/pytorch/issues/63311 for more details.
dataloader_kwargs["worker_init_fn"] = worker_init_reset_seed
train_loader = DataLoader(self.dataset, **dataloader_kwargs)
return train_loader
get_eval_loader(self, batch_size, is_distributed, testdev = False, legacy = False)
¶Source code in zamba/object_detection/yolox/yolox_base.py
def get_eval_loader(self, batch_size, is_distributed, testdev=False, legacy=False):
from yolox.data import COCODataset, ValTransform
valdataset = COCODataset(
data_dir=self.data_dir,
name="data",
json_file=self.val_ann,
img_size=self.test_size,
preproc=ValTransform(legacy=legacy),
)
if is_distributed:
batch_size = batch_size // dist.get_world_size()
sampler = torch.utils.data.distributed.DistributedSampler(valdataset, shuffle=False)
else:
sampler = torch.utils.data.SequentialSampler(valdataset)
dataloader_kwargs = {
"num_workers": self.data_num_workers,
"pin_memory": True,
"sampler": sampler,
}
dataloader_kwargs["batch_size"] = batch_size
val_loader = torch.utils.data.DataLoader(valdataset, **dataloader_kwargs)
return val_loader
get_evaluator(self, batch_size, is_distributed, testdev = False, legacy = False)
inherited
¶Source code in zamba/object_detection/yolox/yolox_base.py
def get_evaluator(self, batch_size, is_distributed, testdev=False, legacy=False):
from yolox.evaluators import COCOEvaluator
val_loader = self.get_eval_loader(batch_size, is_distributed, testdev, legacy)
evaluator = COCOEvaluator(
dataloader=val_loader,
img_size=self.test_size,
confthre=self.test_conf,
nmsthre=self.nmsthre,
num_classes=self.num_classes,
testdev=testdev,
)
return evaluator
get_lr_scheduler(self, lr, iters_per_epoch)
inherited
¶Source code in zamba/object_detection/yolox/yolox_base.py
def get_lr_scheduler(self, lr, iters_per_epoch):
from yolox.utils import LRScheduler
scheduler = LRScheduler(
self.scheduler,
lr,
iters_per_epoch,
self.max_epoch,
warmup_epochs=self.warmup_epochs,
warmup_lr_start=self.warmup_lr,
no_aug_epochs=self.no_aug_epochs,
min_lr_ratio=self.min_lr_ratio,
)
return scheduler
get_model(self)
inherited
¶Source code in zamba/object_detection/yolox/yolox_base.py
def get_model(self):
from yolox.models import YOLOX, YOLOPAFPN, YOLOXHead
def init_yolo(M):
for m in M.modules():
if isinstance(m, nn.BatchNorm2d):
m.eps = 1e-3
m.momentum = 0.03
if getattr(self, "model", None) is None:
in_channels = [256, 512, 1024]
backbone = YOLOPAFPN(self.depth, self.width, in_channels=in_channels, act=self.act)
head = YOLOXHead(self.num_classes, self.width, in_channels=in_channels, act=self.act)
self.model = YOLOX(backbone, head)
self.model.apply(init_yolo)
self.model.head.initialize_biases(1e-2)
return self.model
get_optimizer(self, batch_size)
inherited
¶Source code in zamba/object_detection/yolox/yolox_base.py
def get_optimizer(self, batch_size):
if "optimizer" not in self.__dict__:
if self.warmup_epochs > 0:
lr = self.warmup_lr
else:
lr = self.basic_lr_per_img * batch_size
pg0, pg1, pg2 = [], [], [] # optimizer parameter groups
for k, v in self.model.named_modules():
if hasattr(v, "bias") and isinstance(v.bias, nn.Parameter):
pg2.append(v.bias) # biases
if isinstance(v, nn.BatchNorm2d) or "bn" in k:
pg0.append(v.weight) # no decay
elif hasattr(v, "weight") and isinstance(v.weight, nn.Parameter):
pg1.append(v.weight) # apply decay
optimizer = torch.optim.SGD(
pg0, lr=lr, momentum=self.momentum, nesterov=True
)
optimizer.add_param_group(
{"params": pg1, "weight_decay": self.weight_decay}
) # add pg1 with weight_decay
optimizer.add_param_group({"params": pg2})
self.optimizer = optimizer
return self.optimizer
merge(self, cfg_list)
inherited
¶Source code in zamba/object_detection/yolox/yolox_base.py
def merge(self, cfg_list):
assert len(cfg_list) % 2 == 0
for k, v in zip(cfg_list[0::2], cfg_list[1::2]):
# only update value with same key
if hasattr(self, k):
src_value = getattr(self, k)
src_type = type(src_value)
if src_value is not None and src_type != type(v):
try:
v = src_type(v)
except Exception:
v = ast.literal_eval(v)
setattr(self, k, v)
preprocess(self, inputs, targets, tsize)
inherited
¶Source code in zamba/object_detection/yolox/yolox_base.py
def preprocess(self, inputs, targets, tsize):
scale_y = tsize[0] / self.input_size[0]
scale_x = tsize[1] / self.input_size[1]
if scale_x != 1 or scale_y != 1:
inputs = nn.functional.interpolate(
inputs, size=tsize, mode="bilinear", align_corners=False
)
targets[..., 1::2] = targets[..., 1::2] * scale_x
targets[..., 2::2] = targets[..., 2::2] * scale_y
return inputs, targets
random_resize(self, data_loader, epoch, rank, is_distributed)
inherited
¶Source code in zamba/object_detection/yolox/yolox_base.py
def random_resize(self, data_loader, epoch, rank, is_distributed):
tensor = torch.LongTensor(2).cuda()
if rank == 0:
size_factor = self.input_size[1] * 1.0 / self.input_size[0]
if not hasattr(self, 'random_size'):
min_size = int(self.input_size[0] / 32) - self.multiscale_range
max_size = int(self.input_size[0] / 32) + self.multiscale_range
self.random_size = (min_size, max_size)
size = random.randint(*self.random_size)
size = (int(32 * size), 32 * int(size * size_factor))
tensor[0] = size[0]
tensor[1] = size[1]
if is_distributed:
dist.barrier()
dist.broadcast(tensor, 0)
input_size = (tensor[0].item(), tensor[1].item())
return input_size
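random_resize draws the training resolution in units of 32 px: with input_size (640, 640) and multiscale_range 5, candidate sizes are the multiples of 32 between 640 - 5*32 = 480 and 640 + 5*32 = 800. The same arithmetic in isolation:

```python
import random

input_size = (640, 640)  # (height, width), as set in YoloXBase.__init__
multiscale_range = 5

size_factor = input_size[1] * 1.0 / input_size[0]      # 1.0 for square inputs
min_size = int(input_size[0] / 32) - multiscale_range  # 20 - 5 = 15
max_size = int(input_size[0] / 32) + multiscale_range  # 20 + 5 = 25
size = random.randint(min_size, max_size)
resized = (int(32 * size), 32 * int(size * size_factor))
print(resized)  # one of (480, 480), (512, 512), ..., (800, 800), always multiples of 32
```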
yolox_nano
¶
Classes¶
YoloXNano (YoloXBase)
¶
Copied from https://github.com/Megvii-BaseDetection/YOLOX/blob/main/exps/default/nano.py
Source code in zamba/object_detection/yolox/yolox_nano.py
class YoloXNano(YoloXBase):
"""Copied from https://github.com/Megvii-BaseDetection/YOLOX/blob/main/exps/default/nano.py"""
def __init__(self, num_classes: int):
super().__init__()
self.depth = 0.33
self.width = 0.25
self.input_size = (416, 416)
self.random_size = (10, 20)
self.mosaic_scale = (0.5, 1.5)
self.test_size = (416, 416)
self.mosaic_prob = 0.5
self.enable_mixup = False
self.num_classes = num_classes
self.exp_name = Path(__file__).stem
def get_model(self, sublinear=False):
def init_yolo(M):
for m in M.modules():
if isinstance(m, torch.nn.BatchNorm2d):
m.eps = 1e-3
m.momentum = 0.03
if "model" not in self.__dict__:
from yolox.models import YOLOX, YOLOPAFPN, YOLOXHead
in_channels = [256, 512, 1024]
# The NANO model uses depthwise=True, which is the main difference.
backbone = YOLOPAFPN(self.depth, self.width, in_channels=in_channels, depthwise=True)
head = YOLOXHead(self.num_classes, self.width, in_channels=in_channels, depthwise=True)
self.model = YOLOX(backbone, head)
self.model.apply(init_yolo)
self.model.head.initialize_biases(1e-2)
return self.model
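Relative to the base experiment, the nano variant shrinks depth to 0.33 and width to 0.25 and switches to depthwise separable convolutions. A minimal sketch of building the model (assumes the yolox package is installed); the parameter count is approximate:

```python
from zamba.object_detection.yolox.yolox_nano import YoloXNano

exp = YoloXNano(num_classes=1)
model = exp.get_model()

n_params = sum(p.numel() for p in model.parameters())
print(f"{n_params / 1e6:.2f}M parameters")  # on the order of ~0.9M for the nano variant
```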
__init__(self, num_classes: int)
special
¶Source code in zamba/object_detection/yolox/yolox_nano.py
def __init__(self, num_classes: int):
super().__init__()
self.depth = 0.33
self.width = 0.25
self.input_size = (416, 416)
self.random_size = (10, 20)
self.mosaic_scale = (0.5, 1.5)
self.test_size = (416, 416)
self.mosaic_prob = 0.5
self.enable_mixup = False
self.num_classes = num_classes
self.exp_name = Path(__file__).stem
eval(self, model, evaluator, is_distributed, half = False)
inherited
¶Source code in zamba/object_detection/yolox/yolox_nano.py
def eval(self, model, evaluator, is_distributed, half=False):
return evaluator.evaluate(model, is_distributed, half)
get_data_loader(self, batch_size, is_distributed, no_aug = False, cache_img = False)
inherited
¶Source code in zamba/object_detection/yolox/yolox_nano.py
def get_data_loader(self, batch_size, is_distributed, no_aug=False, cache_img=False):
from yolox.data import (
COCODataset,
TrainTransform,
YoloBatchSampler,
DataLoader,
InfiniteSampler,
MosaicDetection,
worker_init_reset_seed,
)
from yolox.utils import (
wait_for_the_master,
get_local_rank,
)
local_rank = get_local_rank()
with wait_for_the_master(local_rank):
dataset = COCODataset(
data_dir=self.data_dir,
name="data",
json_file=self.train_ann,
img_size=self.input_size,
preproc=TrainTransform(max_labels=50),
cache=cache_img,
)
dataset = MosaicDetection(
dataset,
mosaic=not no_aug,
img_size=self.input_size,
preproc=TrainTransform(max_labels=120),
degrees=self.degrees,
translate=self.translate,
mosaic_scale=self.mosaic_scale,
mixup_scale=self.mixup_scale,
shear=self.shear,
perspective=self.perspective,
enable_mixup=self.enable_mixup,
mosaic_prob=self.mosaic_prob,
mixup_prob=self.mixup_prob,
)
self.dataset = dataset
if is_distributed:
batch_size = batch_size // dist.get_world_size()
sampler = InfiniteSampler(len(self.dataset), seed=self.seed if self.seed else 0)
batch_sampler = YoloBatchSampler(
sampler=sampler,
batch_size=batch_size,
drop_last=False,
mosaic=not no_aug,
)
dataloader_kwargs = {"num_workers": self.data_num_workers, "pin_memory": True}
dataloader_kwargs["batch_sampler"] = batch_sampler
# Make sure each process has different random seed, especially for 'fork' method.
# Check https://github.com/pytorch/pytorch/issues/63311 for more details.
dataloader_kwargs["worker_init_fn"] = worker_init_reset_seed
train_loader = DataLoader(self.dataset, **dataloader_kwargs)
return train_loader
get_eval_loader(self, batch_size, is_distributed, testdev = False, legacy = False)
inherited
¶Source code in zamba/object_detection/yolox/yolox_nano.py
def get_eval_loader(self, batch_size, is_distributed, testdev=False, legacy=False):
from yolox.data import COCODataset, ValTransform
valdataset = COCODataset(
data_dir=self.data_dir,
name="data",
json_file=self.val_ann,
img_size=self.test_size,
preproc=ValTransform(legacy=legacy),
)
if is_distributed:
batch_size = batch_size // dist.get_world_size()
sampler = torch.utils.data.distributed.DistributedSampler(valdataset, shuffle=False)
else:
sampler = torch.utils.data.SequentialSampler(valdataset)
dataloader_kwargs = {
"num_workers": self.data_num_workers,
"pin_memory": True,
"sampler": sampler,
}
dataloader_kwargs["batch_size"] = batch_size
val_loader = torch.utils.data.DataLoader(valdataset, **dataloader_kwargs)
return val_loader
get_evaluator(self, batch_size, is_distributed, testdev = False, legacy = False)
inherited
¶Source code in zamba/object_detection/yolox/yolox_nano.py
def get_evaluator(self, batch_size, is_distributed, testdev=False, legacy=False):
from yolox.evaluators import COCOEvaluator
val_loader = self.get_eval_loader(batch_size, is_distributed, testdev, legacy)
evaluator = COCOEvaluator(
dataloader=val_loader,
img_size=self.test_size,
confthre=self.test_conf,
nmsthre=self.nmsthre,
num_classes=self.num_classes,
testdev=testdev,
)
return evaluator
get_lr_scheduler(self, lr, iters_per_epoch)
inherited
¶Source code in zamba/object_detection/yolox/yolox_nano.py
def get_lr_scheduler(self, lr, iters_per_epoch):
from yolox.utils import LRScheduler
scheduler = LRScheduler(
self.scheduler,
lr,
iters_per_epoch,
self.max_epoch,
warmup_epochs=self.warmup_epochs,
warmup_lr_start=self.warmup_lr,
no_aug_epochs=self.no_aug_epochs,
min_lr_ratio=self.min_lr_ratio,
)
return scheduler
get_model(self, sublinear = False)
¶Source code in zamba/object_detection/yolox/yolox_nano.py
def get_model(self, sublinear=False):
def init_yolo(M):
for m in M.modules():
if isinstance(m, torch.nn.BatchNorm2d):
m.eps = 1e-3
m.momentum = 0.03
if "model" not in self.__dict__:
from yolox.models import YOLOX, YOLOPAFPN, YOLOXHead
in_channels = [256, 512, 1024]
# The NANO model uses depthwise=True, which is the main difference.
backbone = YOLOPAFPN(self.depth, self.width, in_channels=in_channels, depthwise=True)
head = YOLOXHead(self.num_classes, self.width, in_channels=in_channels, depthwise=True)
self.model = YOLOX(backbone, head)
self.model.apply(init_yolo)
self.model.head.initialize_biases(1e-2)
return self.model
get_optimizer(self, batch_size)
inherited
¶Source code in zamba/object_detection/yolox/yolox_nano.py
def get_optimizer(self, batch_size):
if "optimizer" not in self.__dict__:
if self.warmup_epochs > 0:
lr = self.warmup_lr
else:
lr = self.basic_lr_per_img * batch_size
pg0, pg1, pg2 = [], [], [] # optimizer parameter groups
for k, v in self.model.named_modules():
if hasattr(v, "bias") and isinstance(v.bias, nn.Parameter):
pg2.append(v.bias) # biases
if isinstance(v, nn.BatchNorm2d) or "bn" in k:
pg0.append(v.weight) # no decay
elif hasattr(v, "weight") and isinstance(v.weight, nn.Parameter):
pg1.append(v.weight) # apply decay
optimizer = torch.optim.SGD(
pg0, lr=lr, momentum=self.momentum, nesterov=True
)
optimizer.add_param_group(
{"params": pg1, "weight_decay": self.weight_decay}
) # add pg1 with weight_decay
optimizer.add_param_group({"params": pg2})
self.optimizer = optimizer
return self.optimizer
merge(self, cfg_list)
inherited
¶Source code in zamba/object_detection/yolox/yolox_nano.py
def merge(self, cfg_list):
assert len(cfg_list) % 2 == 0
for k, v in zip(cfg_list[0::2], cfg_list[1::2]):
# only update value with same key
if hasattr(self, k):
src_value = getattr(self, k)
src_type = type(src_value)
if src_value is not None and src_type != type(v):
try:
v = src_type(v)
except Exception:
v = ast.literal_eval(v)
setattr(self, k, v)
preprocess(self, inputs, targets, tsize)
inherited
¶Source code in zamba/object_detection/yolox/yolox_nano.py
def preprocess(self, inputs, targets, tsize):
scale_y = tsize[0] / self.input_size[0]
scale_x = tsize[1] / self.input_size[1]
if scale_x != 1 or scale_y != 1:
inputs = nn.functional.interpolate(
inputs, size=tsize, mode="bilinear", align_corners=False
)
targets[..., 1::2] = targets[..., 1::2] * scale_x
targets[..., 2::2] = targets[..., 2::2] * scale_y
return inputs, targets
random_resize(self, data_loader, epoch, rank, is_distributed)
inherited
¶Source code in zamba/object_detection/yolox/yolox_nano.py
def random_resize(self, data_loader, epoch, rank, is_distributed):
tensor = torch.LongTensor(2).cuda()
if rank == 0:
size_factor = self.input_size[1] * 1.0 / self.input_size[0]
if not hasattr(self, 'random_size'):
min_size = int(self.input_size[0] / 32) - self.multiscale_range
max_size = int(self.input_size[0] / 32) + self.multiscale_range
self.random_size = (min_size, max_size)
size = random.randint(*self.random_size)
size = (int(32 * size), 32 * int(size * size_factor))
tensor[0] = size[0]
tensor[1] = size[1]
if is_distributed:
dist.barrier()
dist.broadcast(tensor, 0)
input_size = (tensor[0].item(), tensor[1].item())
return input_size