Encode numpy array using uncompressed RLE for COCO dataset

As an improvement of @waspinator 's answer. This is 35% faster.

def binary_mask_to_rle(binary_mask):
    rle = {'counts': [], 'size': list(binary_mask.shape)}
    counts = rle.get('counts')

    last_elem = 0
    running_length = 0

    for i, elem in enumerate(binary_mask.ravel(order='F')):
        if elem == last_elem:
            pass
        else:
            counts.append(running_length)
            running_length = 0
            last_elem = elem
        running_length += 1

    counts.append(running_length)

    return rle

Information on the format is available here: https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocotools/mask.py

RLE is a simple yet efficient format for storing binary masks. RLE first divides a vector (or vectorized image) into a series of piecewise constant regions and then for each piece simply stores the length of that piece. For example, given M=[0 0 1 1 1 0 1] the RLE counts would be [2 3 1 1], or for M=[1 1 1 1 1 1 0] the counts would be [0 6 1] (note that the odd counts are always the numbers of zeros).

import numpy as np
from itertools import groupby

def binary_mask_to_rle(binary_mask):
    rle = {'counts': [], 'size': list(binary_mask.shape)}
    counts = rle.get('counts')
    for i, (value, elements) in enumerate(groupby(binary_mask.ravel(order='F'))):
        if i == 0 and value == 1:
            counts.append(0)
        counts.append(len(list(elements)))
    return rle

test_list_1 = np.array([0, 0, 1, 1, 1, 0, 1])
test_list_2 = np.array([1, 1, 1, 1, 1, 1, 0])

print(binary_mask_to_rle(test_list_1))
print(binary_mask_to_rle(test_list_2))

output:

{'counts': [2, 3, 1, 1], 'size': [7]}
{'counts': [0, 6, 1], 'size': [7]}

You can use mask.frPyObjects(rle, size_x, size_y) to encode the RLE, and then do all the usual mask operations.

import json
import numpy as np
from pycocotools import mask
from skimage import measure

ground_truth_binary_mask = np.array([[  0,   0,   0,   0,   0,   0,   0,   0,   0,   0],
                                     [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0],
                                     [  0,   0,   0,   0,   0,   1,   1,   1,   0,   0],
                                     [  0,   0,   0,   0,   0,   1,   1,   1,   0,   0],
                                     [  0,   0,   0,   0,   0,   1,   1,   1,   0,   0],
                                     [  0,   0,   0,   0,   0,   1,   1,   1,   0,   0],
                                     [  1,   0,   0,   0,   0,   0,   0,   0,   0,   0],
                                     [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0],
                                     [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0]], dtype=np.uint8)

fortran_ground_truth_binary_mask = np.asfortranarray(ground_truth_binary_mask)

encode the mask to RLE:

rle = binary_mask_to_rle(fortran_ground_truth_binary_mask)
print(rle)

output:

{'counts': [6, 1, 40, 4, 5, 4, 5, 4, 21], 'size': [9, 10]}

compress the RLE, and then decode:

compressed_rle = mask.frPyObjects(rle, rle.get('size')[0], rle.get('size')[1])
mask.decode(compressed_rle)

output:

array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 1, 1, 1, 0, 0],
       [0, 0, 0, 0, 0, 1, 1, 1, 0, 0],
       [0, 0, 0, 0, 0, 1, 1, 1, 0, 0],
       [0, 0, 0, 0, 0, 1, 1, 1, 0, 0],
       [1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], dtype=uint8)

In order to decode the binary masks encoded in the COCO annotations, you need to first use the COCO's API to get the RLE and then use opencv to get the contours like this:

# Import libraries
import numpy as np
import cv2
import json
import mask

# Read the annotations
file_path = "coco/annotations/stuff_annotations_trainval2017/stuff_train2017.json"
with open(file_path, 'r') as f:
    data = json.load(f)

updated_data = []

# For each annotation
for annotation in data['annotations']:

    # Initialize variables
    obj = {}
    segmentation = []
    segmentation_polygons = []

    # Decode the binary mask
    mask_list = mask.decode(annotation['segmentation'])
    mask_list = np.ascontiguousarray(mask_list, dtype=np.uint8)
    mask_new, contours, hierarchy = cv2.findContours((mask_list).astype(np.uint8), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

    # Get the contours
    for contour in contours:
        contour = contour.flatten().tolist()
        segmentation.append(contour)
        if len(contour) > 4:
            segmentation.append(contour)
    if len(segmentation) == 0:
        continue

    # Get the polygons as (x, y) coordinates
    for i, segment in enumerate(segmentation):
        poligon = []
        poligons = []
        for j in range(len(segment)):
            poligon.append(segment[j])
            if (j+1)%2 == 0:
                poligons.append(poligon)
                poligon = []
        segmentation_polygons.append(poligons)

    # Save the segmentation and polygons for the current annotation
    obj['segmentation'] = segmentation
    obj['segmentation_polygons'] = segmentation_polygons
    updated_data.append(obj)

Note: Only the COCO stuff 2017 annotations are using binary masks, the COCO person 2017 annotations aren't, so you don't need to decode the latter and find their contours.

Inspired by this solution.