Source code for tracktable.algorithms.boxiness

#
# Copyright (c) 2014-2021 National Technology and Engineering
# Solutions of Sandia, LLC. Under the terms of Contract DE-NA0003525
# with National Technology and Engineering Solutions of Sandia, LLC,
# the U.S. Government retains certain rights in this software.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

"""
tracktable.algorithms.boxiness - Determine how close to a perfect square/box a trajectory is as well
as determining how much a trajectory zigzags.

calculate_boxiness(), sort_boxiness(), calculate_zigzaginess() and sort_by_zigzaginess()
are the main driver functions for determining boxiness or zigzaginess.

.. warning:: The calculate_zigzaginess() and sort_by_zigzaginess()
    functions may produce results that are not expected as this functions are
    not fully fleshed out.

The algorithm details for boxiness can be found in the Demo 2: Detecting "boxes" notebook.
"""

import logging
from math import floor

import matplotlib.pyplot as plt
from tracktable.core.geomath import bearing, distance, length, simplify

logger = logging.getLogger(__name__)

#TODO: Create a function to generate plots for the bearing histograms.

########################################################################
# BOXINESS
########################################################################

[docs]def calculate_boxiness(trajectories,
                  window=5,
                  simplify_trajectory=False,
                  error_tolerance=0.00001,
                  ignore_zero_degree_quartet=True):
    """
    Calculates a boxiness score for each trajectory and appends it as a
    property, to be accessed as trajectory.property['boxiness'].

    .. note:: In actuality, this score will be higher for trajectories that move
        primarily in four directions that are each nintey degrees apart, and
        this does not exclusively limit to boxes.

    Arguments:
        trajectories (list of Tracktable trajectories):
            We will score each of these trajectories for "boxiness" and store
            the score as a trajectory property.

    Keyword Arguments:
        window (Odd int): To account for imperfect boxes, we sum our quartet products over a
                window of this width (in degrees), centered at the peak quartet
                product. Note: Even integers will be rounded up to the nearest odd
                integer. (Default: 5)
        simplify_trajectory (bool): We can opt to smooth the trajectory using Tracktable's simplify
            function. This can help if we have particularly noisy data, but
            should be used with caution, as oversmoothing can lower the boxiness
            scores of true boxes. (Default: False)
        error_tolerance (float): The simplify function will not exceed this positional error tolerance,
            measured in the trajectory's native distance represented in km. (Default: 0.00001)
        ignore_zero_degree_quartet (bool): GPS rounding can cause trajectories that don't move to create a
            box-like pattern (since GPS point round to a grid). To ignore these,
            we can opt to ignore all boxes on the north-south-east-west grid.
            WARNING: This may delete non-GPS rounding boxes! (Default: True)

    """
    # allow input of single trajectories or lists
    if not isinstance(trajectories, list):
        trajectories = [trajectories]

    # For each trajectory, calculate boxiness and store it as a parameter.
    for trajectory in trajectories:
        boxiness = _calculate_boxiness_using_quartets(trajectory,
                                                window=window,
                                                simplify_trajectory=simplify_trajectory,
                                                error_tolerance=error_tolerance,
                                                ignore_zero_degree_quartet=ignore_zero_degree_quartet)
        trajectory.set_property('boxiness', boxiness)


[docs]def sort_by_boxiness(trajectories,
                     calculate_boxiness=True,
                     window=5,
                     simplify_trajectory=False,
                     error_tolerance=0.00001,
                     ignore_zero_degree_quartet=True):
    """
    Sorts a list of trajectories by boxiness.

    Before sorting, boxiness will be calculated for each trajectory and
    stored as a property (trajectory.properties['boxiness']). If boxiness
    has already been calculated and stored under properties, set
    calculate_boxiness to False to avoid unnecessary computation time.

    .. note:: This boxiness score will be higher for
        trajectories that move primarily in four directions that are each
        ninety degrees apart, and this does not exclusively limit to boxes.

    Arguments:
        trajectories (list of Tracktable trajectories): We will score each of
            these trajectories for "boxiness" and store the score as a trajectory property.

    Keyword Arguments:
        calculate_boxiness (bool):
            Calculates boxiness as stores it as a property for each trajectory
            (trajectory.properties['boxiness']). This property must be
            calculated and stored before the trajectories can be sorted.
            (Default: True)
        window (Odd int): To account for imperfect boxes, we sum our quartet products over a
            window of this width (in degrees), centered at the peak quartet
            product. Note: Even integers will be rounded up to the nearest odd
            integer. (Default: 5)
        simplify_trajectory (bool): We can opt to smooth the trajectory using Tracktable's simplify
            function. This can help if we have particularly noisy data, but
            should be used with caution, as oversmoothing can lower the boxiness
            scores of true boxes. (Default: False)
        error_tolerance (float): The simplify function will not exceed this
            positional error tolerance, measured in the trajectory's native distance
            represented in km. (Default: 0.00001)
        ignore_zero_degree_quartet (bool): GPS rounding can cause trajectories that don't move to create a
            box-like pattern (since GPS point round to a grid). To ignore these,
            we can opt to ignore all boxes on the north-south-east-west grid.
            WARNING: This may delete non-GPS rounding boxes! (Default: True)

    Returns:
        List of trajectories sorted based off of their boxiness score.

    """

    if calculate_boxiness:
        calculate_boxiness(trajectories,
                      window=window,
                      simplify_trajectory=simplify_trajectory,
                      error_tolerance=error_tolerance,
                      ignore_zero_degree_quartet=ignore_zero_degree_quartet)

    try:
        return sorted(trajectories,
                      key=lambda trajectory: trajectory.properties['boxiness'],
                      reverse=True)
    except:
        logger.error('Unable to sort trajectories by boxiness. If boxiness '
              'has not already been calculated for these '
              'trajectories, make sure to set '
              'calculate_boxiness=True.')

[docs]def calculate_bearings_histogram(trajectory,
                            scale_by_length=True,
                            normalize=True,
                            simplify_trajectory=False,
                            error_tolerance=0.00001):
    """
    Calculates a bearings histogram, which can use the usual binning method
    or bin by segment length, and can also be optionally normalized.

    Arguments:
        trajectory (Tracktable trajectory): The trajectory for which
            we will create a scaled relative bearings histogram.

    Keyword Arguments:
        scale_by_length (bool): If true, then instead of traditional bining (each bearing value counts
            as one), we add the length of the segment with the corresponding
            bearing to the bin. The histogram is normalized by dividing each bin
            by the length of the trajectory. (Default: True)
        normalize (bool): If true, will yield a relative histogram (bins sum to 1) instead of
            an absolute histogram. (Default: True)
        simplify_trajectory (bool): We can opt to smooth the trajectory using Tracktable's simplify
            function. This can help if we have particularly noisy data, but
            should be used with caution, as oversmoothing can lower the boxiness
            scores of true boxes. (Default: False)
        error_tolerance (float): The simplify function will not exceed this positional error tolerance,
            measured in the trajectory's native distance represeted in km. (Default: 0.00001)

    Returns:
        Relative histogram of bearings, where dict keys define bins and dict
        values are percentages in each bin. Keys are degrees 0 through 359
        (but will only exist for nonzero histogram values). Values are the
        sum of the length of every segment with this bearing (rounded to the
        nearest degree), divided by the length of the entire trajectory.
    """

    if normalize and scale_by_length:
        # Since we are adding the segment length to each histogram bin, we
        # normalize by the length of the entire trajectory.
        traj_length = length(trajectory)

        # For zero length trajectories, there are no bearings.
        if traj_length == 0:
            return {}

    elif normalize:
        normalization_term = 0

    if simplify_trajectory:
        trajectory = simplify(trajectory, error_tolerance)

    bearings_histogram = {}

    for i in range(len(trajectory)-1):
        this_point = trajectory[i]
        next_point = trajectory[i+1]
        # Calculate the bearing, rounded to the nearest degree.
        segment_bearing = round(bearing(this_point, next_point)) % 360
        if scale_by_length:
            # Add the length of the segment to the appropriate bin.
            value_to_bin = distance(this_point, next_point) / traj_length
        else:
            # Just add a count of one to the appropriate bin.
            value_to_bin = 1
            if normalize:
                normalization_term += 1
        bearings_histogram[segment_bearing] = (bearings_histogram.get(segment_bearing, 0)
                                               + value_to_bin)

    if normalize and not scale_by_length:
        bearings_histogram = {bearing: count / normalization_term
                              for (bearing, count) in bearings_histogram.items()}

    ##
    # Plotting Code
    ##
    # bearings_bins = list(bearings_histogram.keys())
    # bearings_counts = list(bearings_histogram.values())
    # fig = plt.figure(figsize=(20,10))
    # plt.bar(bearings_bins, bearings_counts, color='g')
    # plt.title('Histogram of Bearings Scaled by Length', fontsize=30)
    # plt.xlim(-5,364)
    # ax = fig.axes
    # ax[0].tick_params(labelsize=30)
    # plt.show()

    return bearings_histogram

def _calculate_boxiness_using_quartets(trajectory,
                                    window=5,
                                    simplify_trajectory=False,
                                    error_tolerance=0.00001,
                                    ignore_zero_degree_quartet=True):
    """ Utilizing quartest calculate the boxiness values for the given trajectory

    Arguments:
        trajectory (Tracktable trajectory): trajectory to score for "boxiness".

    Keyword Arguments:
        window (Odd int): To account for imperfect boxes, we sum our quartet products over a
            window of this width (in degrees), centered at the peak quartet
            product. Note: Even integers will be rounded up to the nearest odd
            integer. (Default: 5)
        simplify_trajectory (bool): We can opt to smooth the trajectory using Tracktable's simplify
            function. This can help if we have particularly noisy data, but
            should be used with caution, as oversmoothing can lower the boxiness
            scores of true boxes. (Default: False)
        error_tolerance (float): The simplify function will not exceed this positional error tolerance,
            measured in the trajectory's native distance represented in km. (Default: 0.00001)
        ignore_zero_degree_quartet (bool): GPS rounding can cause trajectories that don't move to create a
            box-like pattern (since GPS point round to a grid). To ignore these,
            we can opt to ignore all boxes on the north-south-east-west grid.
            WARNING: This may delete non-GPS rounding boxes! (Default: True)

    Returns:
        A "boxiness" score. In actuality, this score will be higher for
        trajectories that move primarily in four directions that are each
        nintey degrees apart. Note that this does not exclusively limit
        to boxes.
    """
    # Get {bins: percentages} for a relative histogram of the bearings, scaled
    # by segment length.
    bearings_histogram = calculate_bearings_histogram(trajectory,
                                                    scale_by_length=True,
                                                    normalize=True,
                                                    simplify_trajectory=simplify_trajectory,
                                                    error_tolerance=error_tolerance)

    # If there are no bearings, boxiness must be zero.
    if len(bearings_histogram) == 0:
        return 0

    # CALCULATE "QUARTETS" USING A MODIFIED CONVOLUTION TECHNIQUE

    quartet_products = {}

    # Quartets will range from 0-90-180-270 to 89-179-269-359.
    start_degrees_for_quartets = range(90)

    # In this loop we both calculate boxiness for each quartet and keep track
    # of the boxiest quartet.
    all_zero = True
    boxiest_quartet_degree = None
    for start_degree in start_degrees_for_quartets:
        if start_degree == 0 and ignore_zero_degree_quartet:
            # Ignoring 0-90-180-270 quartet to filter out GPS grid artifacts.
            quartet_products[0] = 0
            continue

        # For each quartet, multiple the four peaks to gauge how likely it is
        # that the trajectory makes a box at the quartet's orientation.
        quartet_prod = 1
        for i in range(0, 4):
            quartet_prod *= bearings_histogram.get(start_degree + 90*i, 0)

        # Only store nonzero quartet products.
        if quartet_prod > 0:
            all_zero = False
            # Keep track of the quartet with the largest product.
            if (boxiest_quartet_degree == None
                  or quartet_products[boxiest_quartet_degree] < quartet_prod):
                boxiest_quartet_degree = start_degree
            # Store all nonzero quartet products
            quartet_products[start_degree] = quartet_prod

    # CALCULATE "BOXINESS"

    # To improve computation speed, skip remaining calculation if there were
    # no positive quartets.
    if all_zero:
        return 0

    # How far left/right from the peak quartet should we sum?
    window_radius = floor(window/2)

    ##
    # Plotting Code
    ##
    # fig = plt.figure(figsize=(20,10))
    # # Plot every nonzero quartet product as a blue bar chart.
    # plt.bar(quartet_products.keys(),
    #         quartet_products.values(),
    #         color='b',
    #         label='all quartet products')
    # plt.title('Quartet Products', fontsize=30)
    # plt.xlabel('degree', fontsize=30)
    # # Make every quartet product used for the boxiness score red (window
    # # centered at the peak).
    # degrees_summed = [(boxiest_quartet_degree+i) % 90
    #                   for i in range(-window_radius, window_radius+1)]
    # degrees_summed = [degree for degree in degrees_summed
    #                   if degree in quartet_products.keys()]
    # plt.bar(degrees_summed,
    #         [quartet_products[degree] for degree in degrees_summed],
    #         color='r',
    #         label='quartet products that are summed for score')
    # # plot adjustments
    # plt.legend(fontsize=20)
    # plt.xlim(-5,95)
    # ax = fig.axes
    # ax[0].tick_params(labelsize=30)
    # plt.show()


    # Now that we know the peak boxiness quartet, calculate the boxiness
    # score using a window centered on that quartet degree.
    boxiness = 0
    for i in range(-window_radius, window_radius+1):
        boxiness += quartet_products.get((boxiest_quartet_degree+i) % 90, 0)

    # We normalize by dividing by boxiest possible score of (1/4)^4.
    return boxiness * 256

########################################################################
# ZIGZAGINESS
########################################################################

[docs]def calculate_zigzaginess(trajectories,
                        buffer=5):

    """
    Calculates a zigzaginess score for each trajectory and appends it as a
    property, to be accessed as trajectory.property['zigzaginess'].

    Arguments:
        trajectories (list of Tracktable trajectories):
            We will score each of these trajectories for "boxiness" and store
            the score as a trajectory property.

    Keyword Arguments:
        buffer (int): To account for imperfect zigzags, we need to buffer for a given number of degrees
            the calculation of the zigzaginess score. (Default: 5)

    """

    # allow input of single trajectories or lists
    if not isinstance(trajectories, list):
        trajectories = [trajectories]

    # For each trajectory, calculate zigzaginess and store it as a parameter.
    for trajectory in trajectories:
        zigzaginess = _calculate_zigzaginess_score(trajectory,
                                             buffer=buffer,
                                             penalize=True)
        trajectory.set_property('zigzaginess', zigzaginess)


[docs]def sort_by_zigzaginess(trajectories,
                        calculate_zigzaginess=True,
                        buffer=5):

    """
    Sorts a list of trajectories by zigzaginess.

    Before sorting, zigzaginess will be calculated for each trajectory and
    stored as a property (trajectory.properties['zigzaginess']). If zigzaginess
    has already been calculated and stored under properties, set
    calculate_zigzaginess to False to avoid unnecessary computation time.

    Arguments:
        trajectories (list of Tracktable trajectories): We will score each of
            these trajectories for "boxiness" and store the score as a trajectory property.

    Keyword Arguments:
        buffer (int): To account for imperfect zigzags, we need to buffer for a given number of degrees
            the calculation of the zigzaginess score. (Default: 5)
        calculate_zigzaginess (bool):
            Calculates zigzaginess as stores it as a property for each trajectory
            (trajectory.properties['zigzaginess']). This property must be
            calculated and stored before the trajectories can be sorted.
            (Default: True)

    Returns:
        List of trajectories sorted based off of their zigzaginess score

    """

    if calculate_zigzaginess:
        calculate_zigzaginess(trajectories,
                         buffer=buffer)

    try:
        return sorted(trajectories,
                      key=lambda trajectory: trajectory.properties['zigzaginess'],
                      reverse=True)
    except:
        logger.error('Unable to sort trajectories by zigzaginess. If zigzaginess '
              'has not already been calculated for these trajectories, make '
              'sure to set calculate_zigzaginess=True.')

def _calculate_zigzaginess_score(trajectory,
                           buffer=5,
                           penalize=True):

    """ Calculate the zigzaginess score of the given trajectory

    Arguments:
        trajectory (Tracktable trajectory): trajectory to score for "zigzaginess".

    Keyword Arguments:
        buffer (int): To account for imperfect zigzags, we need to buffer for a given number of degrees
            the calculation of the zigzaginess score. (Default: 5)
        penalize (bool): Flag to penalize all bearings outside of the two peaks to avoid
            biasing towards trajectories with more data points. (Default: True)

    Returns:
        A "zigzaginess" score.
    """

    window_radius = floor(buffer/2)

    # Get {bins: percentages} for a relative histogram of the bearings, scaled
    # by segment length.
    bearings_histogram = calculate_bearings_histogram(trajectory,
                                                 scale_by_length=False,
                                                 normalize=False)

    # account for GPS rounding anomalies (hacky fix)
    bearings_histogram[0] = 0
    bearings_histogram[90] = 0
    bearings_histogram[180] = 0
    bearings_histogram[270] = 0

    # If the trajectory doesn't move, or only moves in one direction,
    # it's not zigzagging.
    if len(bearings_histogram) <= 1:
        return 0

    sorted_histogram = sorted(bearings_histogram.items(),
                              key = lambda item: (item[1], item[0]),
                              reverse=True)

    logger.debug(sorted_histogram)

    # peaks = [[FIRST PEAK DEGREE, FIRST PEAK COUNT],
    #          [SECOND PEAK DEGREE, SECOND PEAK COUNT]]
    peaks = sorted_histogram[:2]

    #peak_degrees = [sorted_bins[0], sorted_bins[1]]

    # Find the next highest peak that is at least buffer degrees away, and
    # not too close to being just the opposite direction of the peak bearing.
    second_peak_found = False
    for possible_peak in sorted_histogram[2:]:
        distance_between_peaks = abs(peaks[0][0] - peaks[1][0])
        if buffer < distance_between_peaks and abs(distance_between_peaks - 180) > window_radius:
            second_peak_found = True
            break
        peaks[1] = possible_peak

    logger.debug(distance_between_peaks)

    # No second bearings peak means no zigzagging.
    if not second_peak_found:
        return 0

    # We'll add up the scores over a window of width <buffer> degrees centered
    # at each peak.
    peak_area = [0, 0]
    for i in range(-window_radius, window_radius+1):
        peak_area[0] += bearings_histogram.get(peaks[0][0] + i, 0)
        peak_area[1] += bearings_histogram.get(peaks[1][0] + i, 0)

    # Multiply the two peaks to score zigzaginess. A high score indicates
    # that the trajectory is moving in just these two directions, for the
    # most part.
    preliminary_score = peaks[0][1] * peaks[1][1]

    # We need to penalize for all bearings outside of the two peaks to avoid
    # biasing towards trajectories with more data points.
    penalty_score = 0
    if penalize:
        for degree, count in sorted_histogram[1:]:
            near_peak = False
            for i in range(-window_radius, window_radius+1):
                if ((peaks[0][0] + i) % 360 == degree or
                    (peaks[1][0] + i) % 360 == degree):
                    near_peak = True
                    break
            if not near_peak:
                penalty_score += count

    logger.debug(f'max peak: {peaks[0][0]}, {peaks[0][1]}')
    logger.debug(f'next peak: {peaks[1][0]}, {peaks[1][1]}')
    logger.debug(f'penalty score: {penalty_score}')

    return max(preliminary_score - penalty_score**2, 0)