Cluster Example Source Files

Main.cpp

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
/*
 * Copyright (c) 2014-2021 National Technology and Engineering
 * Solutions of Sandia, LLC. Under the terms of Contract DE-NA0003525
 * with National Technology and Engineering Solutions of Sandia, LLC,
 * the U.S. Government retains certain rights in this software.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "Correlation.h"

#include <tracktable/Analysis/ComputeDBSCANClustering.h>
#include <tracktable/Analysis/DistanceGeometry.h>
#include <tracktable/CommandLineFactories/AssemblerFromCommandLine.h>
#include <tracktable/CommandLineFactories/PointReaderFromCommandLine.h>
#include <tracktable/Domain/FeatureVectors.h>
#include <tracktable/Domain/Terrestrial.h>

#include <boost/timer/timer.hpp>

using FeatureVectorT = tracktable::domain::feature_vectors::FeatureVector<10>;

using TrajectoryT = tracktable::domain::terrestrial::trajectory_type;
using PointT = typename TrajectoryT::point_type;
using PointReaderT = tracktable::PointReader<PointT>;
using PointReaderIteratorT = typename PointReaderT::iterator;
using AssemblerT = tracktable::AssembleTrajectories<TrajectoryT, PointReaderIteratorT>;

static constexpr auto helpmsg = R"(
--------------------------------------------------------------------------------
The cluster example demonstrates:
    - Using command line factories to read points and assemble trajectories
    - Create features using distance geometries
    - Cluster and and assign membership using dbscan

Typical use:
    ./cluster --input=/data/flights.tsv

Defaults assume a tab separated points file formatted as :

OBJECTID TIMESTAMP LON LAT
--------------------------------------------------------------------------------)";
int main(int _argc, char* _argv[]) {
    constexpr auto timerFormat = "\u001b[30;1m %w seconds\u001b[0m\n";
    // Set log level to reduce unecessary output
    tracktable::set_log_level(tracktable::log::info);
    // Create a basic command line option with boost
    boost::program_options::options_description commandLineOptions;
    commandLineOptions.add_options()("help", "Print help");

    // Create command line factories
    tracktable::PointReaderFromCommandLine<PointT> readerFactory;
    tracktable::AssemblerFromCommandLine<TrajectoryT> assemblerFactory;
    // Add options from the factories
    readerFactory.addOptions(commandLineOptions);
    assemblerFactory.addOptions(commandLineOptions);

    // And a command line option for output
    commandLineOptions.add_options()("output", bpo::value<std::string>()->default_value("-"),
                                     "file to write to (use '-' for stdout), overridden by 'separate-kmls'");

    /** Boost program options using a variable map to tie everything together.
     * one parse will have a single variable map. We need to let the factories know
     * about this variable map so they can pull information out of it */
    auto vm = std::make_shared<boost::program_options::variables_map>();
    readerFactory.setVariables(vm);
    assemblerFactory.setVariables(vm);

    // Parse the command lines, don't forget the 'notify' after
    try {
        // We use this try/catch to automatically display help when an unknown option is used
        boost::program_options::store(
            boost::program_options::command_line_parser(_argc, _argv).options(commandLineOptions).run(), *vm);
        boost::program_options::notify(*vm);
    } catch (boost::program_options::error e) {
        std::cerr << e.what();
        std::cerr << helpmsg << "\n\n";
        std::cerr << commandLineOptions << std::endl;
        return 1;
    }
    /** Parsing will give an error of an incorrect option is used, but it won't
     * display the help unless we tell it too */
    if (vm->count("help") != 0) {
        std::cerr << helpmsg << "\n\n";
        std::cerr << commandLineOptions << std::endl;
        return 1;
    }

    // Create Point Reader and assembler
    auto pointReader = readerFactory.createPointReader();
    auto assembler = assemblerFactory.createAssembler(pointReader);

    std::vector<std::shared_ptr<TrajectoryT>> trajectories = {};
    {
        std::cerr << "Assemble Trajectories" << std::endl;
        boost::timer::auto_cpu_timer assembleTimer(std::cerr, timerFormat);
        auto count = 0u;
        std::cerr << std::right;
        for (auto tIter = assembler->begin(); tIter != assembler->end(); ++tIter) {
            if (tracktable::length(*tIter) < 100) {  // filter out standing still
                continue;
            }
            std::cerr << "\b\b\b\b\b\b\b\b\b\b" << std::setw(10)  // Using backspaces for in place counter
                      << count++;
            trajectories.push_back(std::make_shared<TrajectoryT>(*tIter));
        }
        std::cerr << std::left << "\nStarting with " << trajectories.size() << " trajectories" << std::endl;
    }

    /* Create a set of features. distance geometry produces a vector of doubles
     * and must be transferred to feature vectors to be consumed by certain functions */
    std::vector<FeatureVectorT> features;
    {
        std::cerr << "Build features" << std::endl;
        boost::timer::auto_cpu_timer featuretTimer(std::cerr, timerFormat);
        for (auto& t : trajectories) {
            auto v = tracktable::distance_geometry_by_distance(*t, 4);
            FeatureVectorT f;
            for (auto i = 0u; i < features.size() && i < v.size(); ++i) {
                f[i] = v[i];
            }
            features.push_back(f);
        }
    }

    using ClusterLabelT = std::pair<int, int>;
    using ClusterLabelVectorT = std::vector<ClusterLabelT>;
    using IdVectorT = std::vector<int>;

    FeatureVectorT search_box;
    for (auto i = 0u; i < search_box.size(); ++i) {
        search_box[i] = 0.1;
    }

    ClusterLabelVectorT vertex_cluster_labels;
    {
        std::cerr << "Cluster with dbscan" << std::endl;
        boost::timer::auto_cpu_timer clusterTimer(std::cerr, timerFormat);
        tracktable::cluster_with_dbscan(features.begin(), features.end(), search_box, 3,
                                        std::back_inserter(vertex_cluster_labels));
    }

    std::vector<IdVectorT> membership;
    {
        std::cerr << "Build Cluster membership List" << std::endl;
        boost::timer::auto_cpu_timer membershiptTimer(std::cerr, timerFormat);
        tracktable::build_cluster_membership_lists(vertex_cluster_labels.begin(), vertex_cluster_labels.end(),
                                                   std::back_inserter(membership));
    }

    for (unsigned int i = 0; i < membership.size(); ++i) {
        std::cerr << i << "(" << std::setw(3) << membership[i].size() << "):";
        FeatureVectorT avg;
        avg = tracktable::arithmetic::zero<FeatureVectorT>();
        for (unsigned int j = 0; j < membership[i].size(); ++j) {
            //      std::cout << trajectories[membership[i][j]].object_id() << " ";
            tracktable::arithmetic::add_in_place(avg, features[membership[i][j]]);
        }
        tracktable::arithmetic::divide_scalar_in_place(avg, static_cast<double>(membership[i].size()));
        std::cerr << avg;
        std::cerr << std::endl;
    }

    std::cerr << "------------------------- Correlation --------------------------\n";
    std::cerr << Correlation(features) << std::endl;

    return 0;
}

Correlation.h

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
/*
 * Copyright (c) 2014-2021 National Technology and Engineering
 * Solutions of Sandia, LLC. Under the terms of Contract DE-NA0003525
 * with National Technology and Engineering Solutions of Sandia, LLC,
 * the U.S. Government retains certain rights in this software.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef Correlation_h
#define Correlation_h

#include <tracktable/Domain/FeatureVectors.h>

#include <sstream>
#include <string>
#include <vector>

/** Calculates and prints the covariance matrix for a set of features.
 * @Tparam N size of individual feature vector
 * @param feature Vector of feature vectors to display covariance matrix of
 */
template <size_t N>
std::string Correlation(std::vector<tracktable::domain::feature_vectors::FeatureVector<N>>& features) {
    double mean[N] = {0.0};
    double sq_mean[N] = {0.0};
    double cov[N][N] = {{0.0}};

    for (auto i = 0u; i < features.size(); ++i) {
        for (auto j = 0u; j < N; ++j) {
            mean[j] += features[i][j] / static_cast<double>(N);
        }
    }

    for (auto i = 0u; i < features.size(); ++i) {
        for (auto j = 0u; j < N; ++j) {
            auto e = features[i][j] - mean[j];
            sq_mean[j] += e * e;
        }
    }
    for (auto k = 0u; k < features.size(); ++k) {
        auto& feature = features[k];
        for (auto i = 0u; i < N; ++i) {
            for (auto j = 0u; j <= i; ++j) {
                cov[i][j] +=
                    ((feature[i] - mean[i]) * (feature[j] - mean[j])) / (sqrt(sq_mean[i] * sq_mean[j]));
            }
        }
    }

    std::stringstream result;
    for (auto& row : cov) {
        for (auto& col : row) {
            result << std::setw(8) << col << "\t";
        }
        result << "\b\n";
    }
    return result.str();
}

#endif