Cluster Example Source Files

Main.cpp

  1/*
  2 * Copyright (c) 2014-2023 National Technology and Engineering
  3 * Solutions of Sandia, LLC. Under the terms of Contract DE-NA0003525
  4 * with National Technology and Engineering Solutions of Sandia, LLC,
  5 * the U.S. Government retains certain rights in this software.
  6 *
  7 * Redistribution and use in source and binary forms, with or without
  8 * modification, are permitted provided that the following conditions
  9 * are met:
 10 *
 11 * 1. Redistributions of source code must retain the above copyright
 12 * notice, this list of conditions and the following disclaimer.
 13 *
 14 * 2. Redistributions in binary form must reproduce the above copyright
 15 * notice, this list of conditions and the following disclaimer in the
 16 * documentation and/or other materials provided with the distribution.
 17 *
 18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 22 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 29 */
 30
 31#include "Correlation.h"
 32
 33#include <tracktable/Analysis/ComputeDBSCANClustering.h>
 34#include <tracktable/Analysis/DistanceGeometry.h>
 35#include <tracktable/CommandLineFactories/AssemblerFromCommandLine.h>
 36#include <tracktable/CommandLineFactories/PointReaderFromCommandLine.h>
 37#include <tracktable/Domain/FeatureVectors.h>
 38#include <tracktable/Domain/Terrestrial.h>
 39
 40#include <boost/timer/timer.hpp>
 41
 42using FeatureVectorT = tracktable::domain::feature_vectors::FeatureVector<10>;
 43
 44using TrajectoryT = tracktable::domain::terrestrial::trajectory_type;
 45using PointT = typename TrajectoryT::point_type;
 46using PointReaderT = tracktable::PointReader<PointT>;
 47using PointReaderIteratorT = typename PointReaderT::iterator;
 48using AssemblerT = tracktable::AssembleTrajectories<TrajectoryT, PointReaderIteratorT>;
 49
 50static constexpr auto helpmsg = R"(
 51--------------------------------------------------------------------------------
 52The cluster example demonstrates:
 53    - Using command line factories to read points and assemble trajectories
 54    - Create features using distance geometries
 55    - Cluster and and assign membership using dbscan
 56
 57Typical use:
 58    ./cluster --input=/data/flights.tsv
 59
 60Defaults assume a tab separated points file formatted as :
 61
 62OBJECTID TIMESTAMP LON LAT
 63--------------------------------------------------------------------------------)";
 64int main(int _argc, char* _argv[]) {
 65    constexpr auto timerFormat = "\u001b[30;1m %w seconds\u001b[0m\n";
 66    // Set log level to reduce unecessary output
 67    tracktable::set_log_level(tracktable::log::info);
 68    // Create a basic command line option with boost
 69    boost::program_options::options_description commandLineOptions;
 70    commandLineOptions.add_options()("help", "Print help");
 71
 72    // Create command line factories
 73    tracktable::PointReaderFromCommandLine<PointT> readerFactory;
 74    tracktable::AssemblerFromCommandLine<TrajectoryT> assemblerFactory;
 75    // Add options from the factories
 76    readerFactory.addOptions(commandLineOptions);
 77    assemblerFactory.addOptions(commandLineOptions);
 78
 79    // And a command line option for output
 80    commandLineOptions.add_options()("output", bpo::value<std::string>()->default_value("-"),
 81                                     "file to write to (use '-' for stdout), overridden by 'separate-kmls'");
 82
 83    /** Boost program options using a variable map to tie everything together.
 84     * one parse will have a single variable map. We need to let the factories know
 85     * about this variable map so they can pull information out of it */
 86    auto vm = std::make_shared<boost::program_options::variables_map>();
 87    readerFactory.setVariables(vm);
 88    assemblerFactory.setVariables(vm);
 89
 90    // Parse the command lines, don't forget the 'notify' after
 91    try {
 92        // We use this try/catch to automatically display help when an unknown option is used
 93        boost::program_options::store(
 94            boost::program_options::command_line_parser(_argc, _argv).options(commandLineOptions).run(), *vm);
 95        boost::program_options::notify(*vm);
 96    } catch (boost::program_options::error e) {
 97        std::cerr << e.what();
 98        std::cerr << helpmsg << "\n\n";
 99        std::cerr << commandLineOptions << std::endl;
100        return 1;
101    }
102    /** Parsing will give an error of an incorrect option is used, but it won't
103     * display the help unless we tell it too */
104    if (vm->count("help") != 0) {
105        std::cerr << helpmsg << "\n\n";
106        std::cerr << commandLineOptions << std::endl;
107        return 1;
108    }
109
110    // Create Point Reader and assembler
111    auto pointReader = readerFactory.createPointReader();
112    auto assembler = assemblerFactory.createAssembler(pointReader);
113
114    std::vector<std::shared_ptr<TrajectoryT>> trajectories = {};
115    {
116        std::cerr << "Assemble Trajectories" << std::endl;
117        boost::timer::auto_cpu_timer assembleTimer(std::cerr, timerFormat);
118        auto count = 0u;
119        std::cerr << std::right;
120        for (auto tIter = assembler->begin(); tIter != assembler->end(); ++tIter) {
121            if (tracktable::length(*tIter) < 100) {  // filter out standing still
122                continue;
123            }
124            std::cerr << "\b\b\b\b\b\b\b\b\b\b" << std::setw(10)  // Using backspaces for in place counter
125                      << count++;
126            trajectories.push_back(std::make_shared<TrajectoryT>(*tIter));
127        }
128        std::cerr << std::left << "\nStarting with " << trajectories.size() << " trajectories" << std::endl;
129    }
130
131    /* Create a set of features. distance geometry produces a vector of doubles
132     * and must be transferred to feature vectors to be consumed by certain functions */
133    std::vector<FeatureVectorT> features;
134    {
135        std::cerr << "Build features" << std::endl;
136        boost::timer::auto_cpu_timer featuretTimer(std::cerr, timerFormat);
137        for (auto& t : trajectories) {
138            auto v = tracktable::distance_geometry_by_distance(*t, 4);
139            FeatureVectorT f;
140            for (auto i = 0u; i < features.size() && i < v.size(); ++i) {
141                f[i] = v[i];
142            }
143            features.push_back(f);
144        }
145    }
146
147    using ClusterLabelT = std::pair<int, int>;
148    using ClusterLabelVectorT = std::vector<ClusterLabelT>;
149    using IdVectorT = std::vector<int>;
150
151    FeatureVectorT search_box;
152    for (auto i = 0u; i < search_box.size(); ++i) {
153        search_box[i] = 0.1;
154    }
155
156    ClusterLabelVectorT vertex_cluster_labels;
157    {
158        std::cerr << "Cluster with dbscan" << std::endl;
159        boost::timer::auto_cpu_timer clusterTimer(std::cerr, timerFormat);
160        tracktable::cluster_with_dbscan(features.begin(), features.end(), search_box, 3,
161                                        std::back_inserter(vertex_cluster_labels));
162    }
163
164    std::vector<IdVectorT> membership;
165    {
166        std::cerr << "Build Cluster membership List" << std::endl;
167        boost::timer::auto_cpu_timer membershiptTimer(std::cerr, timerFormat);
168        tracktable::build_cluster_membership_lists(vertex_cluster_labels.begin(), vertex_cluster_labels.end(),
169                                                   std::back_inserter(membership));
170    }
171
172    for (unsigned int i = 0; i < membership.size(); ++i) {
173        std::cerr << i << "(" << std::setw(3) << membership[i].size() << "):";
174        FeatureVectorT avg;
175        avg = tracktable::arithmetic::zero<FeatureVectorT>();
176        for (unsigned int j = 0; j < membership[i].size(); ++j) {
177            //      std::cout << trajectories[membership[i][j]].object_id() << " ";
178            tracktable::arithmetic::add_in_place(avg, features[membership[i][j]]);
179        }
180        tracktable::arithmetic::divide_scalar_in_place(avg, static_cast<double>(membership[i].size()));
181        std::cerr << avg;
182        std::cerr << std::endl;
183    }
184
185    std::cerr << "------------------------- Correlation --------------------------\n";
186    std::cerr << Correlation(features) << std::endl;
187
188    return 0;
189}

Correlation.h

 1/*
 2 * Copyright (c) 2014-2023 National Technology and Engineering
 3 * Solutions of Sandia, LLC. Under the terms of Contract DE-NA0003525
 4 * with National Technology and Engineering Solutions of Sandia, LLC,
 5 * the U.S. Government retains certain rights in this software.
 6 *
 7 * Redistribution and use in source and binary forms, with or without
 8 * modification, are permitted provided that the following conditions
 9 * are met:
10 *
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 *
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
30
31#ifndef Correlation_h
32#define Correlation_h
33
34#include <tracktable/Domain/FeatureVectors.h>
35
36#include <sstream>
37#include <string>
38#include <vector>
39
40/** Calculates and prints the covariance matrix for a set of features.
41 * @Tparam N size of individual feature vector
42 * @param feature Vector of feature vectors to display covariance matrix of
43 */
44template <size_t N>
45std::string Correlation(std::vector<tracktable::domain::feature_vectors::FeatureVector<N>>& features) {
46    double mean[N] = {0.0};
47    double sq_mean[N] = {0.0};
48    double cov[N][N] = {{0.0}};
49
50    for (auto i = 0u; i < features.size(); ++i) {
51        for (auto j = 0u; j < N; ++j) {
52            mean[j] += features[i][j] / static_cast<double>(N);
53        }
54    }
55
56    for (auto i = 0u; i < features.size(); ++i) {
57        for (auto j = 0u; j < N; ++j) {
58            auto e = features[i][j] - mean[j];
59            sq_mean[j] += e * e;
60        }
61    }
62    for (auto k = 0u; k < features.size(); ++k) {
63        auto& feature = features[k];
64        for (auto i = 0u; i < N; ++i) {
65            for (auto j = 0u; j <= i; ++j) {
66                cov[i][j] +=
67                    ((feature[i] - mean[i]) * (feature[j] - mean[j])) / (sqrt(sq_mean[i] * sq_mean[j]));
68            }
69        }
70    }
71
72    std::stringstream result;
73    for (auto& row : cov) {
74        for (auto& col : row) {
75            result << std::setw(8) << col << "\t";
76        }
77        result << "\b\n";
78    }
79    return result.str();
80}
81
82#endif