Cluster Example Source Files¶
Table of Contents
Main.cpp¶
1/*
2 * Copyright (c) 2014-2023 National Technology and Engineering
3 * Solutions of Sandia, LLC. Under the terms of Contract DE-NA0003525
4 * with National Technology and Engineering Solutions of Sandia, LLC,
5 * the U.S. Government retains certain rights in this software.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 *
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
30
31#include "Correlation.h"
32
33#include <tracktable/Analysis/ComputeDBSCANClustering.h>
34#include <tracktable/Analysis/DistanceGeometry.h>
35#include <tracktable/CommandLineFactories/AssemblerFromCommandLine.h>
36#include <tracktable/CommandLineFactories/PointReaderFromCommandLine.h>
37#include <tracktable/Domain/FeatureVectors.h>
38#include <tracktable/Domain/Terrestrial.h>
39
40#include <boost/timer/timer.hpp>
41
42using FeatureVectorT = tracktable::domain::feature_vectors::FeatureVector<10>;
43
44using TrajectoryT = tracktable::domain::terrestrial::trajectory_type;
45using PointT = typename TrajectoryT::point_type;
46using PointReaderT = tracktable::PointReader<PointT>;
47using PointReaderIteratorT = typename PointReaderT::iterator;
48using AssemblerT = tracktable::AssembleTrajectories<TrajectoryT, PointReaderIteratorT>;
49
50static constexpr auto helpmsg = R"(
51--------------------------------------------------------------------------------
52The cluster example demonstrates:
53 - Using command line factories to read points and assemble trajectories
54 - Create features using distance geometries
55 - Cluster and and assign membership using dbscan
56
57Typical use:
58 ./cluster --input=/data/flights.tsv
59
60Defaults assume a tab separated points file formatted as :
61
62OBJECTID TIMESTAMP LON LAT
63--------------------------------------------------------------------------------)";
64int main(int _argc, char* _argv[]) {
65 constexpr auto timerFormat = "\u001b[30;1m %w seconds\u001b[0m\n";
66 // Set log level to reduce unecessary output
67 tracktable::set_log_level(tracktable::log::info);
68 // Create a basic command line option with boost
69 boost::program_options::options_description commandLineOptions;
70 commandLineOptions.add_options()("help", "Print help");
71
72 // Create command line factories
73 tracktable::PointReaderFromCommandLine<PointT> readerFactory;
74 tracktable::AssemblerFromCommandLine<TrajectoryT> assemblerFactory;
75 // Add options from the factories
76 readerFactory.addOptions(commandLineOptions);
77 assemblerFactory.addOptions(commandLineOptions);
78
79 // And a command line option for output
80 commandLineOptions.add_options()("output", bpo::value<std::string>()->default_value("-"),
81 "file to write to (use '-' for stdout), overridden by 'separate-kmls'");
82
83 /** Boost program options using a variable map to tie everything together.
84 * one parse will have a single variable map. We need to let the factories know
85 * about this variable map so they can pull information out of it */
86 auto vm = std::make_shared<boost::program_options::variables_map>();
87 readerFactory.setVariables(vm);
88 assemblerFactory.setVariables(vm);
89
90 // Parse the command lines, don't forget the 'notify' after
91 try {
92 // We use this try/catch to automatically display help when an unknown option is used
93 boost::program_options::store(
94 boost::program_options::command_line_parser(_argc, _argv).options(commandLineOptions).run(), *vm);
95 boost::program_options::notify(*vm);
96 } catch (boost::program_options::error e) {
97 std::cerr << e.what();
98 std::cerr << helpmsg << "\n\n";
99 std::cerr << commandLineOptions << std::endl;
100 return 1;
101 }
102 /** Parsing will give an error of an incorrect option is used, but it won't
103 * display the help unless we tell it too */
104 if (vm->count("help") != 0) {
105 std::cerr << helpmsg << "\n\n";
106 std::cerr << commandLineOptions << std::endl;
107 return 1;
108 }
109
110 // Create Point Reader and assembler
111 auto pointReader = readerFactory.createPointReader();
112 auto assembler = assemblerFactory.createAssembler(pointReader);
113
114 std::vector<std::shared_ptr<TrajectoryT>> trajectories = {};
115 {
116 std::cerr << "Assemble Trajectories" << std::endl;
117 boost::timer::auto_cpu_timer assembleTimer(std::cerr, timerFormat);
118 auto count = 0u;
119 std::cerr << std::right;
120 for (auto tIter = assembler->begin(); tIter != assembler->end(); ++tIter) {
121 if (tracktable::length(*tIter) < 100) { // filter out standing still
122 continue;
123 }
124 std::cerr << "\b\b\b\b\b\b\b\b\b\b" << std::setw(10) // Using backspaces for in place counter
125 << count++;
126 trajectories.push_back(std::make_shared<TrajectoryT>(*tIter));
127 }
128 std::cerr << std::left << "\nStarting with " << trajectories.size() << " trajectories" << std::endl;
129 }
130
131 /* Create a set of features. distance geometry produces a vector of doubles
132 * and must be transferred to feature vectors to be consumed by certain functions */
133 std::vector<FeatureVectorT> features;
134 {
135 std::cerr << "Build features" << std::endl;
136 boost::timer::auto_cpu_timer featuretTimer(std::cerr, timerFormat);
137 for (auto& t : trajectories) {
138 auto v = tracktable::distance_geometry_by_distance(*t, 4);
139 FeatureVectorT f;
140 for (auto i = 0u; i < features.size() && i < v.size(); ++i) {
141 f[i] = v[i];
142 }
143 features.push_back(f);
144 }
145 }
146
147 using ClusterLabelT = std::pair<int, int>;
148 using ClusterLabelVectorT = std::vector<ClusterLabelT>;
149 using IdVectorT = std::vector<int>;
150
151 FeatureVectorT search_box;
152 for (auto i = 0u; i < search_box.size(); ++i) {
153 search_box[i] = 0.1;
154 }
155
156 ClusterLabelVectorT vertex_cluster_labels;
157 {
158 std::cerr << "Cluster with dbscan" << std::endl;
159 boost::timer::auto_cpu_timer clusterTimer(std::cerr, timerFormat);
160 tracktable::cluster_with_dbscan(features.begin(), features.end(), search_box, 3,
161 std::back_inserter(vertex_cluster_labels));
162 }
163
164 std::vector<IdVectorT> membership;
165 {
166 std::cerr << "Build Cluster membership List" << std::endl;
167 boost::timer::auto_cpu_timer membershiptTimer(std::cerr, timerFormat);
168 tracktable::build_cluster_membership_lists(vertex_cluster_labels.begin(), vertex_cluster_labels.end(),
169 std::back_inserter(membership));
170 }
171
172 for (unsigned int i = 0; i < membership.size(); ++i) {
173 std::cerr << i << "(" << std::setw(3) << membership[i].size() << "):";
174 FeatureVectorT avg;
175 avg = tracktable::arithmetic::zero<FeatureVectorT>();
176 for (unsigned int j = 0; j < membership[i].size(); ++j) {
177 // std::cout << trajectories[membership[i][j]].object_id() << " ";
178 tracktable::arithmetic::add_in_place(avg, features[membership[i][j]]);
179 }
180 tracktable::arithmetic::divide_scalar_in_place(avg, static_cast<double>(membership[i].size()));
181 std::cerr << avg;
182 std::cerr << std::endl;
183 }
184
185 std::cerr << "------------------------- Correlation --------------------------\n";
186 std::cerr << Correlation(features) << std::endl;
187
188 return 0;
189}
Correlation.h¶
1/*
2 * Copyright (c) 2014-2023 National Technology and Engineering
3 * Solutions of Sandia, LLC. Under the terms of Contract DE-NA0003525
4 * with National Technology and Engineering Solutions of Sandia, LLC,
5 * the U.S. Government retains certain rights in this software.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 *
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
30
31#ifndef Correlation_h
32#define Correlation_h
33
34#include <tracktable/Domain/FeatureVectors.h>
35
36#include <sstream>
37#include <string>
38#include <vector>
39
40/** Calculates and prints the covariance matrix for a set of features.
41 * @Tparam N size of individual feature vector
42 * @param feature Vector of feature vectors to display covariance matrix of
43 */
44template <size_t N>
45std::string Correlation(std::vector<tracktable::domain::feature_vectors::FeatureVector<N>>& features) {
46 double mean[N] = {0.0};
47 double sq_mean[N] = {0.0};
48 double cov[N][N] = {{0.0}};
49
50 for (auto i = 0u; i < features.size(); ++i) {
51 for (auto j = 0u; j < N; ++j) {
52 mean[j] += features[i][j] / static_cast<double>(N);
53 }
54 }
55
56 for (auto i = 0u; i < features.size(); ++i) {
57 for (auto j = 0u; j < N; ++j) {
58 auto e = features[i][j] - mean[j];
59 sq_mean[j] += e * e;
60 }
61 }
62 for (auto k = 0u; k < features.size(); ++k) {
63 auto& feature = features[k];
64 for (auto i = 0u; i < N; ++i) {
65 for (auto j = 0u; j <= i; ++j) {
66 cov[i][j] +=
67 ((feature[i] - mean[i]) * (feature[j] - mean[j])) / (sqrt(sq_mean[i] * sq_mean[j]));
68 }
69 }
70 }
71
72 std::stringstream result;
73 for (auto& row : cov) {
74 for (auto& col : row) {
75 result << std::setw(8) << col << "\t";
76 }
77 result << "\b\n";
78 }
79 return result.str();
80}
81
82#endif