Predict Example Source Files

Main.cpp

  1/*
  2 * Copyright (c) 2014-2023 National Technology and Engineering
  3 * Solutions of Sandia, LLC. Under the terms of Contract DE-NA0003525
  4 * with National Technology and Engineering Solutions of Sandia, LLC,
  5 * the U.S. Government retains certain rights in this software.
  6 *
  7 * Redistribution and use in source and binary forms, with or without
  8 * modification, are permitted provided that the following conditions
  9 * are met:
 10 *
 11 * 1. Redistributions of source code must retain the above copyright
 12 * notice, this list of conditions and the following disclaimer.
 13 *
 14 * 2. Redistributions in binary form must reproduce the above copyright
 15 * notice, this list of conditions and the following disclaimer in the
 16 * documentation and/or other materials provided with the distribution.
 17 *
 18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 22 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 29 */
 30
 31#include "Predict.h"
 32
 33#include <tracktable/CommandLineFactories/AssemblerFromCommandLine.h>
 34#include <tracktable/CommandLineFactories/PointReaderFromCommandLine.h>
 35#include <tracktable/Domain/Terrestrial.h>
 36
 37#include <boost/timer/timer.hpp>
 38
 39#include <string>
 40#include <vector>
 41
 42using TrajectoryT = tracktable::domain::terrestrial::trajectory_type;
 43using PointT = typename TrajectoryT::point_type;
 44using PointReaderT = tracktable::PointReader<PointT>;
 45using PointReaderIteratorT = typename PointReaderT::iterator;
 46using AssemblerT = tracktable::AssembleTrajectories<TrajectoryT, PointReaderIteratorT>;
 47
 48bool has_tail_number(const TrajectoryT &_trajectory);
 49bool has_destination(const TrajectoryT &_trajectory);
 50
 // Banner printed (on std::cerr) ahead of the generated option list whenever
// help is requested or the command line cannot be parsed.
static constexpr const char *helpmsg = R"(
--------------------------------------------------------------------------------
This example demonstrates using feature vectors to measure similarities between
trajectories via an Rtree

The predict example demonstrates:
    - Using command line factories to read points and assemble trajectories
    - Using boost program options to take parameters from command lines(in addition to the factories)
    - Conditioning trajectories based on length and objectid
    - Using boost rtree to locate similar trajectories based on cartesian distance in feature space

Typical use: '--string-field=dest x' is required

    ./predict --input=/data/SampleASDI.csv --delimiter=, --string-field=dest 30 --num-samples=10

--------------------------------------------------------------------------------)";
 67int main(int _argc, char *_argv[]) {
 68    auto numSamples = 10u;
 69
 70    // Set log level to reduce unecessary output
 71    tracktable::set_log_level(tracktable::log::info);
 72    // Create a basic command line option with boost
 73    bpo::options_description commandLineOptions("Options");
 74    // clang-format off
 75    commandLineOptions.add_options()
 76      ("help", "Print help")
 77      ("num-samples", bpo::value(&numSamples)->default_value(10),
 78        "Number of samples")
 79    ;
 80    // clang-format on
 81
 82    // Create command line factories
 83    tracktable::PointReaderFromCommandLine<PointT> readerFactory;
 84    tracktable::AssemblerFromCommandLine<TrajectoryT> assemblerFactory;
 85    // Add options from the factories
 86    readerFactory.addOptions(commandLineOptions);
 87    assemblerFactory.addOptions(commandLineOptions);
 88
 89    /** Boost program options using a variable map to tie everything together.
 90     * one parse will have a single variable map. We need to let the factories know
 91     * about this variable map so they can pull information out of it */
 92    auto vm = std::make_shared<boost::program_options::variables_map>();
 93    readerFactory.setVariables(vm);
 94    assemblerFactory.setVariables(vm);
 95
 96    // Parse the command lines, don't forget the 'notify' after
 97    try {
 98        // We use this try/catch to automatically display help when an unknown option is used
 99        boost::program_options::store(
100            boost::program_options::command_line_parser(_argc, _argv).options(commandLineOptions).run(), *vm);
101        boost::program_options::notify(*vm);
102    } catch (boost::program_options::error e) {
103        std::cerr << e.what();
104        std::cerr << helpmsg << "\n\n";
105        std::cerr << commandLineOptions << std::endl;
106        return 1;
107    }
108    /** Parsing will give an error of an incorrect option is used, but it won't
109     * display the help unless we tell it too */
110    if (vm->count("help") != 0) {
111        std::cerr << helpmsg << "\n\n";
112        std::cerr << commandLineOptions << std::endl;
113        return 1;
114    }
115
116    // Create Point Reader and assembler
117    auto pointReader = readerFactory.createPointReader();
118    auto assembler = assemblerFactory.createAssembler(pointReader);
119
120    std::vector<std::shared_ptr<TrajectoryT>> trajectories = {};
121    // This block exists for easy timing of trajectory assembling using the boost auto timer
122    // Note that all feedback to the user is done on std::cerr, this allows us to only
123    // put desired results into std::cout, this make processing output easier.
124    {
125        std::cerr << "Assemble Trajectories" << std::endl;
126        boost::timer::auto_cpu_timer timer3(std::cerr);
127        auto count = 0u;
128        std::cerr << std::right;
129        for (auto tIter = assembler->begin(); tIter != assembler->end(); ++tIter) {
130            if (has_tail_number(*tIter)) {  // Skip tail number flights
131                continue;
132            }
133            if (!has_destination(*tIter)) {  // Skip flights without destination or errors
134                continue;
135            }
136            std::cerr << "\b\b\b\b\b\b\b\b\b\b" << std::setw(10)  // Using backspaces for in place counter
137                      << count++;
138            trajectories.push_back(std::make_shared<TrajectoryT>(*tIter));
139        }
140        std::cerr << std::left << "\nStarting with " << trajectories.size() << " trajectories" << std::endl;
141    }
142
143    // This routine does a prediction based on destination airport
144    Predict(trajectories, numSamples);
145
146    return 0;
147}
148
149bool has_tail_number(const TrajectoryT &_trajectory) {
150    auto s = _trajectory.object_id();
151    return !((s[0] != 'N') || (s[1] < '0') || (s[1] > '9'));
152}
153
154bool has_destination(const TrajectoryT &_trajectory) {
155    return !_trajectory.front().string_property("dest").empty() &&
156           (_trajectory.front().string_property("dest") == _trajectory.back().string_property("dest"));
157}

Predict.h

 1/*
 2 * Copyright (c) 2014-2023 National Technology and Engineering
 3 * Solutions of Sandia, LLC. Under the terms of Contract DE-NA0003525
 4 * with National Technology and Engineering Solutions of Sandia, LLC,
 5 * the U.S. Government retains certain rights in this software.
 6 *
 7 * Redistribution and use in source and binary forms, with or without
 8 * modification, are permitted provided that the following conditions
 9 * are met:
10 *
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 *
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
30
31#ifndef Predict_h
32#define Predict_h
33
34#include <tracktable/Domain/Terrestrial.h>
35
36#include <vector>
37
38using TrajectoryT = tracktable::domain::terrestrial::trajectory_type;
39using TrajectoryVectorT = std::vector<std::shared_ptr<TrajectoryT>>;
40
41void Predict(const TrajectoryVectorT &_trajectories, const size_t _numSamples);
42
43#endif

Predict.cpp

  1/*
  2 * Copyright (c) 2014-2023 National Technology and Engineering
  3 * Solutions of Sandia, LLC. Under the terms of Contract DE-NA0003525
  4 * with National Technology and Engineering Solutions of Sandia, LLC,
  5 * the U.S. Government retains certain rights in this software.
  6 *
  7 * Redistribution and use in source and binary forms, with or without
  8 * modification, are permitted provided that the following conditions
  9 * are met:
 10 *
 11 * 1. Redistributions of source code must retain the above copyright
 12 * notice, this list of conditions and the following disclaimer.
 13 *
 14 * 2. Redistributions in binary form must reproduce the above copyright
 15 * notice, this list of conditions and the following disclaimer in the
 16 * documentation and/or other materials provided with the distribution.
 17 *
 18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 22 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 29 */
 30
 31#include "Predict.h"
 32
 33#include "BuildFeatures.h"
 34#include "PredictData.h"
 35
 36#include <tracktable/RW/KmlOut.h>
 37
 38using tracktable::kml;
 39
 40void Predict(const TrajectoryVectorT &_trajectories, const size_t _numSamples) {
 41    // This routine builds a feature database from specific intervals
 42    auto features = BuildManyEvenFeatures(_trajectories);
 43    // use random features as our 'test' set
 44    auto to_be_predicted = BuildRandomFeatures(_trajectories, 0.2, 0.8);
 45
 46    // Create rtree for our predictions
 47    PredictRtreeT rtree;
 48    // std::vector<PredictRtreeT::value_type> data;
 49
 50    // Build the feature vector/id number combo for the rtree.  There is an
 51    // unused value set to 0 that might be used in the future.
 52
 53    for (auto i = 0u; i < features.size(); ++i) {
 54        rtree.insert(&(features[i]));
 55    }
 56
 57    // Define the the number of trajectories that will be used to predict
 58    // the destination.  The bins vector will be used to hold the result of
 59    // how far down the potential list of predicted destinations you need to
 60    // go to find the right destination.  The bins vector is a little larger
 61    // to hold the count of getting it wrong.
 62
 63    std::vector<size_t> bins(_numSamples + 1);
 64
 65    // Okay.  Here is where the work is done.  Go through each flight and
 66    // find all of its neighbors to predict where it will land.
 67
 68    for (auto &current : to_be_predicted) {
 69        std::vector<PredictRtreeT::value_type> result_n;
 70
 71        // Note we are getting more results than _numSamples.  This is because
 72        // we will throw out the hit that corresponds to the trajectory itself.
 73        // It would be cheating to use that for prediction.
 74
 75        auto it = rtree.qbegin(boost::geometry::index::nearest(current.feature, _numSamples + 10));
 76        for (; (it != rtree.qend()) && (result_n.size() < _numSamples); ++it) {
 77            if ((*it)->index != current.index) result_n.push_back(*it);
 78        }
 79
 80        using WeightPairT = std::pair<std::string, double>;
 81        std::map<WeightPairT::first_type, WeightPairT::second_type> weights;
 82
 83        TrajectoryVectorT results;
 84
 85        auto dest = current.index->front().string_property("dest");
 86        std::cout << dest << std::endl;
 87
 88        // Take the results from the rtree query, and then build a vector that
 89        // has the resulting flights.  In addition, build a table of weights for
 90        // each potential destination (via a map) using what is essentially a
 91        // 1/d^2 weight.  The d^2 term comes from the "comparable_distance"
 92        // function.
 93
 94        auto total_weight = 0.0;
 95        for (auto &found : result_n) {
 96            double weight =
 97                1.0 / (0.01 + boost::geometry::comparable_distance(current.feature, found->feature));
 98            total_weight += weight;
 99            results.push_back(found->index);
100            weights[found->index->front().string_property("dest")] += weight;
101        }
102
103        // An intermediate step.  Basically, the elements of the map are sorted
104        // by key (destination) not by value (weight).  We put them in a vector
105        // that will be sorted by value since that is how we will use them.
106
107        std::vector<WeightPairT> sorted(weights.begin(), weights.end());
108
109        // Do the actual sorting.  Have to specify using the second element since
110        // sort will use the first element (destination) by default.
111
112        std::sort(sorted.begin(), sorted.end(),
113                  [](WeightPairT &_lhs, WeightPairT &_rhs) { return _lhs.second > _rhs.second; });
114
115        // Here is where we fill the bin of whether they were ther first guess,
116        // second guess, etc., or wrong. The wrong answers go in the 0 bin.
117        // The number is done by iterator subtraction, which is a totally
118        // valid way to do things in C++.  Note that if pos == sorted.size(),
119        // then the find_if returned sorted.end() and the value was
120        // not found.
121
122        // size_t cast safe because gauranteed >= 0
123        auto pos = size_t(std::find_if(sorted.begin(), sorted.end(),
124                                       [&dest](WeightPairT &_wp) { return _wp.first == dest; }) -
125                          sorted.begin());
126        ++bins[pos == sorted.size() ? 0 : pos + 1];
127
128        if ((pos != sorted.size()) && (pos == 3) && (total_weight > 400)) {
129            std::string filename = "output/" + current.index->object_id() + "cand.kml";
130            std::ofstream canidateOut(filename);
131            if (!canidateOut.is_open()) {
132                std::cerr << "Could not open" << filename << std::endl;
133            } else {
134                canidateOut << kml(results);
135            }
136            filename = "output/" + current.index->object_id() + ".kml";
137            std::ofstream resultOut(filename);
138            if (!resultOut.is_open()) {
139                std::cerr << "Could not open" << filename << std::endl;
140            } else {
141                resultOut << kml(*(current.index));
142            }
143        }
144    }
145
146    int total = 0;
147    for (unsigned int i = 1; i < bins.size(); ++i) {
148        total += bins[i];
149        std::cout << "bins[" << i << "] = " << bins[i] << ", total = " << total
150                  << ", cumulative fraction  = " << double(total) / double(to_be_predicted.size())
151                  << std::endl;
152    }
153    std::cout << "Got " << bins[0] << " (" << double(bins[0]) / double(to_be_predicted.size())
154              << " fraction) wrong" << std::endl;
155}

PredictData.h

 1/*
 2 * Copyright (c) 2014-2023 National Technology and Engineering
 3 * Solutions of Sandia, LLC. Under the terms of Contract DE-NA0003525
 4 * with National Technology and Engineering Solutions of Sandia, LLC,
 5 * the U.S. Government retains certain rights in this software.
 6 *
 7 * Redistribution and use in source and binary forms, with or without
 8 * modification, are permitted provided that the following conditions
 9 * are met:
10 *
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 *
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
30
31#ifndef PredictData_h
32#define PredictData_h
33
34#include <tracktable/Analysis/GuardedBoostGeometryRTreeHeader.h>
35#include <tracktable/Domain/Terrestrial.h>
36
37#include <boost/geometry/geometries/adapted/std_array.hpp>
38
39#include <array>
40#include <memory>
41
42using TrajectoryT = tracktable::domain::terrestrial::trajectory_type;
43// TODO: Make TrajectoryT a template param
44
45class PredictData {
46   public:
47    // Have to use boost array instead of standard because rtree can't handle std::array
48    using FeatureT = std::array<double, 10>;
49    FeatureT feature;
50    size_t Id;
51    std::shared_ptr<TrajectoryT> index;
52
53    PredictData() = delete;
54    PredictData(FeatureT const& _f, size_t id, std::shared_ptr<TrajectoryT> idx)
55        : feature(_f), Id(id), index(idx) {}
56    ~PredictData() = default;
57
58    PredictData(PredictData const& _other) {
59        feature = _other.feature;
60        Id = _other.Id;
61        index = _other.index;
62    }
63
64    PredictData& operator=(PredictData const& _other) {
65        feature = _other.feature;
66        Id = _other.Id;
67        index = _other.index;
68        return *this;
69    }
70};
71
72BOOST_GEOMETRY_REGISTER_STD_ARRAY_CS(cs::cartesian)
73
74namespace boost {
75namespace geometry {
76namespace index {
77
78template <>
79struct indexable<PredictData*> {
80    using result_type = PredictData::FeatureT const&;  // required by boost
81    result_type& operator()(PredictData* const& v) const { return v->feature; }
82};
83
84}  // namespace index
85}  // namespace geometry
86}  // namespace boost
87using PredictRtreeT = boost::geometry::index::rtree<PredictData*, boost::geometry::index::quadratic<16>>;
88
89#endif

BuildFeatures.h

 1/*
 2 * Copyright (c) 2014-2023 National Technology and Engineering
 3 * Solutions of Sandia, LLC. Under the terms of Contract DE-NA0003525
 4 * with National Technology and Engineering Solutions of Sandia, LLC,
 5 * the U.S. Government retains certain rights in this software.
 6 *
 7 * Redistribution and use in source and binary forms, with or without
 8 * modification, are permitted provided that the following conditions
 9 * are met:
10 *
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 *
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
30#ifndef BuildFeatures_h
31#define BuildFeatures_h
32
33#include "PredictData.h"
34
35#include <tracktable/Domain/Terrestrial.h>
36
37#include <vector>
38
39using TrajectoryT = tracktable::domain::terrestrial::trajectory_type;
40using TrajectoryVectorT = std::vector<std::shared_ptr<TrajectoryT>>;
41using DataVectorT = std::vector<PredictData>;
42
43DataVectorT BuildFeatures(const TrajectoryVectorT &_trajectories, double _fraction);
44DataVectorT BuildManyEvenFeatures(const TrajectoryVectorT &_trajectories);
45DataVectorT BuildManyRandomFeatures(const TrajectoryVectorT &_trajectories);
46DataVectorT BuildRandomFeatures(const TrajectoryVectorT &_trajectories, double _lower, double _upper);
47PredictData BuildFeature(std::shared_ptr<TrajectoryT> _trajectory, double _fraction);
48
49#endif

BuildFeatures.cpp

 1/*
 2 * Copyright (c) 2014-2023 National Technology and Engineering
 3 * Solutions of Sandia, LLC. Under the terms of Contract DE-NA0003525
 4 * with National Technology and Engineering Solutions of Sandia, LLC,
 5 * the U.S. Government retains certain rights in this software.
 6 *
 7 * Redistribution and use in source and binary forms, with or without
 8 * modification, are permitted provided that the following conditions
 9 * are met:
10 *
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 *
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
30
31#include "BuildFeatures.h"
32
33#include <algorithm>  // for transform
34#include <iterator>   // for back_inserter
35#include <random>
36
37DataVectorT BuildFeatures(const TrajectoryVectorT &_trajectories, double _fraction) {
38    DataVectorT features;
39    // transform allows creating a vector of type b from a vector of type a
40    std::transform(_trajectories.begin(), _trajectories.end(), std::back_inserter(features),
41                   [&](std::shared_ptr<TrajectoryT> _t) { return BuildFeature(_t, _fraction); });
42    return features;
43}
44
45DataVectorT BuildManyEvenFeatures(const TrajectoryVectorT &_trajectories) {
46    DataVectorT features;
47    for (auto i = 2u; i <= 8u; ++i) {
48        auto f = BuildFeatures(_trajectories, i / 10.0);
49        features.insert(features.end(), f.begin(), f.end());
50    }
51    return features;
52}
53
54DataVectorT BuildManyRandomFeatures(const TrajectoryVectorT &_trajectories) {
55    DataVectorT features;
56    for (auto i = 0u; i < 7u; ++i) {
57        auto f = BuildRandomFeatures(_trajectories, 0.2, 0.8);
58        features.insert(features.end(), f.begin(), f.end());
59    }
60    return features;
61}
62
63DataVectorT BuildRandomFeatures(const TrajectoryVectorT &_trajectories, double _lower, double _upper) {
64    static std::mt19937 gen(0);
65    DataVectorT features;
66    std::uniform_real_distribution<double> dis(_lower, _upper);  // uniform distribution
67    std::transform(_trajectories.begin(), _trajectories.end(), std::back_inserter(features),
68                   [&](std::shared_ptr<TrajectoryT> _t) { return BuildFeature(_t, dis(gen)); });
69    return features;
70}
71
72PredictData BuildFeature(std::shared_ptr<TrajectoryT> _trajectory, double _fraction) {
73    // Nothing complicated here.  Basically, divide the trajectory up into
74    // some even-sized pieces, and take the early way-points and build a
75    // feature vector out of them. The only tricky part is that when you
76    // divide a trajectory into n pieces, you essentially have n+1 points
77    // to choose from.
78
79    constexpr auto numSamples = 4u;  // Must be >= 2 and <= 4
80
81    PredictData::FeatureT feature;
82    static_assert(((numSamples - 1) * 2 + 1) < feature.size(), "Size Mismatch");
83    for (auto i = 0u; i < numSamples; ++i) {
84        auto frac = _fraction * double(i) / double(numSamples - 1.0);
85        auto p = tracktable::point_at_length_fraction(*_trajectory, frac);
86        feature[2 * i] = p.longitude();
87        feature[2 * i + 1] = p.latitude();
88    }
89    auto p = tracktable::point_at_length_fraction(*_trajectory, _fraction);
90    feature[2 * numSamples] = (p.timestamp() - _trajectory->front().timestamp()).seconds() / 1200.;
91
92    return PredictData(feature, 0, _trajectory);
93}