Developer Documentation
Histogram.hh
1 /*===========================================================================*\
2  * *
3  * OpenFlipper *
4  * Copyright (c) 2001-2015, RWTH-Aachen University *
5  * Department of Computer Graphics and Multimedia *
6  * All rights reserved. *
7  * www.openflipper.org *
8  * *
9  *---------------------------------------------------------------------------*
10  * This file is part of OpenFlipper. *
11  *---------------------------------------------------------------------------*
12  * *
13  * Redistribution and use in source and binary forms, with or without *
14  * modification, are permitted provided that the following conditions *
15  * are met: *
16  * *
17  * 1. Redistributions of source code must retain the above copyright notice, *
18  * this list of conditions and the following disclaimer. *
19  * *
20  * 2. Redistributions in binary form must reproduce the above copyright *
21  * notice, this list of conditions and the following disclaimer in the *
22  * documentation and/or other materials provided with the distribution. *
23  * *
24  * 3. Neither the name of the copyright holder nor the names of its *
25  * contributors may be used to endorse or promote products derived from *
26  * this software without specific prior written permission. *
27  * *
28  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS *
29  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED *
30  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A *
31  * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER *
32  * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, *
33  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, *
34  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR *
35  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF *
36  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING *
37  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS *
38  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *
39  * *
40 \*===========================================================================*/
41 
42 #pragma once
43 
44 #include <vector>
45 #include <cassert>
46 #include <memory>
47 #include <exception>
48 #include <algorithm>
49 #include <type_traits>
50 #include <map>
51 
52 #include <QString>
53 
54 #include "SmartPointer.hh"
55 #include "../Config/ACGDefines.hh"
56 
57 namespace ACG {
58 class ACGDLLEXPORT Histogram {
59 public:
60  enum class LabelType {
61  PerBin,
62  PerBoundary,
63  };
64 
65  Histogram() = default;
66  Histogram(std::vector<size_t> &&bins,
67  std::vector<double> &&bin_widths)
68  : bins_(std::move(bins)),
69  bin_widths_(std::move(bin_widths))
70  {}
71 
72  virtual ~Histogram() = default;
73  const std::vector<size_t> &getBins() const { return bins_; }
74  const std::vector<double> &getBinWidths() const { return bin_widths_; }
75  virtual double getTotalWidth() const = 0;
76 
77  virtual LabelType getLabelType() const = 0;
78  virtual QString getBoundaryLabel(size_t /*idx*/) const { assert(false); return QString();}
79  virtual QString getBinLabel (size_t /*idx*/) const { assert(false); return QString();}
80 
81 protected:
82  std::vector<size_t> bins_;
83  std::vector<double> bin_widths_;
84 };
85 
86 
87 inline QString formatValue (int val) {
88  return QString::number(val);
89 }
90 inline QString formatValue (unsigned int val) {
91  return QString::number(val);
92 }
93 inline QString formatValue (double val) {
94  return QString::number(val, 'g', 3);
95 }
96 
97 template<typename T>
99 {
100 public:
101  UnbinnedHistogram(std::vector<size_t> &&bin_counts,
102  std::vector<T> &&bin_values)
103  : Histogram(std::move(bin_counts),
104  std::vector<double>(bin_counts.size(), 1.)),
105  bin_values_(std::move(bin_values))
106  {
107  }
108  double getTotalWidth() const override { return bins_.size();};
109  LabelType getLabelType() const override { return LabelType::PerBin; };
110  QString getBinLabel (size_t idx) const override { return formatValue(bin_values_[idx]);}
111 private:
112  std::vector<T> bin_values_;
113 };
114 
115 template<typename T>
116 class HistogramT : public Histogram {
117 public:
118  HistogramT() = default;
119 
120  HistogramT(std::vector<size_t> &&histogram,
121  std::vector<T> &&bin_boundaries,
122  std::vector<double> &&bin_widths
123  )
124  : Histogram(std::move(histogram), std::move(bin_widths)),
125  bin_boundaries_(std::move(bin_boundaries))
126  {
127  if (bins_.size() != bin_widths_.size()
128  || bins_.size() + 1 != bin_boundaries_.size()) {
129  throw std::runtime_error("Histogram constructor sizes don't match.");
130  }
131  }
132 
133  const std::vector<T> &getBinBoundaries() const {
134  return bin_boundaries_;
135  }
136 
137  double getTotalWidth() const override
138  {
139  return bin_boundaries_.back() - bin_boundaries_.front();
140  }
141 
142  LabelType getLabelType() const override
143  {
144  return LabelType::PerBoundary;
145  }
146 
147  QString getBoundaryLabel (size_t idx) const override
148  {
149  // TODO: for floating point types, choose accuracy depending on bin size
150  return formatValue(bin_boundaries_[idx]);
151  };
152 
153 
154 private:
155  std::vector<T> bin_boundaries_;
156 };
157 
158 
159 template<typename T>
160 std::unique_ptr<Histogram> create_histogram_unbinned(const std::map<T, size_t> &counts)
161 {
162  std::vector<T> values;
163  std::vector<size_t> histogram;
164  values.reserve(counts.size());
165  histogram.reserve(counts.size());
166  for (const auto &entry: counts)
167  {
168  values.push_back(entry.first);
169  histogram.push_back(entry.second);
170  }
171  return ptr::make_unique<UnbinnedHistogram<T>>(std::move(histogram), std::move(values));
172 }
173 
174 template<typename T, typename Iterable>
175 std::unique_ptr<Histogram> create_histogram_autorange(const Iterable &range, size_t max_bins = 50)
176 {
177 // we need to be careful with ranges, some sums (e.g. INT_MAX - INT_MIN) do not fit into a signed int,
178 // so we store bin sizes as doubles. With specialization or some tricks we
179 // could probably use the next-biggest integer type, but if we're using
180 // the biggest integer type already, we should to fall back to double anyways.
181 
182 
183  std::vector<T> bin_boundaries;
184  std::vector<size_t> bins;
185  std::vector<double> bin_widths;
186 
187  const size_t n = std::distance(begin(range), end(range));
188  if (n == 0) return {};
189  const auto minmax = std::minmax_element(begin(range), end(range));
190  const T min = *minmax.first;
191  const T max = *minmax.second;
192 
193  const double min_dbl = static_cast<double>(min);
194  const double val_range = static_cast<double>(max) - min_dbl;
195 
196  const size_t n_bins_max = std::min(max_bins, n);
197  bin_boundaries.reserve(n_bins_max + 1);
198 
199  T last_boundary = min;
200  bin_boundaries.push_back(min);
201  for (size_t i = 1; i < n_bins_max; ++i) {
202  // Adding val_range/n_bins to a accumulator might seem more efficient/elegant,
203  // but might cause numeric issues.
204 
205  // This multiplication order is bad for huge ranges that cause overflows,
206  // however I assume tiny ranges are more common than huge values and more
207  // important to get right. If you disagree, add a case distinction or something better.
208 
209  T boundary = static_cast<T>(min + (i * val_range) / n_bins_max);
210  // avoid zero-sized bins (happens for many ints with values in a small range)
211  if (boundary != last_boundary || i == 0) {
212  bin_boundaries.push_back(boundary);
213  bin_widths.push_back(boundary - last_boundary);
214  }
215  last_boundary = boundary;
216  }
217  bin_boundaries.push_back(max); // avoid rounding issues etc by explicitly picking max.
218  bin_widths.push_back(max - last_boundary);
219 
220  bin_boundaries.shrink_to_fit();
221  size_t n_bins = bin_boundaries.size() - 1;
222  bins.resize(n_bins);
223 
224  // note that due to rounding, our bins may have differing sizes, which matters
225  // if we handle integral types (relative size difference worst case: bin width 1 vs 2).
226  // Be careful to select the right bin.
227  std::for_each(begin(range), end(range), [&](const T &val) {
228  auto it = std::upper_bound(bin_boundaries.begin(), bin_boundaries.end(), val);
229  if (it == bin_boundaries.end()) --it; // the last value is exactly max!
230  size_t idx = std::distance(bin_boundaries.begin(), it);
231  assert(idx > 0);
232  ++bins[idx - 1];
233  });
234  return ptr::make_unique<HistogramT<T>>(std::move(bins), std::move(bin_boundaries), std::move(bin_widths));
235 }
236 
237 template<typename Iterable>
238 std::unique_ptr<Histogram> create_histogram_auto(const Iterable &range, size_t max_bins = 50)
239 {
240  using T = typename std::remove_cv<
241  typename std::remove_reference<
242  decltype(*begin(range))
243  >::type
244  >::type;
245  const size_t n = std::distance(begin(range), end(range));
246  if (n == 0) return {};
247 
248  std::map<T, size_t> elem_counts;
249  bool too_many_unique = false;
250  for (const auto &v: range)
251  {
252  ++elem_counts[v];
253  if (elem_counts.size() > max_bins)
254  {
255  too_many_unique = true;
256  break;
257  }
258  }
259 
260  if (too_many_unique) {
261  return create_histogram_autorange<T>(range, max_bins);
262  } else {
263  return create_histogram_unbinned(elem_counts);
264  }
265 }
266 
267 } // namespace ACG
268 
Namespace providing different geometric functions concerning angles.