a lot of fixes, improve visualization speed, add c++ code

commit: cdfcdaf098249d74919d79e1c77aab7622c43a1e [log] [tgz]
author: kdanylov aka koder <koder.mail@gmail.com> Sat Apr 29 10:03:39 2017 +0300
committer: kdanylov aka koder <koder.mail@gmail.com> Sat Apr 29 10:03:39 2017 +0300
tree: 39c15b8e7c2af01f9939dbfd31006eb0e150e091
parent: 150b2199655e69a159a1b9cc8cf3f9732df8ecd6 [diff] [blame]
diff --git a/wally/statistic.py b/wally/statistic.py
index a256587..8543e0f 100644
--- a/wally/statistic.py
+++ b/wally/statistic.py

@@ -123,10 +123,12 @@
 
 
 def calc_histo_stat_props(ts: TimeSeries,
-                          bins_edges: numpy.array,
+                          bins_edges: numpy.array = None,
                           rebins_count: int = None,
                           tail: float = 0.005) -> HistoStatProps:
-    log_bins = False
+    if bins_edges is None:
+        bins_edges = ts.histo_bins
+
     res = HistoStatProps(ts.data)
 
     # summ across all series
@@ -286,27 +288,37 @@
 
 
 def hist_outliers_perc(bin_populations: numpy.array,
-                       bounds_perc: Tuple[float, float] = (0.01, 0.99)) -> Tuple[int, int]:
+                       bounds_perc: Tuple[float, float] = (0.01, 0.99),
+                       min_bins_left: int = None) -> Tuple[int, int]:
     assert len(bin_populations.shape) == 1
     total_count = bin_populations.sum()
     lower_perc = total_count * bounds_perc[0]
     upper_perc = total_count * bounds_perc[1]
-    return numpy.searchsorted(numpy.cumsum(bin_populations), [lower_perc, upper_perc])
+    idx1, idx2 = numpy.searchsorted(numpy.cumsum(bin_populations), [lower_perc, upper_perc])
+
+    # don't cut too many bins. At least min_bins_left must left
+    if min_bins_left is not None and idx2 - idx1 < min_bins_left:
+        missed = min_bins_left - (idx2 - idx1) // 2
+        idx2 = min(len(bin_populations), idx2 + missed)
+        idx1 = max(0, idx1 - missed)
+
+    return idx1, idx2
 
 
 def ts_hist_outliers_perc(bin_populations: numpy.array,
                           window_size: int = 10,
-                          bounds_perc: Tuple[float, float] = (0.01, 0.99)) -> Tuple[int, int]:
+                          bounds_perc: Tuple[float, float] = (0.01, 0.99),
+                          min_bins_left: int = None) -> Tuple[int, int]:
     assert len(bin_populations.shape) == 2
 
     points = list(range(0, len(bin_populations), window_size))
     if len(bin_populations) % window_size != 0:
         points.append(points[-1] + window_size)
 
-    ranges = []
+    ranges = []  # type: List[List[int]]
     for begin, end in zip(points[:-1], points[1:]):
         window_hist = bin_populations[begin:end].sum(axis=0)
-        ranges.append(hist_outliers_perc(window_hist, bounds_perc=bounds_perc))
+        ranges.append(hist_outliers_perc(window_hist, bounds_perc=bounds_perc, min_bins_left=min_bins_left))
 
     return min(i[0] for i in ranges), max(i[1] for i in ranges)
commit	cdfcdaf098249d74919d79e1c77aab7622c43a1e	[log] [tgz]
author	kdanylov aka koder <koder.mail@gmail.com>	Sat Apr 29 10:03:39 2017 +0300
committer	kdanylov aka koder <koder.mail@gmail.com>	Sat Apr 29 10:03:39 2017 +0300
tree	39c15b8e7c2af01f9939dbfd31006eb0e150e091
parent	150b2199655e69a159a1b9cc8cf3f9732df8ecd6 [diff] [blame]