diff --git a/treeherder/perf/alerts.py b/treeherder/perf/alerts.py
index 153a6fbf419..a9923ca0cb1 100644
--- a/treeherder/perf/alerts.py
+++ b/treeherder/perf/alerts.py
@@ -11,7 +11,12 @@
 from django.db.models import Exists, OuterRef, Subquery
 
 from treeherder.perf.email import AlertNotificationWriter
-from treeherder.perf.methods import StudentDetector
+from treeherder.perf.methods.CramerVonMisesDetector import CramerVonMisesDetector
+from treeherder.perf.methods.KolmogorovSmirnovDetector import KolmogorovSmirnovDetector
+from treeherder.perf.methods.LeveneDetector import LeveneDetector
+from treeherder.perf.methods.MannWhitneyUDetector import MannWhitneyUDetector
+from treeherder.perf.methods.StudentDetector import StudentDetector
+from treeherder.perf.methods.WelchDetector import WelchDetector
 from treeherder.perf.models import (
     PerformanceAlert,
     PerformanceAlertSummary,
@@ -28,6 +33,8 @@
 logger = logging.getLogger(__name__)
 
+REPLICATES = False
+
 
 def send_alert_emails(emails, alert, alert_summary):
     notify_client = taskcluster.notify_client_factory()
 
@@ -214,7 +221,8 @@ def generate_new_alerts_in_series(signature):
 
 
 def build_cpd_methods():
-    student = StudentDetector.StudentDetector(
+    student = StudentDetector(
+        name="student",
         min_back_window=12,
         max_back_window=24,
         fore_window=12,
@@ -223,7 +231,64 @@ def build_cpd_methods():
         mag_check=True,
         above_threshold_is_anomaly=True,
     )
-    methods = {"student": student}
+    cvm = CramerVonMisesDetector(
+        name="cvm",
+        min_back_window=12,
+        max_back_window=24,
+        fore_window=12,
+        alert_threshold=2.0,
+        confidence_threshold=0.05,
+        mag_check=False,
+        above_threshold_is_anomaly=False,
+    )
+    ks = KolmogorovSmirnovDetector(
+        name="ks",
+        min_back_window=12,
+        max_back_window=24,
+        fore_window=12,
+        alert_threshold=2.0,
+        confidence_threshold=0.05,
+        mag_check=False,
+        above_threshold_is_anomaly=False,
+    )
+    welch = WelchDetector(
+        name="welch",
+        min_back_window=12,
+        max_back_window=24,
+        fore_window=12,
+        alert_threshold=2.0,
+        confidence_threshold=0.05,
+        mag_check=False,
+        above_threshold_is_anomaly=False,
+    )
+    levene = LeveneDetector(
+        name="levene",
+        min_back_window=12,
+        max_back_window=24,
+        fore_window=12,
+        alert_threshold=2.0,
+        confidence_threshold=0.05,
+        mag_check=False,
+        above_threshold_is_anomaly=False,
+    )
+    mwu = MannWhitneyUDetector(
+        name="mwu",
+        min_back_window=12,
+        max_back_window=24,
+        fore_window=12,
+        alert_threshold=2.0,
+        confidence_threshold=0.05,
+        mag_check=False,
+        above_threshold_is_anomaly=False,
+    )
+    methods = {
+        "student": student,
+        "cvm": cvm,
+        "ks": ks,
+        "welch": welch,
+        "levene": levene,
+        "mwu": mwu,
+    }
     return methods
 
 
@@ -313,7 +378,7 @@ def generate_new_test_alerts_in_series(signature):
     # get series data starting from either:
     # (1) the last alert, if there is one
     # (2) the alerts max age
-    # (use whichever is newer)
+    # use whichever is newer
     max_alert_age = alert_after_ts = datetime.now() - settings.PERFHERDER_ALERTS_MAX_AGE
     series = PerformanceDatum.objects.filter(signature=signature, push_timestamp__gte=max_alert_age)
     latest_alert_timestamp = (
@@ -360,7 +425,7 @@ def generate_new_test_alerts_in_series(signature):
     data = list(revision_data.values())
     methods = build_cpd_methods()
     student_method = methods["student"]
-    analyzed_series = student_method.detect_changes(data, signature)
+    analyzed_series = student_method.detect_changes(data, signature, replicates_enabled=REPLICATES)
 
     with transaction.atomic():
         create_alerts(signature, student_method, analyzed_series)
diff --git a/treeherder/perf/methods/BaseDetector.py b/treeherder/perf/methods/BaseDetector.py
index 2ed711ed2f2..c2bc1f5fea3 100644
--- a/treeherder/perf/methods/BaseDetector.py
+++ b/treeherder/perf/methods/BaseDetector.py
@@ -46,8 +46,10 @@ def linear_weights(self, i, n):
         return float(n - i) / float(n)
 
     @abstractmethod
-    def calc_confidence(self, jw, kw, confidence_threshold, last_seen_regression):
+    def calc_confidence(
+        self, jw, kw, confidence_threshold, last_seen_regression, replicates_enabled
+    ):
         # replaces calc_t function
         """
         Abstract method that must be implemented by subclasses to calculate confidence (p-value or T-value).
         """
@@ -113,7 +115,7 @@ def is_representative_point(self, data, i, min_back_window, fore_window):
         # No future consecutive points with same value, so this IS the representative
         return True
 
-    def analyze(self, revision_data, weight_fn=None):
+    def analyze(self, revision_data, replicates_enabled, weight_fn=None):
         """Returns the average and sample variance (s**2) of a list of floats.
 
         `weight_fn` is a function that takes a list index and a window width, and
@@ -133,14 +135,17 @@ def analyze(self, revision_data, weight_fn=None):
         weights = [weight_fn(i, num_revisions) for i in range(num_revisions)]
         weighted_sum = 0
         sum_of_weights = 0
+        source_attr = "replicates" if replicates_enabled else "values"
         for i in range(num_revisions):
-            weighted_sum += sum(value * weights[i] for value in revision_data[i].values)
-            sum_of_weights += weights[i] * len(revision_data[i].values)
+            weighted_sum += sum(
+                value * weights[i] for value in getattr(revision_data[i], source_attr)
+            )
+            sum_of_weights += weights[i] * len(getattr(revision_data[i], source_attr))
         weighted_avg = weighted_sum / sum_of_weights if num_revisions > 0 else 0.0
 
         # now that we have a weighted average, we can calculate the variance of the
         # whole series
-        all_data = [v for datum in revision_data for v in datum.values]
+        all_data = [v for datum in revision_data for v in getattr(datum, source_attr)]
         variance = (
             (sum(pow(d - weighted_avg, 2) for d in all_data) / (len(all_data) - 1))
             if len(all_data) > 1
@@ -181,7 +186,7 @@ def check_magnitude_of_change(self, signature, prev_value, new_value, magnitude_
             return True
         return False
 
-    def detect_changes(self, data, signature):
+    def detect_changes(self, data, signature, replicates_enabled):
         min_back_window = signature.min_back_window
         if min_back_window is None:
             min_back_window = self.min_back_window
@@ -230,11 +235,11 @@ def detect_changes(self, data, signature):
                 di.amount_next_data += len(kw[-1].values)
                 next_indice += 1
 
-            di.historical_stats = self.analyze(jw)
-            di.forward_stats = self.analyze(kw)
+            di.historical_stats = self.analyze(jw, replicates_enabled)
+            di.forward_stats = self.analyze(kw, replicates_enabled)
 
             di.confidence[self.name], last_seen_regression = self.calc_confidence(
-                jw, kw, confidence_threshold, last_seen_regression
+                jw, kw, confidence_threshold, last_seen_regression, replicates_enabled
             )
 
         # Now that the confidence scores are calculated, go back through the data to
diff --git a/treeherder/perf/methods/CramerVonMisesDetector.py b/treeherder/perf/methods/CramerVonMisesDetector.py
new file mode 100644
index 00000000000..77cf673167e
--- /dev/null
+++ b/treeherder/perf/methods/CramerVonMisesDetector.py
@@ -0,0 +1,34 @@
+from scipy import stats
+
+from treeherder.perf.methods.BaseDetector import BaseDetector
+
+
+class CramerVonMisesDetector(BaseDetector):
+    """
+    Detector using Cramér-von Mises test.
+    """
+
+    def calc_confidence(self, jw, kw, confidence_threshold, confidence, replicates_enabled):
+        """
+        Calculate Cramér-von Mises test statistic and p-value.
+        """
+        source_attr = "replicates" if replicates_enabled else "values"
+
+        jw_values = [v for datum in jw for v in getattr(datum, source_attr)]
+        kw_values = [v for datum in kw for v in getattr(datum, source_attr)]
+
+        if len(jw_values) < 2 or len(kw_values) < 2:
+            return 1.0, confidence + 1
+
+        try:
+            result = stats.cramervonmises_2samp(jw_values, kw_values)
+            p = result.pvalue
+        except Exception:
+            p = 1.0
+
+        if p < confidence_threshold:
+            confidence = 0
+        else:
+            confidence += 1
+
+        return p, confidence
diff --git a/treeherder/perf/methods/KolmogorovSmirnovDetector.py b/treeherder/perf/methods/KolmogorovSmirnovDetector.py
new file mode 100644
index 00000000000..854d28c1e33
--- /dev/null
+++ b/treeherder/perf/methods/KolmogorovSmirnovDetector.py
@@ -0,0 +1,34 @@
+from scipy import stats
+
+from treeherder.perf.methods.BaseDetector import BaseDetector
+
+
+class KolmogorovSmirnovDetector(BaseDetector):
+    """
+    Detector using Kolmogorov-Smirnov test.
+    """
+
+    def calc_confidence(self, jw, kw, confidence_threshold, confidence, replicates_enabled):
+        """
+        Calculate Kolmogorov-Smirnov test statistic and p-value.
+        """
+        source_attr = "replicates" if replicates_enabled else "values"
+
+        jw_values = [v for datum in jw for v in getattr(datum, source_attr)]
+        kw_values = [v for datum in kw for v in getattr(datum, source_attr)]
+
+        if len(jw_values) < 2 or len(kw_values) < 2:
+            return 1.0, confidence + 1
+
+        try:
+            result = stats.ks_2samp(jw_values, kw_values)
+            p = result.pvalue
+        except Exception:
+            p = 1.0
+
+        if p < confidence_threshold:
+            confidence = 0
+        else:
+            confidence += 1
+
+        return p, confidence
diff --git a/treeherder/perf/methods/LeveneDetector.py b/treeherder/perf/methods/LeveneDetector.py
new file mode 100644
index 00000000000..51b90462385
--- /dev/null
+++ b/treeherder/perf/methods/LeveneDetector.py
@@ -0,0 +1,36 @@
+from scipy import stats
+
+from treeherder.perf.methods.BaseDetector import BaseDetector
+
+
+class LeveneDetector(BaseDetector):
+    """
+    Detector using Levene's test (tests for equal variances).
+    """
+
+    def calc_confidence(
+        self, jw, kw, confidence_threshold, last_seen_regression, replicates_enabled
+    ):
+        """
+        Calculate Levene's test statistic and p-value.
+        """
+        source_attr = "replicates" if replicates_enabled else "values"
+
+        jw_values = [v for datum in jw for v in getattr(datum, source_attr)]
+        kw_values = [v for datum in kw for v in getattr(datum, source_attr)]
+
+        if len(jw_values) < 2 or len(kw_values) < 2:
+            return 1.0, last_seen_regression + 1
+
+        try:
+            result = stats.levene(jw_values, kw_values)
+            p = result.pvalue
+        except Exception:
+            p = 1.0
+
+        if p < confidence_threshold:
+            last_seen_regression = 0
+        else:
+            last_seen_regression += 1
+
+        return p, last_seen_regression
diff --git a/treeherder/perf/methods/MannWhitneyUDetector.py b/treeherder/perf/methods/MannWhitneyUDetector.py
new file mode 100644
index 00000000000..f0b34b3387d
--- /dev/null
+++ b/treeherder/perf/methods/MannWhitneyUDetector.py
@@ -0,0 +1,36 @@
+from scipy import stats
+
+from treeherder.perf.methods.BaseDetector import BaseDetector
+
+
+class MannWhitneyUDetector(BaseDetector):
+    """
+    Detector using Mann-Whitney U test (non-parametric).
+    """
+
+    def calc_confidence(
+        self, jw, kw, confidence_threshold, last_seen_regression, replicates_enabled
+    ):
+        """
+        Calculate Mann-Whitney U test statistic and p-value.
+        """
+        source_attr = "replicates" if replicates_enabled else "values"
+
+        jw_values = [v for datum in jw for v in getattr(datum, source_attr)]
+        kw_values = [v for datum in kw for v in getattr(datum, source_attr)]
+
+        if len(jw_values) < 2 or len(kw_values) < 2:
+            return 1.0, last_seen_regression + 1
+
+        try:
+            result = stats.mannwhitneyu(jw_values, kw_values, alternative="two-sided")
+            p = result.pvalue
+        except Exception:
+            p = 1.0
+
+        if p < confidence_threshold:
+            last_seen_regression = 0
+        else:
+            last_seen_regression += 1
+
+        return p, last_seen_regression
diff --git a/treeherder/perf/methods/StudentDetector.py b/treeherder/perf/methods/StudentDetector.py
index dc35cea251a..a02b46d5c17 100644
--- a/treeherder/perf/methods/StudentDetector.py
+++ b/treeherder/perf/methods/StudentDetector.py
@@ -2,29 +2,9 @@
 
 
 class StudentDetector(BaseDetector):
-    def __init__(
-        self,
-        name="student",
-        min_back_window=12,
-        max_back_window=24,
-        fore_window=12,
-        alert_threshold=2.0,
-        confidence_threshold=7,
-        mag_check=True,
-        above_threshold_is_anomaly=True,
+    def calc_confidence(
+        self, w1, w2, confidence_threshold, last_seen_regression, replicates_enabled
     ):
-        super().__init__(
-            name=name,
-            min_back_window=min_back_window,
-            max_back_window=max_back_window,
-            fore_window=fore_window,
-            alert_threshold=alert_threshold,
-            confidence_threshold=confidence_threshold,
-            mag_check=mag_check,
-            above_threshold_is_anomaly=above_threshold_is_anomaly,
-        )
-
-    def calc_confidence(self, w1, w2, confidence_threshold, last_seen_regression):
         # replaces calc_t function
         """Perform a Students t-test on the two sets of revision data.
 
@@ -33,8 +13,8 @@ def calc_confidence(self, w1, w2, confidence_threshold, last_seen_regression):
         if not w1 or not w2:
             confidence = 0
         else:
-            s1 = self.analyze(w1, self.linear_weights)
-            s2 = self.analyze(w2, self.linear_weights)
+            s1 = self.analyze(w1, replicates_enabled, self.linear_weights)
+            s2 = self.analyze(w2, replicates_enabled, self.linear_weights)
             delta_s = s2["avg"] - s1["avg"]
 
             if delta_s == 0:
diff --git a/treeherder/perf/methods/WelchDetector.py b/treeherder/perf/methods/WelchDetector.py
new file mode 100644
index 00000000000..b4a5a6840fb
--- /dev/null
+++ b/treeherder/perf/methods/WelchDetector.py
@@ -0,0 +1,36 @@
+from scipy import stats
+
+from treeherder.perf.methods.BaseDetector import BaseDetector
+
+
+class WelchDetector(BaseDetector):
+    """
+    Detector using Welch's t-test (unequal variances t-test).
+    """
+
+    def calc_confidence(
+        self, jw, kw, confidence_threshold, last_seen_regression, replicates_enabled
+    ):
+        """
+        Calculate Welch's t-test statistic and p-value.
+        """
+        source_attr = "replicates" if replicates_enabled else "values"
+
+        jw_values = [v for datum in jw for v in getattr(datum, source_attr)]
+        kw_values = [v for datum in kw for v in getattr(datum, source_attr)]
+
+        if len(jw_values) < 2 or len(kw_values) < 2:
+            return 1.0, last_seen_regression + 1  # p-value of 1.0 (no significance)
+
+        try:
+            result = stats.ttest_ind(jw_values, kw_values, equal_var=False)
+            p = result.pvalue
+        except Exception:
+            p = 1.0
+
+        if p < confidence_threshold:
+            last_seen_regression = 0
+        else:
+            last_seen_regression += 1
+
+        return p, last_seen_regression