import { DataQuality } from "@prisma/client";
import { standardDeviation } from "math-standard-deviation";
import { sumBy } from "~/lib/lodash";

// Heuristic normality check based on the empirical 68-95 rule: the sample is
// considered normally distributed when at least 68% of the values lie within
// one standard deviation of the mean AND at least 95% lie within two.
//
// The distance is measured from the mean (|value - average|), so values far
// below the mean are treated as outliers just like values far above it.
// Returns false for an empty sample.
const isNormalDistribution = (values: number[]): boolean => {
  const count = values.length;

  // Nothing to evaluate, and the ratios below would divide by zero.
  if (count === 0) {
    return false;
  }

  const stdev = standardDeviation(values);
  const average = sumBy(values) / count;

  // Share of the sample lying strictly closer to the mean than `deviations` standard deviations.
  // The comparison is strict, so a sample with zero spread never qualifies.
  const shareWithin = (deviations: number): number => {
    const maxDistance = stdev * deviations;
    const inside = values.filter((value) => Math.abs(value - average) < maxDistance);

    return inside.length / count;
  };

  return shareWithin(1) >= 0.68 && shareWithin(2) >= 0.95;
};

// Grades a list of amounts.
// - Anything below STRONG according to computeMinimalDataQuality is returned as is.
// - STRONG is upgraded to EXCELLENT only when there are at least 20 amounts from at
//   least 6 distinct companies and the amounts pass the normal distribution check.
export const computeDataQuality = (amounts: number[], distinctCompaniesCount: number): DataQuality => {
  const baseline = computeMinimalDataQuality(amounts.length, distinctCompaniesCount);

  if (baseline !== DataQuality.STRONG) {
    return baseline;
  }

  const tooSmallForExcellent = amounts.length < 20 || distinctCompaniesCount < 6;

  if (!tooSmallForExcellent && isNormalDistribution(amounts)) {
    return DataQuality.EXCELLENT;
  }

  return DataQuality.STRONG;
};

// Gives the minimal data quality, based only on the sample size and the number of distinct companies.
// No normal distribution logic is applied here.
// The current company should NOT be excluded from the distinctCompaniesCount parameter.
const computeMinimalDataQuality = (sampleSize: number, distinctCompaniesCount: number): DataQuality => {
  const tooFewSamples = sampleSize < 5;
  const tooFewCompanies = distinctCompaniesCount < 3;

  if (tooFewSamples || tooFewCompanies) {
    return DataQuality.NONE;
  }

  return sampleSize < 8 ? DataQuality.FAIR : DataQuality.STRONG;
};

// Convenience check for callers that only have a sample size (not the list of amounts):
// true when the minimal data quality for these counts is NONE.
export const hasInsufficientDataQuality = (sampleSize: number, distinctCompaniesCount: number): boolean => {
  const minimalQuality = computeMinimalDataQuality(sampleSize, distinctCompaniesCount);

  return minimalQuality === DataQuality.NONE;
};

// Combines several data qualities into one by keeping the weakest one present,
// using the order NONE, AI_ESTIMATED, FAIR, STRONG.
// Returns EXCELLENT when none of those appear in the list (including for an empty list).
export const combineDataQuality = (dataQualityList: DataQuality[]) => {
  const weakestFirst = [DataQuality.NONE, DataQuality.AI_ESTIMATED, DataQuality.FAIR, DataQuality.STRONG];

  for (const quality of weakestFirst) {
    if (dataQualityList.includes(quality)) {
      return quality;
    }
  }

  return DataQuality.EXCELLENT;
};
