Source code for adam_core.orbit_determination.outliers

from typing import Tuple

import numpy as np
import pyarrow.compute as pc

from .differential_correction import OrbitDeterminationObservations
from .fitted_orbits import FittedOrbitMembers


def calculate_max_outliers(
    num_obs: int, min_obs: int, contamination_percentage: float
) -> int:
    """
    Calculate the maximum number of allowable outliers.

    Linkages may contain erroneous observations that need to be removed. This
    function calculates the maximum number of observations that can be removed
    before the linkage no longer has the minimum number of observations
    required. The contamination percentage is the maximum percentage of
    observations that are allowed to be erroneous.

    Parameters
    ----------
    num_obs : int
        Number of observations in the linkage.
    min_obs : int
        Minimum number of observations required for a valid linkage.
    contamination_percentage : float
        Maximum percentage of observations that are allowed to be erroneous.
        Range is [0, 100].

    Returns
    -------
    outliers : int
        Maximum number of allowable outliers: the smaller of
        ``num_obs * contamination_percentage / 100`` (truncated to an integer)
        and ``num_obs - min_obs``.

    Raises
    ------
    AssertionError
        If ``num_obs < min_obs`` or if ``contamination_percentage`` lies
        outside of [0, 100].
    """
    assert (
        num_obs >= min_obs
    ), "Number of observations must be greater than or equal to the minimum number of observations."
    assert (
        contamination_percentage >= 0 and contamination_percentage <= 100
    ), "Contamination percentage must be between 0 and 100."

    # Multiply before dividing. Dividing the percentage by 100 first rounds it
    # to the nearest binary fraction, and the rounding error can push an exact
    # result just below the integer (e.g. 100 * (29 / 100) is
    # 28.999999999999996), which the truncation below would turn into 28.
    max_outliers = num_obs * contamination_percentage / 100

    # Never allow more removals than would leave fewer than min_obs
    # observations. int() truncates toward zero and returns a builtin int,
    # matching the annotated return type.
    return int(min(max_outliers, num_obs - min_obs))
def remove_lowest_probability_observation(
    orbit_members: FittedOrbitMembers, observations: OrbitDeterminationObservations
) -> Tuple[str, OrbitDeterminationObservations]:
    """
    Drop the single observation whose residual is the least probable.

    The probability of a residual is defined to be the probability of drawing
    a more extreme residual than the one observed. The member with the minimum
    probability is selected for removal. When several members share that
    minimum, the one with the largest sum of squared residual values (NaNs
    ignored) is chosen.

    Parameters
    ----------
    orbit_members : FittedOrbitMembers
        The members of a single orbit, carrying the residuals with respect to
        the observations.
    observations : OrbitDeterminationObservations
        The observations from which the worst observation is removed. Must
        contain every observation referenced by ``orbit_members``.

    Returns
    -------
    obs_id : str
        The ID of the observation that was removed.
    filtered_observations : OrbitDeterminationObservations
        The observations whose IDs match the remaining orbit members. Note
        that observations not referenced by ``orbit_members`` at all are not
        retained either, since the surviving IDs are taken from
        ``orbit_members``.

    Raises
    ------
    AssertionError
        If ``orbit_members`` spans more than one orbit, or if it references an
        observation ID that is missing from ``observations``.
    """
    assert (
        len(pc.unique(orbit_members.orbit_id)) == 1
    ), "Orbit members must only contain one orbit"
    assert pc.all(
        pc.is_in(orbit_members.obs_id, observations.id)
    ).as_py(), "Observations must contain all orbit member observations"

    # Select every member whose probability equals the minimum probability
    probabilities = orbit_members.residuals.probability
    is_least_probable = pc.equal(probabilities, pc.min(probabilities))
    candidates = orbit_members.apply_mask(is_least_probable)

    if len(candidates) > 1:
        # Ties on probability are broken in favor of the member with the
        # largest summed squared residual
        squared_sums = np.nansum(candidates.residuals.to_array() ** 2, axis=1)
        candidates = candidates.take([squared_sums.argmax()])

    # ID of the observation being rejected
    obs_id = candidates.obs_id[0].as_py()

    # IDs of the members that survive, then the matching observations
    is_survivor = pc.invert(pc.equal(orbit_members.obs_id, obs_id))
    surviving_ids = orbit_members.apply_mask(is_survivor).obs_id
    filtered_observations = observations.apply_mask(
        pc.is_in(observations.id, surviving_ids)
    )

    return obs_id, filtered_observations