Source code for pollscraper.trends

import pandas as pd
import numpy as np
from pollscraper import logger
from pandas.api.types import is_datetime64_any_dtype as is_datetime
from pandas.tseries.frequencies import to_offset
from datetime import datetime

# The type of ``None``, captured once so that values can be compared against
# it with an identity check such as ``type(x) is NoneTypeOverload``.
NoneTypeOverload = type(None)


class Weighting:
    """Build per-poll weights as a product of independent factors.

    Every ``*_factor`` method scales ``sample_weights`` **in place** and
    returns ``None``; :meth:`weighting_scheme_538` chains them and returns
    the resulting weights.

    Attributes:
        modality_factor_weights: multiplier per modality label.
        population_factor_weights: multiplier per population label.
        pollster_factor_weights: multiplier per pollster name.

    All multipliers defined here are 1.0, and labels that are missing from
    a table are also treated as 1, so out of the box only the sample-size
    factor changes the weights.
    """

    def __init__(self) -> None:
        self.modality_factor_weights = {
            'Online': 1.0,
            'IVR': 1.0,
            'Live caller': 1.,
        }
        self.population_factor_weights = {
            'Adults': 1.0,
            'RV': 1.0,
            'LV': 1.,
        }
        self.pollster_factor_weights = {
            'Dataland Daily': 1.,
            'No Province Left Behind': 1.,
            'Progressive Polling': 1.,
            'Cobolite Coalition Calling': 1.,
            'Big Dataland Surveys': 1.,
            'Synapse Strategies': 1.,
            'Metaflux University': 1.,
            'Conference Board of Dataland': 1.,
            'Dataland Register-Gazette': 1.,
            'Proudly Paid For Polling': 1.,
            'Electropolis Elects': 1.,
        }

    @staticmethod
    def _require_series(column, label):
        """Raise ``AssertionError`` unless ``column`` is a pandas Series."""
        # AssertionError is kept (rather than TypeError) so the exception
        # type seen by existing callers does not change.
        assert isinstance(column, pd.Series), (
            f'{label} must be a pandas Series, got {type(column).__name__}'
        )

    def _apply_lookup(self, sample_weights, column, lookup, label):
        """Multiply ``sample_weights`` in place by ``lookup[column]``.

        Does nothing when ``column`` is ``None``. Labels absent from
        ``lookup`` contribute a neutral factor of 1.
        """
        if column is None:
            return
        self._require_series(column, label)
        sample_weights *= column.map(lookup).fillna(1)

    def modality_factor(self, sample_weights, modality_col):
        """Scale ``sample_weights`` in place by the modality multipliers.

        Args:
            sample_weights: Series of weights, modified in place.
            modality_col: Series of modality labels, or ``None`` to skip.

        Raises:
            AssertionError: if ``modality_col`` is neither ``None`` nor a
                pandas Series.
        """
        self._apply_lookup(sample_weights, modality_col,
                           self.modality_factor_weights, 'modality_col')

    def sponsor_factor(self, sample_weights, sponsor_col):
        """Validate ``sponsor_col``; the weights are left unchanged.

        No sponsor multipliers are defined on this class, so this method
        only checks the argument type.

        Args:
            sample_weights: Series of weights (not modified).
            sponsor_col: Series of sponsor labels, or ``None`` to skip.

        Raises:
            AssertionError: if ``sponsor_col`` is neither ``None`` nor a
                pandas Series.
        """
        if sponsor_col is None:
            return
        self._require_series(sponsor_col, 'sponsor_col')

    def population_factor(self, sample_weights, population_col):
        """Scale ``sample_weights`` in place by the population multipliers.

        Args:
            sample_weights: Series of weights, modified in place.
            population_col: Series of population labels, or ``None`` to skip.

        Raises:
            AssertionError: if ``population_col`` is neither ``None`` nor a
                pandas Series.
        """
        self._apply_lookup(sample_weights, population_col,
                           self.population_factor_weights, 'population_col')

    def pollster_factor(self, sample_weights, pollster_col):
        """Scale ``sample_weights`` in place by the pollster multipliers.

        Args:
            sample_weights: Series of weights, modified in place.
            pollster_col: Series of pollster names, or ``None`` to skip.

        Raises:
            AssertionError: if ``pollster_col`` is neither ``None`` nor a
                pandas Series.
        """
        self._apply_lookup(sample_weights, pollster_col,
                           self.pollster_factor_weights, 'pollster_col')

    def sample_size_factor(self, sample_weights, sample_col):
        """Scale ``sample_weights`` in place by ``sqrt(size / mean size)``.

        Args:
            sample_weights: Series of weights, modified in place.
            sample_col: Series of sample sizes, or ``None`` to skip.

        Raises:
            AssertionError: if ``sample_col`` is neither ``None`` nor a
                pandas Series.
        """
        if sample_col is None:
            return
        self._require_series(sample_col, 'sample_col')
        avg_sample_size = sample_col.mean()
        sample_weights *= np.sqrt(sample_col / avg_sample_size)

    def weighting_scheme_538(self, samples, sample_col=None,
                             modality_col=None, sponsor_col=None,
                             population_col=None, pollster_col=None):
        """Return one weight per entry of ``samples``.

        The weights start at 1.0 and are multiplied by each factor whose
        column is supplied; columns left as ``None`` are skipped.

        Args:
            samples: one entry per poll; only its length and (for a
                Series) its index are used.
            sample_col: Series of sample sizes, or ``None``.
            modality_col: Series of modality labels, or ``None``.
            sponsor_col: Series of sponsor labels, or ``None``.
            population_col: Series of population labels, or ``None``.
            pollster_col: Series of pollster names, or ``None``.

        Returns:
            A float Series of weights. It shares the index of ``samples``
            when that is a Series, otherwise it has a default RangeIndex.

        Raises:
            AssertionError: if any supplied column is not a pandas Series.
        """
        # The weights must carry the same index as the input. With a fresh
        # RangeIndex, the in-place multiplications below would align on
        # mismatched labels and turn every weight into NaN whenever the
        # polls are indexed by anything other than 0..n-1.
        if isinstance(samples, pd.Series):
            index = samples.index
        else:
            index = pd.RangeIndex(len(samples))
        sample_weights = pd.Series(1.0, index=index, dtype=float)

        self.sample_size_factor(sample_weights, sample_col)
        self.modality_factor(sample_weights, modality_col)
        self.sponsor_factor(sample_weights, sponsor_col)
        self.population_factor(sample_weights, population_col)
        self.pollster_factor(sample_weights, pollster_col)
        return sample_weights
def wavg(group):
    """Return the weighted average of a two-column frame.

    Args:
        group: DataFrame whose first column holds the values and whose
            second column holds the corresponding weights.

    Returns:
        ``sum(value * weight) / sum(weight)`` over the rows whose value is
        not missing. The result is NaN when no weight remains.
    """
    values = group.iloc[:, 0]
    weights = group.iloc[:, 1]
    # A missing value is skipped by the numerator's sum, so its weight must
    # be dropped from the denominator too; otherwise the average is biased
    # toward zero.
    weights = weights.where(values.notna())
    return (values * weights).sum() / weights.sum()
def check_offset(offset):
    """Check that ``offset`` can be parsed by pandas ``to_offset``.

    Args:
        offset: value handed to ``to_offset`` unchanged.

    Returns:
        None. The parsed offset is discarded; only validity matters.

    Raises:
        Whatever ``to_offset`` raises for an unparseable value; the
        exception propagates unchanged.
    """
    # No try/except here: catching an exception only to re-raise it
    # unchanged adds nothing, so the error is simply allowed to propagate.
    to_offset(offset)
class PollTrend:
    """
    Represents poll trends and provides methods to calculate trends.

    This class calculates average poll trends based on poll data.

    Attributes:
        None
    """
    # NOTE(review): no methods or attributes are defined in the class body as
    # seen here, although the docstring mentions trend calculation; confirm
    # where that logic lives.
def check_for_outliers_in_poll_averages(
    poll_averages, avg, sig, n_sigma, candidate
):
    """Select the poll averages lying at least ``n_sigma * sig`` from ``avg``.

    Args:
        poll_averages: pandas object holding the averaged poll values.
        avg: reference mean the values are compared against.
        sig: spread that is scaled by ``n_sigma`` to form the threshold.
        n_sigma: multiplier applied to ``sig``.
        candidate: name used only in the warning text.

    Returns:
        The entries of ``poll_averages`` whose absolute distance from
        ``avg`` is greater than or equal to ``n_sigma * sig``. Two
        warnings are logged when that selection is not empty.
    """
    distance = np.abs(poll_averages - avg)
    threshold = n_sigma * sig
    # The comparison is inclusive, although the log text below says ">".
    flagged = poll_averages.loc[distance >= threshold]

    if flagged.empty:
        return flagged

    n_flagged = flagged.shape[0]
    logger.warning(f'Checking averaged polls for candidate {candidate}.')
    logger.warning(f'Found {n_flagged} poll averages detected '
                   f'at > {n_sigma} sigma from the mean')
    return flagged
def check_for_outliers_in_individual_polls(
    poll_data, candidate, avg, sig, n_sigma
):
    """Select individual polls lying at least ``n_sigma * sig`` from ``avg``.

    Args:
        poll_data: DataFrame with one column named ``candidate``.
        candidate: column of ``poll_data`` to inspect; also used in the
            warning text.
        avg: rolling average, joined to the polls on the index.
        sig: rolling spread, joined to the polls on the index.
        n_sigma: multiplier applied to the spread.

    Returns:
        The ``candidate`` column restricted to the rows whose absolute
        distance from the rolling average is greater than or equal to
        ``n_sigma`` times the spread. Two warnings are logged when that
        selection is not empty.
    """
    # Columns are assigned one after another onto an empty frame, so the
    # first one (the spread) fixes the index the average is aligned to.
    bands = pd.DataFrame().assign(sigma_band=sig, rolling_avg=avg)

    # Left join: every individual poll is kept and picks up the rolling
    # values that share its index label.
    merged = poll_data[candidate].to_frame().join(bands)

    distance = np.abs(merged[candidate] - merged['rolling_avg'])
    threshold = n_sigma * merged['sigma_band']
    flagged = merged.loc[distance >= threshold]

    if not flagged.empty:
        n_flagged = flagged.shape[0]
        logger.warning(f'Checking individual polls for candidate {candidate}.')
        logger.warning(f'Found {n_flagged} individual '
                       f'polls detected at > {n_sigma} sigma from the mean')
    return flagged[candidate]