Source code for vespa.transitsignal

from __future__ import division, print_function

import os,os.path
import logging
import pickle

on_rtd = os.environ.get('READTHEDOCS') == 'True'

if not on_rtd:
    import numpy as np
    import pandas as pd
    import matplotlib.pyplot as plt
    import numpy.random as rand
    from scipy.stats import gaussian_kde
    import corner
    from emcee.autocorr import integrated_time, AutocorrError
    from astropy.io import ascii
else:
    np, pd, plt, rand = (None, None, None, None)
    gaussian_kde = None



try:
    import corner
except ImportError:
    pass

if not on_rtd:
    from .plotutils import setfig
    from .hashutils import hashcombine, hasharray
    from .transit_basic import traptransit, fit_traptransit, traptransit_MCMC, MAXSLOPE
    from .statutils import kdeconf, qstd, conf_interval
else:
    MAXSLOPE = None

def load_pkl(filename):
    with open(filename, 'rb') as fin:
        return pickle.load(fin)

[docs]class TransitSignal(object): """A phased-folded transit signal. Epoch of the transit at 0, 'continuum' set at 1. :param ts, fs, dfs: Times (days from mid-transit), fluxes (relative to 1), flux uncertainties. dfs optional :param P: Orbital period. :param p0: (optional) Initial guess for least-squares trapezoid fit. If not provided, then some decent guess will be made (which is better on made-up data than real...) :param name: (optional) Name of the signal. :param maxslope: (optional) Upper limit to use for "slope" parameter (T/tau) in the MCMC fitting of signal. Default is 15. .. note:: The implementation of this object can use some refactoring; as it is directly translated from some older code. As such, not all methods/attributes are well documented. """ def __init__(self,ts,fs,dfs=None,P=None,p0=None,name='',maxslope=MAXSLOPE): ts = np.atleast_1d(ts) fs = np.atleast_1d(fs) inds = ts.argsort() self.ts = ts[inds] self.fs = fs[inds] self.name = name self.P = P self.maxslope = maxslope if type(P) == type(np.array([1])): self.P = P[0] #set default best-guess trapezoid parameters if p0 is None: depth = 1 - fs.min() duration = (fs < (1-0.01*depth)).sum()/float(len(fs)) * (ts[-1] - ts[0]) tc0 = ts[fs.argmin()] p0 = np.array([duration,depth,5.,tc0]) tfit = fit_traptransit(ts,fs,p0) if dfs is None: dfs = (self.fs - traptransit(self.ts,tfit)).std() if np.size(dfs)==1: dfs = np.ones(len(self.ts))*dfs self.dfs = dfs self.dur,self.depth,self.slope,self.center = tfit self.trapfit = tfit logging.debug('trapezoidal leastsq fit: {}'.format(self.trapfit)) self.hasMCMC = False @classmethod def from_ascii(cls, filename, **kwargs): table = ascii.read(filename).to_pandas() if len(table.columns)==3: return cls(table.iloc[:, 0].values, table.iloc[:, 1].values, table.iloc[:, 2].values, **kwargs) elif len(table.columns)==2: return cls(table.iloc[:, 0].values, table.iloc[:, 1].values, **kwargs)
[docs] def save_hdf(self, filename, path=''): """ Save transitsignal info using HDF...not yet implemented. .. note:: Refactoring plan is to re-write saving to use HDF instead of pickle. """ raise NotImplementedError
def triangle(self, **kwargs): pts = np.array([self.logdeps, self.durs, self.slopes]).T fig = corner.corner(pts, labels=['log (Depth)', 'Duration', 'T/tau'], **kwargs) return fig
[docs] def save_pkl(self, filename): """ Pickles TransitSignal. """ with open(filename, 'wb') as fout: pickle.dump(self, fout)
#eventually make this save_hdf
[docs] def save(self, filename): """ Calls save_pkl function. """ self.save_pkl(filename)
def __eq__(self,other): return hash(self) == hash(other) def __hash__(self): key = hashcombine(hasharray(self.ts), hasharray(self.fs), self.P, self.maxslope) if self.hasMCMC: key = hashcombine(key, hasharray(self.slopes), hasharray(self.durs), hasharray(self.logdeps)) return key
[docs] def plot(self, fig=None, plot_trap=False, name=False, trap_color='g', trap_kwargs=None, **kwargs): """ Makes a simple plot of signal :param fig: (optional) Argument for :func:`plotutils.setfig`. :param plot_trap: (optional) Whether to plot the (best-fit least-sq) trapezoid fit. :param name: (optional) Whether to annotate plot with the name of the signal; can be ``True`` (in which case ``self.name`` will be used), or any arbitrary string. :param trap_color: (optional) Color of trapezoid fit line. :param trap_kwargs: (optional) Keyword arguments to pass to trapezoid fit line. :param **kwargs: (optional) Additional keyword arguments passed to ``plt.plot``. """ setfig(fig) plt.plot(self.ts,self.fs,'.',**kwargs) if plot_trap and hasattr(self,'trapfit'): if trap_kwargs is None: trap_kwargs = {} plt.plot(self.ts, traptransit(self.ts,self.trapfit), color=trap_color, **trap_kwargs) if name is not None: if type(name)==type(''): text = name else: text = self.name plt.annotate(text,xy=(0.1,0.1),xycoords='axes fraction',fontsize=22) if hasattr(self,'depthfit') and not np.isnan(self.depthfit[0]): lo = 1 - 3*self.depthfit[0] hi = 1 + 2*self.depthfit[0] else: lo = 1 hi = 1 sig = qstd(self.fs,0.005) hi = max(hi,self.fs.mean() + 7*sig) lo = min(lo,self.fs.mean() - 7*sig) logging.debug('lo={}, hi={}'.format(lo,hi)) plt.ylim((lo,hi)) plt.xlabel('time [days]') plt.ylabel('Relative flux')
[docs] def MCMC(self, niter=500, nburn=200, nwalkers=200, threads=1, fit_partial=False, width=3, savedir=None, refit=False, thin=10, conf=0.95, maxslope=MAXSLOPE, debug=False, p0=None): """ Fit transit signal to trapezoid model using MCMC .. note:: As currently implemented, this method creates a bunch of attributes relevant to the MCMC fit; I plan to refactor this to define those attributes as properties so as not to have their creation hidden away here. I plan to refactor how this works. """ if fit_partial: wok = np.where((np.absolute(self.ts-self.center) < (width*self.dur)) & ~np.isnan(self.fs)) else: wok = np.where(~np.isnan(self.fs)) if savedir is not None: if not os.path.exists(savedir): os.mkdir(savedir) alreadydone = True alreadydone &= savedir is not None alreadydone &= os.path.exists('%s/ts.npy' % savedir) alreadydone &= os.path.exists('%s/fs.npy' % savedir) if savedir is not None and alreadydone: ts_done = np.load('%s/ts.npy' % savedir) fs_done = np.load('%s/fs.npy' % savedir) alreadydone &= np.all(ts_done == self.ts[wok]) alreadydone &= np.all(fs_done == self.fs[wok]) if alreadydone and not refit: logging.info('MCMC fit already done for %s. Loading chains.' % self.name) Ts = np.load('%s/duration_chain.npy' % savedir) ds = np.load('%s/depth_chain.npy' % savedir) slopes = np.load('%s/slope_chain.npy' % savedir) tcs = np.load('%s/tc_chain.npy' % savedir) else: logging.info('Fitting data to trapezoid shape with MCMC for %s....' % self.name) if p0 is None: p0 = self.trapfit.copy() p0[0] = np.absolute(p0[0]) if p0[2] < 2: p0[2] = 2.01 if p0[1] < 0: p0[1] = 1e-5 logging.debug('p0 for MCMC = {}'.format(p0)) sampler = traptransit_MCMC(self.ts[wok],self.fs[wok],self.dfs[wok], niter=niter,nburn=nburn,nwalkers=nwalkers, threads=threads,p0=p0,return_sampler=True, maxslope=maxslope) Ts,ds,slopes,tcs = (sampler.flatchain[:,0],sampler.flatchain[:,1], sampler.flatchain[:,2],sampler.flatchain[:,3]) self.sampler = sampler if savedir is not None: np.save('%s/duration_chain.npy' % savedir,Ts) np.save('%s/depth_chain.npy' % savedir,ds) np.save('%s/slope_chain.npy' % savedir,slopes) np.save('%s/tc_chain.npy' % savedir,tcs) np.save('%s/ts.npy' % savedir,self.ts[wok]) np.save('%s/fs.npy' % savedir,self.fs[wok]) if debug: print(Ts) print(ds) print(slopes) print(tcs) N = len(Ts) try: self.Ts_acor = integrated_time(Ts) self.ds_acor = integrated_time(ds) self.slopes_acor = integrated_time(slopes) self.tcs_acor = integrated_time(tcs) self.fit_converged = True except AutocorrError: self.fit_converged = False ok = (Ts > 0) & (ds > 0) & (slopes > 0) & (slopes < self.maxslope) logging.debug('trapezoidal fit has {} good sample points'.format(ok.sum())) if ok.sum()==0: if (Ts > 0).sum()==0: #logging.debug('{} points with Ts > 0'.format((Ts > 0).sum())) logging.debug('{}'.format(Ts)) raise MCMCError('{}: 0 points with Ts > 0'.format(self.name)) if (ds > 0).sum()==0: #logging.debug('{} points with ds > 0'.format((ds > 0).sum())) logging.debug('{}'.format(ds)) raise MCMCError('{}: 0 points with ds > 0'.format(self.name)) if (slopes > 0).sum()==0: #logging.debug('{} points with slopes > 0'.format((slopes > 0).sum())) logging.debug('{}'.format(slopes)) raise MCMCError('{}: 0 points with slopes > 0'.format(self.name)) if (slopes < self.maxslope).sum()==0: #logging.debug('{} points with slopes < maxslope ({})'.format((slopes < self.maxslope).sum(),self.maxslope)) logging.debug('{}'.format(slopes)) raise MCMCError('{} points with slopes < maxslope ({})'.format((slopes < self.maxslope).sum(),self.maxslope)) durs,deps,logdeps,slopes = (Ts[ok],ds[ok],np.log10(ds[ok]), slopes[ok]) inds = (np.arange(len(durs)/thin)*thin).astype(int) durs,deps,logdeps,slopes = (durs[inds],deps[inds],logdeps[inds], slopes[inds]) self.durs,self.deps,self.logdeps,self.slopes = (durs,deps,logdeps,slopes) self._make_kde(conf=conf) self.hasMCMC = True
def corner(self, outfile=None, plot_contours=False, **kwargs): fig = corner.corner(self.kde.dataset.T, labels=['Duration', 'log(depth)', 'T/tau'], plot_contours=False, **kwargs) if outfile is not None: fig.savefig(outfile) return fig def _make_kde(self, conf=0.95): self.durkde = gaussian_kde(self.durs) self.depthkde = gaussian_kde(self.deps) self.slopekde = gaussian_kde(self.slopes) self.logdepthkde = gaussian_kde(self.logdeps) if self.fit_converged: try: durconf = kdeconf(self.durkde,conf) depconf = kdeconf(self.depthkde,conf) logdepconf = kdeconf(self.logdepthkde,conf) slopeconf = kdeconf(self.slopekde,conf) except: raise raise MCMCError('Error generating confidence intervals...fit must not have worked.') durmed = np.median(self.durs) depmed = np.median(self.deps) logdepmed = np.median(self.logdeps) slopemed = np.median(self.slopes) self.durfit = (durmed,np.array([durmed-durconf[0],durconf[1]-durmed])) self.depthfit = (depmed,np.array([depmed-depconf[0],depconf[1]-depmed])) self.logdepthfit = (logdepmed,np.array([logdepmed-logdepconf[0],logdepconf[1]-logdepmed])) self.slopefit = (slopemed,np.array([slopemed-slopeconf[0],slopeconf[1]-slopemed])) else: self.durfit = (np.nan,(np.nan,np.nan)) self.depthfit = (np.nan,(np.nan,np.nan)) self.logdepthfit = (np.nan,(np.nan,np.nan)) self.slopefit = (np.nan,(np.nan,np.nan)) points = np.array([self.durs,self.logdeps,self.slopes]) self.kde = gaussian_kde(points)
class TransitSignal_FromSamples(TransitSignal): """Use this if all you have is the trapezoid-fit samples """ def __init__(self, period, durs, depths, slopes, name='', **kwargs): self.period = period self.durs = durs self.deps = depths self.logdeps = np.log10(depths) self.slopes = slopes self.hasMCMC = True self.fit_converged = True #better be self._make_kde() self.name = name def MCMC(self, *args, **kwargs): pass def plot(self, *args, **kwargs): pass def __hash__(self): return hashcombine(self.period, hasharray(self.durs), hasharray(self.deps), hasharray(self.slopes)) class TransitSignal_DF(TransitSignal): def __init__(self, df, columns=['t','f','e_f'], **kwargs): t_col, f_col, e_f_col = columns t = df[t_col] f = df[f_col] if e_f_col in df: e_f = df[e_f_col] else: e_f = None TransitSignal.__init__(self, t, f, e_f, **kwargs) class TransitSignal_ASCII(TransitSignal): def __init__(self, filename, cols=(0,1), err_col=2, **kwargs): t, f = np.loadtxt(filename, usecols=cols, unpack=True) try: e_f = np.loadtxt(filename, usecols=(err_col,)) except: e_f = None TransitSignal.__init__(self, t, f, e_f, **kwargs) ############# Exceptions ##############3 class MCMCError(Exception): pass