from __future__ import division, print_function
import os,os.path
import logging
import pickle
on_rtd = os.environ.get('READTHEDOCS') == 'True'
if not on_rtd:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import numpy.random as rand
from scipy.stats import gaussian_kde
import corner
from emcee.autocorr import integrated_time, AutocorrError
from astropy.io import ascii
else:
np, pd, plt, rand = (None, None, None, None)
gaussian_kde = None
try:
import corner
except ImportError:
pass
if not on_rtd:
from .plotutils import setfig
from .hashutils import hashcombine, hasharray
from .transit_basic import traptransit, fit_traptransit, traptransit_MCMC, MAXSLOPE
from .statutils import kdeconf, qstd, conf_interval
else:
MAXSLOPE = None
def load_pkl(filename):
with open(filename, 'rb') as fin:
return pickle.load(fin)
[docs]class TransitSignal(object):
"""A phased-folded transit signal.
Epoch of the transit at 0, 'continuum' set at 1.
:param ts, fs, dfs:
Times (days from mid-transit), fluxes (relative to 1),
flux uncertainties. dfs optional
:param P:
Orbital period.
:param p0: (optional)
Initial guess for least-squares trapezoid fit.
If not provided, then some decent guess will be made
(which is better on made-up data than real...)
:param name: (optional)
Name of the signal.
:param maxslope: (optional)
Upper limit to use for "slope" parameter (T/tau)
in the MCMC fitting of signal. Default is 15.
.. note:: The implementation of this object can use some refactoring;
as it is directly translated from some older code. As
such, not all methods/attributes are well documented.
"""
def __init__(self,ts,fs,dfs=None,P=None,p0=None,name='',maxslope=MAXSLOPE):
ts = np.atleast_1d(ts)
fs = np.atleast_1d(fs)
inds = ts.argsort()
self.ts = ts[inds]
self.fs = fs[inds]
self.name = name
self.P = P
self.maxslope = maxslope
if type(P) == type(np.array([1])):
self.P = P[0]
#set default best-guess trapezoid parameters
if p0 is None:
depth = 1 - fs.min()
duration = (fs < (1-0.01*depth)).sum()/float(len(fs)) * (ts[-1] - ts[0])
tc0 = ts[fs.argmin()]
p0 = np.array([duration,depth,5.,tc0])
tfit = fit_traptransit(ts,fs,p0)
if dfs is None:
dfs = (self.fs - traptransit(self.ts,tfit)).std()
if np.size(dfs)==1:
dfs = np.ones(len(self.ts))*dfs
self.dfs = dfs
self.dur,self.depth,self.slope,self.center = tfit
self.trapfit = tfit
logging.debug('trapezoidal leastsq fit: {}'.format(self.trapfit))
self.hasMCMC = False
@classmethod
def from_ascii(cls, filename, **kwargs):
table = ascii.read(filename).to_pandas()
if len(table.columns)==3:
return cls(table.iloc[:, 0].values, table.iloc[:, 1].values, table.iloc[:, 2].values,
**kwargs)
elif len(table.columns)==2:
return cls(table.iloc[:, 0].values, table.iloc[:, 1].values,
**kwargs)
[docs] def save_hdf(self, filename, path=''):
"""
Save transitsignal info using HDF...not yet implemented.
.. note::
Refactoring plan is to re-write saving to use HDF
instead of pickle.
"""
raise NotImplementedError
def triangle(self, **kwargs):
pts = np.array([self.logdeps, self.durs, self.slopes]).T
fig = corner.corner(pts, labels=['log (Depth)',
'Duration', 'T/tau'], **kwargs)
return fig
[docs] def save_pkl(self, filename):
"""
Pickles TransitSignal.
"""
with open(filename, 'wb') as fout:
pickle.dump(self, fout)
#eventually make this save_hdf
[docs] def save(self, filename):
"""
Calls save_pkl function.
"""
self.save_pkl(filename)
def __eq__(self,other):
return hash(self) == hash(other)
def __hash__(self):
key = hashcombine(hasharray(self.ts),
hasharray(self.fs),
self.P,
self.maxslope)
if self.hasMCMC:
key = hashcombine(key, hasharray(self.slopes),
hasharray(self.durs),
hasharray(self.logdeps))
return key
[docs] def plot(self, fig=None, plot_trap=False, name=False, trap_color='g',
trap_kwargs=None, **kwargs):
"""
Makes a simple plot of signal
:param fig: (optional)
Argument for :func:`plotutils.setfig`.
:param plot_trap: (optional)
Whether to plot the (best-fit least-sq) trapezoid fit.
:param name: (optional)
Whether to annotate plot with the name of the signal;
can be ``True`` (in which case ``self.name`` will be
used), or any arbitrary string.
:param trap_color: (optional)
Color of trapezoid fit line.
:param trap_kwargs: (optional)
Keyword arguments to pass to trapezoid fit line.
:param **kwargs: (optional)
Additional keyword arguments passed to ``plt.plot``.
"""
setfig(fig)
plt.plot(self.ts,self.fs,'.',**kwargs)
if plot_trap and hasattr(self,'trapfit'):
if trap_kwargs is None:
trap_kwargs = {}
plt.plot(self.ts, traptransit(self.ts,self.trapfit),
color=trap_color, **trap_kwargs)
if name is not None:
if type(name)==type(''):
text = name
else:
text = self.name
plt.annotate(text,xy=(0.1,0.1),xycoords='axes fraction',fontsize=22)
if hasattr(self,'depthfit') and not np.isnan(self.depthfit[0]):
lo = 1 - 3*self.depthfit[0]
hi = 1 + 2*self.depthfit[0]
else:
lo = 1
hi = 1
sig = qstd(self.fs,0.005)
hi = max(hi,self.fs.mean() + 7*sig)
lo = min(lo,self.fs.mean() - 7*sig)
logging.debug('lo={}, hi={}'.format(lo,hi))
plt.ylim((lo,hi))
plt.xlabel('time [days]')
plt.ylabel('Relative flux')
[docs] def MCMC(self, niter=500, nburn=200, nwalkers=200, threads=1,
fit_partial=False, width=3, savedir=None, refit=False,
thin=10, conf=0.95, maxslope=MAXSLOPE, debug=False, p0=None):
"""
Fit transit signal to trapezoid model using MCMC
.. note:: As currently implemented, this method creates a
bunch of attributes relevant to the MCMC fit; I plan
to refactor this to define those attributes as properties
so as not to have their creation hidden away here. I plan
to refactor how this works.
"""
if fit_partial:
wok = np.where((np.absolute(self.ts-self.center) < (width*self.dur)) &
~np.isnan(self.fs))
else:
wok = np.where(~np.isnan(self.fs))
if savedir is not None:
if not os.path.exists(savedir):
os.mkdir(savedir)
alreadydone = True
alreadydone &= savedir is not None
alreadydone &= os.path.exists('%s/ts.npy' % savedir)
alreadydone &= os.path.exists('%s/fs.npy' % savedir)
if savedir is not None and alreadydone:
ts_done = np.load('%s/ts.npy' % savedir)
fs_done = np.load('%s/fs.npy' % savedir)
alreadydone &= np.all(ts_done == self.ts[wok])
alreadydone &= np.all(fs_done == self.fs[wok])
if alreadydone and not refit:
logging.info('MCMC fit already done for %s. Loading chains.' % self.name)
Ts = np.load('%s/duration_chain.npy' % savedir)
ds = np.load('%s/depth_chain.npy' % savedir)
slopes = np.load('%s/slope_chain.npy' % savedir)
tcs = np.load('%s/tc_chain.npy' % savedir)
else:
logging.info('Fitting data to trapezoid shape with MCMC for %s....' % self.name)
if p0 is None:
p0 = self.trapfit.copy()
p0[0] = np.absolute(p0[0])
if p0[2] < 2:
p0[2] = 2.01
if p0[1] < 0:
p0[1] = 1e-5
logging.debug('p0 for MCMC = {}'.format(p0))
sampler = traptransit_MCMC(self.ts[wok],self.fs[wok],self.dfs[wok],
niter=niter,nburn=nburn,nwalkers=nwalkers,
threads=threads,p0=p0,return_sampler=True,
maxslope=maxslope)
Ts,ds,slopes,tcs = (sampler.flatchain[:,0],sampler.flatchain[:,1],
sampler.flatchain[:,2],sampler.flatchain[:,3])
self.sampler = sampler
if savedir is not None:
np.save('%s/duration_chain.npy' % savedir,Ts)
np.save('%s/depth_chain.npy' % savedir,ds)
np.save('%s/slope_chain.npy' % savedir,slopes)
np.save('%s/tc_chain.npy' % savedir,tcs)
np.save('%s/ts.npy' % savedir,self.ts[wok])
np.save('%s/fs.npy' % savedir,self.fs[wok])
if debug:
print(Ts)
print(ds)
print(slopes)
print(tcs)
N = len(Ts)
try:
self.Ts_acor = integrated_time(Ts)
self.ds_acor = integrated_time(ds)
self.slopes_acor = integrated_time(slopes)
self.tcs_acor = integrated_time(tcs)
self.fit_converged = True
except AutocorrError:
self.fit_converged = False
ok = (Ts > 0) & (ds > 0) & (slopes > 0) & (slopes < self.maxslope)
logging.debug('trapezoidal fit has {} good sample points'.format(ok.sum()))
if ok.sum()==0:
if (Ts > 0).sum()==0:
#logging.debug('{} points with Ts > 0'.format((Ts > 0).sum()))
logging.debug('{}'.format(Ts))
raise MCMCError('{}: 0 points with Ts > 0'.format(self.name))
if (ds > 0).sum()==0:
#logging.debug('{} points with ds > 0'.format((ds > 0).sum()))
logging.debug('{}'.format(ds))
raise MCMCError('{}: 0 points with ds > 0'.format(self.name))
if (slopes > 0).sum()==0:
#logging.debug('{} points with slopes > 0'.format((slopes > 0).sum()))
logging.debug('{}'.format(slopes))
raise MCMCError('{}: 0 points with slopes > 0'.format(self.name))
if (slopes < self.maxslope).sum()==0:
#logging.debug('{} points with slopes < maxslope ({})'.format((slopes < self.maxslope).sum(),self.maxslope))
logging.debug('{}'.format(slopes))
raise MCMCError('{} points with slopes < maxslope ({})'.format((slopes < self.maxslope).sum(),self.maxslope))
durs,deps,logdeps,slopes = (Ts[ok],ds[ok],np.log10(ds[ok]),
slopes[ok])
inds = (np.arange(len(durs)/thin)*thin).astype(int)
durs,deps,logdeps,slopes = (durs[inds],deps[inds],logdeps[inds],
slopes[inds])
self.durs,self.deps,self.logdeps,self.slopes = (durs,deps,logdeps,slopes)
self._make_kde(conf=conf)
self.hasMCMC = True
def corner(self, outfile=None, plot_contours=False, **kwargs):
fig = corner.corner(self.kde.dataset.T, labels=['Duration', 'log(depth)', 'T/tau'],
plot_contours=False, **kwargs)
if outfile is not None:
fig.savefig(outfile)
return fig
def _make_kde(self, conf=0.95):
self.durkde = gaussian_kde(self.durs)
self.depthkde = gaussian_kde(self.deps)
self.slopekde = gaussian_kde(self.slopes)
self.logdepthkde = gaussian_kde(self.logdeps)
if self.fit_converged:
try:
durconf = kdeconf(self.durkde,conf)
depconf = kdeconf(self.depthkde,conf)
logdepconf = kdeconf(self.logdepthkde,conf)
slopeconf = kdeconf(self.slopekde,conf)
except:
raise
raise MCMCError('Error generating confidence intervals...fit must not have worked.')
durmed = np.median(self.durs)
depmed = np.median(self.deps)
logdepmed = np.median(self.logdeps)
slopemed = np.median(self.slopes)
self.durfit = (durmed,np.array([durmed-durconf[0],durconf[1]-durmed]))
self.depthfit = (depmed,np.array([depmed-depconf[0],depconf[1]-depmed]))
self.logdepthfit = (logdepmed,np.array([logdepmed-logdepconf[0],logdepconf[1]-logdepmed]))
self.slopefit = (slopemed,np.array([slopemed-slopeconf[0],slopeconf[1]-slopemed]))
else:
self.durfit = (np.nan,(np.nan,np.nan))
self.depthfit = (np.nan,(np.nan,np.nan))
self.logdepthfit = (np.nan,(np.nan,np.nan))
self.slopefit = (np.nan,(np.nan,np.nan))
points = np.array([self.durs,self.logdeps,self.slopes])
self.kde = gaussian_kde(points)
class TransitSignal_FromSamples(TransitSignal):
"""Use this if all you have is the trapezoid-fit samples
"""
def __init__(self, period, durs, depths, slopes,
name='', **kwargs):
self.period = period
self.durs = durs
self.deps = depths
self.logdeps = np.log10(depths)
self.slopes = slopes
self.hasMCMC = True
self.fit_converged = True #better be
self._make_kde()
self.name = name
def MCMC(self, *args, **kwargs):
pass
def plot(self, *args, **kwargs):
pass
def __hash__(self):
return hashcombine(self.period, hasharray(self.durs),
hasharray(self.deps),
hasharray(self.slopes))
class TransitSignal_DF(TransitSignal):
def __init__(self, df, columns=['t','f','e_f'], **kwargs):
t_col, f_col, e_f_col = columns
t = df[t_col]
f = df[f_col]
if e_f_col in df:
e_f = df[e_f_col]
else:
e_f = None
TransitSignal.__init__(self, t, f, e_f, **kwargs)
class TransitSignal_ASCII(TransitSignal):
def __init__(self, filename, cols=(0,1), err_col=2, **kwargs):
t, f = np.loadtxt(filename, usecols=cols, unpack=True)
try:
e_f = np.loadtxt(filename, usecols=(err_col,))
except:
e_f = None
TransitSignal.__init__(self, t, f, e_f, **kwargs)
############# Exceptions ##############3
class MCMCError(Exception):
pass