Coverage for frank/statistical_models.py: 78%
# Frankenstein: 1D disc brightness profile reconstruction from Fourier data
# using non-parametric Gaussian Processes
#
# Copyright (C) 2019-2020 R. Booth, J. Jennings, M. Tazzari
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>
#
import numpy as np
import scipy.linalg
import scipy.sparse
import scipy.optimize
import logging

from frank.constants import rad_to_arcsec, deg_to_rad
from frank.minimizer import LineSearch, MinimizeNewton

class VisibilityMapping:
    r"""Builds the mapping between the visibility and image planes.

    VisibilityMapping generates the transform matrices :math:`H(q)` such that
    :math:`V_\nu(q) = H(q) I_\nu`. It also uses these to construct the design
    matrices :math:`M` and :math:`j` used in the fitting.

    VisibilityMapping supports the following models:
      1. An optically thick and geometrically thin disc
      2. An optically thin and geometrically thin disc
      3. An optically thin disc with a known Gaussian vertical structure.
    All models are axisymmetric.

    Parameters
    ----------
    DHT : DiscreteHankelTransform
        A DHT object with N bins that defines H(p). The DHT is used to compute
        :math:`S(p)`
    geometry : SourceGeometry object, optional
        Geometry used to correct the visibilities for the source inclination.
    vis_model : string,
        One of ['opt_thick', 'opt_thin', 'debris'], corresponding to models
        1-3 described above, respectively.
    scale_height : function H(R), units = arcsec
        The vertical thickness of the disc in terms of its Gaussian
        scale-height. Only used if vis_model="debris".
    block_data : bool, default = True
        Large temporary matrices are needed to set up the data. If block_data
        is True, we avoid this, limiting the memory requirement to block_size
        elements.
    block_size : int, default = 10**5
        Size of the matrices if blocking is used
    check_qbounds : bool, default = True
        Whether to check if the first (last) collocation point is smaller
        (larger) than the shortest (longest) deprojected baseline in the
        dataset
    verbose : bool, default = True
        Whether to print notification messages
    """
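
    # A minimal construction sketch (hedged: the Rmax, N, and geometry values
    # below are illustrative, not defaults; DiscreteHankelTransform takes Rmax
    # in radians, hence the division by rad_to_arcsec):
    #
    #     from frank.hankel import DiscreteHankelTransform
    #     from frank.geometry import FixedGeometry
    #
    #     DHT = DiscreteHankelTransform(Rmax=2.0 / rad_to_arcsec, N=250)
    #     geom = FixedGeometry(inc=30.0, PA=45.0, dRA=0.0, dDec=0.0)
    #     mapping = VisibilityMapping(DHT, geom, vis_model='opt_thick')
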
    def __init__(self, DHT, geometry,
                 vis_model='opt_thick', scale_height=None, block_data=True,
                 block_size=10 ** 5, check_qbounds=True, verbose=True):

        _vis_models = ['opt_thick', 'opt_thin', 'debris']
        if vis_model not in _vis_models:
            raise ValueError(f"vis_model must be one of {_vis_models}")

        # Store flags
        self._vis_model = vis_model
        self.check_qbounds = check_qbounds
        self._verbose = verbose

        self._chunking = block_data
        self._chunk_size = block_size

        self._DHT = DHT
        self._geometry = geometry

        # Check for consistency and report the model choice.
        self._scale_height = None
        if self._vis_model == 'opt_thick':
            if self._verbose:
                logging.info(' Assuming an optically thick model (the default): '
                             'Scaling the total flux to account for the source '
                             'inclination')
        elif self._vis_model == 'opt_thin':
            if self._verbose:
                logging.info(' Assuming an optically thin model: *Not* scaling the '
                             'total flux to account for the source inclination')
        elif self._vis_model == 'debris':
            if scale_height is None:
                raise ValueError('You requested a model with a non-zero scale height'
                                 ' but did not specify H(R) (scale_height=None)')
            self._scale_height = scale_height(self.r)
            self._H2 = 0.5*(2*np.pi*self._scale_height / rad_to_arcsec)**2

            if self._verbose:
                logging.info(' Assuming an optically thin but geometrically '
                             'thick model: *Not* scaling the total flux to '
                             'account for the source inclination')
    def map_visibilities(self, u, v, V, weights, frequencies=None, geometry=None):
        r"""
        Compute the matrices :math:`M` and :math:`j` from the visibility data.

        Also compute the null likelihood,

        .. math::
            H_0 = \frac{1}{2} \sum \log\left[\frac{w}{2 \pi}\right]
                  - \frac{1}{2} V^T w V

        Parameters
        ----------
        u, v : 1D array, unit = :math:`\lambda`
            uv-points of the visibilities
        V : 1D array, unit = Jy
            Visibility amplitudes at q
        weights : 1D array, unit = Jy^-2
            Weights of the visibilities, weight = 1 / sigma^2, where sigma is
            the standard deviation
        frequencies : 1D array, optional, unit = Hz
            Channel frequencies of each data point. If not provided, a single
            channel is assumed.
        geometry : SourceGeometry object
            Geometry used to deproject the visibilities before fitting. If not
            provided, the geometry passed in during construction will be used.

        Returns
        -------
        mapped visibilities : dict
            The format depends on whether a single channel was assumed
            (frequencies=None). The following data may be present:
                'multi_freq' : bool,
                    Specifies if multi-frequency analysis was done
                'channels' : array, 1D, multi-frequency analysis only
                    The frequency of each channel.
                'M' : array, 2D or 3D.
                    The matrix :math:`M` of the mapping. This will have shape
                    (num_channels, size, size) for multi-frequency analysis
                    and (size, size) for single channel analysis.
                'j' : array, 1D or 2D.
                    The matrix :math:`j` of the mapping. This will have shape
                    (num_channels, size) for multi-frequency analysis and
                    (size,) for single channel analysis.
                'null_likelihood' : float,
                    The likelihood of a model with I=0. See above.
                'hash' : list,
                    Identifying data, used to ensure compatibility between the
                    mapped visibilities and fitting objects.
        """
        if geometry is None:
            geometry = self._geometry

        if self._verbose:
            logging.info(' Building visibility matrices M and j')

        # Deproject the visibilities
        u, v, k, V = geometry.apply_correction(u, v, V, use3D=True)
        q = np.hypot(u, v)

        # Check consistency of the uv points with the model
        self._check_uv_range(q)

        # Use only the real part of V.
        V = V.real
        w = np.ones_like(V) * weights

        multi_freq = True
        if frequencies is None:
            multi_freq = False
            frequencies = np.ones_like(V)

        channels = np.unique(frequencies)
        Ms = np.zeros([len(channels), self.size, self.size], dtype='f8')
        js = np.zeros([len(channels), self.size], dtype='f8')
        for i, f in enumerate(channels):
            idx = frequencies == f

            qi = q[idx]
            ki = k[idx]
            wi = w[idx]
            Vi = V[idx]

            # If chunking is used, we will build up M and j chunk-by-chunk
            if self._chunking:
                Nstep = int(self._chunk_size / self.size + 1)
            else:
                Nstep = len(Vi)

            start = 0
            end = Nstep
            Ndata = len(Vi)
            while start < Ndata:
                qs = qi[start:end]
                ks = ki[start:end]
                ws = wi[start:end]
                Vs = Vi[start:end]

                X = self._get_mapping_coefficients(qs, ks)

                wXT = np.array(X.T * ws, order='C')

                Ms[i] += np.dot(wXT, X)
                js[i] += np.dot(wXT, Vs)

                start = end
                end = min(Ndata, end + Nstep)

        # Compute the likelihood normalization H_0, i.e. the
        # log-likelihood of a source with I=0.
        H0 = 0.5 * np.sum(np.log(w / (2 * np.pi)) - V * w * V)

        if multi_freq:
            return {
                'multi_freq' : True,
                'channels' : channels,
                'M' : Ms,
                'j' : js,
                'null_likelihood' : H0,
                'hash' : [True, self._DHT, geometry, self._vis_model, self._scale_height],
            }
        else:
            return {
                'multi_freq' : False,
                'channels' : None,
                'M' : Ms[0],
                'j' : js[0],
                'null_likelihood' : H0,
                'hash' : [False, self._DHT, geometry, self._vis_model, self._scale_height],
            }
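
    # A hedged sketch of how the returned dict feeds a fit (single-channel
    # case; `mapping`, `DHT`, the data arrays and a power spectrum `p` are
    # assumed to exist; this is illustrative, not frank's pipeline code):
    #
    #     mapped = mapping.map_visibilities(u, v, V, weights)
    #     model = GaussianModel(DHT, mapped['M'], mapped['j'], p=p,
    #                           noise_likelihood=mapped['null_likelihood'])
    #     I_mean = model.mean   # posterior mean brightness, Jy / sr
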
    def check_hash(self, hash, multi_freq=False, geometry=None):
        """Check whether the hash of some mapped visibilities is compatible
        with this VisibilityMapping.

        Parameters
        ----------
        hash : list
            Hash to compare
        multi_freq : bool
            Whether we are expecting a multi-frequency fit
        geometry : SourceGeometry object, optional
            Geometry to use in the comparison.
        """
        if geometry is None:
            geometry = self._geometry

        passed = (
            multi_freq == hash[0] and
            self._DHT.Rmax == hash[1].Rmax and
            self._DHT.size == hash[1].size and
            self._DHT.order == hash[1].order and
            geometry.inc == hash[2].inc and
            geometry.PA == hash[2].PA and
            geometry.dRA == hash[2].dRA and
            geometry.dDec == hash[2].dDec and
            self._vis_model == hash[3]
        )

        if not passed:
            return False

        if self._scale_height is None:
            return hash[4] is None
        else:
            if hash[4] is None:
                return False
            else:
                return np.all(self._scale_height == hash[4])
    def predict_visibilities(self, I, q, k=None, geometry=None):
        r"""Compute the predicted visibilities given the brightness profile, I.

        Parameters
        ----------
        I : array, unit = Jy
            Brightness at the collocation points.
        q : array, unit = :math:`\lambda`
            Radial uv-distance to predict the visibility at
        k : array, optional, unit = :math:`\lambda`
            Vertical uv-distance to predict the visibility. Only needed for a
            geometrically thick model.
        geometry : SourceGeometry object, optional
            Geometry used to correct the visibilities for the source
            inclination. Only needed for the optically thick model. If not
            provided, the geometry passed in during construction will be used.

        Returns
        -------
        V(q, k) : array, unit = Jy
            Predicted visibilities of a source with a radial flux distribution
            given by :math:`I` and the position angle and inclination
            determined by the geometry.

        Notes
        -----
        For an optically thick model the visibility amplitudes are reduced due
        to the projection, but phase centre corrections are not added.
        """
        # Chunk the visibility calculation for speed
        if self._chunking:
            Ni = int(self._chunk_size / self.size + 1)
        else:
            Ni = len(q)

        end = 0
        start = 0
        V = []
        while end < len(q):
            start = end
            end = start + Ni
            qi = q[start:end]

            ki = None
            if k is not None:
                ki = k[start:end]

            H = self._get_mapping_coefficients(qi, ki, geometry)

            V.append(np.dot(H, I))
        return np.concatenate(V)
    def invert_visibilities(self, V, R, geometry=None):
        r"""Compute the brightness, I, from the visibilities.

        Note this method does not work for an arbitrary distribution of
        baselines and therefore cannot be used to determine the brightness
        given a generic set of data. Instead it needs the visibilities at the
        collocation points of the DiscreteHankelTransform, q.

        For geometrically thick models the visibilities used must be those for
        which kz = 0.

        Given the above constraints, this method computes the inverse of
        predict_visibilities.

        Parameters
        ----------
        V : array, unit = Jy
            Visibility at the collocation points. This must be the deprojected
            and phase-center corrected visibilities.
        R : array, unit = arcsec
            Radial distance to compute the brightness at
        geometry : SourceGeometry object, optional
            Geometry used to correct the visibilities for the source
            inclination. Only needed for the optically thick model. If not
            provided, the geometry passed in during construction will be used.

        Returns
        -------
        I(R) : array, unit = Jy / sr
            Brightness at the radial locations, R.

        Notes
        -----
        The brightness is corrected for the flux scaling of the chosen model,
        e.g. for the optically thick model the cos(inclination) factor applied
        in predict_visibilities is divided out.
        """
        # Chunk the visibility calculation for speed
        R = np.atleast_1d(R)
        if self._chunking:
            Ni = int(self._chunk_size / self.size + 1)
        else:
            Ni = len(R)

        end = 0
        start = 0
        I = []
        while end < len(R):
            start = end
            end = start + Ni
            Ri = R[start:end]

            H = self._get_mapping_coefficients(Ri, 0, geometry, inverse=True)

            I.append(np.dot(H, V))
        return np.concatenate(I)[R < self.Rmax]
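
    # Round-trip sketch (assumes a brightness array `I` evaluated at
    # mapping.r; both directions use the same model scaling, so the profile
    # should be recovered up to floating-point error):
    #
    #     Vq = mapping.predict_visibilities(I, mapping.q)
    #     I_back = mapping.invert_visibilities(Vq, mapping.r)
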
    def transform(self, f, q=None, direction='forward'):
        """Apply a DHT directly to the data provided.

        Parameters
        ----------
        f : array, size = N
            Function to Hankel transform, evaluated at the collocation points:
            f[k] = f(r_k) or f[k] = f(q_k)
        q : array or None
            The frequency points at which to evaluate the Hankel transform.
            If not specified, the conjugate points of the DHT will be used.
            For the backward transform, q should be the radius points in
            arcsec.
        direction : { 'forward', 'backward' }, optional
            Direction of the transform. If not supplied, the forward
            transform is used.

        Returns
        -------
        H[f] : array, size = N or len(q) if supplied
            The Hankel transform of the array f

        """
        if direction == 'backward' and q is not None:
            q = q / rad_to_arcsec

        return self._DHT.transform(f, q, direction)
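
    # Unit-handling sketch for transform() (assumes a VisibilityMapping
    # instance `mapping` and an array `f` defined at mapping.r): the forward
    # transform returns values at mapping.q, while the backward transform
    # expects the evaluation points as radii in arcsec:
    #
    #     Vq = mapping.transform(f, direction='forward')
    #     fr = mapping.transform(Vq, q=mapping.r, direction='backward')
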
    def DHT_coefficients(self, direction='forward'):
        """Get the coefficients of the Discrete Hankel Transform.

        The coefficients are defined by

        .. math::
            H[f] = np.dot(H, f)

        Parameters
        ----------
        direction : { 'forward', 'backward' }, optional
            Direction of the transform. If not supplied, the forward
            transform is used.

        Returns
        -------
        H : array, size = (N, N)
            The coefficient matrix of the DHT
        """
        return self._DHT.coefficients(direction=direction)
    def interpolate(self, f, r, space='Real'):
        """
        Interpolate f (evaluated at the collocation points) to the new points,
        r, using interpolation that is consistent with the Fourier-Bessel
        series / Discrete Hankel Transform.

        Parameters
        ----------
        f : array, size = N
            Function to interpolate, evaluated at the collocation points:
            f[k] = f(r_k) or f[k] = f(q_k)
        r : array or None
            The points at which to evaluate the interpolation.
        space : { 'Real', 'Fourier' }, optional
            Space in which the interpolation is done. If not supplied,
            'Real' is assumed.

        Returns
        -------
        f_interp : array, size = len(r)
            The interpolated results
        """
        if space == 'Real':
            r = r / rad_to_arcsec
        return self._DHT.interpolate(f, r, space)
    def _get_mapping_coefficients(self, qs, ks, geometry=None, inverse=False):
        r"""Get :math:`H(q)`, such that :math:`V(q) = H(q) I_\nu`"""
        if self._vis_model == 'opt_thick':
            # Optically thick & geometrically thin
            if geometry is None:
                geometry = self._geometry
            scale = np.cos(geometry.inc * deg_to_rad)
        elif self._vis_model == 'opt_thin':
            # Optically thin & geometrically thin
            scale = 1
        elif self._vis_model == 'debris':
            # Optically thin & geometrically thick
            scale = np.exp(-np.outer(ks*ks, self._H2))
        else:
            raise ValueError("model not supported. Should never occur.")

        if inverse:
            scale = np.atleast_1d(1/scale).reshape(1, -1)
            qs = qs / rad_to_arcsec
            direction = 'backward'
        else:
            direction = 'forward'

        H = self._DHT.coefficients(qs, direction=direction) * scale

        return H
    def _check_uv_range(self, uv):
        """Check that the uv domain is properly covered."""

        # Check whether the first (last) collocation point is smaller (larger)
        # than the shortest (longest) deprojected baseline in the dataset
        if self.check_qbounds:
            if self.q[0] < uv.min():
                logging.warning(r"WARNING: First collocation point, q[0] = {:.3e} \lambda,"
                                " is at a baseline shorter than the"
                                " shortest deprojected baseline in the dataset,"
                                r" min(uv) = {:.3e} \lambda. For q[0] << min(uv),"
                                " the fit's total flux may be biased"
                                " low.".format(self.q[0], uv.min()))

            if self.q[-1] < uv.max():
                raise ValueError(r"ERROR: Last collocation point, {:.3e} \lambda, is at"
                                 " a shorter baseline than the longest deprojected"
                                 r" baseline in the dataset, {:.3e} \lambda. Please"
                                 " increase N in FrankMultFrequencyFitter (this is"
                                 " `hyperparameters: n` if you're using a parameter"
                                 " file). Or if you'd like to fit to a shorter maximum"
                                 " baseline, cut the (u, v) distribution before fitting"
                                 " (`modify_data: baseline_range` in the"
                                 " parameter file).".format(self.q[-1], uv.max()))
    @property
    def r(self):
        """Radius points, unit = arcsec"""
        return self._DHT.r * rad_to_arcsec

    @property
    def Rmax(self):
        """Maximum radius, unit = arcsec"""
        return self._DHT.Rmax * rad_to_arcsec

    @property
    def q(self):
        r"""Frequency points, unit = :math:`\lambda`"""
        return self._DHT.q

    @property
    def Qmax(self):
        r"""Maximum frequency, unit = :math:`\lambda`"""
        return self._DHT.Qmax

    @property
    def size(self):
        """Number of points in reconstruction"""
        return self._DHT.size

    @property
    def scale_height(self):
        """Vertical thickness of the disc, unit = arcsec"""
        if self._scale_height is not None:
            return self._scale_height
        else:
            return None

class GaussianModel:
    r"""
    Solves the linear regression problem to compute the posterior,

    .. math::
       P(I|q,V,p) \propto G(I-\mu, D),

    where :math:`I` is the intensity to be predicted, :math:`q` are the
    baselines and :math:`V` the visibility data. :math:`\mu` and :math:`D` are
    the mean and covariance of the posterior distribution.

    If :math:`p` is provided, the covariance matrix of the prior is included,
    with

    .. math::
        P(I|p) \propto G(I, S(p)),

    and the Bayesian Linear Regression problem is solved. :math:`S` is
    computed from the power spectrum, :math:`p`, if provided. Otherwise the
    traditional (frequentist) linear regression is used.

    The problem is framed in terms of the design matrix :math:`M` and
    information source :math:`j`.

    :math:`H(q)` is the matrix that projects the intensity :math:`I` to
    visibility space. :math:`M` is defined by

    .. math::
        M = H(q)^T w H(q),

    where :math:`w` is the weights matrix and

    .. math::
        j = H(q)^T w V.

    The mean and covariance of the posterior are then given by

    .. math::
        \mu = D j

    and

    .. math::
        D = [ M + S(p)^{-1}]^{-1},

    if the prior is provided, otherwise

    .. math::
        D = M^{-1}.


    Parameters
    ----------
    DHT : DiscreteHankelTransform
        A DHT object with N bins that defines H(p). The DHT is used to compute
        :math:`S(p)`
    M : 2/3D array, size = (N, N) or (Nf, N, N)
        The design matrix, see above. Nf is the number of channels
        (frequencies).
    j : 1/2D array, size = N or (Nf, N)
        Information source, see above
    p : 1D array, size = N, optional
        Power spectrum used to generate the covariance matrix :math:`S(p)`
    scale : 1/2D array, size = Nf or (Nf, Nfields), optional
        Scale factor for each channel and field, applied when accumulating
        M and j. Defaults to 1.
    guess : array, optional
        Initial guess used in computing the brightness.
    Nfields : int, optional
        Number of fields used to fit the data. Typically 1, but could be more
        for multi-wavelength data. If not provided it will be determined from
        the shape of guess.
    noise_likelihood : float, optional
        An optional parameter needed to compute the full likelihood, which
        should be equal to

        .. math::
            -\frac{1}{2} V^T w V + \frac{1}{2} \sum \log[w/(2 \pi)].

        If not provided, the likelihood can still be computed up to this
        missing constant
    """
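
    # A tiny self-contained sketch of the algebra implemented here (the
    # frequentist case with no prior): mu solves (H^T w H) mu = H^T w V.
    # All arrays below are illustrative stand-ins, not frank objects:
    #
    #     H = np.array([[1.0, 0.5], [0.3, 1.0], [0.2, 0.1]])  # (Ndata, N)
    #     w = np.array([1.0, 2.0, 4.0])                       # weights
    #     V = np.array([1.2, 0.9, 0.3])                       # data
    #     M = (H.T * w) @ H                                   # design matrix
    #     j = (H.T * w) @ V                                   # information source
    #     mu = np.linalg.solve(M, j)                          # posterior mean
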
    def __init__(self, DHT, M, j, p=None, scale=None, guess=None,
                 Nfields=None, noise_likelihood=0):

        self._DHT = DHT

        # Correct the shape of the design matrix etc.
        if len(M.shape) == 2:
            M = M.reshape(1, *M.shape)
        if len(j.shape) == 1:
            j = j.reshape(1, *j.shape)

        # Number of frequencies / radial points
        Nf, Nr = j.shape

        # Get the number of fields
        if Nfields is None:
            if guess is None:
                Nfields = 1
            else:
                guess = guess.reshape(-1, Nr)
                Nfields = guess.shape[0]
        elif guess is not None:
            guess = guess.reshape(Nfields, Nr)
        self._Nfields = Nfields

        # Create the correct shape for the power spectrum and scale factors
        if p is not None:
            p = p.reshape(-1, Nr)
            if p.shape[0] == 1 and Nfields > 1:
                p = np.repeat(p, Nfields, axis=0)
        self._p = p

        if scale is None:
            self._scale = np.ones([Nf, Nfields], dtype='f8')
        else:
            self._scale = np.empty([Nf, Nfields], dtype='f8')
            self._scale[:] = scale.reshape(Nf, -1)

        if p is not None:
            if np.any(p <= 0) or np.any(np.isnan(p)):
                print(p)
                raise ValueError("Bad value in power spectrum. The power"
                                 " spectrum must be positive and not contain"
                                 " any NaN values. This is likely due to"
                                 " your UVtable (incorrect units or weights),"
                                 " or the deprojection being applied (incorrect"
                                 " geometry and/or phase center). Else you may"
                                 " want to increase `rout` by 10-20% or `n` so"
                                 " that it is large, >~300.")

            Ykm = self._DHT.coefficients()
            Sj = np.einsum('ji,lj,jk->lik', Ykm, 1/p, Ykm)

            self._Sinv = np.zeros([Nr*Nfields, Nr*Nfields], dtype='f8')
            for n in range(0, Nfields):
                sn = n*Nr
                en = (n+1)*Nr

                self._Sinv[sn:en, sn:en] += Sj[n]
        else:
            self._Sinv = None

        # Compute the design matrix
        self._M = np.zeros([Nr*Nfields, Nr*Nfields], dtype='f8')
        self._j = np.zeros(Nr*Nfields, dtype='f8')
        for si, Mi, ji in zip(self._scale, M, j):
            for n in range(0, Nfields):
                sn = n*Nr
                en = (n+1)*Nr

                self._j[sn:en] += si[n] * ji
                for m in range(0, Nfields):
                    sm = m*Nr
                    em = (m+1)*Nr

                    self._M[sn:en, sm:em] += si[n]*si[m] * Mi

        self._like_noise = noise_likelihood

        self._fit()
    def _fit(self):
        """Compute the mean and variance"""
        # Compute the inverse prior covariance, S(p)^-1
        Sinv = self._Sinv
        if Sinv is None:
            Sinv = 0

        Dinv = self._M + Sinv

        try:
            self._Dchol = scipy.linalg.cho_factor(Dinv)
            self._Dsvd = None

            self._mu = scipy.linalg.cho_solve(self._Dchol, self._j)
        except np.linalg.LinAlgError:
            U, s, V = scipy.linalg.svd(Dinv, full_matrices=False)

            s1 = np.where(s > 0, 1. / s, 0)

            self._Dchol = None
            self._Dsvd = U, s1, V

            self._mu = np.dot(V.T, np.multiply(np.dot(U.T, self._j), s1))

        # Reset the covariance matrix - we will compute it when needed
        if self._Nfields > 1:
            self._mu = self._mu.reshape(self._Nfields, self.size)
        self._cov = None
    def Dsolve(self, b):
        r"""
        Compute :math:`D \cdot b` by solving :math:`D^{-1} x = b`.

        Parameters
        ----------
        b : array, size = (N,...)
            Right-hand side to solve for

        Returns
        -------
        x : array, shape = np.shape(b)
            Solution to the equation D x = b

        """
        if self._Dchol is not None:
            return scipy.linalg.cho_solve(self._Dchol, b)
        else:
            U, s1, V = self._Dsvd
            return np.dot(V.T, np.multiply(np.dot(U.T, b), s1))

    def draw(self, N):
        """Compute N draws from the posterior"""
        draws = np.random.multivariate_normal(self.mean.reshape(-1), self.covariance, N)
        if self.num_fields > 1:
            draws = draws.reshape(N, self.num_fields, self.size)
        return draws
    def log_likelihood(self, I=None):
        r"""
        Compute one of two types of likelihood.

        If :math:`I` is provided, this computes

        .. math::
            \log[P(I,V|S)].

        Otherwise the marginalized likelihood is computed,

        .. math::
            \log[P(V|S)].


        Parameters
        ----------
        I : array, size = N, optional, unit = Jy / sr
            Intensity :math:`I(r)` to compute the likelihood of

        Returns
        -------
        log_P : float
            Log likelihood, :math:`\log[P(I,V|p)]` or :math:`\log[P(V|p)]`

        Notes
        -----
        1. The prior probability P(S) is not included.
        2. The likelihoods take the form:

        .. math::
            \log[P(I,V|p)] = j^T I - \frac{1}{2} I^T D^{-1} I
               - \frac{1}{2} \log[\det(2 \pi S)] + H_0

        and

        .. math::
            \log[P(V|p)] = \frac{1}{2} j^T D j
               + \frac{1}{2} \log[\det(D)/\det(S)] + H_0

        where

        .. math::
            H_0 = -\frac{1}{2} V^T w V + \frac{1}{2} \sum \log(w /2 \pi)

        is the noise likelihood.
        """
        if I is None:
            like = 0.5 * np.sum(self._j * self._mu)

            if self._Sinv is not None:
                Q = self.Dsolve(self._Sinv)
                like += 0.5 * np.linalg.slogdet(Q)[1]
        else:
            Sinv = self._Sinv
            if Sinv is None:
                Sinv = 0

            Dinv = self._M + Sinv

            like = np.sum(self._j * I) - 0.5 * np.dot(I, np.dot(Dinv, I))

            if self._Sinv is not None:
                like += 0.5 * np.linalg.slogdet(2 * np.pi * Sinv)[1]

        return like + self._like_noise
    def solve_non_negative(self):
        """Compute the best fit solution with non-negative intensities"""
        Sinv = self._Sinv
        if Sinv is None:
            Sinv = 0

        Dinv = self._M + Sinv
        return scipy.optimize.nnls(Dinv, self._j,
                                   maxiter=100*len(self._j))[0]

    @property
    def mean(self):
        """Posterior mean, unit = Jy / sr"""
        return self._mu

    @property
    def MAP(self):
        """Posterior maximum, unit = Jy / sr"""
        return self.mean

    @property
    def covariance(self):
        """Posterior covariance, unit = (Jy / sr)**2"""
        if self._cov is None:
            self._cov = self.Dsolve(np.eye(self.size*self.num_fields))
        return self._cov

    @property
    def s_0(self):
        """Log-brightness zero point; always 0 for the Gaussian model"""
        return 0

    @property
    def power_spectrum(self):
        """Power spectrum coefficients"""
        if self.num_fields == 1 and self._p is not None:
            return self._p.reshape(self.size)
        return self._p

    @property
    def num_fields(self):
        """Number of fields fit for"""
        return self._Nfields

    @property
    def size(self):
        """Number of points in reconstruction"""
        return self._DHT.size

class LogNormalMAPModel:
    r"""
    Finds the maximum a posteriori field for log-normal regression problems,

    .. math::
        P(s|q,V,p,s_0) \propto G(H \exp[scale \, (s+s_0)] - V, M) \, P(s|p),

    where :math:`s` is the log-intensity to be predicted, :math:`q` are the
    baselines and :math:`V` the visibility data. :math:`H` is the design
    matrix of the transform, e.g. the coefficient matrix of the forward
    Hankel transform.

    If :math:`p` is provided, the covariance matrix of the prior is included,
    with

    .. math::
        P(s|p) \propto G(s, S(p)).

    The problem is framed in terms of the design matrix :math:`M` and
    information source :math:`j`.

    :math:`H(q)` is the matrix that projects the intensity
    :math:`\exp[scale \, (s+s_0)]` to visibility space. :math:`M` is defined
    by

    .. math::
        M = H(q)^T w H(q),

    where :math:`w` is the weights matrix and

    .. math::
        j = H(q)^T w V.


    The maximum a posteriori field, s_MAP, is found by maximizing
    :math:`\log P(s|q,V,p,s_0)` and the posterior covariance at s_MAP is

    .. math::
        D = [ M + S(p)^{-1}]^{-1}.

    If the prior is not provided then

    .. math::
        D = M^{-1},

    and the posterior for exp(s) is the same as the standard Gaussian model.

    Note: This class also supports :math:`M` and :math:`j` being split into
    multiple terms (i.e. :math:`M_i, j_i`) such that different scale
    factors, :math:`s0_i`, can be applied to each system. This allows fitting
    multi-frequency data.


    Parameters
    ----------
    DHT : DiscreteHankelTransform
        A DHT object with N bins that defines H(p). The DHT is used to compute
        :math:`S(p)`
    M : 2/3D array, size = (N, N) or (Nf, N, N)
        The design matrix, see above. Nf is the number of channels
        (frequencies).
    j : 1/2D array, size = N or (Nf, N)
        Information source, see above
    p : 1D array, size = N, optional
        Power spectrum used to generate the covariance matrix :math:`S(p)`
    scale : float, 1D array (size=N), or list of arrays
        Scale factors (see above). These factors can be a constant, one
        per brightness point or per band (optionally per collocation point)
        to enable multi-frequency fitting.
    s0 : float or 1D array, size = Nfields, optional
        Zero point for the brightness function, see above.
    guess : array, optional
        Initial guess used in computing the brightness.
    Nfields : int, optional
        Number of fields used to fit the data. Typically 1, but could be more
        for multi-wavelength data. If not provided it will be determined from
        the shape of guess.
    full_hessian : float, range [0, 1]
        If 1 then use the full Hessian in evaluating the matrix :math:`D`.
        When 0 a term is omitted that can cause the Hessian not to be a
        positive-definite matrix when the solution is a poor fit to the data.
        A value between 0 and 1 will scale this term by that factor.
    noise_likelihood : float, optional
        An optional parameter needed to compute the full likelihood, which
        should be equal to

        .. math::
            -\frac{1}{2} V^T w V + \frac{1}{2} \sum \log[w/(2 \pi)].

        If not provided, the likelihood can still be computed up to this
        missing constant
    """
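
    # A hedged usage sketch (inputs `DHT`, `M`, `j`, `p` built as for
    # GaussianModel; seeding from the Gaussian mean is one plausible choice
    # for illustration, not necessarily frank's exact pipeline):
    #
    #     gauss = GaussianModel(DHT, M, j, p=p)
    #     s_guess = np.log(np.maximum(gauss.mean, 1e-3 * gauss.mean.max()))
    #     lognorm = LogNormalMAPModel(DHT, M, j, p=p, guess=s_guess, s0=0)
    #     I_MAP = np.exp(lognorm.scale * (lognorm.MAP + lognorm.s_0))
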
    def __init__(self, DHT, M, j, p=None, scale=None, s0=None, guess=None,
                 Nfields=None, full_hessian=1, noise_likelihood=0):

        self._DHT = DHT
        self._full_hess = full_hessian

        # Correct the shape of the design matrix etc.
        if len(M.shape) == 2:
            M = M.reshape(1, *M.shape)
        if len(j.shape) == 1:
            j = j.reshape(1, *j.shape)

        self._M = M
        self._j = j

        # Number of frequencies / radial points
        Nf, Nr = j.shape

        # Get the number of signal fields
        if Nfields is None:
            if guess is None:
                Nfields = 1
            else:
                guess = guess.reshape(-1, Nr)
                Nfields = guess.shape[0]
        elif guess is not None:
            guess = guess.reshape(Nfields, Nr)
        self._Nfields = Nfields

        # Create the correct shape for the power spectrum and scale factors
        if p is not None:
            p = p.reshape(-1, Nr)
            if p.shape[0] == 1 and Nfields > 1:
                p = np.repeat(p, Nfields, axis=0)
        self._p = p

        if scale is None:
            self._scale = np.ones([Nf, Nfields], dtype='f8')
        else:
            self._scale = np.empty([Nf, Nfields], dtype='f8')
            self._scale[:] = scale.reshape(Nf, -1)

        if s0 is None:
            self._s0 = np.ones(Nfields, dtype='f8')
        else:
            s0 = np.atleast_1d(s0)
            if len(s0) == 1:
                s0 = np.repeat(s0, Nfields)
            elif len(s0) != Nfields:
                raise ValueError("Signal zero-point (s0) must have the same"
                                 " length as the number of fields or length 1")
            self._s0 = s0.reshape(Nfields, 1)

        if p is not None:
            if np.any(p <= 0) or np.any(np.isnan(p)):
                raise ValueError("Bad value in power spectrum. The power"
                                 " spectrum must be positive and not contain"
                                 " any NaN values. This is likely due to"
                                 " your UVtable (incorrect units or weights),"
                                 " or the deprojection being applied (incorrect"
                                 " geometry and/or phase center). Else you may"
                                 " want to increase `rout` by 10-20% or `n` so"
                                 " that it is large, >~300.")

            Ykm = self._DHT.coefficients()
            self._Sinv = np.einsum('ji,lj,jk->lik', Ykm, 1/p, Ykm)
        else:
            self._Sinv = np.zeros([Nfields, Nr, Nr], dtype='f8')

        self._like_noise = noise_likelihood

        self._fit(guess)
    def _fit(self, guess):
        """Find the maximum likelihood solution and variance"""
        Sinv = self._Sinv
        Ns, Nr = Sinv.shape[:2]

        Sflat = np.zeros([Ns*Nr, Ns*Nr])
        for i in range(Ns):
            s = i*Nr
            e = (i+1)*Nr
            Sflat[s:e, s:e] = Sinv[i]

        scale = self._scale
        s0 = self._s0

        def H(s):
            """Log-likelihood function"""
            s = s.reshape(Ns, Nr)
            I = np.exp(np.dot(scale, s+s0))  # shape Nf, Nr

            f = 0.5*np.einsum('ij,ijk,ik', s, Sinv, s)

            f += 0.5*np.einsum('ij,ijk,ik', I, self._M, I)
            f -= np.sum(I*self._j)

            return f

        def jac(s):
            """1st derivative of log-likelihood"""
            s = s.reshape(Ns, Nr)
            I = np.exp(np.dot(scale, s+s0))  # shape Nf, Nr

            sI = np.einsum('is,ij->isj', scale, I)  # shape Nf, Ns, Nr

            S1_s = np.einsum('sjk,sk->sj', Sinv, s)  # shape Ns, Nr
            MI = np.einsum('isj,ijk,ik->sj', sI, self._M, I)
            jI = np.einsum('isj,ij->sj', sI, self._j)

            return (S1_s + (MI - jI)).reshape(Ns*Nr)

        def hess(s):
            """2nd derivative of log-likelihood"""
            s = s.reshape(Ns, Nr)
            I = np.exp(np.dot(scale, s+s0))  # shape Nf, Nr

            sI = np.einsum('is,ij->isj', scale, I)  # shape Nf, Ns, Nr
            s2I = np.einsum('is,ij->isj', scale**2, I)  # shape Nf, Ns, Nr

            Mjk = np.einsum('isj,ijk,itk->sjtk', sI, self._M, sI)

            resid = 0
            if self._full_hess > 0:
                MI = Mjk.sum(3)
                jI = np.einsum('is,itj,ij->sjt', scale, sI, self._j)

                resid = np.einsum('sjt,jk->sjtk', MI-jI, np.eye(Nr)).reshape(Ns*Nr, Ns*Nr)
                if self._full_hess < 1:
                    resid *= self._full_hess

            return Mjk.reshape(Ns*Nr, Ns*Nr) + resid + Sflat

        x = guess.reshape(Ns*Nr)

        def limit_step(dx, x):
            alpha = 1.1*np.min(np.abs(x/dx))

            alpha = min(alpha, 1)
            return alpha*dx

        # Ignore convergence because it will often fail due to round off when
        # we're super close to the minimum
        search = LineSearch(reduce_step=limit_step)
        s, _ = MinimizeNewton(H, jac, hess, x, search, tol=1e-7)
        self._s_MAP = s.reshape(Ns, Nr)

        Dinv = hess(s)
        try:
            self._Dchol = scipy.linalg.cho_factor(Dinv)
            self._Dsvd = None
        except np.linalg.LinAlgError:
            U, s_svd, V = scipy.linalg.svd(Dinv, full_matrices=False)

            s1 = np.where(s_svd > 0, 1./s_svd, 0)

            self._Dchol = None
            self._Dsvd = U, s1, V

        self._cov = None
    def Dsolve(self, b):
        r"""
        Compute :math:`D \cdot b` by solving :math:`D^{-1} x = b`.

        Parameters
        ----------
        b : array, size = (N,...)
            Right-hand side to solve for

        Returns
        -------
        x : array, shape = np.shape(b)
            Solution to the equation D x = b

        """
        if self._Dchol is not None:
            return scipy.linalg.cho_solve(self._Dchol, b)
        else:
            U, s1, V = self._Dsvd
            return np.dot(V.T, np.multiply(np.dot(U.T, b), s1))

    def draw(self, N):
        """Compute N draws from the (approximate) posterior"""
        draws = np.random.multivariate_normal(self.MAP.reshape(-1), self.covariance, N)
        if self.num_fields > 1:
            draws = draws.reshape(N, self.num_fields, self.size)
        return draws
    def log_likelihood(self, s=None):
        r"""
        Compute the likelihood,

        .. math::
            \log[P(I,V|S)].

        Parameters
        ----------
        s : array, size = N, optional
            Log-intensity, with :math:`I(r) = \exp[scale \, (s+s_0)]`, to
            compute the likelihood of

        Returns
        -------
        log_P : float
            Log likelihood, :math:`\log[P(I,V|p)]`

        Notes
        -----
        1. The prior probability P(S) is not included.
        2. The likelihood takes the form:

        .. math::
            \log[P(I,V|p)] = j^T I - \frac{1}{2} I^T D^{-1} I
               - \frac{1}{2} \log[\det(2 \pi S)] + H_0

        where

        .. math::
            H_0 = -\frac{1}{2} V^T w V + \frac{1}{2} \sum \log(w /2 \pi)

        is the noise likelihood.
        """
        if s is None:
            s = self._s_MAP

        Sinv = self._Sinv
        if Sinv is None:
            Sinv = 0

        # Map the log-intensity to intensity as in _fit: I = exp(scale*(s+s0)),
        # with s of shape (Nfields, Nr) and scale of shape (Nf, Nfields)
        I = np.exp(np.dot(self._scale, s + self._s0))

        like = -0.5*np.einsum('ij,ijk,ik', s - self._s0, Sinv, s - self._s0)

        like -= 0.5*np.einsum('ij,ijk,ik', I, self._M, I)
        like += np.sum(I*self._j)

        if self._Sinv is not None:
            like += 0.5 * np.sum(np.linalg.slogdet(2 * np.pi * Sinv)[1])

        return like + self._like_noise
    def solve_non_negative(self):
        """Compute the best fit solution with non-negative intensities"""
        # The solution is always non-negative. Provided for convenience.
        return self.MAP

    @property
    def MAP(self):
        """Posterior maximum of the log-brightness field, s_MAP"""
        MAP = self._s_MAP
        if MAP.shape[0] == 1:
            return MAP.reshape(self.size)
        return MAP

    @property
    def covariance(self):
        """Posterior covariance at MAP, unit = (Jy / sr)**2"""
        if self._cov is None:
            self._cov = self.Dsolve(np.eye(self.size*self.num_fields))
        return self._cov

    @property
    def power_spectrum(self):
        """Power spectrum coefficients"""
        p = self._p
        if p.shape[0] == 1:
            return p.reshape(self.size)
        return p

    @property
    def scale(self):
        """Scale factors, one per channel (and field)"""
        scale = self._scale
        if scale.shape[1] == 1:
            return scale[:, 0]
        return self._scale

    @property
    def s_0(self):
        """Zero point(s) of the log-brightness, s0"""
        s0 = self._s0
        if s0.shape[0] == 1:
            return s0[0]
        return s0

    @property
    def num_fields(self):
        """Number of fields fit for"""
        return self._Nfields

    @property
    def size(self):
        """Number of points in reconstruction"""
        return self._DHT.size