# Frankenstein: 1D disc brightness profile reconstruction from Fourier data
# using non-parametric Gaussian Processes
#
# Copyright (C) 2019-2020 R. Booth, J. Jennings, M. Tazzari
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>
#
import numpy as np
import scipy.linalg
import scipy.sparse
import scipy.optimize
import logging

from frank.constants import rad_to_arcsec, deg_to_rad
from frank.minimizer import LineSearch, MinimizeNewton

class VisibilityMapping:
    r"""Builds the mapping between the visibility and image planes.

    VisibilityMapping generates the transform matrices :math:`H(q)` such that
    :math:`V_\nu(q) = H(q) I_\nu`. It also uses these to construct the design
    matrices :math:`M` and :math:`j` used in the fitting.

    VisibilityMapping supports the following models:
      1. An optically thick and geometrically thin disc
      2. An optically thin and geometrically thin disc
      3. An optically thin disc with a known Gaussian vertical structure.
    All models are axisymmetric.

    Parameters
    ----------
    DHT : DiscreteHankelTransform
        A DHT object with N bins that defines H(p). The DHT is used to compute
        :math:`S(p)`
    geometry : SourceGeometry object
        Geometry used to correct the visibilities for the source inclination.
    vis_model : string
        One of ['opt_thick', 'opt_thin', 'debris'], corresponding to models
        1-3 described above, respectively.
    scale_height : function H(R), units = arcsec
        The vertical thickness of the disc in terms of its Gaussian
        scale-height. Only used if vis_model="debris".
    block_data : bool, default = True
        Large temporary matrices are needed to set up the data. If block_data
        is True, we avoid this, limiting the memory requirement to block_size
        elements.
    block_size : int, default = 10**5
        Size of the matrices if blocking is used
    check_qbounds : bool, default = True
        Whether to check if the first (last) collocation point is smaller
        (larger) than the shortest (longest) deprojected baseline in the
        dataset
    verbose : bool, default = True
        Whether to print notification messages
    """

    def __init__(self, DHT, geometry,
                 vis_model='opt_thick', scale_height=None, block_data=True,
                 block_size=10 ** 5, check_qbounds=True, verbose=True):

        _vis_models = ['opt_thick', 'opt_thin', 'debris']
        if vis_model not in _vis_models:
            raise ValueError(f"vis_model must be one of {_vis_models}")

        # Store flags
        self._vis_model = vis_model
        self.check_qbounds = check_qbounds
        self._verbose = verbose

        self._chunking = block_data
        self._chunk_size = block_size

        self._DHT = DHT
        self._geometry = geometry

        # Check for consistency and report the model choice.
        self._scale_height = None
        if self._vis_model == 'opt_thick':
            if self._verbose:
                logging.info('  Assuming an optically thick model (the default):'
                             ' Scaling the total flux to account for the source'
                             ' inclination')
        elif self._vis_model == 'opt_thin':
            if self._verbose:
                logging.info('  Assuming an optically thin model: *Not* scaling the'
                             ' total flux to account for the source inclination')
        elif self._vis_model == 'debris':
            if scale_height is None:
                raise ValueError('You requested a model with a non-zero scale height'
                                 ' but did not specify H(R) (scale_height=None)')
            self._scale_height = scale_height(self.r)
            self._H2 = 0.5*(2*np.pi*self._scale_height / rad_to_arcsec)**2

            if self._verbose:
                logging.info('  Assuming an optically thin but geometrically'
                             ' thick model: *Not* scaling the total flux to'
                             ' account for the source inclination')

    def map_visibilities(self, u, v, V, weights, frequencies=None, geometry=None):
        r"""
        Compute the matrices :math:`M` and :math:`j` from the visibility data.

        Also compute the null likelihood,

        .. math::
            H_0 = -\frac{1}{2} V^T w V + \frac{1}{2} \sum \log[w/(2 \pi)]

        Parameters
        ----------
        u,v : 1D array, unit = :math:`\lambda`
            uv-points of the visibilities
        V : 1D array, unit = Jy
            Visibility amplitudes at q
        weights : 1D array, unit = Jy^-2
            Weights of the visibilities, weight = 1 / sigma^2, where sigma is
            the standard deviation
        frequencies : 1D array, optional, unit = Hz
            Channel frequencies of each data point. If not provided, a single
            channel is assumed.
        geometry : SourceGeometry object
            Geometry used to deproject the visibilities before fitting. If not
            provided, the geometry passed in during construction will be used.

        Returns
        -------
        mapped visibilities : dict
            The format depends on whether a single channel was assumed
            (frequencies=None). The following data may be present:
                'multi_freq' : bool,
                    Specifies if multi-frequency analysis was done
                'channels' : array, 1D, multi-frequency analysis only
                    The frequency of each channel.
                'M' : array, 2D or 3D.
                    The matrix :math:`M` of the mapping. This will have shape
                    (num_channels, size, size) for multi-frequency analysis
                    and (size, size) for single channel analysis.
                'j' : array, 1D or 2D.
                    The matrix :math:`j` of the mapping. This will have shape
                    (num_channels, size) for multi-frequency analysis and
                    (size,) for single channel analysis.
                'null_likelihood' : float,
                    The likelihood of a model with I=0. See above.
                'hash' : list,
                    Identifying data, used to ensure compatibility between the
                    mapped visibilities and fitting objects.
        """
        if geometry is None:
            geometry = self._geometry

        if self._verbose:
            logging.info('  Building visibility matrices M and j')

        # Deproject the visibilities
        u, v, k, V = geometry.apply_correction(u, v, V, use3D=True)
        q = np.hypot(u, v)

        # Check consistency of the uv points with the model
        self._check_uv_range(q)

        # Use only the real part of V.
        V = V.real
        w = np.ones_like(V) * weights

        multi_freq = True
        if frequencies is None:
            multi_freq = False
            frequencies = np.ones_like(V)

        channels = np.unique(frequencies)
        Ms = np.zeros([len(channels), self.size, self.size], dtype='f8')
        js = np.zeros([len(channels), self.size], dtype='f8')
        for i, f in enumerate(channels):
            idx = frequencies == f

            qi = q[idx]
            ki = k[idx]
            wi = w[idx]
            Vi = V[idx]

            # If chunking is used, we will build up M and j chunk-by-chunk
            if self._chunking:
                Nstep = int(self._chunk_size / self.size + 1)
            else:
                Nstep = len(Vi)

            start = 0
            end = Nstep
            Ndata = len(Vi)
            while start < Ndata:
                qs = qi[start:end]
                ks = ki[start:end]
                ws = wi[start:end]
                Vs = Vi[start:end]

                X = self._get_mapping_coefficients(qs, ks)

                wXT = np.array(X.T * ws, order='C')

                Ms[i] += np.dot(wXT, X)
                js[i] += np.dot(wXT, Vs)

                start = end
                end = min(Ndata, end + Nstep)

        # Compute the likelihood normalization H_0, i.e., the
        # log-likelihood of a source with I=0.
        H0 = 0.5 * np.sum(np.log(w / (2 * np.pi)) - V * w * V)

        if multi_freq:
            return {
                'multi_freq' : True,
                'channels' : channels,
                'M' : Ms,
                'j' : js,
                'null_likelihood' : H0,
                'hash' : [True, self._DHT, geometry, self._vis_model, self._scale_height],
            }
        else:
            return {
                'multi_freq' : False,
                'channels' : None,
                'M' : Ms[0],
                'j' : js[0],
                'null_likelihood' : H0,
                'hash' : [False, self._DHT, geometry, self._vis_model, self._scale_height],
            }
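
    # Sketch of consuming the mapped data in a single-channel fit (assumes
    # the `vm`, `DHT` and data arrays from the sketch above; GaussianModel is
    # defined later in this module):
    #
    #     mapped = vm.map_visibilities(u, v, V, weights)
    #     M, j = mapped['M'], mapped['j']
    #     model = GaussianModel(DHT, M, j,
    #                           noise_likelihood=mapped['null_likelihood'])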

    def check_hash(self, hash, multi_freq=False, geometry=None):
        """Checks whether the hash of some mapped visibilities is compatible
        with this VisibilityMapping.

        Parameters
        ----------
        hash : list
            Hash to compare
        multi_freq : bool
            Whether we are expecting a multi-frequency fit
        geometry : SourceGeometry object, optional
            Geometry to use in the comparison.
        """
        if geometry is None:
            geometry = self._geometry

        passed = (
            multi_freq == hash[0] and
            self._DHT.Rmax == hash[1].Rmax and
            self._DHT.size == hash[1].size and
            self._DHT.order == hash[1].order and
            geometry.inc == hash[2].inc and
            geometry.PA == hash[2].PA and
            geometry.dRA == hash[2].dRA and
            geometry.dDec == hash[2].dDec and
            self._vis_model == hash[3]
        )

        if not passed:
            return False

        if self._scale_height is None:
            return hash[4] is None
        else:
            if hash[4] is None:
                return False
            else:
                return np.all(self._scale_height == hash[4])

    def predict_visibilities(self, I, q, k=None, geometry=None):
        r"""Compute the predicted visibilities given the brightness profile, I

        Parameters
        ----------
        I : array, unit = Jy
            Brightness at the collocation points.
        q : array, unit = :math:`\lambda`
            Radial uv-distance to predict the visibility at
        k : array, optional, unit = :math:`\lambda`
            Vertical uv-distance to predict the visibility. Only needed for a
            geometrically thick model.
        geometry : SourceGeometry object, optional
            Geometry used to correct the visibilities for the source
            inclination. Only needed for the optically thick model. If not
            provided, the geometry passed in during construction will be used.

        Returns
        -------
        V(q, k) : array, unit = Jy
            Predicted visibilities of a source with a radial flux distribution
            given by :math:`I` and the position angle, inclination determined
            by the geometry.

        Notes
        -----
        For an optically thick model the visibility amplitudes are reduced due
        to the projection but phase centre corrections are not added.
        """
        # Chunk the visibility calculation to limit memory usage
        if self._chunking:
            Ni = int(self._chunk_size / self.size + 1)
        else:
            Ni = len(q)

        end = 0
        start = 0
        V = []
        while end < len(q):
            start = end
            end = start + Ni
            qi = q[start:end]

            ki = None
            if k is not None:
                ki = k[start:end]

            H = self._get_mapping_coefficients(qi, ki, geometry)

            V.append(np.dot(H, I))
        return np.concatenate(V)
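
    # Forward-model sketch (hypothetical inputs): given a brightness profile
    # `I` defined at the collocation points vm.r, predict the visibilities at
    # the (deprojected) radial uv-distances of the data:
    #
    #     q = np.hypot(u, v)
    #     V_model = vm.predict_visibilities(I, q)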

    def invert_visibilities(self, V, R, geometry=None):
        r"""Compute the brightness, I, from the visibilities.

        Note this method does not work for an arbitrary distribution of
        baselines and therefore cannot be used to determine the brightness
        given a generic set of data. Instead it needs the visibilities at the
        collocation points of the DiscreteHankelTransform, q.

        For geometrically thick models the visibilities used must be those for
        which kz = 0.

        Given the above constraints, this method computes the inverse of
        predict_visibilities.

        Parameters
        ----------
        V : array, unit = Jy
            Visibility at the collocation points. This must be the deprojected
            and phase-center corrected visibilities.
        R : array, unit = arcsec
            Radial distance to compute the brightness at
        geometry : SourceGeometry object, optional
            Geometry used to correct the visibilities for the source
            inclination. Only needed for the optically thick model. If not
            provided, the geometry passed in during construction will be used.

        Returns
        -------
        I(R) : array, unit = Jy / Sr
            Brightness at the radial locations, R.

        Notes
        -----
        For the optically thick model the returned brightness is corrected
        for the inclination scaling of the visibility amplitudes.
        """
        # Chunk the calculation to limit memory usage
        R = np.atleast_1d(R)
        if self._chunking:
            Ni = int(self._chunk_size / self.size + 1)
        else:
            Ni = len(R)

        end = 0
        start = 0
        I = []
        while end < len(R):
            start = end
            end = start + Ni
            Ri = R[start:end]

            H = self._get_mapping_coefficients(Ri, 0, geometry, inverse=True)

            I.append(np.dot(H, V))
        return np.concatenate(I)[R < self.Rmax]

    def transform(self, f, q=None, direction='forward'):
        """Apply a DHT directly to the data provided

        Parameters
        ----------
        f : array, size = N
            Function to Hankel transform, evaluated at the collocation points:
            f[k] = f(r_k) or f[k] = f(q_k)
        q : array or None
            The frequency points at which to evaluate the Hankel
            transform. If not specified, the conjugate points of the
            DHT will be used. For the backward transform, q should be
            the radius points in arcsec.
        direction : { 'forward', 'backward' }, optional
            Direction of the transform. If not supplied, the forward
            transform is used.

        Returns
        -------
        H[f] : array, size = N or len(q) if supplied
            The Hankel transform of the array f
        """
        if direction == 'backward' and q is not None:
            q = q / rad_to_arcsec

        return self._DHT.transform(f, q, direction)
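
    # Unit-handling sketch (hypothetical brightness profile `I` evaluated at
    # the collocation points): the forward transform is evaluated at
    # frequency points in lambda, the backward transform at radii in arcsec:
    #
    #     V_q = vm.transform(I)                                 # at vm.q
    #     I_r = vm.transform(V_q, q=vm.r, direction='backward')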

    def DHT_coefficients(self, direction='forward'):
        r"""Get the coefficients of the Discrete Hankel Transform.

        The coefficients are defined by

        .. math::
            H[f] = H \cdot f

        Parameters
        ----------
        direction : { 'forward', 'backward' }, optional
            Direction of the transform. If not supplied, the forward
            transform is used.

        Returns
        -------
        H : array, size = (N, N)
            The coefficient matrix of the Discrete Hankel Transform
        """
        return self._DHT.coefficients(direction=direction)

    def interpolate(self, f, r, space='Real'):
        """
        Interpolate f (evaluated at the collocation points) to the new points,
        r, using interpolation that is consistent with the Fourier-Bessel
        Series / Discrete Hankel Transform.

        Parameters
        ----------
        f : array, size = N
            Function to interpolate, evaluated at the collocation points:
            f[k] = f(r_k) or f[k] = f(q_k)
        r : array or None
            The points at which to evaluate the interpolation.
        space : { 'Real', 'Fourier' }, optional
            Space in which the interpolation is done. If not supplied,
            'Real' is assumed.

        Returns
        -------
        f_interp : array, size = len(r)
            The interpolated results
        """
        if space == 'Real':
            r = r / rad_to_arcsec

        r = np.array(r)
        shape = r.shape
        r = r.reshape(-1)

        if self._chunking:
            Ni = int(self._chunk_size / len(r) + 1)
        else:
            Ni = len(r)

        end = 0
        start = 0
        I = []
        while end < len(r):
            start = end
            end = start + Ni
            ri = r[start:end]

            I.append(self._DHT.interpolate(f, ri, space))

        return np.concatenate(I).reshape(*shape)

    def _get_mapping_coefficients(self, qs, ks, geometry=None, inverse=False):
        r"""Get :math:`H(q)`, such that :math:`V(q) = H(q) I_\nu`"""
        if self._vis_model == 'opt_thick':
            # Optically thick & geometrically thin
            if geometry is None:
                geometry = self._geometry
            scale = np.cos(geometry.inc * deg_to_rad)
        elif self._vis_model == 'opt_thin':
            # Optically thin & geometrically thin
            scale = 1
        elif self._vis_model == 'debris':
            # Optically thin & geometrically thick
            scale = np.exp(-np.outer(ks*ks, self._H2))
        else:
            raise ValueError("model not supported. Should never occur.")

        if inverse:
            scale = np.atleast_1d(1/scale).reshape(1, -1)
            qs = qs / rad_to_arcsec
            direction = 'backward'
        else:
            direction = 'forward'

        H = self._DHT.coefficients(qs, direction=direction) * scale

        return H

    def _check_uv_range(self, uv):
        """Check that the uv domain is properly covered"""

        # Check whether the first (last) collocation point is smaller (larger)
        # than the shortest (longest) deprojected baseline in the dataset
        if self.check_qbounds:
            if self.q[0] < uv.min():
                logging.warning(r"WARNING: First collocation point, q[0] = {:.3e} \lambda,"
                                " is at a baseline shorter than the"
                                " shortest deprojected baseline in the dataset,"
                                r" min(uv) = {:.3e} \lambda. For q[0] << min(uv),"
                                " the fit's total flux may be biased"
                                " low.".format(self.q[0], uv.min()))

            if self.q[-1] < uv.max():
                raise ValueError(r"ERROR: Last collocation point, {:.3e} \lambda, is at"
                                 " a shorter baseline than the longest deprojected"
                                 r" baseline in the dataset, {:.3e} \lambda. Please"
                                 " increase N in FrankMultFrequencyFitter (this is"
                                 " `hyperparameters: n` if you're using a parameter"
                                 " file). Or if you'd like to fit to a shorter maximum"
                                 " baseline, cut the (u, v) distribution before fitting"
                                 " (`modify_data: baseline_range` in the"
                                 " parameter file).".format(self.q[-1], uv.max()))

    @property
    def r(self):
        """Radius points, unit = arcsec"""
        return self._DHT.r * rad_to_arcsec

    @property
    def Rmax(self):
        """Maximum radius, unit = arcsec"""
        return self._DHT.Rmax * rad_to_arcsec

    @property
    def q(self):
        r"""Frequency points, unit = :math:`\lambda`"""
        return self._DHT.q

    @property
    def Qmax(self):
        r"""Maximum frequency, unit = :math:`\lambda`"""
        return self._DHT.Qmax

    @property
    def size(self):
        """Number of points in reconstruction"""
        return self._DHT.size

    @property
    def scale_height(self):
        """Vertical thickness of the disc, unit = arcsec"""
        if self._scale_height is not None:
            return self._scale_height
        else:
            return None


class GaussianModel:
    r"""
    Solves the linear regression problem to compute the posterior,

    .. math::
       P(I|q,V,p) \propto G(I-\mu, D),

    where :math:`I` is the intensity to be predicted, :math:`q` are the
    baselines and :math:`V` the visibility data. :math:`\mu` and :math:`D` are
    the mean and covariance of the posterior distribution.

    If :math:`p` is provided, the covariance matrix of the prior is included,
    with

    .. math::
        P(I|p) \propto G(I, S(p)),

    and the Bayesian Linear Regression problem is solved. :math:`S` is computed
    from the power spectrum, :math:`p`, if provided. Otherwise the traditional
    (frequentist) linear regression is used.

    The problem is framed in terms of the design matrix :math:`M` and
    information source :math:`j`.

    :math:`H(q)` is the matrix that projects the intensity :math:`I` to
    visibility space. :math:`M` is defined by

    .. math::
        M = H(q)^T w H(q),

    where :math:`w` is the weights matrix and

    .. math::
        j = H(q)^T w V.

    The mean and covariance of the posterior are then given by

    .. math::
        \mu = D j

    and

    .. math::
        D = [ M + S(p)^{-1}]^{-1},

    if the prior is provided, otherwise

    .. math::
        D = M^{-1}.

    Parameters
    ----------
    DHT : DiscreteHankelTransform
        A DHT object with N bins that defines H(p). The DHT is used to compute
        :math:`S(p)`
    M : 2/3D array, size = (N, N) or (Nf, N, N)
        The design matrix, see above. Nf is the number of channels (frequencies).
    j : 1/2D array, size = N or (Nf, N)
        Information source, see above
    p : 1D array, size = N, optional
        Power spectrum used to generate the covariance matrix :math:`S(p)`
    scale : 1D or 2D array, size = (Nf,) or (Nf, Nfields), optional
        Per-channel (and per-field) scale factors applied when accumulating
        the design matrix and information source, used in multi-frequency fits.
    guess : array, optional
        Initial guess used in computing the brightness.
    Nfields : int, optional
        Number of fields used to fit the data. Typically 1, but could be more
        for multi-wavelength data. If not provided it will be determined from
        the shape of guess.
    noise_likelihood : float, optional
        An optional parameter needed to compute the full likelihood, which
        should be equal to

        .. math::
            -\frac{1}{2} V^T w V + \frac{1}{2} \sum \log[w/(2 \pi)].

        If not provided, the likelihood can still be computed up to this
        missing constant
    """

    def __init__(self, DHT, M, j, p=None, scale=None, guess=None,
                 Nfields=None, noise_likelihood=0):

        self._DHT = DHT

        # Correct the shape of the design matrix etc.
        if len(M.shape) == 2:
            M = M.reshape(1, *M.shape)
        if len(j.shape) == 1:
            j = j.reshape(1, *j.shape)

        # Number of frequencies / radial points
        Nf, Nr = j.shape

        # Get the number of fields
        if Nfields is None:
            if guess is None:
                Nfields = 1
            else:
                guess = guess.reshape(-1, Nr)
                Nfields = guess.shape[0]
        elif guess is not None:
            guess = guess.reshape(Nfields, Nr)
        self._Nfields = Nfields

        # Create the correct shape for the power spectrum and scale factors
        if p is not None:
            p = p.reshape(-1, Nr)
            if p.shape[0] == 1 and Nfields > 1:
                p = np.repeat(p, Nfields, axis=0)
        self._p = p

        if scale is None:
            self._scale = np.ones([Nf, Nfields], dtype='f8')
        else:
            self._scale = np.empty([Nf, Nfields], dtype='f8')
            self._scale[:] = scale.reshape(Nf, -1)

        if p is not None:
            if np.any(p <= 0) or np.any(np.isnan(p)):
                raise ValueError("Bad value in power spectrum. The power"
                                 " spectrum must be positive and not contain"
                                 " any NaN values. This is likely due to"
                                 " your UVtable (incorrect units or weights),"
                                 " or the deprojection being applied (incorrect"
                                 " geometry and/or phase center). Else you may"
                                 " want to adjust `rout` (ensure it is larger than"
                                 " the source) or `n` (up to ~300).")

            Ykm = self._DHT.coefficients()
            Sj = np.einsum('ji,lj,jk->lik', Ykm, 1/p, Ykm)

            self._Sinv = np.zeros([Nr*Nfields, Nr*Nfields], dtype='f8')
            for n in range(0, Nfields):
                sn = n*Nr
                en = (n+1)*Nr

                self._Sinv[sn:en, sn:en] += Sj[n]
        else:
            self._Sinv = None

        # Compute the design matrix
        self._M = np.zeros([Nr*Nfields, Nr*Nfields], dtype='f8')
        self._j = np.zeros(Nr*Nfields, dtype='f8')
        for si, Mi, ji in zip(self._scale, M, j):

            for n in range(0, Nfields):
                sn = n*Nr
                en = (n+1)*Nr

                self._j[sn:en] += si[n] * ji
                for m in range(0, Nfields):
                    sm = m*Nr
                    em = (m+1)*Nr

                    self._M[sn:en, sm:em] += si[n]*si[m] * Mi

        self._like_noise = noise_likelihood

        self._fit()

    def _fit(self):
        """Compute the mean and variance"""
        # Compute the inverse prior covariance, S(p)^-1
        Sinv = self._Sinv
        if Sinv is None:
            Sinv = 0

        Dinv = self._M + Sinv

        try:
            self._Dchol = scipy.linalg.cho_factor(Dinv)
            self._Dsvd = None

            self._mu = scipy.linalg.cho_solve(self._Dchol, self._j)
        except np.linalg.LinAlgError:
            U, s, V = scipy.linalg.svd(Dinv, full_matrices=False)

            s1 = np.where(s > 0, 1. / s, 0)

            self._Dchol = None
            self._Dsvd = U, s1, V

            self._mu = np.dot(V.T, np.multiply(np.dot(U.T, self._j), s1))

        # Reset the covariance matrix - we will compute it when needed
        if self._Nfields > 1:
            self._mu = self._mu.reshape(self._Nfields, self.size)
        self._cov = None

    def Dsolve(self, b):
        r"""
        Compute :math:`D \cdot b` by solving :math:`D^{-1} x = b`.

        Parameters
        ----------
        b : array, size = (N,...)
            Right-hand side to solve for

        Returns
        -------
        x : array, shape = np.shape(b)
            Solution to the equation D x = b
        """
        if self._Dchol is not None:
            return scipy.linalg.cho_solve(self._Dchol, b)
        else:
            U, s1, V = self._Dsvd
            return np.dot(V.T, np.multiply(np.dot(U.T, b), s1))

    def draw(self, N):
        """Compute N draws from the posterior"""
        draws = np.random.multivariate_normal(self.mean.reshape(-1), self.covariance, N)
        if self.num_fields > 1:
            draws = draws.reshape(N, self.num_fields, self.size)
        return draws
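
    # Sampling sketch (hypothetical fitted model `gm`): percentile bands of
    # the brightness from posterior draws:
    #
    #     samples = gm.draw(1000)
    #     lo, med, hi = np.percentile(samples, [16, 50, 84], axis=0)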

    def log_likelihood(self, I=None):
        r"""
        Compute one of two types of likelihood.

        If :math:`I` is provided, this computes

        .. math::
            \log[P(I,V|S)].

        Otherwise the marginalized likelihood is computed,

        .. math::
            \log[P(V|S)].

        Parameters
        ----------
        I : array, size = N, optional, unit = Jy / sr
            Intensity :math:`I(r)` to compute the likelihood of

        Returns
        -------
        log_P : float
            Log likelihood, :math:`\log[P(I,V|p)]` or :math:`\log[P(V|p)]`

        Notes
        -----
        1. The prior probability P(S) is not included.
        2. The likelihoods take the form:

        .. math::
            \log[P(I,V|p)] = j^T I - \frac{1}{2} I^T D^{-1} I
               - \frac{1}{2} \log[\det(2 \pi S)] + H_0

        and

        .. math::
            \log[P(V|p)] = \frac{1}{2} j^T D j
               + \frac{1}{2} \log[\det(D)/\det(S)] + H_0

        where

        .. math::
            H_0 = -\frac{1}{2} V^T w V + \frac{1}{2} \sum \log(w /2 \pi)

        is the noise likelihood.
        """
        if I is None:
            like = 0.5 * np.sum(self._j * self._mu)

            if self._Sinv is not None:
                Q = self.Dsolve(self._Sinv)
                like += 0.5 * np.linalg.slogdet(Q)[1]
        else:
            Sinv = self._Sinv
            if Sinv is None:
                Sinv = 0

            Dinv = self._M + Sinv

            like = np.sum(self._j * I) - 0.5 * np.dot(I, np.dot(Dinv, I))

            if self._Sinv is not None:
                like += 0.5 * np.linalg.slogdet(2 * np.pi * Sinv)[1]

        return like + self._like_noise

    def solve_non_negative(self):
        """Compute the best fit solution with non-negative intensities"""
        Sinv = self._Sinv
        if Sinv is None:
            Sinv = 0

        Dinv = self._M + Sinv
        return scipy.optimize.nnls(Dinv, self._j,
                                   maxiter=100*len(self._j))[0]

    @property
    def mean(self):
        """Posterior mean, unit = Jy / sr"""
        return self._mu

    @property
    def MAP(self):
        """Posterior maximum, unit = Jy / sr"""
        return self.mean

    @property
    def covariance(self):
        """Posterior covariance, unit = (Jy / sr)**2"""
        if self._cov is None:
            self._cov = self.Dsolve(np.eye(self.size*self.num_fields))
        return self._cov

    @property
    def s_0(self):
        """Zero point of the log-brightness; always 0 for the Gaussian model"""
        return 0

    @property
    def power_spectrum(self):
        """Power spectrum coefficients"""
        if self.num_fields == 1 and self._p is not None:
            return self._p.reshape(self.size)
        return self._p

    @property
    def num_fields(self):
        """Number of fields fit for"""
        return self._Nfields

    @property
    def size(self):
        """Number of points in reconstruction"""
        return self._DHT.size


class LogNormalMAPModel:
    r"""
    Finds the maximum a posteriori field for log-normal regression problems,

    .. math::
        P(s|q,V,p,s_0) \propto G(H \exp[scale (s+s_0)] - V, M) P(s|p),

    where :math:`s` is the log-intensity to be predicted, :math:`q` are the
    baselines and :math:`V` the visibility data. :math:`H` is the design
    matrix of the transform, e.g. the coefficient matrix of the forward
    Hankel transform.

    If :math:`p` is provided, the covariance matrix of the prior is included,
    with

    .. math::
        P(s|p) \propto G(s, S(p)).

    The problem is framed in terms of the design matrix :math:`M` and
    information source :math:`j`.

    :math:`H(q)` is the matrix that projects the intensity
    :math:`\exp[scale (s+s_0)]` to visibility space. :math:`M` is defined by

    .. math::
        M = H(q)^T w H(q),

    where :math:`w` is the weights matrix and

    .. math::
        j = H(q)^T w V.

    The maximum a posteriori field, s_MAP, is found by maximizing
    :math:`\log P(s|q,V,p,s_0)` and the posterior covariance at s_MAP is

    .. math::
        D = [ M + S(p)^{-1}]^{-1}.

    If the prior is not provided then

    .. math::
        D = M^{-1},

    and the posterior for exp(s) is the same as the standard Gaussian model.

    Note: This class also supports :math:`M` and :math:`j` being split into
    multiple terms (i.e. :math:`M_i, j_i`) such that different scale
    factors, :math:`s0_i`, can be applied to each system. This allows fitting
    multi-frequency data.

    Parameters
    ----------
    DHT : DiscreteHankelTransform
        A DHT object with N bins that defines H(p). The DHT is used to compute
        :math:`S(p)`
    M : 2/3D array, size = (N, N) or (Nf, N, N)
        The design matrix, see above. Nf is the number of channels (frequencies).
    j : 1/2D array, size = N or (Nf, N)
        Information source, see above
    p : 1D array, size = N, optional
        Power spectrum used to generate the covariance matrix :math:`S(p)`
    scale : float, 1D array (size=N), or list of
        Scale factors (see above). These can be a constant, one per
        brightness point, or per band (optionally per collocation point)
        to enable multi-frequency fitting.
    s0 : float or 1D array, size = Nfields, optional
        Zero point for the brightness function, see above.
    guess : array, optional
        Initial guess used in computing the brightness.
    Nfields : int, optional
        Number of fields used to fit the data. Typically 1, but could be more
        for multi-wavelength data. If not provided it will be determined from
        the shape of guess.
    full_hessian : float, range [0, 1]
        If 1 then use the full Hessian in evaluating the matrix :math:`D`. When
        0, a term is omitted that can cause the Hessian not to be a positive-
        definite matrix when the solution is a poor fit to the data. A value
        between 0 and 1 will scale this term by that factor.
    noise_likelihood : float, optional
        An optional parameter needed to compute the full likelihood, which
        should be equal to

        .. math::
            -\frac{1}{2} V^T w V + \frac{1}{2} \sum \log[w/(2 \pi)].

        If not provided, the likelihood can still be computed up to this
        missing constant
    """

    def __init__(self, DHT, M, j, p=None, scale=None, s0=None, guess=None,
                 Nfields=None, full_hessian=1, noise_likelihood=0):

        self._DHT = DHT
        self._full_hess = full_hessian

        # Correct the shape of the design matrix etc.
        if len(M.shape) == 2:
            M = M.reshape(1, *M.shape)
        if len(j.shape) == 1:
            j = j.reshape(1, *j.shape)

        self._M = M
        self._j = j

        # Number of frequencies / radial points
        Nf, Nr = j.shape

        # Get the number of signal fields
        if Nfields is None:
            if guess is None:
                Nfields = 1
            else:
                guess = guess.reshape(-1, Nr)
                Nfields = guess.shape[0]
        elif guess is not None:
            guess = guess.reshape(Nfields, Nr)
        self._Nfields = Nfields

        # Create the correct shape for the power spectrum and scale factors
        if p is not None:
            p = p.reshape(-1, Nr)
            if p.shape[0] == 1 and Nfields > 1:
                p = np.repeat(p, Nfields, axis=0)
        self._p = p

        if scale is None:
            self._scale = np.ones([Nf, Nfields], dtype='f8')
        else:
            self._scale = np.empty([Nf, Nfields], dtype='f8')
            self._scale[:] = scale.reshape(Nf, -1)

        if s0 is None:
            self._s0 = np.ones(Nfields, dtype='f8')
        else:
            s0 = np.atleast_1d(s0)
            if len(s0) == 1:
                s0 = np.repeat(s0, Nfields)
            elif len(s0) != Nfields:
                raise ValueError("Signal zero-point (s0) must have the same"
                                 " length as the number of fields or length 1")
            self._s0 = s0.reshape(Nfields, 1)

        if p is not None:
            if np.any(p <= 0) or np.any(np.isnan(p)):
                raise ValueError("Bad value in power spectrum. The power"
                                 " spectrum must be positive and not contain"
                                 " any NaN values. This is likely due to"
                                 " your UVtable (incorrect units or weights),"
                                 " or the deprojection being applied (incorrect"
                                 " geometry and/or phase center). Else you may"
                                 " want to increase `rout` by 10-20% or `n` so"
                                 " that it is large, >~300.")

            Ykm = self._DHT.coefficients()
            self._Sinv = np.einsum('ji,lj,jk->lik', Ykm, 1/p, Ykm)
        else:
            self._Sinv = np.zeros([Nfields, Nr, Nr], dtype='f8')

        self._like_noise = noise_likelihood

        self._fit(guess)

    def _fit(self, guess):
        """Find the maximum likelihood solution and variance"""
        Sinv = self._Sinv
        Ns, Nr = Sinv.shape[:2]

        Sflat = np.zeros([Ns*Nr, Ns*Nr])
        for i in range(Ns):
            s = i*Nr
            e = (i+1)*Nr
            Sflat[s:e, s:e] = Sinv[i]

        scale = self._scale
        s0 = self._s0

        def H(s):
            """Negative log-posterior; the objective to minimize"""
            s = s.reshape(Ns, Nr)
            I = np.exp(np.dot(scale, s+s0))  # shape Nf, Nr

            f = 0.5*np.einsum('ij,ijk,ik', s, Sinv, s)

            f += 0.5*np.einsum('ij,ijk,ik', I, self._M, I)
            f -= np.sum(I*self._j)

            return f

        def jac(s):
            """1st derivative of the objective"""
            s = s.reshape(Ns, Nr)
            I = np.exp(np.dot(scale, s+s0))  # shape Nf, Nr

            sI = np.einsum('is,ij->isj', scale, I)  # shape Nf, Ns, Nr

            S1_s = np.einsum('sjk,sk->sj', Sinv, s)  # shape Ns, Nr
            MI = np.einsum('isj,ijk,ik->sj', sI, self._M, I)
            jI = np.einsum('isj,ij->sj', sI, self._j)

            return (S1_s + (MI - jI)).reshape(Ns*Nr)

        def hess(s):
            """2nd derivative of the objective"""
            s = s.reshape(Ns, Nr)
            I = np.exp(np.dot(scale, s+s0))  # shape Nf, Nr

            sI = np.einsum('is,ij->isj', scale, I)  # shape Nf, Ns, Nr
            s2I = np.einsum('is,ij->isj', scale**2, I)  # shape Nf, Ns, Nr

            Mjk = np.einsum('isj,ijk,itk->sjtk', sI, self._M, sI)

            resid = 0
            if self._full_hess > 0:
                MI = Mjk.sum(3)
                jI = np.einsum('is,itj,ij->sjt', scale, sI, self._j)

                resid = np.einsum('sjt,jk->sjtk', MI-jI, np.eye(Nr)).reshape(Ns*Nr, Ns*Nr)
                if self._full_hess < 1:
                    resid *= self._full_hess

            return Mjk.reshape(Ns*Nr, Ns*Nr) + resid + Sflat

        x = guess.reshape(Ns*Nr)

        def limit_step(dx, x):
            alpha = 1.1*np.min(np.abs(x/dx))

            alpha = min(alpha, 1)
            return alpha*dx

        # Ignore convergence failures: they are often caused by round-off
        # when we are already very close to the minimum
        search = LineSearch(reduce_step=limit_step)
        s, _ = MinimizeNewton(H, jac, hess, x, search, tol=1e-7)
        self._s_MAP = s.reshape(Ns, Nr)

        Dinv = hess(s)
        try:
            self._Dchol = scipy.linalg.cho_factor(Dinv)
            self._Dsvd = None
        except np.linalg.LinAlgError:
            U, s_svd, V = scipy.linalg.svd(Dinv, full_matrices=False)

            s1 = np.where(s_svd > 0, 1./s_svd, 0)

            self._Dchol = None
            self._Dsvd = U, s1, V

        self._cov = None

    def Dsolve(self, b):
        r"""
        Compute :math:`D \cdot b` by solving :math:`D^{-1} x = b`.

        Parameters
        ----------
        b : array, size = (N,...)
            Right-hand side to solve for

        Returns
        -------
        x : array, shape = np.shape(b)
            Solution to the equation D x = b
        """
        if self._Dchol is not None:
            return scipy.linalg.cho_solve(self._Dchol, b)
        else:
            U, s1, V = self._Dsvd
            return np.dot(V.T, np.multiply(np.dot(U.T, b), s1))

    def draw(self, N):
        """Compute N draws from the (approximate) posterior"""
        draws = np.random.multivariate_normal(self.MAP.reshape(-1), self.covariance, N)
        if self.num_fields > 1:
            draws = draws.reshape(N, self.num_fields, self.size)
        return draws

    def log_likelihood(self, s=None):
        r"""
        Compute the likelihood,

        .. math::
            \log[P(I,V|S)].

        Parameters
        ----------
        s : array, size = N, optional
            Log-brightness, where :math:`I(r) = \exp[scale (s+s_0)]`, to
            compute the likelihood of

        Returns
        -------
        log_P : float
            Log likelihood, :math:`\log[P(I,V|p)]`

        Notes
        -----
        1. The prior probability P(S) is not included.
        2. The likelihood takes the form:

        .. math::
            \log[P(I,V|p)] = j^T I - \frac{1}{2} I^T D^{-1} I
               - \frac{1}{2} \log[\det(2 \pi S)] + H_0

        where

        .. math::
            H_0 = -\frac{1}{2} V^T w V + \frac{1}{2} \sum \log(w /2 \pi)

        is the noise likelihood.
        """
        if s is None:
            s = self._s_MAP

        Sinv = self._Sinv
        if Sinv is None:
            Sinv = 0

        I = np.exp(np.einsum('i,j->ij', self._scale, s))

        like = - 0.5*np.dot(s-self._s0, np.dot(Sinv, s-self._s0))

        like -= 0.5*np.einsum('ij,ijk,ik', I, self._M, I)
        like += np.sum(I*self._j)

        if self._Sinv is not None:
            like += 0.5 * np.linalg.slogdet(2 * np.pi * Sinv)[1]

        return like + self._like_noise

    def solve_non_negative(self):
        """Compute the best fit solution with non-negative intensities"""
        # The solution is always non-negative. Provided for convenience.
        return self.MAP

    @property
    def MAP(self):
        """Posterior maximum, unit = Jy / sr"""
        MAP = self._s_MAP
        if MAP.shape[0] == 1:
            return MAP.reshape(self.size)
        return MAP

    @property
    def covariance(self):
        """Posterior covariance at MAP, unit = (Jy / sr)**2"""
        if self._cov is None:
            self._cov = self.Dsolve(np.eye(self.size*self.num_fields))
        return self._cov

    @property
    def power_spectrum(self):
        """Power spectrum coefficients"""
        p = self._p
        if p.shape[0] == 1:
            return p.reshape(self.size)
        return p

    @property
    def scale(self):
        """Scale factors used in the fit"""
        scale = self._scale
        if scale.shape[1] == 1:
            return scale[:, 0]
        return self._scale

    @property
    def s_0(self):
        """Zero point of the log-brightness"""
        s0 = self._s0
        if s0.shape[0] == 1:
            return s0[0]
        return s0

    @property
    def num_fields(self):
        """Number of fields fit for"""
        return self._Nfields

    @property
    def size(self):
        """Number of points in reconstruction"""
        return self._DHT.size