
# Frankenstein: 1D disc brightness profile reconstruction from Fourier data
# using non-parametric Gaussian Processes
#
# Copyright (C) 2019-2020 R. Booth, J. Jennings, M. Tazzari
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>
#

import numpy as np
import scipy.linalg
import scipy.sparse
import scipy.optimize
import logging

from frank.constants import rad_to_arcsec, deg_to_rad
from frank.minimizer import LineSearch, MinimizeNewton

class VisibilityMapping:
    r"""Builds the mapping between the visibility and image planes.

    VisibilityMapping generates the transform matrices :math:`H(q)` such that
    :math:`V_\nu(q) = H(q) I_\nu`. It also uses these to construct the design
    matrices :math:`M` and :math:`j` used in the fitting.

    VisibilityMapping supports the following models:
      1. An optically thick and geometrically thin disc
      2. An optically thin and geometrically thin disc
      3. An optically thin disc with a known Gaussian vertical structure.
    All models are axisymmetric.

    Parameters
    ----------
    DHT : DiscreteHankelTransform
        A DHT object with N bins that defines H(p). The DHT is used to compute
        :math:`S(p)`
    geometry : SourceGeometry object, optional
        Geometry used to correct the visibilities for the source inclination.
    vis_model : string,
        One of ['opt_thick', 'opt_thin', 'debris'], corresponding to models
        1-3 described above, respectively.
    scale_height : function H(R), units = arcsec
        The vertical thickness of the disc in terms of its Gaussian
        scale-height. Only used if vis_model="debris".
    block_data : bool, default = True
        Large temporary matrices are needed to set up the data. If block_data
        is True, we avoid this, limiting the memory requirement to block_size
        elements.
    block_size : int, default = 10**5
        Size of the matrices if blocking is used
    check_qbounds : bool, default = True
        Whether to check if the first (last) collocation point is smaller
        (larger) than the shortest (longest) deprojected baseline in the
        dataset
    verbose : bool, default = True
        Whether to print notification messages
    """

    def __init__(self, DHT, geometry,
                 vis_model='opt_thick', scale_height=None, block_data=True,
                 block_size=10 ** 5, check_qbounds=True, verbose=True):

        _vis_models = ['opt_thick', 'opt_thin', 'debris']
        if vis_model not in _vis_models:
            raise ValueError(f"vis_model must be one of {_vis_models}")

        # Store flags
        self._vis_model = vis_model
        self.check_qbounds = check_qbounds
        self._verbose = verbose

        self._chunking = block_data
        self._chunk_size = block_size

        self._DHT = DHT
        self._geometry = geometry

        # Check for consistency and report the model choice.
        self._scale_height = None
        if self._vis_model == 'opt_thick':
            if self._verbose:
                logging.info('  Assuming an optically thick model (the default): '
                             'Scaling the total flux to account for the source '
                             'inclination')
        elif self._vis_model == 'opt_thin':
            if self._verbose:
                logging.info('  Assuming an optically thin model: *Not* scaling the '
                             'total flux to account for the source inclination')
        elif self._vis_model == 'debris':
            if scale_height is None:
                raise ValueError('You requested a model with a non-zero scale height'
                                 ' but did not specify H(R) (scale_height=None)')
            self._scale_height = scale_height(self.r)
            self._H2 = 0.5*(2*np.pi*self._scale_height / rad_to_arcsec)**2

            if self._verbose:
                logging.info('  Assuming an optically thin but geometrically '
                             'thick model: *Not* scaling the total flux to '
                             'account for the source inclination')

    def map_visibilities(self, u, v, V, weights, frequencies=None, geometry=None):
        r"""
        Compute the matrices :math:`M` and :math:`j` from the visibility data.

        Also compute the null likelihood,

        .. math::
            H_0 = \frac{1}{2} \sum \log\left[\frac{w}{2 \pi}\right]
                  - \frac{1}{2} V^T w V

        Parameters
        ----------
        u,v : 1D array, unit = :math:`\lambda`
            uv-points of the visibilities
        V : 1D array, unit = Jy
            Visibility amplitudes at q
        weights : 1D array, optional, unit = Jy^-2
            Weights of the visibilities, weight = 1 / sigma^2, where sigma is
            the standard deviation
        frequencies : 1D array, optional, unit = Hz
            Channel frequencies of each data point. If not provided, a single
            channel is assumed.
        geometry : SourceGeometry object
            Geometry used to deproject the visibilities before fitting. If not
            provided, the geometry passed in during construction will be used.

        Returns
        -------
        mapped visibilities : dict
            The format depends on whether a single channel was assumed
            (frequencies=None). The following data may be present:
                'multi_freq' : bool,
                    Specifies if a multi-frequency analysis was done
                'channels' : array, 1D, multi-frequency analysis only
                    The frequency of each channel.
                'M' : array, 2D or 3D.
                    The matrix :math:`M` of the mapping. This will have shape
                    (num_channels, size, size) for multi-frequency analysis
                    and (size, size) for single-channel analysis.
                'j' : array, 1D or 2D.
                    The vector :math:`j` of the mapping. This will have shape
                    (num_channels, size) for multi-frequency analysis and
                    (size,) for single-channel analysis.
                'null_likelihood' : float,
                    The likelihood of a model with I=0. See above.
                'hash' : list,
                    Identifying data, used to ensure compatibility between the
                    mapped visibilities and fitting objects.
        """

        if geometry is None:
            geometry = self._geometry

        if self._verbose:
            logging.info('  Building visibility matrices M and j')

        # Deproject the visibilities
        u, v, k, V = geometry.apply_correction(u, v, V, use3D=True)
        q = np.hypot(u, v)

        # Check consistency of the uv points with the model
        self._check_uv_range(q)

        # Use only the real part of V.
        V = V.real
        w = np.ones_like(V) * weights

        multi_freq = True
        if frequencies is None:
            multi_freq = False
            frequencies = np.ones_like(V)

        channels = np.unique(frequencies)
        Ms = np.zeros([len(channels), self.size, self.size], dtype='f8')
        js = np.zeros([len(channels), self.size], dtype='f8')
        for i, f in enumerate(channels):
            idx = frequencies == f

            qi = q[idx]
            ki = k[idx]
            wi = w[idx]
            Vi = V[idx]

            # If chunking is used, we will build up M and j chunk-by-chunk
            if self._chunking:
                Nstep = int(self._chunk_size / self.size + 1)
            else:
                Nstep = len(Vi)

            start = 0
            end = Nstep
            Ndata = len(Vi)
            while start < Ndata:
                qs = qi[start:end]
                ks = ki[start:end]
                ws = wi[start:end]
                Vs = Vi[start:end]

                X = self._get_mapping_coefficients(qs, ks)

                wXT = np.array(X.T * ws, order='C')

                Ms[i] += np.dot(wXT, X)
                js[i] += np.dot(wXT, Vs)

                start = end
                end = min(Ndata, end + Nstep)

        # Compute likelihood normalization H_0, i.e., the
        # log-likelihood of a source with I=0.
        H0 = 0.5 * np.sum(np.log(w / (2 * np.pi)) - V * w * V)

        if multi_freq:
            return {
                'multi_freq' : True,
                'channels' : channels,
                'M' : Ms,
                'j' : js,
                'null_likelihood' : H0,
                'hash' : [True, self._DHT, geometry, self._vis_model, self._scale_height],
            }
        else:
            return {
                'multi_freq' : False,
                'channels' : None,
                'M' : Ms[0],
                'j' : js[0],
                'null_likelihood' : H0,
                'hash' : [False, self._DHT, geometry, self._vis_model, self._scale_height],
            }
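
    # A toy numerical sketch of the accumulation in map_visibilities (not
    # frank API): for mapping coefficients X (Nvis x N), weights w and data
    # V, the chunked loop above computes M = X^T diag(w) X and
    # j = X^T diag(w) V, equivalent to:
    #
    #   X = np.random.randn(100, 20)
    #   w = np.ones(100)
    #   V_data = np.random.randn(100)
    #   M = (X.T * w) @ X
    #   j = (X.T * w) @ V_data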

    def check_hash(self, hash, multi_freq=False, geometry=None):
        """Checks whether the hash of some mapped visibilities is compatible
        with this VisibilityMapping.

        Parameters
        ----------
        hash : list
            Hash to compare
        multi_freq : bool
            Whether we expect a multi-frequency fit
        geometry : SourceGeometry object, optional
            Geometry to use in the comparison.
        """

        if geometry is None:
            geometry = self._geometry

        passed = (
            multi_freq == hash[0] and
            self._DHT.Rmax == hash[1].Rmax and
            self._DHT.size == hash[1].size and
            self._DHT.order == hash[1].order and
            geometry.inc == hash[2].inc and
            geometry.PA == hash[2].PA and
            geometry.dRA == hash[2].dRA and
            geometry.dDec == hash[2].dDec and
            self._vis_model == hash[3]
        )

        if not passed:
            return False

        if self._scale_height is None:
            return hash[4] is None
        else:
            if hash[4] is None:
                return False
            else:
                return np.all(self._scale_height == hash[4])

    def predict_visibilities(self, I, q, k=None, geometry=None):
        r"""Compute the predicted visibilities given the brightness profile, I

        Parameters
        ----------
        I : array, unit = Jy
            Brightness at the collocation points.
        q : array, unit = :math:`\lambda`
            Radial uv-distance to predict the visibility at
        k : array, optional, unit = :math:`\lambda`
            Vertical uv-distance to predict the visibility. Only needed for a
            geometrically thick model.
        geometry : SourceGeometry object, optional
            Geometry used to correct the visibilities for the source
            inclination. Only needed for the optically thick model. If not
            provided, the geometry passed in during construction will be used.

        Returns
        -------
        V(q, k) : array, unit = Jy
            Predicted visibilities of a source with a radial flux distribution
            given by :math:`I` and the position angle, inclination determined
            by the geometry.

        Notes
        -----
        For an optically thick model the visibility amplitudes are reduced due
        to the projection but phase centre corrections are not added.
        """

        # Chunk the visibility calculation for speed
        if self._chunking:
            Ni = int(self._chunk_size / self.size + 1)
        else:
            Ni = len(q)

        end = 0
        start = 0
        V = []
        while end < len(q):
            start = end
            end = start + Ni
            qi = q[start:end]

            ki = None
            if k is not None:
                ki = k[start:end]

            H = self._get_mapping_coefficients(qi, ki, geometry)

            V.append(np.dot(H, I))
        return np.concatenate(V)
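
    # Hedged usage sketch (assumes `mapping` is a constructed
    # VisibilityMapping and `I` a brightness profile at mapping.r):
    #
    #   q_test = np.logspace(4, 7, 500)   # baselines in units of lambda
    #   V_test = mapping.predict_visibilities(I, q_test)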

    def invert_visibilities(self, V, R, geometry=None):
        r"""Compute the brightness, I, from the visibilities.

        Note this method does not work for an arbitrary distribution of
        baselines and therefore cannot be used to determine the brightness
        given a generic set of data. Instead it needs the visibilities at the
        collocation points of the DiscreteHankelTransform, q.

        For geometrically thick models the visibilities used must be those for
        which kz = 0.

        Given the above constraints, this method computes the inverse of
        predict_visibilities.

        Parameters
        ----------
        V : array, unit = Jy
            Visibility at the collocation points. This must be the deprojected
            and phase-center corrected visibilities.
        R : array, unit = arcsec
            Radial distance to compute the brightness at
        geometry : SourceGeometry object, optional
            Geometry used to correct the visibilities for the source
            inclination. Only needed for the optically thick model. If not
            provided, the geometry passed in during construction will be used.

        Returns
        -------
        I(R) : array, unit = Jy / Sr
            Brightness at the radial locations, R.

        Notes
        -----
        For the optically thick model, the inclination scaling applied in the
        forward transform is undone when computing the brightness.
        """

        # Chunk the brightness calculation for speed
        R = np.atleast_1d(R)
        if self._chunking:
            Ni = int(self._chunk_size / self.size + 1)
        else:
            Ni = len(R)

        end = 0
        start = 0
        I = []
        while end < len(R):
            start = end
            end = start + Ni
            Ri = R[start:end]

            H = self._get_mapping_coefficients(Ri, 0, geometry, inverse=True)

            I.append(np.dot(H, V))
        return np.concatenate(I)[R < self.Rmax]

    def transform(self, f, q=None, direction='forward'):
        """Apply a DHT directly to the data provided

        Parameters
        ----------
        f : array, size = N
            Function to Hankel transform, evaluated at the collocation points:
            f[k] = f(r_k) or f[k] = f(q_k)
        q : array or None
            The frequency points at which to evaluate the Hankel
            transform. If not specified, the conjugate points of the
            DHT will be used. For the backward transform, q should be
            the radius points in arcsec.
        direction : { 'forward', 'backward' }, optional
            Direction of the transform. If not supplied, the forward
            transform is used

        Returns
        -------
        H[f] : array, size = N or len(q) if supplied
            The Hankel transform of the array f
        """
        if direction == 'backward' and q is not None:
            q = q / rad_to_arcsec

        return self._DHT.transform(f, q, direction)

    def DHT_coefficients(self, direction='forward'):
        """Get the coefficients of the Discrete Hankel Transform.

        The coefficients are defined by

        .. math::
            H[f] = np.dot(H, f)

        Parameters
        ----------
        direction : { 'forward', 'backward' }, optional
            Direction of the transform. If not supplied, the forward
            transform is used

        Returns
        -------
        H : array, size = (N, N)
            The coefficient matrix of the transform
        """
        return self._DHT.coefficients(direction=direction)

    def interpolate(self, f, r, space='Real'):
        """
        Interpolate f (evaluated at the collocation points) to the new points,
        r, using interpolation that is consistent with the Fourier-Bessel
        Series / Discrete Hankel Transform.

        Parameters
        ----------
        f : array, size = N
            Function to interpolate, evaluated at the collocation points:
            f[k] = f(r_k) or f[k] = f(q_k)
        r : array or None
            The points at which to evaluate the interpolation.
        space : { 'Real', 'Fourier' }, optional
            Space in which the interpolation is done. If not supplied,
            'Real' is assumed.

        Returns
        -------
        f_interp : array, size = len(r)
            The interpolated results
        """
        if space == 'Real':
            r = r / rad_to_arcsec

        r = np.array(r)
        shape = r.shape
        r = r.reshape(-1)

        if self._chunking:
            Ni = int(self._chunk_size / len(r) + 1)
        else:
            Ni = len(r)

        end = 0
        start = 0
        I = []
        while end < len(r):
            start = end
            end = start + Ni
            ri = r[start:end]

            I.append(self._DHT.interpolate(f, ri, space))

        return np.concatenate(I).reshape(*shape)

    def _get_mapping_coefficients(self, qs, ks, geometry=None, inverse=False):
        r"""Get :math:`H(q)`, such that :math:`V(q) = H(q) I_\nu`"""

        if self._vis_model == 'opt_thick':
            # Optically thick & geometrically thin
            if geometry is None:
                geometry = self._geometry
            scale = np.cos(geometry.inc * deg_to_rad)
        elif self._vis_model == 'opt_thin':
            # Optically thin & geometrically thin
            scale = 1
        elif self._vis_model == 'debris':
            # Optically thin & geometrically thick
            scale = np.exp(-np.outer(ks*ks, self._H2))
        else:
            raise ValueError("model not supported. Should never occur.")

        if inverse:
            scale = np.atleast_1d(1/scale).reshape(1, -1)
            qs = qs / rad_to_arcsec
            direction = 'backward'
        else:
            direction = 'forward'

        H = self._DHT.coefficients(qs, direction=direction) * scale

        return H

    def _check_uv_range(self, uv):
        """Check that the uv domain is properly covered"""

        # Check whether the first (last) collocation point is smaller (larger)
        # than the shortest (longest) deprojected baseline in the dataset
        if self.check_qbounds:
            if self.q[0] < uv.min():
                logging.warning(r"WARNING: First collocation point, q[0] = {:.3e} \lambda,"
                                " is at a baseline shorter than the"
                                " shortest deprojected baseline in the dataset,"
                                r" min(uv) = {:.3e} \lambda. For q[0] << min(uv),"
                                " the fit's total flux may be biased"
                                " low.".format(self.q[0], uv.min()))

            if self.q[-1] < uv.max():
                raise ValueError(r"ERROR: Last collocation point, {:.3e} \lambda, is at"
                                 " a shorter baseline than the longest deprojected"
                                 r" baseline in the dataset, {:.3e} \lambda. Please"
                                 " increase N in FrankMultFrequencyFitter (this is"
                                 " `hyperparameters: n` if you're using a parameter"
                                 " file). Or if you'd like to fit to a shorter maximum"
                                 " baseline, cut the (u, v) distribution before fitting"
                                 " (`modify_data: baseline_range` in the"
                                 " parameter file).".format(self.q[-1], uv.max()))

    @property
    def r(self):
        """Radius points, unit = arcsec"""
        return self._DHT.r * rad_to_arcsec

    @property
    def Rmax(self):
        """Maximum radius, unit = arcsec"""
        return self._DHT.Rmax * rad_to_arcsec

    @property
    def q(self):
        r"""Frequency points, unit = :math:`\lambda`"""
        return self._DHT.q

    @property
    def Qmax(self):
        r"""Maximum frequency, unit = :math:`\lambda`"""
        return self._DHT.Qmax

    @property
    def size(self):
        """Number of points in reconstruction"""
        return self._DHT.size

    @property
    def scale_height(self):
        """Vertical thickness of the disc, unit = arcsec"""
        if self._scale_height is not None:
            return self._scale_height
        else:
            return None

class GaussianModel:
    r"""
    Solves the linear regression problem to compute the posterior,

    .. math::
       P(I|q,V,p) \propto G(I-\mu, D),

    where :math:`I` is the intensity to be predicted, :math:`q` are the
    baselines and :math:`V` the visibility data. :math:`\mu` and :math:`D` are
    the mean and covariance of the posterior distribution.

    If :math:`p` is provided, the covariance matrix of the prior is included,
    with

    .. math::
        P(I|p) \propto G(I, S(p)),

    and the Bayesian Linear Regression problem is solved. :math:`S` is
    computed from the power spectrum, :math:`p`, if provided. Otherwise the
    traditional (frequentist) linear regression is used.

    The problem is framed in terms of the design matrix :math:`M` and
    information source :math:`j`.

    :math:`H(q)` is the matrix that projects the intensity :math:`I` to
    visibility space. :math:`M` is defined by

    .. math::
        M = H(q)^T w H(q),

    where :math:`w` is the weights matrix and

    .. math::
        j = H(q)^T w V.

    The mean and covariance of the posterior are then given by

    .. math::
        \mu = D j

    and

    .. math::
        D = [ M + S(p)^{-1}]^{-1},

    if the prior is provided, otherwise

    .. math::
        D = M^{-1}.


    Parameters
    ----------
    DHT : DiscreteHankelTransform
        A DHT object with N bins that defines H(p). The DHT is used to compute
        :math:`S(p)`
    M : 2/3D array, size = (N, N) or (Nf, N, N)
        The design matrix, see above. Nf is the number of channels
        (frequencies).
    j : 1/2D array, size = N or (Nf, N)
        Information source, see above
    p : 1D array, size = N, optional
        Power spectrum used to generate the covariance matrix :math:`S(p)`
    scale : float, 1D array, or list of these, optional
        Scale factors applied per band when constructing :math:`M` and
        :math:`j`, used for multi-frequency fitting.
    guess : array, optional
        Initial guess used in computing the brightness.
    Nfields : int, optional
        Number of fields used to fit the data. Typically 1, but could be more
        for multi-wavelength data. If not provided it will be determined from
        the shape of guess.
    noise_likelihood : float, optional
        An optional parameter needed to compute the full likelihood, which
        should be equal to

        .. math::
            -\frac{1}{2} V^T w V + \frac{1}{2} \sum \log[w/(2 \pi)].

        If not provided, the likelihood can still be computed up to this
        missing constant
    """
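
    # A toy sketch of the linear algebra above (illustrative sizes only):
    # the posterior mean solves [M + S(p)^{-1}] mu = j, i.e. mu = D j.
    #
    #   N = 4
    #   M_toy = 2.0 * np.eye(N)          # stand-in for H^T w H
    #   j_toy = np.ones(N)               # stand-in for H^T w V
    #   Sinv_toy = 0.1 * np.eye(N)       # stand-in for S(p)^{-1}
    #   mu = np.linalg.solve(M_toy + Sinv_toy, j_toy)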

    def __init__(self, DHT, M, j, p=None, scale=None, guess=None,
                 Nfields=None, noise_likelihood=0):

        self._DHT = DHT

        # Correct shape of design matrix etc.
        if len(M.shape) == 2:
            M = M.reshape(1, *M.shape)
        if len(j.shape) == 1:
            j = j.reshape(1, *j.shape)

        # Number of frequencies / radial points
        Nf, Nr = j.shape

        # Get the number of fields
        if Nfields is None:
            if guess is None:
                Nfields = 1
            else:
                guess = guess.reshape(-1, Nr)
                Nfields = guess.shape[0]
        elif guess is not None:
            guess = guess.reshape(Nfields, Nr)
        self._Nfields = Nfields

        # Create the correct shape for the power spectrum and scale factors
        if p is not None:
            p = p.reshape(-1, Nr)
            if p.shape[0] == 1 and Nfields > 1:
                p = np.repeat(p, Nfields, axis=0)
        self._p = p

        if scale is None:
            self._scale = np.ones([Nf, Nfields], dtype='f8')
        else:
            self._scale = np.empty([Nf, Nfields], dtype='f8')
            self._scale[:] = scale.reshape(Nf, -1)

        if p is not None:
            if np.any(p <= 0) or np.any(np.isnan(p)):
                print(p)
                raise ValueError("Bad value in power spectrum. The power"
                                 " spectrum must be positive and not contain"
                                 " any NaN values. This is likely due to"
                                 " your UVtable (incorrect units or weights),"
                                 " or the deprojection being applied (incorrect"
                                 " geometry and/or phase center). Else you may"
                                 " want to adjust `rout` (ensure it is larger"
                                 " than the source) or `n` (up to ~300).")

            Ykm = self._DHT.coefficients()
            Sj = np.einsum('ji,lj,jk->lik', Ykm, 1/p, Ykm)

            self._Sinv = np.zeros([Nr*Nfields, Nr*Nfields], dtype='f8')
            for n in range(0, Nfields):
                sn = n*Nr
                en = (n+1)*Nr

                self._Sinv[sn:en, sn:en] += Sj[n]
        else:
            self._Sinv = None

        # Compute the design matrix
        self._M = np.zeros([Nr*Nfields, Nr*Nfields], dtype='f8')
        self._j = np.zeros(Nr*Nfields, dtype='f8')
        for si, Mi, ji in zip(self._scale, M, j):

            for n in range(0, Nfields):
                sn = n*Nr
                en = (n+1)*Nr

                self._j[sn:en] += si[n] * ji
                for m in range(0, Nfields):
                    sm = m*Nr
                    em = (m+1)*Nr

                    self._M[sn:en, sm:em] += si[n]*si[m] * Mi

        self._like_noise = noise_likelihood

        self._fit()

    def _fit(self):
        """Compute the mean and variance"""
        # Compute the inverse prior covariance, S(p)^-1
        Sinv = self._Sinv
        if Sinv is None:
            Sinv = 0

        Dinv = self._M + Sinv

        try:
            self._Dchol = scipy.linalg.cho_factor(Dinv)
            self._Dsvd = None

            self._mu = scipy.linalg.cho_solve(self._Dchol, self._j)

        except np.linalg.LinAlgError:
            U, s, V = scipy.linalg.svd(Dinv, full_matrices=False)

            s1 = np.where(s > 0, 1. / s, 0)

            self._Dchol = None
            self._Dsvd = U, s1, V

            self._mu = np.dot(V.T, np.multiply(np.dot(U.T, self._j), s1))

        # Reset the covariance matrix - we will compute it when needed
        if self._Nfields > 1:
            self._mu = self._mu.reshape(self._Nfields, self.size)
        self._cov = None
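
    # Sketch of the SVD fallback used in _fit (toy example, not frank API):
    # when the Cholesky factorization fails, D^{-1} x = b is solved with a
    # pseudo-inverse built from the SVD of D^{-1}:
    #
    #   A = np.array([[2., 0.], [0., 0.]])   # singular stand-in for D^{-1}
    #   b = np.array([1., 1.])
    #   U, sv, Vt = scipy.linalg.svd(A)
    #   sv1 = np.where(sv > 0, 1/sv, 0)      # invert non-zero singular values
    #   x = Vt.T @ ((U.T @ b) * sv1)         # least-squares solution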

    def Dsolve(self, b):
        r"""
        Compute :math:`D \cdot b` by solving :math:`D^{-1} x = b`.

        Parameters
        ----------
        b : array, size = (N,...)
            Right-hand side to solve for

        Returns
        -------
        x : array, shape = np.shape(b)
            Solution to the equation D x = b

        """
        if self._Dchol is not None:
            return scipy.linalg.cho_solve(self._Dchol, b)
        else:
            U, s1, V = self._Dsvd
            return np.dot(V.T, np.multiply(np.dot(U.T, b), s1))

    def draw(self, N):
        """Compute N draws from the posterior"""
        draws = np.random.multivariate_normal(self.mean.reshape(-1), self.covariance, N)
        if self.num_fields > 1:
            draws = draws.reshape(N, self.num_fields, self.size)
        return draws

    def log_likelihood(self, I=None):
        r"""
        Compute one of two types of likelihood.

        If :math:`I` is provided, this computes

        .. math::
            \log[P(I,V|S)].

        Otherwise the marginalized likelihood is computed,

        .. math::
            \log[P(V|S)].


        Parameters
        ----------
        I : array, size = N, optional, unit = Jy / sr
            Intensity :math:`I(r)` to compute the likelihood of

        Returns
        -------
        log_P : float
            Log likelihood, :math:`\log[P(I,V|p)]` or :math:`\log[P(V|p)]`

        Notes
        -----
        1. The prior probability P(S) is not included.
        2. The likelihoods take the form:

        .. math::
            \log[P(I,V|p)] = j^T I - \frac{1}{2} I^T D^{-1} I
                - \frac{1}{2} \log[\det(2 \pi S)] + H_0

        and

        .. math::
            \log[P(V|p)] = \frac{1}{2} j^T D j
                + \frac{1}{2} \log[\det(D)/\det(S)] + H_0

        where

        .. math::
            H_0 = -\frac{1}{2} V^T w V + \frac{1}{2} \sum \log(w /2 \pi)

        is the noise likelihood.
        """

        if I is None:
            like = 0.5 * np.sum(self._j * self._mu)

            if self._Sinv is not None:
                Q = self.Dsolve(self._Sinv)
                like += 0.5 * np.linalg.slogdet(Q)[1]
        else:
            Sinv = self._Sinv
            if Sinv is None:
                Sinv = 0

            Dinv = self._M + Sinv

            like = np.sum(self._j * I) - 0.5 * np.dot(I, np.dot(Dinv, I))

            if self._Sinv is not None:
                like += 0.5 * np.linalg.slogdet(2 * np.pi * Sinv)[1]

        return like + self._like_noise
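
    # Hedged usage sketch: with `mapped` returned by
    # VisibilityMapping.map_visibilities, a DHT `dht` and a trial power
    # spectrum `p` (hypothetical objects), the marginal likelihood of p
    # could be evaluated as
    #
    #   gm = GaussianModel(dht, mapped['M'], mapped['j'], p=p,
    #                      noise_likelihood=mapped['null_likelihood'])
    #   logL = gm.log_likelihood()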

    def solve_non_negative(self):
        """Compute the best fit solution with non-negative intensities"""
        Sinv = self._Sinv
        if Sinv is None:
            Sinv = 0

        Dinv = self._M + Sinv
        return scipy.optimize.nnls(Dinv, self._j,
                                   maxiter=100*len(self._j))[0]

    @property
    def mean(self):
        """Posterior mean, unit = Jy / sr"""
        return self._mu

    @property
    def MAP(self):
        """Posterior maximum, unit = Jy / sr"""
        return self.mean

    @property
    def covariance(self):
        """Posterior covariance, unit = (Jy / sr)**2"""
        if self._cov is None:
            self._cov = self.Dsolve(np.eye(self.size*self.num_fields))
        return self._cov

    @property
    def s_0(self):
        """Zero point of the brightness; always 0 for the Gaussian model"""
        return 0

    @property
    def power_spectrum(self):
        """Power spectrum coefficients"""
        if self.num_fields == 1 and self._p is not None:
            return self._p.reshape(self.size)
        return self._p

    @property
    def num_fields(self):
        """Number of fields fit for"""
        return self._Nfields

    @property
    def size(self):
        """Number of points in reconstruction"""
        return self._DHT.size

class LogNormalMAPModel:
    r"""
    Finds the maximum a posteriori field for log-normal regression problems,

    .. math::
        P(s|q,V,p,s_0) \propto G(H \exp[scale \cdot (s+s_0)] - V, M)\, P(s|p),

    where :math:`s` is the log-intensity to be predicted, :math:`q` are the
    baselines and :math:`V` the visibility data. :math:`H` is the design
    matrix of the transform, e.g. the coefficient matrix of the forward
    Hankel transform.

    If :math:`p` is provided, the covariance matrix of the prior is included,
    with

    .. math::
        P(s|p) \propto G(s, S(p)).

    The problem is framed in terms of the design matrix :math:`M` and
    information source :math:`j`.

    :math:`H(q)` is the matrix that projects the intensity
    :math:`\exp[scale \cdot (s+s_0)]` to visibility space. :math:`M` is
    defined by

    .. math::
        M = H(q)^T w H(q),

    where :math:`w` is the weights matrix and

    .. math::
        j = H(q)^T w V.


    The maximum a posteriori field, s_MAP, is found by maximizing
    :math:`\log P(s|q,V,p,s_0)` and the posterior covariance at s_MAP is

    .. math::
        D = [ M + S(p)^{-1}]^{-1}.

    If the prior is not provided then

    .. math::
        D = M^{-1},

    and the posterior for exp(s) is the same as the standard Gaussian model.

    Note: This class also supports :math:`M` and :math:`j` being split into
    multiple terms (i.e. :math:`M_i, j_i`) such that different scale
    factors, :math:`s_{0,i}`, can be applied to each system. This allows
    fitting multi-frequency data.


    Parameters
    ----------
    DHT : DiscreteHankelTransform
        A DHT object with N bins that defines H(p). The DHT is used to compute
        :math:`S(p)`
    M : 2/3D array, size = (N, N) or (Nf, N, N)
        The design matrix, see above. Nf is the number of channels
        (frequencies).
    j : 1/2D array, size = N or (Nf, N)
        Information source, see above
    p : 1D array, size = N, optional
        Power spectrum used to generate the covariance matrix :math:`S(p)`
    scale : float, 1D array (size = N), or list of these
        Scale factors s0 (see above). These factors can be a constant, one
        per brightness point or per band (optionally per collocation point)
        to enable multi-frequency fitting.
    s0 : float or 1D array, size = Nfields, optional
        Zero point for the brightness function, see above.
    guess : array, optional
        Initial guess used in computing the brightness.
    Nfields : int, optional
        Number of fields used to fit the data. Typically 1, but could be more
        for multi-wavelength data. If not provided it will be determined from
        the shape of guess.
    full_hessian : float, range [0, 1]
        If 1, use the full Hessian in evaluating the matrix :math:`D`. When
        0, a term is omitted that can cause the Hessian not to be a
        positive-definite matrix when the solution is a poor fit to the
        data. A value between 0 and 1 will scale this term by that factor.
    noise_likelihood : float, optional
        An optional parameter needed to compute the full likelihood, which
        should be equal to

        .. math::
            -\frac{1}{2} V^T w V + \frac{1}{2} \sum \log[w/(2 \pi)].

        If not provided, the likelihood can still be computed up to this
        missing constant
    """
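
    # A toy sketch of the MAP objective minimized in _fit (single field,
    # illustrative only): with I = exp(scale*(s + s0)), the negative log
    # posterior is H(s) = 0.5 s^T Sinv s + 0.5 I^T M I - j^T I.
    #
    #   def neg_log_post(s, Sinv, M, j, scale=1.0, s0=0.0):
    #       I = np.exp(scale * (s + s0))
    #       return 0.5 * s @ Sinv @ s + 0.5 * I @ M @ I - j @ I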

    def __init__(self, DHT, M, j, p=None, scale=None, s0=None, guess=None,
                 Nfields=None, full_hessian=1, noise_likelihood=0):

        self._DHT = DHT
        self._full_hess = full_hessian

        # Correct shape of design matrix etc.
        if len(M.shape) == 2:
            M = M.reshape(1, *M.shape)
        if len(j.shape) == 1:
            j = j.reshape(1, *j.shape)

        self._M = M
        self._j = j

        # Number of frequencies / radial points
        Nf, Nr = j.shape

        # Get the number of signal fields
        if Nfields is None:
            if guess is None:
                Nfields = 1
            else:
                guess = guess.reshape(-1, Nr)
                Nfields = guess.shape[0]
        elif guess is not None:
            guess = guess.reshape(Nfields, Nr)
        self._Nfields = Nfields

        # Create the correct shape for the power spectrum and scale factors
        if p is not None:
            p = p.reshape(-1, Nr)
            if p.shape[0] == 1 and Nfields > 1:
                p = np.repeat(p, Nfields, axis=0)
        self._p = p

        if scale is None:
            self._scale = np.ones([Nf, Nfields], dtype='f8')
        else:
            self._scale = np.empty([Nf, Nfields], dtype='f8')
            self._scale[:] = scale.reshape(Nf, -1)

        if s0 is None:
            self._s0 = np.ones(Nfields, dtype='f8')
        else:
            s0 = np.atleast_1d(s0)
            if len(s0) == 1:
                s0 = np.repeat(s0, Nfields)
            elif len(s0) != Nfields:
                raise ValueError("Signal zero-point (s0) must have the same "
                                 "length as the number of fields or length 1")
            self._s0 = s0.reshape(Nfields, 1)

        if p is not None:
            if np.any(p <= 0) or np.any(np.isnan(p)):
                raise ValueError("Bad value in power spectrum. The power"
                                 " spectrum must be positive and not contain"
                                 " any NaN values. This is likely due to"
                                 " your UVtable (incorrect units or weights),"
                                 " or the deprojection being applied (incorrect"
                                 " geometry and/or phase center). Else you may"
                                 " want to increase `rout` by 10-20% or `n` so"
                                 " that it is large, >~300.")

            Ykm = self._DHT.coefficients()
            self._Sinv = np.einsum('ji,lj,jk->lik', Ykm, 1/p, Ykm)
        else:
            self._Sinv = np.zeros([Nfields, Nr, Nr], dtype='f8')

        self._like_noise = noise_likelihood

        self._fit(guess)

    def _fit(self, guess):
        """Find the maximum likelihood solution and variance"""
        Sinv = self._Sinv
        Ns, Nr = Sinv.shape[:2]

        Sflat = np.zeros([Ns*Nr, Ns*Nr])
        for i in range(Ns):
            s = i*Nr
            e = (i+1)*Nr
            Sflat[s:e, s:e] = Sinv[i]

        scale = self._scale
        s0 = self._s0

        def H(s):
            """Negative log-likelihood function"""
            s = s.reshape(Ns, Nr)
            I = np.exp(np.dot(scale, s+s0))  # shape Nf, Nr

            f = 0.5*np.einsum('ij,ijk,ik', s, Sinv, s)

            f += 0.5*np.einsum('ij,ijk,ik', I, self._M, I)
            f -= np.sum(I*self._j)

            return f

        def jac(s):
            """1st derivative of the negative log-likelihood"""
            s = s.reshape(Ns, Nr)
            I = np.exp(np.dot(scale, s+s0))  # shape Nf, Nr

            sI = np.einsum('is,ij->isj', scale, I)  # shape Nf, Ns, Nr

            S1_s = np.einsum('sjk,sk->sj', Sinv, s)  # shape Ns, Nr
            MI = np.einsum('isj,ijk,ik->sj', sI, self._M, I)
            jI = np.einsum('isj,ij->sj', sI, self._j)

            return (S1_s + (MI - jI)).reshape(Ns*Nr)

        def hess(s):
            """2nd derivative of the negative log-likelihood"""
            s = s.reshape(Ns, Nr)
            I = np.exp(np.dot(scale, s+s0))  # shape Nf, Nr

            sI = np.einsum('is,ij->isj', scale, I)      # shape Nf, Ns, Nr
            s2I = np.einsum('is,ij->isj', scale**2, I)  # shape Nf, Ns, Nr

            Mjk = np.einsum('isj,ijk,itk->sjtk', sI, self._M, sI)

            resid = 0
            if self._full_hess > 0:
                MI = Mjk.sum(3)
                jI = np.einsum('is,itj,ij->sjt', scale, sI, self._j)

                resid = np.einsum('sjt,jk->sjtk', MI-jI, np.eye(Nr)).reshape(Ns*Nr, Ns*Nr)
                if self._full_hess < 1:
                    resid *= self._full_hess

            return Mjk.reshape(Ns*Nr, Ns*Nr) + resid + Sflat

        x = guess.reshape(Ns*Nr)

        def limit_step(dx, x):
            alpha = 1.1*np.min(np.abs(x/dx))

            alpha = min(alpha, 1)
            return alpha*dx

        # Ignore convergence because it will often fail due to round-off when
        # we're super close to the minimum
        search = LineSearch(reduce_step=limit_step)
        s, _ = MinimizeNewton(H, jac, hess, x, search, tol=1e-7)
        self._s_MAP = s.reshape(Ns, Nr)

        Dinv = hess(s)
        try:
            self._Dchol = scipy.linalg.cho_factor(Dinv)
            self._Dsvd = None
        except np.linalg.LinAlgError:
            U, s_svd, V = scipy.linalg.svd(Dinv, full_matrices=False)

            s1 = np.where(s_svd > 0, 1./s_svd, 0)

            self._Dchol = None
            self._Dsvd = U, s1, V

        self._cov = None
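
    # Sketch of the step limiter used above (toy numbers): limit_step scales
    # the Newton update so that no component of s overshoots a sign change by
    # more than about 10%:
    #
    #   x = np.array([1.0, 2.0])
    #   dx = np.array([-4.0, 1.0])
    #   alpha = min(1.1 * np.min(np.abs(x / dx)), 1)   # = 0.275 here
    #   step = alpha * dx                              # damped Newton step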

    def Dsolve(self, b):
        r"""
        Compute :math:`D \cdot b` by solving :math:`D^{-1} x = b`.

        Parameters
        ----------
        b : array, size = (N,...)
            Right-hand side to solve for

        Returns
        -------
        x : array, shape = np.shape(b)
            Solution to the equation D x = b

        """
        if self._Dchol is not None:
            return scipy.linalg.cho_solve(self._Dchol, b)
        else:
            U, s1, V = self._Dsvd
            return np.dot(V.T, np.multiply(np.dot(U.T, b), s1))

    def draw(self, N):
        """Compute N draws from the (approximate) posterior"""
        draws = np.random.multivariate_normal(self.MAP.reshape(-1), self.covariance, N)
        if self.num_fields > 1:
            draws = draws.reshape(N, self.num_fields, self.size)
        return draws

    def log_likelihood(self, s=None):
        r"""
        Compute the likelihood,

        .. math::
            \log[P(I,V|S)].

        Parameters
        ----------
        s : array, size = N, optional
            Log-intensity field, where :math:`I(r)=\exp[scale \cdot s]`, to
            compute the likelihood of

        Returns
        -------
        log_P : float
            Log likelihood, :math:`\log[P(I,V|p)]`

        Notes
        -----
        1. The prior probability P(S) is not included.
        2. The likelihood takes the form:

        .. math::
            \log[P(I,V|p)] = j^T I - \frac{1}{2} I^T D^{-1} I
                - \frac{1}{2} \log[\det(2 \pi S)] + H_0

        where

        .. math::
            H_0 = -\frac{1}{2} V^T w V + \frac{1}{2} \sum \log(w /2 \pi)

        is the noise likelihood.
        """

        if s is None:
            s = self._s_MAP

        Sinv = self._Sinv
        if Sinv is None:
            Sinv = 0

        I = np.exp(np.einsum('i,j->ij', self._scale, s))

        like = -0.5*np.dot(s-self._s0, np.dot(Sinv, s-self._s0))

        like -= 0.5*np.einsum('ij,ijk,ik', I, self._M, I)
        like += np.sum(I*self._j)

        if self._Sinv is not None:
            like += 0.5 * np.linalg.slogdet(2 * np.pi * Sinv)[1]

        return like + self._like_noise

    def solve_non_negative(self):
        """Compute the best fit solution with non-negative intensities"""
        # The solution is always non-negative. Provided for convenience.
        return self.MAP

    @property
    def MAP(self):
        """Posterior maximum of the log-brightness field, s_MAP"""
        MAP = self._s_MAP
        if MAP.shape[0] == 1:
            return MAP.reshape(self.size)
        return MAP

    @property
    def covariance(self):
        """Posterior covariance of the log-brightness at the MAP"""
        if self._cov is None:
            self._cov = self.Dsolve(np.eye(self.size*self.num_fields))
        return self._cov

    @property
    def power_spectrum(self):
        """Power spectrum coefficients"""
        p = self._p
        if p.shape[0] == 1:
            return p.reshape(self.size)
        return p

    @property
    def scale(self):
        """Scale factors applied to each band"""
        scale = self._scale
        if scale.shape[1] == 1:
            return scale[:, 0]
        return self._scale

    @property
    def s_0(self):
        """Zero point(s) of the log-brightness, s0"""
        s0 = self._s0
        if s0.shape[0] == 1:
            return s0[0]
        return s0

    @property
    def num_fields(self):
        """Number of fields fit for"""
        return self._Nfields

    @property
    def size(self):
        """Number of points in reconstruction"""
        return self._DHT.size