Skip site navigation (1)Skip section navigation (2)
Date:      Fri, 3 Dec 2021 13:43:13 GMT
From:      "Jason W. Bacon" <jwb@FreeBSD.org>
To:        ports-committers@FreeBSD.org, dev-commits-ports-all@FreeBSD.org, dev-commits-ports-main@FreeBSD.org
Subject:   git: 9152cfc775fc - main - biology/bio-mocha: bcftools plugin for mosaic chromosomal alterations
Message-ID:  <202112031343.1B3DhDZF087754@gitrepo.freebsd.org>

next in thread | raw e-mail | index | archive | help
The branch main has been updated by jwb:

URL: https://cgit.FreeBSD.org/ports/commit/?id=9152cfc775fca98435610da5faad6d56c5cdeacb

commit 9152cfc775fca98435610da5faad6d56c5cdeacb
Author:     Jason W. Bacon <jwb@FreeBSD.org>
AuthorDate: 2021-12-03 13:41:26 +0000
Commit:     Jason W. Bacon <jwb@FreeBSD.org>
CommitDate: 2021-12-03 13:41:26 +0000

    biology/bio-mocha: bcftools plugin for mosaic chromosomal alterations
    
    MoChA is a bcftools plugin released under the MIT license for mosaic
    chromosomal alteration detection and analysis from DNA microarray or
    whole genome sequence data. It can be used both with Illumina and
    Affymetrix data. It can also be used for detection of germline copy
    number variants. Data can be prepared in usable file formats using the
    gtc2vcf plugin.
---
 biology/Makefile                                   |   1 +
 biology/bio-mocha/Makefile                         |  48 +++
 biology/bio-mocha/distinfo                         |   5 +
 biology/bio-mocha/files/patch-Makefile             |  33 ++
 biology/bio-mocha/files/patch-configure.ac         |  11 +
 .../bio-mocha/files/patch-plugins_beta__binom.h    |  19 +
 biology/bio-mocha/files/patch-plugins_mocha.c      | 479 +++++++++++++++++++++
 biology/bio-mocha/pkg-descr                        |   8 +
 biology/bio-mocha/pkg-plist                        |   9 +
 9 files changed, 613 insertions(+)

diff --git a/biology/Makefile b/biology/Makefile
index 1c79cff38867..02ba7b46b286 100644
--- a/biology/Makefile
+++ b/biology/Makefile
@@ -11,6 +11,7 @@
     SUBDIR += bcftools
     SUBDIR += bedtools
     SUBDIR += bfc
+    SUBDIR += bio-mocha
     SUBDIR += bioawk
     SUBDIR += biococoa
     SUBDIR += biolibc
diff --git a/biology/bio-mocha/Makefile b/biology/bio-mocha/Makefile
new file mode 100644
index 000000000000..a046d5fd95c5
--- /dev/null
+++ b/biology/bio-mocha/Makefile
@@ -0,0 +1,48 @@
+PORTNAME=	bio-mocha
+DISTVERSION=	1.13
+CATEGORIES=	biology
+MASTER_SITES=	https://software.broadinstitute.org/software/mocha/
+DISTFILES+=	${PORTNAME}_${DISTVERSION}-20211015.tar.gz
+
+MAINTAINER=	jwb@FreeBSD.org
+COMMENT=	bcftools plugin for mosaic chromosomal alteration analysis
+
+LICENSE=	MIT
+
+LIB_DEPENDS=	libhts.so:biology/htslib
+BUILD_DEPENDS=	bash:shells/bash
+RUN_DEPENDS=	bcftools==${PORTVERSION}:biology/bcftools
+
+USES=		autoreconf gmake localbase perl5 python:env shebangfix
+USE_GITHUB=	yes
+USE_PERL5=	test
+
+GH_ACCOUNT=	samtools
+GH_PROJECT=	bcftools
+GNU_CONFIGURE=	yes
+SHEBANG_FILES=	misc/* test/test.pl
+
+post-extract:
+	@${MV} ${WRKDIR}/*.c ${WRKDIR}/*.h ${WRKSRC}/plugins
+	@${MKDIR} ${WRKSRC}/MoCha
+	@${MV} ${WRKDIR}/*.R ${WRKSRC}/MoCha
+
+pre-configure:
+	@${REINPLACE_CMD} -e 's|@PORTVERSION@|${PORTVERSION}|g' \
+		${WRKSRC}/configure.ac
+
+do-install:
+	${MKDIR} ${STAGEDIR}${PREFIX}/libexec/bcftools
+	${INSTALL_PROGRAM} ${WRKSRC}/plugins/extendFMT.so \
+		${STAGEDIR}${PREFIX}/libexec/bcftools
+	${INSTALL_PROGRAM} ${WRKSRC}/plugins/mocha.so \
+		${STAGEDIR}${PREFIX}/libexec/bcftools
+	${INSTALL_PROGRAM} ${WRKSRC}/plugins/mochatools.so \
+		${STAGEDIR}${PREFIX}/libexec/bcftools
+	${INSTALL_PROGRAM} ${WRKSRC}/plugins/score.so \
+		${STAGEDIR}${PREFIX}/libexec/bcftools
+	${INSTALL_PROGRAM} ${WRKSRC}/plugins/trio-phase.so \
+		${STAGEDIR}${PREFIX}/libexec/bcftools
+	(cd ${WRKSRC}/MoCha && ${COPYTREE_SHARE} . ${STAGEDIR}${DATADIR})
+
+.include <bsd.port.mk>
diff --git a/biology/bio-mocha/distinfo b/biology/bio-mocha/distinfo
new file mode 100644
index 000000000000..e67e570dfc93
--- /dev/null
+++ b/biology/bio-mocha/distinfo
@@ -0,0 +1,5 @@
+TIMESTAMP = 1638495618
+SHA256 (bio-mocha_1.13-20211015.tar.gz) = e145bb48fea347202e16395ab7d78e7e7314749b8d472a2071f8074004cd63a5
+SIZE (bio-mocha_1.13-20211015.tar.gz) = 73627
+SHA256 (samtools-bcftools-1.13_GH0.tar.gz) = 55fbc674ec69e243052e9fb6560eb43d06f45f210e1842ba4dbe33acb394e562
+SIZE (samtools-bcftools-1.13_GH0.tar.gz) = 3133637
diff --git a/biology/bio-mocha/files/patch-Makefile b/biology/bio-mocha/files/patch-Makefile
new file mode 100644
index 000000000000..3299954ae45b
--- /dev/null
+++ b/biology/bio-mocha/files/patch-Makefile
@@ -0,0 +1,33 @@
+--- Makefile.orig	2021-03-17 09:16:18 UTC
++++ Makefile
+@@ -58,13 +58,14 @@ pluginpath  = $(plugindir)
+ # Installation location for $(MISC_PROGRAMS) and $(MISC_SCRIPTS)
+ misc_bindir = $(bindir)
+ 
+-MKDIR_P = mkdir -p
+-INSTALL = install -p
+-INSTALL_DATA    = $(INSTALL) -m 644
+-INSTALL_DIR     = $(MKDIR_P) -m 755
+-INSTALL_MAN     = $(INSTALL_DATA)
+-INSTALL_PROGRAM = $(INSTALL)
+-INSTALL_SCRIPT  = $(INSTALL_PROGRAM)
++# Use BSD_INSTALL_PROGRAM to strip when WITH_DEBUG not set
++MKDIR_P 	= mkdir -p
++INSTALL 	= install -p
++INSTALL_DATA    = ${BSD_INSTALL_DATA}
++INSTALL_DIR     = $(MKDIR_P)
++INSTALL_MAN     = ${BSD_INSTALL_MAN}
++INSTALL_PROGRAM = ${BSD_INSTALL_PROGRAM}
++INSTALL_SCRIPT  = ${BSD_INSTALL_SCRIPT}
+ 
+ PROGRAMS = bcftools
+ MISC_SCRIPTS = \
+@@ -142,7 +143,7 @@ print-version:
+ ifdef USE_GPL
+     main.o : EXTRA_CPPFLAGS += -DUSE_GPL
+     OBJS += polysomy.o peakfit.o
+-    GSL_LIBS ?= -lgsl -lcblas
++    GSL_LIBS ?= -lgslcblas
+ endif
+ 
+ print-%:
diff --git a/biology/bio-mocha/files/patch-configure.ac b/biology/bio-mocha/files/patch-configure.ac
new file mode 100644
index 000000000000..ca845d2ad85f
--- /dev/null
+++ b/biology/bio-mocha/files/patch-configure.ac
@@ -0,0 +1,11 @@
+--- configure.ac.orig	2018-07-18 08:34:29 UTC
++++ configure.ac
+@@ -23,7 +23,7 @@
+ # DEALINGS IN THE SOFTWARE.
+ 
+ dnl Process this file with autoconf to produce a configure script
+-AC_INIT([BCFtools], m4_esyscmd_s([./version.sh 2>/dev/null]),
++AC_INIT([BCFtools], [@PORTVERSION@],
+         [samtools-help@lists.sourceforge.net], [], [http://www.htslib.org/])
+ AC_PREREQ([2.63])  dnl This version introduced 4-argument AC_CHECK_HEADER
+ AC_CONFIG_SRCDIR([main.c])
diff --git a/biology/bio-mocha/files/patch-plugins_beta__binom.h b/biology/bio-mocha/files/patch-plugins_beta__binom.h
new file mode 100644
index 000000000000..a6804a6dc9f4
--- /dev/null
+++ b/biology/bio-mocha/files/patch-plugins_beta__binom.h
@@ -0,0 +1,19 @@
+--- plugins/beta_binom.h.orig	2021-11-30 13:41:36 UTC
++++ plugins/beta_binom.h
+@@ -137,14 +137,14 @@ void beta_binom_update(beta_binom_t *self, double p, d
+  *  Returns the equivalent of dbeta_binom(a, a+b, p, (1 - rho) / rho, log=TRUE) from R package
+  * rmutil
+  */
+-inline double beta_binom_log_unsafe(const beta_binom_t *self, int a, int b) {
++static inline double beta_binom_log_unsafe(const beta_binom_t *self, int a, int b) {
+     return self->log_gamma_alpha[a] + self->log_gamma_beta[b] - self->log_gamma_alpha_beta[a + b];
+ }
+ 
+ /**
+  *  Same as before but it performs boundary checking before computing the log likelihood
+  */
+-inline double beta_binom_log(beta_binom_t *self, int a, int b) {
++static inline double beta_binom_log(beta_binom_t *self, int a, int b) {
+     if (a < 0 || b < 0) return NAN;
+     if (a > self->n1 || b > self->n1 || a + b > self->n2)
+         beta_binom_update(self, self->p, self->rho, a > b ? a : b, a + b);
diff --git a/biology/bio-mocha/files/patch-plugins_mocha.c b/biology/bio-mocha/files/patch-plugins_mocha.c
new file mode 100644
index 000000000000..665d785c00f3
--- /dev/null
+++ b/biology/bio-mocha/files/patch-plugins_mocha.c
@@ -0,0 +1,479 @@
+--- plugins/mocha.c.orig	2021-10-15 02:37:57 UTC
++++ plugins/mocha.c
+@@ -705,6 +705,44 @@ static double baf_phase_lod(const float *baf_arr, cons
+     return (double)ret * M_LOG10E;
+ }
+ 
++typedef struct
++{
++    const float *baf;
++    const int8_t *gt_phase;
++    int n;
++    const int *imap;
++    int8_t *path;
++    float err_log_prb;
++    float baf_sd;
++}   f1_data_t;
++
++double f1(double x, void *data)
++{
++    f1_data_t	*d = data;
++
++    return -baf_phase_lod(d->baf, d->gt_phase, d->n, d->imap, d->path,
++                          d->err_log_prb, d->baf_sd, x);
++}
++
++static inline f1_data_t *f1_pack(const float *baf, const int8_t *gt_phase, int n,
++		   const int *imap, int8_t *path, float err_log_prb,
++		   float baf_sd)
++{
++    // Switch to malloc() and free() if more than one object must exist at
++    // any given time
++    f1_data_t	*d = malloc(sizeof(f1_data_t));
++
++    d->baf = baf;
++    d->gt_phase = gt_phase;
++    d->n = n;
++    d->imap = imap;
++    d->path = path;
++    d->err_log_prb = err_log_prb;
++    d->baf_sd = baf_sd;
++
++    return d;
++}
++
+ // TODO find a better title for this function
+ static float compare_models(const float *baf, const int8_t *gt_phase, int n, const int *imap, float xy_log_prb,
+                             float err_log_prb, float flip_log_prb, float tel_log_prb, float baf_sd, const float *bdev,
+@@ -716,8 +754,11 @@ static float compare_models(const float *baf, const in
+     int n_flips = 0;
+     for (int i = 1; i < n; i++)
+         if (path[i - 1] && path[i] && path[i - 1] != path[i]) n_flips++;
+-    double f(double x, void *data) { return -baf_phase_lod(baf, gt_phase, n, imap, path, err_log_prb, baf_sd, x); }
+-    double x, fx = kmin_brent(f, 0.1, 0.2, NULL, KMIN_EPS, &x);
++    double x;
++    f1_data_t *f1_data = f1_pack(baf, gt_phase, n, imap, path, err_log_prb,
++				 baf_sd);
++    double fx = kmin_brent(f1, 0.1, 0.2, f1_data, KMIN_EPS, &x);
++    free(f1_data);
+     free(path);
+     return -(float)fx + (float)n_flips * flip_log_prb * (float)M_LOG10E;
+ }
+@@ -750,6 +791,34 @@ static float get_sample_mean(const float *v, int n, co
+     return mean /= (float)j;
+ }
+ 
++typedef struct
++{
++    const float *baf_arr;
++    int n;
++    const int *imap;
++    float baf_sd;
++}   f7_data_t;
++
++double f7(double x, void *data)
++{
++    f7_data_t *d = data;
++
++    return -baf_log_lkl(d->baf_arr, d->n, d->imap, d->baf_sd, x);
++}
++
++static inline f7_data_t *f7_pack(const float *baf_arr, int n, const int *imap,
++    float baf_sd)
++{
++    f7_data_t *d = malloc(sizeof(f7_data_t));
++
++    d->baf_arr = baf_arr;
++    d->n = n;
++    d->imap = imap;
++    d->baf_sd = baf_sd;
++
++    return d;
++}
++
+ static float get_baf_bdev(const float *baf_arr, int n, const int *imap, float baf_sd) {
+     double bdev = 0.0;
+     int j = 0;
+@@ -763,8 +832,9 @@ static float get_baf_bdev(const float *baf_arr, int n,
+     bdev /= j;
+     // simple method to compute bdev should work well for germline duplications
+     if ((float)bdev > 2.0f * baf_sd) return (float)bdev;
+-    double f(double x, void *data) { return -baf_log_lkl(baf_arr, n, imap, baf_sd, x); }
+-    kmin_brent(f, 0.1, 0.2, NULL, KMIN_EPS, &bdev);
++    f7_data_t *f7_data = f7_pack(baf_arr, n, imap, baf_sd);
++    kmin_brent(f7, 0.1, 0.2, f7_data, KMIN_EPS, &bdev);
++    free(f7_data);
+     return (float)bdev < 1e-4 ? (float)NAN : (float)bdev;
+ }
+ 
+@@ -801,10 +871,41 @@ static double ad_log_lkl(const int16_t *ad0_arr, const
+     return (double)ret * M_LOG10E;
+ }
+ 
++typedef struct
++{
++    const int16_t *ad0_arr;
++    const int16_t *ad1_arr;
++    int n;
++    const int *imap;
++    float ad_rho;
++}   f5_data_t;
++
++double f5(double x, void *data)
++{
++    f5_data_t *d = data;
++
++    return -ad_log_lkl(d->ad0_arr, d->ad1_arr, d->n, d->imap, d->ad_rho, x);
++}
++
++static inline f5_data_t *f5_pack(const int16_t *ad0_arr,
++    const int16_t *ad1_arr, int n, const int *imap, float ad_rho)
++{
++    f5_data_t *d = malloc(sizeof(f5_data_t));
++
++    d->ad0_arr = ad0_arr;
++    d->ad1_arr = ad1_arr;
++    d->n = n;
++    d->imap = imap;
++    d->ad_rho = ad_rho;
++
++    return d;
++}
++
+ static float get_ad_bdev(const int16_t *ad0_arr, const int16_t *ad1_arr, int n, const int *imap, float ad_rho) {
+     double bdev = 0.0;
+-    double f(double x, void *data) { return -ad_log_lkl(ad0_arr, ad1_arr, n, imap, ad_rho, x); }
+-    kmin_brent(f, 0.1, 0.2, NULL, KMIN_EPS, &bdev);
++    f5_data_t *f5_data = f5_pack(ad0_arr, ad1_arr, n, imap, ad_rho);
++    kmin_brent(f5, 0.1, 0.2, f5_data, KMIN_EPS, &bdev);
++    free(f5_data);
+     return (float)bdev < 1e-4 ? (float)NAN : (float)bdev;
+ }
+ 
+@@ -986,6 +1087,44 @@ static double ad_phase_lod(const int16_t *ad0_arr, con
+     return (double)ret * M_LOG10E;
+ }
+ 
++typedef struct
++{
++    const int16_t *ad0;
++    const int16_t *ad1;
++    const int8_t *gt_phase;
++    int n;
++    const int *imap;
++    int8_t *path;
++    float err_log_prb;
++    float ad_rho;
++}   f4_data_t;
++
++double f4(double x, void *data)
++{
++    f4_data_t *d = data;
++
++    return -ad_phase_lod(d->ad0, d->ad1, d->gt_phase, d->n, d->imap, d->path,
++        d->err_log_prb, d->ad_rho, x);
++}
++
++static inline f4_data_t *f4_pack(const int16_t *ad0, const int16_t *ad1,
++    const int8_t *gt_phase, int n, const int *imap, int8_t *path,
++    float err_log_prb, float ad_rho)
++{
++    f4_data_t *d = malloc(sizeof(f4_data_t));
++
++    d->ad0 = ad0;
++    d->ad1 = ad1;
++    d->gt_phase = gt_phase;
++    d->n = n;
++    d->imap = imap;
++    d->path = path;
++    d->err_log_prb = err_log_prb;
++    d->ad_rho = ad_rho;
++
++    return d;
++}
++
+ // TODO find a better title for this function
+ static float compare_wgs_models(const int16_t *ad0, const int16_t *ad1, const int8_t *gt_phase, int n, const int *imap,
+                                 float xy_log_prb, float err_log_prb, float flip_log_prb, float tel_log_prb,
+@@ -998,8 +1137,9 @@ static float compare_wgs_models(const int16_t *ad0, co
+     int n_flips = 0;
+     for (int i = 1; i < n; i++)
+         if (path[i - 1] && path[i] && path[i - 1] != path[i]) n_flips++;
+-    double f(double x, void *data) { return -ad_phase_lod(ad0, ad1, gt_phase, n, imap, path, err_log_prb, ad_rho, x); }
+-    double x, fx = kmin_brent(f, 0.1, 0.2, NULL, KMIN_EPS, &x);
++    f4_data_t *f4_data = f4_pack(ad0, ad1, gt_phase, n, imap, path, err_log_prb, ad_rho);
++    double x, fx = kmin_brent(f4, 0.1, 0.2, f4_data, KMIN_EPS, &x);
++    free(f4_data);
+     free(path);
+     return -(float)fx + (float)n_flips * flip_log_prb * (float)M_LOG10E;
+ }
+@@ -1485,6 +1625,129 @@ static float get_path_segs(const int8_t *path, const f
+     return 0;
+ }
+ 
++typedef struct
++{
++    float *lrr;
++    int a;
++    int16_t *ad0;
++    int16_t *ad1;
++    mocha_t *mocha;
++    const model_t *model;
++    sample_t *self;
++    float *baf;
++}   f3_data_t;
++    
++double f3(double x, void *data)
++{
++    f3_data_t *d = data;
++
++    if (d->model->flags & WGS_DATA)
++        return -lrr_ad_lod(d->lrr + d->a, d->ad0 + d->a, d->ad1 + d->a,
++            d->mocha->n_sites, NULL, NAN, d->model->lrr_bias,
++            d->model->lrr_hap2dip, d->self->adjlrr_sd,
++            d->self->stats.dispersion, x);
++    else
++        return -lrr_baf_lod(d->lrr + d->a, d->baf + d->a, d->mocha->n_sites,
++            NULL, NAN, d->model->lrr_bias, d->model->lrr_hap2dip,
++            d->self->adjlrr_sd, d->self->stats.dispersion, x);
++}
++
++static inline f3_data_t *f3_pack(float *lrr, int a, int16_t *ad0, int16_t *ad1,
++    mocha_t *mocha, const model_t *model, sample_t *self, float *baf)
++
++{
++    f3_data_t *d = malloc(sizeof(f3_data_t));
++
++    d->lrr = lrr;
++    d->a = a;
++    d->ad0 = ad0;
++    d->ad1 = ad1;
++    d->mocha = mocha;
++    d->model = model;
++    d->self = self;
++    d->baf = baf;
++
++    return d;
++}
++
++typedef struct
++{
++    int16_t *ad0;
++    int16_t *ad1;
++    int8_t *gt_phase;
++    mocha_t *mocha;
++    int *imap_arr;
++    int *beg;
++    int i;
++    int8_t *path;
++    sample_t *self;
++}   f6_data_t;
++
++double f6(double x, void *data)
++{
++    f6_data_t *d = data;
++
++    return -ad_phase_lod(d->ad0, d->ad1, d->gt_phase, d->mocha->n_hets,
++        d->imap_arr + d->beg[d->i], d->path + d->beg[d->i], NAN, d->self->stats.dispersion, x);
++}
++
++static inline f6_data_t *f6_pack(int16_t *ad0, int16_t *ad1, int8_t *gt_phase,
++    mocha_t *mocha, int *imap_arr, int *beg, int i, int8_t *path, sample_t *self)
++
++{
++    f6_data_t *d = malloc(sizeof(f6_data_t));
++
++    d->ad0 = ad0;
++    d->ad1 = ad1;
++    d->gt_phase = gt_phase;
++    d->mocha = mocha;
++    d->imap_arr = imap_arr;
++    d->beg = beg;
++    d->i = i;
++    d->path = path;
++    d->self = self;
++
++    return d;
++}
++
++typedef struct
++{
++    float *baf;
++    int8_t *gt_phase;
++    mocha_t *mocha;
++    int *imap_arr;
++    int *beg;
++    int i;
++    int8_t *path;
++    sample_t *self;
++}   f8_data_t;
++
++double f8(double x, void *data)
++{
++    f8_data_t *d = data;
++
++    return -baf_phase_lod(d->baf, d->gt_phase, d->mocha->n_hets,
++	d->imap_arr + d->beg[d->i], d->path + d->beg[d->i], NAN,
++	d->self->stats.dispersion, x);
++}
++
++static inline f8_data_t *f8_pack(float *baf, int8_t *gt_phase, mocha_t *mocha,
++    int *imap_arr, int *beg, int i, int8_t *path, sample_t *self)
++{
++    f8_data_t *d = malloc(sizeof(f8_data_t));
++
++    d->baf = baf;
++    d->gt_phase = gt_phase;
++    d->mocha = mocha;
++    d->imap_arr = imap_arr;
++    d->beg = beg;
++    d->i = i;
++    d->path = path;
++    d->self = self;
++
++    return d;
++}
++
+ // process one contig for one sample
+ static void sample_run(sample_t *self, mocha_table_t *mocha_table, const model_t *model) {
+     // do nothing if chromosome Y or MT are being tested
+@@ -1735,16 +1998,11 @@ static void sample_run(sample_t *self, mocha_table_t *
+             mocha.ldev = get_median(lrr + a, b + 1 - a, NULL);
+             get_mocha_stats(pos, lrr, baf, gt_phase, n, a, b, cen_beg, cen_end, length, self->stats.baf_conc, &mocha);
+ 
+-            double f(double x, void *data) {
+-                if (model->flags & WGS_DATA)
+-                    return -lrr_ad_lod(lrr + a, ad0 + a, ad1 + a, mocha.n_sites, NULL, NAN, model->lrr_bias,
+-                                       model->lrr_hap2dip, self->adjlrr_sd, self->stats.dispersion, x);
+-                else
+-                    return -lrr_baf_lod(lrr + a, baf + a, mocha.n_sites, NULL, NAN, model->lrr_bias, model->lrr_hap2dip,
+-                                        self->adjlrr_sd, self->stats.dispersion, x);
+-            }
+             double bdev_lrr_baf;
+-            kmin_brent(f, -0.15, 0.15, NULL, KMIN_EPS, &bdev_lrr_baf);
++            f3_data_t *f3_data = f3_pack(lrr, a, ad0, ad1, &mocha, model,
++                 self, baf);
++            kmin_brent(f3, -0.15, 0.15, f3_data, KMIN_EPS, &bdev_lrr_baf);
++	    free(f3_data);
+             if (model->flags & WGS_DATA)
+                 mocha.lod_lrr_baf =
+                     lrr_ad_lod(lrr + a, ad0 + a, ad1 + a, mocha.n_sites, NULL, model->err_log_prb, model->lrr_bias,
+@@ -1796,23 +2054,21 @@ static void sample_run(sample_t *self, mocha_table_t *
+                     if (path[j] != path[j + 1]) mocha.n_flips++;
+ 
+                 if (model->flags & WGS_DATA) {
+-                    double f(double x, void *data) {
+-                        return -ad_phase_lod(ad0, ad1, gt_phase, mocha.n_hets, imap_arr + beg[i], path + beg[i], NAN,
+-                                             self->stats.dispersion, x);
+-                    }
+                     double bdev;
+-                    kmin_brent(f, 0.1, 0.2, NULL, KMIN_EPS, &bdev);
++		    f6_data_t *f6_data = f6_pack(ad0, ad1, gt_phase, &mocha,
++                        imap_arr, beg, i, path, self);
++                    kmin_brent(f6, 0.1, 0.2, f6_data, KMIN_EPS, &bdev);
++		    free(f6_data);
+                     mocha.bdev = fabsf((float)bdev);
+                     mocha.lod_baf_phase =
+                         ad_phase_lod(ad0, ad1, gt_phase, mocha.n_hets, imap_arr + beg[i], path + beg[i],
+                                      model->err_log_prb, self->stats.dispersion, mocha.bdev);
+                 } else {
+-                    double f(double x, void *data) {
+-                        return -baf_phase_lod(baf, gt_phase, mocha.n_hets, imap_arr + beg[i], path + beg[i], NAN,
+-                                              self->stats.dispersion, x);
+-                    }
+                     double bdev;
+-                    kmin_brent(f, 0.1, 0.2, NULL, KMIN_EPS, &bdev);
++		    f8_data_t *f8_data = f8_pack(baf, gt_phase, &mocha,
++			imap_arr, beg, i, path, self);
++                    kmin_brent(f8, 0.1, 0.2, f8_data, KMIN_EPS, &bdev);
++		    free(f8_data);
+                     mocha.bdev = fabsf((float)bdev);
+                     mocha.lod_baf_phase = baf_phase_lod(baf, gt_phase, mocha.n_hets, imap_arr + beg[i], path + beg[i],
+                                                         model->err_log_prb, self->stats.dispersion, mocha.bdev);
+@@ -1923,6 +2179,60 @@ static float get_lrr_cutoff(const float *v, int n) {
+     return cutoff;
+ }
+ 
++typedef struct
++{
++    int16_t *ad0;
++    int16_t *ad1;
++    int n;
++}   f2_data_t;
++
++double f2(double x, void *data)
++{
++    f2_data_t *d = data;
++
++    return -lod_lkl_beta_binomial(d->ad0, d->ad1, d->n, NULL, x);
++}
++
++static inline f2_data_t *f2_pack(int16_t *ad0, int16_t *ad1, int n)
++
++{
++    f2_data_t *d = malloc(sizeof(f2_data_t));
++
++    d->ad0 = ad0;
++    d->ad1 = ad1;
++    d->n = n;
++
++    return d;
++}
++
++typedef struct
++{
++    int16_t *ad0;
++    int16_t *ad1;
++    int n_imap;
++    int *imap_arr;
++}   f9_data_t;
++
++double f9(double x, void *data)
++{
++    f9_data_t *d = data;
++
++    return -lod_lkl_beta_binomial(d->ad0, d->ad1, d->n_imap, d->imap_arr, x);
++}
++
++static inline f9_data_t *f9_pack(int16_t *ad0, int16_t *ad1, int n_imap,
++    int *imap_arr)
++{
++    f9_data_t *d = malloc(sizeof(f9_data_t));
++
++    d->ad0 = ad0;
++    d->ad1 = ad1;
++    d->n_imap = n_imap;
++    d->imap_arr = imap_arr;
++
++    return d;
++}
++
+ // this function computes several contig stats and then clears the contig data from the sample
+ static void sample_stats(sample_t *self, const model_t *model) {
+     int n = self->n;
+@@ -1968,9 +2278,10 @@ static void sample_stats(sample_t *self, const model_t
+         self->x_nonpar_lrr_median = get_median(lrr, n_imap, imap_arr);
+ 
+         if (model->flags & WGS_DATA) {
+-            double f(double x, void *data) { return -lod_lkl_beta_binomial(ad0, ad1, n_imap, imap_arr, x); }
++            f9_data_t *f9_data = f9_pack(ad0, ad1, n_imap, imap_arr);
+             double x;
+-            kmin_brent(f, 0.1, 0.2, NULL, KMIN_EPS, &x); // dispersions above 0.5 are not allowed
++            kmin_brent(f9, 0.1, 0.2, f9_data, KMIN_EPS, &x); // dispersions above 0.5 are not allowed
++	    free(f9_data);
+             self->x_nonpar_dispersion = (float)x;
+         } else {
+             self->x_nonpar_dispersion = get_sample_sd(baf, n_imap, imap_arr);
+@@ -1995,9 +2306,10 @@ static void sample_stats(sample_t *self, const model_t
+         hts_expand(stats_t, self->n_stats, self->m_stats, self->stats_arr);
+ 
+         if (model->flags & WGS_DATA) {
+-            double f(double x, void *data) { return -lod_lkl_beta_binomial(ad0, ad1, n, NULL, x); }
+             double x;
+-            kmin_brent(f, 0.1, 0.2, NULL, KMIN_EPS, &x); // dispersions above 0.5 are not allowed
++	    f2_data_t *f2_data = f2_pack(ad0, ad1, n);
++            kmin_brent(f2, 0.1, 0.2, f2_data, KMIN_EPS, &x); // dispersions above 0.5 are not allowed
++	    free(f2_data);
+             self->stats_arr[self->n_stats - 1].dispersion = (float)x;
+         } else {
+             self->stats_arr[self->n_stats - 1].dispersion = get_sample_sd(baf, n, NULL);
diff --git a/biology/bio-mocha/pkg-descr b/biology/bio-mocha/pkg-descr
new file mode 100644
index 000000000000..cfecbffefbde
--- /dev/null
+++ b/biology/bio-mocha/pkg-descr
@@ -0,0 +1,8 @@
+MoChA is a bcftools plugin released under the MIT license for mosaic
+chromosomal alteration detection and analysis from DNA microarray or
+whole genome sequence data. It can be used both with Illumina and
+Affymetrix data. It can also be used for detection of germline copy
+number variants. Data can be prepared in usable file formats using the
+gtc2vcf plugin.
+
+WWW: https://software.broadinstitute.org/software/mocha/
diff --git a/biology/bio-mocha/pkg-plist b/biology/bio-mocha/pkg-plist
new file mode 100644
index 000000000000..b7be082bf4e3
--- /dev/null
+++ b/biology/bio-mocha/pkg-plist
@@ -0,0 +1,9 @@
+libexec/bcftools/extendFMT.so
+libexec/bcftools/mocha.so
+libexec/bcftools/mochatools.so
+libexec/bcftools/score.so
+libexec/bcftools/trio-phase.so
+%%DATADIR%%/mocha_plot.R
+%%DATADIR%%/pileup_plot.R
+%%DATADIR%%/shift_plot.R
+%%DATADIR%%/summary_plot.R



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?202112031343.1B3DhDZF087754>