-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmain.nf
More file actions
133 lines (113 loc) · 5.49 KB
/
Copy pathmain.nf
File metadata and controls
133 lines (113 loc) · 5.49 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
nextflow.enable.dsl=2
include { metamdbg_assemble } from './modules/metaMDBG.nf'
include { fcs_gx_clean as fcs_gx_initial_clean } from './modules/fcs_gx.nf'
include { minimap2_align } from './modules/minimap2.nf'
include { rasusa_subset } from './modules/rasusa.nf'
include { hifiasm_reassemble } from './modules/hifiasm.nf'
include { fcs_gx_clean as fcs_gx_final_clean } from './modules/fcs_gx.nf'
include { quality_check } from './modules/quality_check.nf'
include { merge_quality_reports } from './modules/quality_check.nf'
/*
 * Copy the final cleaned assembly to its delivery file name and publish it.
 *
 * Input : tuple (cleaned, target_name)
 *           cleaned     - staged file to deliver
 *           target_name - file name the delivered copy must have
 * Output: the file named `target_name`, emitted as `delivered` and published
 *         to `params.outdir` using publishDir mode 'copy'.
 */
process deliver_final_clean {
    tag "deliver final clean"
    publishDir "${params.outdir}", mode: 'copy'
    cpus 1
    memory '1 GB'

    input:
    tuple path(cleaned), val(target_name)

    output:
    path target_name, emit: delivered

    script:
    // `target_name` is a plain string value, so it is quoted to survive spaces
    // or shell metacharacters; `--` stops a leading '-' being read as an option.
    // NOTE(review): `cleaned` is intentionally left unquoted - Nextflow is
    // understood to backslash-escape staged path inputs when they are
    // interpolated into the script, and wrapping an already-escaped name in
    // double quotes would break it. Confirm against the Nextflow version in use.
    """
    set -euo pipefail
    cp -- ${cleaned} "${target_name}"
    """
}
/*
 * Entry workflow.
 *
 * Order of operations, as wired below:
 *   1. Print usage and exit when --help is set.
 *   2. Validate required parameters (--reads, --gx_db, --tax_id) and the
 *      --quality_library / --quality_lineage pair.
 *   3. metamdbg_assemble on the input reads.
 *   4. fcs_gx_initial_clean on that assembly.
 *   5. minimap2_align of the input reads against the cleaned draft.
 *   6. Optional rasusa_subset of the mapped reads (only when --target_bases is set).
 *   7. hifiasm_reassemble on the (possibly subsampled) mapped reads.
 *   8. fcs_gx_final_clean on the reassembly.
 *   9. deliver_final_clean, naming the result "<reads simpleName>.fasta.gz".
 *  10. Optional quality_check on all four assemblies, merged into one report
 *      (only when --quality_library is set).
 */
workflow {
    main:
    if (params.help) {
        // The help text is kept flush-left so the printed message is unchanged.
        log.info """
TEA (Target Eukaryotic genome Assembly)
Usage:
nextflow run main.nf \\
--reads <reads.fastq.gz> \\
--gx_db <path/to/gx-db-prefix> \\
--tax_id <ncbi_tax_id> \\
--target_bases <bases> \\
--threads <int> \\
--fcs_gx_memory <memory> \\
--hifiasm_option '<opts>' \\
--outdir <results_dir> \\
--quality_library <path/to/compleasm_db> \\
--quality_lineage <lineage>
Parameters:
--reads Input PacBio HiFi reads in FASTQ.GZ format (required).
--gx_db Path prefix to the NCBI FCS-GX database bundle (required).
--tax_id NCBI taxonomy identifier used by FCS-GX (required).
--target_bases Optional target number of bases for subsampling (genome size * coverage);
omit to use all mapped reads.
--rasusa_seed Random seed for rasusa subsampling (default: 0).
--threads Maximum CPU cores assigned to threaded processes
(default: ${params.threads}).
--fcs_gx_memory Memory requested for each FCS-GX process
(default: ${params.fcs_gx_memory}).
--hifiasm_option Extra options passed to hifiasm (default: '-l 2').
--keep_intermediates
Keep intermediate files (draft assemblies, mapped reads, etc.);
use this flag to retain them (default: off).
--outdir Directory for published outputs (default: ${params.outdir}).
--quality_library Path to the Compleasm database; required to enable quality checks.
--quality_lineage Compleasm lineage dataset name (required when --quality_library is supplied).
For additional details, consult README.md.
"""
        exit 0
    }

    // ---- Parameter validation -------------------------------------------
    if (!params.reads) {
        error "Parameter --reads must point to a .fastq.gz file"
    }
    if (!params.gx_db) {
        error "Parameter --gx_db must point to the FCS-GX database"
    }
    // Compared against null (not truthiness) so that a tax id of 0 is accepted.
    if (params.tax_id == null) {
        error "Parameter --tax_id is required for FCS-GX"
    }
    // Checked here, next to the other parameter checks, so a missing lineage
    // is reported before any pipeline step is wired up.
    if (params.quality_library && !params.quality_lineage) {
        error "Parameter --quality_lineage is required when --quality_library is provided"
    }

    // Fail fast with a clear message if the reads file does not exist instead
    // of letting the first process fail while staging its input.
    def reads_path = file(params.reads, checkIfExists: true)
    // file() returns a list when the value is a glob pattern; the rest of the
    // workflow (notably reads_path.simpleName) needs exactly one file.
    if (reads_path instanceof List) {
        error "Parameter --reads must resolve to exactly one file, but '${params.reads}' matched ${reads_path.size()} files"
    }
    // No existence check here: the help text describes --gx_db as a path
    // *prefix*, which need not exist as a file itself.
    def gx_db_path = file(params.gx_db)

    // ---- Draft assembly and first decontamination round ------------------
    metamdbg_assemble(channel.of(reads_path))

    // Input tuple: (assembly, gx_db_path, tax_id, label string).
    fcs_gx_initial_clean(
        metamdbg_assemble.out.assembly.map { assembly -> tuple(assembly, gx_db_path, params.tax_id, 'fcs_initial') }
    )

    // ---- Map the original reads against the cleaned draft -----------------
    minimap2_align(
        fcs_gx_initial_clean.out.clean_fasta.map { draft -> tuple(draft, reads_path) }
    )

    // Subsample with rasusa if target_bases is set, otherwise pass through
    if (params.target_bases) {
        log.info "Rasusa subsampling enabled: targeting ${params.target_bases} bases"
        rasusa_subset(
            minimap2_align.out.mapped_reads.map { reads -> tuple(reads, params.target_bases) }
        )
        hifiasm_reassemble(rasusa_subset.out.subset_reads)
    } else {
        log.warn "Rasusa subsampling SKIPPED (--target_bases not set). All mapped reads will be used for reassembly."
        hifiasm_reassemble(minimap2_align.out.mapped_reads)
    }

    // ---- Second decontamination round and delivery ------------------------
    fcs_gx_final_clean(
        hifiasm_reassemble.out.assembly.map { assembly -> tuple(assembly, gx_db_path, params.tax_id, 'fcs_final') }
    )

    // The delivered file is named after the reads file's simpleName.
    deliver_final_clean(
        fcs_gx_final_clean.out.clean_fasta.map { cleaned -> tuple(cleaned, "${reads_path.simpleName}.fasta.gz") }
    )

    // ---- Optional quality assessment --------------------------------------
    if (params.quality_library) {
        def quality_lib_path = file(params.quality_library)

        // One tuple per assembly stage, all mixed into a single channel.
        // NOTE(review): the six fields look like (stage id, display label,
        // assembly, library path, lineage, "is final" flag as a string) -
        // confirm against modules/quality_check.nf.
        def qc_inputs = metamdbg_assemble.out.assembly.map { asm -> tuple('metamdbg', 'metaMDBG', asm, quality_lib_path, params.quality_lineage, 'false') }
            .mix(fcs_gx_initial_clean.out.clean_fasta.map { asm -> tuple('fcs_initial', 'fcs_gx round 1', asm, quality_lib_path, params.quality_lineage, 'false') })
            .mix(hifiasm_reassemble.out.assembly.map { asm -> tuple('hifiasm', 'hifiasm', asm, quality_lib_path, params.quality_lineage, 'false') })
            .mix(fcs_gx_final_clean.out.clean_fasta.map { asm -> tuple('fcs_final', 'fcs_gx round 2', asm, quality_lib_path, params.quality_lineage, 'true') })

        quality_check(qc_inputs)
        // collect() gathers every per-stage metrics output into one emission so
        // the merge step runs once, after all quality checks have finished.
        merge_quality_reports(quality_check.out.metrics.collect(), reads_path.simpleName)
    } else {
        log.warn "Skipping quality check because --quality_library was not provided."
    }
}