#!/usr/bin/env python
import os
import re
import shlex
import sys
from subprocess import getstatusoutput
from typing import List

import dxpy
@dxpy.entry_point("main")
def main(bam_tumor, reference):
    """Download the inputs, run ``cnvkit.py batch`` and upload its results.

    Args:
        bam_tumor: Iterable of values accepted by ``dxpy.DXFile`` (one per
            tumor BAM). Each file is downloaded into the local ``bams``
            directory.
        reference: Value accepted by ``dxpy.DXFile``; downloaded into the
            working directory and passed to ``cnvkit.py batch`` via ``-r``.

    Returns:
        A dict with the keys ``"cns"``, ``"cns_filtered"`` and ``"plot"``.
        Each value is the list returned by ``upload`` for the files in the
        CNVkit output directory whose path matches the corresponding pattern.

    Raises:
        SystemExit: If the CNVkit command exits with a non-zero status; the
            captured command output is used as the exit message.
    """
    tumor_files = [dxpy.DXFile(item) for item in bam_tumor]
    reference = dxpy.DXFile(reference)

    reference_name = reference.describe().get("name", "reference.cnn")
    dxpy.download_dxfile(reference.get_id(), reference_name)

    bam_dir = "bams"
    # exist_ok so a pre-existing directory does not abort the job.
    os.makedirs(bam_dir, exist_ok=True)

    bam_files = []
    for dx_file in tumor_files:
        file_id = dx_file.get_id()
        # Fall back to the file ID when the description carries no name.
        # NOTE(review): two inputs with the same name map to the same local
        # path, so the later download overwrites the earlier one.
        path = os.path.join(bam_dir, dx_file.describe().get("name", file_id))
        dxpy.download_dxfile(file_id, path)
        bam_files.append(path)

    out_dir = "cnvkit-out"
    # The command runs through a shell (needed for the $(nproc) substitution),
    # so every file name derived from input metadata is quoted to keep spaces
    # and shell metacharacters from altering the command.
    quoted_bams = " ".join(shlex.quote(path) for path in bam_files)
    cmd = (
        f"cnvkit.py batch {quoted_bams} "
        f"-r {shlex.quote(reference_name)} "
        "-p $(expr $(nproc) - 1) "
        f"-d {out_dir} --scatter"
    )
    print(cmd)

    rv, out = getstatusoutput(cmd)
    if rv != 0:
        sys.exit(out)

    out_files = [os.path.join(out_dir, name) for name in os.listdir(out_dir)]
    print(f"out_files = {','.join(out_files)}")

    # Raw strings keep the regex escapes intact. The look-behind selects
    # ".cns" files that are NOT ".call.cns" files.
    return {
        "cns": upload(r"\.call\.cns$", out_files),
        "cns_filtered": upload(r"(?<!\.call)\.cns$", out_files),
        "plot": upload(r"-scatter\.png$", out_files),
    }
def upload(pattern: str, paths: List[str]) -> List[dict]:
    """Upload every path matching ``pattern`` and return the resulting links.

    Args:
        pattern: Regular expression applied to each path with ``re.search``,
            so it may match anywhere in the path unless anchored.
        paths: Local file paths to consider.

    Returns:
        One ``dxpy.dxlink(...)`` result per matching path, in the order the
        paths were given. The list is empty when nothing matches.
    """
    regex = re.compile(pattern)
    matching_paths = [path for path in paths if regex.search(path)]
    return [
        dxpy.dxlink(dxpy.upload_local_file(path))
        for path in matching_paths
    ]
# Hand control to dxpy at import/execution time. Presumably this dispatches
# to the function registered above with @dxpy.entry_point("main") -- confirm
# against the dxpy documentation.
dxpy.run()