Module molcrawl.rna.dataset.cellxgene.script.conv
Functions
def convert(output_dir, num_worker)-
Expand source code
def convert(output_dir, num_worker):
    """Convert every selected ``.jbl`` file under *output_dir* to ``.h5ad``.

    The files to convert are chosen by ``get_file_to_process``; each one is
    handed to ``run`` on a thread pool, and the results are written below
    ``<output_dir>/extract/``.

    Args:
        output_dir: Root working directory (anything ``Path`` accepts).
        num_worker: Maximum number of worker threads for the pool.
    """
    root = Path(output_dir)
    pending = get_file_to_process(root)

    # The destination folder must exist before any worker tries to write.
    os.makedirs(root / "extract/", exist_ok=True)

    convert_one = partial(run, output_dir=root)
    with concurrent.futures.ThreadPoolExecutor(max_workers=num_worker) as pool:
        progress = track(
            pool.map(convert_one, pending),
            description="Conversion...",
            total=len(pending),
        )
        # Drain the lazy result iterator: this advances the progress display
        # and re-raises the first exception raised by a worker, if any.
        for _ in progress:
            pass
Expand source code
def get_file_to_process(output_dir: Path):
    """Collect the downloaded ``.jbl`` files for every listed tissue.

    Tissue names are read one per line from ``<output_dir>/tissue_list.tsv``.
    For each name, files matching ``<output_dir>/download_dir/<tissue>.*.jbl``
    are gathered.

    Args:
        output_dir: Root working directory containing ``tissue_list.tsv`` and
            the ``download_dir`` folder.

    Returns:
        A list of ``Path`` objects, grouped in the order the tissues appear in
        the list file and sorted by path within each tissue. The list is empty
        when nothing matches.

    Raises:
        OSError: If ``tissue_list.tsv`` cannot be opened (for example
            ``FileNotFoundError`` when it does not exist).
    """
    with open(output_dir / "tissue_list.tsv", "r") as file:
        tissue_list = file.read().splitlines()

    arg_list = []
    for tissue in tissue_list:
        # A blank line would turn the pattern into "download_dir/.*.jbl",
        # which matches unrelated dot-files; skip such lines.
        if not tissue.strip():
            continue
        # glob() yields entries in arbitrary filesystem order; sort so the
        # work list is reproducible from run to run.
        arg_list.extend(sorted(output_dir.glob(f"download_dir/{tissue}.*.jbl")))
    return arg_list
Expand source code
def run(filename, output_dir: Path):
    """Load one joblib dump and write it out as a gzip-compressed ``.h5ad``.

    The output file is ``<output_dir>/extract/<stem>.h5ad``, where ``<stem>``
    is *filename* without its directory and final suffix.

    Args:
        filename: Path of the joblib file to load.
        output_dir: Root working directory; its ``extract`` sub-folder is
            expected to exist already.
    """
    # Imported lazily, as in the original, so importing this module does not
    # require joblib.
    import joblib

    stem = Path(filename).stem
    destination = output_dir / f"extract/{stem}.h5ad"

    # NOTE(review): joblib.load unpickles the file, so it should only be fed
    # files this pipeline produced itself -- confirm against the download step.
    loaded = joblib.load(filename)
    loaded.write_h5ad(destination, compression="gzip")