def load_flavor(flavor, flavors_config_path):
    """Load the configuration of a barcode flavor.

    Two lookup modes are supported, selected by the value of ``flavor``:

    * If ``flavor`` ends with ``.yaml`` (case-insensitive) it is treated as
      the path to a custom flavor file; the parsed content of that file is
      returned as-is, without further validation.
    * Otherwise ``flavor`` is looked up by name under the
      ``barcode_flavors`` key of the YAML file at ``flavors_config_path``.

    Args:
        flavor: Name of a flavor, or path to a custom ``.yaml`` flavor file.
        flavors_config_path: Path to the YAML file listing the named
            flavors. Only read when ``flavor`` is not a ``.yaml`` path.

    Returns:
        The parsed flavor configuration.

    Raises:
        FileNotFoundError: If ``flavor`` is a ``.yaml`` path that does not
            point to an existing file.
        yaml.YAMLError: If the file being read is not valid YAML. The
            original parser error is chained as ``__cause__``.
        ValueError: If ``flavor`` is not listed under ``barcode_flavors``
            (including when the config file is empty or lacks that section).
    """
    if flavor.lower().endswith(".yaml"):
        if not os.path.isfile(flavor):
            raise FileNotFoundError(f"Custom flavor file '{flavor}' not found.")

        with open(flavor, "r") as stream:
            try:
                return yaml.safe_load(stream)
            except yaml.YAMLError as exc:
                # Chain the cause so the parser's own traceback is preserved.
                raise yaml.YAMLError(
                    f"Error parsing custom flavor file: {exc}"
                ) from exc

    with open(flavors_config_path, "r") as stream:
        try:
            flavor_config = yaml.safe_load(stream)
        except yaml.YAMLError as exc:
            raise yaml.YAMLError(
                f"Error parsing flavors config file: {exc}"
            ) from exc

    # An empty YAML file parses to None, and the section may be missing;
    # report both as "flavor not found" instead of a TypeError/KeyError.
    flavors = (
        flavor_config.get("barcode_flavors")
        if isinstance(flavor_config, dict)
        else None
    )
    if not isinstance(flavors, dict) or flavor not in flavors:
        raise ValueError(f"Flavor `{flavor}` could not be found")

    return flavors[flavor]
def _run_index(args):
    """Build a malva k-mer index from the parsed command-line arguments.

    The steps are, in order: validate the inputs, create the output
    directory if needed, load the flavor configuration, load or create the
    barcode index, configure the ``MalvaIndex``, add the reads, and
    optionally merge the chunks written to disk.

    Args:
        args: Parsed arguments. The attributes read here are ``flavor``,
            ``reads_in``, ``index_out``, ``spatial_bc_in``, ``overlapping``,
            ``kmer_length``, ``bulk_id`` (bulk flavor only), ``chunksize``,
            ``threads`` and ``merge_chunks``.

    Raises:
        ValueError: If the flavor is ``stereo_seq`` and no ``sindex.bin``
            exists in the output directory.
        SystemExit: With status 1 if the flavor is ``bulk`` and
            ``args.bulk_id`` cannot be converted to an integer.
    """
    # Validate that input files exist and output files don't
    if args.flavor != 'bulk':
        for _r in args.reads_in:
            check_file_exists(_r, except_when=False)
    else:
        # For bulk, the first entry is later replaced by the bulk identifier,
        # so only the second entry is checked as a file.
        check_file_exists(args.reads_in[1], except_when=False)

    _out_dir_exists = check_directory_exists(args.index_out)
    if not _out_dir_exists:
        logging.info("Output directory does not exist. Creating...")
        # makedirs also creates missing parent directories (os.mkdir did not).
        os.makedirs(args.index_out, exist_ok=True)

    logging.info(f"Configuring flavor `{args.flavor}`")
    _config_path = os.path.join(get_module_path(), "data", "config.yaml")
    # TODO: we remove this because it would prevent from running on a smk environment
    # if check_file_exists("config.yaml"):
    #     _config_path = "config.yaml"
    flavor_config = load_flavor(args.flavor, _config_path)
    _bam_tags = flavor_config["bam_tags"]
    _cell = flavor_config["cell"]

    is_barcode_only = args.flavor.startswith("sc_") or args.flavor == 'bulk'

    _sindex_loc = os.path.join(args.index_out, "sindex.bin")
    _sindex_exists = check_file_exists(_sindex_loc)
    if _sindex_exists:
        logging.info("Loading previously created `cell (spot) barcode->spatial coordinate` index")
        sindex = SpatialIndex()
        if args.flavor == "stereo_seq":
            sindex.load_binary_stomics(_sindex_loc)
        else:
            sindex.load_binary(_sindex_loc)
    elif is_barcode_only:
        logging.info("Will not use a spatial index, but a barcode single-cell index")
        sindex = create_singlecell_index(args.spatial_bc_in)
    else:
        if args.flavor == "stereo_seq":
            raise ValueError("STOmics indices (really large ones) have to be provided in .bin format!")

        logging.info("Creating `cell (spot) barcode->spatial coordinate` index")
        sindex = create_spatial_index(args.spatial_bc_in)
        logging.info("Saving `cell (spot) barcode->spatial coordinate` index")
        sindex.save_binary(_sindex_loc)

    logging.info("Configuring the malva index")
    # Overlapping k-mers advance one position at a time; otherwise the step
    # equals the k-mer length.
    jump_amount = 1 if args.overlapping else args.kmer_length
    kmer_index = MalvaIndex(
        args.index_out,
        kmer_size_initialize=args.kmer_length,
        jump_amount=jump_amount,
    )

    # TODO: fix this more elegantly
    logging.info("Adding cell barcode index to malva index")
    if args.flavor == "stereo_seq":
        kmer_index.set_barcode_index(sindex)
        kmer_index.set_spatial_coords(sindex.get_coords_stomics())
    elif is_barcode_only:
        kmer_index.set_barcode_index(sindex)
    else:
        kmer_index.set_spatial_index(sindex)

    if args.flavor == 'bulk':
        # we ignore the first reads
        try:
            args.reads_in[0] = int(args.bulk_id)
        except (TypeError, ValueError):
            logging.error("Could not set the bulk identifier to a number")
            # SystemExit instead of the site-provided exit() helper, which is
            # not guaranteed to exist (e.g. under `python -S`).
            raise SystemExit(1)

    logging.info(f"Indexing sequence {args.kmer_length}-mers in space from {args.reads_in} with flavor {args.flavor}")
    logging.info(f"Will write to disk every {args.chunksize:,} sequences, and once at the end (remaining sequences)")
    kmer_index.add_reads(
        args.reads_in,
        _bam_tags,
        _cell,
        chunksize=args.chunksize,
        threads=args.threads,
    )

    if args.merge_chunks and kmer_index.n_chunks > 1:
        logging.info(f"Now, {kmer_index.n_chunks} chunks will be merged")
        merged_file = f"{kmer_index.index_file}.merged"
        kmer_index.verbose = True
        kmer_index.merge_chunks(merged_file)

        # Replace the chunked index files with the merged ones.
        os.remove(f'{kmer_index.index_file}-r.h5')
        os.remove(f'{kmer_index.index_file}-m.h5')
        shutil.move(f'{merged_file}-r.h5', f'{kmer_index.index_file}-r.h5')
        shutil.move(f'{merged_file}-m.h5', f'{kmer_index.index_file}-m.h5')

    logging.info("SUCCESS!")


if __name__ == "__main__":
    from malva.cli import get_index_parser

    args = get_index_parser().parse_args()
    # The parsed arguments must be passed on; _run_index() takes one
    # required positional parameter.
    _run_index(args)