#######################
# parameter file format
#######################
#
# The FIRST non-commented line should include the following TAB-delimited key-value parameter pairs:
# key-value pairs:
# min_seq_length, max_seq_length -> length range of regions
# region_max_score -> region score threshold (discard peaks with higher score)
# region_min_score -> set to -10000 (not used in current implementation)
# sample_space -> ensemble_ (NOTE: this description appears truncated; confirm the accepted values, e.g. the 'top_5000' form)
# twobit_annotation_file -> '2bit' file containing the genome sequence in compressed form
# species -> 'human' or 'mouse'
# known_pssm_path -> path to the pssm files for the datasets to be pre-processed
# peaks_file_path -> path to the peaks files with the corresponding binding evidence scores
# comment character -> #
#
# On a separate line for each input dataset specify the following parameters:
# dataset_id -> user-specified id for each ChIP-seq dataset (should be part of the dataset peaks file name)
# known_consensus -> literature consensus for the TF (if missing, enter an arbitrary dummy placeholder IUPAC string)
#
# NOTE: if the 'ensemble' approach is used to define the space of putative regulatory regions, multiple lines should specify the
# datasets to be included in the analysis; otherwise a single line should specify the dataset to be pre-processed for cERMIT analysis.
######################
# Global parameters: a single line of TAB-delimited key=value pairs. Per the
# format description at the top of this file, this must be the FIRST
# non-commented line.
# NOTE(review): twobit_annotation_file is an absolute, site-specific path;
# adjust it for the local environment.
# NOTE(review): out_path is not listed in the format description; presumably
# the output directory -- confirm against the parser.
min_seq_length=100	max_seq_length=1000	sample_space=top_5000	species=human	region_max_score=10	known_pssm_path=./data/pssms/	twobit_annotation_file=/nfs/labs/ohlerlab/sata/data/sgeorg/gbdb/hg18.2bit	peaks_file_path=./data/	region_min_score=-10000	out_path=./
# Datasets: one per line, as dataset_id and known_consensus (IUPAC string).
# Fields are assumed to be TAB-separated like the global parameters -- confirm.
# Lines starting with '#' are disabled; remove the '#' to enable a dataset.
# Currently only DNase_Human_STAT1 is enabled.
#dataset_id=Human_STAT1	known_consensus=TTCNNNGAA
#dataset_id=Human_CTCF	known_consensus=RCCASYAGRKGGCRS
#dataset_id=Human_FoxA1	known_consensus=TRTTKRYTY
#dataset_id=Human_GABP	known_consensus=VVMGGAAGNB
#dataset_id=Human_NRSF	known_consensus=CTSTCCNNGGTBCTGR
#dataset_id=Human_SRF	known_consensus=GMCCWWWWWWGG
dataset_id=DNase_Human_STAT1	known_consensus=TTCNNNGAA
#dataset_id=DNase_Human_CTCF	known_consensus=RCCASYAGRKGGCRS
#dataset_id=DNase_Human_FoxA1	known_consensus=TRTTKRYTY
#dataset_id=DNase_Human_GABP	known_consensus=VVMGGAAGNB
#dataset_id=DNase_Human_NRSF_Johnson	known_consensus=CTSTCCNNGGTBCTGR
#dataset_id=DNase_Human_SRF	known_consensus=GMCCWWWWWWGG