######################## # parameters file format ######################## # # %----------------------% # | comment character: # | # %----------------------% # # The FIRST non-commented line should include the following TAB delimited key-value parameter pairs: # # REQUIRED # min_seq_length, max_seq_length -> length range of regions # region_max_score -> maximum region score threshold (discard peaks with higher score) # sample_space -> top_NUM, where NUM = number of top regions from each dataset (e.g. NUM=1000, # datasets=2 => 2,000 total regions) # twobit_annotation_file -> '2bit' file containing the genome sequence in compressed form # # OPTIONAL (if not specified default values are assumed) # region_min_score -> minimum region score threshold (discard peaks with lower score, default: -10000) # species -> 'human' or 'mouse' (default: 'human') # known_pssm_path -> path to the pssm files for the datasets to be pre-processed (default: known_pssm_path=./data/pssms/) # peaks_file_path -> path to the root directory of the peaks files containing the binding evidence # (default: ./data, NOTE that the full path is ./data//) # NEXT # Specify the ids of the datasets to be used in the analysis (each on a separate line, e.g. dataset_id=DATASET_ID) # The peaks files corresponding to individual dataset ids must follow the naming convention: DATASET_ID_score.out # e.g. dataset_id=Human_CTCF => peaks file name: Human_CTCF_score.out # ######################## min_seq_length=100 max_seq_length=500 sample_space=top_15000 twobit_annotation_file=/nfs/labs/ohlerlab/sata/data/sgeorg/gbdb/hg18.2bit #dataset_id=Human_STAT1 #dataset_id=Human_CTCF #dataset_id=Human_FoxA1 #dataset_id=Human_GABP dataset_id=Human_NRSF #dataset_id=Human_SRF #dataset_id=DNase_Human_STAT1 #dataset_id=DNase_Human_CTCF #dataset_id=DNase_Human_FoxA1 #dataset_id=DNase_Human_GABP dataset_id=DNase_Human_NRSF #dataset_id=DNase_Human_SRF