diff -r 1ecdbf2ddb78 -r 6e8ff424ce2b scripts/last-dotplot --- a/scripts/last-dotplot Thu May 18 14:06:41 2017 +0900 +++ b/scripts/last-dotplot Thu May 18 14:23:58 2017 +0900 @@ -306,6 +306,55 @@ color = "#f4f4ff" yield layer, color, seqName, beg, end +def commaSeparatedInts(text): + return map(int, text.rstrip(",").split(",")) + +def readGenePred(opts, fileName, seqLimits): + if not fileName: return + for line in myOpen(fileName): + fields = line.split() + if not fields: continue + if fields[2] not in "+-": fields = fields[1:] + seqName = fields[1] + if seqName not in seqLimits: continue + #strand = fields[2] + cdsBeg = int(fields[5]) + cdsEnd = int(fields[6]) + exonBegs = commaSeparatedInts(fields[8]) + exonEnds = commaSeparatedInts(fields[9]) + for beg, end in zip(exonBegs, exonEnds): + yield 300, opts.exon_color, seqName, beg, end + b = max(beg, cdsBeg) + e = min(end, cdsEnd) + if b < e: yield 400, opts.cds_color, seqName, b, e + +def readRmsk(fileName, seqLimits): + if not fileName: return + for line in myOpen(fileName): + fields = line.split() + if len(fields) == 17: # rmsk.txt + seqName = fields[5] + if seqName not in seqLimits: continue # do this ASAP for speed + beg = int(fields[6]) + end = int(fields[7]) + strand = fields[9] + repeatClass = fields[11] + elif len(fields) == 15: # .out + seqName = fields[4] + if seqName not in seqLimits: continue + beg = int(fields[5]) - 1 + end = int(fields[6]) + strand = fields[8] + repeatClass = fields[10] + else: + continue + if repeatClass in ("Low_complexity", "Simple_repeat"): + yield 200, "#ffe4ff", seqName, beg, end + elif strand == "+": + yield 100, "#fff4f4", seqName, beg, end + else: + yield 100, "#f4f4ff", seqName, beg, end + def isExtraFirstGapField(fields): return fields[4].isdigit() @@ -448,10 +497,14 @@ origins2 = expandedSeqDict(origins2) beds1 = itertools.chain(readBed(opts.bed1, seqLimits1), + readRmsk(opts.rmsk1, seqLimits1), + readGenePred(opts, opts.genePred1, seqLimits1), readGaps(opts, opts.gap1, seqLimits1)) b1 = bedBoxes(beds1, seqLimits1, origins1, margin2, height, True, bpPerPix) beds2 = itertools.chain(readBed(opts.bed2, seqLimits2), + readRmsk(opts.rmsk2, seqLimits2), + readGenePred(opts, opts.genePred2, seqLimits2), readGaps(opts, opts.gap2, seqLimits2)) b2 = bedBoxes(beds2, seqLimits2, origins2, margin1, width, False, bpPerPix) @@ -522,10 +575,6 @@ op.add_option("--border-color", metavar="COLOR", default="#dcdcdc", help="color for pixels between sequences (default=%default)") # xxx --margin-color? - op.add_option("--bed1", metavar="FILE", - help="read genome1 annotations from bed file") - op.add_option("--bed2", metavar="FILE", - help="read genome2 annotations from bed file") og = optparse.OptionGroup(op, "Text options") og.add_option("-f", "--fontfile", metavar="FILE", @@ -538,6 +587,28 @@ help="show sequence lengths for the 2nd (vertical) genome") op.add_option_group(og) + og = optparse.OptionGroup(op, "Annotation options") + og.add_option("--bed1", metavar="FILE", + help="read genome1 annotations from BED file") + og.add_option("--bed2", metavar="FILE", + help="read genome2 annotations from BED file") + og.add_option("--rmsk1", metavar="FILE", help="read genome1 repeats from " + "RepeatMasker .out or rmsk.txt file") + og.add_option("--rmsk2", metavar="FILE", help="read genome2 repeats from " + "RepeatMasker .out or rmsk.txt file") + op.add_option_group(og) + + og = optparse.OptionGroup(op, "Gene options") + og.add_option("--genePred1", metavar="FILE", + help="read genome1 genes from genePred file") + og.add_option("--genePred2", metavar="FILE", + help="read genome2 genes from genePred file") + og.add_option("--exon-color", metavar="COLOR", default="#dfd", + help="color for exons (default=%default)") + og.add_option("--cds-color", metavar="COLOR", default="#bdb", + help="color for protein-coding regions (default=%default)") + op.add_option_group(og) + og = optparse.OptionGroup(op, "Unsequenced gap options") og.add_option("--gap1", metavar="FILE", help="read genome1 unsequenced gaps from agp or gap file")