scripts/last-dotplot
changeset 860 6e8ff424ce2b
parent 859 1ecdbf2ddb78
child 861 a75b439f0cf2
     1.1 --- a/scripts/last-dotplot	Thu May 18 14:06:41 2017 +0900
     1.2 +++ b/scripts/last-dotplot	Thu May 18 14:23:58 2017 +0900
     1.3 @@ -306,6 +306,55 @@
     1.4                      color = "#f4f4ff"
     1.5          yield layer, color, seqName, beg, end
     1.6  
     1.7 +def commaSeparatedInts(text):
     1.8 +    return map(int, text.rstrip(",").split(","))
     1.9 +
    1.10 +def readGenePred(opts, fileName, seqLimits):
    1.11 +    if not fileName: return
    1.12 +    for line in myOpen(fileName):
    1.13 +        fields = line.split()
    1.14 +        if not fields: continue
    1.15 +        if fields[2] not in "+-": fields = fields[1:]
    1.16 +        seqName = fields[1]
    1.17 +        if seqName not in seqLimits: continue
    1.18 +        #strand = fields[2]
    1.19 +        cdsBeg = int(fields[5])
    1.20 +        cdsEnd = int(fields[6])
    1.21 +        exonBegs = commaSeparatedInts(fields[8])
    1.22 +        exonEnds = commaSeparatedInts(fields[9])
    1.23 +        for beg, end in zip(exonBegs, exonEnds):
    1.24 +            yield 300, opts.exon_color, seqName, beg, end
    1.25 +            b = max(beg, cdsBeg)
    1.26 +            e = min(end, cdsEnd)
    1.27 +            if b < e: yield 400, opts.cds_color, seqName, b, e
    1.28 +
    1.29 +def readRmsk(fileName, seqLimits):
    1.30 +    if not fileName: return
    1.31 +    for line in myOpen(fileName):
    1.32 +        fields = line.split()
    1.33 +        if len(fields) == 17:  # rmsk.txt
    1.34 +            seqName = fields[5]
    1.35 +            if seqName not in seqLimits: continue  # do this ASAP for speed
    1.36 +            beg = int(fields[6])
    1.37 +            end = int(fields[7])
    1.38 +            strand = fields[9]
    1.39 +            repeatClass = fields[11]
    1.40 +        elif len(fields) == 15:  # .out
    1.41 +            seqName = fields[4]
    1.42 +            if seqName not in seqLimits: continue
    1.43 +            beg = int(fields[5]) - 1
    1.44 +            end = int(fields[6])
    1.45 +            strand = fields[8]
    1.46 +            repeatClass = fields[10]
    1.47 +        else:
    1.48 +            continue
    1.49 +        if repeatClass in ("Low_complexity", "Simple_repeat"):
    1.50 +            yield 200, "#ffe4ff", seqName, beg, end
    1.51 +        elif strand == "+":
    1.52 +            yield 100, "#fff4f4", seqName, beg, end
    1.53 +        else:
    1.54 +            yield 100, "#f4f4ff", seqName, beg, end
    1.55 +
    1.56  def isExtraFirstGapField(fields):
    1.57      return fields[4].isdigit()
    1.58  
    1.59 @@ -448,10 +497,14 @@
    1.60      origins2 = expandedSeqDict(origins2)
    1.61  
    1.62      beds1 = itertools.chain(readBed(opts.bed1, seqLimits1),
    1.63 +                            readRmsk(opts.rmsk1, seqLimits1),
    1.64 +                            readGenePred(opts, opts.genePred1, seqLimits1),
    1.65                              readGaps(opts, opts.gap1, seqLimits1))
    1.66      b1 = bedBoxes(beds1, seqLimits1, origins1, margin2, height, True, bpPerPix)
    1.67  
    1.68      beds2 = itertools.chain(readBed(opts.bed2, seqLimits2),
    1.69 +                            readRmsk(opts.rmsk2, seqLimits2),
    1.70 +                            readGenePred(opts, opts.genePred2, seqLimits2),
    1.71                              readGaps(opts, opts.gap2, seqLimits2))
    1.72      b2 = bedBoxes(beds2, seqLimits2, origins2, margin1, width, False, bpPerPix)
    1.73  
    1.74 @@ -522,10 +575,6 @@
    1.75      op.add_option("--border-color", metavar="COLOR", default="#dcdcdc",
    1.76                    help="color for pixels between sequences (default=%default)")
    1.77      # xxx --margin-color?
    1.78 -    op.add_option("--bed1", metavar="FILE",
    1.79 -                  help="read genome1 annotations from bed file")
    1.80 -    op.add_option("--bed2", metavar="FILE",
    1.81 -                  help="read genome2 annotations from bed file")
    1.82  
    1.83      og = optparse.OptionGroup(op, "Text options")
    1.84      og.add_option("-f", "--fontfile", metavar="FILE",
    1.85 @@ -538,6 +587,28 @@
    1.86                    help="show sequence lengths for the 2nd (vertical) genome")
    1.87      op.add_option_group(og)
    1.88  
    1.89 +    og = optparse.OptionGroup(op, "Annotation options")
    1.90 +    og.add_option("--bed1", metavar="FILE",
    1.91 +                  help="read genome1 annotations from BED file")
    1.92 +    og.add_option("--bed2", metavar="FILE",
    1.93 +                  help="read genome2 annotations from BED file")
    1.94 +    og.add_option("--rmsk1", metavar="FILE", help="read genome1 repeats from "
    1.95 +                  "RepeatMasker .out or rmsk.txt file")
    1.96 +    og.add_option("--rmsk2", metavar="FILE", help="read genome2 repeats from "
    1.97 +                  "RepeatMasker .out or rmsk.txt file")
    1.98 +    op.add_option_group(og)
    1.99 +
   1.100 +    og = optparse.OptionGroup(op, "Gene options")
   1.101 +    og.add_option("--genePred1", metavar="FILE",
   1.102 +                  help="read genome1 genes from genePred file")
   1.103 +    og.add_option("--genePred2", metavar="FILE",
   1.104 +                  help="read genome2 genes from genePred file")
   1.105 +    og.add_option("--exon-color", metavar="COLOR", default="#dfd",
   1.106 +                  help="color for exons (default=%default)")
   1.107 +    og.add_option("--cds-color", metavar="COLOR", default="#bdb",
   1.108 +                  help="color for protein-coding regions (default=%default)")
   1.109 +    op.add_option_group(og)
   1.110 +
   1.111      og = optparse.OptionGroup(op, "Unsequenced gap options")
   1.112      og.add_option("--gap1", metavar="FILE",
   1.113                    help="read genome1 unsequenced gaps from agp or gap file")