Added last-dotplot options to show BED features.
authorMartin C. Frith
Tue Apr 04 11:51:15 2017 +0900 (2017-04-04)
changeset 84516060c00b129
parent 844 23de4eb3be1d
child 846 1f46ab956351
Added last-dotplot options to show BED features.
doc/last-dotplot.txt
scripts/last-dotplot
     1.1 --- a/doc/last-dotplot.txt	Tue Apr 04 10:50:57 2017 +0900
     1.2 +++ b/doc/last-dotplot.txt	Tue Apr 04 11:51:15 2017 +0900
     1.3 @@ -79,6 +79,15 @@
     1.4        Trim unaligned sequence flanks from the 1st genome.
     1.5    --trim2
     1.6        Trim unaligned sequence flanks from the 2nd genome.
     1.7 +  --bed1=FILE
     1.8 +      Read `BED-format
     1.9 +      <https://genome.ucsc.edu/FAQ/FAQformat.html#format1>`_
    1.10 +      annotations for the 1st genome.  They are drawn as rectangles,
    1.11 +      with coordinates given by the first three BED fields.  The color
    1.12 +      is specified by the RGB field if present, else pale red if the
    1.13 +      strand is "+", pale blue if "-", or pale purple.
    1.14 +  --bed2=FILE
    1.15 +      Read BED-format annotations for the 2nd genome.
    1.16  
    1.17  Unsequenced gap options
    1.18  ~~~~~~~~~~~~~~~~~~~~~~~
     2.1 --- a/scripts/last-dotplot	Tue Apr 04 10:50:57 2017 +0900
     2.2 +++ b/scripts/last-dotplot	Tue Apr 04 11:51:15 2017 +0900
     2.3 @@ -258,13 +258,35 @@
     2.4          newDict[base] = x
     2.5      return newDict
     2.6  
     2.7 +def readBed(fileName, seqLimits):
     2.8 +    if not fileName: return
     2.9 +    for line in myOpen(fileName):
    2.10 +        w = line.split()
    2.11 +        if not w or w[0][0] == "#": continue
    2.12 +        seqName = w[0]
    2.13 +        if seqName not in seqLimits: continue
    2.14 +        cropBeg, cropEnd = seqLimits[seqName]
    2.15 +        beg = int(w[1])
    2.16 +        end = int(w[2])
    2.17 +        b = max(beg, cropBeg)
    2.18 +        e = min(end, cropEnd)
    2.19 +        if b >= e: continue
    2.20 +        if len(w) > 8:
    2.21 +            color = tuple(map(int, w[8].split(",")))
    2.22 +        elif len(w) > 5:
    2.23 +            if   w[5] == "+": color = 255, 244, 244
    2.24 +            elif w[5] == "-": color = 244, 244, 255
    2.25 +            else:             color = 255, 228, 255
    2.26 +        else:
    2.27 +            color = 255, 228, 255
    2.28 +        yield seqName, b, e, color
    2.29 +
    2.30  def isExtraFirstGapField(fields):
    2.31      return fields[4].isdigit()
    2.32  
    2.33  def readGaps(fileName, seqLimits):
    2.34      '''Read locations of unsequenced gaps, from an agp or gap file.'''
    2.35      if not fileName: return
    2.36 -    seqLimits = expandedSeqDict(seqLimits)
    2.37      for line in myOpen(fileName):
    2.38          w = line.split()
    2.39          if not w or w[0][0] == "#": continue
    2.40 @@ -281,6 +303,16 @@
    2.41          bridgedText = w[7]
    2.42          yield seqName, b, e, bridgedText
    2.43  
    2.44 +def drawAnnotations(im, beds, origins, margin, limit, isTop, bp_per_pix):
    2.45 +    # XXX no consideration of different-color overlaps
    2.46 +    for seqName, beg, end, color in beds:
    2.47 +        ori = origins[seqName]
    2.48 +        b = (ori + beg) // bp_per_pix
    2.49 +        e = div_ceil(ori + end, bp_per_pix)
    2.50 +        if isTop: box = b, margin, e, limit
    2.51 +        else:     box = margin, b, limit, e
    2.52 +        im.paste(color, box)
    2.53 +
    2.54  def drawUnsequencedGaps(im, gaps, origins, margin, limit, isTop, bridgedText,
    2.55                          bp_per_pix, color):
    2.56      '''Draw rectangles representing unsequenced gaps.'''
    2.57 @@ -379,8 +411,16 @@
    2.58      image_size = width, height
    2.59      im = Image.new(image_mode, image_size, opts.background_color)
    2.60  
    2.61 +    seqLimits1 = expandedSeqDict(seqLimits1)
    2.62 +    seqLimits2 = expandedSeqDict(seqLimits2)
    2.63      origins1 = expandedSeqDict(origins1)
    2.64      origins2 = expandedSeqDict(origins2)
    2.65 +
    2.66 +    beds1 = list(readBed(opts.bed1, seqLimits1))
    2.67 +    beds2 = list(readBed(opts.bed2, seqLimits2))
    2.68 +    drawAnnotations(im, beds1, origins1, margin2, height, True, bp_per_pix)
    2.69 +    drawAnnotations(im, beds2, origins2, margin1, width, False, bp_per_pix)
    2.70 +
    2.71      gaps1 = list(readGaps(opts.gap1, seqLimits1))
    2.72      gaps2 = list(readGaps(opts.gap2, seqLimits2))
    2.73      # draw bridged gaps first, then unbridged gaps on top:
    2.74 @@ -450,6 +490,10 @@
    2.75                    help="trim unaligned sequence flanks from the 1st genome")
    2.76      op.add_option("--trim2", action="store_true",
    2.77                    help="trim unaligned sequence flanks from the 2nd genome")
    2.78 +    op.add_option("--bed1", metavar="FILE",
    2.79 +                  help="read genome1 annotations from bed file")
    2.80 +    op.add_option("--bed2", metavar="FILE",
    2.81 +                  help="read genome2 annotations from bed file")
    2.82      og = optparse.OptionGroup(op, "Unsequenced gap options")
    2.83      og.add_option("--gap1", metavar="FILE",
    2.84                    help="read genome1 unsequenced gaps from agp or gap file")