last-dotplot: refactor
authorMartin C. Frith
Thu Jul 30 22:02:15 2020 +0900 (12 days ago)
changeset 1071c5d3cf8c1058
parent 1070 6cf9358f80da
child 1072 e43ecf1c3fb6
last-dotplot: refactor
scripts/last-dotplot
     1.1 --- a/scripts/last-dotplot	Thu Jul 30 21:47:07 2020 +0900
     1.2 +++ b/scripts/last-dotplot	Thu Jul 30 22:02:15 2020 +0900
     1.3 @@ -588,6 +588,7 @@
     1.4          if not w: continue
     1.5          seqName = w[0]
     1.6          if seqName not in rangeDict: continue
     1.7 +        seqRanges = rangeDict[seqName]
     1.8          beg = int(w[1])
     1.9          end = int(w[2])
    1.10          layer = 900
    1.11 @@ -600,7 +601,7 @@
    1.12                      color = "rgb(" + w[8] + ")"
    1.13                  else:
    1.14                      strand = w[5]
    1.15 -                    isRev = rangeDict[seqName][0][2]
    1.16 +                    isRev = (seqRanges[0][3] > 1)
    1.17                      if strand == "+" and not isRev or strand == "-" and isRev:
    1.18                          color = "#ffe8e8"
    1.19                      if strand == "-" and not isRev or strand == "+" and isRev:
    1.20 @@ -648,9 +649,10 @@
    1.21              repeatClass = fields[10].split("/")[0]
    1.22          else:
    1.23              continue
    1.24 +        seqRanges = rangeDict[seqName]
    1.25          if repeatClass in ("Low_complexity", "Simple_repeat", "Satellite"):
    1.26              yield 200, "#fbf", seqName, beg, end
    1.27 -        elif (strand == "+") != rangeDict[seqName][0][2]:
    1.28 +        elif (strand == "+") != (seqRanges[0][3] > 1):
    1.29              yield 100, "#ffe8e8", seqName, beg, end
    1.30          else:
    1.31              yield 100, "#e8e8ff", seqName, beg, end
    1.32 @@ -756,6 +758,10 @@
    1.33          draw.text(position, text, font=font, fill=fill)
    1.34      return im
    1.35  
    1.36 +def rangesPerSeq(sortedRanges):
    1.37 +    for seqName, group in itertools.groupby(sortedRanges, itemgetter(0)):
    1.38 +        yield seqName, sorted(group)
    1.39 +
    1.40  def rangesWithOrigins(sortedRanges, rangePixBegs, rangePixLens, bpPerPix):
    1.41      for i, j, k in zip(sortedRanges, rangePixBegs, rangePixLens):
    1.42          seqName, rangeBeg, rangeEnd, strandNum = i
    1.43 @@ -809,7 +815,8 @@
    1.44      remainingSequences = set(i[seqIndex] for i in alignments)
    1.45      return [i for i in seqRanges if i[0] in remainingSequences]
    1.46  
    1.47 -def readAnnotations(opts, rangeDict, bedFile, repFile, geneFile, gapFile):
    1.48 +def readAnnotations(opts, sortedRanges, bedFile, repFile, geneFile, gapFile):
    1.49 +    rangeDict = expandedSeqDict(dict(rangesPerSeq(sortedRanges)))
    1.50      annots = itertools.chain(readBed(bedFile, rangeDict),
    1.51                               readRmsk(repFile, rangeDict),
    1.52                               readGenePred(opts, geneFile, rangeDict),
    1.53 @@ -910,12 +917,12 @@
    1.54      logging.info("reading annotations...")
    1.55  
    1.56      rangeDict1 = expandedSeqDict(rangeDict1)
    1.57 -    annots1 = readAnnotations(opts, rangeDict1,
    1.58 +    annots1 = readAnnotations(opts, sortedRanges1,
    1.59                                opts.bed1, opts.rmsk1, opts.genePred1, opts.gap1)
    1.60      boxes1 = bedBoxes(annots1, rangeDict1, tMargin, height, True, bpPerPix)
    1.61  
    1.62      rangeDict2 = expandedSeqDict(rangeDict2)
    1.63 -    annots2 = readAnnotations(opts, rangeDict2,
    1.64 +    annots2 = readAnnotations(opts, sortedRanges2,
    1.65                                opts.bed2, opts.rmsk2, opts.genePred2, opts.gap2)
    1.66      boxes2 = bedBoxes(annots2, rangeDict2, lMargin, width, False, bpPerPix)
    1.67