From fd408a8b110682f94a017f468edb30b97d1e0e72 Mon Sep 17 00:00:00 2001 From: Ben Rush Date: Mon, 5 Apr 2021 13:04:31 -0700 Subject: [PATCH 1/4] Get annotations working --- rmrl/annotation.py | 128 +++++++++++++++++++++++++++++++++++++++ rmrl/document.py | 34 +++-------- rmrl/pens/highlighter.py | 55 ++++++++++++----- rmrl/render.py | 64 ++++++++++++++------ 4 files changed, 224 insertions(+), 57 deletions(-) create mode 100644 rmrl/annotation.py diff --git a/rmrl/annotation.py b/rmrl/annotation.py new file mode 100644 index 0000000..9ab061a --- /dev/null +++ b/rmrl/annotation.py @@ -0,0 +1,128 @@ +# Copyright 2021 Ben Rush +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . 
+ +from __future__ import annotations + +class Point: + def __init__(self, x: float, y: float): + self.x = x + self.y = y + + def toList(self) -> list: + return [self.x, self.y] + +class Rect: + """ + From PDF spec: + a specific array object used to describe locations on a page and + bounding boxes for a variety of objects and written as an array + of four numbers giving the coordinates of a pair of diagonally + opposite corners, typically in the form [ll.x, ll.y, ur.x, ur.y] + """ + + def __init__(self, ll: Point, ur: Point): + self.ll = ll + self.ur = ur + + def intersects(self, rectB: Rect) -> bool: + # To check if either rectangle is actually a line + # For example : l1 ={-1,0} r1={1,1} l2={0,-1} r2={0,1} + + if (self.ll.x == self.ur.x or self.ll.y == self.ur.y or rectB.ll.x == rectB.ur.x or rectB.ll.y == rectB.ur.y): + # the line cannot have positive overlap + return False + + + # If one rectangle is on left side of other + if(self.ll.x >= rectB.ur.x or rectB.ll.x >= self.ur.x): + return False + + # If one rectangle is above other + if(self.ur.y <= rectB.ll.y or rectB.ur.y <= self.ll.y): + return False + + return True + + def union(self, rectB: Rect) -> Rect: + ll = Point(min(self.ll.x, rectB.ll.x), + min(self.ll.y, rectB.ll.y)) + ur = Point(max(self.ur.x, rectB.ur.x), + max(self.ur.y, rectB.ur.y)) + return Rect(ll, ur) + + def toList(self) -> list: + return [self.ll.x, self.ll.y, self.ur.x, self.ur.y] + +class QuadPoints: + """ + From PDF spec: + An array of 8 x n numbers specifying the coordinates of n quadrilaterals + in default user space. Each quadrilateral shall encompass a word or group + of contiguous words in the text underlying the annotation. The coordinates + for each quadrilateral shall be given in the order x1, y1, x2, y2, x3, y3, x4, y4 + specifying the quadrilateral's four vertices in counterclockwise order + starting with the lower left. The text shall be oriented with respect to the + edge connecting points (x1, y1) with (x2, y2). 
+ """ + + points: list[Point] + + def __init__(self, points: list[Point]): + self.points = points + + def append(self, quadpoints: QuadPoints) -> QuadPoints: + return QuadPoints(self.points + quadpoints.points) + + def toList(self) -> list: + return [c for p in self.points for c in p.toList()] + + + @staticmethod + def fromRect(rect: Rect): + """ + Assumes that the rect is aligned with the text. Will return incorrect + results otherwise + """ + # Needs to be in this order to account for rotations applied later? + # ll.x, ur.y, ur.x, ur.y, ll.x, ll.y, ur.x, ll.y + quadpoints = [Point(rect.ll.x, rect.ur.y), + Point(rect.ur.x, rect.ur.y), + Point(rect.ll.x, rect.ll.y), + Point(rect.ur.x, rect.ll.y)] + return QuadPoints(quadpoints) + +class Annotation(): + annotype: str + rect: Rect + quadpoints: QuadPoints + + def __init__(self, annotype: str, rect: Rect, quadpoints: list = None): + self.annotype = annotype + self.rect = rect + if quadpoints: + self.quadpoints = quadpoints + else: + self.quadpoints = QuadPoints.fromRect(rect) + + def united(self, annot: Annotation) -> Annotation: + if self.annotype != annot.annotype: + raise Exception("Cannot merge annotations with different types") + + return Annotation(self.annotype, + self.rect.union(annot.rect), + self.quadpoints.append(annot.quadpoints)) + + def intersects(self, annot: Annotation) -> bool: + return self.rect.intersects(annot.rect) diff --git a/rmrl/document.py b/rmrl/document.py index 14a2f91..f626a1b 100644 --- a/rmrl/document.py +++ b/rmrl/document.py @@ -24,6 +24,7 @@ from . import lines, pens from .constants import DISPLAY, PDFHEIGHT, PDFWIDTH, PTPERPX, TEMPLATE_PATH +from typing import List, Tuple log = logging.getLogger(__name__) @@ -172,8 +173,8 @@ def __init__(self, page, name=None): # PDF layers are ever implemented. 
self.annot_paths = [] - def get_grouped_annotations(self): - # return: (LayerName, [(AnnotType, minX, minY, maxX, maxY)]) + def get_grouped_annotations(self) -> Tuple[str, list]: + # return: (LayerName, [Annotations]) # Compare all the annot_paths to each other. If any overlap, # they will be grouped together. This is done recursively. @@ -181,18 +182,18 @@ def grouping_func(pathset): newset = [] for p in pathset: - annotype = p[0] - path = p[1] + annotype = p.annotype + #path = p[1] #returns (xmin, ymin, xmax, ymax) did_fit = False for i, g in enumerate(newset): - gannotype = g[0] - group = g[1] + gannotype = g.annotype + #group = g[1] # Only compare annotations of the same type if gannotype != annotype: continue - if path.intersects(group): + if p.intersects(g): did_fit = True - newset[i] = (annotype, group.united(path)) + newset[i] = g.united(p) #left off here, need to build united and quadpoints break if did_fit: continue @@ -207,22 +208,7 @@ def grouping_func(pathset): return newset grouped = grouping_func(self.annot_paths) - - # Get the bounding rect of each group, which sets the PDF - # annotation geometry. - annot_rects = [] - for p in grouped: - annotype = p[0] - path = p[1] - rect = path.boundingRect() - annot = (annotype, - float(rect.x()), - float(rect.y()), - float(rect.x() + rect.width()), - float(rect.y() + rect.height())) - annot_rects.append(annot) - - return (self.name, annot_rects) + return (self.name, grouped) def paint_strokes(self, canvas, vector): for stroke in self.strokes: diff --git a/rmrl/pens/highlighter.py b/rmrl/pens/highlighter.py index ce45cdd..bf39ff1 100644 --- a/rmrl/pens/highlighter.py +++ b/rmrl/pens/highlighter.py @@ -15,12 +15,15 @@ # along with this program. If not, see . 
from .generic import GenericPen +from reportlab.graphics.shapes import Rect +from reportlab.pdfgen.pathobject import PDFPathObject +from ..annotation import Annotation, Point, Rect, QuadPoints class HighlighterPen(GenericPen): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.layer = kwargs.get('layer') - self.annotate = False #TODO bool(int(QSettings().value( + self.annotate = True#False #TODO bool(int(QSettings().value( # 'pane/notebooks/export_pdf_annotate'))) def paint_stroke(self, canvas, stroke): @@ -28,25 +31,47 @@ def paint_stroke(self, canvas, stroke): canvas.setLineCap(2) # Square canvas.setLineJoin(1) # Round #canvas.setDash ?? for solid line - canvas.setStrokeColor((1.000, 0.914, 0.290), alpha=0.392) + yellow = (1.000, 0.914, 0.290) + canvas.setStrokeColor(yellow, alpha=0.392) canvas.setLineWidth(stroke.width) path = canvas.beginPath() path.moveTo(stroke.segments[0].x, stroke.segments[0].y) + + x0 = stroke.segments[0].x + y0 = stroke.segments[0].y + + ll = Point(x0, y0) + ur = Point(x0, y0) + for segment in stroke.segments[1:]: path.lineTo(segment.x, segment.y) - canvas.drawPath(path, stroke=1, fill=0) - canvas.restoreState() + + # Do some basic vector math to rotate the line width + # perpendicular to this segment + + x1 = segment.x + y1 = segment.y + width = segment.width + + l = [x1-x0, y1-y0] + v0 = -l[1]/l[0] + scale = (1+v0**2)**0.5 + orthogonal = [v0/scale, 1/scale] + + xmin = x0-width/2*orthogonal[0] + ymin = y0-width/2*orthogonal[1] + xmax = x1+width/2*orthogonal[0] + ymax = y1+width/2*orthogonal[1] + + ll = Point(min(ll.x, xmin), min(ll.y, ymin)) + ur = Point(max(ur.x, xmax), max(ur.y, ymax)) + + x0 = x1 + y0 = y1 if self.annotate: - assert False - # Create outline of the path. Annotations that are close to - # each other get groups. This is determined by overlapping - # paths. In order to fuzz this, we'll double the normal - # width and extend the end caps. 
- self.setWidthF(self.widthF() * 2) - self.setCapStyle(Qt.SquareCap) - opath = QPainterPathStroker(self).createStroke(path) - # The annotation type is carried all the way through. This - # is the type specified in the PDF spec. - self.layer.annot_paths.append(('Highlight', opath)) + self.layer.annot_paths.append(Annotation("Highlight", Rect(ll, ur))) + + canvas.drawPath(path, stroke=1, fill=0) + canvas.restoreState() \ No newline at end of file diff --git a/rmrl/render.py b/rmrl/render.py index 475b1da..de78a9a 100644 --- a/rmrl/render.py +++ b/rmrl/render.py @@ -27,6 +27,7 @@ from . import document, sources from .constants import PDFHEIGHT, PDFWIDTH, PTPERPX, SPOOL_MAX +from typing import Tuple, List log = logging.getLogger(__name__) @@ -87,7 +88,7 @@ def render(source, *, # about 500 pages could use up to 3 GB of RAM. Create them by # iteration so they get released by garbage collector. changed_pages = [] - annotations = [] + annotations = [] # [pages[layers[(layer, [Annotations])]]] for i in range(0, len(pages)): page = document.DocumentPage(source, pages[i], i) if source.exists(page.rmpath): @@ -388,23 +389,36 @@ def do_apply_ocg(basepage, rmpage, i, uses_base_pdf, ocgprop, annotations): return ocgorderinner +def invert_coords(point) -> Tuple[float]: + print(point) + x = (point.x * PTPERPX) + y = PDFHEIGHT - (point.y * PTPERPX) + return (x, y) def apply_annotations(rmpage, page_annot, ocgorderinner): + # page_annot = layers[(layer, [Annotations])] for k, layer_a in enumerate(page_annot): + # layer_a = (layer, [Annotations]) layerannots = layer_a[1] for a in layerannots: # PDF origin is in bottom-left, so invert all # y-coordinates. 
- author = 'RCU' #self.model.device_info['rcuname'] + author = 'reMarkable' #self.model.device_info['rcuname'] + + x1, y1 = invert_coords(a.rect.ll) + x2, y2 = invert_coords(a.rect.ur) + + w = x2-x1 + h = y1-y2 + print(a.quadpoints.points) + qp = [c for p in map(invert_coords, a.quadpoints.points) for c in p] + pdf_a = PdfDict(Type=PdfName('Annot'), - Rect=PdfArray([ - (a[1] * PTPERPX), - PDFHEIGHT - (a[2] * PTPERPX), - (a[3] * PTPERPX), - PDFHEIGHT - (a[4] * PTPERPX)]), + Rect=PdfArray([x1, y1, x2, y2]), + QuadPoints=PdfArray(qp), T=author, ANN='pdfmark', - Subtype=PdfName(a[0]), + Subtype=PdfName(a.annotype), P=rmpage) # Set to indirect because it makes a cleaner PDF # output. @@ -566,24 +580,23 @@ def merge_pages(basepage, rmpage, changed_page, expand_pages): if '/Annots' in rmpage: for a, annot in enumerate(rmpage.Annots): rect = annot.Rect - rmpage.Annots[a].Rect = PdfArray([ - rect[1], - PDFWIDTH - rect[0], - rect[3], - PDFWIDTH - rect[2]]) + rmpage.Annots[a].Rect = PdfArray(rotate_annot_points(rect)) + + qp = annot.QuadPoints + rmpage.Annots[a].QuadPoints = PdfArray(rotate_annot_points(qp)) annot_adjust = [0, 0] if '/Annots' in rmpage: for a, annot in enumerate(rmpage.Annots): rect = annot.Rect - newrect = PdfArray([ - rect[0] * scale + annot_adjust[0], - rect[1] * scale + annot_adjust[1], - rect[2] * scale + annot_adjust[0], - rect[3] * scale + annot_adjust[1]]) + newrect = PdfArray(scale_annot_points(rect, scale, annot_adjust)) rmpage.Annots[a].Rect = newrect + qp = annot.QuadPoints + newqp = PdfArray(scale_annot_points(qp, scale, annot_adjust)) + rmpage.Annots[a].QuadPoints = newqp + # Gives the basepage the rmpage as a new object np.render() @@ -592,3 +605,18 @@ def merge_pages(basepage, rmpage, changed_page, expand_pages): if not '/Annots' in basepage: basepage.Annots = PdfArray() basepage.Annots += rmpage.Annots + +def rotate_annot_points(points: list) -> list: + rotated = [] + for n in range(0,len(points),2): + rotated.append(points[n+1]) + 
rotated.append(PDFWIDTH-points[n]) + + return rotated + +def scale_annot_points(points: list, scale:float, adjust: list) -> list: + scaled = [] + for i, p in enumerate(points): + scaled.append(p*scale + adjust[i%2]) + + return scaled \ No newline at end of file From 7af721fe36ae057e8788cb3b16fa88f180c06e13 Mon Sep 17 00:00:00 2001 From: Ben Rush Date: Wed, 7 Apr 2021 13:35:47 -0700 Subject: [PATCH 2/4] Fixed y-offset error for wider aspect ratios --- rmrl/pens/highlighter.py | 9 ++++++--- rmrl/render.py | 9 ++++++--- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/rmrl/pens/highlighter.py b/rmrl/pens/highlighter.py index bf39ff1..2257d12 100644 --- a/rmrl/pens/highlighter.py +++ b/rmrl/pens/highlighter.py @@ -55,9 +55,12 @@ def paint_stroke(self, canvas, stroke): width = segment.width l = [x1-x0, y1-y0] - v0 = -l[1]/l[0] - scale = (1+v0**2)**0.5 - orthogonal = [v0/scale, 1/scale] + if l[0] == 0: + orthogonal = [1, 0] + else: + v0 = -l[1]/l[0] + scale = (1+v0**2)**0.5 + orthogonal = [v0/scale, 1/scale] xmin = x0-width/2*orthogonal[0] ymin = y0-width/2*orthogonal[1] diff --git a/rmrl/render.py b/rmrl/render.py index de78a9a..acf21e8 100644 --- a/rmrl/render.py +++ b/rmrl/render.py @@ -390,7 +390,6 @@ def do_apply_ocg(basepage, rmpage, i, uses_base_pdf, ocgprop, annotations): return ocgorderinner def invert_coords(point) -> Tuple[float]: - print(point) x = (point.x * PTPERPX) y = PDFHEIGHT - (point.y * PTPERPX) return (x, y) @@ -410,7 +409,6 @@ def apply_annotations(rmpage, page_annot, ocgorderinner): w = x2-x1 h = y1-y2 - print(a.quadpoints.points) qp = [c for p in map(invert_coords, a.quadpoints.points) for c in p] pdf_a = PdfDict(Type=PdfName('Annot'), @@ -505,6 +503,8 @@ def merge_pages(basepage, rmpage, changed_page, expand_pages): else: assert False, f"Unexpected rotation: {effective_rotation}" + annot_adjust = [0, 0] + if bpage_ratio <= rpage_ratio: # These ratios < 1, so this indicates the basepage is more # narrow, and thus we need to extend 
the width. Extra space @@ -521,6 +521,9 @@ def merge_pages(basepage, rmpage, changed_page, expand_pages): # Height and width are flipped for the basepage new_height = rpage_ratio * bpage_w scale = bpage_w / rpage_h + # Not needed in the x-dim b/c extra space is added to the + # right side, which doesn't impact alignment + annot_adjust[1] = bpage_box[3] - new_height if effective_rotation == 90: bpage_box[3] = new_height + bpage_box[1] else: @@ -531,6 +534,7 @@ def merge_pages(basepage, rmpage, changed_page, expand_pages): if not flip_base_dims: new_height = 1/rpage_ratio * bpage_w scale = bpage_w / rpage_w + annot_adjust[1] = bpage_box[3] - new_height if effective_rotation == 0: bpage_box[1] = bpage_box[3] - new_height else: @@ -585,7 +589,6 @@ def merge_pages(basepage, rmpage, changed_page, expand_pages): qp = annot.QuadPoints rmpage.Annots[a].QuadPoints = PdfArray(rotate_annot_points(qp)) - annot_adjust = [0, 0] if '/Annots' in rmpage: for a, annot in enumerate(rmpage.Annots): From 21a91a08dc06215544e9abcce3e10d08f48cb7bf Mon Sep 17 00:00:00 2001 From: Ben Rush Date: Wed, 21 Apr 2021 12:41:47 -0700 Subject: [PATCH 3/4] Update to support 2.7 firmware --- rmrl/annotation.py | 17 ++++++++- rmrl/document.py | 82 ++++++++++++++++++---------------------- rmrl/pens/highlighter.py | 3 ++ rmrl/render.py | 64 +++++++++++++++---------------- 4 files changed, 87 insertions(+), 79 deletions(-) diff --git a/rmrl/annotation.py b/rmrl/annotation.py index 9ab061a..3ede464 100644 --- a/rmrl/annotation.py +++ b/rmrl/annotation.py @@ -107,14 +107,16 @@ class Annotation(): annotype: str rect: Rect quadpoints: QuadPoints + contents: str - def __init__(self, annotype: str, rect: Rect, quadpoints: list = None): + def __init__(self, annotype: str, rect: Rect, quadpoints: list = None, contents: str = ""): self.annotype = annotype self.rect = rect if quadpoints: self.quadpoints = quadpoints else: self.quadpoints = QuadPoints.fromRect(rect) + self.contents = contents def united(self, annot: 
Annotation) -> Annotation: if self.annotype != annot.annotype: @@ -122,7 +124,18 @@ def united(self, annot: Annotation) -> Annotation: return Annotation(self.annotype, self.rect.union(annot.rect), - self.quadpoints.append(annot.quadpoints)) + self.quadpoints.append(annot.quadpoints), + self.contents + annot.contents) + + + @staticmethod + def union(annotA: Annotation, annotB: Annotation) -> Annotation: + if annotA is None: + return annotB + elif annotB is None: + return annotA + else: + return annotA.united(annotB) def intersects(self, annot: Annotation) -> bool: return self.rect.intersects(annot.rect) diff --git a/rmrl/document.py b/rmrl/document.py index f626a1b..759e3a4 100644 --- a/rmrl/document.py +++ b/rmrl/document.py @@ -23,6 +23,7 @@ from . import lines, pens from .constants import DISPLAY, PDFHEIGHT, PDFWIDTH, PTPERPX, TEMPLATE_PATH +from .annotation import Annotation, Rect, Point from typing import List, Tuple @@ -35,6 +36,12 @@ def __init__(self, source, pid, pagenum): self.source = source self.num = pagenum + self.highlights = None + highlightspath = f'{{ID}}.highlights/{pid}.json' + if source.exists(highlightspath): + with source.open(highlightspath, 'r') as f: + self.highlights = json.load(f)["highlights"] + # On disk, these files are named by a UUID self.rmpath = f'{{ID}}/{pid}.rm' if not source.exists(self.rmpath): @@ -72,15 +79,37 @@ def __init__(self, source, pid, pagenum): self.load_layers() def get_grouped_annotations(self): - # Return the annotations grouped by proximity. If they are - # within a distance of each other, count them as a single - # annotation. 
- - # Annotations should be delivered in an array, where each - # index is a tuple (LayerName, annotations = [] - for layer in self.layers: - annotations.append(layer.get_grouped_annotations()) + if self.highlights is None: return [] + + for h in self.highlights: + note = None + cursor = -1 + for stroke in h: + log.debug(stroke) + rect = None + for r in stroke["rects"]: # I guess in theory there could be more than one? + ll = Point(r["x"], r["y"]) + ur = Point(r["x"]+r["width"], r["y"]+r["height"]) + if rect: rect = rect.union(Rect(ll,ur)) + else: rect = Rect(ll, ur) + + + contents = stroke["text"] + " " + newnote = Annotation("Highlight", rect, contents=contents) + + if cursor > 0 and (stroke["start"] - cursor > 10): # sometimes there are small gaps due to whitespace? + # For now, treat non-continuous highlights as separate notes + annotations.append(note) + note = newnote + else: + note = Annotation.union(note, newnote) + + cursor = stroke["start"]+stroke["length"] + + if note: + annotations.append(note) + return annotations def load_layers(self): @@ -173,43 +202,6 @@ def __init__(self, page, name=None): # PDF layers are ever implemented. self.annot_paths = [] - def get_grouped_annotations(self) -> Tuple[str, list]: - # return: (LayerName, [Annotations]) - - # Compare all the annot_paths to each other. If any overlap, - # they will be grouped together. This is done recursively. 
- def grouping_func(pathset): - newset = [] - - for p in pathset: - annotype = p.annotype - #path = p[1] #returns (xmin, ymin, xmax, ymax) - did_fit = False - for i, g in enumerate(newset): - gannotype = g.annotype - #group = g[1] - # Only compare annotations of the same type - if gannotype != annotype: - continue - if p.intersects(g): - did_fit = True - newset[i] = g.united(p) #left off here, need to build united and quadpoints - break - if did_fit: - continue - # Didn't fit, so place into a new group - newset.append(p) - - if len(newset) != len(pathset): - # Might have stuff left to group - return grouping_func(newset) - else: - # Nothing was grouped, so done - return newset - - grouped = grouping_func(self.annot_paths) - return (self.name, grouped) - def paint_strokes(self, canvas, vector): for stroke in self.strokes: pen, color, unk1, width, unk2, segments = stroke diff --git a/rmrl/pens/highlighter.py b/rmrl/pens/highlighter.py index 2257d12..156930b 100644 --- a/rmrl/pens/highlighter.py +++ b/rmrl/pens/highlighter.py @@ -19,6 +19,9 @@ from reportlab.pdfgen.pathobject import PDFPathObject from ..annotation import Annotation, Point, Rect, QuadPoints +# In software version 2.7, reMarkable phased out the highlighter pen in favor +# of a separate .highlights file. This code is likely obsolete and can be removed +# once we are confident this change is stable class HighlighterPen(GenericPen): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) diff --git a/rmrl/render.py b/rmrl/render.py index acf21e8..8b40c31 100644 --- a/rmrl/render.py +++ b/rmrl/render.py @@ -21,7 +21,7 @@ import re from pdfrw import PdfReader, PdfWriter, PageMerge, PdfDict, PdfArray, PdfName, \ - IndirectPdfDict, uncompress, compress + IndirectPdfDict, PdfString, uncompress, compress from reportlab.pdfgen import canvas @@ -75,9 +75,11 @@ def render(source, *, # key of zero length, so it doesn't break the rest of the # process. 
pages = [] + highlihgts = [] if source.exists('{ID}.content'): with source.open('{ID}.content', 'r') as f: pages = json.load(f).get('pages', []) + # Render each page as a pdf tmpfh = tempfile.TemporaryFile() @@ -88,7 +90,7 @@ def render(source, *, # about 500 pages could use up to 3 GB of RAM. Create them by # iteration so they get released by garbage collector. changed_pages = [] - annotations = [] # [pages[layers[(layer, [Annotations])]]] + annotations = [] # [pages[Annotations]] for i in range(0, len(pages)): page = document.DocumentPage(source, pages[i], i) if source.exists(page.rmpath): @@ -396,36 +398,34 @@ def invert_coords(point) -> Tuple[float]: def apply_annotations(rmpage, page_annot, ocgorderinner): # page_annot = layers[(layer, [Annotations])] - for k, layer_a in enumerate(page_annot): - # layer_a = (layer, [Annotations]) - layerannots = layer_a[1] - for a in layerannots: - # PDF origin is in bottom-left, so invert all - # y-coordinates. - author = 'reMarkable' #self.model.device_info['rcuname'] - - x1, y1 = invert_coords(a.rect.ll) - x2, y2 = invert_coords(a.rect.ur) - - w = x2-x1 - h = y1-y2 - qp = [c for p in map(invert_coords, a.quadpoints.points) for c in p] - - pdf_a = PdfDict(Type=PdfName('Annot'), - Rect=PdfArray([x1, y1, x2, y2]), - QuadPoints=PdfArray(qp), - T=author, - ANN='pdfmark', - Subtype=PdfName(a.annotype), - P=rmpage) - # Set to indirect because it makes a cleaner PDF - # output. - pdf_a.indirect = True - if ocgorderinner: - pdf_a.OC = ocgorderinner[k] - if not '/Annots' in rmpage: - rmpage.Annots = PdfArray() - rmpage.Annots.append(pdf_a) + for a in page_annot: + # PDF origin is in bottom-left, so invert all + # y-coordinates. 
+ author = 'reMarkable' #self.model.device_info['rcuname'] + + x1, y1 = invert_coords(a.rect.ll) + x2, y2 = invert_coords(a.rect.ur) + + w = x2-x1 + h = y1-y2 + qp = [c for p in map(invert_coords, a.quadpoints.points) for c in p] + + pdf_a = PdfDict(Type=PdfName('Annot'), + Rect=PdfArray([x1, y1, x2, y2]), + QuadPoints=PdfArray(qp), + Contents=a.contents, + T=author, + ANN='pdfmark', + Subtype=PdfName(a.annotype), + P=rmpage) + # Set to indirect because it makes a cleaner PDF + # output. + pdf_a.indirect = True + if ocgorderinner: + pdf_a.OC = ocgorderinner[k] + if not '/Annots' in rmpage: + rmpage.Annots = PdfArray() + rmpage.Annots.append(pdf_a) def merge_pages(basepage, rmpage, changed_page, expand_pages): From 25b9d74bb71235795a6174b426085596fa1a6fd3 Mon Sep 17 00:00:00 2001 From: Ben Rush Date: Wed, 21 Apr 2021 17:07:24 -0700 Subject: [PATCH 4/4] re-implement support for legacy highlighter --- rmrl/constants.py | 1 + rmrl/document.py | 97 ++++++++++++++++++++++++++++------------ rmrl/pens/highlighter.py | 7 +-- rmrl/render.py | 63 +++++++++++++------------- 4 files changed, 105 insertions(+), 63 deletions(-) diff --git a/rmrl/constants.py b/rmrl/constants.py index 7d1d0f0..2ca1934 100644 --- a/rmrl/constants.py +++ b/rmrl/constants.py @@ -23,3 +23,4 @@ TEMPLATE_PATH = xdg_data_home() / 'rmrl' / 'templates' VERSION = pkg_resources.get_distribution('rmrl').version +HIGHLIGHTCOLOR = [1, 0.941177, 0.4] diff --git a/rmrl/document.py b/rmrl/document.py index 759e3a4..ab2510b 100644 --- a/rmrl/document.py +++ b/rmrl/document.py @@ -80,35 +80,39 @@ def __init__(self, source, pid, pagenum): def get_grouped_annotations(self): annotations = [] - if self.highlights is None: return [] - - for h in self.highlights: - note = None - cursor = -1 - for stroke in h: - log.debug(stroke) - rect = None - for r in stroke["rects"]: # I guess in theory there could be more than one? 
- ll = Point(r["x"], r["y"]) - ur = Point(r["x"]+r["width"], r["y"]+r["height"]) - if rect: rect = rect.union(Rect(ll,ur)) - else: rect = Rect(ll, ur) - - - contents = stroke["text"] + " " - newnote = Annotation("Highlight", rect, contents=contents) - - if cursor > 0 and (stroke["start"] - cursor > 10): # sometimes there are small gaps due to whitespace? - # For now, treat non-continuous highlights as separate notes - annotations.append(note) - note = newnote - else: - note = Annotation.union(note, newnote) - - cursor = stroke["start"]+stroke["length"] + if self.highlights is not None: + annotations.append(("Highlights",[])) + + for h in self.highlights: + note = None + cursor = -1 + for stroke in h: + log.debug(stroke) + rect = None + for r in stroke["rects"]: # I guess in theory there could be more than one? + ll = Point(r["x"], r["y"]) + ur = Point(r["x"]+r["width"], r["y"]+r["height"]) + if rect: rect = rect.union(Rect(ll,ur)) + else: rect = Rect(ll, ur) - if note: - annotations.append(note) + + contents = stroke["text"] + " " + newnote = Annotation("Highlight", rect, contents=contents) + + if cursor > 0 and (stroke["start"] - cursor > 10): # sometimes there are small gaps due to whitespace? + # For now, treat non-continuous highlights as separate notes + annotations[0][1].append(note) + note = newnote + else: + note = Annotation.union(note, newnote) + + cursor = stroke["start"]+stroke["length"] + + if note: + annotations[0][1].append(note) + + for layer in self.layers: + annotations.append(layer.get_grouped_annotations()) return annotations @@ -202,6 +206,43 @@ def __init__(self, page, name=None): # PDF layers are ever implemented. self.annot_paths = [] + def get_grouped_annotations(self) -> Tuple[str, list]: + # return: (LayerName, [Annotations]) + + # Compare all the annot_paths to each other. If any overlap, + # they will be grouped together. This is done recursively. 
+ def grouping_func(pathset): + newset = [] + + for p in pathset: + annotype = p.annotype + #path = p[1] #returns (xmin, ymin, xmax, ymax) + did_fit = False + for i, g in enumerate(newset): + gannotype = g.annotype + #group = g[1] + # Only compare annotations of the same type + if gannotype != annotype: + continue + if p.intersects(g): + did_fit = True + newset[i] = g.united(p) #left off here, need to build united and quadpoints + break + if did_fit: + continue + # Didn't fit, so place into a new group + newset.append(p) + + if len(newset) != len(pathset): + # Might have stuff left to group + return grouping_func(newset) + else: + # Nothing was grouped, so done + return newset + + grouped = grouping_func(self.annot_paths) + return (self.name, grouped) + def paint_strokes(self, canvas, vector): for stroke in self.strokes: pen, color, unk1, width, unk2, segments = stroke diff --git a/rmrl/pens/highlighter.py b/rmrl/pens/highlighter.py index 156930b..0f1dc53 100644 --- a/rmrl/pens/highlighter.py +++ b/rmrl/pens/highlighter.py @@ -19,9 +19,6 @@ from reportlab.pdfgen.pathobject import PDFPathObject from ..annotation import Annotation, Point, Rect, QuadPoints -# In software version 2.7, reMarkable phased out the highlighter pen in favor -# of a separate .highlights file. This code is likely obsolete and can be removed -# once we are confident this change is stable class HighlighterPen(GenericPen): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) @@ -34,8 +31,8 @@ def paint_stroke(self, canvas, stroke): canvas.setLineCap(2) # Square canvas.setLineJoin(1) # Round #canvas.setDash ?? 
for solid line - yellow = (1.000, 0.914, 0.290) - canvas.setStrokeColor(yellow, alpha=0.392) + white = (1, 1, 1) #color handled by annotation object in PDF + canvas.setStrokeColor(white, alpha=0.0) canvas.setLineWidth(stroke.width) path = canvas.beginPath() diff --git a/rmrl/render.py b/rmrl/render.py index 8b40c31..2841dd7 100644 --- a/rmrl/render.py +++ b/rmrl/render.py @@ -26,7 +26,7 @@ from reportlab.pdfgen import canvas from . import document, sources -from .constants import PDFHEIGHT, PDFWIDTH, PTPERPX, SPOOL_MAX +from .constants import PDFHEIGHT, PDFWIDTH, PTPERPX, SPOOL_MAX, HIGHLIGHTCOLOR from typing import Tuple, List @@ -90,7 +90,7 @@ def render(source, *, # about 500 pages could use up to 3 GB of RAM. Create them by # iteration so they get released by garbage collector. changed_pages = [] - annotations = [] # [pages[Annotations]] + annotations = [] # [pages[layers[(layer, [Annotations])]]] for i in range(0, len(pages)): page = document.DocumentPage(source, pages[i], i) if source.exists(page.rmpath): @@ -398,34 +398,37 @@ def invert_coords(point) -> Tuple[float]: def apply_annotations(rmpage, page_annot, ocgorderinner): # page_annot = layers[(layer, [Annotations])] - for a in page_annot: - # PDF origin is in bottom-left, so invert all - # y-coordinates. - author = 'reMarkable' #self.model.device_info['rcuname'] - - x1, y1 = invert_coords(a.rect.ll) - x2, y2 = invert_coords(a.rect.ur) - - w = x2-x1 - h = y1-y2 - qp = [c for p in map(invert_coords, a.quadpoints.points) for c in p] - - pdf_a = PdfDict(Type=PdfName('Annot'), - Rect=PdfArray([x1, y1, x2, y2]), - QuadPoints=PdfArray(qp), - Contents=a.contents, - T=author, - ANN='pdfmark', - Subtype=PdfName(a.annotype), - P=rmpage) - # Set to indirect because it makes a cleaner PDF - # output. 
- pdf_a.indirect = True - if ocgorderinner: - pdf_a.OC = ocgorderinner[k] - if not '/Annots' in rmpage: - rmpage.Annots = PdfArray() - rmpage.Annots.append(pdf_a) + for k, layer_a in enumerate(page_annot): + layerannots = layer_a[1] + for a in layerannots: + # PDF origin is in bottom-left, so invert all + # y-coordinates. + author = 'reMarkable' #self.model.device_info['rcuname'] + + x1, y1 = invert_coords(a.rect.ll) + x2, y2 = invert_coords(a.rect.ur) + + w = x2-x1 + h = y1-y2 + qp = [c for p in map(invert_coords, a.quadpoints.points) for c in p] + + pdf_a = PdfDict(Type=PdfName('Annot'), + Rect=PdfArray([x1, y1, x2, y2]), + QuadPoints=PdfArray(qp), + C=PdfArray(HIGHLIGHTCOLOR), + Contents=a.contents, + T=author, + ANN='pdfmark', + Subtype=PdfName(a.annotype), + P=rmpage) + # Set to indirect because it makes a cleaner PDF + # output. + pdf_a.indirect = True + if ocgorderinner: + pdf_a.OC = ocgorderinner[k] + if not '/Annots' in rmpage: + rmpage.Annots = PdfArray() + rmpage.Annots.append(pdf_a) def merge_pages(basepage, rmpage, changed_page, expand_pages):