{
  "components": [
    {
      "best_practices": [
        "Nodes and relationships are stored as frozensets internally for immutability, but exposed as mutable sets through properties",
        "When creating a Subgraph with relationships, you don't need to explicitly provide the connected nodes - they are automatically extracted",
        "The unbind() method creates a deep copy with unbound nodes and relationships, useful for detaching from a parent graph",
        "Set operations (|, &, -, ^) create new Subgraph instances rather than modifying existing ones",
        "The graph property can be set after instantiation to bind the subgraph to a parent graph",
        "Boolean evaluation returns True if the subgraph contains any nodes, False otherwise",
        "Nodes must have a 'UID' attribute for the unbind() method to work correctly",
        "The unbind() method assumes relationships have start_node, end_node, labels, and relationship attributes"
      ],
      "class_interface": {
        "attributes": [
          {
            "description": "Private attribute storing reference to the parent graph object",
            "is_class_variable": false,
            "name": "_graph",
            "type": "object or None"
          },
          {
            "description": "Private immutable set storing all node objects in the subgraph",
            "is_class_variable": false,
            "name": "_nodes",
            "type": "frozenset"
          },
          {
            "description": "Private immutable set storing all relationship objects in the subgraph",
            "is_class_variable": false,
            "name": "_relationships",
            "type": "frozenset"
          }
        ],
        "methods": [
          {
            "is_property": false,
            "is_static": false,
            "name": "__init__",
            "parameters": {
              "graph": "Optional parent graph reference",
              "nodes": "Optional iterable of node objects",
              "relationships": "Optional iterable of relationship objects"
            },
            "purpose": "Initialize a Subgraph with optional nodes, relationships, and parent graph reference",
            "returns": "None (constructor)",
            "signature": "__init__(nodes=None, relationships=None, graph=None)"
          },
          {
            "is_property": true,
            "is_static": false,
            "name": "nodes",
            "parameters": {},
            "purpose": "Get a mutable set of all nodes in the subgraph",
            "returns": "Set containing all node objects in the subgraph",
            "signature": "@property nodes(self) -> set"
          },
          {
            "is_property": true,
            "is_static": false,
            "name": "relationships",
            "parameters": {},
            "purpose": "Get a mutable set of all relationships in the subgraph",
            "returns": "Set containing all relationship objects in the subgraph",
            "signature": "@property relationships(self) -> set"
          },
          {
            "is_property": true,
            "is_static": false,
            "name": "graph",
            "parameters": {},
            "purpose": "Get the parent graph reference",
            "returns": "The parent graph object or None if unbound",
            "signature": "@property graph(self)"
          },
          {
            "is_property": true,
            "is_static": false,
            "name": "graph",
            "parameters": {
              "graph": "The parent graph object to bind to"
            },
            "purpose": "Set the parent graph reference",
            "returns": "None",
            "signature": "@graph.setter graph(self, graph)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "unbind",
            "parameters": {},
            "purpose": "Create an unbound copy of the subgraph with unbound nodes and relationships",
            "returns": "A new Subgraph instance with unbound copies of all nodes and relationships",
            "signature": "unbind(self) -> Subgraph"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "__str__",
            "parameters": {},
            "purpose": "Return string representation of the subgraph",
            "returns": "String showing nodes and relationships",
            "signature": "__str__(self) -> str"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "__repr__",
            "parameters": {},
            "purpose": "Return detailed string representation of the subgraph",
            "returns": "String showing nodes and relationships",
            "signature": "__repr__(self) -> str"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "__bool__",
            "parameters": {},
            "purpose": "Check if subgraph contains any nodes",
            "returns": "True if subgraph has nodes, False otherwise",
            "signature": "__bool__(self) -> bool"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "__or__",
            "parameters": {
              "other": "Another Subgraph instance to union with"
            },
            "purpose": "Perform union operation with another subgraph",
            "returns": "New Subgraph containing union of nodes and relationships",
            "signature": "__or__(self, other: Subgraph) -> Subgraph"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "__and__",
            "parameters": {
              "other": "Another Subgraph instance to intersect with"
            },
            "purpose": "Perform intersection operation with another subgraph",
            "returns": "New Subgraph containing intersection of nodes and relationships",
            "signature": "__and__(self, other: Subgraph) -> Subgraph"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "__sub__",
            "parameters": {
              "other": "Another Subgraph instance to subtract"
            },
            "purpose": "Perform difference operation with another subgraph",
            "returns": "New Subgraph containing elements in self but not in other, preserving nodes connected by remaining relationships",
            "signature": "__sub__(self, other: Subgraph) -> Subgraph"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "__xor__",
            "parameters": {
              "other": "Another Subgraph instance for symmetric difference"
            },
            "purpose": "Perform symmetric difference operation with another subgraph",
            "returns": "New Subgraph containing elements in either subgraph but not both, preserving nodes connected by remaining relationships",
            "signature": "__xor__(self, other: Subgraph) -> Subgraph"
          }
        ]
      },
      "complexity": "moderate",
      "component_type": "class",
      "conditional_imports": [],
      "created_at": "2025-12-07 01:04:54",
      "decorators": [],
      "dependencies": [
        "uuid",
        "itertools"
      ],
      "description": "A class representing a graph subgraph containing nodes and relationships, with support for set operations and graph binding/unbinding.",
      "docstring": null,
      "id": 2143,
      "imports": [
        "from uuid import uuid4",
        "from itertools import chain",
        "import warnings"
      ],
      "imports_required": [
        "from uuid import uuid4",
        "from itertools import chain"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 459,
      "line_start": 393,
      "name": "Subgraph",
      "parameters": [
        {
          "annotation": "",
          "default": null,
          "name": "bases"
        }
      ],
      "parameters_explained": {
        "graph": "Optional reference to a parent graph object that this subgraph belongs to. Used for maintaining context and binding state.",
        "nodes": "Optional iterable of node objects to include in the subgraph. Can be None or empty. These nodes will be stored as a frozenset internally.",
        "relationships": "Optional iterable of relationship objects connecting nodes. Can be None or empty. The subgraph will automatically include all nodes referenced by these relationships, even if not explicitly provided in the nodes parameter."
      },
      "parent_class": null,
      "purpose": "The Subgraph class models a subset of a graph structure, managing collections of nodes and relationships. It provides graph algebra operations (union, intersection, difference, symmetric difference) and supports binding/unbinding from a parent graph. This is typically used in graph database operations to work with portions of larger graphs, allowing manipulation and combination of graph fragments.",
      "return_annotation": null,
      "return_explained": "Instantiation returns a Subgraph object containing the specified nodes and relationships. The nodes property returns a mutable set of nodes, relationships property returns a mutable set of relationships, and graph property returns the parent graph reference. The unbind() method returns a new unbound Subgraph copy. Set operations (__or__, __and__, __sub__, __xor__) return new Subgraph instances.",
      "settings_required": [
        "Requires Node and Relationship classes to be defined in the same module or imported",
        "Node objects must support 'UID' key access and unbind() method",
        "Relationship objects must have start_node, end_node, labels, relationship, and nodes attributes"
      ],
      "source_code": "class Subgraph():\n    \n    def __init__(self, nodes=None, relationships=None, graph=None):\n        self._graph = graph\n        self._nodes = frozenset(nodes or [])\n        self._relationships = frozenset(relationships or [])\n        self._nodes |= frozenset(chain.from_iterable(r.nodes for r in self._relationships))\n        \n    @property\n    def nodes(self):\n        return set(self._nodes)\n    \n    @property\n    def relationships(self):\n        return set(self._relationships)\n    \n    @property\n    def graph(self):\n        return self._graph\n    \n    @graph.setter\n    def graph(self, graph):\n        self._graph = graph\n    \n    def unbind(self):\n        \"\"\"Returns an unbound copy of itself\"\"\"\n        out = Subgraph()\n        created_nodes = []\n        for r in self.relationships:\n            if r.start_node['UID'] in created_nodes:\n                start = [i for i in out.nodes if i['UID'] == r.start_node['UID']][0]\n            else:\n                start = r.start_node.unbind()\n                created_nodes.append(start['UID'])\n            if r.end_node['UID'] in created_nodes:\n                end = [i for i in out.nodes if i['UID'] == r.end_node['UID']][0]\n            else:\n                end = r.end_node.unbind()\n                created_nodes.append(end['UID'])\n            out = out | Relationship(start, _Relationship(*r.labels, **r.relationship), end)\n        return out\n    \n    def __str__(self):\n        return \"Nodes(%s), \\nRelationships(%s)\" % (self.nodes,\n                                                   self.relationships)\n    \n    def __repr__(self):\n        return \"Nodes(%s), \\nRelationships(%s)\" % (self.nodes,\n                                                   self.relationships)\n    def __bool__(self):\n        return len(self.nodes) > 0\n    \n    def __or__(self, other):\n        return Subgraph(set(self.nodes) | set(other.nodes), set(self.relationships) | set(other.relationships))\n    \n    def __and__(self, other):\n        return Subgraph(set(self.nodes) & set(other.nodes), set(self.relationships) & set(other.relationships))\n\n    def __sub__(self, other):\n        r = set(self.relationships) - set(other.relationships)\n        n = (set(self.nodes) - set(other.nodes)) | set().union(*(set(rel.nodes) for rel in r))\n        return Subgraph(n, r)\n\n    def __xor__(self, other):\n        r = set(self.relationships) ^ set(other.relationships)\n        n = (set(self.nodes) ^ set(other.nodes)) | set().union(*(set(rel.nodes) for rel in r))\n        return Subgraph(n, r)",
      "source_file": "/tf/active/vicechatdev/neo4j_driver/neo4j_objects.py",
      "tags": [
        "graph",
        "subgraph",
        "nodes",
        "relationships",
        "set-operations",
        "graph-database",
        "data-structure",
        "graph-algebra"
      ],
      "updated_at": "2025-12-07T02:04:54.924392",
      "usage_example": "# Assuming Node and Relationship classes are available\nfrom itertools import chain\n\n# Create nodes\nnode1 = Node('Person', name='Alice', UID='uid1')\nnode2 = Node('Person', name='Bob', UID='uid2')\n\n# Create a relationship\nrel = Relationship(node1, 'KNOWS', node2)\n\n# Create subgraph with nodes and relationships\nsubgraph1 = Subgraph(nodes=[node1, node2], relationships=[rel])\n\n# Create another subgraph\nnode3 = Node('Person', name='Charlie', UID='uid3')\nsubgraph2 = Subgraph(nodes=[node3])\n\n# Combine subgraphs using union\ncombined = subgraph1 | subgraph2\n\n# Get intersection\ncommon = subgraph1 & subgraph2\n\n# Access nodes and relationships\nall_nodes = combined.nodes\nall_rels = combined.relationships\n\n# Unbind from graph\nunbound_copy = subgraph1.unbind()\n\n# Check if subgraph has nodes\nif subgraph1:\n    print('Subgraph has nodes')"
    },
    {
      "best_practices": [
        "Always ensure start_node and end_node are valid Node instances before instantiation",
        "When using a Node object as relationship parameter, ensure it has exactly one label",
        "Use tuple format (label, properties_dict) for relationships with properties",
        "Be aware that Node objects used as relationships carry over UID properties which _Relationship does not enforce",
        "Use unbind() method to create a copy detached from the graph database",
        "The graph parameter is automatically inferred from nodes if not provided",
        "The relationship is directional: start_node -> end_node",
        "Use _from_neo4j_node() classmethod when reconstructing relationships from database queries",
        "The _nodes attribute is a frozenset for immutability, but nodes property returns a mutable set"
      ],
      "class_interface": {
        "attributes": [
          {
            "description": "Private attribute storing the graph instance this relationship is bound to",
            "is_class_variable": false,
            "name": "_graph",
            "type": "object or None"
          },
          {
            "description": "The source/origin node of the relationship",
            "is_class_variable": false,
            "name": "start_node",
            "type": "Node"
          },
          {
            "description": "The target/destination node of the relationship",
            "is_class_variable": false,
            "name": "end_node",
            "type": "Node"
          },
          {
            "description": "Immutable set containing both start_node and end_node",
            "is_class_variable": false,
            "name": "_nodes",
            "type": "frozenset"
          },
          {
            "description": "List containing this relationship instance (for interface consistency)",
            "is_class_variable": false,
            "name": "_relationships",
            "type": "list"
          },
          {
            "description": "The underlying _Relationship object containing the label and properties",
            "is_class_variable": false,
            "name": "relationship",
            "type": "_Relationship"
          }
        ],
        "methods": [
          {
            "is_property": false,
            "is_static": false,
            "name": "__init__",
            "parameters": {
              "end_node": "Node object representing the target node",
              "graph": "Optional graph instance to bind to",
              "relationship": "Relationship definition (string, tuple, Node, or _Relationship)",
              "start_node": "Node object representing the source node"
            },
            "purpose": "Initialize a Relationship object with start node, relationship definition, and end node",
            "returns": "None (constructor)",
            "signature": "__init__(self, start_node, relationship, end_node, graph=None)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_from_neo4j_node",
            "parameters": {
              "kwargs": "Additional keyword arguments, particularly 'graph' for binding",
              "original": "Neo4j relationship object with start_node, end_node, type, and element_id attributes"
            },
            "purpose": "Class method to reconstruct a Relationship object from a Neo4j relationship object",
            "returns": "A new Relationship instance reconstructed from the Neo4j object",
            "signature": "_from_neo4j_node(cls, original, **kwargs) -> Relationship"
          },
          {
            "is_property": true,
            "is_static": false,
            "name": "graph",
            "parameters": {},
            "purpose": "Get the graph instance this relationship is bound to",
            "returns": "The graph instance or None if unbound",
            "signature": "@property graph(self)"
          },
          {
            "is_property": true,
            "is_static": false,
            "name": "graph",
            "parameters": {
              "graph": "Graph instance to bind to"
            },
            "purpose": "Set the graph instance this relationship is bound to",
            "returns": "None",
            "signature": "@graph.setter graph(self, graph)"
          },
          {
            "is_property": true,
            "is_static": false,
            "name": "nodes",
            "parameters": {},
            "purpose": "Get a set containing both start and end nodes",
            "returns": "A set containing the start_node and end_node",
            "signature": "@property nodes(self) -> set"
          },
          {
            "is_property": true,
            "is_static": false,
            "name": "relationships",
            "parameters": {},
            "purpose": "Get a list of relationships (contains self)",
            "returns": "A list containing this relationship instance",
            "signature": "@property relationships(self) -> list"
          },
          {
            "is_property": true,
            "is_static": false,
            "name": "element_id",
            "parameters": {},
            "purpose": "Get the database element ID of the relationship",
            "returns": "The element_id from the underlying _Relationship object",
            "signature": "@property element_id(self)"
          },
          {
            "is_property": true,
            "is_static": false,
            "name": "element_id",
            "parameters": {
              "element_id": "The element ID to assign"
            },
            "purpose": "Set the database element ID of the relationship",
            "returns": "None",
            "signature": "@element_id.setter element_id(self, element_id)"
          },
          {
            "is_property": true,
            "is_static": false,
            "name": "labels",
            "parameters": {},
            "purpose": "Get the labels of the relationship",
            "returns": "The labels from the underlying _Relationship object",
            "signature": "@property labels(self)"
          },
          {
            "is_property": true,
            "is_static": false,
            "name": "labels",
            "parameters": {
              "labels": "The labels to assign to the relationship"
            },
            "purpose": "Set the labels of the relationship",
            "returns": "None",
            "signature": "@labels.setter labels(self, labels)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "unbind",
            "parameters": {},
            "purpose": "Create and return an unbound copy of this relationship",
            "returns": "A new Relationship instance with unbound nodes and relationship, detached from any graph",
            "signature": "unbind(self) -> Relationship"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "__str__",
            "parameters": {},
            "purpose": "Return a human-readable string representation of the relationship",
            "returns": "String in format 'Relationship(start_node [relationship] end_node)'",
            "signature": "__str__(self) -> str"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "__repr__",
            "parameters": {},
            "purpose": "Return a developer-friendly string representation of the relationship",
            "returns": "String in format 'Relationship(start_node relationship end_node)'",
            "signature": "__repr__(self) -> str"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "clear",
            "parameters": {},
            "purpose": "Clear all properties from the underlying relationship object",
            "returns": "None",
            "signature": "clear(self)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "update",
            "parameters": {
              "kwargs": "Key-value pairs of properties to update"
            },
            "purpose": "Update properties of the underlying relationship object",
            "returns": "None",
            "signature": "update(self, **kwargs)"
          }
        ]
      },
      "complexity": "moderate",
      "component_type": "class",
      "conditional_imports": [],
      "created_at": "2025-12-07 01:04:21",
      "decorators": [],
      "dependencies": [
        "warnings",
        "uuid"
      ],
      "description": "A class representing a graph relationship between two nodes, wrapping a _Relationship object with start and end Node objects.",
      "docstring": "A relationship is represented as a collection of two nodes and a base _Relationship, which is similar to a Node except it may only have 1 label and does not enforce the UID property",
      "id": 2142,
      "imports": [
        "from uuid import uuid4",
        "from itertools import chain",
        "import warnings"
      ],
      "imports_required": [
        "from uuid import uuid4",
        "from itertools import chain",
        "import warnings"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 390,
      "line_start": 291,
      "name": "Relationship",
      "parameters": [
        {
          "annotation": "",
          "default": null,
          "name": "bases"
        }
      ],
      "parameters_explained": {
        "end_node": "A Node object representing the target/destination node of the relationship. Must be an instance of the Node class.",
        "graph": "Optional graph instance to bind this relationship to. If None, will attempt to use the graph from start_node or end_node if either has one. Defaults to None.",
        "relationship": "The relationship definition, which can be: (1) a _Relationship object, (2) a Node object with exactly 1 label (will be converted to _Relationship), (3) a string representing the relationship label, or (4) a tuple in format (label, properties_dict) where properties_dict is optional.",
        "start_node": "A Node object representing the source/origin node of the relationship. Must be an instance of the Node class."
      },
      "parent_class": null,
      "purpose": "The Relationship class models a directed edge in a graph database (Neo4j-style), connecting two Node objects through a labeled relationship. It encapsulates the start node, end node, and relationship properties, providing a unified interface for managing graph relationships. The class supports multiple initialization formats (string, tuple, Node, _Relationship) and can be bound to a graph instance for database operations.",
      "return_annotation": null,
      "return_explained": "Instantiation returns a Relationship object that encapsulates the start node, end node, and relationship properties. The object provides access to graph database operations and relationship metadata through properties and methods.",
      "settings_required": [
        "Requires Node class to be defined in the same module or imported",
        "Requires _Relationship class to be defined in the same module or imported",
        "May require a graph database connection object if performing database operations"
      ],
      "source_code": "class Relationship():\n    \"\"\"\n    A relationship is represented as a collection of two nodes and a base _Relationship, which is similar to a Node except it may only have 1 label and does not enforce the UID property\n    \"\"\"\n    \n    def __init__(self, start_node, relationship, end_node, graph=None):\n        assert isinstance(start_node, Node), \"Please supply a Node as start_node\"\n        assert isinstance(end_node, Node), \"Please supply a Node as end_node\"\n        if graph is None and any([start_node.graph, end_node.graph]):\n            graph = start_node.graph or end_node.graph\n        self._graph=graph\n        self.start_node=start_node\n        self.end_node=end_node\n        self._nodes = frozenset([self.start_node, self.end_node])\n        self._relationships=[self]\n        if isinstance(relationship, _Relationship):\n            self.relationship = relationship\n        elif isinstance(relationship, Node):\n            assert len(relationship.labels) == 1, \"When passing a Node object as relationship, make sure it has exactly 1 label\"\n            warnings.warn(\"Please be aware Node objects are forced to have a UID properties, and this property is carried over to the Relationship, which does not enforce UIDs\")\n            self.relationship = _Relationship(relationship.labels, **relationship)\n        elif isinstance(relationship, str):\n            self.relationship = _Relationship(relationship)\n        elif isinstance(relationship, tuple):\n            assert len(relationship) < 3, \"When passing a tuple, please ensure only one label is passed and all properties are formatted as a dict, e.g. (RELATIONSHIP, {PROPERTIES})\"\n            if isinstance(relationship[-1], dict):\n                self.relationship = _Relationship(relationship[0], **relationship[-1])\n            else:\n                warnings.warn(\"When passing a tuple, properties must be formatted as a single dictionary or they will be ignored\")\n                self.relationship = _Relationship(relationship[0])\n        else:\n            raise TypeError(\"Please supply a Node, string or tuple as relationship\")\n            \n    @classmethod\n    def _from_neo4j_node(cls, original, **kwargs):\n        start_node = Node._from_neo4j_node(original.start_node, graph=kwargs.get('graph',None))\n        start_node.pull()\n        end_node = Node._from_neo4j_node(original.end_node, graph=kwargs.get('graph',None))\n        end_node.pull()\n        try:\n            element_id = int(original.element_id)\n        except:\n            if ':' in original.element_id:\n                element_id = int(original.element_id.split(':')[-1])\n            else:\n                raise Exception(f\"Could not obtain element ID. 
Found ID: {original.element_id}\")\n        return cls(start_node, \n                   _Relationship(original.type, _element_id=element_id, **dict(original)), \n                   end_node,\n                   graph=kwargs.get('graph',None))\n            \n    @property\n    def graph(self):\n        return self._graph\n    \n    @graph.setter\n    def graph(self, graph):\n        self._graph = graph\n    \n    @property\n    def nodes(self):\n        return set(self._nodes)\n    \n    @property\n    def relationships(self):\n        return self._relationships\n    \n    @property\n    def element_id(self):\n        return self.relationship.element_id\n    \n    @element_id.setter\n    def element_id(self, element_id):\n        self.relationship.element_id = element_id\n        \n    @property\n    def labels(self):\n        return self.relationship.labels\n    \n    @labels.setter\n    def labels(self, labels):\n        self.relationship.labels = labels\n        \n    def unbind(self):\n        \"\"\"Returns an unbound copy of itself\"\"\"\n        return Relationship(self.start_node.unbind(), _Relationship(*self.relationship.labels, **self.relationship), self.end_node.unbind())\n    \n    def __str__(self):\n        return \"Relationship(%s [%s] %s)\" % (self.start_node, self.relationship, self.end_node)\n    \n    def __repr__(self):\n        return \"Relationship(%s %s %s)\" % (self.start_node, self.relationship, self.end_node)\n    \n    def clear(self):\n        \"Redirect PropertyDict funcs to the underlying _relationship class\"\n        self._relationship.clear()\n    \n    def update(self, **kwargs):\n        \"Redirect PropertyDict funcs to the underlying _relationship class\"\n        self._relationship.update(**kwargs)",
      "source_file": "/tf/active/vicechatdev/neo4j_driver/neo4j_objects.py",
      "tags": [
        "graph",
        "relationship",
        "edge",
        "neo4j",
        "database",
        "node",
        "graph-database",
        "data-structure"
      ],
      "updated_at": "2025-12-07T02:04:21.029143",
      "usage_example": "# Basic instantiation with string relationship\nstart = Node('Person', name='Alice')\nend = Node('Person', name='Bob')\nrel = Relationship(start, 'KNOWS', end)\n\n# With properties using tuple format\nrel = Relationship(start, ('KNOWS', {'since': 2020, 'strength': 0.8}), end)\n\n# With graph binding\nrel = Relationship(start, 'KNOWS', end, graph=my_graph)\n\n# Access properties\nprint(rel.labels)  # Relationship labels\nprint(rel.nodes)  # Set of start and end nodes\nprint(rel.element_id)  # Database element ID if bound\n\n# Create unbound copy\nunbound_rel = rel.unbind()\n\n# Update relationship properties\nrel.update(weight=5, active=True)"
    },
    {
      "best_practices": [
        "Always pass _element_id as a keyword argument (with underscore prefix) to avoid confusion with labels",
        "The element_id should be set by the database system after persistence; use None for new relationships",
        "Labels are stored as a set internally but the setter converts lists and strings appropriately",
        "When setting labels via the property setter, pass either a list of strings or a single string",
        "Properties can be accessed and modified using dictionary-style syntax (inherited from PropertyDict)",
        "The class maintains immutability of the original labels set by returning a copy via the labels property getter",
        "Element_id must be convertible to an integer; attempting to set non-numeric values will raise ValueError",
        "Use __str__ for human-readable output and __repr__ for debugging/logging purposes"
      ],
      "class_interface": {
        "attributes": [
          {
            "description": "Internal storage for relationship labels, initially a set but can become a list via setter",
            "is_class_variable": false,
            "name": "_labels",
            "type": "set | list"
          },
          {
            "description": "Internal storage for the database element identifier, validated to be integer or None",
            "is_class_variable": false,
            "name": "_element_id",
            "type": "int | None"
          }
        ],
        "methods": [
          {
            "is_property": false,
            "is_static": false,
            "name": "__init__",
            "parameters": {
              "**properties": "Arbitrary keyword arguments for relationship properties",
              "*labels": "Variable number of label strings for the relationship type",
              "_element_id": "Optional integer identifier for the relationship in the database (default: None)"
            },
            "purpose": "Initialize a relationship with labels, optional element ID, and properties",
            "returns": "None (constructor)",
            "signature": "__init__(self, *labels, _element_id=None, **properties)"
          },
          {
            "is_property": true,
            "is_static": false,
            "name": "labels",
            "parameters": {},
            "purpose": "Get a copy of the relationship's labels as a set",
            "returns": "A set containing all label strings for this relationship",
            "signature": "@property labels(self) -> set"
          },
          {
            "is_property": true,
            "is_static": false,
            "name": "labels",
            "parameters": {
              "labels": "Either a list of label strings or a single label string"
            },
            "purpose": "Set the relationship's labels from a list or string",
            "returns": "None (setter)",
            "signature": "@labels.setter labels(self, labels)"
          },
          {
            "is_property": true,
            "is_static": false,
            "name": "element_id",
            "parameters": {},
            "purpose": "Get the relationship's database element identifier",
            "returns": "Integer element ID or None if not set",
            "signature": "@property element_id(self) -> int | None"
          },
          {
            "is_property": true,
            "is_static": false,
            "name": "element_id",
            "parameters": {
              "x": "Value that can be coerced to an integer"
            },
            "purpose": "Set the relationship's element ID with validation",
            "returns": "None (setter)",
            "signature": "@element_id.setter element_id(self, x)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "__str__",
            "parameters": {},
            "purpose": "Return a human-readable string representation of the relationship",
            "returns": "Formatted string like '[label1, label2 { prop1: value1, prop2: value2 }]'",
            "signature": "__str__(self) -> str"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "__repr__",
            "parameters": {},
            "purpose": "Return a detailed string representation suitable for debugging",
            "returns": "Formatted string like '[label1, label2, prop1=value1, prop2=value2]'",
            "signature": "__repr__(self) -> str"
          }
        ]
      },
      "complexity": "moderate",
      "component_type": "class",
      "conditional_imports": [],
      "created_at": "2025-12-07 01:03:43",
      "decorators": [],
      "dependencies": [],
      "description": "A class representing a graph relationship (edge) with labels, properties, and an optional element ID, inheriting from PropertyDict to manage key-value properties.",
      "docstring": null,
      "id": 2141,
      "imports": [
        "from uuid import uuid4",
        "from itertools import chain",
        "import warnings"
      ],
      "imports_required": [
        "from uuid import uuid4",
        "from itertools import chain",
        "import warnings"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 289,
      "line_start": 240,
      "name": "_Relationship",
      "parameters": [
        {
          "annotation": "PropertyDict",
          "default": null,
          "name": "bases"
        }
      ],
      "parameters_explained": {
        "**properties": "Arbitrary keyword arguments representing property key-value pairs for the relationship. These are passed to the parent PropertyDict class and can be accessed like dictionary items.",
        "*labels": "Variable number of string arguments representing the relationship type labels. These are stored as a set internally to ensure uniqueness. Can be zero or more label strings.",
        "_element_id": "Optional keyword-only parameter representing the database element identifier. Must be convertible to an integer or None. Used to track the relationship's identity in a graph database. Defaults to None for new relationships not yet persisted."
      },
      "parent_class": null,
      "purpose": "This class models a relationship in a graph database structure, storing labels (relationship types), properties (key-value pairs), and an optional element_id for database identification. It provides property-based access to labels and element_id with validation, and custom string representations for debugging and display. The class is designed to work with graph database systems where relationships connect nodes and have typed labels and arbitrary properties.",
      "return_annotation": null,
      "return_explained": "Instantiation returns a _Relationship object that behaves like a dictionary for properties while maintaining labels and element_id as separate managed attributes. The __str__ method returns a formatted string like '[label1, label2 { prop1: value1, prop2: value2 }]'. The __repr__ method returns a similar representation suitable for debugging.",
      "settings_required": [
        "Requires PropertyDict class to be available in the same module or imported, as _Relationship inherits from it"
      ],
      "source_code": "class _Relationship(PropertyDict):\n    \n    def __init__(self, *labels, _element_id=None, **properties):\n        self._labels = set(labels)\n        if _element_id is None:\n            self._element_id = None\n        else:\n            try:\n                self._element_id = int(_element_id)\n            except ValueError:\n                raise ValueError(f\"element_id must be an integer or None, got {_element_id}\")\n        PropertyDict.__init__(self, properties)\n        \n        \n    @property\n    def labels(self):\n        return set(self._labels)\n    \n    @labels.setter\n    def labels(self, labels):\n        if isinstance(labels, list):\n            self._labels=labels\n        elif isinstance(labels, str):\n            self._labels=[labels]\n        else:\n            raise ValueError(\"Please pass a list or string as label\")\n    \n    @property\n    def element_id(self):\n        return self._element_id\n    \n    @element_id.setter\n    def element_id(self, x):\n        try:\n            int(x)\n        except:\n            raise ValueError(\"Invalid input for element_id, value cannot be coerced to int\")\n        self._element_id = x\n\n    def __str__(self):\n        kwargs = dict(self)\n        labels_str = \", \".join(self.labels)\n        props_str = \", \".join(\"{}: {!r}\".format(k, v) for k, v in kwargs.items())\n        return \"[{} {{ {} }}]\".format(labels_str, props_str)\n\n    def __repr__(self):\n        kwargs = dict(self)\n        labels_str = \", \".join(self.labels)\n        props_str = \", \".join(\"{}={!r}\".format(k, v) for k, v in kwargs.items())\n        return \"[{}, {}]\".format(labels_str, props_str)",
      "source_file": "/tf/active/vicechatdev/neo4j_driver/neo4j_objects.py",
      "tags": [
        "graph-database",
        "relationship",
        "edge",
        "property-graph",
        "data-structure",
        "neo4j-style",
        "labeled-graph",
        "dictionary-like"
      ],
      "updated_at": "2025-12-07T02:03:43.991915",
      "usage_example": "# Create a relationship with labels and properties\nrel = _Relationship('KNOWS', 'FRIEND_OF', _element_id=123, since=2020, strength=0.8)\n\n# Access labels\nprint(rel.labels)  # {'KNOWS', 'FRIEND_OF'}\n\n# Modify labels\nrel.labels = ['WORKS_WITH', 'COLLEAGUE']\n\n# Access properties (inherited from PropertyDict)\nprint(rel['since'])  # 2020\nrel['verified'] = True\n\n# Access element_id\nprint(rel.element_id)  # 123\n\n# Update element_id\nrel.element_id = 456\n\n# String representation\nprint(str(rel))  # [WORKS_WITH, COLLEAGUE { since: 2020, strength: 0.8, verified: True }]\nprint(repr(rel))  # [WORKS_WITH, COLLEAGUE, since=2020, strength=0.8, verified=True]"
    },
    {
      "best_practices": [
        "Always check if a node is bound to a graph (has element_id and graph) before performing database operations",
        "Use unbind() to create independent copies of nodes that won't sync with the database",
        "The _lock class variable prevents automatic database pulls when True - use carefully to avoid unintended synchronization",
        "Labels are stored in a set and are case-sensitive; ensure consistent label naming conventions",
        "The UID property is automatically generated and should not be manually modified",
        "When working with Neo4j nodes, use the _from_neo4j_node() class method for proper conversion",
        "The __ensure_labels() private method automatically pulls from database before label operations if the node is bound",
        "Use clear_element_id() to unbind a node from the database without creating a new instance",
        "Properties are managed through PropertyDict inheritance, so use dictionary-style access for properties",
        "The node maintains immutable label sets through the labels property; use add_label/remove_label methods to modify"
      ],
      "class_interface": {
        "attributes": [
          {
            "description": "Class variable that prevents automatic database pulls when True. Default is False",
            "is_class_variable": true,
            "name": "_lock",
            "type": "bool"
          },
          {
            "description": "The Neo4j database element ID for this node. None if unbound",
            "is_class_variable": false,
            "name": "_element_id",
            "type": "int | None"
          },
          {
            "description": "Reference to the Graph object this node belongs to. None if unbound",
            "is_class_variable": false,
            "name": "_graph",
            "type": "Graph | None"
          },
          {
            "description": "Internal set storing the node's labels",
            "is_class_variable": false,
            "name": "_labels",
            "type": "set"
          },
          {
            "description": "List containing this node instance (for graph structure compatibility)",
            "is_class_variable": false,
            "name": "_nodes",
            "type": "list"
          },
          {
            "description": "Unique identifier (UUID) for this node instance, stored as 'UID' property",
            "is_class_variable": false,
            "name": "_uid",
            "type": "str"
          }
        ],
        "methods": [
          {
            "is_property": false,
            "is_static": false,
            "name": "__init__",
            "parameters": {
              "**properties": "Key-value pairs for node properties",
              "*labels": "Variable number of label strings to assign to the node",
              "_element_id": "Optional integer ID from Neo4j database, or None for unbound nodes",
              "_graph": "Optional Graph object reference for database operations"
            },
            "purpose": "Initialize a new Node instance with labels, optional database binding, and properties",
            "returns": "None (constructor)",
            "signature": "__init__(self, *labels, _element_id=None, _graph=None, **properties)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_from_neo4j_node",
            "parameters": {
              "**kwargs": "Optional keyword arguments, particularly 'graph' for binding to a Graph instance",
              "original": "Neo4j node object with element_id, labels, and properties"
            },
            "purpose": "Class method to create a Node instance from a Neo4j native node object",
            "returns": "New Node instance populated with data from the Neo4j node",
            "signature": "_from_neo4j_node(cls, original, **kwargs) -> Node"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "keys",
            "parameters": {},
            "purpose": "Return the keys of the node's properties (inherited from PropertyDict)",
            "returns": "View of property keys",
            "signature": "keys(self) -> KeysView"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "__hash__",
            "parameters": {},
            "purpose": "Return hash value based on the node's UID for use in sets and dictionaries",
            "returns": "Integer hash value",
            "signature": "__hash__(self) -> int"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "__str__",
            "parameters": {},
            "purpose": "Return human-readable string representation of the node",
            "returns": "String in format 'Node(Label1, Label2 { prop1: value1, prop2: value2 })'",
            "signature": "__str__(self) -> str"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "__repr__",
            "parameters": {},
            "purpose": "Return developer-friendly string representation suitable for debugging",
            "returns": "String in format 'Node(Label1, Label2, prop1=value1, prop2=value2)'",
            "signature": "__repr__(self) -> str"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "pull",
            "parameters": {},
            "purpose": "Synchronize node data from the database if bound to a graph",
            "returns": "None, but updates the node's labels and properties from the database",
            "signature": "pull(self) -> None"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "unbind",
            "parameters": {},
            "purpose": "Create an unbound copy of the node without graph or element_id references",
            "returns": "New Node instance with same labels and properties but no database binding",
            "signature": "unbind(self) -> Node"
          },
          {
            "is_property": true,
            "is_static": false,
            "name": "nodes",
            "parameters": {},
            "purpose": "Property that returns a list containing this node (for compatibility with graph structures)",
            "returns": "List containing only this node instance",
            "signature": "nodes(self) -> list"
          },
          {
            "is_property": true,
            "is_static": false,
            "name": "relationships",
            "parameters": {},
            "purpose": "Property that returns relationships connected to this node",
            "returns": "Empty set (base implementation, may be overridden in subclasses)",
            "signature": "relationships(self) -> set"
          },
          {
            "is_property": true,
            "is_static": false,
            "name": "element_id",
            "parameters": {},
            "purpose": "Property to get or set the Neo4j database element ID",
            "returns": "Integer element ID if bound to database, None otherwise",
            "signature": "element_id(self) -> int | None"
          },
          {
            "is_property": true,
            "is_static": false,
            "name": "graph",
            "parameters": {},
            "purpose": "Property to get or set the Graph instance this node is bound to",
            "returns": "Graph object if bound, None otherwise",
            "signature": "graph(self) -> Graph | None"
          },
          {
            "is_property": true,
            "is_static": false,
            "name": "labels",
            "parameters": {},
            "purpose": "Property that returns an immutable set of all labels on this node",
            "returns": "Set of label strings (immutable copy)",
            "signature": "labels(self) -> set"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "clear_element_id",
            "parameters": {},
            "purpose": "Remove the element_id, effectively unbinding the node from the database",
            "returns": "None, sets element_id to None",
            "signature": "clear_element_id(self) -> None"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "has_label",
            "parameters": {
              "label": "String label or tuple of label strings to check for"
            },
            "purpose": "Check if the node has a specific label or tuple of labels",
            "returns": "True if node has the label(s), False otherwise. For tuples, returns True only if all labels are present",
            "signature": "has_label(self, label) -> bool"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "add_label",
            "parameters": {
              "label": "String label or tuple of label strings to add"
            },
            "purpose": "Add a label or multiple labels (if tuple) to the node",
            "returns": "None, modifies the node's label set",
            "signature": "add_label(self, label) -> None"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "remove_label",
            "parameters": {
              "label": "String label or tuple of label strings to remove"
            },
            "purpose": "Remove a label or multiple labels (if tuple) from the node",
            "returns": "None, modifies the node's label set. Does nothing if label doesn't exist",
            "signature": "remove_label(self, label) -> None"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "clear_labels",
            "parameters": {},
            "purpose": "Remove all labels from the node",
            "returns": "None, empties the node's label set",
            "signature": "clear_labels(self) -> None"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "update_labels",
            "parameters": {
              "labels": "Iterable of label strings to add"
            },
            "purpose": "Add multiple labels from an iterable to the node",
            "returns": "None, adds all labels from the iterable to the node",
            "signature": "update_labels(self, labels) -> None"
          }
        ]
      },
      "complexity": "moderate",
      "component_type": "class",
      "conditional_imports": [],
      "created_at": "2025-12-07 01:03:13",
      "decorators": [],
      "dependencies": [
        "uuid"
      ],
      "description": "A Node class representing a graph node with labels and properties, designed to work with Neo4j graph databases. It extends PropertyDict to manage node properties and provides methods for label management and graph synchronization.",
      "docstring": "    ",
      "id": 2140,
      "imports": [
        "from uuid import uuid4",
        "from itertools import chain",
        "import warnings"
      ],
      "imports_required": [
        "from uuid import uuid4"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 238,
      "line_start": 97,
      "name": "Node",
      "parameters": [
        {
          "annotation": "PropertyDict",
          "default": null,
          "name": "bases"
        }
      ],
      "parameters_explained": {
        "**properties": "Arbitrary keyword arguments representing the properties (key-value pairs) to store on this node. These are passed to the PropertyDict parent class for management.",
        "*labels": "Variable number of string arguments representing the labels to assign to this node. Labels are used to categorize nodes in the graph database (e.g., 'Person', 'Company').",
        "_element_id": "Optional integer or None. The unique identifier assigned by Neo4j when the node is persisted to the database. None indicates an unbound node that hasn't been saved yet. Must be coercible to an integer.",
        "_graph": "Optional reference to a Graph object that this node belongs to. Used for database operations like pulling updated data from the database. None indicates the node is not associated with a graph instance."
      },
      "parent_class": null,
      "purpose": "This class represents a node in a graph database (specifically Neo4j). It manages node labels, properties, and maintains a connection to a graph instance. The Node can be bound to a database (with an element_id) or exist as an unbound entity. It provides functionality for label manipulation, property management through inheritance from PropertyDict, and synchronization with the database through pull operations. The class supports creating nodes from Neo4j native node objects and maintains a unique identifier (UID) for each instance.",
      "return_annotation": null,
      "return_explained": "Instantiation returns a Node object. The object contains labels, properties, and optional graph binding. Key method returns: pull() returns None but updates the node's state; unbind() returns a new unbound Node instance; has_label() returns boolean; labels property returns an immutable set of label strings; element_id property returns integer or None; graph property returns Graph object or None.",
      "settings_required": [
        "Requires PropertyDict parent class to be available in the same module or imported",
        "For database operations, requires a Graph object instance with a pull() method that accepts a Node"
      ],
      "source_code": "class Node(PropertyDict):\n    \"\"\" \n    \"\"\"\n    \n    _lock = False\n    \n    def __init__(self, *labels, _element_id=None, _graph=None, **properties):\n        if _element_id is None:\n            self._element_id = None\n        else:\n            try:\n                self._element_id = int(_element_id)\n            except ValueError:\n                raise ValueError(f\"element_id must be an integer or None, got {_element_id}\")\n        self._graph = _graph\n        self._labels = set(labels)\n        self._nodes = [self]\n        PropertyDict.__init__(self, properties)\n        self._uid = self.setdefault('UID', str(uuid4()))\n        \n    @classmethod\n    def _from_neo4j_node(cls, original, **kwargs):\n        try:\n            element_id = int(original.element_id)\n        except:\n            if ':' in original.element_id:\n                element_id = int(original.element_id.split(':')[-1])\n            else:\n                raise Exception(f\"Could not obtain element ID. Found ID: {original.element_id}\")\n        return cls(*list(original.labels), _element_id=element_id, _graph=kwargs.get('graph', None), **dict(original))\n\n    def __ensure_labels(self):\n        if self._graph and self.element_id and not self._lock:\n            self.graph.pull(self)\n\n    def keys(self):\n        return PropertyDict.keys(self)\n    \n    def __hash__(self):\n        return hash(self._uid)\n    \n    def __str__(self):\n        kwargs = dict(self)\n        labels_str = \", \".join(self.labels)\n        props_str = \", \".join(\"{}: {!r}\".format(k, v) for k, v in kwargs.items())\n        return \"Node({} {{ {} }})\".format(labels_str, props_str)\n\n    def __repr__(self):\n        kwargs = dict(self)\n        labels_str = \", \".join(self.labels)\n        props_str = \", \".join(\"{}={!r}\".format(k, v) for k, v in kwargs.items())\n        return \"Node({}, {})\".format(labels_str, props_str)\n    \n    def pull(self):\n        if self._graph and self.element_id and not self._lock:\n            self.graph.pull(self)\n        \n    def unbind(self):\n        \"\"\"Returns an unbound copy of itself\"\"\"\n        return Node(*self.labels, **dict(self))\n    \n    @property\n    def nodes(self):\n        return self._nodes\n    \n    @property\n    def relationships(self):\n        return set([])\n    \n    @property\n    def element_id(self):\n        return self._element_id\n    \n    @element_id.setter\n    def element_id(self, x):\n        try:\n            int(x)\n        except:\n            raise TypeError(\"Invalid input for element_id, value cannot be coerced to int\")\n        self._element_id = x\n    \n    @property\n    def graph(self):\n        return self._graph\n    \n    @graph.setter\n    def graph(self, graph):\n        self._graph = graph\n    \n    @property\n    def labels(self):\n        \"\"\" The full set of labels associated with with this *node*.\n        This set is immutable and cannot be used to add or remove\n        labels. 
Use methods such as :meth:`.add_label` and\n        :meth:`.remove_label` for that instead.\n        \"\"\"\n        return set(self._labels)\n    \n    def clear_element_id(self):\n        self._element_id = None\n\n    def has_label(self, label):\n        \"\"\" Return :const:`True` if this node has the label `label`,\n        :const:`False` otherwise.\n        \"\"\"\n        self.__ensure_labels()\n        if isinstance(label, tuple):\n            return all(lab in self._labels for lab in label)\n        else:\n            return label in self._labels\n\n    def add_label(self, label):\n        \"\"\" Add the label `label` to this node.\n        \"\"\"\n        self.__ensure_labels()\n        if isinstance(label, tuple):\n            self._labels.update(label)\n        else:\n            self._labels.add(label)\n\n    def remove_label(self, label):\n        \"\"\" Remove the label `label` from this node, if it exists.\n        \"\"\"\n        self.__ensure_labels()\n        if isinstance(label, tuple):\n            for lab in label:\n                self._labels.discard(lab)\n        else:\n            self._labels.discard(label)\n\n    def clear_labels(self):\n        \"\"\" Remove all labels from this node.\n        \"\"\"\n        self._labels.clear()\n\n    def update_labels(self, labels):\n        \"\"\" Add multiple labels to this node from the iterable\n        `labels`.\n        \"\"\"\n        self.__ensure_labels()\n        for label in labels:\n            self.add_label(label)",
      "source_file": "/tf/active/vicechatdev/neo4j_driver/neo4j_objects.py",
      "tags": [
        "graph-database",
        "neo4j",
        "node",
        "graph-node",
        "labels",
        "properties",
        "database-orm",
        "data-model",
        "graph-structure"
      ],
      "updated_at": "2025-12-07T02:03:13.739345",
      "usage_example": "# Create an unbound node\nnode = Node('Person', 'Employee', name='John Doe', age=30)\n\n# Access labels\nprint(node.labels)  # {'Person', 'Employee'}\n\n# Check for label\nif node.has_label('Person'):\n    print('Is a Person')\n\n# Add/remove labels\nnode.add_label('Manager')\nnode.remove_label('Employee')\n\n# Access properties (inherited from PropertyDict)\nprint(node['name'])  # 'John Doe'\nnode['department'] = 'Engineering'\n\n# Create from Neo4j node object\n# neo4j_node = session.run('MATCH (n) RETURN n').single()['n']\n# node = Node._from_neo4j_node(neo4j_node, graph=my_graph)\n\n# Bind to graph and pull updates\n# node.graph = my_graph\n# node.element_id = 123\n# node.pull()  # Syncs with database\n\n# Create unbound copy\nunbound_copy = node.unbind()\n\n# Access unique identifier\nprint(node._uid)  # UUID string"
    },
    {
      "best_practices": [
        "Always call close() when done with the Graph instance to properly release database connections",
        "Use context managers or try-finally blocks to ensure connections are closed even if errors occur",
        "The class automatically handles ServiceUnavailable exceptions and attempts to reconnect, but persistent connection issues should be investigated",
        "Nodes and relationships must have a UID property for proper MERGE operations in create()",
        "Before calling push(), ensure the node has been created in the database (has an element_id)",
        "The pull() method locks nodes during update to prevent concurrent modifications",
        "Label parameters can be passed as a single string or list of strings; they are automatically converted to lists internally",
        "Element IDs must be integers; the class attempts to coerce string IDs to integers but will raise ValueError if coercion fails",
        "When creating entities, the graph property is automatically set on nodes and relationships",
        "The delete() method performs DETACH DELETE, which removes all relationships before deleting nodes"
      ],
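      "connection_management_example": "# Illustrative sketch (not taken from the original source): one way to guarantee close() runs\n# even when a query raises, per the best practices above. URI and credentials are placeholders.\ngraph = Graph(URI='bolt://localhost:7687', auth=('neo4j', 'password'), database='neo4j')\ntry:\n    result = graph.run('MATCH (n) RETURN count(n) AS total')\n    print(result.data())\nfinally:\n    graph.close()",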
      "class_interface": {
        "attributes": [
          {
            "description": "Neo4j driver instance used for all database connections and operations",
            "is_class_variable": false,
            "name": "driver",
            "type": "neo4j.GraphDatabase.driver"
          },
          {
            "description": "Name of the specific Neo4j database to connect to, or None for default database",
            "is_class_variable": false,
            "name": "database",
            "type": "str or None"
          }
        ],
        "methods": [
          {
            "is_property": false,
            "is_static": false,
            "name": "__init__",
            "parameters": {
              "URI": "Connection URI for Neo4j database",
              "auth": "Authentication credentials (username, password tuple or auth object)",
              "database": "Optional database name to connect to",
              "name": "Alternative parameter for database name (deprecated)"
            },
            "purpose": "Initialize a Graph instance with connection parameters to a Neo4j database",
            "returns": "None (constructor)",
            "signature": "__init__(self, URI, auth, database=None, name=None)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "close",
            "parameters": {},
            "purpose": "Close the Neo4j driver connection",
            "returns": "None",
            "signature": "close(self)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "open",
            "parameters": {},
            "purpose": "Open/reopen the Neo4j driver connection",
            "returns": "None",
            "signature": "open(self)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "__repr__",
            "parameters": {},
            "purpose": "Return a string representation of the Graph instance showing host and database",
            "returns": "String describing the graph interface connection",
            "signature": "__repr__(self) -> str"
          },
          {
            "is_property": false,
            "is_static": true,
            "name": "_get_label_strings",
            "parameters": {
              "labels": "List of label strings or None"
            },
            "purpose": "Convert a list of labels into a Cypher-compatible label string (e.g., ':Person:Employee')",
            "returns": "Formatted label string for Cypher queries, empty string if None in labels",
            "signature": "_get_label_strings(labels) -> str"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "catch_service_unavailable",
            "parameters": {
              "func": "Function to wrap with exception handling"
            },
            "purpose": "Decorator that catches ServiceUnavailable exceptions and attempts to reconnect before retrying",
            "returns": "Wrapped function with automatic reconnection on service unavailability",
            "signature": "catch_service_unavailable(func)"
          },
          {
            "is_property": false,
            "is_static": true,
            "name": "_run",
            "parameters": {
              "kwargs": "Parameters to pass to the query",
              "query": "Cypher query string to execute",
              "tx": "Neo4j transaction object"
            },
            "purpose": "Execute a Cypher query within a transaction and return all records",
            "returns": "List of result records from the query",
            "signature": "_run(tx, query, **kwargs) -> list"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "run",
            "parameters": {
              "kwargs": "Parameters to pass to the query",
              "query": "Cypher query string to execute"
            },
            "purpose": "Execute a Cypher query and return results wrapped in a ResultWrapper object",
            "returns": "ResultWrapper object containing query results",
            "signature": "run(self, query, **kwargs) -> ResultWrapper"
          },
          {
            "is_property": false,
            "is_static": true,
            "name": "_match_by_id",
            "parameters": {
              "label": "List of labels to filter by",
              "tx": "Neo4j transaction object",
              "x": "Integer ID of the node"
            },
            "purpose": "Internal method to match a node by its Neo4j internal ID within a transaction",
            "returns": "Neo4j node object",
            "signature": "_match_by_id(tx, x, label)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "match_by_id",
            "parameters": {
              "label": "Optional label or list of labels to filter by",
              "x": "Integer or string ID of the node (will be coerced to int)"
            },
            "purpose": "Match and return a node by its Neo4j internal ID",
            "returns": "Node object representing the matched node",
            "signature": "match_by_id(self, x, label=None) -> Node"
          },
          {
            "is_property": false,
            "is_static": true,
            "name": "_match_by_uid",
            "parameters": {
              "label": "List of labels to filter by",
              "tx": "Neo4j transaction object",
              "uid": "UID property value to match"
            },
            "purpose": "Internal method to match a node by its UID property within a transaction",
            "returns": "Neo4j node object",
            "signature": "_match_by_uid(tx, uid, label)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "match_by_uid",
            "parameters": {
              "label": "Optional label or list of labels to filter by",
              "uid": "UID property value to match"
            },
            "purpose": "Match and return a node by its UID property",
            "returns": "Node object representing the matched node",
            "signature": "match_by_uid(self, uid, label=None) -> Node"
          },
          {
            "is_property": false,
            "is_static": true,
            "name": "_match_by_name",
            "parameters": {
              "label": "List of labels to filter by",
              "name": "Name property value to match",
              "tx": "Neo4j transaction object"
            },
            "purpose": "Internal method to match a node by its N (name) property within a transaction",
            "returns": "Neo4j node object",
            "signature": "_match_by_name(tx, name, label)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "match_by_name",
            "parameters": {
              "label": "Optional label or list of labels to filter by",
              "name": "Name property value to match"
            },
            "purpose": "Match and return a node by its N (name) property",
            "returns": "Node object representing the matched node",
            "signature": "match_by_name(self, name, label=None) -> Node"
          },
          {
            "is_property": false,
            "is_static": true,
            "name": "_match_relationship_by_id",
            "parameters": {
              "tx": "Neo4j transaction object",
              "x": "Integer ID of the relationship"
            },
            "purpose": "Internal method to match a relationship by its Neo4j internal ID within a transaction",
            "returns": "Neo4j relationship object",
            "signature": "_match_relationship_by_id(tx, x)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "match_relationship_by_id",
            "parameters": {
              "x": "Integer or string ID of the relationship (will be coerced to int)"
            },
            "purpose": "Match and return a relationship by its Neo4j internal ID",
            "returns": "Relationship object representing the matched relationship",
            "signature": "match_relationship_by_id(self, x) -> Relationship"
          },
          {
            "is_property": false,
            "is_static": true,
            "name": "_push",
            "parameters": {
              "element_id": "Integer ID of the node to update",
              "properties": "Dictionary of properties to set on the node",
              "tx": "Neo4j transaction object"
            },
            "purpose": "Internal method to update a node's properties in the database within a transaction",
            "returns": "List of result records",
            "signature": "_push(tx, element_id, properties)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "push",
            "parameters": {
              "node": "Node object to push to the database (must have element_id and be bound to this graph)"
            },
            "purpose": "Update an existing node's properties in the database",
            "returns": "None",
            "signature": "push(self, node) -> None"
          },
          {
            "is_property": false,
            "is_static": true,
            "name": "_node_pull",
            "parameters": {
              "ids": "List of node IDs to retrieve",
              "tx": "Neo4j transaction object"
            },
            "purpose": "Internal method to retrieve node data by IDs within a transaction",
            "returns": "List of tuples containing (id, labels, properties) for each node",
            "signature": "_node_pull(tx, ids) -> list"
          },
          {
            "is_property": false,
            "is_static": true,
            "name": "_relationship_pull",
            "parameters": {
              "ids": "List of relationship IDs to retrieve",
              "tx": "Neo4j transaction object"
            },
            "purpose": "Internal method to retrieve relationship data by IDs within a transaction",
            "returns": "List of tuples containing (id, properties) for each relationship",
            "signature": "_relationship_pull(tx, ids) -> list"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "pull",
            "parameters": {
              "entity": "Entity object (Node, Relationship, or Subgraph) to refresh from database"
            },
            "purpose": "Refresh an entity (node or subgraph) with the latest data from the database",
            "returns": "None (updates entity in place)",
            "signature": "pull(self, entity) -> None"
          },
          {
            "is_property": false,
            "is_static": true,
            "name": "_create",
            "parameters": {
              "data": "List of data dictionaries to create",
              "query": "Cypher query string for creating entities",
              "tx": "Neo4j transaction object"
            },
            "purpose": "Internal method to execute a create/merge query within a transaction",
            "returns": "List of result records",
            "signature": "_create(tx, query, data) -> list"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "create",
            "parameters": {
              "entity": "Entity object (Node, Relationship, or Subgraph) to create in database"
            },
            "purpose": "Create nodes and relationships in the database, using MERGE to avoid duplicates based on UID",
            "returns": "None (updates entity element_ids in place)",
            "signature": "create(self, entity) -> None"
          },
          {
            "is_property": false,
            "is_static": true,
            "name": "_delete",
            "parameters": {
              "identities": "List of element IDs to delete",
              "tx": "Neo4j transaction object"
            },
            "purpose": "Internal method to delete nodes and relationships by ID within a transaction",
            "returns": "List of result records",
            "signature": "_delete(tx, identities) -> list"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "delete",
            "parameters": {
              "entity": "Entity object (Node, Relationship, or Subgraph) to delete from database"
            },
            "purpose": "Delete nodes and relationships from the database (performs DETACH DELETE)",
            "returns": "None",
            "signature": "delete(self, entity) -> None"
          }
        ]
      },
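      "label_string_example": "# Illustrative sketch (not taken from the original source): how the static helper\n# _get_label_strings() formats labels for Cypher, per the source_code field below.\nprint(Graph._get_label_strings(['Person', 'Employee']))  # ':Person:Employee'\nprint(Graph._get_label_strings([None]))                  # '' (no label filter applied)",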
      "complexity": "complex",
      "component_type": "class",
      "conditional_imports": [
        {
          "condition": "Required for Node, Relationship, and ResultWrapper classes that are used throughout the Graph class methods",
          "import": "from neo4j_objects import *",
          "optional": false
        },
        {
          "condition": "Imported in source file but not directly used in this class; may be used by related classes or for data processing",
          "import": "import pandas as pd",
          "optional": true
        }
      ],
      "created_at": "2025-12-07 01:01:51",
      "decorators": [],
      "dependencies": [
        "neo4j",
        "functools",
        "warnings",
        "pandas"
      ],
      "description": "A Graph class that provides an interface for interacting with a Neo4j graph database, supporting CRUD operations on nodes and relationships through Cypher queries.",
      "docstring": "A Graph class for interacting with a Neo4j graph database. The Graph class has methods for running Cypher queries, matching nodes by ID, UID, and name, and matching relationships by ID.",
      "id": 2139,
      "imports": [
        "import neo4j",
        "from functools import wraps",
        "from neo4j import GraphDatabase",
        "from neo4j.exceptions import ServiceUnavailable",
        "from neo4j_objects import *",
        "import warnings",
        "import pandas as pd"
      ],
      "imports_required": [
        "from neo4j import GraphDatabase",
        "from neo4j.exceptions import ServiceUnavailable",
        "from functools import wraps",
        "import warnings"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 328,
      "line_start": 92,
      "name": "Graph",
      "parameters": [
        {
          "annotation": "",
          "default": null,
          "name": "bases"
        }
      ],
      "parameters_explained": {
        "URI": "The connection URI for the Neo4j database (e.g., 'bolt://localhost:7687' or 'neo4j://localhost:7687'). This specifies the protocol, host, and port for the database connection.",
        "auth": "Authentication credentials for the Neo4j database, typically a tuple of (username, password) or an auth object created by neo4j.basic_auth(). Required for database access.",
        "database": "Optional name of the specific database to connect to within the Neo4j instance. If not provided, falls back to the 'name' parameter or uses the default database.",
        "name": "Alternative parameter name for specifying the database name. Deprecated in favor of 'database' parameter but maintained for backward compatibility."
      },
      "parent_class": null,
      "purpose": "This class serves as a wrapper around the Neo4j Python driver, providing convenient methods for connecting to a Neo4j database and performing common operations like matching nodes by ID/UID/name, creating/updating/deleting nodes and relationships, and executing custom Cypher queries. It handles connection management, automatic reconnection on service unavailability, and provides a clean interface for working with graph entities.",
      "return_annotation": null,
      "return_explained": "Instantiation returns a Graph object that maintains a connection to the Neo4j database. Methods return various types: run() returns a ResultWrapper object, match methods return Node or Relationship objects, create/push/pull/delete methods return None or modify entities in place.",
      "settings_required": [
        "Neo4j database must be running and accessible at the specified URI",
        "Valid authentication credentials (username and password) for the Neo4j database",
        "Network connectivity to the Neo4j server",
        "neo4j_objects module must be available with Node, Relationship, and ResultWrapper classes defined"
      ],
      "source_code": "class Graph():\n    \"\"\"\n    A Graph class for interacting with a Neo4j graph database. The Graph class has methods for running Cypher queries, matching nodes by ID, UID, and name, and matching relationships by ID.\n    \"\"\"\n    def __init__(self, URI, auth, database=None, name=None):\n        self.driver = GraphDatabase.driver(URI, auth=auth)\n        self.database = database or name\n        \n    def close(self):\n        self.driver.close()\n        \n    def open(self):\n        self.driver.open()\n        \n    def __repr__(self):\n        return \"Graph interface bound to host %s to database '%s'\" % (self.driver.initial_addresses[0], self.database)\n    \n    __str__ = __repr__\n    \n    @staticmethod\n    def _get_label_strings(labels):\n        if None in labels:\n            return ''\n        return ''.join(\":\" + i for i in labels)\n    \n    def catch_service_unavailable(func):\n        @wraps(func)\n        def wrapper(self,*args, **kwargs):\n            try:\n                return func(self,*args, **kwargs)\n            except ServiceUnavailable:\n                self.open()\n                return func(self,*args, **kwargs)\n        return wrapper\n            \n    @staticmethod\n    def _run(tx, query, **kwargs):\n        result = tx.run(query, **kwargs)\n        records = list(result)\n        summary = result.consume()\n        return records\n    \n    @catch_service_unavailable\n    def run(self, query, **kwargs):\n        with self.driver.session(database=self.database) as session:\n            result = self._run(session, query, **kwargs)\n        return ResultWrapper(list(result), graph=self)\n    \n    @staticmethod\n    def _match_by_id(tx, x, label):\n        result = tx.run(\"MATCH (o%s) WHERE id(o) = $x RETURN o\" % Graph._get_label_strings(label), x=x)\n        return result.single()[0]\n    \n    @catch_service_unavailable\n    def match_by_id(self, x, label=None):\n        if not isinstance(x, int):\n            try:\n                x=int(x)\n            except:\n                raise ValueError(\"Failed to coerce id to type int. 
Element id must be of type int, passed '%s' of type %s\" % (x, type(x)))\n        if not isinstance(label, list):\n            label=[label]\n        with self.driver.session(database=self.database) as session:\n            result = session.execute_read(self._match_by_id, x, label)\n        return Node._from_neo4j_node(result, graph = self)\n    \n    @staticmethod\n    def _match_by_uid(tx, uid, label):\n        result = tx.run(\"MATCH (o%s {UID:$uid}) RETURN o\" % Graph._get_label_strings(label), uid=uid)\n        record = result.single()[0]\n        summary = result.consume()\n        return record\n    \n    @catch_service_unavailable\n    def match_by_uid(self, uid, label=None):\n        if not isinstance(label, list):\n            label=[label]\n        with self.driver.session(database=self.database) as session:\n            result = session.execute_read(self._match_by_uid, uid, label)\n        return Node._from_neo4j_node(result, graph=self)\n    \n    @staticmethod\n    def _match_by_name(tx, name, label):\n        result = tx.run(\"MATCH (o%s {N:$name}) RETURN n\" % Graph._get_label_strings(label), name=name)\n        return result.single()[0]\n    \n    @catch_service_unavailable\n    def match_by_name(self, name, label=None):\n        if not isinstance(label, list):\n            label=[label]\n        with self.driver.session(database=self.database) as session:\n            result = session.execute_read(self._match_by_name, name, label)\n        return Node._from_neo4j_node(result, graph=self)\n    \n    @staticmethod\n    def _match_relationship_by_id(tx, x):\n        result = tx.run(\"MATCH ()-[_]->() WHERE id(_) = $x RETURN _\", x=x)\n        return result.single()[0]\n    \n    @catch_service_unavailable\n    def match_relationship_by_id(self, x):\n        if not isinstance(x, int):\n            try:\n                x=int(x)\n            except:\n                raise ValueError(\"Failed to coerce id to type int. Element id must be of type int, passed '%s' of type %s\" % (x, type(x)))\n        with self.driver.session(database=self.database) as session:\n            result = session.execute_read(self._match_relationship_by_id, x)\n        return Relationship._from_neo4j_node(result, graph=self)\n    \n    @staticmethod\n    def _push(tx, element_id, properties):\n        result = tx.run(\"MATCH (o) WHERE id(o) = $x SET o = $properties\", x=int(element_id), properties=properties)\n        records = list(result)\n        summary = result.consume()\n        return records\n    \n    @catch_service_unavailable\n    def push(self, node):\n        assert node.graph, \"Node is not associated with any database. 
Please use graph.create for new nodes, or retrieve the node from the database first.\"\n        assert node.graph == self, \"Entity bound to different database.\"\n        assert node.element_id, \"Please run graph.create when creating a node for the first time.\"\n        items = dict(node)\n        with self.driver.session(database=self.database) as session:\n            session.execute_write(self._push, node.element_id, items)\n        return\n        \n    @staticmethod\n    def _node_pull(tx, ids):\n        query = tx.run(\"MATCH (_) WHERE id(_) in $x \"\n                       \"RETURN id(_), labels(_), properties(_)\", x=ids)\n        return list(query)\n    \n    @staticmethod\n    def _relationship_pull(tx, ids):\n        result = tx.run(\"MATCH ()-[_]->() WHERE id(_) in $x \"\n                       \"RETURN id(_), properties(_)\", x=ids)\n        return list(result)\n        \n    @catch_service_unavailable\n    def pull(self, entity):\n        nodes = {}\n        for node in entity.nodes:\n            if node.graph == self:\n                if not isinstance(node.element_id, int):\n                    try:\n                        node.element_id = int(node.element_id)\n                    except:\n                        warnings.warn(\"Could not coerce element id to int, skipped node %s\" % node.element_id, stacklevel=5)\n                        continue\n                nodes[node.element_id] = node\n                node._lock = True\n        with self.driver.session(database=self.database) as session:\n            query = session.execute_read(self._node_pull, list(nodes.keys()))\n        for element_id, new_labels, new_properties in query:\n            node = nodes[element_id]\n            node.clear_labels()\n            node.update_labels(new_labels)\n            node.clear()\n            node.update(new_properties)\n            node._lock = False\n        relationships = {}\n        for relationship in entity.relationships:\n            if relationship.graph == self:\n                relationships[relationship.element_id] = relationship\n        with self.driver.session(database=self.database) as session:\n            query = session.execute_read(self._relationship_pull, list(relationships.keys()))\n        for element_id, new_properties in query:\n            relationship = relationships[element_id]\n            relationship.clear()\n            relationship.update(new_properties)\n            \n    \n    @staticmethod\n    def _create(tx, query, data):\n        result = tx.run(query, data=data)\n        return list(result)\n            \n    @catch_service_unavailable\n    def create(self, entity):\n        entity.graph=self #mostly to bind subgraphs\n        node_dict={}\n        for node in entity.nodes:\n            if node:\n                if not node.element_id:\n                    key = frozenset(node.labels)\n                    node_dict.setdefault(key, []).append(node)\n        rel_dict = {}\n        for relationship in entity.relationships:\n            key = frozenset(relationship.labels)\n            rel_dict.setdefault(key, []).append(relationship)\n        for labels, nodes in node_dict.items():\n            query = \"\"\"\n            UNWIND $data AS d\n            MERGE (_%s {UID:d.UID})\n            ON CREATE\n                SET _ += d\n            RETURN id(_)\n            \"\"\" % self._get_label_strings(labels)\n            with self.driver.session(database=self.database) as session:\n                result = session.execute_write(self._create, 
query, list(map(dict, nodes)))\n                for i, return_id in enumerate(result):\n                    node = nodes[i]\n                    node.graph = self\n                    node.element_id = return_id.value()\n        for labels, relationships in rel_dict.items():\n            data = map(lambda r: [r.start_node.element_id, dict(r.relationship), r.end_node.element_id],\n                               relationships)\n            # print(list(data)) #calling prematurely exhausts the generator\n            query = \"\"\"\n            UNWIND $data as d\n            MATCH (a) WHERE id(a) = d[0]\n            MATCH (b) WHERE id(b) = d[2]\n            MERGE (a)-[_%s]->(b) SET _ = d[1]\n            RETURN id(_)\n            \"\"\" % self._get_label_strings(labels)\n            with self.driver.session(database=self.database) as session:\n                result = session.execute_write(self._create, query, list(data))\n                for i, return_id in enumerate(result):\n                    rel = relationships[i]\n                    rel.graph=self\n                    rel.element_id = return_id.value()\n    @staticmethod\n    def _delete(tx, identities):\n        result = tx.run(\"MATCH (_) WHERE id(_) IN $x DETACH DELETE _\", x=identities)\n        return list(result)\n               \n    @catch_service_unavailable\n    def delete(self, entity):\n        identities = []\n        for rel in entity.relationships:\n            identities.append(rel.element_id)\n        for node in entity.nodes:\n            if node.element_id:\n                identities.append(node.element_id)\n        with self.driver.session(database=self.database) as session:\n            session.execute_write(self._delete, identities)",
      "source_file": "/tf/active/vicechatdev/neo4j_driver/neo4j_driver.py",
      "tags": [
        "neo4j",
        "graph-database",
        "database-interface",
        "cypher",
        "nodes",
        "relationships",
        "CRUD",
        "connection-management",
        "graph-operations"
      ],
      "updated_at": "2025-12-07T02:01:51.650352",
      "usage_example": "from neo4j import GraphDatabase\nfrom neo4j.auth import basic_auth\n\n# Create a Graph instance\ngraph = Graph(\n    URI='bolt://localhost:7687',\n    auth=basic_auth('neo4j', 'password'),\n    database='neo4j'\n)\n\n# Run a Cypher query\nresult = graph.run('MATCH (n:Person) RETURN n LIMIT 10')\n\n# Match a node by ID\nnode = graph.match_by_id(123, label=['Person'])\n\n# Match a node by UID property\nnode = graph.match_by_uid('unique-id-123', label=['Person'])\n\n# Create a new node (assuming Node class is available)\nfrom neo4j_objects import Node\nnew_node = Node(labels=['Person'], properties={'name': 'John', 'UID': 'john-123'})\ngraph.create(new_node)\n\n# Update an existing node\nnode['age'] = 30\ngraph.push(node)\n\n# Pull latest data from database\ngraph.pull(node)\n\n# Delete a node\ngraph.delete(node)\n\n# Close the connection when done\ngraph.close()"
    },
    {
      "best_practices": [
        "Always pass the graph parameter when instantiating to maintain proper graph context for Node and Relationship objects",
        "Use evaluate() for single-result queries; it warns if multiple nodes are matched but only processes the first",
        "Check if ResultWrapper is empty before calling methods to avoid errors",
        "Use to_subgraph() only with Neo4j path, node, or relationship objects; other types will raise TypeError",
        "Be aware that to_ndarray() is deprecated and only returns lists, not actual ndarrays",
        "The class is immutable (tuple subclass), so results cannot be modified after creation",
        "When working with collections of nodes/relationships, use COLLECT() in Cypher queries for proper handling with evaluate()",
        "The values property returns None if the ResultWrapper is empty, so check for None before iterating"
      ],
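      "collect_evaluate_example": "# Illustrative sketch (not taken from the original source): per the best practices above, wrap\n# multi-node results in COLLECT() so evaluate() returns a list of Node objects instead of warning\n# and returning only the first match. Assumes an existing Graph instance named graph.\npeople = graph.run('MATCH (n:Person) RETURN COLLECT(n)').evaluate()   # list of Node objects\nsingle = graph.run('MATCH (n:Person) RETURN n LIMIT 1').evaluate()    # a single Node",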
      "class_interface": {
        "attributes": [
          {
            "description": "Private attribute storing reference to the parent Graph object, used for maintaining context when creating Node and Relationship objects",
            "is_class_variable": false,
            "name": "_graph",
            "type": "Graph | None"
          }
        ],
        "methods": [
          {
            "is_property": false,
            "is_static": false,
            "name": "__init__",
            "parameters": {
              "*args": "Variable positional arguments containing Neo4j query result records",
              "graph": "Optional reference to the parent Graph object for maintaining context"
            },
            "purpose": "Initialize the ResultWrapper with query results and optional graph reference",
            "returns": "None (constructor)",
            "signature": "__init__(self, *args, graph=None)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "evaluate",
            "parameters": {},
            "purpose": "Evaluate the first result and convert it to appropriate Python objects (Node, Relationship, or primitive types)",
            "returns": "Returns None if empty; Node object for single node results; Relationship object for single relationship results; list of Node objects for node collections; list of Relationship objects for relationship collections; or the raw value for other types. Warns if multiple nodes matched but only processes first.",
            "signature": "evaluate(self) -> Node | Relationship | list | Any | None"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "to_data_frame",
            "parameters": {},
            "purpose": "Convert the query results to a Pandas DataFrame",
            "returns": "Pandas DataFrame containing the results. If values are Node objects, converts them to dictionaries. Otherwise uses keys and values. Returns empty DataFrame on error.",
            "signature": "to_data_frame(self) -> pd.DataFrame"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "data",
            "parameters": {},
            "purpose": "Convert query results to a list of dictionaries",
            "returns": "List of dictionaries where each dictionary represents a result record. Node objects are converted to dicts, other values are zipped with keys. Returns empty list if values is None.",
            "signature": "data(self) -> list[dict]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "to_ndarray",
            "parameters": {},
            "purpose": "Deprecated method that returns values as a list (not an actual ndarray)",
            "returns": "Returns the values property (a list). Emits deprecation warning recommending use of collections and evaluate() instead.",
            "signature": "to_ndarray(self) -> list"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "to_subgraph",
            "parameters": {},
            "purpose": "Convert query results to a Subgraph object containing nodes and/or relationships",
            "returns": "Subgraph object constructed from the results. Handles Path, Node, and Relationship types. Returns empty Subgraph if ResultWrapper is empty. Raises TypeError for unsupported types.",
            "signature": "to_subgraph(self) -> Subgraph"
          },
          {
            "is_property": true,
            "is_static": false,
            "name": "values",
            "parameters": {},
            "purpose": "Property that returns the values from all result records, converting Neo4j objects to Python objects",
            "returns": "None if empty; list of Node objects if results contain nodes; list of Relationship objects if results contain relationships; otherwise list of raw values from each record.",
            "signature": "values(self) -> list | None"
          },
          {
            "is_property": true,
            "is_static": false,
            "name": "keys",
            "parameters": {},
            "purpose": "Property that returns all unique keys from the result records",
            "returns": "List of unique key names (strings) from all records in the results, preserving order of first appearance.",
            "signature": "keys(self) -> list[str]"
          }
        ]
      },
      "complexity": "moderate",
      "component_type": "class",
      "conditional_imports": [
        {
          "condition": "only when calling to_data_frame() method",
          "import": "import pandas as pd",
          "optional": false
        }
      ],
      "created_at": "2025-12-07 01:00:55",
      "decorators": [],
      "dependencies": [
        "neo4j",
        "pandas",
        "warnings",
        "neo4j_objects"
      ],
      "description": "ResultWrapper is a tuple subclass that wraps Cypher query results from Neo4j, providing methods to convert results into various formats including Node/Relationship objects, DataFrames, dictionaries, and Subgraphs.",
      "docstring": "The ResultWrapper class is a custom class that extends the built-in tuple class. It is used to wrap the results of Cypher queries and provide additional methods for working with the results. \nThe methods include evaluating the results to return Node and Relationship objects, converting the results to a Pandas DataFrame, returning the data as a list of dictionaries, \nand returning the results as a Subgraph object.",
      "id": 2138,
      "imports": [
        "import neo4j",
        "from functools import wraps",
        "from neo4j import GraphDatabase",
        "from neo4j.exceptions import ServiceUnavailable",
        "from neo4j_objects import *",
        "import warnings",
        "import pandas as pd"
      ],
      "imports_required": [
        "import neo4j",
        "import warnings",
        "from neo4j_objects import Node, Relationship, Subgraph"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 90,
      "line_start": 8,
      "name": "ResultWrapper",
      "parameters": [
        {
          "annotation": "tuple",
          "default": null,
          "name": "bases"
        }
      ],
      "parameters_explained": {
        "*args": "Variable positional arguments passed to the tuple constructor, typically containing Neo4j query result records",
        "graph": "Optional reference to the parent Graph object that executed the query. Used to maintain graph context when creating Node and Relationship objects from Neo4j results. Defaults to None."
      },
      "parent_class": null,
      "purpose": "This class serves as a wrapper around Neo4j Cypher query results, extending the built-in tuple class to provide convenient methods for transforming query results into different data structures. It handles conversion of raw Neo4j graph objects (nodes, relationships, paths) into higher-level Python objects and data structures like Pandas DataFrames, lists of dictionaries, and Subgraph objects. The class maintains a reference to the parent graph for proper object instantiation.",
      "return_annotation": null,
      "return_explained": "Instantiation returns a ResultWrapper object that behaves like a tuple but with additional methods. Key method returns: evaluate() returns Node, Relationship objects, or lists thereof, or primitive values; to_data_frame() returns a Pandas DataFrame; data() returns a list of dictionaries; to_subgraph() returns a Subgraph object; values property returns a list of converted values; keys property returns a list of unique keys from all records.",
      "settings_required": [
        "Neo4j database connection must be established",
        "neo4j_objects module must be available with Node, Relationship, and Subgraph classes defined"
      ],
      "source_code": "class ResultWrapper(tuple):\n    \"\"\"\n    The ResultWrapper class is a custom class that extends the built-in tuple class. It is used to wrap the results of Cypher queries and provide additional methods for working with the results. \n    The methods include evaluating the results to return Node and Relationship objects, converting the results to a Pandas DataFrame, returning the data as a list of dictionaries, \n    and returning the results as a Subgraph object.\n    \"\"\"\n    def __init__(self, *args, graph=None):\n        tuple.__init__(self)\n        self._graph=graph\n        \n    def evaluate(self):\n        if not self:\n            return None\n        if isinstance(self[0].value(), neo4j.graph.Node):\n            if len(self) > 1:\n                warnings.warn(\"Query matched multiple nodes, but .evaluate only processes a single node. Either use .subgraph or call `RETURN COLLECT(nodes)`\", stacklevel=2)\n            return Node._from_neo4j_node(self[0].value(), graph=self._graph)\n        elif isinstance(self[0].value(), list) and len(self[0].value()) > 0 and isinstance(self[0].value()[0], neo4j.graph.Node):\n            return [Node._from_neo4j_node(i, graph=self._graph) for i in self[0].value()]\n        elif isinstance(self[0].value(), list) and len(self[0].value()) > 0 and isinstance(self[0].value()[0], neo4j.graph.Relationship):\n            return [Relationship._from_neo4j_node(i, graph=self._graph) for i in self[0].value()]\n        elif isinstance(self[0].value(), neo4j.graph.Relationship):\n            return Relationship._from_neo4j_node(self[0].value(), graph=self._graph)\n        return self[0].value()\n    \n    def to_data_frame(self):\n        import pandas as pd\n        try:\n            if isinstance(self.values[0], Node):\n                return pd.DataFrame([dict(i) for i in self.values])\n            return pd.DataFrame(self.values, columns=self.keys)\n        except:\n            return pd.DataFrame()\n    \n    def data(self):\n        if self.values is None:\n            return []\n        data=[]\n        for values in self.values:\n            if isinstance(values, Node):\n                d=dict(values)\n            else:\n                d= dict(zip(self.keys, values))\n            data.append(d)\n        return data\n    \n    def to_ndarray(self):\n        warnings.warn(\"This function was only implemented to return lists and does not actually return an ndarray. 
Please return collections and call .evaluate instead\", stacklevel=5)\n        return self.values\n    \n    def to_subgraph(self):\n        if not self:\n            return Subgraph(graph=self._graph)\n        if isinstance(self[0].values(), neo4j.graph.Path) or isinstance(self[0].values()[0], neo4j.graph.Path):\n            relationships=[]\n            for path in self:\n                for relationship in path.value():\n                    relationships.append(relationship)\n            subgraph = Subgraph(relationships=[Relationship._from_neo4j_node(i) for i in relationships], graph=self)\n        elif isinstance(self[0].values()[0], neo4j.graph.Node):\n            subgraph = Subgraph(nodes=self.values, graph=self._graph)\n        elif isinstance(self[0].values()[0], neo4j.graph.Relationship):\n            subgraph = Subgraph(relationships=self.values, graph=self._graph)\n        else:\n            raise TypeError(\".to_subgraph only works on neo4j path, node and relationship objects.\")\n        return subgraph\n            \n        \n    @property\n    def values(self):\n        if len(self) == 0:\n            return None\n        if isinstance(self[0].value(), neo4j.graph.Node):\n            return [Node._from_neo4j_node(i.value(), graph=self._graph) for i in self]\n        elif isinstance(self[0].value(), neo4j.graph.Relationship):\n            return [Relationship._from_neo4j_node(i.value(), graph=self._graph) for i in self]\n        return [i.values() for i in self]\n    \n    @property\n    def keys(self):\n        keys = []\n        keys.extend(j for i in self for j in i.keys() if not j in keys)\n        return keys",
      "source_file": "/tf/active/vicechatdev/neo4j_driver/neo4j_driver.py",
      "tags": [
        "neo4j",
        "graph-database",
        "cypher",
        "query-results",
        "data-conversion",
        "wrapper",
        "tuple-subclass",
        "dataframe",
        "graph-objects"
      ],
      "updated_at": "2025-12-07T02:00:55.680734",
      "usage_example": "# Assuming you have a Neo4j graph connection and query results\nfrom neo4j import GraphDatabase\nfrom neo4j_objects import Node, Relationship, Subgraph\n\n# Execute a query that returns results\ndriver = GraphDatabase.driver('bolt://localhost:7687', auth=('neo4j', 'password'))\nwith driver.session() as session:\n    raw_results = session.run('MATCH (n:Person) RETURN n LIMIT 5')\n    \n    # Wrap results\n    results = ResultWrapper(*raw_results, graph=my_graph)\n    \n    # Get single node/relationship\n    node = results.evaluate()\n    \n    # Convert to DataFrame\n    df = results.to_data_frame()\n    \n    # Get as list of dictionaries\n    data_list = results.data()\n    \n    # Convert to Subgraph\n    subgraph = results.to_subgraph()\n    \n    # Access values and keys\n    values = results.values\n    keys = results.keys"
    },
    {
      "best_practices": [],
      "class_interface": {
        "attributes": [],
        "methods": [
          {
            "is_property": false,
            "is_static": false,
            "name": "__init__",
            "parameters": {
              "replica_database_path": "Type: str",
              "session": "Type: requests.Session"
            },
            "purpose": "Internal method:   init  ",
            "returns": "None",
            "signature": "__init__(self, session, replica_database_path)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_clear_document_context",
            "parameters": {},
            "purpose": "Clear the current document UUID context for new uploads",
            "returns": "None",
            "signature": "_clear_document_context(self)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_load_database",
            "parameters": {},
            "purpose": "Load the replica database",
            "returns": "Returns Dict[str, Any]",
            "signature": "_load_database(self) -> Dict[str, Any]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_save_database",
            "parameters": {},
            "purpose": "Save the updated database",
            "returns": "None",
            "signature": "_save_database(self)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_compute_hash",
            "parameters": {
              "content": "Type: bytes"
            },
            "purpose": "Compute SHA256 hash of content",
            "returns": "Returns str",
            "signature": "_compute_hash(self, content) -> str"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_compute_crc32c_header",
            "parameters": {
              "content": "Type: bytes"
            },
            "purpose": "Compute CRC32C checksum and return as x-goog-hash header value",
            "returns": "Returns str",
            "signature": "_compute_crc32c_header(self, content) -> str"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_generate_timestamp",
            "parameters": {},
            "purpose": "Generate reMarkable timestamp",
            "returns": "Returns str",
            "signature": "_generate_timestamp(self) -> str"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_generate_generation",
            "parameters": {},
            "purpose": "Generate reMarkable generation number",
            "returns": "Returns int",
            "signature": "_generate_generation(self) -> int"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "upload_raw_content",
            "parameters": {
              "content": "Type: bytes",
              "content_hash": "Type: str",
              "content_type": "Type: str",
              "filename": "Type: str",
              "system_filename": "Type: str"
            },
            "purpose": "Upload raw content and return its hash",
            "returns": "Returns Optional[str]",
            "signature": "upload_raw_content(self, content, content_hash, filename, content_type, system_filename) -> Optional[str]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "upload_system_file",
            "parameters": {
              "content": "Type: bytes",
              "content_type": "Type: str",
              "system_filename": "Type: str"
            },
            "purpose": "Upload system files like roothash, root.docSchema with fixed filenames",
            "returns": "Returns Optional[str]",
            "signature": "upload_system_file(self, content, system_filename, content_type) -> Optional[str]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "upload_document_file",
            "parameters": {
              "content": "Type: bytes",
              "content_type": "Type: str",
              "filename": "Type: str"
            },
            "purpose": "Upload document files with UUID.extension pattern",
            "returns": "Returns Optional[str]",
            "signature": "upload_document_file(self, content, filename, content_type) -> Optional[str]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "create_metadata_json",
            "parameters": {
              "document_type": "Type: str",
              "name": "Type: str",
              "parent_uuid": "Type: str"
            },
            "purpose": "Create metadata JSON for a document",
            "returns": "Returns Tuple[bytes, str]",
            "signature": "create_metadata_json(self, name, parent_uuid, document_type) -> Tuple[bytes, str]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "create_content_json",
            "parameters": {
              "pages": "Type: List[str]",
              "template": "Type: str"
            },
            "purpose": "Create content JSON for a notebook with pages",
            "returns": "Returns Tuple[bytes, str]",
            "signature": "create_content_json(self, pages, template) -> Tuple[bytes, str]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "create_directory_listing",
            "parameters": {
              "child_objects": "Type: List[Dict]",
              "data_components": "Type: List[Dict]"
            },
            "purpose": "Create directory listing content",
            "returns": "Returns Tuple[bytes, str]",
            "signature": "create_directory_listing(self, child_objects, data_components) -> Tuple[bytes, str]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "update_root_hash",
            "parameters": {
              "new_root_hash": "Type: str"
            },
            "purpose": "Update the root hash in the cloud",
            "returns": "Returns bool",
            "signature": "update_root_hash(self, new_root_hash) -> bool"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "edit_document_metadata",
            "parameters": {
              "document_uuid": "Type: str",
              "new_name": "Type: str",
              "new_parent": "Type: str"
            },
            "purpose": "Edit an existing document's metadata",
            "returns": "Returns bool",
            "signature": "edit_document_metadata(self, document_uuid, new_name, new_parent) -> bool"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "upload_pdf_document",
            "parameters": {
              "name": "Type: str",
              "parent_uuid": "Type: str",
              "pdf_path": "Type: str"
            },
            "purpose": "Upload a new PDF document to reMarkable following the correct sequence from app logs",
            "returns": "Returns bool",
            "signature": "upload_pdf_document(self, pdf_path, name, parent_uuid) -> bool"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "create_notebook",
            "parameters": {
              "name": "Type: str",
              "parent_uuid": "Type: str",
              "template": "Type: str"
            },
            "purpose": "Create a new empty notebook",
            "returns": "Returns bool",
            "signature": "create_notebook(self, name, parent_uuid, template) -> bool"
          }
        ]
      },
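      "crc32c_header_example": "# Illustrative sketch (not taken from the original source): the x-goog-hash value that\n# _compute_crc32c_header() builds, per the source_code field below - CRC32C checksum,\n# big-endian bytes, then base64.\nimport base64\nimport crc32c\n\ncontent = b'hello remarkable'\nchecksum = crc32c.crc32c(content)\nheader_value = 'crc32c=' + base64.b64encode(checksum.to_bytes(4, byteorder='big')).decode('ascii')\nprint(header_value)  # e.g. 'crc32c=...' (the value depends on the content)",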
      "complexity": "moderate",
      "component_type": "class",
      "conditional_imports": [],
      "created_at": "2025-12-07 01:00:05",
      "decorators": [],
      "dependencies": [],
      "description": "Manages uploads to reMarkable cloud",
      "docstring": "Manages uploads to reMarkable cloud",
      "id": 2137,
      "imports": [
        "import os",
        "import json",
        "import hashlib",
        "import requests",
        "import uuid",
        "import base64",
        "import binascii",
        "import zlib",
        "from pathlib import Path",
        "from typing import Dict",
        "from typing import List",
        "from typing import Optional",
        "from typing import Any",
        "from typing import Tuple",
        "from datetime import datetime",
        "import time",
        "import crc32c",
        "import sys",
        "from auth import RemarkableAuth"
      ],
      "imports_required": [
        "import os",
        "import json",
        "import hashlib",
        "import requests",
        "import uuid"
      ],
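      "timestamp_generation_example": "# Illustrative sketch (not taken from the original source): the formats produced by\n# _generate_timestamp() (milliseconds since epoch, as a string) and _generate_generation()\n# (microseconds since epoch, as an integer), per the source_code field below.\nimport time\n\ntimestamp = str(int(time.time() * 1000))\ngeneration = int(time.time() * 1000000)\nprint(timestamp, generation)",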
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 665,
      "line_start": 32,
      "name": "RemarkableUploadManager_v1",
      "parameters": [
        {
          "annotation": "",
          "default": null,
          "name": "bases"
        }
      ],
      "parameters_explained": {
        "bases": "Parameter of type "
      },
      "parent_class": null,
      "purpose": "Manages uploads to reMarkable cloud",
      "return_annotation": null,
      "return_explained": "Returns unspecified type",
      "settings_required": [],
      "source_code": "class RemarkableUploadManager:\n    \"\"\"Manages uploads to reMarkable cloud\"\"\"\n    \n    def __init__(self, session: requests.Session, replica_database_path: str):\n        self.session = session\n        self.base_url = \"https://eu.tectonic.remarkable.com\"\n        \n        # Load replica database\n        self.database_path = Path(replica_database_path)\n        self.database = self._load_database()\n        \n        # Track uploads\n        self.upload_queue: List[Dict[str, Any]] = []\n        self.uploaded_hashes: Dict[str, str] = {}  # hash -> upload_status\n        self._current_document_uuid: Optional[str] = None  # UUID for consistent rm-filename headers\n        \n    def _clear_document_context(self):\n        \"\"\"Clear the current document UUID context for new uploads\"\"\"\n        self._current_document_uuid = None\n        \n    def _load_database(self) -> Dict[str, Any]:\n        \"\"\"Load the replica database\"\"\"\n        if not self.database_path.exists():\n            raise FileNotFoundError(f\"Database not found: {self.database_path}\")\n            \n        with open(self.database_path, 'r', encoding='utf-8') as f:\n            return json.load(f)\n    \n    def _save_database(self):\n        \"\"\"Save the updated database\"\"\"\n        with open(self.database_path, 'w', encoding='utf-8') as f:\n            json.dump(self.database, f, indent=2, ensure_ascii=False)\n    \n    def _compute_hash(self, content: bytes) -> str:\n        \"\"\"Compute SHA256 hash of content\"\"\"\n        return hashlib.sha256(content).hexdigest()\n    \n    def _compute_crc32c_header(self, content: bytes) -> str:\n        \"\"\"Compute CRC32C checksum and return as x-goog-hash header value\"\"\"\n        try:\n            # Use proper crc32c library if available\n            if HAS_CRC32C:\n                checksum = crc32c.crc32c(content)\n            else:\n                # Fallback to standard CRC32 (not ideal but better than nothing)\n                checksum = zlib.crc32(content) & 0xffffffff\n            \n            # Convert to bytes and base64 encode\n            checksum_bytes = checksum.to_bytes(4, byteorder='big')\n            checksum_b64 = base64.b64encode(checksum_bytes).decode('ascii')\n            \n            return f\"crc32c={checksum_b64}\"\n        except Exception as e:\n            print(f\"\u26a0\ufe0f Warning: Failed to compute CRC32C checksum: {e}\")\n            # Return empty string to skip the header if computation fails\n            return \"\"\n    \n    def _generate_timestamp(self) -> str:\n        \"\"\"Generate reMarkable timestamp\"\"\"\n        return str(int(time.time() * 1000))\n    \n    def _generate_generation(self) -> int:\n        \"\"\"Generate reMarkable generation number\"\"\"\n        return int(time.time() * 1000000)\n    \n    def upload_raw_content(self, content: bytes, content_hash: str = None, filename: str = None, \n                          content_type: str = \"application/octet-stream\", system_filename: str = None) -> Optional[str]:\n        \"\"\"Upload raw content and return its hash\"\"\"\n        if content_hash is None:\n            content_hash = self._compute_hash(content)\n        \n        # Check if already uploaded\n        if content_hash in self.uploaded_hashes:\n            print(f\"\u2705 Content already uploaded: {content_hash[:16]}...\")\n            return content_hash\n        \n        try:\n            url = f\"{self.base_url}/sync/v3/files/{content_hash}\"\n            \n 
           # Prepare headers like the reMarkable app\n            headers = {\n                'Content-Type': content_type,\n                'rm-batch-number': '1',\n                'rm-sync-id': str(uuid.uuid4()),\n                'User-Agent': 'desktop/3.20.0.922 (macos 15.4)',\n                'Accept-Encoding': 'gzip, deflate',\n                'Accept-Language': 'en-BE,*',\n                'Connection': 'Keep-Alive'\n            }\n            \n            # Add rm-filename header - REQUIRED for all PUT requests\n            # Handle different patterns: UUID-based files vs system files\n            if system_filename:\n                # System files like \"roothash\", \"root.docSchema\" (no UUID)\n                rm_filename = system_filename\n                print(f\"\ud83c\udff7\ufe0f rm-filename (system): {rm_filename}\")\n            elif filename:\n                # Document files with UUID pattern\n                if hasattr(self, '_current_document_uuid') and self._current_document_uuid:\n                    doc_uuid = self._current_document_uuid\n                else:\n                    # Generate and store new UUID for this document\n                    doc_uuid = str(uuid.uuid4())\n                    self._current_document_uuid = doc_uuid\n                    print(f\"\ud83d\udcca Generated new document UUID: {doc_uuid}\")\n                \n                # Use the filename as provided or construct UUID.extension format\n                if '.' in filename and len(filename.split('.')[0]) == 36:  # Already UUID.extension\n                    rm_filename = filename\n                else:\n                    # Determine extension and construct UUID.extension\n                    if content_type == 'application/pdf' or filename.lower().endswith('.pdf'):\n                        rm_filename = f\"{doc_uuid}.pdf\"\n                    elif 'metadata' in filename.lower():\n                        rm_filename = f\"{doc_uuid}.metadata\"\n                    elif filename.lower().endswith('.content'):\n                        rm_filename = f\"{doc_uuid}.content\"\n                    elif filename.lower().endswith('.rm'):\n                        # Page data keeps original filename for .rm files\n                        rm_filename = filename\n                    elif filename.lower().endswith('.docschema') or 'docschema' in filename.lower():\n                        rm_filename = f\"{doc_uuid}.docSchema\"\n                    elif filename.lower().endswith('.pagedata'):\n                        rm_filename = f\"{doc_uuid}.pagedata\"\n                    else:\n                        # Default construction\n                        rm_filename = f\"{doc_uuid}.{filename}\"\n                \n                print(f\"\ud83c\udff7\ufe0f rm-filename (document): {rm_filename}\")\n            else:\n                # Fallback - generate basic filename\n                if hasattr(self, '_current_document_uuid') and self._current_document_uuid:\n                    doc_uuid = self._current_document_uuid\n                else:\n                    doc_uuid = str(uuid.uuid4())\n                    self._current_document_uuid = doc_uuid\n                \n                if content_type == 'application/pdf':\n                    rm_filename = f\"{doc_uuid}.pdf\"\n                elif content_type == 'application/octet-stream':\n                    rm_filename = f\"{doc_uuid}.metadata\"\n                else:\n                    rm_filename = f\"{doc_uuid}.content\"\n                \n     
           print(f\"\ud83c\udff7\ufe0f rm-filename (fallback): {rm_filename}\")\n            \n            headers['rm-filename'] = rm_filename\n            \n            # Add CRC32C checksum (this is the missing piece!)\n            crc32c_header = self._compute_crc32c_header(content)\n            if crc32c_header:\n                headers['x-goog-hash'] = crc32c_header\n            \n            print(f\"\ud83d\udd0d Debug: Upload headers for {content_hash[:16]}...\")\n            for key, value in headers.items():\n                print(f\"    {key}: {value}\")\n            \n            # Make the PUT request\n            response = self.session.put(url, data=content, headers=headers)\n            \n            print(f\"\ud83d\udd0d Debug: Response status: {response.status_code}\")\n            print(f\"\ud83d\udd0d Debug: Response text: {response.text}\")\n            \n            response.raise_for_status()\n            \n            self.uploaded_hashes[content_hash] = \"uploaded\"\n            print(f\"\u2705 Uploaded content: {content_hash[:16]}... ({len(content)} bytes)\")\n            return content_hash\n            \n        except Exception as e:\n            print(f\"\u274c Failed to upload content {content_hash[:16]}...: {e}\")\n            if hasattr(e, 'response') and e.response is not None:\n                print(f\"    Response: {e.response.text}\")\n            return None\n    \n    def upload_system_file(self, content: bytes, system_filename: str, content_type: str = \"application/octet-stream\") -> Optional[str]:\n        \"\"\"Upload system files like roothash, root.docSchema with fixed filenames\"\"\"\n        print(f\"\ud83d\udcc1 Uploading system file: {system_filename}\")\n        return self.upload_raw_content(content, system_filename=system_filename, content_type=content_type)\n    \n    def upload_document_file(self, content: bytes, filename: str, content_type: str = \"application/octet-stream\") -> Optional[str]:\n        \"\"\"Upload document files with UUID.extension pattern\"\"\"\n        print(f\"\ud83d\udcc4 Uploading document file: {filename}\")\n        return self.upload_raw_content(content, filename=filename, content_type=content_type)\n\n    def create_metadata_json(self, name: str, parent_uuid: str = \"\", document_type: str = \"DocumentType\") -> Tuple[bytes, str]:\n        \"\"\"Create metadata JSON for a document\"\"\"\n        timestamp = self._generate_timestamp()\n        \n        metadata = {\n            \"createdTime\": timestamp,\n            \"lastModified\": timestamp,\n            \"lastOpened\": timestamp,\n            \"lastOpenedPage\": 0,\n            \"new\": False,\n            \"parent\": parent_uuid,\n            \"pinned\": False,\n            \"source\": \"\",\n            \"type\": document_type,\n            \"visibleName\": name\n        }\n        \n        content = json.dumps(metadata, indent=4).encode('utf-8')\n        content_hash = self._compute_hash(content)\n        \n        return content, content_hash\n    \n    def create_content_json(self, pages: List[str], template: str = \"Blank\") -> Tuple[bytes, str]:\n        \"\"\"Create content JSON for a notebook with pages\"\"\"\n        timestamp_base = f\"2:{len(pages)}\"\n        \n        # Create pages structure\n        pages_list = []\n        for i, page_id in enumerate(pages):\n            pages_list.append({\n                \"id\": page_id,\n                \"idx\": {\n                    \"timestamp\": f\"2:{i+2}\",\n                    \"value\": 
chr(ord('a') + i) if i < 26 else f\"page_{i}\"\n                },\n                \"template\": {\n                    \"timestamp\": \"2:1\",\n                    \"value\": template\n                }\n            })\n        \n        content_data = {\n            \"cPages\": {\n                \"lastOpened\": {\n                    \"timestamp\": \"2:1\",\n                    \"value\": pages[0] if pages else \"\"\n                },\n                \"original\": {\n                    \"timestamp\": \"0:0\",\n                    \"value\": -1\n                },\n                \"pages\": pages_list\n            },\n            \"extraMetadata\": {},\n            \"fileType\": \"notebook\",\n            \"fontName\": \"\",\n            \"lineHeight\": -1,\n            \"margins\": 180,\n            \"pageCount\": len(pages),\n            \"textScale\": 1,\n            \"transform\": {}\n        }\n        \n        content = json.dumps(content_data, indent=4).encode('utf-8')\n        content_hash = self._compute_hash(content)\n        \n        return content, content_hash\n    \n    def create_directory_listing(self, child_objects: List[Dict], data_components: List[Dict]) -> Tuple[bytes, str]:\n        \"\"\"Create directory listing content\"\"\"\n        lines = [str(len(child_objects) + len(data_components))]\n        \n        # Add child objects (folders/documents)\n        for obj in child_objects:\n            line = f\"{obj['hash']}:80000000:{obj['uuid']}:{obj['type']}:{obj['size']}\"\n            lines.append(line)\n        \n        # Add data components (.content, .metadata, .rm files, etc.)\n        for comp in data_components:\n            line = f\"{comp['hash']}:0:{comp['component']}:0:{comp['size']}\"\n            lines.append(line)\n        \n        content = '\\n'.join(lines).encode('utf-8')\n        content_hash = self._compute_hash(content)\n        \n        return content, content_hash\n    \n    def update_root_hash(self, new_root_hash: str) -> bool:\n        \"\"\"Update the root hash in the cloud\"\"\"\n        try:\n            generation = self._generate_generation()\n            \n            root_data = {\n                \"broadcast\": True,\n                \"generation\": generation,\n                \"hash\": new_root_hash\n            }\n            \n            url = f\"{self.base_url}/sync/v3/root\"\n            response = self.session.put(url, json=root_data)\n            response.raise_for_status()\n            \n            print(f\"\u2705 Updated root hash: {new_root_hash}\")\n            return True\n            \n        except Exception as e:\n            print(f\"\u274c Failed to update root hash: {e}\")\n            return False\n    \n    def edit_document_metadata(self, document_uuid: str, new_name: str = None, new_parent: str = None) -> bool:\n        \"\"\"Edit an existing document's metadata\"\"\"\n        try:\n            # Find the document in database\n            if document_uuid not in self.database['nodes']:\n                raise ValueError(f\"Document {document_uuid} not found in database\")\n            \n            node = self.database['nodes'][document_uuid]\n            print(f\"\ud83d\udcdd Editing document: {node['name']}\")\n            \n            # Get current metadata\n            current_metadata = node['metadata'].copy()\n            \n            # Update metadata\n            if new_name:\n                current_metadata['visibleName'] = new_name\n            if new_parent is not None:\n                
current_metadata['parent'] = new_parent\n            \n            current_metadata['lastModified'] = self._generate_timestamp()\n            \n            # Create new metadata content\n            metadata_content = json.dumps(current_metadata, indent=4).encode('utf-8')\n            metadata_hash = self._compute_hash(metadata_content)\n            \n            # Upload metadata\n            self.upload_raw_content(metadata_content, metadata_hash)\n            \n            # Update component hashes\n            old_metadata_hash = node['component_hashes']['metadata']\n            node['component_hashes']['metadata'] = metadata_hash\n            \n            # Get parent node to update its directory listing\n            parent_uuid = current_metadata.get('parent', '')\n            if parent_uuid and parent_uuid in self.database['nodes']:\n                parent_node = self.database['nodes'][parent_uuid]\n                \n                # Rebuild parent's directory listing\n                child_objects = []\n                data_components = []\n                \n                # Find all children of this parent\n                for uuid, child_node in self.database['nodes'].items():\n                    if child_node.get('parent_uuid') == parent_uuid:\n                        if child_node['node_type'] == 'folder':\n                            type_val = '1'\n                        else:\n                            type_val = '3'\n                        \n                        child_objects.append({\n                            'hash': child_node['hash'],\n                            'uuid': uuid,\n                            'type': type_val,\n                            'size': len(str(child_node).encode('utf-8'))  # Approximate\n                        })\n                \n                # Add metadata components for this updated document\n                comp_hashes = node['component_hashes']\n                for comp_type, comp_hash in comp_hashes.items():\n                    if comp_hash:\n                        if comp_type == 'rm_files':\n                            for i, rm_hash in enumerate(comp_hash):\n                                data_components.append({\n                                    'hash': rm_hash,\n                                    'component': f\"{document_uuid}/{uuid.uuid4()}.rm\",\n                                    'size': 14661  # Typical RM file size\n                                })\n                        else:\n                            data_components.append({\n                                'hash': comp_hash,\n                                'component': f\"{document_uuid}.{comp_type}\",\n                                'size': len(metadata_content) if comp_type == 'metadata' else 2209\n                            })\n                \n                # Create and upload new directory listing\n                dir_content, dir_hash = self.create_directory_listing(child_objects, data_components)\n                self.upload_raw_content(dir_content, dir_hash)\n                \n                # Update parent node hash\n                parent_node['hash'] = dir_hash\n                self.database['hash_registry'][dir_hash] = {\n                    'uuid': parent_uuid,\n                    'type': 'node',\n                    'last_seen': datetime.now().isoformat()\n                }\n                \n                # Update root if parent is root\n                if not parent_node.get('parent_uuid'):\n                    
self.update_root_hash(dir_hash)\n            \n            # Update database\n            node['metadata'] = current_metadata\n            node['last_modified'] = current_metadata['lastModified']\n            node['sync_status'] = 'updated'\n            node['last_synced'] = datetime.now().isoformat()\n            \n            # Update hash registry\n            self.database['hash_registry'][metadata_hash] = {\n                'uuid': document_uuid,\n                'type': 'metadata',\n                'last_seen': datetime.now().isoformat()\n            }\n            \n            self._save_database()\n            print(f\"\u2705 Successfully updated document metadata\")\n            return True\n            \n        except Exception as e:\n            print(f\"\u274c Failed to edit document metadata: {e}\")\n            return False\n    \n    def upload_pdf_document(self, pdf_path: str, name: str, parent_uuid: str = \"\") -> bool:\n        \"\"\"Upload a new PDF document to reMarkable following the correct sequence from app logs\"\"\"\n        try:\n            # Clear any previous document context\n            self._clear_document_context()\n            \n            pdf_file = Path(pdf_path)\n            if not pdf_file.exists():\n                raise FileNotFoundError(f\"PDF file not found: {pdf_path}\")\n            \n            print(f\"\ud83d\udcc4 Uploading PDF: {name}\")\n            \n            # Generate UUID for new document and set it for consistent rm-filename headers\n            document_uuid = str(uuid.uuid4())\n            self._current_document_uuid = document_uuid\n            print(f\"\ud83d\udcca Document UUID: {document_uuid}\")\n            \n            # Read PDF content\n            with open(pdf_file, 'rb') as f:\n                pdf_content = f.read()\n            \n            # FOLLOW APP LOGS UPLOAD ORDER:\n            # 1. Content (if any) - for PDFs this might be empty or minimal\n            # 2. Page data (.rm files) - not needed for PDF\n            # 3. Metadata\n            # 4. 
PDF content\n            \n            print(\"\ud83d\udcdd Step 1: Creating and uploading content...\")\n            # Create minimal content for PDF (empty content structure)\n            content_data, content_hash = self.create_content_json([], \"PDF\")\n            self.upload_raw_content(\n                content=content_data,\n                content_type='application/octet-stream',\n                filename=f\"{document_uuid}.content\"\n            )\n            \n            print(\"\ud83d\udcdd Step 2: Creating and uploading metadata...\")\n            # Create metadata\n            metadata_content, metadata_hash = self.create_metadata_json(name, parent_uuid)\n            self.upload_raw_content(\n                content=metadata_content,\n                content_type='application/octet-stream',\n                filename=f\"{document_uuid}.metadata\"\n            )\n            \n            print(\"\ud83d\udcdd Step 3: Uploading PDF content...\")\n            # Upload PDF content LAST (as per app logs)\n            pdf_hash = self.upload_raw_content(\n                content=pdf_content,\n                content_type='application/pdf',\n                filename=f\"{document_uuid}.pdf\"\n            )\n            \n            # Create document directory listing\n            data_components = [\n                {\n                    'hash': metadata_hash,\n                    'component': f\"{document_uuid}.metadata\",\n                    'size': len(metadata_content)\n                },\n                {\n                    'hash': pdf_hash,\n                    'component': f\"{document_uuid}.pdf\",\n                    'size': len(pdf_content)\n                }\n            ]\n            \n            doc_dir_content, doc_dir_hash = self.create_directory_listing([], data_components)\n            self.upload_raw_content(doc_dir_content, doc_dir_hash)\n            \n            # Add to database\n            new_node = {\n                'uuid': document_uuid,\n                'hash': doc_dir_hash,\n                'name': name,\n                'node_type': 'document',\n                'parent_uuid': parent_uuid,\n                'local_path': f\"content/{name}\",\n                'extracted_files': [str(pdf_file)],\n                'component_hashes': {\n                    'content': None,\n                    'metadata': metadata_hash,\n                    'pdf': pdf_hash,\n                    'pagedata': None,\n                    'rm_files': []\n                },\n                'metadata': json.loads(metadata_content.decode('utf-8')),\n                'last_modified': self._generate_timestamp(),\n                'version': 1,\n                'sync_status': 'uploaded',\n                'last_synced': datetime.now().isoformat()\n            }\n            \n            self.database['nodes'][document_uuid] = new_node\n            \n            # Update hash registry\n            for hash_val, info in [\n                (doc_dir_hash, {'uuid': document_uuid, 'type': 'node'}),\n                (metadata_hash, {'uuid': document_uuid, 'type': 'metadata'}),\n                (pdf_hash, {'uuid': document_uuid, 'type': 'pdf'})\n            ]:\n                self.database['hash_registry'][hash_val] = {\n                    **info,\n                    'last_seen': datetime.now().isoformat()\n                }\n            \n            # Update parent directory and root if needed\n            if parent_uuid and parent_uuid in self.database['nodes']:\n                # 
TODO: Update parent directory listing\n                pass\n            else:\n                # Document added to root - update root hash\n                self.update_root_hash(doc_dir_hash)\n            \n            self._save_database()\n            print(f\"\u2705 Successfully uploaded PDF document: {name}\")\n            return True\n            \n        except Exception as e:\n            print(f\"\u274c Failed to upload PDF document: {e}\")\n            return False\n    \n    def create_notebook(self, name: str, parent_uuid: str = \"\", template: str = \"Blank\") -> bool:\n        \"\"\"Create a new empty notebook\"\"\"\n        try:\n            # Clear any previous document context\n            self._clear_document_context()\n            \n            print(f\"\ud83d\udcd3 Creating notebook: {name}\")\n            \n            # Generate UUIDs and set current document UUID for consistent rm-filename headers\n            document_uuid = str(uuid.uuid4())\n            self._current_document_uuid = document_uuid\n            page_uuid = str(uuid.uuid4())\n            print(f\"\ud83d\udcca Document UUID: {document_uuid}\")\n            \n            # Create empty .rm content for first page\n            rm_content = b'\\x00' * 1000  # Minimal empty page content\n            rm_hash = self.upload_raw_content(\n                content=rm_content,\n                content_type='application/octet-stream',\n                filename=f\"{page_uuid}.rm\"\n            )\n            \n            # Create content.json\n            content_data, content_hash = self.create_content_json([page_uuid], template)\n            self.upload_raw_content(\n                content=content_data,\n                content_type='application/octet-stream',\n                filename=f\"{document_uuid}.content\"\n            )\n            \n            # Create metadata\n            metadata_content, metadata_hash = self.create_metadata_json(name, parent_uuid)\n            self.upload_raw_content(\n                content=metadata_content,\n                content_type='application/octet-stream',\n                filename=f\"{document_uuid}.metadata\"\n            )\n            \n            # Create document directory listing\n            data_components = [\n                {\n                    'hash': content_hash,\n                    'component': f\"{document_uuid}.content\",\n                    'size': len(content_data)\n                },\n                {\n                    'hash': metadata_hash,\n                    'component': f\"{document_uuid}.metadata\",\n                    'size': len(metadata_content)\n                },\n                {\n                    'hash': rm_hash,\n                    'component': f\"{document_uuid}/{page_uuid}.rm\",\n                    'size': len(rm_content)\n                }\n            ]\n            \n            doc_dir_content, doc_dir_hash = self.create_directory_listing([], data_components)\n            self.upload_raw_content(doc_dir_content, doc_dir_hash)\n            \n            # Add to database\n            new_node = {\n                'uuid': document_uuid,\n                'hash': doc_dir_hash,\n                'name': name,\n                'node_type': 'document',\n                'parent_uuid': parent_uuid,\n                'local_path': f\"content/{name}\",\n                'extracted_files': [],\n                'component_hashes': {\n                    'content': content_hash,\n                    'metadata': metadata_hash,\n 
                   'pdf': None,\n                    'pagedata': None,\n                    'rm_files': [rm_hash]\n                },\n                'metadata': json.loads(metadata_content.decode('utf-8')),\n                'last_modified': self._generate_timestamp(),\n                'version': 1,\n                'sync_status': 'created',\n                'last_synced': datetime.now().isoformat()\n            }\n            \n            self.database['nodes'][document_uuid] = new_node\n            \n            # Update hash registry\n            for hash_val, info in [\n                (doc_dir_hash, {'uuid': document_uuid, 'type': 'node'}),\n                (content_hash, {'uuid': document_uuid, 'type': 'content'}),\n                (metadata_hash, {'uuid': document_uuid, 'type': 'metadata'}),\n                (rm_hash, {'uuid': document_uuid, 'type': 'rm_0'})\n            ]:\n                self.database['hash_registry'][hash_val] = {\n                    **info,\n                    'last_seen': datetime.now().isoformat()\n                }\n            \n            # Update root hash (simplified for demo)\n            self.update_root_hash(doc_dir_hash)\n            \n            self._save_database()\n            print(f\"\u2705 Successfully created notebook: {name}\")\n            return True\n            \n        except Exception as e:\n            print(f\"\u274c Failed to create notebook: {e}\")\n            return False",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/cloudtest/upload_manager_old.py",
      "tags": [
        "class",
        "remarkableuploadmanager"
      ],
      "updated_at": "2025-12-07T02:00:05.203641",
      "usage_example": "# Example usage:\n# result = RemarkableUploadManager(bases)"
    },
    {
      "best_practices": [],
      "class_interface": {
        "attributes": [],
        "methods": [
          {
            "is_property": false,
            "is_static": false,
            "name": "__init__",
            "parameters": {
              "replica_database_path": "Type: str",
              "session": "Type: requests.Session"
            },
            "purpose": "Internal method:   init  ",
            "returns": "None",
            "signature": "__init__(self, session, replica_database_path)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_clear_document_context",
            "parameters": {},
            "purpose": "Clear the current document UUID context for new uploads",
            "returns": "None",
            "signature": "_clear_document_context(self)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_load_database",
            "parameters": {},
            "purpose": "Load the replica database",
            "returns": "Returns Dict[str, Any]",
            "signature": "_load_database(self) -> Dict[str, Any]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_save_database",
            "parameters": {},
            "purpose": "Save the updated database",
            "returns": "None",
            "signature": "_save_database(self)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_compute_hash",
            "parameters": {
              "content": "Type: bytes"
            },
            "purpose": "Compute SHA256 hash of content",
            "returns": "Returns str",
            "signature": "_compute_hash(self, content) -> str"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_compute_crc32c_header",
            "parameters": {
              "content": "Type: bytes"
            },
            "purpose": "Compute CRC32C checksum and return as x-goog-hash header value",
            "returns": "Returns str",
            "signature": "_compute_crc32c_header(self, content) -> str"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_generate_timestamp",
            "parameters": {},
            "purpose": "Generate reMarkable timestamp",
            "returns": "Returns str",
            "signature": "_generate_timestamp(self) -> str"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_generate_generation",
            "parameters": {},
            "purpose": "Generate reMarkable generation number",
            "returns": "Returns int",
            "signature": "_generate_generation(self) -> int"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_capture_server_generation",
            "parameters": {},
            "purpose": "Capture the current server generation for use in final root update",
            "returns": "Returns bool",
            "signature": "_capture_server_generation(self) -> bool"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "upload_raw_content",
            "parameters": {
              "content": "Type: bytes",
              "content_hash": "Type: str",
              "content_type": "Type: str",
              "filename": "Type: str",
              "system_filename": "Type: str"
            },
            "purpose": "Upload raw content and return its hash",
            "returns": "Returns Optional[str]",
            "signature": "upload_raw_content(self, content, content_hash, filename, content_type, system_filename) -> Optional[str]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "upload_system_file",
            "parameters": {
              "content": "Type: bytes",
              "content_type": "Type: str",
              "system_filename": "Type: str"
            },
            "purpose": "Upload system files like roothash, root.docSchema with fixed filenames",
            "returns": "Returns Optional[str]",
            "signature": "upload_system_file(self, content, system_filename, content_type) -> Optional[str]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "upload_document_file",
            "parameters": {
              "content": "Type: bytes",
              "content_type": "Type: str",
              "filename": "Type: str"
            },
            "purpose": "Upload document files with UUID.extension pattern",
            "returns": "Returns Optional[str]",
            "signature": "upload_document_file(self, content, filename, content_type) -> Optional[str]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "create_metadata_json",
            "parameters": {
              "document_type": "Type: str",
              "name": "Type: str",
              "parent_uuid": "Type: str"
            },
            "purpose": "Create metadata JSON for a document",
            "returns": "Returns Tuple[bytes, str]",
            "signature": "create_metadata_json(self, name, parent_uuid, document_type) -> Tuple[bytes, str]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "create_pdf_content_json",
            "parameters": {
              "document_name": "Type: str",
              "pdf_content": "Type: bytes"
            },
            "purpose": "Create content JSON for a PDF document based on real app patterns",
            "returns": "Returns Tuple[bytes, str]",
            "signature": "create_pdf_content_json(self, pdf_content, document_name) -> Tuple[bytes, str]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "create_content_json",
            "parameters": {
              "pages": "Type: List[str]",
              "template": "Type: str"
            },
            "purpose": "Create content JSON for a notebook with pages",
            "returns": "Returns Tuple[bytes, str]",
            "signature": "create_content_json(self, pages, template) -> Tuple[bytes, str]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "create_doc_schema",
            "parameters": {
              "content_hash": "Type: str",
              "content_size": "Type: int",
              "document_uuid": "Type: str",
              "metadata_hash": "Type: str",
              "metadata_size": "Type: int",
              "pagedata_hash": "Type: str",
              "pagedata_size": "Type: int",
              "pdf_hash": "Type: str",
              "pdf_size": "Type: int"
            },
            "purpose": "Create document schema content in the exact format expected by reMarkable",
            "returns": "Returns Tuple[bytes, str]",
            "signature": "create_doc_schema(self, document_uuid, metadata_hash, pagedata_hash, pdf_hash, content_hash, metadata_size, pagedata_size, pdf_size, content_size) -> Tuple[bytes, str]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "create_directory_listing",
            "parameters": {
              "child_objects": "Type: List[Dict]",
              "data_components": "Type: List[Dict]"
            },
            "purpose": "Create directory listing content",
            "returns": "Returns Tuple[bytes, str]",
            "signature": "create_directory_listing(self, child_objects, data_components) -> Tuple[bytes, str]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "update_root_directory",
            "parameters": {},
            "purpose": "Update the root directory listing by adding the new document to existing entries",
            "returns": "Returns bool",
            "signature": "update_root_directory(self) -> bool"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_get_current_root_entries",
            "parameters": {},
            "purpose": "Get current root.docSchema entries from server to preserve existing data",
            "returns": "Returns Optional[List[str]]",
            "signature": "_get_current_root_entries(self) -> Optional[List[str]]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_add_new_document_to_root_entries",
            "parameters": {
              "existing_entries": "Type: List[str]"
            },
            "purpose": "Add the current document being uploaded to the root entries list",
            "returns": "Returns bool",
            "signature": "_add_new_document_to_root_entries(self, existing_entries) -> bool"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_create_root_directory_from_entries",
            "parameters": {
              "entries": "Type: List[str]"
            },
            "purpose": "Create root.docSchema content from list of entries",
            "returns": "Returns bytes",
            "signature": "_create_root_directory_from_entries(self, entries) -> bytes"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "create_root_directory_listing",
            "parameters": {
              "root_entries": "Type: List[Dict]"
            },
            "purpose": "Create root directory listing with version header '3' (matching /sync/v3/ API version)",
            "returns": "Returns bytes",
            "signature": "create_root_directory_listing(self, root_entries) -> bytes"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "update_root_hash",
            "parameters": {
              "new_root_hash": "Type: str"
            },
            "purpose": "Update the root hash in the cloud - send as text body with proper headers like other files",
            "returns": "Returns bool",
            "signature": "update_root_hash(self, new_root_hash) -> bool"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "edit_document_metadata",
            "parameters": {
              "document_uuid": "Type: str",
              "new_name": "Type: str",
              "new_parent": "Type: str"
            },
            "purpose": "Edit an existing document's metadata",
            "returns": "Returns bool",
            "signature": "edit_document_metadata(self, document_uuid, new_name, new_parent) -> bool"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "upload_pdf_document",
            "parameters": {
              "name": "Type: str",
              "parent_uuid": "Type: str",
              "pdf_path": "Type: str"
            },
            "purpose": "Upload a new PDF document to reMarkable following the correct sequence from app logs",
            "returns": "Returns bool",
            "signature": "upload_pdf_document(self, pdf_path, name, parent_uuid) -> bool"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "create_notebook",
            "parameters": {
              "name": "Type: str",
              "parent_uuid": "Type: str",
              "template": "Type: str"
            },
            "purpose": "Create a new empty notebook",
            "returns": "Returns bool",
            "signature": "create_notebook(self, name, parent_uuid, template) -> bool"
          }
        ]
      },
      "complexity": "moderate",
      "component_type": "class",
      "conditional_imports": [],
      "created_at": "2025-12-07 00:59:32",
      "decorators": [],
      "dependencies": [],
      "description": "Manages uploads to reMarkable cloud",
      "docstring": "Manages uploads to reMarkable cloud",
      "id": 2134,
      "imports": [
        "import os",
        "import json",
        "import hashlib",
        "import requests",
        "import uuid",
        "import base64",
        "import binascii",
        "import zlib",
        "from pathlib import Path",
        "from typing import Dict",
        "from typing import List",
        "from typing import Optional",
        "from typing import Any",
        "from typing import Tuple",
        "from datetime import datetime",
        "import time",
        "import crc32c",
        "import sys",
        "from auth import RemarkableAuth",
        "import re",
        "from local_replica_v2 import RemarkableReplicaBuilder",
        "from local_replica_v2 import RemarkableReplicaBuilder"
      ],
      "imports_required": [
        "import os",
        "import json",
        "import hashlib",
        "import requests",
        "import uuid"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 1125,
      "line_start": 32,
      "name": "RemarkableUploadManager",
      "parameters": [
        {
          "annotation": "",
          "default": null,
          "name": "bases"
        }
      ],
      "parameters_explained": {
        "bases": "Parameter of type "
      },
      "parent_class": null,
      "purpose": "Manages uploads to reMarkable cloud",
      "return_annotation": null,
      "return_explained": "Returns unspecified type",
      "settings_required": [],
      "source_code": "class RemarkableUploadManager:\n    \"\"\"Manages uploads to reMarkable cloud\"\"\"\n    \n    def __init__(self, session: requests.Session, replica_database_path: str):\n        self.session = session\n        self.base_url = \"https://eu.tectonic.remarkable.com\"\n        \n        # Load replica database\n        self.database_path = Path(replica_database_path)\n        self.database = self._load_database()\n        \n        # Track uploads\n        self.upload_queue: List[Dict[str, Any]] = []\n        self.uploaded_hashes: Dict[str, str] = {}  # hash -> upload_status\n        self._current_document_uuid: Optional[str] = None  # UUID for consistent rm-filename headers\n        self._server_generation: Optional[int] = None  # Store generation from server for final root update\n        \n    def _clear_document_context(self):\n        \"\"\"Clear the current document UUID context for new uploads\"\"\"\n        self._current_document_uuid = None\n        \n    def _load_database(self) -> Dict[str, Any]:\n        \"\"\"Load the replica database\"\"\"\n        if not self.database_path.exists():\n            raise FileNotFoundError(f\"Database not found: {self.database_path}\")\n            \n        with open(self.database_path, 'r', encoding='utf-8') as f:\n            return json.load(f)\n    \n    def _save_database(self):\n        \"\"\"Save the updated database\"\"\"\n        with open(self.database_path, 'w', encoding='utf-8') as f:\n            json.dump(self.database, f, indent=2, ensure_ascii=False)\n    \n    def _compute_hash(self, content: bytes) -> str:\n        \"\"\"Compute SHA256 hash of content\"\"\"\n        return hashlib.sha256(content).hexdigest()\n    \n    def _compute_crc32c_header(self, content: bytes) -> str:\n        \"\"\"Compute CRC32C checksum and return as x-goog-hash header value\"\"\"\n        try:\n            # Use proper crc32c library if available\n            if HAS_CRC32C:\n                checksum = crc32c.crc32c(content)\n            else:\n                # Fallback to standard CRC32 (not ideal but better than nothing)\n                checksum = zlib.crc32(content) & 0xffffffff\n            \n            # Convert to bytes and base64 encode\n            checksum_bytes = checksum.to_bytes(4, byteorder='big')\n            checksum_b64 = base64.b64encode(checksum_bytes).decode('ascii')\n            \n            return f\"crc32c={checksum_b64}\"\n        except Exception as e:\n            print(f\"\u26a0\ufe0f Warning: Failed to compute CRC32C checksum: {e}\")\n            # Return empty string to skip the header if computation fails\n            return \"\"\n    \n    def _generate_timestamp(self) -> str:\n        \"\"\"Generate reMarkable timestamp\"\"\"\n        return str(int(time.time() * 1000))\n    \n    def _generate_generation(self) -> int:\n        \"\"\"Generate reMarkable generation number\"\"\"\n        return int(time.time() * 1000000)\n    \n    def _capture_server_generation(self) -> bool:\n        \"\"\"Capture the current server generation for use in final root update\"\"\"\n        try:\n            print(f\"\ud83d\udce1 Capturing server generation for upload sequence...\")\n            root_url = f\"{self.base_url}/sync/v4/root\"\n            root_response = self.session.get(root_url)\n            root_response.raise_for_status()\n            \n            current_root = root_response.json()\n            self._server_generation = current_root.get('generation')\n            \n            print(f\"\ud83d\udd0d 
Captured server generation: {self._server_generation}\")\n            return True\n            \n        except Exception as e:\n            print(f\"\u274c Failed to capture server generation: {e}\")\n            self._server_generation = None\n            return False\n    \n    def upload_raw_content(self, content: bytes, content_hash: str = None, filename: str = None, \n                          content_type: str = \"application/octet-stream\", system_filename: str = None) -> Optional[str]:\n        \"\"\"Upload raw content and return its hash\"\"\"\n        if content_hash is None:\n            content_hash = self._compute_hash(content)\n        \n        # Check if already uploaded\n        if content_hash in self.uploaded_hashes:\n            print(f\"\u2705 Content already uploaded: {content_hash[:16]}...\")\n            return content_hash\n        \n        try:\n            url = f\"{self.base_url}/sync/v3/files/{content_hash}\"\n            \n            # Prepare headers like the reMarkable app\n            headers = {\n                'Content-Type': content_type,\n                'rm-batch-number': '1',\n                'rm-sync-id': str(uuid.uuid4()),\n                'User-Agent': 'desktop/3.20.0.922 (macos 15.4)',  # \u2705 FIXED: Match real app\n                'Accept-Encoding': 'gzip, deflate',\n                'Accept-Language': 'en-BE,*',\n                'Connection': 'Keep-Alive'\n            }\n            \n            # Add rm-filename header - REQUIRED for all PUT requests\n            # Handle different patterns: UUID-based files vs system files\n            if system_filename:\n                # System files like \"roothash\", \"root.docSchema\" (no UUID)\n                rm_filename = system_filename\n                print(f\"\ud83c\udff7\ufe0f rm-filename (system): {rm_filename}\")\n            elif filename:\n                # Document files with UUID pattern\n                if hasattr(self, '_current_document_uuid') and self._current_document_uuid:\n                    doc_uuid = self._current_document_uuid\n                else:\n                    # Generate and store new UUID for this document\n                    doc_uuid = str(uuid.uuid4())\n                    self._current_document_uuid = doc_uuid\n                    print(f\"\ud83d\udcca Generated new document UUID: {doc_uuid}\")\n                \n                # Use the filename as provided or construct UUID.extension format\n                if '.' 
in filename and len(filename.split('.')[0]) == 36:  # Already UUID.extension\n                    rm_filename = filename\n                else:\n                    # Determine extension and construct UUID.extension\n                    if content_type == 'application/pdf' or filename.lower().endswith('.pdf'):\n                        rm_filename = f\"{doc_uuid}.pdf\"\n                    elif 'metadata' in filename.lower():\n                        rm_filename = f\"{doc_uuid}.metadata\"\n                    elif filename.lower().endswith('.content'):\n                        rm_filename = f\"{doc_uuid}.content\"\n                    elif filename.lower().endswith('.rm'):\n                        # Page data keeps original filename for .rm files\n                        rm_filename = filename\n                    elif filename.lower().endswith('.docschema') or 'docschema' in filename.lower():\n                        rm_filename = f\"{doc_uuid}.docSchema\"\n                    elif filename.lower().endswith('.pagedata'):\n                        rm_filename = f\"{doc_uuid}.pagedata\"\n                    else:\n                        # Default construction\n                        rm_filename = f\"{doc_uuid}.{filename}\"\n                \n                print(f\"\ud83c\udff7\ufe0f rm-filename (document): {rm_filename}\")\n            else:\n                # Fallback - generate basic filename\n                if hasattr(self, '_current_document_uuid') and self._current_document_uuid:\n                    doc_uuid = self._current_document_uuid\n                else:\n                    doc_uuid = str(uuid.uuid4())\n                    self._current_document_uuid = doc_uuid\n                \n                if content_type == 'application/pdf':\n                    rm_filename = f\"{doc_uuid}.pdf\"\n                elif content_type == 'application/octet-stream':\n                    rm_filename = f\"{doc_uuid}.metadata\"\n                else:\n                    rm_filename = f\"{doc_uuid}.content\"\n                \n                print(f\"\ud83c\udff7\ufe0f rm-filename (fallback): {rm_filename}\")\n            \n            headers['rm-filename'] = rm_filename\n            \n            # Add CRC32C checksum (this is the missing piece!)\n            crc32c_header = self._compute_crc32c_header(content)\n            if crc32c_header:\n                headers['x-goog-hash'] = crc32c_header\n            \n            print(f\"\ud83d\udd0d Debug: Upload headers for {content_hash[:16]}...\")\n            for key, value in headers.items():\n                print(f\"    {key}: {value}\")\n            \n            # Make the PUT request\n            response = self.session.put(url, data=content, headers=headers)\n            \n            print(f\"\ud83d\udd0d Debug: Response status: {response.status_code}\")\n            print(f\"\ud83d\udd0d Debug: Response text: {response.text}\")\n            \n            response.raise_for_status()\n            \n            self.uploaded_hashes[content_hash] = \"uploaded\"\n            print(f\"\u2705 Uploaded content: {content_hash[:16]}... 
({len(content)} bytes)\")\n            return content_hash\n            \n        except Exception as e:\n            print(f\"\u274c Failed to upload content {content_hash[:16]}...: {e}\")\n            if hasattr(e, 'response') and e.response is not None:\n                print(f\"    Response: {e.response.text}\")\n            return None\n    \n    def upload_system_file(self, content: bytes, system_filename: str, content_type: str = \"application/octet-stream\") -> Optional[str]:\n        \"\"\"Upload system files like roothash, root.docSchema with fixed filenames\"\"\"\n        print(f\"\ud83d\udcc1 Uploading system file: {system_filename}\")\n        return self.upload_raw_content(content, system_filename=system_filename, content_type=content_type)\n    \n    def upload_document_file(self, content: bytes, filename: str, content_type: str = \"application/octet-stream\") -> Optional[str]:\n        \"\"\"Upload document files with UUID.extension pattern\"\"\"\n        print(f\"\ud83d\udcc4 Uploading document file: {filename}\")\n        return self.upload_raw_content(content, filename=filename, content_type=content_type)\n\n    def create_metadata_json(self, name: str, parent_uuid: str = \"\", document_type: str = \"DocumentType\") -> Tuple[bytes, str]:\n        \"\"\"Create metadata JSON for a document\"\"\"\n        timestamp = self._generate_timestamp()\n        \n        metadata = {\n            \"createdTime\": timestamp,\n            \"lastModified\": timestamp,\n            \"lastOpened\": \"0\",  # Real app sets this to \"0\" for never-opened documents\n            \"lastOpenedPage\": 0,\n            \"new\": False,\n            \"parent\": parent_uuid,\n            \"pinned\": False,\n            \"source\": \"com.remarkable.macos\",  # \u2705 FIXED: Match real app behavior\n            \"type\": document_type,\n            \"visibleName\": name\n        }\n        \n        content = json.dumps(metadata, indent=4).encode('utf-8')\n        content_hash = self._compute_hash(content)\n        \n        return content, content_hash\n    \n    def create_pdf_content_json(self, pdf_content: bytes, document_name: str = \"\") -> Tuple[bytes, str]:\n        \"\"\"Create content JSON for a PDF document based on real app patterns\"\"\"\n        \n        # Basic PDF content structure based on real app analysis\n        content_data = {\n            \"coverPageNumber\": 0,\n            \"customZoomCenterX\": 0,\n            \"customZoomCenterY\": 936,\n            \"customZoomOrientation\": \"portrait\",\n            \"customZoomPageHeight\": 1872,\n            \"customZoomPageWidth\": 1404,\n            \"customZoomScale\": 1,\n            \"documentMetadata\": {\n                \"title\": document_name if document_name else \"Untitled\"\n            },\n            \"extraMetadata\": {},\n            \"fileType\": \"pdf\",\n            \"fontName\": \"\",\n            \"formatVersion\": 1,\n            \"lineHeight\": -1,\n            \"orientation\": \"portrait\",\n            \"originalPageCount\": 1,  # Will be updated based on actual PDF\n            \"pageCount\": 1,  # Will be updated based on actual PDF\n            \"pageTags\": [],\n            \"pages\": [\"6a22f0dc-5606-4d40-946f-ccbc14f777ff\"],  # Default page UUID\n            \"redirectionPageMap\": [0],\n            \"sizeInBytes\": len(pdf_content),\n            \"tags\": [],\n            \"textAlignment\": \"justify\",\n            \"textScale\": 1,\n            \"zoomMode\": \"bestFit\"\n        }\n        \n        
content = json.dumps(content_data, indent=4).encode('utf-8')\n        content_hash = self._compute_hash(content)\n        \n        return content, content_hash\n\n    def create_content_json(self, pages: List[str], template: str = \"Blank\") -> Tuple[bytes, str]:\n        \"\"\"Create content JSON for a notebook with pages\"\"\"\n        timestamp_base = f\"2:{len(pages)}\"\n        \n        # Create pages structure\n        pages_list = []\n        for i, page_id in enumerate(pages):\n            pages_list.append({\n                \"id\": page_id,\n                \"idx\": {\n                    \"timestamp\": f\"2:{i+2}\",\n                    \"value\": chr(ord('a') + i) if i < 26 else f\"page_{i}\"\n                },\n                \"template\": {\n                    \"timestamp\": \"2:1\",\n                    \"value\": template\n                }\n            })\n        \n        content_data = {\n            \"cPages\": {\n                \"lastOpened\": {\n                    \"timestamp\": \"2:1\",\n                    \"value\": pages[0] if pages else \"\"\n                },\n                \"original\": {\n                    \"timestamp\": \"0:0\",\n                    \"value\": -1\n                },\n                \"pages\": pages_list\n            },\n            \"extraMetadata\": {},\n            \"fileType\": \"notebook\",\n            \"fontName\": \"\",\n            \"lineHeight\": -1,\n            \"margins\": 180,\n            \"pageCount\": len(pages),\n            \"textScale\": 1,\n            \"transform\": {}\n        }\n        \n        content = json.dumps(content_data, indent=4).encode('utf-8')\n        content_hash = self._compute_hash(content)\n        \n        return content, content_hash\n    \n    def create_doc_schema(self, document_uuid: str, metadata_hash: str, pagedata_hash: str, \n                         pdf_hash: str, content_hash: str, metadata_size: int, pagedata_size: int,\n                         pdf_size: int, content_size: int) -> Tuple[bytes, str]:\n        \"\"\"Create document schema content in the exact format expected by reMarkable\"\"\"\n        # Based on raw logs: 4 components for a document\n        lines = ['4']\n        \n        # Add components in specific order (content, metadata, pagedata, pdf)\n        lines.append(f\"{content_hash}:0:{document_uuid}.content:0:{content_size}\")\n        lines.append(f\"{metadata_hash}:0:{document_uuid}.metadata:0:{metadata_size}\")\n        lines.append(f\"{pagedata_hash}:0:{document_uuid}.pagedata:0:{pagedata_size}\")\n        lines.append(f\"{pdf_hash}:0:{document_uuid}.pdf:0:{pdf_size}\")\n        \n        content = '\\n'.join(lines).encode('utf-8')\n        content_hash = self._compute_hash(content)\n        \n        return content, content_hash\n\n    def create_directory_listing(self, child_objects: List[Dict], data_components: List[Dict]) -> Tuple[bytes, str]:\n        \"\"\"Create directory listing content\"\"\"\n        lines = [str(len(child_objects) + len(data_components))]\n        \n        # Add child objects (folders/documents)\n        for obj in child_objects:\n            line = f\"{obj['hash']}:80000000:{obj['uuid']}:{obj['type']}:{obj['size']}\"\n            lines.append(line)\n        \n        # Add data components (.content, .metadata, .rm files, etc.)\n        for comp in data_components:\n            line = f\"{comp['hash']}:0:{comp['component']}:0:{comp['size']}\"\n            lines.append(line)\n        \n        content = 
'\\n'.join(lines).encode('utf-8')\n        content_hash = self._compute_hash(content)\n        \n        return content, content_hash\n    \n    def update_root_directory(self) -> bool:\n        \"\"\"Update the root directory listing by adding the new document to existing entries\"\"\"\n        try:\n            print(\"\ud83d\udcc1 Updating root directory listing...\")\n            \n            # Get the current root.docSchema from the server to preserve existing entries\n            current_root_entries = self._get_current_root_entries()\n            if current_root_entries is None:\n                print(\"\u274c Failed to get current root entries\")\n                return False\n            \n            # Add the new document entry if it doesn't already exist\n            new_doc_added = self._add_new_document_to_root_entries(current_root_entries)\n            \n            if not new_doc_added:\n                print(\"\ud83d\udcc4 No new document to add to root directory\")\n                return True\n            \n            # Create the updated root directory listing\n            root_dir_content = self._create_root_directory_from_entries(current_root_entries)\n            root_dir_hash = self._compute_hash(root_dir_content)\n            \n            print(f\"\ud83d\udcc2 Updated root directory hash: {root_dir_hash}\")\n            \n            # Upload the updated root directory listing\n            uploaded_hash = self.upload_system_file(root_dir_content, \"root.docSchema\")\n            if not uploaded_hash:\n                return False\n            \n            # Update the root hash in the cloud\n            return self.update_root_hash(root_dir_hash)\n            \n        except Exception as e:\n            print(f\"\u274c Failed to update root directory: {e}\")\n            return False\n    \n    def _get_current_root_entries(self) -> Optional[List[str]]:\n        \"\"\"Get current root.docSchema entries from server to preserve existing data\"\"\"\n        try:\n            # Get current root hash\n            root_url = f\"{self.base_url}/sync/v4/root\"\n            root_response = self.session.get(root_url)\n            root_response.raise_for_status()\n            \n            current_root = root_response.json()\n            current_root_hash = current_root.get('hash')\n            \n            if not current_root_hash:\n                print(\"\u274c No current root hash found\")\n                return None\n            \n            # Fetch the current root.docSchema content\n            root_content_url = f\"{self.base_url}/sync/v3/files/{current_root_hash}\"\n            root_content_response = self.session.get(root_content_url)\n            root_content_response.raise_for_status()\n            \n            # Parse the content to extract existing entries\n            content_lines = root_content_response.text.strip().split('\\n')\n            \n            # First line should be version header \"3\"\n            if not content_lines or content_lines[0] != '3':\n                print(f\"\u274c Unexpected root.docSchema format: {content_lines[0] if content_lines else 'empty'}\")\n                return None\n            \n            # Return all entries (excluding the version header)\n            existing_entries = content_lines[1:] if len(content_lines) > 1 else []\n            \n            print(f\"\ud83d\udccb Found {len(existing_entries)} existing root entries\")\n            for entry in existing_entries[:5]:  # Show first 5 for debugging\n       
         parts = entry.split(':')\n                if len(parts) >= 3:\n                    uuid = parts[2]\n                    size = parts[-1] if len(parts) > 4 else 'unknown'\n                    print(f\"   - {uuid}: size={size}\")\n            \n            return existing_entries\n            \n        except Exception as e:\n            print(f\"\u274c Failed to get current root entries: {e}\")\n            return None\n    \n    def _add_new_document_to_root_entries(self, existing_entries: List[str]) -> bool:\n        \"\"\"Add the current document being uploaded to the root entries list\"\"\"\n        if not self._current_document_uuid:\n            print(\"\u26a0\ufe0f No current document UUID to add\")\n            return False\n        \n        # Check if this document is already in the entries\n        doc_uuid = self._current_document_uuid\n        for entry in existing_entries:\n            if doc_uuid in entry:\n                print(f\"\ufffd Document {doc_uuid} already exists in root entries\")\n                return False\n        \n        # Find the document in our database to get its info\n        document_node = None\n        for node_uuid, node in self.database['nodes'].items():\n            if node_uuid == doc_uuid:\n                document_node = node\n                break\n        \n        if not document_node:\n            print(f\"\u274c Document {doc_uuid} not found in database\")\n            return False\n        \n        # Get the document's hash and size\n        doc_hash = document_node.get('hash')\n        if not doc_hash:\n            print(f\"\u274c No hash found for document {doc_uuid}\")\n            return False\n        \n        # Determine the correct node type code based on the document type\n        # From analysis: Type 1/2 = folders, Type 3 = notebook documents, Type 4 = PDF documents\n        doc_metadata = document_node.get('metadata', {})\n        doc_type = doc_metadata.get('type', 'DocumentType')\n        \n        if doc_type == 'DocumentType' and 'fileType' in doc_metadata.get('content_data', ''):\n            # Check if it's a PDF or notebook\n            content_data_str = doc_metadata.get('content_data', '')\n            if '\"fileType\": \"pdf\"' in content_data_str:\n                node_type_code = 4  # PDF document\n            elif '\"fileType\": \"notebook\"' in content_data_str:\n                node_type_code = 3  # Notebook document\n            else:\n                node_type_code = 4  # Default to PDF for documents\n        else:\n            node_type_code = 4  # Default to PDF for documents\n        \n        # The size in root.docSchema is the actual document content size (PDF size for PDFs)\n        # Get the actual PDF file size or document content size\n        doc_size = 0\n        \n        # First try to get the PDF file size from metadata\n        doc_metadata = document_node.get('metadata', {})\n        content_data_str = doc_metadata.get('content_data', '')\n        if '\"sizeInBytes\"' in content_data_str:\n            # Extract sizeInBytes from the content_data JSON string\n            import re\n            size_match = re.search(r'\"sizeInBytes\":\\s*\"(\\d+)\"', content_data_str)\n            if size_match:\n                doc_size = int(size_match.group(1))\n        \n        # Fallback: try to get size from the node itself\n        if doc_size == 0:\n            doc_size = document_node.get('size', 0)\n        \n        # If still no size, use a reasonable default for new documents\n        if 
doc_size == 0:\n            doc_size = 50000  # Reasonable default for a new PDF\n        \n        # Create the new entry in the same format as existing ones\n        # Format: hash:80000000:uuid:type:actual_document_size\n        new_entry = f\"{doc_hash}:80000000:{doc_uuid}:{node_type_code}:{doc_size}\"\n        existing_entries.append(new_entry)\n        \n        print(f\"\u2705 Added new document entry: {doc_uuid} (size={doc_size})\")\n        return True\n    \n    def _create_root_directory_from_entries(self, entries: List[str]) -> bytes:\n        \"\"\"Create root.docSchema content from list of entries\"\"\"\n        # Always start with version header \"3\"\n        lines = [\"3\"] + entries\n        \n        # Sort entries by UUID for consistency (skip the version header)\n        if len(lines) > 1:\n            entry_lines = lines[1:]\n            # Sort by UUID (3rd field after splitting by ':')\n            entry_lines.sort(key=lambda x: x.split(':')[2] if ':' in x else x)\n            lines = [\"3\"] + entry_lines\n        \n        # Create content with newline separator\n        content = '\\n'.join(lines) + '\\n'\n        \n        print(f\"\ud83d\udd0d Debug: Updated root directory content:\")\n        print(f\"   Version header: 3\")\n        print(f\"   Entry count: {len(entries)}\")\n        print(f\"   Total lines: {len(lines)}\")\n        print(f\"   Content length: {len(content.encode('utf-8'))} bytes\")\n        print(f\"   Preview: {content[:100]}...\")\n        \n        return content.encode('utf-8')\n    \n    def create_root_directory_listing(self, root_entries: List[Dict]) -> bytes:\n        \"\"\"Create root directory listing with version header '3' (matching /sync/v3/ API version)\"\"\"\n        # Always use \"3\" as version header (not count) - this matches the /sync/v3/ API version\n        lines = [\"3\"]\n        \n        # Add each entry in the format: hash:80000000:uuid:node_type:size\n        # Sort by UUID for consistent ordering (like document components)\n        sorted_entries = sorted(root_entries, key=lambda x: x['uuid'])\n        \n        for entry in sorted_entries:\n            line = f\"{entry['hash']}:80000000:{entry['uuid']}:{entry['node_type']}:{entry['size']}\"\n            lines.append(line)\n        \n        # Use the same approach as document uploads - with newline\n        content = '\\n'.join(lines) + '\\n'\n        \n        print(f\"\ud83d\udd0d Debug: Root directory content:\")\n        print(f\"   Version header: 3 (API version, not count)\")\n        print(f\"   Entry count: {len(root_entries)}\")\n        print(f\"   Total lines: {len(lines)}\")\n        print(f\"   Content length: {len(content.encode('utf-8'))} bytes\")\n        print(f\"   Preview: {content[:100]}...\")\n        \n        return content.encode('utf-8')\n\n    def update_root_hash(self, new_root_hash: str) -> bool:\n        \"\"\"Update the root hash in the cloud - send as text body with proper headers like other files\"\"\"\n        try:\n            # Use the server generation captured at the start of upload sequence\n            if self._server_generation is None:\n                print(f\"\u26a0\ufe0f Warning: No server generation captured, capturing now...\")\n                if not self._capture_server_generation():\n                    print(f\"\u274c Failed to get server generation, aborting root hash update\")\n                    return False\n            \n            generation = self._server_generation\n            \n            
print(f\"\ud83d\udd0d Using server generation: {generation}\")\n            print(f\"\ud83d\udd0d New root hash: {new_root_hash}\")\n            \n            # Create the root data exactly like the real app\n            root_data = {\n                \"broadcast\": True,\n                \"generation\": generation,\n                \"hash\": new_root_hash\n            }\n            \n            # Convert to JSON text with same formatting as real app (pretty-printed with 2-space indent)\n            root_content = json.dumps(root_data, indent=2).encode('utf-8')\n            \n            # Set up headers exactly like the real app (case-sensitive and ordered correctly)\n            headers = {\n                'Content-Type': 'application/json',\n                'rm-batch-number': '1',\n                'rm-filename': 'roothash',\n                'rm-sync-id': str(uuid.uuid4()),\n                'User-Agent': 'desktop/3.20.0.922 (macos 15.4)',  # \u2705 FIXED: Match real app\n                'Accept-Encoding': 'gzip, deflate',\n                'Accept-Language': 'en-BE,*',\n                'Connection': 'Keep-Alive',\n            }\n            \n            # Add CRC32C checksum\n            crc32c_header = self._compute_crc32c_header(root_content)\n            if crc32c_header:\n                headers['x-goog-hash'] = crc32c_header\n            \n            print(f\"\ud83d\udd0d Debug: Root hash update headers:\")\n            for key, value in headers.items():\n                print(f\"    {key}: {value}\")\n            print(f\"\ud83d\udd0d Debug: Root hash content: {root_content.decode('utf-8')}\")\n            \n            url = f\"{self.base_url}/sync/v3/root\"\n            response = self.session.put(url, data=root_content, headers=headers)\n            \n            print(f\"\ud83d\udd0d Debug: Root hash response status: {response.status_code}\")\n            print(f\"\ud83d\udd0d Debug: Root hash response text: {response.text}\")\n            \n            response.raise_for_status()\n            \n            print(f\"\u2705 Updated root hash: {new_root_hash}\")\n            return True\n            \n        except Exception as e:\n            print(f\"\u274c Failed to update root hash: {e}\")\n            if hasattr(e, 'response') and e.response is not None:\n                print(f\"    Response: {e.response.text}\")\n            return False\n    \n    def edit_document_metadata(self, document_uuid: str, new_name: str = None, new_parent: str = None) -> bool:\n        \"\"\"Edit an existing document's metadata\"\"\"\n        try:\n            # Find the document in database\n            if document_uuid not in self.database['nodes']:\n                raise ValueError(f\"Document {document_uuid} not found in database\")\n            \n            node = self.database['nodes'][document_uuid]\n            print(f\"\ud83d\udcdd Editing document: {node['name']}\")\n            \n            # Get current metadata\n            current_metadata = node['metadata'].copy()\n            \n            # Update metadata\n            if new_name:\n                current_metadata['visibleName'] = new_name\n            if new_parent is not None:\n                current_metadata['parent'] = new_parent\n            \n            current_metadata['lastModified'] = self._generate_timestamp()\n            \n            # Create new metadata content\n            metadata_content = json.dumps(current_metadata, indent=4).encode('utf-8')\n            metadata_hash = 
self._compute_hash(metadata_content)\n            \n            # Upload metadata\n            self.upload_raw_content(metadata_content, metadata_hash)\n            \n            # Update component hashes\n            old_metadata_hash = node['component_hashes']['metadata']\n            node['component_hashes']['metadata'] = metadata_hash\n            \n            # Get parent node to update its directory listing\n            parent_uuid = current_metadata.get('parent', '')\n            if parent_uuid and parent_uuid in self.database['nodes']:\n                parent_node = self.database['nodes'][parent_uuid]\n                \n                # Rebuild parent's directory listing\n                child_objects = []\n                data_components = []\n                \n                # Find all children of this parent\n                for uuid, child_node in self.database['nodes'].items():\n                    if child_node.get('parent_uuid') == parent_uuid:\n                        if child_node['node_type'] == 'folder':\n                            type_val = '1'\n                        else:\n                            type_val = '3'\n                        \n                        child_objects.append({\n                            'hash': child_node['hash'],\n                            'uuid': uuid,\n                            'type': type_val,\n                            'size': len(str(child_node).encode('utf-8'))  # Approximate\n                        })\n                \n                # Add metadata components for this updated document\n                comp_hashes = node['component_hashes']\n                for comp_type, comp_hash in comp_hashes.items():\n                    if comp_hash:\n                        if comp_type == 'rm_files':\n                            for i, rm_hash in enumerate(comp_hash):\n                                data_components.append({\n                                    'hash': rm_hash,\n                                    'component': f\"{document_uuid}/{uuid.uuid4()}.rm\",\n                                    'size': 14661  # Typical RM file size\n                                })\n                        else:\n                            data_components.append({\n                                'hash': comp_hash,\n                                'component': f\"{document_uuid}.{comp_type}\",\n                                'size': len(metadata_content) if comp_type == 'metadata' else 2209\n                            })\n                \n                # Create and upload new directory listing\n                dir_content, dir_hash = self.create_directory_listing(child_objects, data_components)\n                self.upload_raw_content(dir_content, dir_hash)\n                \n                # Update parent node hash\n                parent_node['hash'] = dir_hash\n                self.database['hash_registry'][dir_hash] = {\n                    'uuid': parent_uuid,\n                    'type': 'node',\n                    'last_seen': datetime.now().isoformat()\n                }\n                \n                # Always update root directory after any upload to trigger sync\n                # This ensures the generation increments for both root and folder uploads\n                print(\"\ud83d\udd04 Updating root directory to trigger server generation increment...\")\n                self.update_root_directory()\n            \n            # Update database\n            node['metadata'] = current_metadata\n            
node['last_modified'] = current_metadata['lastModified']\n            node['sync_status'] = 'updated'\n            node['last_synced'] = datetime.now().isoformat()\n            \n            # Update hash registry\n            self.database['hash_registry'][metadata_hash] = {\n                'uuid': document_uuid,\n                'type': 'metadata',\n                'last_seen': datetime.now().isoformat()\n            }\n            \n            self._save_database()\n            print(f\"\u2705 Successfully updated document metadata\")\n            return True\n            \n        except Exception as e:\n            print(f\"\u274c Failed to edit document metadata: {e}\")\n            return False\n    \n    def upload_pdf_document(self, pdf_path: str, name: str, parent_uuid: str = \"\") -> bool:\n        \"\"\"Upload a new PDF document to reMarkable following the correct sequence from app logs\"\"\"\n        try:\n            # Clear any previous document context\n            self._clear_document_context()\n            \n            # FIRST: Capture server generation (like real app does with /sync/v4/root call)\n            if not self._capture_server_generation():\n                print(f\"\u274c Failed to capture server generation, aborting upload\")\n                return False\n            \n            pdf_file = Path(pdf_path)\n            if not pdf_file.exists():\n                raise FileNotFoundError(f\"PDF file not found: {pdf_path}\")\n            \n            print(f\"\ud83d\udcc4 Uploading PDF: {name}\")\n            \n            # Generate UUID for new document and set it for consistent rm-filename headers\n            document_uuid = str(uuid.uuid4())\n            self._current_document_uuid = document_uuid\n            print(f\"\ud83d\udcca Document UUID: {document_uuid}\")\n            \n            # Read PDF content\n            with open(pdf_file, 'rb') as f:\n                pdf_content = f.read()\n            \n            # EXACT SEQUENCE FROM APP LOGS:\n            # 1. cf2a3833-4a8f-4004-ab8d-8dc3c5f561bc.metadata\n            # 2. cf2a3833-4a8f-4004-ab8d-8dc3c5f561bc.pagedata  \n            # 3. cf2a3833-4a8f-4004-ab8d-8dc3c5f561bc.pdf\n            # 4. cf2a3833-4a8f-4004-ab8d-8dc3c5f561bc.content\n            # 5. cf2a3833-4a8f-4004-ab8d-8dc3c5f561bc.docSchema\n            # 6. root.docSchema\n            # 7. 
roothash\n            \n            print(\"\ud83d\udcdd Step 1: Creating and uploading metadata...\")\n            # Create metadata FIRST (as per app logs)\n            metadata_content, metadata_hash = self.create_metadata_json(name, parent_uuid)\n            metadata_upload_hash = self.upload_raw_content(\n                content=metadata_content,\n                content_type='application/octet-stream',\n                filename=f\"{document_uuid}.metadata\"\n            )\n            if not metadata_upload_hash:\n                raise Exception(\"Failed to upload metadata\")\n            \n            print(\"\ud83d\udcdd Step 2: Creating and uploading pagedata...\")\n            # For PDFs, create minimal pagedata (single newline like real app)\n            pagedata_content = b'\\n'  # \u2705 FIXED: Real app uses newline, not empty string\n            pagedata_upload_hash = self.upload_raw_content(\n                content=pagedata_content,\n                content_type='application/octet-stream',\n                filename=f\"{document_uuid}.pagedata\"\n            )\n            if not pagedata_upload_hash:\n                raise Exception(\"Failed to upload pagedata\")\n            \n            print(\"\ud83d\udcdd Step 3: Uploading PDF content...\")\n            pdf_upload_hash = self.upload_raw_content(\n                content=pdf_content,\n                content_type='application/pdf',\n                filename=f\"{document_uuid}.pdf\"\n            )\n            if not pdf_upload_hash:\n                raise Exception(\"Failed to upload PDF content\")\n            \n            print(\"\ud83d\udcdd Step 4: Creating and uploading content...\")\n            # Create proper PDF content structure based on real app patterns\n            content_data, content_hash = self.create_pdf_content_json(pdf_content, name)\n            content_upload_hash = self.upload_raw_content(\n                content=content_data,\n                content_type='application/octet-stream',\n                filename=f\"{document_uuid}.content\"\n            )\n            if not content_upload_hash:\n                raise Exception(\"Failed to upload content\")\n            \n            print(\"\ud83d\udcdd Step 5: Creating and uploading document schema...\")\n            # Create document schema in exact format from raw logs (4 components but count is 3)\n            doc_schema_entries = [\n                f\"{content_hash}:0:{document_uuid}.content:0:{len(content_data)}\",\n                f\"{metadata_hash}:0:{document_uuid}.metadata:0:{len(metadata_content)}\",\n                f\"{pagedata_upload_hash}:0:{document_uuid}.pagedata:0:{len(pagedata_content)}\",\n                f\"{pdf_upload_hash}:0:{document_uuid}.pdf:0:{len(pdf_content)}\"\n            ]\n            # Note: count is 3 even though there are 4 entries (PDF doesn't count)\n            doc_schema_content = f\"3\\n\" + \"\\n\".join(doc_schema_entries)\n            doc_schema_bytes = doc_schema_content.encode('utf-8')\n            doc_schema_hash = self._compute_hash(doc_schema_bytes)\n            \n            doc_schema_upload_hash = self.upload_raw_content(\n                content=doc_schema_bytes,\n                content_type='text/plain; charset=UTF-8',\n                filename=f\"{document_uuid}.docSchema\"\n            )\n            if not doc_schema_upload_hash:\n                raise Exception(\"Failed to upload document schema\")\n            \n            # Create document directory listing\n            data_components = 
[\n                {\n                    'hash': metadata_hash,\n                    'component': f\"{document_uuid}.metadata\",\n                    'size': len(metadata_content)\n                },\n                {\n                    'hash': pagedata_upload_hash,\n                    'component': f\"{document_uuid}.pagedata\",\n                    'size': len(pagedata_content)\n                },\n                {\n                    'hash': pdf_upload_hash,\n                    'component': f\"{document_uuid}.pdf\",\n                    'size': len(pdf_content)\n                },\n                {\n                    'hash': content_hash,\n                    'component': f\"{document_uuid}.content\",\n                    'size': len(content_data)\n                }\n            ]\n            \n            # Add to database\n            new_node = {\n                'uuid': document_uuid,\n                'hash': doc_schema_hash,  # Document hash is the docSchema hash\n                'name': name,\n                'node_type': 'document',\n                'parent_uuid': parent_uuid,\n                'local_path': f\"content/{name}\",\n                'extracted_files': [str(pdf_file)],\n                'component_hashes': {\n                    'content': content_hash,\n                    'metadata': metadata_hash,\n                    'pdf': pdf_upload_hash,\n                    'pagedata': pagedata_upload_hash,\n                    'docSchema': doc_schema_hash,\n                    'rm_files': []\n                },\n                'metadata': json.loads(metadata_content.decode('utf-8')),\n                'last_modified': self._generate_timestamp(),\n                'version': 1,\n                'sync_status': 'uploaded',\n                'last_synced': datetime.now().isoformat(),\n                'size': len(pdf_content)  # Store the actual PDF file size\n            }\n            \n            # Update the metadata to include content_data with sizeInBytes for proper root.docSchema sizing\n            content_data = {\n                \"fileType\": \"pdf\",\n                \"sizeInBytes\": str(len(pdf_content)),\n                \"pageCount\": 1,\n                \"formatVersion\": 1,\n                \"orientation\": \"portrait\"\n            }\n            new_node['metadata']['content_data'] = json.dumps(content_data)\n            \n\n            # \ud83d\udeab REMOVED: Direct database manipulation for final state\n            # Do NOT add to database permanently - let replica sync handle final state\n            \n            # \ud83d\udeab REMOVED: Hash registry updates \n            # Let replica sync discover and register all hashes properly\n            \n            # CRITICAL: Complete the proper upload sequence from real app logs\n            print(\"\ud83d\udcdd Step 6: Updating root.docSchema with new document...\")\n            \n            # Temporarily add document to database for root.docSchema update\n            temp_node = {\n                'uuid': document_uuid,\n                'hash': doc_schema_hash,  # Document hash is the docSchema hash\n                'name': name,\n                'node_type': 'document',\n                'parent_uuid': parent_uuid,\n                'metadata': json.loads(metadata_content.decode('utf-8')),\n                'component_hashes': {\n                    'docSchema': doc_schema_hash\n                },\n                'size': len(doc_schema_bytes)  # Use docSchema size for root.docSchema\n            }\n     
       \n            # Add temporarily for root update\n            self.database['nodes'][document_uuid] = temp_node\n            \n            root_update_success = self.update_root_directory()\n            if not root_update_success:\n                print(\"\u26a0\ufe0f Warning: Root directory update failed - document may not appear in real app\")\n                # Remove temporary entry if root update failed\n                del self.database['nodes'][document_uuid]\n                # Don't fail the upload completely, but warn user\n            else:\n                print(\"\u2705 Root directory updated successfully\")\n                # Remove temporary entry - let replica sync handle final database state\n                del self.database['nodes'][document_uuid]\n            \n            # Always trigger replica sync after any upload (root or folder)\n            # This ensures the new document is properly downloaded and cataloged with final state\n            print(\"\ud83d\udcdd Step 7: Running final replica sync to verify upload...\")\n            \n            try:\n                from local_replica_v2 import RemarkableReplicaBuilder\n                replica_builder = RemarkableReplicaBuilder(self.session)\n                \n                print(\"\ud83d\udd04 Running replica sync to discover new document...\")\n                replica_builder.build_complete_replica()\n                \n                # Reload our database to get the freshly synced data\n                print(\"\ud83d\udd04 Reloading database with fresh sync data...\")\n                self.database = self._load_database()\n                \n                # Verify the document was properly synced\n                if document_uuid in self.database['nodes']:\n                    synced_node = self.database['nodes'][document_uuid]\n                    print(f\"\u2705 Document synced successfully: {synced_node['name']}\")\n                    print(f\"   UUID: {document_uuid}\")\n                    print(f\"   Hash: {synced_node['hash']}\")\n                    print(f\"   Local path: {synced_node.get('local_path', 'Not set')}\")\n                    print(f\"   Extracted files: {synced_node.get('extracted_files', [])}\")\n                else:\n                    print(f\"\u26a0\ufe0f Document {document_uuid} not found in synced database - may need more time to propagate\")\n                    \n            except Exception as sync_e:\n                print(f\"\u26a0\ufe0f Replica sync failed, but upload may have succeeded: {sync_e}\")\n                # Don't fail the entire upload if sync fails\n                pass\n            \n            # \ud83d\udeab REMOVED: Final database save - let replica sync handle database updates\n            # self._save_database()\n            print(f\"\u2705 Successfully uploaded PDF document: {name}\")\n            print(f\"\ud83d\udd04 Document should appear in your device shortly after sync\")\n            return True\n            \n        except Exception as e:\n            print(f\"\u274c Failed to upload PDF document: {e}\")\n            return False\n    \n    def create_notebook(self, name: str, parent_uuid: str = \"\", template: str = \"Blank\") -> bool:\n        \"\"\"Create a new empty notebook\"\"\"\n        try:\n            # Clear any previous document context\n            self._clear_document_context()\n            \n            print(f\"\ud83d\udcd3 Creating notebook: {name}\")\n            \n            # Generate UUIDs and set current document UUID 
for consistent rm-filename headers\n            document_uuid = str(uuid.uuid4())\n            self._current_document_uuid = document_uuid\n            page_uuid = str(uuid.uuid4())\n            print(f\"\ud83d\udcca Document UUID: {document_uuid}\")\n            \n            # Create empty .rm content for first page\n            rm_content = b'\\x00' * 1000  # Minimal empty page content\n            rm_hash = self.upload_raw_content(\n                content=rm_content,\n                content_type='application/octet-stream',\n                filename=f\"{page_uuid}.rm\"\n            )\n            \n            # Create content.json\n            content_data, content_hash = self.create_content_json([page_uuid], template)\n            self.upload_raw_content(\n                content=content_data,\n                content_type='application/octet-stream',\n                filename=f\"{document_uuid}.content\"\n            )\n            \n            # Create metadata\n            metadata_content, metadata_hash = self.create_metadata_json(name, parent_uuid)\n            self.upload_raw_content(\n                content=metadata_content,\n                content_type='application/octet-stream',\n                filename=f\"{document_uuid}.metadata\"\n            )\n            \n            # Create document directory listing\n            data_components = [\n                {\n                    'hash': content_hash,\n                    'component': f\"{document_uuid}.content\",\n                    'size': len(content_data)\n                },\n                {\n                    'hash': metadata_hash,\n                    'component': f\"{document_uuid}.metadata\",\n                    'size': len(metadata_content)\n                },\n                {\n                    'hash': rm_hash,\n                    'component': f\"{document_uuid}/{page_uuid}.rm\",\n                    'size': len(rm_content)\n                }\n            ]\n            \n            doc_dir_content, doc_dir_hash = self.create_directory_listing([], data_components)\n            self.upload_raw_content(doc_dir_content, doc_dir_hash)\n            \n            # Add to database\n            new_node = {\n                'uuid': document_uuid,\n                'hash': doc_dir_hash,\n                'name': name,\n                'node_type': 'document',\n                'parent_uuid': parent_uuid,\n                'local_path': f\"content/{name}\",\n                'extracted_files': [],\n                'component_hashes': {\n                    'content': content_hash,\n                    'metadata': metadata_hash,\n                    'pdf': None,\n                    'pagedata': None,\n                    'rm_files': [rm_hash]\n                },\n                'metadata': json.loads(metadata_content.decode('utf-8')),\n                'last_modified': self._generate_timestamp(),\n                'version': 1,\n                'sync_status': 'created',\n                'last_synced': datetime.now().isoformat()\n            }\n            \n            # \ud83d\udeab REMOVED: Direct database manipulation \n            # Do NOT add to database directly - let replica sync handle it properly\n            # self.database['nodes'][document_uuid] = new_node\n            \n            # \ud83d\udeab REMOVED: Hash registry updates \n            # Let replica sync discover and register all hashes properly\n            # Hash registry should only be populated from actual cloud downloads\n            \n      
      # \ud83d\udd04 CRITICAL FIX: Instead of manually updating database, trigger fresh replica sync\n            # This ensures the new notebook is properly downloaded and cataloged\n            try:\n                from local_replica_v2 import RemarkableReplicaBuilder\n                replica_builder = RemarkableReplicaBuilder(self.session)\n                \n                print(\"\ud83d\udd04 Running replica sync to discover new notebook...\")\n                replica_builder.build_complete_replica()\n                \n                # Reload our database to get the freshly synced data\n                print(\"\ud83d\udd04 Reloading database with fresh sync data...\")\n                self.database = self._load_database()\n                \n                # Verify the notebook was properly synced\n                if document_uuid in self.database['nodes']:\n                    synced_node = self.database['nodes'][document_uuid]\n                    print(f\"\u2705 Notebook synced successfully: {synced_node['name']}\")\n                    print(f\"   UUID: {document_uuid}\")\n                    print(f\"   Hash: {synced_node['hash']}\")\n                    print(f\"   Local path: {synced_node.get('local_path', 'Not set')}\")\n                    print(f\"   Extracted files: {synced_node.get('extracted_files', [])}\")\n                else:\n                    print(f\"\u26a0\ufe0f Notebook {document_uuid} not found in synced database - may need more time to propagate\")\n                    \n            except Exception as sync_e:\n                print(f\"\u26a0\ufe0f Replica sync failed, but upload may have succeeded: {sync_e}\")\n                # Don't fail the entire upload if sync fails\n                pass\n            \n            print(f\"\u2705 Successfully created notebook: {name}\")\n            print(f\"\ud83d\udd04 Notebook should appear in your device shortly after sync\")\n            return True\n            \n        except Exception as e:\n            print(f\"\u274c Failed to create notebook: {e}\")\n            return False",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/cloudtest/upload_manager.py",
      "tags": [
        "class",
        "remarkableuploadmanager"
      ],
      "updated_at": "2025-12-07T01:59:32.407270",
      "usage_example": "# Example usage:\n# result = RemarkableUploadManager(bases)"
    },
    {
      "best_practices": [],
      "class_interface": {
        "attributes": [],
        "methods": [
          {
            "is_property": false,
            "is_static": false,
            "name": "__init__",
            "parameters": {},
            "purpose": "Internal method:   init  ",
            "returns": "None",
            "signature": "__init__(self)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "get_current_root_info",
            "parameters": {},
            "purpose": "Get current root.docSchema info using working method",
            "returns": "None",
            "signature": "get_current_root_info(self)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "get_document_info",
            "parameters": {
              "doc_uuid": "Type: str",
              "root_content": "Type: str"
            },
            "purpose": "Find document entry in root.docSchema",
            "returns": "None",
            "signature": "get_document_info(self, doc_uuid, root_content)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "get_document_schema",
            "parameters": {
              "doc_hash": "Type: str"
            },
            "purpose": "Retrieve document's docSchema",
            "returns": "None",
            "signature": "get_document_schema(self, doc_hash)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "get_current_metadata",
            "parameters": {
              "doc_lines": "Type: list"
            },
            "purpose": "Extract and fetch current metadata",
            "returns": "None",
            "signature": "get_current_metadata(self, doc_lines)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "create_updated_metadata",
            "parameters": {
              "current_metadata": "Type: dict",
              "new_parent": "Type: str"
            },
            "purpose": "Create updated metadata with new parent",
            "returns": "None",
            "signature": "create_updated_metadata(self, current_metadata, new_parent)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "upload_new_metadata",
            "parameters": {
              "doc_uuid": "Type: str",
              "metadata_json": "Type: str"
            },
            "purpose": "Upload new metadata and return hash",
            "returns": "See docstring for return details",
            "signature": "upload_new_metadata(self, metadata_json, doc_uuid)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "upload_real_pagedata",
            "parameters": {
              "doc_uuid": "Type: str"
            },
            "purpose": "Upload real pagedata (newline) to match real app documents",
            "returns": "None",
            "signature": "upload_real_pagedata(self, doc_uuid)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "create_new_document_schema",
            "parameters": {
              "doc_lines": "Type: list",
              "metadata_line": "Type: str",
              "new_metadata_hash": "Type: str",
              "new_pagedata_hash": "Type: str"
            },
            "purpose": "Create new document schema with updated metadata hash and pagedata",
            "returns": "None",
            "signature": "create_new_document_schema(self, doc_lines, new_metadata_hash, metadata_line, new_pagedata_hash)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "upload_new_document_schema",
            "parameters": {
              "doc_content": "Type: str",
              "doc_uuid": "Type: str"
            },
            "purpose": "Upload new document schema",
            "returns": "None",
            "signature": "upload_new_document_schema(self, doc_content, doc_uuid)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "update_root_docschema",
            "parameters": {
              "doc_info": "Type: dict",
              "new_doc_hash": "Type: str",
              "root_content": "Type: str"
            },
            "purpose": "Update root.docSchema with new document hash",
            "returns": "None",
            "signature": "update_root_docschema(self, root_content, doc_info, new_doc_hash)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "upload_new_root",
            "parameters": {
              "generation": "Type: int",
              "root_content": "Type: str"
            },
            "purpose": "Upload new root.docSchema and update roothash",
            "returns": "None",
            "signature": "upload_new_root(self, root_content, generation)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "move_document_from_trash",
            "parameters": {
              "doc_uuid": "Type: str"
            },
            "purpose": "Complete process to move document from trash to root",
            "returns": "None",
            "signature": "move_document_from_trash(self, doc_uuid)"
          }
        ]
      },
      "complexity": "moderate",
      "component_type": "class",
      "conditional_imports": [],
      "created_at": "2025-12-07 00:56:35",
      "decorators": [],
      "dependencies": [],
      "description": "Moves documents between folders using the working upload mechanism",
      "docstring": "Moves documents between folders using the working upload mechanism",
      "id": 2123,
      "imports": [
        "import json",
        "import time",
        "import hashlib",
        "import uuid",
        "import base64",
        "import zlib",
        "from pathlib import Path",
        "from auth import RemarkableAuth",
        "import crc32c"
      ],
      "imports_required": [
        "import json",
        "import time",
        "import hashlib",
        "import uuid",
        "import base64"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 520,
      "line_start": 52,
      "name": "DocumentMover",
      "parameters": [
        {
          "annotation": "",
          "default": null,
          "name": "bases"
        }
      ],
      "parameters_explained": {
        "bases": "Parameter of type "
      },
      "parent_class": null,
      "purpose": "Moves documents between folders using the working upload mechanism",
      "return_annotation": null,
      "return_explained": "Returns unspecified type",
      "settings_required": [],
      "source_code": "class DocumentMover:\n    \"\"\"Moves documents between folders using the working upload mechanism\"\"\"\n    \n    def __init__(self):\n        # Load auth session\n        auth = RemarkableAuth()\n        self.session = auth.get_authenticated_session()\n        \n        if not self.session:\n            raise RuntimeError(\"Failed to authenticate with reMarkable\")\n        \n        print(\"\ud83d\udd04 Document Mover Initialized\")\n    \n    def get_current_root_info(self):\n        \"\"\"Get current root.docSchema info using working method\"\"\"\n        print(\"\\n\ud83d\udccb Step 1: Getting current root.docSchema...\")\n        \n        # Get root info\n        root_response = self.session.get(\"https://eu.tectonic.remarkable.com/sync/v4/root\")\n        root_response.raise_for_status()\n        root_data = root_response.json()\n        \n        # Get root content\n        root_content_response = self.session.get(f\"https://eu.tectonic.remarkable.com/sync/v3/files/{root_data['hash']}\")\n        root_content_response.raise_for_status()\n        root_content = root_content_response.text\n        \n        print(f\"\u2705 Current root hash: {root_data['hash']}\")\n        print(f\"\u2705 Current generation: {root_data.get('generation')}\")\n        print(f\"\u2705 Root content size: {len(root_content)} bytes\")\n        \n        return root_data, root_content\n    \n    def get_document_info(self, doc_uuid: str, root_content: str):\n        \"\"\"Find document entry in root.docSchema\"\"\"\n        print(f\"\\n\ud83d\udcc4 Step 2: Finding document {doc_uuid[:8]}... in root.docSchema\")\n        \n        lines = root_content.strip().split('\\n')\n        for line in lines[1:]:  # Skip version header\n            if doc_uuid in line:\n                parts = line.split(':')\n                if len(parts) >= 5:\n                    doc_info = {\n                        'hash': parts[0],\n                        'uuid': parts[2],\n                        'type': parts[3],\n                        'size': parts[4],\n                        'full_line': line\n                    }\n                    print(f\"\u2705 Found document entry:\")\n                    print(f\"   Hash: {doc_info['hash']}\")\n                    print(f\"   Type: {doc_info['type']}\")\n                    print(f\"   Size: {doc_info['size']}\")\n                    print(f\"   Full line: {doc_info['full_line']}\")\n                    return doc_info\n        \n        raise ValueError(f\"Document {doc_uuid} not found in root.docSchema\")\n    \n    def get_document_schema(self, doc_hash: str):\n        \"\"\"Retrieve document's docSchema\"\"\"\n        print(f\"\\n\ud83d\udcc4 Step 3: Retrieving document docSchema...\")\n        \n        doc_response = self.session.get(f\"https://eu.tectonic.remarkable.com/sync/v3/files/{doc_hash}\")\n        doc_response.raise_for_status()\n        doc_content = doc_response.text\n        \n        print(f\"\u2705 Document docSchema size: {len(doc_content)} bytes\")\n        print(f\"\ud83d\udcc4 Document docSchema content:\")\n        \n        lines = doc_content.strip().split('\\n')\n        for i, line in enumerate(lines):\n            print(f\"   Line {i}: {line}\")\n        \n        return doc_content, lines\n    \n    def get_current_metadata(self, doc_lines: list):\n        \"\"\"Extract and fetch current metadata\"\"\"\n        print(f\"\\n\ud83d\udcdd Step 4: Getting current metadata...\")\n        \n        metadata_hash = None\n        
metadata_line = None\n        \n        # Find metadata component\n        for line in doc_lines[1:]:  # Skip version\n            if ':' in line and '.metadata' in line:\n                parts = line.split(':')\n                if len(parts) >= 5:\n                    metadata_hash = parts[0]\n                    metadata_line = line\n                    break\n        \n        if not metadata_hash:\n            raise ValueError(\"Metadata component not found in document schema\")\n        \n        print(f\"\u2705 Metadata hash: {metadata_hash}\")\n        \n        # Fetch current metadata\n        metadata_response = self.session.get(f\"https://eu.tectonic.remarkable.com/sync/v3/files/{metadata_hash}\")\n        metadata_response.raise_for_status()\n        current_metadata = json.loads(metadata_response.text)\n        \n        print(f\"\u2705 Current metadata:\")\n        for key, value in current_metadata.items():\n            print(f\"   {key}: {value}\")\n        \n        return current_metadata, metadata_line\n    \n    def create_updated_metadata(self, current_metadata: dict, new_parent: str = \"\"):\n        \"\"\"Create updated metadata with new parent\"\"\"\n        print(f\"\\n\ud83d\udd04 Step 5: Creating updated metadata...\")\n        \n        # Copy current metadata and update parent\n        updated_metadata = current_metadata.copy()\n        old_parent = updated_metadata.get('parent', '')\n        updated_metadata['parent'] = new_parent\n        \n        print(f\"\u2705 Updating parent: '{old_parent}' \u2192 '{new_parent}'\")\n        \n        # Add/update source field to match real app documents (use macOS like real invoice)\n        updated_metadata['source'] = 'com.remarkable.macos'  # Always set to match real invoice\n        print(f\"\u2705 Setting 'source' field: com.remarkable.macos\")\n        \n        # Fix lastOpened to match real app behavior (use 0 for unopened)\n        if 'lastOpened' in updated_metadata and updated_metadata['lastOpened'] != 0:\n            updated_metadata['lastOpened'] = 0  # Real app uses 0 for unopened documents\n            print(f\"\u2705 Setting lastOpened to 0 (real app behavior)\")\n        \n        # Make metadata match real app behavior (don't mark as modified for moves)\n        updated_metadata['lastModified'] = int(time.time() * 1000)\n        updated_metadata['metadatamodified'] = False  # Real app doesn't mark as modified\n        updated_metadata['modified'] = False  # Real app doesn't mark as modified\n        \n        # Convert to JSON\n        updated_metadata_json = json.dumps(updated_metadata, separators=(',', ':'))\n        \n        print(f\"\u2705 Updated metadata ({len(updated_metadata_json)} bytes):\")\n        print(f\"   {updated_metadata_json[:100]}...\")\n        \n        return updated_metadata_json\n    \n    def upload_new_metadata(self, metadata_json: str, doc_uuid: str):\n        \"\"\"Upload new metadata and return hash\"\"\"\n        print(f\"\\n\u2b06\ufe0f Step 6: Uploading new metadata...\")\n        \n        # Calculate hash\n        metadata_hash = hashlib.sha256(metadata_json.encode()).hexdigest()\n        print(f\"\u2705 New metadata hash: {metadata_hash}\")\n        \n        # Upload using working method from upload_manager.py\n        headers = {\n            'Content-Type': 'application/octet-stream',\n            'rm-batch-number': '1',\n            'rm-filename': f'{doc_uuid}.metadata',  # Required: UUID.metadata format\n            'rm-sync-id': str(uuid.uuid4()),\n            
'User-Agent': 'reMarkable-desktop-win/3.11.1.1951',  # Use Windows UA\n            'Accept-Encoding': 'gzip, deflate',\n            'Accept-Language': 'en-BE,*',\n            'Connection': 'Keep-Alive'\n        }\n        \n        # Add CRC32C checksum\n        crc32c_header = compute_crc32c_header(metadata_json.encode())\n        if crc32c_header:\n            headers['x-goog-hash'] = crc32c_header\n        \n        upload_response = self.session.put(\n            f\"https://eu.tectonic.remarkable.com/sync/v3/files/{metadata_hash}\",\n            data=metadata_json.encode(),\n            headers=headers \n        )\n        \n        print(f\"\u2705 Metadata upload response: {upload_response.status_code}\")\n        if upload_response.status_code not in [200, 202]:\n            print(f\"\u274c Upload failed: {upload_response.text}\")\n            raise RuntimeError(f\"Metadata upload failed: {upload_response.status_code}\")\n        \n        return metadata_hash\n    \n    def upload_real_pagedata(self, doc_uuid: str):\n        \"\"\"Upload real pagedata (newline) to match real app documents\"\"\"\n        print(f\"\\n\u2b06\ufe0f Step 6.5: Uploading real pagedata...\")\n        \n        # Real app pagedata is just a newline character\n        pagedata_content = \"\\n\"\n        pagedata_hash = hashlib.sha256(pagedata_content.encode()).hexdigest()\n        \n        print(f\"\u2705 Real pagedata hash: {pagedata_hash}\")\n        print(f\"\u2705 Real pagedata content: {repr(pagedata_content)} ({len(pagedata_content)} bytes)\")\n        \n        # Upload pagedata using working method\n        headers = {\n            'Content-Type': 'application/octet-stream',\n            'rm-batch-number': '1',\n            'rm-filename': f'{doc_uuid}.pagedata',  # Required: UUID.pagedata format\n            'rm-sync-id': str(uuid.uuid4()),\n            'User-Agent': 'reMarkable-desktop-win/3.11.1.1951',\n            'Accept-Encoding': 'gzip, deflate',\n            'Accept-Language': 'en-BE,*',\n            'Connection': 'Keep-Alive'\n        }\n        \n        # Add CRC32C checksum\n        crc32c_header = compute_crc32c_header(pagedata_content.encode())\n        if crc32c_header:\n            headers['x-goog-hash'] = crc32c_header\n        \n        upload_response = self.session.put(\n            f\"https://eu.tectonic.remarkable.com/sync/v3/files/{pagedata_hash}\",\n            data=pagedata_content.encode(),\n            headers=headers \n        )\n        \n        print(f\"\u2705 Pagedata upload response: {upload_response.status_code}\")\n        if upload_response.status_code not in [200, 202]:\n            print(f\"\u274c Upload failed: {upload_response.text}\")\n            raise RuntimeError(f\"Pagedata upload failed: {upload_response.status_code}\")\n        \n        return pagedata_hash\n    \n    def create_new_document_schema(self, doc_lines: list, new_metadata_hash: str, metadata_line: str, new_pagedata_hash: str = None):\n        \"\"\"Create new document schema with updated metadata hash and pagedata\"\"\"\n        print(f\"\\n\ud83c\udfd7\ufe0f Step 7: Creating new document schema...\")\n        \n        # Replace metadata line and pagedata line with new hashes\n        new_lines = []\n        pagedata_line = None\n        \n        # Find pagedata line\n        for line in doc_lines[1:]:  # Skip version\n            if ':' in line and '.pagedata' in line:\n                pagedata_line = line\n                break\n        \n        for line in doc_lines:\n            if 
line == metadata_line:\n                # Replace metadata hash but keep size\n                parts = line.split(':')\n                parts[0] = new_metadata_hash  # Update hash\n                new_line = ':'.join(parts)\n                new_lines.append(new_line)\n                print(f\"\u2705 Updated metadata line:\")\n                print(f\"   Old: {line}\")\n                print(f\"   New: {new_line}\")\n            elif new_pagedata_hash and line == pagedata_line:\n                # Replace pagedata hash and update size to 1 byte\n                parts = line.split(':')\n                parts[0] = new_pagedata_hash  # Update hash\n                parts[4] = '1'  # Update size to 1 byte (newline)\n                new_line = ':'.join(parts)\n                new_lines.append(new_line)\n                print(f\"\u2705 Updated pagedata line:\")\n                print(f\"   Old: {line}\")\n                print(f\"   New: {new_line}\")\n            else:\n                new_lines.append(line)\n        \n        new_doc_content = '\\n'.join(new_lines)\n        \n        print(f\"\u2705 New document schema ({len(new_doc_content)} bytes):\")\n        for i, line in enumerate(new_lines):\n            print(f\"   Line {i}: {line}\")\n        \n        return new_doc_content\n    \n    def upload_new_document_schema(self, doc_content: str, doc_uuid: str):\n        \"\"\"Upload new document schema\"\"\"\n        print(f\"\\n\u2b06\ufe0f Step 8: Uploading new document schema...\")\n        \n        # Calculate hash\n        doc_hash = hashlib.sha256(doc_content.encode()).hexdigest()\n        print(f\"\u2705 New document schema hash: {doc_hash}\")\n        \n        # Upload using working method\n        headers = {\n            'Content-Type': 'application/octet-stream',\n            'rm-batch-number': '1',\n            'rm-filename': f'{doc_uuid}.docSchema',  # Required: UUID.docSchema format\n            'rm-sync-id': str(uuid.uuid4()),\n            'User-Agent': 'reMarkable-desktop-win/3.11.1.1951',\n            'Accept-Encoding': 'gzip, deflate',\n            'Accept-Language': 'en-BE,*',\n            'Connection': 'Keep-Alive'\n        }\n        \n        # Add CRC32C checksum\n        crc32c_header = compute_crc32c_header(doc_content.encode())\n        if crc32c_header:\n            headers['x-goog-hash'] = crc32c_header\n        \n        upload_response = self.session.put(\n            f\"https://eu.tectonic.remarkable.com/sync/v3/files/{doc_hash}\",\n            data=doc_content.encode(),\n            headers=headers\n        )\n        \n        print(f\"\u2705 Document schema upload response: {upload_response.status_code}\")\n        if upload_response.status_code not in [200, 202]:\n            print(f\"\u274c Upload failed: {upload_response.text}\")\n            raise RuntimeError(f\"Document schema upload failed: {upload_response.status_code}\")\n        \n        return doc_hash\n    \n    def update_root_docschema(self, root_content: str, doc_info: dict, new_doc_hash: str):\n        \"\"\"Update root.docSchema with new document hash\"\"\"\n        print(f\"\\n\ud83d\udd04 Step 9: Updating root.docSchema...\")\n        \n        # Replace old document line with new hash\n        old_line = doc_info['full_line']\n        parts = old_line.split(':')\n        parts[0] = new_doc_hash  # Update document hash\n        new_line = ':'.join(parts)\n        \n        print(f\"\u2705 Updating root.docSchema entry:\")\n        print(f\"   Old: {old_line}\")\n        print(f\"   New: 
{new_line}\")\n        \n        # Replace in root content\n        new_root_content = root_content.replace(old_line, new_line)\n        \n        print(f\"\u2705 New root.docSchema size: {len(new_root_content)} bytes\")\n        \n        return new_root_content\n    \n    def upload_new_root(self, root_content: str, generation: int):\n        \"\"\"Upload new root.docSchema and update roothash\"\"\"\n        print(f\"\\n\u2b06\ufe0f Step 10: Uploading new root.docSchema...\")\n        \n        # Calculate hash\n        root_hash = hashlib.sha256(root_content.encode()).hexdigest()\n        print(f\"\u2705 New root hash: {root_hash}\")\n        \n        # Upload root content using working method\n        headers = {\n            'Content-Type': 'text/plain',\n            'rm-batch-number': '1',\n            'rm-filename': 'root.docSchema',  # System filename for root.docSchema\n            'rm-sync-id': str(uuid.uuid4()),\n            'User-Agent': 'reMarkable-desktop-win/3.11.1.1951',\n            'Accept-Encoding': 'gzip, deflate',\n            'Accept-Language': 'en-BE,*',\n            'Connection': 'Keep-Alive'\n        }\n        \n        # Add CRC32C checksum (from test_uploads.py method)\n        crc32c_header = compute_crc32c_header(root_content.encode())\n        if crc32c_header:\n            headers['x-goog-hash'] = crc32c_header\n        \n        upload_response = self.session.put(\n            f\"https://eu.tectonic.remarkable.com/sync/v3/files/{root_hash}\",\n            data=root_content.encode(),\n            headers=headers\n        )\n        \n        print(f\"\u2705 Root content upload response: {upload_response.status_code}\")\n        if upload_response.status_code not in [200, 202]:\n            print(f\"\u274c Upload failed: {upload_response.text}\")\n            raise RuntimeError(f\"Root content upload failed: {upload_response.status_code}\")\n        \n        # Update root hash pointer using working method\n        print(f\"\\n\ud83d\udd04 Step 11: Updating root hash pointer...\")\n        \n        # Create root data exactly like working upload_manager.py\n        root_update_data = {\n            \"broadcast\": True,\n            \"generation\": generation,  # Use generation parameter\n            \"hash\": root_hash\n        }\n        \n        # Convert to JSON with 2-space indent like real app\n        root_content_body = json.dumps(root_update_data, indent=2).encode('utf-8')\n        \n        # Headers exactly like working upload_manager.py\n        headers = {\n            'Content-Type': 'application/json',\n            'rm-batch-number': '1',\n            'rm-filename': 'roothash',\n            'rm-sync-id': str(uuid.uuid4()),\n            'User-Agent': 'reMarkable-desktop-win/3.11.1.1951',\n            'Accept-Encoding': 'gzip, deflate',\n            'Accept-Language': 'en-BE,*',\n            'Connection': 'Keep-Alive'\n        }\n        \n        # Add CRC32C checksum\n        crc32c_header = compute_crc32c_header(root_content_body)\n        if crc32c_header:\n            headers['x-goog-hash'] = crc32c_header\n        \n        # Use /sync/v3/root endpoint like working code\n        root_update_response = self.session.put(\n            \"https://eu.tectonic.remarkable.com/sync/v3/root\",\n            data=root_content_body,\n            headers=headers\n        )\n        \n        print(f\"\u2705 Root update response: {root_update_response.status_code}\")\n        if root_update_response.status_code not in [200, 202]:\n            
print(f\"\u274c Root update failed: {root_update_response.text}\")\n            raise RuntimeError(f\"Root update failed: {root_update_response.status_code}\")\n        \n        return root_hash\n    \n    def move_document_from_trash(self, doc_uuid: str):\n        \"\"\"Complete process to move document from trash to root\"\"\"\n        print(f\"\ud83d\udd04 Moving Document from Trash to Root\")\n        print(f\"Document UUID: {doc_uuid}\")\n        print(\"=\" * 60)\n        \n        try:\n            # Step 1: Get current root info\n            root_data, root_content = self.get_current_root_info()\n            \n            # Step 2: Find document in root\n            doc_info = self.get_document_info(doc_uuid, root_content)\n            \n            # Step 3: Get document schema\n            doc_content, doc_lines = self.get_document_schema(doc_info['hash'])\n            \n            # Step 4: Get current metadata\n            current_metadata, metadata_line = self.get_current_metadata(doc_lines)\n            \n            # Check current parent and determine move action\n            current_parent = current_metadata.get('parent', '')\n            if current_parent == 'trash':\n                print(f\"\ud83d\udcc1 Document is in trash, moving to gpt_in folder...\")\n                target_parent = \"99c6551f-2855-44cf-a4e4-c9c586558f42\"  # gpt_in folder\n                move_description = \"from trash to gpt_in folder\"\n            elif current_parent == '':\n                print(f\"\ud83d\udcc1 Document is in root, moving to gpt_in folder...\")\n                target_parent = \"99c6551f-2855-44cf-a4e4-c9c586558f42\"  # gpt_in folder  \n                move_description = \"from root to gpt_in folder\"\n            else:\n                print(f\"\ud83d\udcc1 Document is in folder '{current_parent}', moving to gpt_in folder...\")\n                target_parent = \"99c6551f-2855-44cf-a4e4-c9c586558f42\"  # gpt_in folder\n                move_description = f\"from folder '{current_parent}' to gpt_in folder\"\n            \n            # Step 5: Create updated metadata (move to gpt_in folder)\n            updated_metadata_json = self.create_updated_metadata(current_metadata, new_parent=target_parent)\n            \n            # Step 6: Upload new metadata\n            new_metadata_hash = self.upload_new_metadata(updated_metadata_json, doc_uuid)\n            \n            # Step 6.5: Upload real pagedata to match real app\n            new_pagedata_hash = self.upload_real_pagedata(doc_uuid)\n            \n            # Step 7: Create new document schema\n            new_doc_content = self.create_new_document_schema(doc_lines, new_metadata_hash, metadata_line, new_pagedata_hash)\n            \n            # Step 8: Upload new document schema\n            new_doc_hash = self.upload_new_document_schema(new_doc_content, doc_uuid)\n            \n            # Step 9: Update root.docSchema\n            new_root_content = self.update_root_docschema(root_content, doc_info, new_doc_hash)\n            \n            # Step 10-11: Upload new root and update pointer\n            new_root_hash = self.upload_new_root(new_root_content, root_data['generation'])\n            \n            print(f\"\\n\ud83c\udf89 SUCCESS! 
Document moved {move_description}\")\n            print(f\"   Document: {current_metadata.get('visibleName')}\")\n            print(f\"   Old parent: {current_parent or '(root)'}\")\n            print(f\"   New parent: gpt_in ({target_parent})\")\n            print(f\"   New root hash: {new_root_hash}\")\n            \n            return True\n            \n        except Exception as e:\n            print(f\"\\n\u274c Move operation failed: {e}\")\n            return False",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/cloudtest/test_move_from_trash.py",
      "tags": [
        "class",
        "documentmover"
      ],
      "updated_at": "2025-12-07T01:56:35.177857",
      "usage_example": "# Example usage:\n# result = DocumentMover(bases)"
    },
    {
      "best_practices": [
        "Always backup your source files before running the fixer as it modifies files in place",
        "Run the fixer in a version-controlled environment so changes can be reviewed and reverted if needed",
        "Call methods in sequence: fix methods first, then create_fixed_upload_test(), then generate_fix_summary()",
        "Review the fixes_applied list after execution to verify all intended changes were made",
        "The fix_jwt_device_description method only flags issues for manual review rather than automatically fixing them",
        "Check console output for error messages (\u274c) indicating files that couldn't be updated",
        "The class assumes specific file names and patterns exist in the codebase; verify these exist before running",
        "The fixer is idempotent for most operations but may create duplicate entries if run multiple times on already-fixed code",
        "Ensure the base_dir path is correct; it defaults to the parent directory of the script file"
      ],
      "class_interface": {
        "attributes": [
          {
            "description": "The base directory path where source files are located (parent directory of the script)",
            "is_class_variable": false,
            "name": "base_dir",
            "type": "Path"
          },
          {
            "description": "Accumulates descriptions of all fixes successfully applied during the session",
            "is_class_variable": false,
            "name": "fixes_applied",
            "type": "list[str]"
          }
        ],
        "methods": [
          {
            "is_property": false,
            "is_static": false,
            "name": "__init__",
            "parameters": {},
            "purpose": "Initialize the ImplementationFixer with base directory and empty fixes tracking list",
            "returns": "None",
            "signature": "__init__(self)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "fix_user_agent",
            "parameters": {},
            "purpose": "Replace old user-agent string with real app user-agent in upload_manager.py, auth.py, and test_uploads.py",
            "returns": "None (modifies files in place and updates fixes_applied list)",
            "signature": "fix_user_agent(self)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "fix_metadata_source",
            "parameters": {},
            "purpose": "Change metadata source field from 'com.remarkable.windows' to 'com.remarkable.macos' in upload_manager.py",
            "returns": "None (modifies files in place and updates fixes_applied list)",
            "signature": "fix_metadata_source(self)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "fix_pagedata_content",
            "parameters": {},
            "purpose": "Replace empty string pagedata with newline character ('\\n') in upload_manager.py",
            "returns": "None (modifies files in place and updates fixes_applied list)",
            "signature": "fix_pagedata_content(self)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "fix_last_opened_field",
            "parameters": {},
            "purpose": "Ensure lastOpened field is consistently set to '0' in metadata structures in upload_manager.py",
            "returns": "None (modifies files in place and updates fixes_applied list)",
            "signature": "fix_last_opened_field(self)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "fix_jwt_device_description",
            "parameters": {},
            "purpose": "Analyze auth.py for device description patterns and flag for manual review (does not auto-fix)",
            "returns": "None (prints warnings and updates fixes_applied list with manual action items)",
            "signature": "fix_jwt_device_description(self)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "create_fixed_upload_test",
            "parameters": {},
            "purpose": "Generate a new test script (fixed_upload_test.py) that incorporates all identified fixes",
            "returns": "None (creates new executable Python file and updates fixes_applied list)",
            "signature": "create_fixed_upload_test(self)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "generate_fix_summary",
            "parameters": {},
            "purpose": "Print and save a JSON summary of all fixes applied during the session",
            "returns": "None (prints summary to console and saves JSON file to test_results directory)",
            "signature": "generate_fix_summary(self)"
          }
        ]
      },
      "complexity": "moderate",
      "component_type": "class",
      "conditional_imports": [],
      "created_at": "2025-12-07 00:56:00",
      "decorators": [],
      "dependencies": [
        "pathlib",
        "json",
        "os",
        "time",
        "uuid"
      ],
      "description": "A utility class that automatically fixes implementation discrepancies between a custom reMarkable tablet upload implementation and the real reMarkable app behavior by modifying source files.",
      "docstring": "Fix our implementation to match real app behavior",
      "id": 2121,
      "imports": [
        "import json",
        "import os",
        "import time",
        "from pathlib import Path",
        "from typing import Dict",
        "from typing import Any"
      ],
      "imports_required": [
        "from pathlib import Path",
        "import json",
        "import os",
        "import time",
        "import uuid"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 365,
      "line_start": 14,
      "name": "ImplementationFixer",
      "parameters": [
        {
          "annotation": "",
          "default": null,
          "name": "bases"
        }
      ],
      "parameters_explained": {
        "__init__": "No parameters required. The constructor automatically initializes the base directory (parent directory of the current file) and an empty list to track applied fixes."
      },
      "parent_class": null,
      "purpose": "This class identifies and applies fixes to source code files to ensure the custom implementation matches the real reMarkable app's behavior. It updates user-agent strings, metadata source fields, pagedata content, lastOpened fields, and JWT device descriptions across multiple Python files. It also generates a fixed test script and provides a summary of all applied fixes. The class is designed to be run as a one-time fixer or as part of a maintenance workflow to align custom code with observed real app behavior.",
      "return_annotation": null,
      "return_explained": "Instantiation returns an ImplementationFixer object. Methods do not return values but modify files in place and print status messages. The fixes_applied attribute accumulates a list of strings describing each fix applied. The generate_fix_summary method creates a JSON file with the summary but does not return it.",
      "settings_required": [
        "Write permissions to the directory containing the script and target files (upload_manager.py, auth.py, test_uploads.py)",
        "A 'test_results' directory will be created if it doesn't exist for storing summaries and test files"
      ],
      "source_code": "class ImplementationFixer:\n    \"\"\"Fix our implementation to match real app behavior\"\"\"\n    \n    def __init__(self):\n        self.base_dir = Path(__file__).parent\n        self.fixes_applied = []\n    \n    def fix_user_agent(self):\n        \"\"\"Fix user-agent to match real app\"\"\"\n        print(\"\ud83d\udd27 Fixing User-Agent...\")\n        \n        # Find files that contain user-agent strings\n        files_to_fix = [\n            self.base_dir / \"upload_manager.py\",\n            self.base_dir / \"auth.py\",\n            self.base_dir / \"test_uploads.py\"\n        ]\n        \n        old_ua = \"reMarkable-desktop-win/3.11.1.1951\"\n        new_ua = \"desktop/3.20.0.922 (macos 15.4)\"\n        \n        for file_path in files_to_fix:\n            if file_path.exists():\n                try:\n                    with open(file_path, 'r') as f:\n                        content = f.read()\n                    \n                    if old_ua in content:\n                        updated_content = content.replace(old_ua, new_ua)\n                        with open(file_path, 'w') as f:\n                            f.write(updated_content)\n                        print(f\"   \u2705 Updated {file_path.name}\")\n                        self.fixes_applied.append(f\"Updated user-agent in {file_path.name}\")\n                \n                except Exception as e:\n                    print(f\"   \u274c Failed to update {file_path.name}: {e}\")\n    \n    def fix_metadata_source(self):\n        \"\"\"Fix metadata source field to match real app\"\"\"\n        print(\"\ud83d\udd27 Fixing Metadata Source Field...\")\n        \n        files_to_fix = [\n            self.base_dir / \"upload_manager.py\"\n        ]\n        \n        old_source = '\"source\": \"com.remarkable.windows\"'\n        new_source = '\"source\": \"com.remarkable.macos\"'\n        \n        # Also fix the alternative format\n        old_source_alt = \"'source': 'com.remarkable.windows'\"\n        new_source_alt = \"'source': 'com.remarkable.macos'\"\n        \n        for file_path in files_to_fix:\n            if file_path.exists():\n                try:\n                    with open(file_path, 'r') as f:\n                        content = f.read()\n                    \n                    updated = False\n                    if old_source in content:\n                        content = content.replace(old_source, new_source)\n                        updated = True\n                    \n                    if old_source_alt in content:\n                        content = content.replace(old_source_alt, new_source_alt)\n                        updated = True\n                    \n                    if updated:\n                        with open(file_path, 'w') as f:\n                            f.write(content)\n                        print(f\"   \u2705 Updated source field in {file_path.name}\")\n                        self.fixes_applied.append(f\"Updated metadata source in {file_path.name}\")\n                \n                except Exception as e:\n                    print(f\"   \u274c Failed to update {file_path.name}: {e}\")\n    \n    def fix_pagedata_content(self):\n        \"\"\"Fix pagedata to use newline character instead of empty string\"\"\"\n        print(\"\ud83d\udd27 Fixing Pagedata Content...\")\n        \n        files_to_fix = [\n            self.base_dir / \"upload_manager.py\"\n        ]\n        \n        # Look for pagedata creation patterns\n        
old_patterns = [\n            'pagedata = \"\"',\n            \"pagedata = ''\",\n            'pagedata_content = \"\"',\n            \"pagedata_content = ''\"\n        ]\n        \n        new_pattern = 'pagedata = \"\\\\n\"'\n        \n        for file_path in files_to_fix:\n            if file_path.exists():\n                try:\n                    with open(file_path, 'r') as f:\n                        content = f.read()\n                    \n                    updated = False\n                    for old_pattern in old_patterns:\n                        if old_pattern in content:\n                            content = content.replace(old_pattern, new_pattern)\n                            updated = True\n                    \n                    if updated:\n                        with open(file_path, 'w') as f:\n                            f.write(content)\n                        print(f\"   \u2705 Updated pagedata content in {file_path.name}\")\n                        self.fixes_applied.append(f\"Updated pagedata content in {file_path.name}\")\n                \n                except Exception as e:\n                    print(f\"   \u274c Failed to update {file_path.name}: {e}\")\n    \n    def fix_last_opened_field(self):\n        \"\"\"Ensure lastOpened is consistently set to '0'\"\"\"\n        print(\"\ud83d\udd27 Fixing LastOpened Field...\")\n        \n        files_to_fix = [\n            self.base_dir / \"upload_manager.py\"\n        ]\n        \n        # Look for lastOpened patterns that might not be \"0\"\n        patterns_to_check = [\n            '\"lastOpened\":',\n            \"'lastOpened':\"\n        ]\n        \n        for file_path in files_to_fix:\n            if file_path.exists():\n                try:\n                    with open(file_path, 'r') as f:\n                        lines = f.readlines()\n                    \n                    updated = False\n                    for i, line in enumerate(lines):\n                        for pattern in patterns_to_check:\n                            if pattern in line and '\"0\"' not in line and \"'0'\" not in line:\n                                # Fix the line to use \"0\"\n                                if '\"lastOpened\":' in line:\n                                    lines[i] = line.split('\"lastOpened\":')[0] + '\"lastOpened\": \"0\",' + line.split(':')[1].split(',', 1)[1] if ',' in line.split(':')[1] else '\\n'\n                                updated = True\n                    \n                    if updated:\n                        with open(file_path, 'w') as f:\n                            f.writelines(lines)\n                        print(f\"   \u2705 Updated lastOpened field in {file_path.name}\")\n                        self.fixes_applied.append(f\"Fixed lastOpened field in {file_path.name}\")\n                \n                except Exception as e:\n                    print(f\"   \u274c Failed to update {file_path.name}: {e}\")\n    \n    def fix_jwt_device_description(self):\n        \"\"\"Update JWT generation to use macOS device description\"\"\"\n        print(\"\ud83d\udd27 Fixing JWT Device Description...\")\n        \n        # This requires updating the authentication process\n        auth_file = self.base_dir / \"auth.py\"\n        \n        if auth_file.exists():\n            try:\n                with open(auth_file, 'r') as f:\n                    content = f.read()\n                \n                # Look for device description patterns\n                old_patterns = 
[\n                    'desktop-windows',\n                    'desktop-win',\n                    'windows'\n                ]\n                \n                new_replacement = 'desktop-macos'\n                \n                updated = False\n                for old_pattern in old_patterns:\n                    if old_pattern in content.lower():\n                        # This is more complex - we need to identify the specific context\n                        print(f\"   \u26a0\ufe0f Found '{old_pattern}' in auth.py - manual review needed\")\n                        print(f\"   \ud83d\udcdd Action: Update device registration to use 'desktop-macos'\")\n                        self.fixes_applied.append(f\"JWT device description needs manual update in auth.py\")\n                        updated = True\n                \n                if not updated:\n                    print(\"   \u2139\ufe0f No obvious device description patterns found in auth.py\")\n                    print(\"   \ud83d\udcdd Note: JWT device description may be set during token generation\")\n            \n            except Exception as e:\n                print(f\"   \u274c Failed to analyze auth.py: {e}\")\n    \n    def create_fixed_upload_test(self):\n        \"\"\"Create a test script with all fixes applied\"\"\"\n        print(\"\ud83d\udd27 Creating Fixed Upload Test...\")\n        \n        fixed_test_content = '''#!/usr/bin/env python3\n\"\"\"\nFixed Upload Test - Matches Real App Behavior\n\nThis test script incorporates all the fixes identified by dry run analysis.\n\"\"\"\n\nimport os\nimport json\nimport time\nfrom pathlib import Path\nimport uuid\n\ndef create_test_document_with_fixes():\n    \"\"\"Create a test document with all real app fixes applied\"\"\"\n    \n    # Generate document UUID\n    doc_uuid = str(uuid.uuid4())\n    \n    # Fixed metadata (matches real app)\n    metadata = {\n        \"createdTime\": str(int(time.time() * 1000)),\n        \"lastModified\": str(int(time.time() * 1000)),\n        \"lastOpened\": \"0\",  # \u2705 Fixed: Always \"0\"\n        \"lastOpenedPage\": 0,\n        \"metadatamodified\": False,\n        \"modified\": False,\n        \"parent\": \"\",\n        \"pinned\": False,\n        \"source\": \"com.remarkable.macos\",  # \u2705 Fixed: Changed from windows to macos\n        \"type\": \"DocumentType\",\n        \"visibleName\": \"Fixed_Test_Document\",\n        \"version\": 1\n    }\n    \n    # Fixed content structure\n    content = {\n        \"coverPageNumber\": 0,\n        \"customZoomCenterX\": 0,\n        \"customZoomCenterY\": 936,\n        \"customZoomOrientation\": \"portrait\",\n        \"customZoomPageHeight\": 1872,\n        \"customZoomPageWidth\": 1404,\n        \"customZoomScale\": 1,\n        \"documentMetadata\": {},\n        \"extraMetadata\": {},\n        \"fileType\": \"pdf\",\n        \"fontName\": \"\",\n        \"formatVersion\": 1,\n        \"lineHeight\": -1,\n        \"orientation\": \"portrait\",\n        \"originalPageCount\": 1,\n        \"pageCount\": 1,\n        \"pageTags\": [],\n        \"pages\": [str(uuid.uuid4())],\n        \"redirectionPageMap\": [0],\n        \"sizeInBytes\": \"1000\",\n        \"tags\": [],\n        \"textAlignment\": \"justify\",\n        \"textScale\": 1,\n        \"zoomMode\": \"bestFit\"\n    }\n    \n    # Fixed pagedata content\n    pagedata = \"\\\\n\"  # \u2705 Fixed: Changed from empty string to newline\n    \n    # Fixed headers (for reference)\n    headers_template = {\n        'host': 
'eu.tectonic.remarkable.com',\n        'authorization': 'Bearer YOUR_TOKEN_HERE',\n        'content-type': 'application/octet-stream',\n        'rm-batch-number': '1',\n        'rm-sync-id': str(uuid.uuid4()),\n        'user-agent': 'desktop/3.20.0.922 (macos 15.4)',  # \u2705 Fixed: Matches real app\n        'connection': 'Keep-Alive',\n        'accept-encoding': 'gzip, deflate',\n        'accept-language': 'en-BE,*'  # \u2705 Fixed: Matches real app locale\n    }\n    \n    print(\"\u2705 Test document created with all real app fixes applied:\")\n    print(f\"   \ud83d\udcdd Document UUID: {doc_uuid}\")\n    print(f\"   \ud83d\udd27 Metadata source: {metadata['source']}\")\n    print(f\"   \ud83d\udd27 LastOpened: {metadata['lastOpened']}\")\n    print(f\"   \ud83d\udd27 Pagedata: {repr(pagedata)}\")\n    print(f\"   \ud83d\udd27 User-Agent: {headers_template['user-agent']}\")\n    \n    return {\n        'uuid': doc_uuid,\n        'metadata': metadata,\n        'content': content,\n        'pagedata': pagedata,\n        'headers_template': headers_template\n    }\n\nif __name__ == \"__main__\":\n    print(\"\ud83e\uddea FIXED UPLOAD TEST - REAL APP BEHAVIOR\")\n    print(\"=\" * 50)\n    \n    test_doc = create_test_document_with_fixes()\n    \n    # Save test data for analysis\n    output_file = Path(__file__).parent / \"test_results\" / \"fixed_document_structure.json\"\n    output_file.parent.mkdir(exist_ok=True)\n    \n    with open(output_file, 'w') as f:\n        json.dump(test_doc, f, indent=2, default=str)\n    \n    print(f\"\\\\n\ud83d\udcbe Fixed document structure saved to: {output_file}\")\n    print(\"\\\\n\ud83c\udfaf Ready for real app behavior testing!\")\n'''\n        \n        fixed_test_file = self.base_dir / \"fixed_upload_test.py\"\n        \n        try:\n            with open(fixed_test_file, 'w') as f:\n                f.write(fixed_test_content)\n            \n            # Make executable\n            os.chmod(fixed_test_file, 0o755)\n            \n            print(f\"   \u2705 Created {fixed_test_file}\")\n            self.fixes_applied.append(f\"Created fixed upload test script\")\n        \n        except Exception as e:\n            print(f\"   \u274c Failed to create fixed test: {e}\")\n    \n    def generate_fix_summary(self):\n        \"\"\"Generate a summary of all fixes applied\"\"\"\n        print(\"\\n\ud83d\udccb FIX SUMMARY\")\n        print(\"=\" * 50)\n        \n        if self.fixes_applied:\n            print(\"\u2705 Fixes Applied:\")\n            for i, fix in enumerate(self.fixes_applied, 1):\n                print(f\"   {i}. 
{fix}\")\n        else:\n            print(\"\u274c No fixes were applied\")\n        \n        # Save summary\n        summary = {\n            'timestamp': time.time(),\n            'fixes_applied': self.fixes_applied,\n            'critical_fixes': [\n                'User-Agent changed to: desktop/3.20.0.922 (macos 15.4)',\n                'Metadata source changed to: com.remarkable.macos',\n                'Pagedata content changed to: newline character',\n                'LastOpened field standardized to: \"0\"',\n                'JWT device description flagged for manual update'\n            ]\n        }\n        \n        summary_file = self.base_dir / \"test_results\" / f\"implementation_fixes_{int(time.time())}.json\"\n        summary_file.parent.mkdir(exist_ok=True)\n        \n        with open(summary_file, 'w') as f:\n            json.dump(summary, f, indent=2, default=str)\n        \n        print(f\"\\n\ud83d\udcbe Fix summary saved to: {summary_file}\")",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/cloudtest/implementation_fixer.py",
      "tags": [
        "code-fixer",
        "file-modification",
        "remarkable-tablet",
        "implementation-alignment",
        "automation",
        "refactoring",
        "string-replacement",
        "metadata-correction",
        "testing-utilities"
      ],
      "updated_at": "2025-12-07T01:56:00.111096",
      "usage_example": "from implementation_fixer import ImplementationFixer\n\n# Instantiate the fixer\nfixer = ImplementationFixer()\n\n# Apply individual fixes\nfixer.fix_user_agent()\nfixer.fix_metadata_source()\nfixer.fix_pagedata_content()\nfixer.fix_last_opened_field()\nfixer.fix_jwt_device_description()\n\n# Create a fixed test script\nfixer.create_fixed_upload_test()\n\n# Generate and save summary\nfixer.generate_fix_summary()\n\n# Check what was fixed\nprint(f\"Applied {len(fixer.fixes_applied)} fixes\")\nfor fix in fixer.fixes_applied:\n    print(f\"  - {fix}\")"
    },
    {
      "best_practices": [],
      "class_interface": {
        "attributes": [],
        "methods": [
          {
            "is_property": false,
            "is_static": false,
            "name": "__init__",
            "parameters": {
              "workspace_dir": "Type: str"
            },
            "purpose": "Internal method:   init  ",
            "returns": "None",
            "signature": "__init__(self, workspace_dir)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "setup_logging",
            "parameters": {},
            "purpose": "Setup logging to file",
            "returns": "None",
            "signature": "setup_logging(self)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_authenticate",
            "parameters": {},
            "purpose": "Authenticate with the reMarkable cloud service using token-based approach",
            "returns": "Returns Optional[requests.Session]",
            "signature": "_authenticate(self) -> Optional[requests.Session]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_get_new_token",
            "parameters": {},
            "purpose": "Get a new authentication token",
            "returns": "Returns Optional[requests.Session]",
            "signature": "_get_new_token(self) -> Optional[requests.Session]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "sync_replica",
            "parameters": {},
            "purpose": "Perform replica synchronization using the proven 3-step process:\n1. Discovery - Get all nodes from cloud\n2. Hierarchy - Build proper folder structure\n3. Extraction - Download content to correct locations",
            "returns": "Returns bool",
            "signature": "sync_replica(self) -> bool"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_load_database",
            "parameters": {},
            "purpose": "Load or create replica database",
            "returns": "Returns Dict[str, Any]",
            "signature": "_load_database(self) -> Dict[str, Any]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_save_database",
            "parameters": {},
            "purpose": "Save database to disk",
            "returns": "None",
            "signature": "_save_database(self)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_save_file_content",
            "parameters": {
              "content": "Type: bytes",
              "content_hash": "Type: str",
              "filename": "Type: str"
            },
            "purpose": "Save file content to local content directory",
            "returns": "Returns bool",
            "signature": "_save_file_content(self, content_hash, content, filename) -> bool"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_get_file_type",
            "parameters": {
              "filename": "Type: str"
            },
            "purpose": "Determine file type from filename",
            "returns": "Returns str",
            "signature": "_get_file_type(self, filename) -> str"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_compute_hash",
            "parameters": {
              "content": "Type: bytes"
            },
            "purpose": "Compute SHA256 hash of content",
            "returns": "Returns str",
            "signature": "_compute_hash(self, content) -> str"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "sync_complete_replica",
            "parameters": {},
            "purpose": "Perform complete replica synchronization",
            "returns": "Returns bool",
            "signature": "sync_complete_replica(self) -> bool"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_fetch_node_details",
            "parameters": {
              "node_hash": "Type: str",
              "node_size": "Type: str",
              "node_type": "Type: str",
              "node_uuid": "Type: str"
            },
            "purpose": "Fetch detailed information about a node",
            "returns": "Returns Optional[Dict[str, Any]]",
            "signature": "_fetch_node_details(self, node_uuid, node_hash, node_type, node_size) -> Optional[Dict[str, Any]]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_parse_folder_node",
            "parameters": {
              "folder_content": "Type: str",
              "node_hash": "Type: str",
              "node_uuid": "Type: str"
            },
            "purpose": "Parse folder node content",
            "returns": "Returns Dict[str, Any]",
            "signature": "_parse_folder_node(self, node_uuid, node_hash, folder_content) -> Dict[str, Any]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_parse_document_node",
            "parameters": {
              "doc_content": "Type: str",
              "node_hash": "Type: str",
              "node_type": "Type: str",
              "node_uuid": "Type: str"
            },
            "purpose": "Parse document node content (docSchema)",
            "returns": "Returns Dict[str, Any]",
            "signature": "_parse_document_node(self, node_uuid, node_hash, doc_content, node_type) -> Dict[str, Any]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_fetch_metadata",
            "parameters": {
              "metadata_hash": "Type: str"
            },
            "purpose": "Fetch and parse document metadata",
            "returns": "Returns Optional[Dict[str, Any]]",
            "signature": "_fetch_metadata(self, metadata_hash) -> Optional[Dict[str, Any]]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_save_summary",
            "parameters": {},
            "purpose": "Save human-readable summary",
            "returns": "None",
            "signature": "_save_summary(self)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_create_content_index",
            "parameters": {},
            "purpose": "Create an index of all downloaded content files",
            "returns": "None",
            "signature": "_create_content_index(self)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "get_content_file_path",
            "parameters": {
              "content_hash": "Type: str"
            },
            "purpose": "Get the local path for a content file",
            "returns": "Returns Path",
            "signature": "get_content_file_path(self, content_hash) -> Path"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_create_folder_structure",
            "parameters": {},
            "purpose": "Create readable folder structure with documents in their proper folders",
            "returns": "None",
            "signature": "_create_folder_structure(self)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_sanitize_filename",
            "parameters": {
              "filename": "Type: str"
            },
            "purpose": "Sanitize filename for filesystem use",
            "returns": "Returns str",
            "signature": "_sanitize_filename(self, filename) -> str"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "get_node_by_uuid",
            "parameters": {
              "uuid": "Type: str"
            },
            "purpose": "Get a specific node by UUID",
            "returns": "Returns Optional[Dict[str, Any]]",
            "signature": "get_node_by_uuid(self, uuid) -> Optional[Dict[str, Any]]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "get_documents_in_folder",
            "parameters": {
              "folder_uuid": "Type: str"
            },
            "purpose": "Get all documents in a specific folder",
            "returns": "Returns List[Dict[str, Any]]",
            "signature": "get_documents_in_folder(self, folder_uuid) -> List[Dict[str, Any]]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "get_folders",
            "parameters": {},
            "purpose": "Get all folders",
            "returns": "Returns List[Dict[str, Any]]",
            "signature": "get_folders(self) -> List[Dict[str, Any]]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "get_root_documents",
            "parameters": {},
            "purpose": "Get all documents in root (no parent)",
            "returns": "Returns List[Dict[str, Any]]",
            "signature": "get_root_documents(self) -> List[Dict[str, Any]]"
          }
        ]
      },
      "complexity": "moderate",
      "component_type": "class",
      "conditional_imports": [],
      "created_at": "2025-12-07 00:55:13",
      "decorators": [],
      "dependencies": [],
      "description": "Standalone replica synchronization using proven local_replica_v2 approach",
      "docstring": "Standalone replica synchronization using proven local_replica_v2 approach",
      "id": 2119,
      "imports": [
        "import os",
        "import sys",
        "import json",
        "import time",
        "import hashlib",
        "import requests",
        "import logging",
        "import re",
        "import shutil",
        "import subprocess",
        "from pathlib import Path",
        "from datetime import datetime",
        "from typing import Dict",
        "from typing import Any",
        "from typing import Optional",
        "from typing import List",
        "from typing import Set",
        "from dataclasses import dataclass",
        "import re",
        "import shutil"
      ],
      "imports_required": [
        "import os",
        "import sys",
        "import json",
        "import time",
        "import hashlib"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 824,
      "line_start": 59,
      "name": "RemarkableReplicaSync_v1",
      "parameters": [
        {
          "annotation": "",
          "default": null,
          "name": "bases"
        }
      ],
      "parameters_explained": {
        "bases": "Parameter of type "
      },
      "parent_class": null,
      "purpose": "Standalone replica synchronization using proven local_replica_v2 approach",
      "return_annotation": null,
      "return_explained": "Returns unspecified type",
      "settings_required": [],
      "source_code": "class RemarkableReplicaSync:\n    \"\"\"Standalone replica synchronization using proven local_replica_v2 approach\"\"\"\n    \n    def __init__(self, workspace_dir: str = None):\n        self.workspace_dir = Path(workspace_dir) if workspace_dir else Path(__file__).parent\n        self.replica_dir = self.workspace_dir / \"remarkable_replica_v2\"\n        self.content_dir = self.replica_dir / \"content\"\n        \n        # Create directories\n        for directory in [self.replica_dir, self.content_dir]:\n            directory.mkdir(parents=True, exist_ok=True)\n        \n        # Setup logging\n        self.log_file = self.replica_dir / \"build.log\"\n        self.setup_logging()\n        \n        # Initialize authentication\n        self.session = self._authenticate()\n        if not self.session:\n            raise RuntimeError(\"Failed to authenticate with reMarkable\")\n        \n        # State matching local_replica_v2.py\n        self.nodes: Dict[str, RemarkableNode] = {}\n        self.all_hashes: Set[str] = set()\n        self.failed_downloads: Set[str] = set()\n        \n        # Statistics\n        self.stats = {\n            'total_nodes': 0,\n            'folders': 0,\n            'documents': 0,\n            'pdfs_extracted': 0,\n            'rm_files_extracted': 0,\n            'rm_pdfs_converted': 0,\n            'nodes_added': 0\n        }\n        \n    def setup_logging(self):\n        \"\"\"Setup logging to file\"\"\"\n        logging.basicConfig(\n            level=logging.INFO,\n            format='%(asctime)s - %(levelname)s - %(message)s',\n            handlers=[\n                logging.FileHandler(self.log_file, mode='w'),\n                logging.StreamHandler()\n            ]\n        )\n        self.logger = logging.getLogger(__name__)\n    \n    def _authenticate(self) -> Optional[requests.Session]:\n        \"\"\"Authenticate with the reMarkable cloud service using token-based approach\"\"\"\n        token_file = self.workspace_dir / '.remarkable_token'\n        \n        if token_file.exists():\n            print(\"\ufffd Using existing reMarkable token...\")\n            try:\n                with open(token_file, 'r') as f:\n                    token_data = json.load(f)\n                \n                session = requests.Session()\n                session.headers.update({\n                    'Authorization': f'Bearer {token_data[\"access_token\"]}',\n                    'User-Agent': 'remarkable-replica-sync/1.0'\n                })\n                \n                # Quick test - try to get document root\n                test_url = f'{token_data[\"service_manager_url\"]}/document-storage/json/2/docs'\n                response = session.get(test_url)\n                \n                if response.status_code == 200:\n                    print(\"\u2705 Authentication successful\")\n                    return session\n                else:\n                    print(f\"\u274c Token test failed with status {response.status_code}\")\n                    \n            except Exception as e:\n                print(f\"\u274c Token authentication failed: {e}\")\n        \n        # Need new token\n        print(\"\ud83d\udd10 No valid token found. 
Getting new token...\")\n        return self._get_new_token()\n    \n    def _get_new_token(self) -> Optional[requests.Session]:\n        \"\"\"Get a new authentication token\"\"\"\n        device_token = '9c4e7c2b-c6c7-4831-8b2a-3f5a2e8f9c3d'\n        \n        try:\n            # Step 1: Register device\n            register_url = 'https://webapp-production-dot-remarkable-production.appspot.com/token/json/2/device/new'\n            register_data = {\n                'code': device_token,\n                'deviceDesc': 'desktop-linux',\n                'deviceID': hashlib.sha256(f\"replica-sync-{int(time.time())}\".encode()).hexdigest()[:8]\n            }\n            \n            response = requests.post(register_url, json=register_data)\n            if response.status_code != 200:\n                print(f\"\u274c Device registration failed: {response.status_code}\")\n                return None\n                \n            device_bearer = response.text.strip('\"')\n            \n            # Step 2: Get user token\n            user_url = 'https://webapp-production-dot-remarkable-production.appspot.com/token/json/2/user/new'\n            user_response = requests.post(\n                user_url,\n                headers={'Authorization': f'Bearer {device_bearer}'}\n            )\n            \n            if user_response.status_code != 200:\n                print(f\"\u274c User token failed: {user_response.status_code}\")\n                return None\n                \n            user_token = user_response.text.strip('\"')\n            \n            # Step 3: Get service discovery\n            discovery_url = 'https://service-manager-production-dot-remarkable-production.appspot.com/service/json/1/document-storage?environment=production&group=auth0%7C5a68dc51cb30df3877a1d7c4&apiVer=2'\n            discovery_response = requests.get(\n                discovery_url,\n                headers={'Authorization': f'Bearer {user_token}'}\n            )\n            \n            if discovery_response.status_code != 200:\n                print(f\"\u274c Service discovery failed: {discovery_response.status_code}\")\n                return None\n                \n            service_info = discovery_response.json()\n            service_url = service_info.get('Host')\n            \n            if not service_url:\n                print(\"\u274c No service URL in discovery response\")\n                return None\n            \n            # Save token info\n            token_data = {\n                'access_token': user_token,\n                'service_manager_url': service_url,\n                'created_at': datetime.now().isoformat()\n            }\n            \n            token_file = self.workspace_dir / '.remarkable_token'\n            with open(token_file, 'w') as f:\n                json.dump(token_data, f, indent=2)\n            \n            # Create session\n            session = requests.Session()\n            session.headers.update({\n                'Authorization': f'Bearer {user_token}',\n                'User-Agent': 'remarkable-replica-sync/1.0'\n            })\n            \n            print(\"\u2705 New authentication token obtained and saved\")\n            return session\n            \n        except Exception as e:\n            print(f\"\u274c Authentication failed: {e}\")\n            return None\n            \n            return None\n    \n    def sync_replica(self) -> bool:\n        \"\"\"\n        Perform replica synchronization using the proven 3-step process:\n    
    1. Discovery - Get all nodes from cloud\n        2. Hierarchy - Build proper folder structure\n        3. Extraction - Download content to correct locations\n        \"\"\"\n        try:\n            self.logger.info(\"\ud83d\ude80 Starting reMarkable replica sync\")\n            \n            # Phase 1: Discovery\n            if not self._discover_all_nodes():\n                self.logger.error(\"\u274c Discovery phase failed\")\n                return False\n            \n            # Phase 2: Build hierarchy \n            if not self._build_folder_hierarchy():\n                self.logger.error(\"\u274c Hierarchy phase failed\")\n                return False\n            \n            # Phase 3: Extract content\n            if not self._extract_content():\n                self.logger.error(\"\u274c Content extraction phase failed\")\n                return False\n            \n            # Generate summary\n            self._generate_summary()\n            \n            self.logger.info(\"\u2705 Replica sync completed successfully\")\n            return True\n            \n        except Exception as e:\n            self.logger.error(f\"\u274c Sync failed: {e}\")\n            return False\n    \n    def _load_database(self) -> Dict[str, Any]:\n        \"\"\"Load or create replica database\"\"\"\n        if self.database_path.exists():\n            try:\n                with open(self.database_path, 'r') as f:\n                    db = json.load(f)\n                print(f\"\ud83d\udcc2 Loaded existing database with {len(db.get('nodes', {}))} nodes\")\n                return db\n            except Exception as e:\n                print(f\"\u26a0\ufe0f Database corrupted, creating new: {e}\")\n        \n        # Create new database\n        db = {\n            'nodes': {},\n            'hash_registry': {},\n            'metadata': {\n                'last_sync': None,\n                'sync_count': 0,\n                'created': datetime.now().isoformat()\n            }\n        }\n        print(\"\ud83d\udcc2 Created new replica database\")\n        return db\n    \n    def _save_database(self):\n        \"\"\"Save database to disk\"\"\"\n        try:\n            with open(self.database_path, 'w') as f:\n                json.dump(self.database, f, indent=2, default=str)\n            \n            # Update metadata\n            self.database['metadata']['last_sync'] = datetime.now().isoformat()\n            self.database['metadata']['sync_count'] += 1\n            \n        except Exception as e:\n            print(f\"\u274c Failed to save database: {e}\")\n    \n    def _save_file_content(self, content_hash: str, content: bytes, filename: str) -> bool:\n        \"\"\"Save file content to local content directory\"\"\"\n        try:\n            # Create file path using hash (first 2 chars as subdirectory)\n            subdir = content_hash[:2]\n            file_dir = self.content_dir / subdir\n            file_dir.mkdir(exist_ok=True)\n            \n            file_path = file_dir / content_hash\n            \n            # Only save if file doesn't exist (avoid re-downloading)\n            if not file_path.exists():\n                with open(file_path, 'wb') as f:\n                    f.write(content)\n                print(f\"   \ud83d\udcbe Saved {filename} ({len(content)} bytes)\")\n            \n            # Register in hash registry\n            if content_hash not in self.database.get('hash_registry', {}):\n                if 'hash_registry' not in self.database:\n       
             self.database['hash_registry'] = {}\n                \n                self.database['hash_registry'][content_hash] = {\n                    'filename': filename,\n                    'size': len(content),\n                    'type': self._get_file_type(filename),\n                    'downloaded': datetime.now().isoformat()\n                }\n            \n            return True\n            \n        except Exception as e:\n            print(f\"\u274c Failed to save {filename}: {e}\")\n            return False\n    \n    def _get_file_type(self, filename: str) -> str:\n        \"\"\"Determine file type from filename\"\"\"\n        if filename.endswith('.pdf'):\n            return 'pdf'\n        elif filename.endswith('.metadata'):\n            return 'metadata'\n        elif filename.endswith('.content'):\n            return 'content'\n        elif filename.endswith('.pagedata'):\n            return 'pagedata'\n        elif filename.endswith('.rm'):\n            return 'notebook_page'\n        elif filename.endswith('.docSchema'):\n            return 'docschema'\n        else:\n            return 'unknown'\n\n    def _compute_hash(self, content: bytes) -> str:\n        \"\"\"Compute SHA256 hash of content\"\"\"\n        return hashlib.sha256(content).hexdigest()\n    \n    def sync_complete_replica(self) -> bool:\n        \"\"\"Perform complete replica synchronization\"\"\"\n        try:\n            print(\"\\n\ud83d\ude80 STARTING COMPLETE REPLICA SYNC\")\n            print(\"=\" * 50)\n            \n            # Step 1: Get current root state\n            print(\"\ud83d\udccb Step 1: Getting root state from server...\")\n            root_response = self.session.get(\"https://eu.tectonic.remarkable.com/sync/v4/root\")\n            root_response.raise_for_status()\n            root_data = root_response.json()\n            \n            current_root_hash = root_data['hash']\n            current_generation = root_data['generation']\n            \n            print(f\"\ud83c\udf31 Root hash: {current_root_hash}\")\n            print(f\"\ud83d\udd22 Generation: {current_generation}\")\n            \n            # Step 2: Fetch root.docSchema\n            print(\"\ud83d\udccb Step 2: Fetching root.docSchema...\")\n            root_content_response = self.session.get(f\"https://eu.tectonic.remarkable.com/sync/v3/files/{current_root_hash}\")\n            root_content_response.raise_for_status()\n            root_content = root_content_response.text\n            \n            print(f\"\ud83d\udcc4 Root.docSchema size: {len(root_content)} bytes\")\n            \n            # Step 3: Parse and discover all nodes\n            print(\"\ud83d\udccb Step 3: Discovering all nodes...\")\n            discovered_nodes = {}\n            \n            lines = root_content.strip().split('\\n')\n            if len(lines) < 1:\n                print(\"\u274c Empty root.docSchema\")\n                return False\n            \n            version = lines[0]\n            print(f\"\ud83d\udcca Schema version: {version}\")\n            \n            # Process each entry in root.docSchema\n            for line_num, line in enumerate(lines[1:], 1):\n                if ':' in line:\n                    parts = line.split(':')\n                    if len(parts) >= 5:\n                        node_hash = parts[0]\n                        node_uuid = parts[2]\n                        node_type = parts[3]\n                        node_size = parts[4]\n                        \n                        # 
Fetch node details\n                        node_info = self._fetch_node_details(node_uuid, node_hash, node_type, node_size)\n                        if node_info:\n                            discovered_nodes[node_uuid] = node_info\n                            \n                        # Progress indicator\n                        if line_num % 5 == 0:\n                            print(f\"   \ud83d\udcca Processed {line_num}/{len(lines)-1} entries...\")\n            \n            print(f\"\u2705 Discovered {len(discovered_nodes)} nodes\")\n            \n            # Step 4: Update database\n            print(\"\ud83d\udccb Step 4: Updating database...\")\n            \n            # Count changes\n            new_nodes = 0\n            updated_nodes = 0\n            \n            for uuid, node_info in discovered_nodes.items():\n                if uuid not in self.database['nodes']:\n                    new_nodes += 1\n                else:\n                    # Check if updated\n                    existing_node = self.database['nodes'][uuid]\n                    if existing_node.get('hash') != node_info.get('hash'):\n                        updated_nodes += 1\n                \n                self.database['nodes'][uuid] = node_info\n            \n            # Remove nodes no longer in cloud\n            cloud_uuids = set(discovered_nodes.keys())\n            local_uuids = set(self.database['nodes'].keys())\n            removed_uuids = local_uuids - cloud_uuids\n            \n            for uuid in removed_uuids:\n                del self.database['nodes'][uuid]\n            \n            print(f\"\ud83d\udcca Database changes:\")\n            print(f\"   \ud83c\udd95 New nodes: {new_nodes}\")\n            print(f\"   \ud83d\udd04 Updated nodes: {updated_nodes}\")\n            print(f\"   \ud83d\uddd1\ufe0f Removed nodes: {len(removed_uuids)}\")\n            \n            # Step 5: Save database and summary\n            print(\"\ud83d\udccb Step 5: Saving database and summary...\")\n            self._save_database()\n            self._save_summary()\n            \n            # Step 6: Create content index\n            print(\"\ud83d\udccb Step 6: Creating content index...\")\n            self._create_content_index()\n            \n            # Step 7: Create folder structure with files\n            print(\"\ud83d\udccb Step 7: Creating folder structure...\")\n            self._create_folder_structure()\n            \n            print(f\"\\n\ud83c\udf89 REPLICA SYNC COMPLETED!\")\n            print(f\"\ud83d\udcca Total nodes: {len(self.database['nodes'])}\")\n            print(f\"\ud83d\udcc1 Database: {self.database_path}\")\n            print(f\"\ud83d\udcc4 Summary: {self.summary_path}\")\n            print(f\"\ud83d\udcbe Content files: {self.content_dir}\")\n            print(f\"\ud83d\udcc2 Folder structure: {self.replica_dir / 'documents'}\")\n            \n            return True\n            \n        except Exception as e:\n            print(f\"\u274c Replica sync failed: {e}\")\n            return False\n    \n    def _fetch_node_details(self, node_uuid: str, node_hash: str, node_type: str, node_size: str) -> Optional[Dict[str, Any]]:\n        \"\"\"Fetch detailed information about a node\"\"\"\n        try:\n            # Fetch node content (docSchema or metadata)\n            node_response = self.session.get(f\"https://eu.tectonic.remarkable.com/sync/v3/files/{node_hash}\")\n            node_response.raise_for_status()\n            node_content = 
node_response.text\n            node_content_bytes = node_response.content\n            \n            # Save the node content locally\n            if node_type in ['1', '2']:  # Folder\n                self._save_file_content(node_hash, node_content_bytes, f\"{node_uuid}.metadata\")\n            else:  # Document\n                self._save_file_content(node_hash, node_content_bytes, f\"{node_uuid}.docSchema\")\n            \n            # Determine node type and parse\n            if node_type in ['1', '2']:  # Folder\n                return self._parse_folder_node(node_uuid, node_hash, node_content)\n            elif node_type in ['3', '4']:  # Document\n                return self._parse_document_node(node_uuid, node_hash, node_content, node_type)\n            else:\n                print(f\"\u26a0\ufe0f Unknown node type {node_type} for {node_uuid[:8]}...\")\n                return None\n                \n        except Exception as e:\n            print(f\"\u274c Failed to fetch node {node_uuid[:8]}...: {e}\")\n            return None\n    \n    def _parse_folder_node(self, node_uuid: str, node_hash: str, folder_content: str) -> Dict[str, Any]:\n        \"\"\"Parse folder node content\"\"\"\n        try:\n            # For folders, the content is the metadata JSON\n            metadata = json.loads(folder_content)\n            \n            return {\n                'uuid': node_uuid,\n                'hash': node_hash,\n                'name': metadata.get('visibleName', 'Unknown Folder'),\n                'node_type': 'folder',\n                'metadata': metadata,\n                'last_modified': metadata.get('lastModified', '0'),\n                'parent_uuid': metadata.get('parent', ''),\n                'sync_status': 'synced',\n                'last_synced': datetime.now().isoformat()\n            }\n            \n        except Exception as e:\n            print(f\"\u274c Failed to parse folder {node_uuid[:8]}...: {e}\")\n            return None\n    \n    def _parse_document_node(self, node_uuid: str, node_hash: str, doc_content: str, node_type: str) -> Dict[str, Any]:\n        \"\"\"Parse document node content (docSchema)\"\"\"\n        try:\n            # Parse docSchema to get components\n            lines = doc_content.strip().split('\\n')\n            if len(lines) < 2:\n                print(f\"\u26a0\ufe0f Invalid docSchema for {node_uuid[:8]}...\")\n                return None\n            \n            version = lines[0]\n            component_hashes = {}\n            metadata = None\n            \n            # Extract component hashes and download components\n            for line in lines[1:]:\n                if ':' in line:\n                    parts = line.split(':')\n                    if len(parts) >= 3:\n                        comp_hash = parts[0]\n                        comp_name = parts[2]\n                        \n                        # Download the component\n                        try:\n                            comp_response = self.session.get(f\"https://eu.tectonic.remarkable.com/sync/v3/files/{comp_hash}\")\n                            comp_response.raise_for_status()\n                            \n                            # Save component content locally\n                            self._save_file_content(comp_hash, comp_response.content, comp_name)\n                            \n                            # Store component hash\n                            if comp_name.endswith('.metadata'):\n                                
component_hashes['metadata'] = comp_hash\n                                # Parse metadata\n                                try:\n                                    metadata = json.loads(comp_response.text)\n                                except:\n                                    print(f\"\u26a0\ufe0f Failed to parse metadata for {node_uuid[:8]}...\")\n                            elif comp_name.endswith('.content'):\n                                component_hashes['content'] = comp_hash\n                            elif comp_name.endswith('.pdf'):\n                                component_hashes['pdf'] = comp_hash\n                            elif comp_name.endswith('.pagedata'):\n                                component_hashes['pagedata'] = comp_hash\n                            elif comp_name.endswith('.rm'):\n                                if 'rm_files' not in component_hashes:\n                                    component_hashes['rm_files'] = []\n                                component_hashes['rm_files'].append(comp_hash)\n                                \n                        except Exception as e:\n                            print(f\"\u26a0\ufe0f Failed to download component {comp_name}: {e}\")\n                        \n                        component_hashes['docSchema'] = node_hash\n            \n            # Determine document name\n            doc_name = \"Unknown Document\"\n            if metadata:\n                doc_name = metadata.get('visibleName', doc_name)\n            \n            return {\n                'uuid': node_uuid,\n                'hash': node_hash,\n                'name': doc_name,\n                'node_type': 'document',\n                'metadata': metadata or {},\n                'component_hashes': component_hashes,\n                'last_modified': metadata.get('lastModified', '0') if metadata else '0',\n                'parent_uuid': metadata.get('parent', '') if metadata else '',\n                'version': int(version) if version.isdigit() else 1,\n                'sync_status': 'synced',\n                'last_synced': datetime.now().isoformat()\n            }\n            \n        except Exception as e:\n            print(f\"\u274c Failed to parse document {node_uuid[:8]}...: {e}\")\n            return None\n    \n    def _fetch_metadata(self, metadata_hash: str) -> Optional[Dict[str, Any]]:\n        \"\"\"Fetch and parse document metadata\"\"\"\n        try:\n            metadata_response = self.session.get(f\"https://eu.tectonic.remarkable.com/sync/v3/files/{metadata_hash}\")\n            metadata_response.raise_for_status()\n            return json.loads(metadata_response.text)\n        except Exception as e:\n            print(f\"\u26a0\ufe0f Failed to fetch metadata {metadata_hash[:8]}...: {e}\")\n            return None\n    \n    def _save_summary(self):\n        \"\"\"Save human-readable summary\"\"\"\n        try:\n            with open(self.summary_path, 'w') as f:\n                f.write(\"reMarkable Replica Summary\\n\")\n                f.write(\"=\" * 50 + \"\\n\\n\")\n                \n                f.write(f\"Last sync: {self.database['metadata'].get('last_sync', 'Never')}\\n\")\n                f.write(f\"Total syncs: {self.database['metadata'].get('sync_count', 0)}\\n\")\n                f.write(f\"Total nodes: {len(self.database['nodes'])}\\n\\n\")\n                \n                # Count by type\n                folders = [n for n in self.database['nodes'].values() if n.get('node_type') == 'folder']\n 
               documents = [n for n in self.database['nodes'].values() if n.get('node_type') == 'document']\n                \n                f.write(f\"\ud83d\udcc2 Folders: {len(folders)}\\n\")\n                f.write(f\"\ud83d\udcc4 Documents: {len(documents)}\\n\\n\")\n                \n                # List folders\n                if folders:\n                    f.write(\"Folders:\\n\")\n                    f.write(\"-\" * 20 + \"\\n\")\n                    for folder in sorted(folders, key=lambda x: x.get('name', '')):\n                        f.write(f\"  \ud83d\udcc2 {folder['name']} ({folder['uuid'][:8]}...)\\n\")\n                    f.write(\"\\n\")\n                \n                # List documents\n                if documents:\n                    f.write(\"Documents:\\n\")\n                    f.write(\"-\" * 20 + \"\\n\")\n                    for doc in sorted(documents, key=lambda x: x.get('name', '')):\n                        parent_info = \"\"\n                        if doc.get('parent_uuid'):\n                            parent_name = \"Unknown Folder\"\n                            for folder in folders:\n                                if folder['uuid'] == doc['parent_uuid']:\n                                    parent_name = folder['name']\n                                    break\n                            parent_info = f\" [in {parent_name}]\"\n                        \n                        f.write(f\"  \ud83d\udcc4 {doc['name']} ({doc['uuid'][:8]}...){parent_info}\\n\")\n                \n        except Exception as e:\n            print(f\"\u26a0\ufe0f Failed to save summary: {e}\")\n    \n    def _create_content_index(self):\n        \"\"\"Create an index of all downloaded content files\"\"\"\n        try:\n            index_path = self.replica_dir / \"content_index.txt\"\n            \n            with open(index_path, 'w') as f:\n                f.write(\"reMarkable Content Index\\n\")\n                f.write(\"=\" * 50 + \"\\n\\n\")\n                \n                f.write(f\"Generated: {datetime.now().isoformat()}\\n\")\n                f.write(f\"Total files: {len(self.database.get('hash_registry', {}))}\\n\\n\")\n                \n                # Group by file type\n                by_type = {}\n                for hash_val, info in self.database.get('hash_registry', {}).items():\n                    file_type = info.get('type', 'unknown')\n                    if file_type not in by_type:\n                        by_type[file_type] = []\n                    by_type[file_type].append((hash_val, info))\n                \n                for file_type, files in sorted(by_type.items()):\n                    f.write(f\"{file_type.upper()} Files ({len(files)}):\\n\")\n                    f.write(\"-\" * 30 + \"\\n\")\n                    \n                    for hash_val, info in sorted(files, key=lambda x: x[1].get('filename', '')):\n                        filename = info.get('filename', 'unknown')\n                        size = info.get('size', 0)\n                        subdir = hash_val[:2]\n                        f.write(f\"  {filename} ({size} bytes)\\n\")\n                        f.write(f\"    Hash: {hash_val}\\n\")\n                        f.write(f\"    Path: content/{subdir}/{hash_val}\\n\\n\")\n                    \n                    f.write(\"\\n\")\n            \n            print(f\"\ud83d\udccb Content index saved: {index_path}\")\n            \n        except Exception as e:\n            print(f\"\u26a0\ufe0f Failed to 
create content index: {e}\")\n    \n    def get_content_file_path(self, content_hash: str) -> Path:\n        \"\"\"Get the local path for a content file\"\"\"\n        subdir = content_hash[:2]\n        return self.content_dir / subdir / content_hash\n    \n    def _create_folder_structure(self):\n        \"\"\"Create readable folder structure with documents in their proper folders\"\"\"\n        try:\n            # Create documents directory\n            documents_dir = self.replica_dir / \"documents\"\n            documents_dir.mkdir(exist_ok=True)\n            \n            print(f\"\ud83d\udcc1 Creating folder structure in: {documents_dir}\")\n            \n            # Build folder hierarchy\n            folders = self.get_folders()\n            documents = [n for n in self.database['nodes'].values() if n.get('node_type') == 'document']\n            \n            # Create folder directories\n            folder_paths = {}\n            \n            # Process root folders first\n            for folder in folders:\n                if not folder.get('parent_uuid'):\n                    folder_path = documents_dir / self._sanitize_filename(folder['name'])\n                    folder_path.mkdir(exist_ok=True)\n                    folder_paths[folder['uuid']] = folder_path\n                    print(f\"   \ud83d\udcc2 Created root folder: {folder['name']}\")\n            \n            # Process nested folders\n            remaining_folders = [f for f in folders if f.get('parent_uuid')]\n            max_iterations = 10  # Prevent infinite loops\n            \n            while remaining_folders and max_iterations > 0:\n                processed_this_round = []\n                \n                for folder in remaining_folders:\n                    parent_uuid = folder.get('parent_uuid')\n                    if parent_uuid in folder_paths:\n                        # Parent folder exists, create this folder\n                        parent_path = folder_paths[parent_uuid]\n                        folder_path = parent_path / self._sanitize_filename(folder['name'])\n                        folder_path.mkdir(exist_ok=True)\n                        folder_paths[folder['uuid']] = folder_path\n                        processed_this_round.append(folder)\n                        print(f\"   \ud83d\udcc2 Created nested folder: {folder['name']}\")\n                \n                # Remove processed folders\n                for folder in processed_this_round:\n                    remaining_folders.remove(folder)\n                \n                max_iterations -= 1\n            \n            # Extract documents to their folders\n            for doc in documents:\n                doc_name = self._sanitize_filename(doc['name'])\n                parent_uuid = doc.get('parent_uuid')\n                \n                # Determine target directory\n                if parent_uuid and parent_uuid in folder_paths:\n                    target_dir = folder_paths[parent_uuid]\n                else:\n                    target_dir = documents_dir\n                \n                # Extract PDF if available\n                pdf_hash = doc.get('component_hashes', {}).get('pdf')\n                if pdf_hash:\n                    pdf_path = target_dir / f\"{doc_name}.pdf\"\n                    source_path = self.get_content_file_path(pdf_hash)\n                    \n                    if source_path.exists():\n                        try:\n                            # Copy PDF to folder structure\n                   
         import shutil\n                            shutil.copy2(source_path, pdf_path)\n                            print(f\"   \ud83d\udcc4 Extracted PDF: {doc_name}.pdf\")\n                        except Exception as e:\n                            print(f\"   \u274c Failed to copy PDF {doc_name}: {e}\")\n                    else:\n                        print(f\"   \u26a0\ufe0f PDF source not found: {pdf_hash[:16]}...\")\n                \n                # For notebooks (with .rm files), create a note that it's a notebook\n                rm_files = doc.get('component_hashes', {}).get('rm_files', [])\n                if rm_files and not pdf_hash:\n                    notebook_info_path = target_dir / f\"{doc_name}_notebook_info.txt\"\n                    try:\n                        with open(notebook_info_path, 'w') as f:\n                            f.write(f\"reMarkable Notebook: {doc['name']}\\n\")\n                            f.write(f\"UUID: {doc['uuid']}\\n\")\n                            f.write(f\"Created: {doc.get('metadata', {}).get('lastModified', 'Unknown')}\\n\")\n                            f.write(f\"Pages: {len(rm_files)}\\n\\n\")\n                            f.write(\"This is a reMarkable notebook with handwritten content.\\n\")\n                            f.write(\"The original .rm files are stored in the content directory.\\n\")\n                        print(f\"   \ud83d\udcd3 Created notebook info: {doc_name}_notebook_info.txt\")\n                    except Exception as e:\n                        print(f\"   \u274c Failed to create notebook info: {e}\")\n            \n            print(f\"\u2705 Folder structure created successfully\")\n            \n        except Exception as e:\n            print(f\"\u274c Failed to create folder structure: {e}\")\n    \n    def _sanitize_filename(self, filename: str) -> str:\n        \"\"\"Sanitize filename for filesystem use\"\"\"\n        # Remove or replace invalid characters\n        import re\n        sanitized = re.sub(r'[<>:\"/\\\\|?*]', '_', filename)\n        sanitized = sanitized.strip('. ')\n        \n        # Ensure it's not empty\n        if not sanitized:\n            sanitized = \"unnamed\"\n        \n        # Limit length\n        if len(sanitized) > 200:\n            sanitized = sanitized[:200]\n        \n        return sanitized\n    \n    def get_node_by_uuid(self, uuid: str) -> Optional[Dict[str, Any]]:\n        \"\"\"Get a specific node by UUID\"\"\"\n        return self.database['nodes'].get(uuid)\n    \n    def get_documents_in_folder(self, folder_uuid: str) -> List[Dict[str, Any]]:\n        \"\"\"Get all documents in a specific folder\"\"\"\n        return [\n            node for node in self.database['nodes'].values()\n            if node.get('node_type') == 'document' and node.get('parent_uuid') == folder_uuid\n        ]\n    \n    def get_folders(self) -> List[Dict[str, Any]]:\n        \"\"\"Get all folders\"\"\"\n        return [\n            node for node in self.database['nodes'].values()\n            if node.get('node_type') == 'folder'\n        ]\n    \n    def get_root_documents(self) -> List[Dict[str, Any]]:\n        \"\"\"Get all documents in root (no parent)\"\"\"\n        return [\n            node for node in self.database['nodes'].values()\n            if node.get('node_type') == 'document' and not node.get('parent_uuid')\n        ]",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/cloudtest/sync_replica.py",
      "tags": [
        "class",
        "remarkablereplicasync"
      ],
      "updated_at": "2025-12-07T01:55:13.688284",
      "usage_example": "# Example usage:\n# result = RemarkableReplicaSync(bases)"
    },
    {
      "best_practices": [
        "Always instantiate within a try-except block to handle authentication failures gracefully",
        "The class maintains an authenticated session as instance state - reuse the same instance for multiple operations to avoid re-authentication",
        "Call get_root_info() to ensure you're working with the latest document state before analysis",
        "The compare_documents() method uses hardcoded UUIDs - modify these in the source or extend the method to accept parameters",
        "Network errors may occur during API calls - implement retry logic or error handling around method calls",
        "Large documents may take time to download and analyze - consider adding timeouts for production use",
        "The session object is stateful and may expire - implement session refresh logic for long-running processes",
        "Document UUIDs must exist in the reMarkable cloud; non-existent UUIDs will return None from analyze_document"
      ],
      "class_interface": {
        "attributes": [
          {
            "description": "Authenticated HTTP session object for making API requests to reMarkable cloud services. Initialized in __init__ via RemarkableAuth.get_authenticated_session()",
            "is_class_variable": false,
            "name": "session",
            "type": "requests.Session"
          }
        ],
        "methods": [
          {
            "is_property": false,
            "is_static": false,
            "name": "__init__",
            "parameters": {},
            "purpose": "Initialize the DocumentComparator with an authenticated reMarkable session",
            "returns": "None - initializes the instance with self.session attribute",
            "signature": "__init__(self)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "get_root_info",
            "parameters": {},
            "purpose": "Retrieve the current root.docSchema information from reMarkable cloud",
            "returns": "Tuple of (root_data: dict, root_content: str) where root_data contains the root document metadata and root_content is the raw schema content",
            "signature": "get_root_info(self) -> tuple"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "analyze_document",
            "parameters": {
              "doc_name": "Human-readable name for the document (used in console output)",
              "doc_uuid": "The UUID of the document to analyze (full UUID string)"
            },
            "purpose": "Perform comprehensive analysis of a single document's structure, components, and metadata",
            "returns": "Dictionary with keys 'entry' (root entry data), 'schema_lines' (document schema lines), 'components' (dict of component types to their data), and 'metadata' (parsed metadata or None). Returns None if document not found.",
            "signature": "analyze_document(self, doc_uuid: str, doc_name: str) -> dict | None"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "compare_documents",
            "parameters": {},
            "purpose": "Compare two hardcoded documents (Poulpharm invoice vs uploaded document) to identify structural differences",
            "returns": "Tuple of (invoice_data: dict, upload_data: dict) containing full analysis results for both documents, or None if either document cannot be analyzed",
            "signature": "compare_documents(self) -> tuple | None"
          }
        ]
      },
      "complexity": "complex",
      "component_type": "class",
      "conditional_imports": [],
      "created_at": "2025-12-07 00:54:28",
      "decorators": [],
      "dependencies": [
        "json",
        "requests",
        "auth"
      ],
      "description": "A class that compares reMarkable cloud documents to analyze and identify structural differences between them, particularly useful for debugging document upload issues.",
      "docstring": "Compare documents to find structural differences",
      "id": 2117,
      "imports": [
        "import json",
        "from auth import RemarkableAuth"
      ],
      "imports_required": [
        "import json",
        "from auth import RemarkableAuth"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 235,
      "line_start": 13,
      "name": "DocumentComparator",
      "parameters": [
        {
          "annotation": "",
          "default": null,
          "name": "bases"
        }
      ],
      "parameters_explained": {
        "__init__": "No parameters required. The constructor automatically initializes authentication with reMarkable cloud services using RemarkableAuth and establishes an authenticated session. Raises RuntimeError if authentication fails."
      },
      "parent_class": null,
      "purpose": "DocumentComparator provides comprehensive analysis and comparison of reMarkable cloud documents by examining their root.docSchema entries, document schemas, components (metadata, content, pagedata), and structural properties. It's designed to help developers understand differences between documents created through the official app versus custom uploads, enabling debugging of document structure issues.",
      "return_annotation": null,
      "return_explained": "Instantiation returns a DocumentComparator object with an authenticated session. The analyze_document method returns a dictionary containing document entry data, schema lines, components dictionary, and metadata (or None if analysis fails). The compare_documents method returns a tuple of (invoice_data, upload_data) dictionaries with full analysis results for both documents.",
      "settings_required": [
        "RemarkableAuth must be properly configured with reMarkable cloud credentials",
        "Network access to eu.tectonic.remarkable.com API endpoints",
        "Valid reMarkable cloud authentication tokens/session"
      ],
      "source_code": "class DocumentComparator:\n    \"\"\"Compare documents to find structural differences\"\"\"\n    \n    def __init__(self):\n        # Load auth session\n        auth = RemarkableAuth()\n        self.session = auth.get_authenticated_session()\n        \n        if not self.session:\n            raise RuntimeError(\"Failed to authenticate with reMarkable\")\n        \n        print(\"\ud83d\udd04 Document Comparator Initialized\")\n    \n    def get_root_info(self):\n        \"\"\"Get current root.docSchema info\"\"\"\n        root_response = self.session.get(\"https://eu.tectonic.remarkable.com/sync/v4/root\")\n        root_response.raise_for_status()\n        root_data = root_response.json()\n        \n        # Get root content\n        root_content_response = self.session.get(f\"https://eu.tectonic.remarkable.com/sync/v3/files/{root_data['hash']}\")\n        root_content_response.raise_for_status()\n        root_content = root_content_response.text\n        \n        return root_data, root_content\n    \n    def analyze_document(self, doc_uuid: str, doc_name: str):\n        \"\"\"Analyze a single document structure\"\"\"\n        print(f\"\\n\ud83d\udcc4 Analyzing {doc_name} ({doc_uuid[:8]}...)\")\n        print(\"=\" * 50)\n        \n        # Get root info\n        root_data, root_content = self.get_root_info()\n        \n        # Find document in root\n        lines = root_content.strip().split('\\n')\n        doc_entry = None\n        \n        for line in lines[1:]:  # Skip version header\n            if doc_uuid in line:\n                parts = line.split(':')\n                if len(parts) >= 5:\n                    doc_entry = {\n                        'hash': parts[0],\n                        'flags': parts[1],\n                        'uuid': parts[2],\n                        'type': parts[3],\n                        'size': parts[4],\n                        'full_line': line\n                    }\n                    break\n        \n        if not doc_entry:\n            print(f\"\u274c Document not found in root.docSchema\")\n            return None\n        \n        print(f\"\ud83d\udccb ROOT.DOCSCHEMA ENTRY:\")\n        print(f\"   Hash: {doc_entry['hash']}\")\n        print(f\"   Flags: {doc_entry['flags']}\")\n        print(f\"   UUID: {doc_entry['uuid']}\")\n        print(f\"   Type: {doc_entry['type']}\")\n        print(f\"   Size: {doc_entry['size']}\")\n        print(f\"   Full line: {doc_entry['full_line']}\")\n        \n        # Get document schema\n        doc_response = self.session.get(f\"https://eu.tectonic.remarkable.com/sync/v3/files/{doc_entry['hash']}\")\n        doc_response.raise_for_status()\n        doc_content = doc_response.text\n        doc_lines = doc_content.strip().split('\\n')\n        \n        print(f\"\\n\ud83d\udcc4 DOCUMENT SCHEMA:\")\n        print(f\"   Version: {doc_lines[0]}\")\n        print(f\"   Components: {len(doc_lines) - 1}\")\n        \n        components = {}\n        for i, line in enumerate(doc_lines[1:], 1):\n            parts = line.split(':')\n            if len(parts) >= 5:\n                component_type = parts[2].split('.')[-1]  # Get extension\n                components[component_type] = {\n                    'hash': parts[0],\n                    'flags': parts[1],\n                    'filename': parts[2],\n                    'type': parts[3],\n                    'size': parts[4],\n                    'line': line\n                }\n                print(f\"   Component {i}: 
{component_type} ({parts[4]} bytes)\")\n        \n        # Analyze metadata\n        if 'metadata' in components:\n            metadata_hash = components['metadata']['hash']\n            metadata_response = self.session.get(f\"https://eu.tectonic.remarkable.com/sync/v3/files/{metadata_hash}\")\n            metadata_response.raise_for_status()\n            metadata = json.loads(metadata_response.text)\n            \n            print(f\"\\n\ud83d\udcdd METADATA:\")\n            for key, value in sorted(metadata.items()):\n                print(f\"   {key}: {value}\")\n        \n        # Analyze content if exists\n        if 'content' in components:\n            content_hash = components['content']['hash']\n            content_response = self.session.get(f\"https://eu.tectonic.remarkable.com/sync/v3/files/{content_hash}\")\n            content_response.raise_for_status()\n            content_data = content_response.text\n            \n            print(f\"\\n\ud83d\udcc4 CONTENT:\")\n            print(f\"   Size: {len(content_data)} bytes\")\n            print(f\"   Type: {type(content_data)}\")\n            if len(content_data) < 200:\n                print(f\"   Data: {content_data}\")\n            else:\n                print(f\"   Preview: {content_data[:100]}...\")\n        \n        # Analyze pagedata if exists\n        if 'pagedata' in components:\n            pagedata_hash = components['pagedata']['hash']\n            pagedata_response = self.session.get(f\"https://eu.tectonic.remarkable.com/sync/v3/files/{pagedata_hash}\")\n            pagedata_response.raise_for_status()\n            pagedata_data = pagedata_response.text\n            \n            print(f\"\\n\ud83d\udcc4 PAGEDATA:\")\n            print(f\"   Size: {len(pagedata_data)} bytes\")\n            if len(pagedata_data) < 200:\n                print(f\"   Data: {pagedata_data}\")\n            else:\n                print(f\"   Preview: {pagedata_data[:100]}...\")\n        \n        return {\n            'entry': doc_entry,\n            'schema_lines': doc_lines,\n            'components': components,\n            'metadata': metadata if 'metadata' in components else None\n        }\n    \n    def compare_documents(self):\n        \"\"\"Compare the real invoice vs our uploaded document\"\"\"\n        print(f\"\ud83d\udd0d Comparing Real App Document vs Our Upload\")\n        print(\"=\" * 60)\n        \n        # Analyze invoice document (real app) - Poulpharm invoice\n        invoice_uuid = \"cf2a3833-4a8f-4004-ab8d-8dc3c5f561bc\"  # Poulpharm invoice\n        invoice_data = self.analyze_document(invoice_uuid, \"Poulpharm Invoice (Real App)\")\n        \n        # Analyze our uploaded document\n        upload_uuid = \"206f5df3-07c2-4341-8afd-2b7362aefa91\"\n        upload_data = self.analyze_document(upload_uuid, \"Our Upload\")\n        \n        if not invoice_data or not upload_data:\n            print(f\"\u274c Could not analyze both documents\")\n            return\n        \n        # Compare key differences\n        print(f\"\\n\ud83d\udd0d KEY DIFFERENCES:\")\n        print(\"=\" * 30)\n        \n        # Compare root entry flags\n        invoice_flags = invoice_data['entry']['flags']\n        upload_flags = upload_data['entry']['flags']\n        if invoice_flags != upload_flags:\n            print(f\"\ud83d\udccb ROOT FLAGS DIFFER:\")\n            print(f\"   Invoice: {invoice_flags}\")\n            print(f\"   Upload:  {upload_flags}\")\n        \n        # Compare document types\n        invoice_type = 
invoice_data['entry']['type']\n        upload_type = upload_data['entry']['type']\n        if invoice_type != upload_type:\n            print(f\"\ud83d\udccb DOCUMENT TYPE DIFFERS:\")\n            print(f\"   Invoice: {invoice_type}\")\n            print(f\"   Upload:  {upload_type}\")\n        \n        # Compare components\n        invoice_components = set(invoice_data['components'].keys())\n        upload_components = set(upload_data['components'].keys())\n        \n        missing_in_upload = invoice_components - upload_components\n        extra_in_upload = upload_components - invoice_components\n        \n        if missing_in_upload:\n            print(f\"\ud83d\udcc4 COMPONENTS MISSING IN UPLOAD: {missing_in_upload}\")\n        if extra_in_upload:\n            print(f\"\ud83d\udcc4 EXTRA COMPONENTS IN UPLOAD: {extra_in_upload}\")\n        \n        # Compare metadata keys\n        if invoice_data['metadata'] and upload_data['metadata']:\n            invoice_keys = set(invoice_data['metadata'].keys())\n            upload_keys = set(upload_data['metadata'].keys())\n            \n            missing_keys = invoice_keys - upload_keys\n            extra_keys = upload_keys - invoice_keys\n            \n            if missing_keys:\n                print(f\"\ud83d\udcdd METADATA KEYS MISSING IN UPLOAD: {missing_keys}\")\n            if extra_keys:\n                print(f\"\ud83d\udcdd EXTRA METADATA KEYS IN UPLOAD: {extra_keys}\")\n            \n            # Compare specific metadata values\n            common_keys = invoice_keys & upload_keys\n            for key in common_keys:\n                if invoice_data['metadata'][key] != upload_data['metadata'][key]:\n                    print(f\"\ud83d\udcdd METADATA DIFFERS for '{key}':\")\n                    print(f\"   Invoice: {invoice_data['metadata'][key]}\")\n                    print(f\"   Upload:  {upload_data['metadata'][key]}\")\n        \n        # Compare component flags and sizes\n        for component in invoice_components & upload_components:\n            invoice_comp = invoice_data['components'][component]\n            upload_comp = upload_data['components'][component]\n            \n            if invoice_comp['flags'] != upload_comp['flags']:\n                print(f\"\ud83d\udcc4 COMPONENT FLAGS DIFFER for {component}:\")\n                print(f\"   Invoice: {invoice_comp['flags']}\")\n                print(f\"   Upload:  {upload_comp['flags']}\")\n            \n            if invoice_comp['type'] != upload_comp['type']:\n                print(f\"\ud83d\udcc4 COMPONENT TYPE DIFFERS for {component}:\")\n                print(f\"   Invoice: {invoice_comp['type']}\")\n                print(f\"   Upload:  {upload_comp['type']}\")\n        \n        print(f\"\\n\ud83d\udca1 ANALYSIS COMPLETE\")\n        return invoice_data, upload_data",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/cloudtest/compare_documents.py",
      "tags": [
        "remarkable",
        "document-comparison",
        "cloud-sync",
        "debugging",
        "document-analysis",
        "api-client",
        "schema-analysis",
        "metadata-extraction"
      ],
      "updated_at": "2025-12-07T01:54:28.751255",
      "usage_example": "# Initialize the comparator\ncomparator = DocumentComparator()\n\n# Analyze a single document\ndoc_uuid = 'cf2a3833-4a8f-4004-ab8d-8dc3c5f561bc'\nresult = comparator.analyze_document(doc_uuid, 'My Document')\nif result:\n    print(f\"Components: {result['components'].keys()}\")\n    print(f\"Metadata: {result['metadata']}\")\n\n# Compare two documents (uses hardcoded UUIDs)\ninvoice_data, upload_data = comparator.compare_documents()\n\n# Get current root info\nroot_data, root_content = comparator.get_root_info()"
    },
    {
      "best_practices": [
        "Always instantiate FolderDebugger in a try-except block to handle authentication failures gracefully",
        "Call get_root_info() first to obtain the root_content needed by other analysis methods",
        "The class makes multiple API calls, so be mindful of rate limiting and network latency",
        "Methods print diagnostic information to stdout, making them suitable for interactive debugging sessions",
        "The session attribute maintains authentication state and should not be modified directly",
        "Methods may raise HTTP exceptions if API calls fail; wrap calls in try-except blocks for production use",
        "The class is stateless except for the session; each method can be called independently after initialization",
        "analyze_gpt_in_folder() is hardcoded to analyze a specific folder UUID; modify for other folders as needed"
      ],
      "class_interface": {
        "attributes": [
          {
            "description": "Authenticated HTTP session for making API requests to reMarkable cloud services",
            "is_class_variable": false,
            "name": "session",
            "type": "requests.Session"
          }
        ],
        "methods": [
          {
            "is_property": false,
            "is_static": false,
            "name": "__init__",
            "parameters": {},
            "purpose": "Initialize the FolderDebugger with an authenticated reMarkable session",
            "returns": "None (constructor)",
            "signature": "__init__(self)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "get_root_info",
            "parameters": {},
            "purpose": "Retrieve the current root.docSchema information including hash and generation",
            "returns": "Tuple of (root_data dict containing hash and generation, root_content string with full schema)",
            "signature": "get_root_info(self) -> tuple"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "analyze_gpt_in_folder",
            "parameters": {
              "root_content": "The root document schema content as a string, obtained from get_root_info()"
            },
            "purpose": "Analyze the gpt_in folder structure in detail, including its docSchema and metadata",
            "returns": "Tuple of (gpt_in_entry dict with hash/uuid/type/size, folder_metadata dict) or None if folder not found",
            "signature": "analyze_gpt_in_folder(self, root_content: str) -> tuple | None"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "find_documents_in_folder",
            "parameters": {
              "folder_uuid": "The UUID of the folder to search for documents in",
              "root_content": "The root document schema content as a string"
            },
            "purpose": "Find all documents that have the specified folder as their parent",
            "returns": "List of dictionaries, each containing uuid, hash, type, name, parent, and deleted status of documents",
            "signature": "find_documents_in_folder(self, root_content: str, folder_uuid: str) -> list"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "check_web_app_sync_status",
            "parameters": {},
            "purpose": "Check the current sync status and generation from the reMarkable API",
            "returns": "Dictionary containing sync generation, hash, and optional broadcast flags, or None if check fails",
            "signature": "check_web_app_sync_status(self) -> dict | None"
          }
        ]
      },
      "complexity": "moderate",
      "component_type": "class",
      "conditional_imports": [],
      "created_at": "2025-12-07 00:53:46",
      "decorators": [],
      "dependencies": [
        "json",
        "requests"
      ],
      "description": "A debugging utility class for analyzing and troubleshooting folder structure and visibility issues in the reMarkable cloud sync system.",
      "docstring": "Debug folder structure and visibility issues",
      "id": 2115,
      "imports": [
        "import json",
        "from auth import RemarkableAuth"
      ],
      "imports_required": [
        "import json",
        "from auth import RemarkableAuth"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 197,
      "line_start": 12,
      "name": "FolderDebugger",
      "parameters": [
        {
          "annotation": "",
          "default": null,
          "name": "bases"
        }
      ],
      "parameters_explained": {
        "__init__": "The constructor takes no parameters. It automatically initializes authentication with the reMarkable service using RemarkableAuth, obtains an authenticated session, and prints a confirmation message. Raises RuntimeError if authentication fails."
      },
      "parent_class": null,
      "purpose": "FolderDebugger provides comprehensive diagnostic capabilities for investigating folder structure problems in the reMarkable cloud storage. It authenticates with the reMarkable API, retrieves and analyzes the root document schema, examines specific folder structures (particularly the 'gpt_in' folder), finds documents within folders, and checks sync status. This class is designed for debugging scenarios where folders or documents are not appearing correctly in the reMarkable web interface or devices.",
      "return_annotation": null,
      "return_explained": "Instantiation returns a FolderDebugger object with an authenticated session. Methods return various data structures: get_root_info() returns a tuple of (root_data dict, root_content string); analyze_gpt_in_folder() returns a tuple of (gpt_in_entry dict, folder_metadata dict) or None; find_documents_in_folder() returns a list of document dictionaries; check_web_app_sync_status() returns sync_data dict or None.",
      "settings_required": [
        "RemarkableAuth must be properly configured with valid reMarkable cloud credentials",
        "Network access to eu.tectonic.remarkable.com API endpoints",
        "Valid reMarkable account authentication tokens/credentials"
      ],
      "source_code": "class FolderDebugger:\n    \"\"\"Debug folder structure and visibility issues\"\"\"\n    \n    def __init__(self):\n        # Load auth session\n        auth = RemarkableAuth()\n        self.session = auth.get_authenticated_session()\n        \n        if not self.session:\n            raise RuntimeError(\"Failed to authenticate with reMarkable\")\n        \n        print(\"\ud83d\udd04 Folder Debugger Initialized\")\n    \n    def get_root_info(self):\n        \"\"\"Get current root.docSchema info\"\"\"\n        print(\"\\n\ud83d\udccb Getting current root.docSchema...\")\n        \n        # Get root info\n        root_response = self.session.get(\"https://eu.tectonic.remarkable.com/sync/v4/root\")\n        root_response.raise_for_status()\n        root_data = root_response.json()\n        \n        # Get root content\n        root_content_response = self.session.get(f\"https://eu.tectonic.remarkable.com/sync/v3/files/{root_data['hash']}\")\n        root_content_response.raise_for_status()\n        root_content = root_content_response.text\n        \n        print(f\"\u2705 Current root hash: {root_data['hash']}\")\n        print(f\"\u2705 Current generation: {root_data.get('generation')}\")\n        \n        return root_data, root_content\n    \n    def analyze_gpt_in_folder(self, root_content: str):\n        \"\"\"Analyze the gpt_in folder in detail\"\"\"\n        print(f\"\\n\ud83d\udcc1 Analyzing gpt_in folder structure...\")\n        \n        gpt_in_uuid = \"99c6551f-2855-44cf-a4e4-c9c586558f42\"\n        \n        # Find gpt_in folder in root\n        lines = root_content.strip().split('\\n')\n        gpt_in_entry = None\n        \n        for line in lines[1:]:  # Skip version header\n            if gpt_in_uuid in line:\n                parts = line.split(':')\n                if len(parts) >= 5:\n                    gpt_in_entry = {\n                        'hash': parts[0],\n                        'uuid': parts[2],\n                        'type': parts[3],\n                        'size': parts[4],\n                        'full_line': line\n                    }\n                    break\n        \n        if not gpt_in_entry:\n            print(f\"\u274c gpt_in folder not found in root.docSchema!\")\n            return None\n            \n        print(f\"\u2705 Found gpt_in folder entry:\")\n        print(f\"   Hash: {gpt_in_entry['hash']}\")\n        print(f\"   UUID: {gpt_in_entry['uuid']}\")\n        print(f\"   Type: {gpt_in_entry['type']}\")\n        print(f\"   Size: {gpt_in_entry['size']}\")\n        print(f\"   Full line: {gpt_in_entry['full_line']}\")\n        \n        # Get gpt_in folder's docSchema\n        print(f\"\\n\ud83d\udcc4 Getting gpt_in folder docSchema...\")\n        folder_response = self.session.get(f\"https://eu.tectonic.remarkable.com/sync/v3/files/{gpt_in_entry['hash']}\")\n        folder_response.raise_for_status()\n        folder_content = folder_response.text\n        \n        print(f\"\u2705 gpt_in docSchema size: {len(folder_content)} bytes\")\n        print(f\"\ud83d\udcc4 gpt_in docSchema content:\")\n        \n        folder_lines = folder_content.strip().split('\\n')\n        for i, line in enumerate(folder_lines):\n            print(f\"   Line {i}: {line}\")\n        \n        # Get gpt_in folder metadata\n        metadata_hash = None\n        for line in folder_lines[1:]:  # Skip version\n            if ':' in line and '.metadata' in line:\n                parts = line.split(':')\n                if len(parts) 
>= 5:\n                    metadata_hash = parts[0]\n                    break\n        \n        if metadata_hash:\n            print(f\"\\n\ud83d\udcdd Getting gpt_in folder metadata...\")\n            metadata_response = self.session.get(f\"https://eu.tectonic.remarkable.com/sync/v3/files/{metadata_hash}\")\n            metadata_response.raise_for_status()\n            folder_metadata = json.loads(metadata_response.text)\n            \n            print(f\"\u2705 gpt_in folder metadata:\")\n            for key, value in folder_metadata.items():\n                print(f\"   {key}: {value}\")\n            \n            return gpt_in_entry, folder_metadata\n        else:\n            print(f\"\u274c Could not find metadata for gpt_in folder\")\n            return gpt_in_entry, None\n    \n    def find_documents_in_folder(self, root_content: str, folder_uuid: str):\n        \"\"\"Find all documents that claim to be in the specified folder\"\"\"\n        print(f\"\\n\ud83d\udd0d Finding documents with parent '{folder_uuid}'...\")\n        \n        documents_in_folder = []\n        lines = root_content.strip().split('\\n')\n        \n        for line in lines[1:]:  # Skip version header\n            if ':' in line:\n                parts = line.split(':')\n                if len(parts) >= 5:\n                    doc_uuid = parts[2]\n                    doc_hash = parts[0]\n                    doc_type = parts[3]\n                    \n                    # Skip the folder itself\n                    if doc_uuid == folder_uuid:\n                        continue\n                    \n                    # Get document metadata to check parent\n                    try:\n                        doc_response = self.session.get(f\"https://eu.tectonic.remarkable.com/sync/v3/files/{doc_hash}\")\n                        doc_response.raise_for_status()\n                        doc_content = doc_response.text\n                        \n                        # Find metadata hash in document schema\n                        doc_lines = doc_content.strip().split('\\n')\n                        metadata_hash = None\n                        for doc_line in doc_lines[1:]:\n                            if ':' in doc_line and '.metadata' in doc_line:\n                                metadata_parts = doc_line.split(':')\n                                if len(metadata_parts) >= 5:\n                                    metadata_hash = metadata_parts[0]\n                                    break\n                        \n                        if metadata_hash:\n                            metadata_response = self.session.get(f\"https://eu.tectonic.remarkable.com/sync/v3/files/{metadata_hash}\")\n                            metadata_response.raise_for_status()\n                            metadata = json.loads(metadata_response.text)\n                            \n                            if metadata.get('parent') == folder_uuid:\n                                documents_in_folder.append({\n                                    'uuid': doc_uuid,\n                                    'hash': doc_hash,\n                                    'type': doc_type,\n                                    'name': metadata.get('visibleName', 'Unknown'),\n                                    'parent': metadata.get('parent'),\n                                    'deleted': metadata.get('deleted', False)\n                                })\n                    \n                    except Exception as e:\n                        
print(f\"   \u26a0\ufe0f Could not check document {doc_uuid[:8]}...: {e}\")\n                        continue\n        \n        print(f\"\u2705 Found {len(documents_in_folder)} documents in folder '{folder_uuid}':\")\n        for doc in documents_in_folder:\n            status = \"\ud83d\uddd1\ufe0f DELETED\" if doc['deleted'] else \"\u2705 Active\"\n            print(f\"   {status} {doc['name']} ({doc['uuid'][:8]}...)\")\n        \n        return documents_in_folder\n    \n    def check_web_app_sync_status(self):\n        \"\"\"Check if there are any sync-related issues\"\"\"\n        print(f\"\\n\ud83c\udf10 Checking web app sync indicators...\")\n        \n        # Check if there are any pending sync operations\n        try:\n            sync_response = self.session.get(\"https://eu.tectonic.remarkable.com/sync/v4/root\")\n            sync_response.raise_for_status()\n            sync_data = sync_response.json()\n            \n            print(f\"\u2705 Current sync generation: {sync_data.get('generation')}\")\n            print(f\"\u2705 Current sync hash: {sync_data.get('hash')}\")\n            \n            # Check for any broadcast flags or sync indicators\n            if 'broadcast' in sync_data:\n                print(f\"\ud83d\udce1 Broadcast flag: {sync_data['broadcast']}\")\n            \n            return sync_data\n            \n        except Exception as e:\n            print(f\"\u274c Could not check sync status: {e}\")\n            return None",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/cloudtest/debug_gpt_in_folder.py",
      "tags": [
        "debugging",
        "remarkable",
        "cloud-sync",
        "folder-structure",
        "api-client",
        "diagnostics",
        "document-management",
        "metadata-analysis"
      ],
      "updated_at": "2025-12-07T01:53:46.339828",
      "usage_example": "# Initialize the debugger\ndebugger = FolderDebugger()\n\n# Get root document schema information\nroot_data, root_content = debugger.get_root_info()\n\n# Analyze the gpt_in folder structure\ngpt_in_entry, folder_metadata = debugger.analyze_gpt_in_folder(root_content)\n\n# Find all documents in the gpt_in folder\nfolder_uuid = '99c6551f-2855-44cf-a4e4-c9c586558f42'\ndocuments = debugger.find_documents_in_folder(root_content, folder_uuid)\n\n# Check sync status\nsync_status = debugger.check_web_app_sync_status()"
    },
    {
      "best_practices": [],
      "class_interface": {
        "attributes": [],
        "methods": [
          {
            "is_property": false,
            "is_static": false,
            "name": "__init__",
            "parameters": {},
            "purpose": "Internal method:   init  ",
            "returns": "None",
            "signature": "__init__(self)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "parse_directory_listing",
            "parameters": {
              "content": "Type: bytes"
            },
            "purpose": "Parse directory listing using the correct reMarkable format (from local_replica_v2.py)",
            "returns": "None",
            "signature": "parse_directory_listing(self, content)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "fetch_hash_content",
            "parameters": {
              "hash_ref": "Type: str"
            },
            "purpose": "Fetch content from reMarkable cloud by hash",
            "returns": "None",
            "signature": "fetch_hash_content(self, hash_ref)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "extract_metadata",
            "parameters": {
              "metadata_hash": "Type: str"
            },
            "purpose": "Extract metadata from hash",
            "returns": "None",
            "signature": "extract_metadata(self, metadata_hash)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "analyze_cloud_state",
            "parameters": {},
            "purpose": "Analyze current cloud state using proper discovery method from local_replica_v2.py",
            "returns": "None",
            "signature": "analyze_cloud_state(self)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "find_target_items",
            "parameters": {
              "documents": "Parameter",
              "folders": "Parameter"
            },
            "purpose": "Find pylontech document, Myfolder, and Otherfolder",
            "returns": "None",
            "signature": "find_target_items(self, documents, folders)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "get_document_schema",
            "parameters": {
              "doc_hash": "Type: str"
            },
            "purpose": "Retrieve document's docSchema",
            "returns": "None",
            "signature": "get_document_schema(self, doc_hash)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "get_current_metadata",
            "parameters": {
              "doc_lines": "Type: list"
            },
            "purpose": "Extract and fetch current metadata",
            "returns": "None",
            "signature": "get_current_metadata(self, doc_lines)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "create_updated_metadata",
            "parameters": {
              "current_metadata": "Type: dict",
              "new_parent_uuid": "Type: str",
              "old_parent_uuid": "Type: str"
            },
            "purpose": "Create updated metadata with new parent",
            "returns": "None",
            "signature": "create_updated_metadata(self, current_metadata, new_parent_uuid, old_parent_uuid)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "upload_new_metadata",
            "parameters": {
              "doc_uuid": "Type: str",
              "metadata_json": "Type: str"
            },
            "purpose": "Upload new metadata and return hash",
            "returns": "See docstring for return details",
            "signature": "upload_new_metadata(self, metadata_json, doc_uuid)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "upload_real_pagedata",
            "parameters": {
              "doc_uuid": "Type: str"
            },
            "purpose": "Upload real pagedata (newline) to match real app documents",
            "returns": "None",
            "signature": "upload_real_pagedata(self, doc_uuid)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "create_new_document_schema",
            "parameters": {
              "doc_lines": "Type: list",
              "metadata_line": "Type: str",
              "new_metadata_hash": "Type: str",
              "new_pagedata_hash": "Type: str"
            },
            "purpose": "Create new document schema with updated metadata hash and pagedata",
            "returns": "None",
            "signature": "create_new_document_schema(self, doc_lines, new_metadata_hash, metadata_line, new_pagedata_hash)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "upload_new_document_schema",
            "parameters": {
              "doc_content": "Type: str",
              "doc_uuid": "Type: str"
            },
            "purpose": "Upload new document schema",
            "returns": "None",
            "signature": "upload_new_document_schema(self, doc_content, doc_uuid)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "update_root_docschema",
            "parameters": {
              "doc_info": "Type: dict",
              "new_doc_hash": "Type: str",
              "root_content": "Type: str"
            },
            "purpose": "Update root.docSchema with new document hash",
            "returns": "None",
            "signature": "update_root_docschema(self, root_content, doc_info, new_doc_hash)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "upload_new_root",
            "parameters": {
              "generation": "Type: int",
              "root_content": "Type: str"
            },
            "purpose": "Upload new root.docSchema and update roothash",
            "returns": "None",
            "signature": "upload_new_root(self, root_content, generation)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "analyze_before_move",
            "parameters": {},
            "purpose": "Analyze cloud state and show what the script will do before executing",
            "returns": "None",
            "signature": "analyze_before_move(self)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "execute_move",
            "parameters": {
              "analysis_data": "Parameter"
            },
            "purpose": "Execute the actual move operation using the analysis data",
            "returns": "None",
            "signature": "execute_move(self, analysis_data)"
          }
        ]
      },
      "complexity": "moderate",
      "component_type": "class",
      "conditional_imports": [],
      "created_at": "2025-12-07 00:53:04",
      "decorators": [],
      "dependencies": [],
      "description": "Moves 'pylontech' document from 'Myfolder' to 'Otherfolder' using the working upload mechanism",
      "docstring": "Moves 'pylontech' document from 'Myfolder' to 'Otherfolder' using the working upload mechanism",
      "id": 2113,
      "imports": [
        "import json",
        "import time",
        "import hashlib",
        "import uuid",
        "import base64",
        "import zlib",
        "import re",
        "from pathlib import Path",
        "from auth import RemarkableAuth",
        "import crc32c"
      ],
      "imports_required": [
        "import json",
        "import time",
        "import hashlib",
        "import uuid",
        "import base64"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 796,
      "line_start": 54,
      "name": "PylontechMover",
      "parameters": [
        {
          "annotation": "",
          "default": null,
          "name": "bases"
        }
      ],
      "parameters_explained": {
        "bases": "Parameter of type "
      },
      "parent_class": null,
      "purpose": "Moves 'pylontech' document from 'Myfolder' to 'Otherfolder' using the working upload mechanism",
      "return_annotation": null,
      "return_explained": "Returns unspecified type",
      "settings_required": [],
      "source_code": "class PylontechMover:\n    \"\"\"Moves 'pylontech' document from 'Myfolder' to 'Otherfolder' using the working upload mechanism\"\"\"\n    \n    def __init__(self):\n        # Load auth session\n        auth = RemarkableAuth()\n        self.session = auth.get_authenticated_session()\n        \n        if not self.session:\n            raise RuntimeError(\"Failed to authenticate with reMarkable\")\n        \n        print(\"\ud83d\udd04 Pylontech Document Mover Initialized\")\n    \n    def parse_directory_listing(self, content: bytes):\n        \"\"\"Parse directory listing using the correct reMarkable format (from local_replica_v2.py)\"\"\"\n        try:\n            text_content = content.decode('utf-8')\n        except UnicodeDecodeError:\n            return {'child_objects': [], 'data_components': []}\n        \n        result = {\n            'child_objects': [],\n            'data_components': []\n        }\n        \n        lines = text_content.split('\\n')\n        if lines and lines[0].strip().isdigit():\n            lines = lines[1:]  # Skip count line\n        \n        entry_pattern = r'^([a-f0-9]{64}):([0-9a-fA-F]+):([a-f0-9-/]+(?:\\.[^:]+)?):(\\d+):(\\d+)$'\n        \n        for line in lines:\n            line = line.strip()\n            if not line:\n                continue\n                \n            match = re.match(entry_pattern, line, re.IGNORECASE)\n            if match:\n                hash_val, flags, uuid_component, type_val, size_val = match.groups()\n                \n                entry_info = {\n                    'hash': hash_val,\n                    'flags': flags,\n                    'uuid_component': uuid_component,\n                    'type': type_val,\n                    'size': int(size_val)\n                }\n                \n                if '.' 
in uuid_component:\n                    # Data component (.content, .metadata, .pdf, .rm, etc.)\n                    component_type = uuid_component.split('.')[-1]\n                    if '/' in component_type:  # Handle .rm files like \"uuid/filename.rm\"\n                        component_type = component_type.split('/')[-1]\n                    entry_info['component_type'] = component_type\n                    result['data_components'].append(entry_info)\n                else:\n                    # Child object (pure UUID)\n                    result['child_objects'].append(entry_info)\n        \n        return result\n    \n    def fetch_hash_content(self, hash_ref: str):\n        \"\"\"Fetch content from reMarkable cloud by hash\"\"\"\n        try:\n            url = f\"https://eu.tectonic.remarkable.com/sync/v3/files/{hash_ref}\"\n            response = self.session.get(url)\n            response.raise_for_status()\n            \n            return {\n                'hash': hash_ref,\n                'content': response.content,\n                'size': len(response.content)\n            }\n            \n        except Exception as e:\n            print(f\"\u274c Failed to fetch {hash_ref[:16]}...: {e}\")\n            return None\n    \n    def extract_metadata(self, metadata_hash: str):\n        \"\"\"Extract metadata from hash\"\"\"\n        content_info = self.fetch_hash_content(metadata_hash)\n        if not content_info:\n            return None\n        \n        try:\n            text_content = content_info['content'].decode('utf-8')\n            return json.loads(text_content)\n        except (UnicodeDecodeError, json.JSONDecodeError) as e:\n            print(f\"\u274c Failed to parse metadata {metadata_hash[:16]}...: {e}\")\n            return None\n    \n    def analyze_cloud_state(self):\n        \"\"\"Analyze current cloud state using proper discovery method from local_replica_v2.py\"\"\"\n        print(\"\\n\ud83d\udd0d STEP 1: ANALYZING CLOUD STATE\")\n        print(\"=\" * 50)\n        \n        # Get current root info\n        root_response = self.session.get(\"https://eu.tectonic.remarkable.com/sync/v4/root\")\n        root_response.raise_for_status()\n        root_data = root_response.json()\n        \n        # Get root content\n        root_content_response = self.session.get(f\"https://eu.tectonic.remarkable.com/sync/v3/files/{root_data['hash']}\")\n        root_content_response.raise_for_status()\n        root_content = root_content_response.text\n        \n        print(f\"\u2705 Current root hash: {root_data['hash']}\")\n        print(f\"\u2705 Current generation: {root_data.get('generation')}\")\n        print(f\"\u2705 Root content size: {len(root_content)} bytes\")\n        \n        # Use proper discovery method like local_replica_v2.py\n        all_nodes = {}\n        discovered_hashes = set()\n        hashes_to_process = [root_data['hash']]\n        \n        print(f\"\\n\ud83d\udccb Discovering all nodes from root...\")\n        \n        while hashes_to_process:\n            current_hash = hashes_to_process.pop(0)\n            \n            if current_hash in discovered_hashes:\n                continue\n                \n            discovered_hashes.add(current_hash)\n            print(f\"  Processing: {current_hash[:16]}...\")\n            \n            # Fetch and parse content\n            content_info = self.fetch_hash_content(current_hash)\n            if not content_info:\n                continue\n            \n            parsed = 
self.parse_directory_listing(content_info['content'])\n            \n            # Extract metadata if available\n            metadata = {}\n            metadata_hash = None\n            node_name = f\"unknown_{current_hash[:8]}\"\n            node_type = \"folder\"\n            parent_uuid = None\n            \n            for component in parsed['data_components']:\n                if component['component_type'] == 'metadata':\n                    metadata_hash = component['hash']\n                    extracted_metadata = self.extract_metadata(metadata_hash)\n                    if extracted_metadata:\n                        metadata = extracted_metadata\n                        node_name = metadata.get('visibleName', node_name)\n                        if metadata.get('type') == 'DocumentType':\n                            node_type = \"document\"\n                        elif metadata.get('type') == 'CollectionType':\n                            node_type = \"folder\"\n                        parent_uuid = metadata.get('parent', '') or None\n                    break\n            \n            # Determine node UUID\n            node_uuid = None\n            for component in parsed['child_objects']:\n                node_uuid = component['uuid_component']\n                break\n            if not node_uuid and parsed['data_components']:\n                component_name = parsed['data_components'][0]['uuid_component']\n                if '.' in component_name:\n                    node_uuid = component_name.split('.')[0]\n            if not node_uuid:\n                node_uuid = current_hash[:32]  # Fallback\n            \n            # Store node\n            all_nodes[node_uuid] = {\n                'uuid': node_uuid,\n                'hash': current_hash,\n                'name': node_name,\n                'node_type': node_type,\n                'parent_uuid': parent_uuid,\n                'metadata': metadata,\n                'metadata_hash': metadata_hash\n            }\n            \n            print(f\"    \u2192 {node_type.upper()}: {node_name} (parent: {parent_uuid or 'ROOT'})\")\n            \n            # Add child hashes to process\n            for child_obj in parsed['child_objects']:\n                if child_obj['hash'] not in discovered_hashes:\n                    hashes_to_process.append(child_obj['hash'])\n        \n        # Organize by type\n        documents = {uuid: node for uuid, node in all_nodes.items() if node['node_type'] == 'document'}\n        folders = {uuid: node for uuid, node in all_nodes.items() if node['node_type'] == 'folder'}\n        \n        print(f\"\u2705 Found {len(documents)} documents and {len(folders)} folders\")\n        \n        return root_data, root_content, documents, folders\n    \n    def find_target_items(self, documents, folders):\n        \"\"\"Find pylontech document, Myfolder, and Otherfolder\"\"\"\n        print(f\"\\n\ud83c\udfaf STEP 2: FINDING TARGET ITEMS\")\n        print(\"-\" * 30)\n        \n        print(f\"\ud83d\udcc4 Available documents:\")\n        for doc_uuid, doc_data in documents.items():\n            print(f\"   - {doc_data['name']} (UUID: {doc_uuid[:8]}...) in parent: {doc_data['parent_uuid'] or 'root'}\")\n        \n        print(f\"\\n\ud83d\udcc1 Available folders:\")\n        for folder_uuid, folder_data in folders.items():\n            print(f\"   - {folder_data['name']} (UUID: {folder_uuid[:8]}...) 
in parent: {folder_data['parent_uuid'] or 'root'}\")\n        \n        # Find pylontech document\n        pylontech_doc = None\n        pylontech_uuid = None\n        \n        for doc_uuid, doc_data in documents.items():\n            if 'pylontech' in doc_data['name'].lower():\n                pylontech_doc = doc_data\n                pylontech_uuid = doc_uuid\n                break\n        \n        if not pylontech_doc:\n            print(\"\u274c Could not find 'pylontech' document\")\n            return None, None, None, None, None, None\n        \n        # Find Myfolder\n        myfolder_uuid = None\n        myfolder_data = None\n        \n        for folder_uuid, folder_data in folders.items():\n            if 'myfolder' in folder_data['name'].lower():\n                myfolder_uuid = folder_uuid\n                myfolder_data = folder_data\n                break\n        \n        if not myfolder_data:\n            print(\"\u274c Could not find 'Myfolder' folder\")\n            return None, None, None, None, None, None\n        \n        # Find Otherfolder\n        otherfolder_uuid = None\n        otherfolder_data = None\n        \n        for folder_uuid, folder_data in folders.items():\n            if 'otherfolder' in folder_data['name'].lower():\n                otherfolder_uuid = folder_uuid\n                otherfolder_data = folder_data\n                break\n        \n        if not otherfolder_data:\n            print(\"\u274c Could not find 'Otherfolder' folder\")\n            return None, None, None, None, None, None\n        \n        # Verify pylontech is currently in Myfolder\n        if pylontech_doc['parent_uuid'] != myfolder_uuid:\n            print(f\"\u26a0\ufe0f WARNING: 'pylontech' document is not currently in 'Myfolder'\")\n            print(f\"   Current parent: {pylontech_doc['parent_uuid']}\")\n            print(f\"   Expected parent (Myfolder): {myfolder_uuid}\")\n            print(f\"   Will proceed with move from current location to Otherfolder\")\n        \n        print(f\"\u2705 Found all target items:\")\n        print(f\"   \ud83d\udcc4 Document: {pylontech_doc['name']} (UUID: {pylontech_uuid[:8]}...)\")\n        print(f\"   \ud83d\udcc1 Source: {myfolder_data['name']} (UUID: {myfolder_uuid[:8]}...)\")\n        print(f\"   \ud83d\udcc1 Target: {otherfolder_data['name']} (UUID: {otherfolder_uuid[:8]}...)\")\n        \n        return pylontech_uuid, pylontech_doc, myfolder_uuid, myfolder_data, otherfolder_uuid, otherfolder_data\n    \n    def get_document_schema(self, doc_hash: str):\n        \"\"\"Retrieve document's docSchema\"\"\"\n        print(f\"\\n\ud83d\udcc4 STEP 3: RETRIEVING DOCUMENT SCHEMA\")\n        print(\"-\" * 30)\n        \n        doc_response = self.session.get(f\"https://eu.tectonic.remarkable.com/sync/v3/files/{doc_hash}\")\n        doc_response.raise_for_status()\n        doc_content = doc_response.text\n        \n        print(f\"\u2705 Document docSchema size: {len(doc_content)} bytes\")\n        print(f\"\ud83d\udcc4 Document docSchema content:\")\n        \n        lines = doc_content.strip().split('\\n')\n        for i, line in enumerate(lines):\n            print(f\"   Line {i}: {line}\")\n        \n        return doc_content, lines\n    \n    def get_current_metadata(self, doc_lines: list):\n        \"\"\"Extract and fetch current metadata\"\"\"\n        print(f\"\\n\ud83d\udcdd STEP 4: GETTING CURRENT METADATA\")\n        print(\"-\" * 30)\n        \n        metadata_hash = None\n        metadata_line = None\n        
\n        # Find metadata component\n        for line in doc_lines[1:]:  # Skip version\n            if ':' in line and '.metadata' in line:\n                parts = line.split(':')\n                if len(parts) >= 5:\n                    metadata_hash = parts[0]\n                    metadata_line = line\n                    break\n        \n        if not metadata_hash:\n            raise ValueError(\"Metadata component not found in document schema\")\n        \n        print(f\"\u2705 Metadata hash: {metadata_hash}\")\n        \n        # Fetch current metadata\n        metadata_response = self.session.get(f\"https://eu.tectonic.remarkable.com/sync/v3/files/{metadata_hash}\")\n        metadata_response.raise_for_status()\n        current_metadata = json.loads(metadata_response.text)\n        \n        print(f\"\u2705 Current metadata:\")\n        for key, value in current_metadata.items():\n            print(f\"   {key}: {value}\")\n        \n        return current_metadata, metadata_line\n    \n    def create_updated_metadata(self, current_metadata: dict, new_parent_uuid: str, old_parent_uuid: str):\n        \"\"\"Create updated metadata with new parent\"\"\"\n        print(f\"\\n\ud83d\udd04 STEP 5: CREATING UPDATED METADATA\")\n        print(\"-\" * 30)\n        \n        # Copy current metadata and update parent\n        updated_metadata = current_metadata.copy()\n        updated_metadata['parent'] = new_parent_uuid\n        \n        print(f\"\u2705 Updating parent:\")\n        print(f\"   Old parent: {old_parent_uuid}\")\n        print(f\"   New parent: {new_parent_uuid}\")\n        \n        # Add/update source field to match real app documents\n        updated_metadata['source'] = 'com.remarkable.macos'\n        print(f\"\u2705 Setting 'source' field: com.remarkable.macos\")\n        \n        # Fix lastOpened to match real app behavior\n        if 'lastOpened' in updated_metadata and updated_metadata['lastOpened'] != 0:\n            updated_metadata['lastOpened'] = 0\n            print(f\"\u2705 Setting lastOpened to 0 (real app behavior)\")\n        \n        # Make metadata match real app behavior for moves\n        updated_metadata['lastModified'] = int(time.time() * 1000)\n        updated_metadata['metadatamodified'] = False\n        updated_metadata['modified'] = False\n        \n        # Convert to JSON\n        updated_metadata_json = json.dumps(updated_metadata, separators=(',', ':'))\n        \n        print(f\"\u2705 Updated metadata ({len(updated_metadata_json)} bytes):\")\n        print(f\"   {updated_metadata_json[:100]}...\")\n        \n        return updated_metadata_json\n    \n    def upload_new_metadata(self, metadata_json: str, doc_uuid: str):\n        \"\"\"Upload new metadata and return hash\"\"\"\n        print(f\"\\n\u2b06\ufe0f STEP 6: UPLOADING NEW METADATA\")\n        print(\"-\" * 30)\n        \n        # Calculate hash\n        metadata_hash = hashlib.sha256(metadata_json.encode()).hexdigest()\n        print(f\"\u2705 New metadata hash: {metadata_hash}\")\n        \n        # Upload using working method from test_move_from_trash.py\n        headers = {\n            'Content-Type': 'application/octet-stream',\n            'rm-batch-number': '1',\n            'rm-filename': f'{doc_uuid}.metadata',\n            'rm-sync-id': str(uuid.uuid4()),\n            'User-Agent': 'reMarkable-desktop-win/3.11.1.1951',\n            'Accept-Encoding': 'gzip, deflate',\n            'Accept-Language': 'en-BE,*',\n            'Connection': 'Keep-Alive'\n        }\n    
    \n        # Add CRC32C checksum\n        crc32c_header = compute_crc32c_header(metadata_json.encode())\n        if crc32c_header:\n            headers['x-goog-hash'] = crc32c_header\n        \n        upload_response = self.session.put(\n            f\"https://eu.tectonic.remarkable.com/sync/v3/files/{metadata_hash}\",\n            data=metadata_json.encode(),\n            headers=headers \n        )\n        \n        print(f\"\u2705 Metadata upload response: {upload_response.status_code}\")\n        if upload_response.status_code not in [200, 202]:\n            print(f\"\u274c Upload failed: {upload_response.text}\")\n            raise RuntimeError(f\"Metadata upload failed: {upload_response.status_code}\")\n        \n        return metadata_hash\n    \n    def upload_real_pagedata(self, doc_uuid: str):\n        \"\"\"Upload real pagedata (newline) to match real app documents\"\"\"\n        print(f\"\\n\u2b06\ufe0f STEP 7: UPLOADING REAL PAGEDATA\")\n        print(\"-\" * 30)\n        \n        # Real app pagedata is just a newline character\n        pagedata_content = \"\\n\"\n        pagedata_hash = hashlib.sha256(pagedata_content.encode()).hexdigest()\n        \n        print(f\"\u2705 Real pagedata hash: {pagedata_hash}\")\n        print(f\"\u2705 Real pagedata content: {repr(pagedata_content)} ({len(pagedata_content)} bytes)\")\n        \n        # Upload pagedata using working method\n        headers = {\n            'Content-Type': 'application/octet-stream',\n            'rm-batch-number': '1',\n            'rm-filename': f'{doc_uuid}.pagedata',\n            'rm-sync-id': str(uuid.uuid4()),\n            'User-Agent': 'reMarkable-desktop-win/3.11.1.1951',\n            'Accept-Encoding': 'gzip, deflate',\n            'Accept-Language': 'en-BE,*',\n            'Connection': 'Keep-Alive'\n        }\n        \n        # Add CRC32C checksum\n        crc32c_header = compute_crc32c_header(pagedata_content.encode())\n        if crc32c_header:\n            headers['x-goog-hash'] = crc32c_header\n        \n        upload_response = self.session.put(\n            f\"https://eu.tectonic.remarkable.com/sync/v3/files/{pagedata_hash}\",\n            data=pagedata_content.encode(),\n            headers=headers \n        )\n        \n        print(f\"\u2705 Pagedata upload response: {upload_response.status_code}\")\n        if upload_response.status_code not in [200, 202]:\n            print(f\"\u274c Upload failed: {upload_response.text}\")\n            raise RuntimeError(f\"Pagedata upload failed: {upload_response.status_code}\")\n        \n        return pagedata_hash\n    \n    def create_new_document_schema(self, doc_lines: list, new_metadata_hash: str, metadata_line: str, new_pagedata_hash: str = None):\n        \"\"\"Create new document schema with updated metadata hash and pagedata\"\"\"\n        print(f\"\\n\ud83c\udfd7\ufe0f STEP 8: CREATING NEW DOCUMENT SCHEMA\")\n        print(\"-\" * 30)\n        \n        # Replace metadata line and pagedata line with new hashes\n        new_lines = []\n        pagedata_line = None\n        \n        # Find pagedata line\n        for line in doc_lines[1:]:  # Skip version\n            if ':' in line and '.pagedata' in line:\n                pagedata_line = line\n                break\n        \n        for line in doc_lines:\n            if line == metadata_line:\n                # Replace metadata hash but keep size\n                parts = line.split(':')\n                parts[0] = new_metadata_hash\n                new_line = 
':'.join(parts)\n                new_lines.append(new_line)\n                print(f\"\u2705 Updated metadata line:\")\n                print(f\"   Old: {line}\")\n                print(f\"   New: {new_line}\")\n            elif new_pagedata_hash and line == pagedata_line:\n                # Replace pagedata hash and update size to 1 byte\n                parts = line.split(':')\n                parts[0] = new_pagedata_hash\n                parts[4] = '1'  # Update size to 1 byte (newline)\n                new_line = ':'.join(parts)\n                new_lines.append(new_line)\n                print(f\"\u2705 Updated pagedata line:\")\n                print(f\"   Old: {line}\")\n                print(f\"   New: {new_line}\")\n            else:\n                new_lines.append(line)\n        \n        new_doc_content = '\\n'.join(new_lines)\n        \n        print(f\"\u2705 New document schema ({len(new_doc_content)} bytes):\")\n        for i, line in enumerate(new_lines):\n            print(f\"   Line {i}: {line}\")\n        \n        return new_doc_content\n    \n    def upload_new_document_schema(self, doc_content: str, doc_uuid: str):\n        \"\"\"Upload new document schema\"\"\"\n        print(f\"\\n\u2b06\ufe0f STEP 9: UPLOADING NEW DOCUMENT SCHEMA\")\n        print(\"-\" * 30)\n        \n        # Calculate hash\n        doc_hash = hashlib.sha256(doc_content.encode()).hexdigest()\n        print(f\"\u2705 New document schema hash: {doc_hash}\")\n        \n        # Upload using working method\n        headers = {\n            'Content-Type': 'application/octet-stream',\n            'rm-batch-number': '1',\n            'rm-filename': f'{doc_uuid}.docSchema',\n            'rm-sync-id': str(uuid.uuid4()),\n            'User-Agent': 'reMarkable-desktop-win/3.11.1.1951',\n            'Accept-Encoding': 'gzip, deflate',\n            'Accept-Language': 'en-BE,*',\n            'Connection': 'Keep-Alive'\n        }\n        \n        # Add CRC32C checksum\n        crc32c_header = compute_crc32c_header(doc_content.encode())\n        if crc32c_header:\n            headers['x-goog-hash'] = crc32c_header\n        \n        upload_response = self.session.put(\n            f\"https://eu.tectonic.remarkable.com/sync/v3/files/{doc_hash}\",\n            data=doc_content.encode(),\n            headers=headers\n        )\n        \n        print(f\"\u2705 Document schema upload response: {upload_response.status_code}\")\n        if upload_response.status_code not in [200, 202]:\n            print(f\"\u274c Upload failed: {upload_response.text}\")\n            raise RuntimeError(f\"Document schema upload failed: {upload_response.status_code}\")\n        \n        return doc_hash\n    \n    def update_root_docschema(self, root_content: str, doc_info: dict, new_doc_hash: str):\n        \"\"\"Update root.docSchema with new document hash\"\"\"\n        print(f\"\\n\ud83d\udd04 STEP 10: UPDATING ROOT.DOCSCHEMA\")\n        print(\"-\" * 30)\n        \n        # Replace old document line with new hash\n        old_line = doc_info['line']\n        parts = old_line.split(':')\n        parts[0] = new_doc_hash\n        new_line = ':'.join(parts)\n        \n        print(f\"\u2705 Updating root.docSchema entry:\")\n        print(f\"   Old: {old_line}\")\n        print(f\"   New: {new_line}\")\n        \n        # Replace in root content\n        new_root_content = root_content.replace(old_line, new_line)\n        \n        print(f\"\u2705 New root.docSchema size: {len(new_root_content)} bytes\")\n        \n        
return new_root_content\n    \n    def upload_new_root(self, root_content: str, generation: int):\n        \"\"\"Upload new root.docSchema and update roothash\"\"\"\n        print(f\"\\n\u2b06\ufe0f STEP 11: UPLOADING NEW ROOT.DOCSCHEMA\")\n        print(\"-\" * 30)\n        \n        # Calculate hash\n        root_hash = hashlib.sha256(root_content.encode()).hexdigest()\n        print(f\"\u2705 New root hash: {root_hash}\")\n        \n        # Upload root content using working method\n        headers = {\n            'Content-Type': 'text/plain',\n            'rm-batch-number': '1',\n            'rm-filename': 'root.docSchema',\n            'rm-sync-id': str(uuid.uuid4()),\n            'User-Agent': 'reMarkable-desktop-win/3.11.1.1951',\n            'Accept-Encoding': 'gzip, deflate',\n            'Accept-Language': 'en-BE,*',\n            'Connection': 'Keep-Alive'\n        }\n        \n        # Add CRC32C checksum\n        crc32c_header = compute_crc32c_header(root_content.encode())\n        if crc32c_header:\n            headers['x-goog-hash'] = crc32c_header\n        \n        upload_response = self.session.put(\n            f\"https://eu.tectonic.remarkable.com/sync/v3/files/{root_hash}\",\n            data=root_content.encode(),\n            headers=headers\n        )\n        \n        print(f\"\u2705 Root content upload response: {upload_response.status_code}\")\n        if upload_response.status_code not in [200, 202]:\n            print(f\"\u274c Upload failed: {upload_response.text}\")\n            raise RuntimeError(f\"Root content upload failed: {upload_response.status_code}\")\n        \n        # Update root hash pointer\n        print(f\"\\n\ud83d\udd04 STEP 12: UPDATING ROOT HASH POINTER\")\n        print(\"-\" * 30)\n        \n        # Create root data exactly like working upload_manager.py\n        root_update_data = {\n            \"broadcast\": True,\n            \"generation\": generation,\n            \"hash\": root_hash\n        }\n        \n        # Convert to JSON with 2-space indent like real app\n        root_content_body = json.dumps(root_update_data, indent=2).encode('utf-8')\n        \n        # Headers exactly like working upload_manager.py\n        headers = {\n            'Content-Type': 'application/json',\n            'rm-batch-number': '1',\n            'rm-filename': 'roothash',\n            'rm-sync-id': str(uuid.uuid4()),\n            'User-Agent': 'reMarkable-desktop-win/3.11.1.1951',\n            'Accept-Encoding': 'gzip, deflate',\n            'Accept-Language': 'en-BE,*',\n            'Connection': 'Keep-Alive'\n        }\n        \n        # Add CRC32C checksum\n        crc32c_header = compute_crc32c_header(root_content_body)\n        if crc32c_header:\n            headers['x-goog-hash'] = crc32c_header\n        \n        # Use /sync/v3/root endpoint like working code\n        root_update_response = self.session.put(\n            \"https://eu.tectonic.remarkable.com/sync/v3/root\",\n            data=root_content_body,\n            headers=headers\n        )\n        \n        print(f\"\u2705 Root update response: {root_update_response.status_code}\")\n        if root_update_response.status_code not in [200, 202]:\n            print(f\"\u274c Root update failed: {root_update_response.text}\")\n            raise RuntimeError(f\"Root update failed: {root_update_response.status_code}\")\n        \n        return root_hash\n    \n    def analyze_before_move(self):\n        \"\"\"Analyze cloud state and show what the script will do before 
executing\"\"\"\n        print(f\"\ud83d\udd0d PYLONTECH DOCUMENT MOVE ANALYSIS\")\n        print(\"=\" * 60)\n        print(\"This script will analyze the current cloud state and show you exactly\")\n        print(\"what it will do before performing any move operations.\")\n        print(\"\")\n        \n        try:\n            # Step 1-2: Analyze and find items\n            root_data, root_content, documents, folders = self.analyze_cloud_state()\n            \n            pylontech_uuid, pylontech_doc, myfolder_uuid, myfolder_data, otherfolder_uuid, otherfolder_data = self.find_target_items(documents, folders)\n            \n            if not all([pylontech_uuid, pylontech_doc, otherfolder_uuid, otherfolder_data]):\n                print(\"\\n\u274c ANALYSIS FAILED: Missing required items\")\n                return False\n            \n            # Show what will happen\n            print(f\"\\n\ud83d\udccb MOVE OPERATION PLAN\")\n            print(\"=\" * 30)\n            print(f\"\ud83d\udcc4 Document to move: '{pylontech_doc['name']}'\")\n            print(f\"   UUID: {pylontech_uuid}\")\n            print(f\"   Current parent: {pylontech_doc['parent_uuid'][:8] if pylontech_doc['parent_uuid'] else 'root'}...\")\n            print(f\"   Current location: {myfolder_data['name'] if pylontech_doc['parent_uuid'] == myfolder_uuid else 'Other location'}\")\n            print(f\"\")\n            print(f\"\ud83d\udcc1 Target folder: '{otherfolder_data['name']}'\")\n            print(f\"   UUID: {otherfolder_uuid}\")\n            print(f\"\")\n            print(f\"\ud83d\udd04 Operations that will be performed:\")\n            print(f\"   1. Retrieve pylontech document schema\")\n            print(f\"   2. Get current metadata from document\")\n            print(f\"   3. Update metadata parent field: {pylontech_doc['parent_uuid'][:8] if pylontech_doc['parent_uuid'] else 'root'}... \u2192 {otherfolder_uuid[:8]}...\")\n            print(f\"   4. Upload new metadata with new hash\")\n            print(f\"   5. Upload updated pagedata with new hash\")\n            print(f\"   6. Create new document schema with new component hashes\")\n            print(f\"   7. Upload new document schema\")\n            print(f\"   8. Update root.docSchema with new document hash\")\n            print(f\"   9. Upload new root.docSchema\")\n            print(f\"   10. Update root hash pointer\")\n            print(f\"\")\n            print(f\"\u2705 Analysis complete. 
Ready to perform move operation.\")\n            \n            return {\n                'root_data': root_data,\n                'root_content': root_content,\n                'pylontech_uuid': pylontech_uuid,\n                'pylontech_doc': pylontech_doc,\n                'otherfolder_uuid': otherfolder_uuid,\n                'otherfolder_data': otherfolder_data\n            }\n            \n        except Exception as e:\n            print(f\"\\n\u274c Analysis failed: {e}\")\n            return False\n    \n    def execute_move(self, analysis_data):\n        \"\"\"Execute the actual move operation using the analysis data\"\"\"\n        print(f\"\\n\ud83d\ude80 EXECUTING PYLONTECH MOVE OPERATION\")\n        print(\"=\" * 60)\n        \n        try:\n            root_data = analysis_data['root_data']\n            root_content = analysis_data['root_content']\n            pylontech_uuid = analysis_data['pylontech_uuid']\n            pylontech_doc = analysis_data['pylontech_doc']\n            otherfolder_uuid = analysis_data['otherfolder_uuid']\n            otherfolder_data = analysis_data['otherfolder_data']\n            \n            # Step 3: Get document schema\n            doc_content, doc_lines = self.get_document_schema(pylontech_doc['hash'])\n            \n            # Step 4: Get current metadata\n            current_metadata, metadata_line = self.get_current_metadata(doc_lines)\n            \n            # Step 5: Create updated metadata\n            updated_metadata_json = self.create_updated_metadata(\n                current_metadata, \n                otherfolder_uuid, \n                current_metadata.get('parent', '')\n            )\n            \n            # Step 6: Upload new metadata\n            new_metadata_hash = self.upload_new_metadata(updated_metadata_json, pylontech_uuid)\n            \n            # Step 7: Upload real pagedata\n            new_pagedata_hash = self.upload_real_pagedata(pylontech_uuid)\n            \n            # Step 8: Create new document schema\n            new_doc_content = self.create_new_document_schema(doc_lines, new_metadata_hash, metadata_line, new_pagedata_hash)\n            \n            # Step 9: Upload new document schema\n            new_doc_hash = self.upload_new_document_schema(new_doc_content, pylontech_uuid)\n            \n            # Step 10: Update root.docSchema\n            doc_info_for_root = {\n                'line': None,  # We'll need to find this in root content\n                'hash': pylontech_doc['hash']\n            }\n            \n            # Find the document line in root content for updating\n            lines = root_content.strip().split('\\n')\n            for line in lines[1:]:  # Skip version header\n                if pylontech_uuid in line:\n                    doc_info_for_root['line'] = line\n                    break\n            \n            if not doc_info_for_root['line']:\n                raise RuntimeError(\"Could not find document line in root.docSchema\")\n            \n            new_root_content = self.update_root_docschema(root_content, doc_info_for_root, new_doc_hash)\n            \n            # Step 11-12: Upload new root and update pointer\n            new_root_hash = self.upload_new_root(new_root_content, root_data['generation'])\n            \n            print(f\"\\n\ud83c\udf89 SUCCESS! 
Pylontech document moved successfully!\")\n            print(f\"   Document: {current_metadata.get('visibleName')}\")\n            print(f\"   From: {current_metadata.get('parent', 'root')}\")\n            print(f\"   To: {otherfolder_data['name']} ({otherfolder_uuid})\")\n            print(f\"   New root hash: {new_root_hash}\")\n            \n            return True\n            \n        except Exception as e:\n            print(f\"\\n\u274c Move operation failed: {e}\")\n            return False",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/cloudtest/test_move_pylontech_fixed.py",
      "tags": [
        "class",
        "pylontechmover"
      ],
      "updated_at": "2025-12-07T01:53:04.184212",
      "usage_example": "# Example usage:\n# result = PylontechMover(bases)"
    },
    {
      "best_practices": [
        "Always call cleanup() method after finishing uploads to remove temporary directories and databases",
        "Check that PDF files exist and have .pdf extension before calling upload_pdf",
        "Handle authentication failures during instantiation with try-except blocks",
        "Use list_folders() to get valid parent_uuid values before uploading to specific folders",
        "The class creates temporary resources in __init__, so instantiate only when needed",
        "Each instance creates its own temporary database, so reuse the same instance for multiple uploads in a session",
        "The upload_pdf method returns boolean for success/failure - always check the return value",
        "Parent_uuid=None uploads to root folder, use a valid UUID to upload to a specific folder"
      ],
      "class_interface": {
        "attributes": [
          {
            "description": "Path to the temporary directory created for this upload session, used to store temporary database",
            "is_class_variable": false,
            "name": "temp_dir",
            "type": "str"
          },
          {
            "description": "Instance of RemarkableUploadManager that handles the actual upload operations and cloud synchronization",
            "is_class_variable": false,
            "name": "upload_manager",
            "type": "RemarkableUploadManager"
          }
        ],
        "methods": [
          {
            "is_property": false,
            "is_static": false,
            "name": "__init__",
            "parameters": {},
            "purpose": "Initialize the uploader by creating temporary storage, authenticating with reMarkable services, and setting up the upload manager",
            "returns": "None (constructor)",
            "signature": "__init__(self)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "list_folders",
            "parameters": {},
            "purpose": "Retrieve a list of available folders from the reMarkable cloud for selecting upload targets",
            "returns": "Dictionary mapping folder UUIDs (str) to folder names (str). Returns empty dict on error.",
            "signature": "list_folders(self) -> dict"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "upload_pdf",
            "parameters": {
              "document_name": "Name to assign to the document on reMarkable device",
              "parent_uuid": "UUID of the parent folder (None for root folder, or valid folder UUID string)",
              "pdf_path": "Path to the PDF file to upload (string or Path object)"
            },
            "purpose": "Upload a PDF file to reMarkable cloud storage with specified name and optional parent folder",
            "returns": "Boolean: True if upload was successful, False if upload failed or file validation failed",
            "signature": "upload_pdf(self, pdf_path: str, document_name: str, parent_uuid: str = None) -> bool"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "cleanup",
            "parameters": {},
            "purpose": "Remove temporary directories and databases created during the upload session",
            "returns": "None",
            "signature": "cleanup(self)"
          }
        ]
      },
      "complexity": "moderate",
      "component_type": "class",
      "conditional_imports": [
        {
          "condition": "imported in __init__ method during instantiation",
          "import": "from cloudtest.auth import RemarkableAuth",
          "optional": false
        },
        {
          "condition": "imported in cleanup method when cleaning temporary resources",
          "import": "import shutil",
          "optional": false
        }
      ],
      "created_at": "2025-12-07 00:52:16",
      "decorators": [],
      "dependencies": [
        "tempfile",
        "os",
        "pathlib",
        "shutil",
        "cloudtest.auth",
        "cloudtest.upload_manager"
      ],
      "description": "A standalone PDF uploader class that manages uploading PDF documents to reMarkable cloud storage using authenticated sessions and temporary database storage.",
      "docstring": "Standalone PDF uploader using proven upload_manager logic",
      "id": 2112,
      "imports": [
        "import sys",
        "import os",
        "import tempfile",
        "from pathlib import Path",
        "from cloudtest.upload_manager import RemarkableUploadManager",
        "from cloudtest.auth import RemarkableAuth",
        "import shutil"
      ],
      "imports_required": [
        "import sys",
        "import os",
        "import tempfile",
        "from pathlib import Path",
        "from cloudtest.upload_manager import RemarkableUploadManager"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 117,
      "line_start": 19,
      "name": "RemarkablePDFUploader_v1",
      "parameters": [
        {
          "annotation": "",
          "default": null,
          "name": "bases"
        }
      ],
      "parameters_explained": {
        "__init__": "No parameters required. The constructor automatically creates a temporary directory and database, authenticates with reMarkable services, and initializes the upload manager."
      },
      "parent_class": null,
      "purpose": "This class provides a high-level interface for uploading PDF files to reMarkable devices/cloud. It handles authentication, temporary database creation for upload sessions, folder listing, and PDF document uploads. It wraps the RemarkableUploadManager with a simplified API and manages the lifecycle of temporary resources needed for uploads.",
      "return_annotation": null,
      "return_explained": "Instantiation returns a RemarkablePDFUploader object. The upload_pdf method returns a boolean (True for successful upload, False otherwise). The list_folders method returns a dictionary mapping folder UUIDs to folder names. The cleanup method has no return value.",
      "settings_required": [
        "reMarkable authentication credentials (handled by RemarkableAuth class)",
        "Write permissions for temporary directory creation",
        "Network access to reMarkable cloud services"
      ],
      "source_code": "class RemarkablePDFUploader:\n    \"\"\"Standalone PDF uploader using proven upload_manager logic\"\"\"\n    \n    def __init__(self):\n        # Create a temporary database for this upload session\n        self.temp_dir = tempfile.mkdtemp(prefix=\"remarkable_upload_\")\n        temp_db = os.path.join(self.temp_dir, \"temp_upload.db\")\n        \n        print(f\"\ud83d\udd27 Initializing upload manager with temp DB: {temp_db}\")\n        \n        # Import and create session (following upload_manager pattern)\n        from cloudtest.auth import RemarkableAuth\n        \n        # Create authenticated session\n        auth = RemarkableAuth()\n        session = auth.get_authenticated_session()\n        \n        if not session:\n            raise Exception(\"Authentication failed\")\n        \n        # Initialize the proven upload manager\n        self.upload_manager = RemarkableUploadManager(session, temp_db)\n        \n    def list_folders(self):\n        \"\"\"List available folders for upload target selection\"\"\"\n        try:\n            # Get the folder structure from the upload manager\n            folders = {}\n            replica = self.upload_manager.get_replica()\n            \n            for uuid, doc in replica.items():\n                if doc.get('Type') == 'CollectionType':\n                    name = doc.get('VissibleName', 'Unnamed Folder')\n                    folders[uuid] = name\n                    \n            return folders\n        except Exception as e:\n            print(f\"\u26a0\ufe0f Error listing folders: {e}\")\n            return {}\n    \n    def upload_pdf(self, pdf_path, document_name, parent_uuid=None):\n        \"\"\"\n        Upload a PDF using the proven upload_manager logic\n        \n        Args:\n            pdf_path: Path to the PDF file\n            document_name: Name for the document on reMarkable\n            parent_uuid: UUID of parent folder (None for root)\n            \n        Returns:\n            bool: True if upload successful, False otherwise\n        \"\"\"\n        try:\n            pdf_file = Path(pdf_path)\n            if not pdf_file.exists():\n                print(f\"\u274c PDF file not found: {pdf_path}\")\n                return False\n                \n            if not pdf_file.suffix.lower() == '.pdf':\n                print(f\"\u274c File is not a PDF: {pdf_path}\")\n                return False\n                \n            print(f\"\ud83d\udcc4 Uploading: {pdf_file.name}\")\n            print(f\"\ud83d\udcdd Document name: {document_name}\")\n            if parent_uuid:\n                print(f\"\ud83d\udcc1 Target folder: {parent_uuid}\")\n            else:\n                print(\"\ud83d\udcc1 Target: Root folder\")\n                \n            # Use the proven upload manager to handle the upload\n            print(f\"\\n\ud83d\udd27 Using proven upload_manager.py for upload...\")\n            \n            # Call the working upload method\n            success = self.upload_manager.upload_pdf_document(\n                pdf_path=str(pdf_file),\n                name=document_name,\n                parent_uuid=parent_uuid\n            )\n            \n            if success:\n                print(\"\u2705 Upload successful!\")\n                return True\n            else:\n                print(\"\u274c Upload failed!\")\n                return False\n                \n        except Exception as e:\n            print(f\"\u274c Upload error: {e}\")\n            return False\n    \n    
def cleanup(self):\n        \"\"\"Clean up temporary resources\"\"\"\n        try:\n            import shutil\n            if hasattr(self, 'temp_dir') and os.path.exists(self.temp_dir):\n                shutil.rmtree(self.temp_dir)\n                print(f\"\ud83e\uddf9 Cleaned up temp directory: {self.temp_dir}\")\n        except Exception as e:\n            print(f\"\u26a0\ufe0f Cleanup warning: {e}\")",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/cloudtest/upload_pdf.py",
      "tags": [
        "pdf",
        "upload",
        "remarkable",
        "cloud-storage",
        "document-management",
        "file-transfer",
        "authentication",
        "temporary-storage"
      ],
      "updated_at": "2025-12-07T01:52:16.095203",
      "usage_example": "# Create uploader instance\nuploader = RemarkablePDFUploader()\n\n# List available folders\nfolders = uploader.list_folders()\nfor uuid, name in folders.items():\n    print(f'{name}: {uuid}')\n\n# Upload PDF to root folder\nsuccess = uploader.upload_pdf(\n    pdf_path='/path/to/document.pdf',\n    document_name='My Document',\n    parent_uuid=None\n)\n\n# Upload PDF to specific folder\nsuccess = uploader.upload_pdf(\n    pdf_path='/path/to/document.pdf',\n    document_name='My Document',\n    parent_uuid='folder-uuid-here'\n)\n\n# Clean up temporary resources\nuploader.cleanup()"
    },
    {
      "best_practices": [
        "Always run with dry_run=True first to preview changes before applying them",
        "The class automatically creates backups in a 'docschema_repair' subdirectory - never delete these backups until repair is verified",
        "Authentication happens automatically during instantiation - ensure RemarkableAuth is properly configured before creating an instance",
        "The repair process is stateless - each run_repair() call performs a complete backup-analyze-fix-upload cycle",
        "Check the backup_dir after each run to review backup files and rebuilt schemas",
        "The class makes network requests to reMarkable servers - ensure stable internet connection",
        "If upload_fixed_root_docschema() fails, the original data remains intact on the server",
        "The tool preserves working entries (folders and working PDFs) unchanged while fixing broken documents",
        "Document size calculation is based on summing component sizes from the document's schema",
        "The class uses SHA256 hashing to generate content hashes for upload",
        "All operations are logged to console with emoji indicators for easy monitoring",
        "The repair process can be interrupted safely - no changes are made until upload_fixed_root_docschema() succeeds"
      ],
      "class_interface": {
        "attributes": [
          {
            "description": "Authenticated HTTP session for making API requests to reMarkable servers",
            "is_class_variable": false,
            "name": "session",
            "type": "requests.Session"
          },
          {
            "description": "Base directory path where the script is located",
            "is_class_variable": false,
            "name": "base_dir",
            "type": "Path"
          },
          {
            "description": "Directory path for storing backup files (base_dir/docschema_repair), created if it doesn't exist",
            "is_class_variable": false,
            "name": "backup_dir",
            "type": "Path"
          }
        ],
        "methods": [
          {
            "is_property": false,
            "is_static": false,
            "name": "__init__",
            "parameters": {},
            "purpose": "Initializes the repair tool by authenticating with reMarkable, setting up session, and creating backup directory",
            "returns": "None - raises RuntimeError if authentication fails",
            "signature": "__init__(self)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "backup_current_state",
            "parameters": {},
            "purpose": "Backs up the current root.docSchema and related data to a timestamped JSON file",
            "returns": "Dictionary containing timestamp, root_info (hash and generation), root_content (schema text), and backup_reason",
            "signature": "backup_current_state(self) -> Dict[str, Any]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "analyze_current_entries",
            "parameters": {
              "root_content": "The raw text content of root.docSchema to analyze"
            },
            "purpose": "Parses and categorizes root.docSchema entries into folders, working PDFs, broken documents, and unknown types",
            "returns": "Dictionary with keys 'folders', 'working_pdfs', 'broken_documents', 'unknown', each containing list of entry dictionaries with hash, uuid, type, size, and full_line",
            "signature": "analyze_current_entries(self, root_content: str) -> Dict[str, List[Dict]]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "calculate_correct_document_size",
            "parameters": {
              "doc_hash": "The hash identifier of the document's schema",
              "doc_uuid": "The UUID of the document for logging purposes"
            },
            "purpose": "Calculates the correct size for a document by fetching its schema and summing all component sizes",
            "returns": "Tuple of (correct_size as int, details dict with docschema_size, component_count, component_total, and components list)",
            "signature": "calculate_correct_document_size(self, doc_hash: str, doc_uuid: str) -> Tuple[int, Dict]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "fix_document_entries",
            "parameters": {
              "broken_documents": "List of document entry dictionaries identified as broken during analysis"
            },
            "purpose": "Fixes broken document entries by recalculating their sizes and reconstructing their schema lines",
            "returns": "List of fixed schema lines (strings) with corrected sizes, or original lines if fixing failed",
            "signature": "fix_document_entries(self, broken_documents: List[Dict]) -> List[str]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "rebuild_root_docschema",
            "parameters": {
              "categorized": "Dictionary of categorized entries from analyze_current_entries()",
              "fixed_document_lines": "List of fixed document lines from fix_document_entries()"
            },
            "purpose": "Rebuilds the complete root.docSchema by combining unchanged entries with fixed document lines",
            "returns": "Complete root.docSchema content as a newline-separated string with version header",
            "signature": "rebuild_root_docschema(self, categorized: Dict, fixed_document_lines: List[str]) -> str"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "upload_fixed_root_docschema",
            "parameters": {
              "new_content": "The complete rebuilt root.docSchema content to upload"
            },
            "purpose": "Uploads the fixed root.docSchema content to reMarkable servers and updates the root hash",
            "returns": "True if upload and root hash update succeeded and were verified, False otherwise",
            "signature": "upload_fixed_root_docschema(self, new_content: str) -> bool"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "run_repair",
            "parameters": {
              "dry_run": "If True, performs all steps except upload to preview changes; if False, applies changes to server"
            },
            "purpose": "Executes the complete repair process: backup, analyze, fix, rebuild, and optionally upload",
            "returns": "True if repair process completed successfully (or dry run completed), False if any step failed",
            "signature": "run_repair(self, dry_run: bool = False) -> bool"
          }
        ]
      },
      "complexity": "complex",
      "component_type": "class",
      "conditional_imports": [
        {
          "condition": "imported lazily inside __init__ method when class is instantiated",
          "import": "from auth import RemarkableAuth",
          "optional": false
        }
      ],
      "created_at": "2025-12-07 00:50:56",
      "decorators": [],
      "dependencies": [
        "json",
        "time",
        "hashlib",
        "pathlib",
        "typing",
        "requests",
        "auth"
      ],
      "description": "A repair tool for fixing corrupted root.docSchema entries in reMarkable cloud storage by recalculating document sizes and rebuilding the schema.",
      "docstring": "Repairs corrupted root.docSchema entries",
      "id": 2108,
      "imports": [
        "import json",
        "import time",
        "import hashlib",
        "from pathlib import Path",
        "from typing import Dict",
        "from typing import List",
        "from typing import Tuple",
        "from typing import Any",
        "import requests",
        "from auth import RemarkableAuth"
      ],
      "imports_required": [
        "import json",
        "import time",
        "import hashlib",
        "from pathlib import Path",
        "from typing import Dict, List, Tuple, Any",
        "import requests"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 402,
      "line_start": 24,
      "name": "RootDocSchemaRepair",
      "parameters": [
        {
          "annotation": "",
          "default": null,
          "name": "bases"
        }
      ],
      "parameters_explained": {
        "No constructor parameters": "The __init__ method takes no parameters. It automatically initializes authentication, creates backup directories, and prepares the repair environment."
      },
      "parent_class": null,
      "purpose": "This class provides a comprehensive solution for repairing corrupted root.docSchema entries in the reMarkable cloud sync system. It authenticates with reMarkable servers, backs up the current state, analyzes document entries to identify broken ones, recalculates correct document sizes by summing component sizes, rebuilds the root.docSchema with corrected entries, and uploads the fixed schema back to the server. The tool categorizes entries into folders, working PDFs, broken documents, and unknown types, preserving working entries while fixing corrupted ones. It supports dry-run mode for safe testing before applying changes.",
      "return_annotation": null,
      "return_explained": "Instantiation returns a RootDocSchemaRepair object. The main method run_repair() returns a boolean indicating success (True) or failure (False) of the repair process. Other methods return various types: backup_current_state() returns Dict[str, Any] with backup data, analyze_current_entries() returns Dict[str, List[Dict]] with categorized entries, calculate_correct_document_size() returns Tuple[int, Dict] with size and details, fix_document_entries() returns List[str] of fixed lines, rebuild_root_docschema() returns str with new content, and upload_fixed_root_docschema() returns bool for success status.",
      "settings_required": [
        "RemarkableAuth module must be available and properly configured for authentication",
        "Valid reMarkable account credentials for authentication",
        "Network access to eu.tectonic.remarkable.com API endpoints",
        "Write permissions in the directory where the script is located (for backup_dir creation)"
      ],
      "source_code": "class RootDocSchemaRepair:\n    \"\"\"Repairs corrupted root.docSchema entries\"\"\"\n    \n    def __init__(self):\n        # Load auth session\n        from auth import RemarkableAuth\n        auth = RemarkableAuth()\n        self.session = auth.get_authenticated_session()\n        \n        if not self.session:\n            raise RuntimeError(\"Failed to authenticate with reMarkable\")\n        \n        self.base_dir = Path(__file__).parent\n        self.backup_dir = self.base_dir / \"docschema_repair\"\n        self.backup_dir.mkdir(exist_ok=True)\n        \n        print(\"\ud83d\udd27 reMarkable Root DocSchema Repair Tool Initialized\")\n    \n    def backup_current_state(self) -> Dict[str, Any]:\n        \"\"\"Backup current root.docSchema and related data\"\"\"\n        print(\"\\n\ud83d\udcbe Step 1: Backing up current state...\")\n        \n        try:\n            # Get current root info\n            root_response = self.session.get(\"https://eu.tectonic.remarkable.com/sync/v4/root\")\n            root_response.raise_for_status()\n            root_data = root_response.json()\n            \n            # Get current root.docSchema content\n            root_content_response = self.session.get(f\"https://eu.tectonic.remarkable.com/sync/v3/files/{root_data['hash']}\")\n            root_content_response.raise_for_status()\n            root_content = root_content_response.text\n            \n            # Save backup\n            timestamp = int(time.time())\n            backup_data = {\n                'timestamp': timestamp,\n                'root_info': root_data,\n                'root_content': root_content,\n                'backup_reason': 'Pre-repair backup'\n            }\n            \n            backup_file = self.backup_dir / f\"root_backup_{timestamp}.json\"\n            with open(backup_file, 'w') as f:\n                json.dump(backup_data, f, indent=2)\n            \n            print(f\"\u2705 Current state backed up to: {backup_file}\")\n            print(f\"   Root hash: {root_data['hash']}\")\n            print(f\"   Generation: {root_data.get('generation')}\")\n            print(f\"   Content size: {len(root_content)} bytes\")\n            \n            return backup_data\n            \n        except Exception as e:\n            print(f\"\u274c Backup failed: {e}\")\n            raise\n    \n    def analyze_current_entries(self, root_content: str) -> Dict[str, List[Dict]]:\n        \"\"\"Analyze current root.docSchema entries and categorize them\"\"\"\n        print(\"\\n\ud83d\udd0d Step 2: Analyzing current root.docSchema entries...\")\n        \n        lines = root_content.strip().split('\\n')\n        version = lines[0]\n        entries = lines[1:]\n        \n        print(f\"\ud83d\udcca DocSchema version: {version}\")\n        print(f\"\ud83d\udcca Total entries: {len(entries)}\")\n        \n        categorized = {\n            'folders': [],\n            'working_pdfs': [],  # The two invoice PDFs that still work\n            'broken_documents': [],  # Documents that need fixing\n            'unknown': []\n        }\n        \n        # Known working documents (the two invoices that still work)\n        working_pdf_names = ['invoice vicebio', 'invoice poulpharm']\n        \n        for i, line in enumerate(entries):\n            if ':' in line:\n                parts = line.split(':')\n                if len(parts) >= 5:\n                    entry = {\n                        'line_number': i + 1,\n                        'hash': 
parts[0],\n                        'uuid': parts[2],\n                        'type': parts[3],\n                        'size': parts[4],\n                        'full_line': line\n                    }\n                    \n                    # Categorize by type\n                    if entry['type'] == '2':  # Folders\n                        categorized['folders'].append(entry)\n                        print(f\"\ud83d\udcc1 Folder: {entry['uuid'][:8]}... (size: {entry['size']})\")\n                    \n                    elif entry['type'] in ['4', '5']:  # PDF documents or notebooks\n                        # Try to identify if this is one of the working invoices\n                        try:\n                            # Fetch the document's metadata to get its name\n                            doc_response = self.session.get(f\"https://eu.tectonic.remarkable.com/sync/v3/files/{entry['hash']}\")\n                            if doc_response.status_code == 200:\n                                doc_content = doc_response.text\n                                # Look for metadata component\n                                for doc_line in doc_content.split('\\n')[1:]:\n                                    if '.metadata' in doc_line and ':' in doc_line:\n                                        metadata_hash = doc_line.split(':')[0]\n                                        metadata_response = self.session.get(f\"https://eu.tectonic.remarkable.com/sync/v3/files/{metadata_hash}\")\n                                        if metadata_response.status_code == 200:\n                                            metadata = json.loads(metadata_response.text)\n                                            doc_name = metadata.get('visibleName', '').lower()\n                                            \n                                            if any(working_name in doc_name for working_name in working_pdf_names):\n                                                categorized['working_pdfs'].append(entry)\n                                                print(f\"\u2705 Working PDF: '{metadata.get('visibleName')}' {entry['uuid'][:8]}... (size: {entry['size']})\")\n                                                break\n                                        break\n                                else:\n                                    # Couldn't identify as working invoice\n                                    categorized['broken_documents'].append(entry)\n                                    print(f\"\ud83d\udd27 Document to fix: {entry['uuid'][:8]}... type {entry['type']} (size: {entry['size']})\")\n                            else:\n                                categorized['broken_documents'].append(entry)\n                                print(f\"\u274c Inaccessible document: {entry['uuid'][:8]}... type {entry['type']} (size: {entry['size']})\")\n                        \n                        except Exception as e:\n                            categorized['broken_documents'].append(entry)\n                            print(f\"\u274c Error analyzing document {entry['uuid'][:8]}...: {e}\")\n                    \n                    else:\n                        categorized['unknown'].append(entry)\n                        print(f\"\u2753 Unknown type {entry['type']}: {entry['uuid'][:8]}... 
(size: {entry['size']})\")\n        \n        print(f\"\\n\ud83d\udcca Categorization Summary:\")\n        print(f\"   \ud83d\udcc1 Folders (keep unchanged): {len(categorized['folders'])}\")\n        print(f\"   \u2705 Working PDFs (keep unchanged): {len(categorized['working_pdfs'])}\")\n        print(f\"   \ud83d\udd27 Documents to fix: {len(categorized['broken_documents'])}\")\n        print(f\"   \u2753 Unknown entries: {len(categorized['unknown'])}\")\n        \n        return categorized\n    \n    def calculate_correct_document_size(self, doc_hash: str, doc_uuid: str) -> Tuple[int, Dict]:\n        \"\"\"Calculate the correct size for a document by summing its components\"\"\"\n        print(f\"\\n\ud83e\uddee Calculating correct size for document {doc_uuid[:8]}...\")\n        \n        try:\n            # Fetch document schema\n            doc_response = self.session.get(f\"https://eu.tectonic.remarkable.com/sync/v3/files/{doc_hash}\")\n            doc_response.raise_for_status()\n            doc_content = doc_response.text\n            \n            print(f\"   \ud83d\udcc4 DocSchema size: {len(doc_content)} bytes\")\n            \n            # Parse components and sum their sizes\n            lines = doc_content.strip().split('\\n')\n            if len(lines) < 2:\n                print(f\"   \u274c Invalid docSchema format\")\n                return len(doc_content), {'error': 'Invalid format', 'components': []}\n            \n            component_sizes = []\n            component_details = []\n            total_component_size = 0\n            \n            for line in lines[1:]:  # Skip version header\n                if ':' in line:\n                    parts = line.split(':')\n                    if len(parts) >= 5:\n                        comp_hash = parts[0]\n                        comp_name = parts[2]\n                        comp_size = int(parts[4])\n                        \n                        component_sizes.append(comp_size)\n                        component_details.append({\n                            'name': comp_name,\n                            'hash': comp_hash,\n                            'size': comp_size\n                        })\n                        total_component_size += comp_size\n                        \n                        print(f\"   \ud83d\udce6 Component {comp_name}: {comp_size} bytes\")\n            \n            # The correct size should be the sum of all component sizes\n            # (This is what we discovered from analyzing the real Pylontech document)\n            correct_size = total_component_size\n            \n            print(f\"   \ud83d\udcca Total component sizes: {total_component_size} bytes\")\n            print(f\"   \ud83d\udcca DocSchema itself: {len(doc_content)} bytes\")\n            print(f\"   \u2705 Correct root.docSchema size: {correct_size} bytes\")\n            \n            return correct_size, {\n                'docschema_size': len(doc_content),\n                'component_count': len(component_sizes),\n                'component_total': total_component_size,\n                'components': component_details\n            }\n            \n        except Exception as e:\n            print(f\"   \u274c Error calculating size: {e}\")\n            return len(doc_content) if 'doc_content' in locals() else 0, {'error': str(e)}\n    \n    def fix_document_entries(self, broken_documents: List[Dict]) -> List[str]:\n        \"\"\"Fix the broken document entries with correct sizes\"\"\"\n        
print(f\"\\n\ud83d\udd27 Step 3: Fixing {len(broken_documents)} broken document entries...\")\n        \n        fixed_lines = []\n        \n        for i, entry in enumerate(broken_documents, 1):\n            print(f\"\\n\ud83d\udd27 Fixing document {i}/{len(broken_documents)}: {entry['uuid'][:8]}...\")\n            \n            # Calculate correct size\n            correct_size, details = self.calculate_correct_document_size(entry['hash'], entry['uuid'])\n            \n            if 'error' not in details:\n                # Reconstruct the line with correct size\n                parts = entry['full_line'].split(':')\n                parts[4] = str(correct_size)  # Replace size\n                fixed_line = ':'.join(parts)\n                \n                fixed_lines.append(fixed_line)\n                \n                print(f\"   \u2705 Fixed: {entry['uuid'][:8]}... size {entry['size']} \u2192 {correct_size}\")\n                print(f\"   \ud83d\udcc4 Old line: {entry['full_line']}\")\n                print(f\"   \ud83d\udcc4 New line: {fixed_line}\")\n            else:\n                # Keep original line if we can't fix it\n                fixed_lines.append(entry['full_line'])\n                print(f\"   \u26a0\ufe0f  Keeping original: {entry['uuid'][:8]}... (couldn't fix: {details.get('error')})\")\n        \n        return fixed_lines\n    \n    def rebuild_root_docschema(self, categorized: Dict, fixed_document_lines: List[str]) -> str:\n        \"\"\"Rebuild the complete root.docSchema with all entries\"\"\"\n        print(f\"\\n\ud83c\udfd7\ufe0f  Step 4: Rebuilding complete root.docSchema...\")\n        \n        # Start with version header (use original version)\n        new_lines = ['43']  # Standard version\n        \n        # Add all unchanged entries\n        unchanged_count = 0\n        \n        # Add folders (unchanged)\n        for folder in categorized['folders']:\n            new_lines.append(folder['full_line'])\n            unchanged_count += 1\n        \n        # Add working PDFs (unchanged)\n        for pdf in categorized['working_pdfs']:\n            new_lines.append(pdf['full_line'])\n            unchanged_count += 1\n        \n        # Add unknown entries (unchanged)\n        for unknown in categorized['unknown']:\n            new_lines.append(unknown['full_line'])\n            unchanged_count += 1\n        \n        # Add fixed document entries\n        for fixed_line in fixed_document_lines:\n            new_lines.append(fixed_line)\n        \n        new_content = '\\n'.join(new_lines)\n        \n        print(f\"\u2705 Root.docSchema rebuilt:\")\n        print(f\"   \ud83d\udcca Total entries: {len(new_lines) - 1}\")  # -1 for version header\n        print(f\"   \ud83d\udcc1 Unchanged entries: {unchanged_count}\")\n        print(f\"   \ud83d\udd27 Fixed entries: {len(fixed_document_lines)}\")\n        print(f\"   \ud83d\udccf Total content size: {len(new_content)} bytes\")\n        \n        return new_content\n    \n    def upload_fixed_root_docschema(self, new_content: str) -> bool:\n        \"\"\"Upload the fixed root.docSchema to the server\"\"\"\n        print(f\"\\n\u2b06\ufe0f  Step 5: Uploading fixed root.docSchema...\")\n        \n        try:\n            # Calculate new hash\n            new_hash = hashlib.sha256(new_content.encode()).hexdigest()\n            print(f\"   \ud83d\udcca New content hash: {new_hash}\")\n            \n            # Upload new content\n            upload_response = self.session.put(\n                
f\"https://eu.tectonic.remarkable.com/sync/v3/files/{new_hash}\",\n                data=new_content.encode(),\n                headers={'Content-Type': 'text/plain'}\n            )\n            \n            if upload_response.status_code in [200, 202]:\n                print(f\"   \u2705 Content uploaded successfully ({upload_response.status_code})\")\n                \n                # Update root hash\n                root_update_response = self.session.put(\n                    \"https://eu.tectonic.remarkable.com/sync/v4/root\",\n                    json={'hash': new_hash}\n                )\n                \n                if root_update_response.status_code in [200, 202]:\n                    print(f\"   \u2705 Root hash updated successfully ({root_update_response.status_code})\")\n                    \n                    # Verify the update\n                    verify_response = self.session.get(\"https://eu.tectonic.remarkable.com/sync/v4/root\")\n                    if verify_response.status_code == 200:\n                        verify_data = verify_response.json()\n                        if verify_data['hash'] == new_hash:\n                            print(f\"   \u2705 Root hash verified: {new_hash}\")\n                            print(f\"   \ud83d\udcca New generation: {verify_data.get('generation')}\")\n                            return True\n                        else:\n                            print(f\"   \u274c Root hash verification failed: {verify_data['hash']} != {new_hash}\")\n                    else:\n                        print(f\"   \u26a0\ufe0f  Cannot verify root hash update\")\n                        return True  # Assume success\n                else:\n                    print(f\"   \u274c Root hash update failed: {root_update_response.status_code}\")\n                    print(f\"   \ud83d\udcc4 Response: {root_update_response.text}\")\n                    return False\n            else:\n                print(f\"   \u274c Content upload failed: {upload_response.status_code}\")\n                print(f\"   \ud83d\udcc4 Response: {upload_response.text}\")\n                return False\n                \n        except Exception as e:\n            print(f\"   \u274c Upload failed: {e}\")\n            return False\n    \n    def run_repair(self, dry_run: bool = False) -> bool:\n        \"\"\"Run the complete repair process\"\"\"\n        print(f\"\\n\ud83d\ude80 Starting Root DocSchema Repair Process\")\n        print(f\"\ud83d\udd0d Mode: {'DRY RUN (no changes)' if dry_run else 'LIVE REPAIR (will make changes)'}\")\n        print(\"=\" * 60)\n        \n        try:\n            # Step 1: Backup current state\n            backup_data = self.backup_current_state()\n            root_content = backup_data['root_content']\n            \n            # Step 2: Analyze entries\n            categorized = self.analyze_current_entries(root_content)\n            \n            # Step 3: Fix broken documents\n            if categorized['broken_documents']:\n                fixed_lines = self.fix_document_entries(categorized['broken_documents'])\n            else:\n                fixed_lines = []\n                print(\"\u2705 No broken documents found to fix\")\n            \n            # Step 4: Rebuild root.docSchema\n            new_content = self.rebuild_root_docschema(categorized, fixed_lines)\n            \n            # Save the rebuilt content for inspection\n            rebuilt_file = self.backup_dir / f\"rebuilt_root_{int(time.time())}.txt\"\n     
       with open(rebuilt_file, 'w') as f:\n                f.write(new_content)\n            print(f\"\ud83d\udcc4 Rebuilt root.docSchema saved to: {rebuilt_file}\")\n            \n            if dry_run:\n                print(f\"\\n\ud83d\udd0d DRY RUN COMPLETE - No changes made to server\")\n                print(f\"\u2705 Repair plan ready - run with dry_run=False to apply changes\")\n                return True\n            \n            # Step 5: Upload fixed root.docSchema\n            success = self.upload_fixed_root_docschema(new_content)\n            \n            if success:\n                print(f\"\\n\ud83c\udf89 ROOT DOCSCHEMA REPAIR COMPLETED SUCCESSFULLY!\")\n                print(f\"\u2705 The following should now be visible in your reMarkable app:\")\n                print(f\"   \ud83d\udcc1 All folders (unchanged)\")\n                print(f\"   \ud83d\udcc4 invoice vicebio (unchanged)\")\n                print(f\"   \ud83d\udcc4 invoice poulpharm (unchanged)\")\n                print(f\"   \ud83d\udcc4 All other documents (with corrected sizes)\")\n                print(f\"\\n\ud83d\udca1 Check your reMarkable device to verify the repair worked\")\n            else:\n                print(f\"\\n\u274c ROOT DOCSCHEMA REPAIR FAILED\")\n                print(f\"\ud83d\udca1 Your data is safe - the backup is available in {self.backup_dir}\")\n            \n            return success\n            \n        except Exception as e:\n            print(f\"\\n\u274c Repair process failed: {e}\")\n            print(f\"\ud83d\udca1 Your data is safe - check {self.backup_dir} for backups\")\n            return False",
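      "editor_sketch_note_size_repair": "Added illustrative sketch, not part of fix_root_docschema.py: a pure-function view of the size-repair rule used by calculate_correct_document_size(), which treats the sum of the component sizes (the fifth colon-separated field of each docSchema entry line) as the correct size to write back into root.docSchema. The sample docSchema text below is invented.",
      "editor_sketch_size_repair": "# Editorial sketch (hypothetical helper, not from fix_root_docschema.py).\n# Given the text of a document's docSchema, sum the component sizes found in\n# the fifth colon-separated field of each entry line; that total is the value\n# the repair writes back into the matching root.docSchema entry.\ndef correct_entry_size(doc_schema_text: str) -> int:\n    total = 0\n    for line in doc_schema_text.strip().split('\\n')[1:]:  # skip the version header\n        parts = line.split(':')\n        if len(parts) >= 5:\n            total += int(parts[4])\n    return total\n\n# Invented two-component docSchema: 120 + 380 bytes of components\nsample = '3\\nabc:0:doc.metadata:0:120\\ndef:0:doc.content:0:380\\n'\nassert correct_entry_size(sample) == 500",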
      "source_file": "/tf/active/vicechatdev/e-ink-llm/cloudtest/fix_root_docschema.py",
      "tags": [
        "remarkable",
        "cloud-sync",
        "repair",
        "schema-repair",
        "document-management",
        "backup",
        "data-recovery",
        "api-client",
        "file-system",
        "hash-calculation"
      ],
      "updated_at": "2025-12-07T01:50:56.568024",
      "usage_example": "# Basic usage with dry run first\nrepair_tool = RootDocSchemaRepair()\n\n# First, run in dry-run mode to see what would be changed\nsuccess = repair_tool.run_repair(dry_run=True)\n\nif success:\n    # If dry run looks good, run the actual repair\n    success = repair_tool.run_repair(dry_run=False)\n    if success:\n        print(\"Repair completed successfully!\")\n    else:\n        print(\"Repair failed, check backups\")\n\n# Advanced usage: manual step-by-step repair\nrepair_tool = RootDocSchemaRepair()\n\n# Step 1: Backup\nbackup_data = repair_tool.backup_current_state()\n\n# Step 2: Analyze\ncategorized = repair_tool.analyze_current_entries(backup_data['root_content'])\n\n# Step 3: Fix broken documents\nif categorized['broken_documents']:\n    fixed_lines = repair_tool.fix_document_entries(categorized['broken_documents'])\n    \n    # Step 4: Rebuild\n    new_content = repair_tool.rebuild_root_docschema(categorized, fixed_lines)\n    \n    # Step 5: Upload\n    success = repair_tool.upload_fixed_root_docschema(new_content)"
    },
    {
      "best_practices": [
        "Always instantiate RootCleaner in a try-except block to handle authentication failures gracefully",
        "Use clear_root_completely() for the full workflow rather than calling individual methods unless you need fine-grained control",
        "The class maintains state through self.session - do not modify this attribute directly",
        "Methods should be called in order: get_current_root_info() -> create_empty_root() -> upload_empty_root() -> verify_empty_root()",
        "The clear_root_completely() method orchestrates all steps automatically and includes error handling",
        "This operation is destructive - all document references in root.docSchema will be removed from cloud",
        "The class uses the working reMarkable sync v3/v4 API endpoints with proper headers and checksums",
        "Generation numbers are preserved (not incremented) to match reMarkable's expected behavior",
        "Each method prints detailed progress information for debugging and monitoring",
        "Verification step is critical - always check the return value to ensure operation succeeded"
      ],
      "class_interface": {
        "attributes": [
          {
            "description": "Authenticated HTTP session for making API requests to reMarkable cloud services, initialized by RemarkableAuth",
            "is_class_variable": false,
            "name": "session",
            "type": "requests.Session"
          }
        ],
        "methods": [
          {
            "is_property": false,
            "is_static": false,
            "name": "__init__",
            "parameters": {},
            "purpose": "Initialize RootCleaner with authenticated reMarkable session",
            "returns": "None - initializes instance with self.session attribute",
            "signature": "__init__(self)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "get_current_root_info",
            "parameters": {},
            "purpose": "Retrieve current root.docSchema information including hash, generation, and content",
            "returns": "Tuple of (root_data dict containing 'hash' and 'generation', root_content string with document list)",
            "signature": "get_current_root_info(self) -> tuple[dict, str]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "create_empty_root",
            "parameters": {},
            "purpose": "Create an empty root.docSchema content with only the version line",
            "returns": "String '3\\n' representing empty root with version 3",
            "signature": "create_empty_root(self) -> str"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "upload_empty_root",
            "parameters": {
              "current_generation": "Current generation number from root metadata to preserve",
              "empty_root_content": "The empty root content string (typically '3\\n')"
            },
            "purpose": "Upload empty root content to reMarkable cloud and update root hash pointer",
            "returns": "String containing the SHA256 hash of the uploaded empty root content",
            "signature": "upload_empty_root(self, empty_root_content: str, current_generation: int) -> str"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "verify_empty_root",
            "parameters": {},
            "purpose": "Verify that the root.docSchema is now empty by fetching and checking current state",
            "returns": "Boolean: True if root contains only version line '3' with 0 documents, False otherwise",
            "signature": "verify_empty_root(self) -> bool"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "clear_root_completely",
            "parameters": {},
            "purpose": "Execute complete workflow to clear root.docSchema: get current state, create empty root, upload, and verify",
            "returns": "Boolean: True if entire clearing process succeeded and was verified, False if any step failed",
            "signature": "clear_root_completely(self) -> bool"
          }
        ]
      },
      "complexity": "complex",
      "component_type": "class",
      "conditional_imports": [],
      "created_at": "2025-12-07 00:49:57",
      "decorators": [],
      "dependencies": [
        "json",
        "time",
        "hashlib",
        "uuid",
        "base64",
        "zlib",
        "pathlib",
        "crc32c"
      ],
      "description": "A class that completely clears the reMarkable cloud's root.docSchema file, removing all document references while maintaining the proper file structure and version.",
      "docstring": "Clears root.docSchema using the working upload mechanism",
      "id": 2106,
      "imports": [
        "import json",
        "import time",
        "import hashlib",
        "import uuid",
        "import base64",
        "import zlib",
        "from pathlib import Path",
        "from auth import RemarkableAuth",
        "import crc32c"
      ],
      "imports_required": [
        "import json",
        "import time",
        "import hashlib",
        "import uuid",
        "import base64",
        "import zlib",
        "from pathlib import Path",
        "from auth import RemarkableAuth",
        "import crc32c"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 257,
      "line_start": 47,
      "name": "RootCleaner",
      "parameters": [
        {
          "annotation": "",
          "default": null,
          "name": "bases"
        }
      ],
      "parameters_explained": {
        "__init__": "No parameters required. The constructor automatically initializes authentication with reMarkable cloud services using RemarkableAuth and establishes an authenticated session. Raises RuntimeError if authentication fails."
      },
      "parent_class": null,
      "purpose": "RootCleaner provides a safe and verified method to completely empty the reMarkable cloud storage by clearing the root.docSchema file. It handles authentication, retrieves current root state, creates an empty root structure (containing only the version line '3'), uploads it using the working reMarkable sync API endpoints, and verifies the operation succeeded. This is useful for resetting cloud storage or testing purposes.",
      "return_annotation": null,
      "return_explained": "Instantiation returns a RootCleaner object with an authenticated session. The main method clear_root_completely() returns a boolean: True if root was successfully cleared and verified, False otherwise. Individual methods return: get_current_root_info() returns tuple (root_data dict, root_content string), create_empty_root() returns string '3\\n', upload_empty_root() returns string (new root hash), verify_empty_root() returns boolean.",
      "settings_required": [
        "RemarkableAuth module must be available and properly configured for authentication",
        "Valid reMarkable cloud account credentials (handled by RemarkableAuth)",
        "compute_crc32c_header function must be available in scope for checksum calculation",
        "Network access to eu.tectonic.remarkable.com API endpoints"
      ],
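      "editor_sketch_note_crc32c": "Added illustrative sketch, not part of clear_root_docschema.py: compute_crc32c_header() is required in scope but defined elsewhere, so this is only a plausible stand-in. It assumes the Google Cloud Storage style x-goog-hash value ('crc32c=' followed by the base64 of the big-endian 4-byte checksum) and the crc32c package's crc32c() function; the real helper may differ.",
      "editor_sketch_crc32c": "import base64\nimport crc32c  # third-party package already listed in this component's imports\n\n# Editorial stand-in only - a plausible compute_crc32c_header(); the helper\n# actually used by the source file may be implemented differently.\ndef compute_crc32c_header(payload: bytes) -> str:\n    checksum = crc32c.crc32c(payload)  # unsigned 32-bit integer\n    encoded = base64.b64encode(checksum.to_bytes(4, 'big')).decode('ascii')\n    return 'crc32c=' + encoded\n\nprint(compute_crc32c_header(b'3\\n'))",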
      "source_code": "class RootCleaner:\n    \"\"\"Clears root.docSchema using the working upload mechanism\"\"\"\n    \n    def __init__(self):\n        # Load auth session\n        auth = RemarkableAuth()\n        self.session = auth.get_authenticated_session()\n        \n        if not self.session:\n            raise RuntimeError(\"Failed to authenticate with reMarkable\")\n        \n        print(\"\ud83e\uddf9 Root Cleaner Initialized\")\n    \n    def get_current_root_info(self):\n        \"\"\"Get current root.docSchema info using working method\"\"\"\n        print(\"\\n\ud83d\udccb Step 1: Getting current root.docSchema...\")\n        \n        # Get root info\n        root_response = self.session.get(\"https://eu.tectonic.remarkable.com/sync/v4/root\")\n        root_response.raise_for_status()\n        root_data = root_response.json()\n        \n        # Get root content\n        root_content_response = self.session.get(f\"https://eu.tectonic.remarkable.com/sync/v3/files/{root_data['hash']}\")\n        root_content_response.raise_for_status()\n        root_content = root_content_response.text\n        \n        print(f\"\u2705 Current root hash: {root_data['hash']}\")\n        print(f\"\u2705 Current generation: {root_data.get('generation')}\")\n        print(f\"\u2705 Root content size: {len(root_content)} bytes\")\n        print(f\"\ud83d\udcc4 Current root content:\")\n        print(f\"   {repr(root_content)}\")\n        \n        # Count current documents\n        lines = root_content.strip().split('\\n')\n        doc_count = len(lines) - 1  # Subtract version line\n        print(f\"\ud83d\udcca Documents currently in root: {doc_count}\")\n        \n        return root_data, root_content\n    \n    def create_empty_root(self):\n        \"\"\"Create completely empty root.docSchema with only version line\"\"\"\n        print(f\"\\n\ud83e\uddf9 Step 2: Creating empty root.docSchema...\")\n        \n        # Empty root content: just version \"3\" and newline\n        empty_root_content = \"3\\n\"\n        \n        print(f\"\u2705 Empty root content: {repr(empty_root_content)}\")\n        print(f\"\u2705 Empty root size: {len(empty_root_content)} bytes\")\n        \n        return empty_root_content\n    \n    def upload_empty_root(self, empty_root_content: str, current_generation: int):\n        \"\"\"Upload empty root.docSchema and update roothash using WORKING method\"\"\"\n        print(f\"\\n\u2b06\ufe0f Step 3: Uploading empty root.docSchema...\")\n        \n        # Calculate hash for empty root\n        root_hash = hashlib.sha256(empty_root_content.encode()).hexdigest()\n        print(f\"\u2705 New empty root hash: {root_hash}\")\n        \n        # Upload root content using WORKING method from test_move_from_trash.py\n        headers = {\n            'Content-Type': 'text/plain',\n            'rm-batch-number': '1',\n            'rm-filename': 'root.docSchema',  # System filename for root.docSchema\n            'rm-sync-id': str(uuid.uuid4()),\n            'User-Agent': 'reMarkable-desktop-win/3.11.1.1951',\n            'Accept-Encoding': 'gzip, deflate',\n            'Accept-Language': 'en-BE,*',\n            'Connection': 'Keep-Alive'\n        }\n        \n        # Add CRC32C checksum\n        crc32c_header = compute_crc32c_header(empty_root_content.encode())\n        if crc32c_header:\n            headers['x-goog-hash'] = crc32c_header\n        \n        print(f\"\ud83d\udce4 PUT to: https://eu.tectonic.remarkable.com/sync/v3/files/{root_hash}\")\n        
print(f\"   Headers: {list(headers.keys())}\")\n        \n        upload_response = self.session.put(\n            f\"https://eu.tectonic.remarkable.com/sync/v3/files/{root_hash}\",  # WORKING ENDPOINT\n            data=empty_root_content.encode(),\n            headers=headers\n        )\n        \n        print(f\"\u2705 Root content upload response: {upload_response.status_code}\")\n        if upload_response.status_code not in [200, 202]:\n            print(f\"\u274c Upload failed: {upload_response.text}\")\n            raise RuntimeError(f\"Root content upload failed: {upload_response.status_code}\")\n        \n        # Update root hash pointer using WORKING method\n        print(f\"\\n\ud83d\udd04 Step 4: Updating root hash pointer...\")\n        \n        # Create root data exactly like working upload_manager.py\n        root_update_data = {\n            \"broadcast\": True,\n            \"generation\": current_generation,  # Use current generation, don't increment\n            \"hash\": root_hash\n        }\n        \n        # Convert to JSON with 2-space indent like real app\n        root_content_body = json.dumps(root_update_data, indent=2).encode('utf-8')\n        \n        print(f\"\u2705 Root update data:\")\n        print(f\"   Generation: {current_generation} (keeping current)\")\n        print(f\"   Hash: {root_hash}\")\n        \n        # Headers exactly like working upload_manager.py\n        headers = {\n            'Content-Type': 'application/json',\n            'rm-batch-number': '1',\n            'rm-filename': 'roothash',\n            'rm-sync-id': str(uuid.uuid4()),\n            'User-Agent': 'reMarkable-desktop-win/3.11.1.1951',\n            'Accept-Encoding': 'gzip, deflate',\n            'Accept-Language': 'en-BE,*',\n            'Connection': 'Keep-Alive'\n        }\n        \n        # Add CRC32C checksum\n        crc32c_header = compute_crc32c_header(root_content_body)\n        if crc32c_header:\n            headers['x-goog-hash'] = crc32c_header\n        \n        # Use /sync/v3/root endpoint like working code\n        print(f\"\ud83d\udce4 PUT to: https://eu.tectonic.remarkable.com/sync/v3/root\")\n        \n        root_update_response = self.session.put(\n            \"https://eu.tectonic.remarkable.com/sync/v3/root\",  # WORKING ENDPOINT\n            data=root_content_body,\n            headers=headers\n        )\n        \n        print(f\"\u2705 Root update response: {root_update_response.status_code}\")\n        if root_update_response.status_code not in [200, 202]:\n            print(f\"\u274c Root update failed: {root_update_response.text}\")\n            raise RuntimeError(f\"Root update failed: {root_update_response.status_code}\")\n        \n        return root_hash\n    \n    def verify_empty_root(self):\n        \"\"\"Verify that the root is now empty\"\"\"\n        print(f\"\\n\ud83d\udd0d Step 5: Verifying empty root...\")\n        \n        try:\n            # Get updated root info\n            root_response = self.session.get(\"https://eu.tectonic.remarkable.com/sync/v4/root\")\n            root_response.raise_for_status()\n            root_data = root_response.json()\n            \n            # Get root content\n            root_content_response = self.session.get(f\"https://eu.tectonic.remarkable.com/sync/v3/files/{root_data['hash']}\")\n            root_content_response.raise_for_status()\n            root_content = root_content_response.text\n            \n            print(f\"\u2705 Verification - New root hash: 
{root_data['hash']}\")\n            print(f\"\u2705 Verification - New generation: {root_data.get('generation')}\")\n            print(f\"\u2705 Verification - Root content size: {len(root_content)} bytes\")\n            print(f\"\ud83d\udcc4 Verification - Root content: {repr(root_content)}\")\n            \n            # Check if truly empty\n            lines = root_content.strip().split('\\n')\n            doc_count = len(lines) - 1  # Subtract version line\n            \n            if doc_count == 0 and root_content.strip() == \"3\":\n                print(f\"\ud83c\udf89 SUCCESS: Root is completely empty!\")\n                print(f\"   Only version line '3' remains\")\n                print(f\"   Document count: 0\")\n                return True\n            else:\n                print(f\"\u26a0\ufe0f Root not completely empty:\")\n                print(f\"   Document count: {doc_count}\")\n                print(f\"   Content: {repr(root_content)}\")\n                return False\n                \n        except Exception as e:\n            print(f\"\u274c Verification failed: {e}\")\n            return False\n    \n    def clear_root_completely(self):\n        \"\"\"Complete process to clear root.docSchema\"\"\"\n        print(f\"\ud83e\uddf9 Clearing Root DocSchema Completely\")\n        print(\"=\" * 50)\n        \n        try:\n            # Step 1: Get current root info\n            root_data, root_content = self.get_current_root_info()\n            \n            # Step 2: Create empty root\n            empty_root_content = self.create_empty_root()\n            \n            # Step 3-4: Upload empty root and update pointer\n            new_root_hash = self.upload_empty_root(empty_root_content, root_data['generation'])\n            \n            # Step 5: Verify result\n            verification_success = self.verify_empty_root()\n            \n            if verification_success:\n                print(f\"\\n\ud83c\udf89 SUCCESS! Root cleared completely\")\n                print(f\"   New root hash: {new_root_hash}\")\n                print(f\"   Cloud is now completely empty from user perspective\")\n                print(f\"   All documents have been removed from root.docSchema\")\n                return True\n            else:\n                print(f\"\\n\u26a0\ufe0f Root clearing completed but verification failed\")\n                return False\n            \n        except Exception as e:\n            print(f\"\\n\u274c Root clearing failed: {e}\")\n            return False",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/cloudtest/clear_root_docschema.py",
      "tags": [
        "remarkable",
        "cloud-storage",
        "sync",
        "cleanup",
        "api-client",
        "document-management",
        "authentication",
        "file-upload",
        "hash-verification",
        "state-management"
      ],
      "updated_at": "2025-12-07T01:49:57.381046",
      "usage_example": "# Basic usage to clear root completely\ncleaner = RootCleaner()\nsuccess = cleaner.clear_root_completely()\nif success:\n    print('Root cleared successfully')\n\n# Step-by-step usage with individual methods\ncleaner = RootCleaner()\n\n# Get current state\nroot_data, root_content = cleaner.get_current_root_info()\nprint(f'Current generation: {root_data[\"generation\"]}')\n\n# Create empty root\nempty_content = cleaner.create_empty_root()\n\n# Upload and update\nnew_hash = cleaner.upload_empty_root(empty_content, root_data['generation'])\n\n# Verify\nif cleaner.verify_empty_root():\n    print('Verification passed')"
    },
    {
      "best_practices": [
        "Always instantiate the class before calling any methods - authentication happens in __init__",
        "Handle RuntimeError during instantiation if authentication fails",
        "The move_document_to_trash() method is the main entry point - other methods are internal steps",
        "Methods are designed to be called in sequence (steps 1-11) for the complete workflow",
        "Each method prints detailed progress information for debugging and monitoring",
        "The class maintains stateless operation except for the authenticated session",
        "Check if document is already in trash before attempting move (handled automatically)",
        "All uploads use the 'working method' that mimics official reMarkable client behavior",
        "CRC32C checksums are required for data integrity verification",
        "Generation numbers must be preserved when updating root to prevent conflicts",
        "The session object is reused across all API calls for efficiency",
        "Methods raise ValueError if documents or metadata are not found",
        "Methods raise RuntimeError if uploads fail with non-200/202 status codes"
      ],
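      "editor_sketch_note_usage": "Added illustrative usage sketch, not part of the original source: the try/except instantiation and single-entry-point call described in the best practices above. The UUID argument is a placeholder.",
      "editor_sketch_usage": "# Editorial usage sketch (placeholder UUID, not from the original source).\n# Instantiate inside try/except because authentication happens in __init__,\n# then call the single entry point and check its boolean result.\ntry:\n    mover = DocumentToTrashMover()\nexcept RuntimeError as exc:\n    print(f'Authentication failed: {exc}')\nelse:\n    ok = mover.move_document_to_trash('00000000-0000-0000-0000-000000000000')\n    print('Moved to trash' if ok else 'Move failed - see printed steps above')",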
      "class_interface": {
        "attributes": [
          {
            "description": "Authenticated HTTP session for making API requests to reMarkable cloud",
            "is_class_variable": false,
            "name": "session",
            "type": "requests.Session"
          }
        ],
        "methods": [
          {
            "is_property": false,
            "is_static": false,
            "name": "__init__",
            "parameters": {},
            "purpose": "Initialize the DocumentToTrashMover with authenticated session",
            "returns": "None - initializes self.session attribute",
            "signature": "__init__(self)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "get_current_root_info",
            "parameters": {},
            "purpose": "Retrieve current root.docSchema information including hash, generation, and content",
            "returns": "Tuple of (root_data dict with hash/generation, root_content string)",
            "signature": "get_current_root_info(self) -> tuple[dict, str]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "get_document_info",
            "parameters": {
              "doc_uuid": "UUID of the document to find",
              "root_content": "Content of root.docSchema as string"
            },
            "purpose": "Find and extract document entry from root.docSchema by UUID",
            "returns": "Dict with keys: hash, uuid, type, size, full_line",
            "signature": "get_document_info(self, doc_uuid: str, root_content: str) -> dict"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "get_document_schema",
            "parameters": {
              "doc_hash": "SHA256 hash of the document schema to retrieve"
            },
            "purpose": "Retrieve and parse a document's docSchema file from cloud storage",
            "returns": "Tuple of (doc_content string, doc_lines list)",
            "signature": "get_document_schema(self, doc_hash: str) -> tuple[str, list]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "get_current_metadata",
            "parameters": {
              "doc_lines": "List of lines from document schema"
            },
            "purpose": "Extract and fetch current metadata from document schema lines",
            "returns": "Tuple of (current_metadata dict, metadata_line string)",
            "signature": "get_current_metadata(self, doc_lines: list) -> tuple[dict, str]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "create_trash_metadata",
            "parameters": {
              "current_metadata": "Current metadata dictionary to update"
            },
            "purpose": "Create updated metadata JSON with parent field set to 'trash'",
            "returns": "JSON string of updated metadata with parent='trash'",
            "signature": "create_trash_metadata(self, current_metadata: dict) -> str"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "upload_new_metadata",
            "parameters": {
              "doc_uuid": "UUID of the document (used in rm-filename header)",
              "metadata_json": "JSON string of metadata to upload"
            },
            "purpose": "Upload new metadata file to cloud storage using working upload method",
            "returns": "SHA256 hash of the uploaded metadata",
            "signature": "upload_new_metadata(self, metadata_json: str, doc_uuid: str) -> str"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "create_new_document_schema",
            "parameters": {
              "doc_lines": "List of lines from original document schema",
              "metadata_line": "Original metadata line to replace",
              "new_metadata_hash": "New hash to replace in metadata line"
            },
            "purpose": "Create new document schema with updated metadata hash",
            "returns": "New document schema content as string",
            "signature": "create_new_document_schema(self, doc_lines: list, new_metadata_hash: str, metadata_line: str) -> str"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "upload_new_document_schema",
            "parameters": {
              "doc_content": "Document schema content to upload",
              "doc_uuid": "UUID of the document (used in rm-filename header)"
            },
            "purpose": "Upload new document schema to cloud storage using working upload method",
            "returns": "SHA256 hash of the uploaded document schema",
            "signature": "upload_new_document_schema(self, doc_content: str, doc_uuid: str) -> str"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "update_root_docschema",
            "parameters": {
              "doc_info": "Document info dict with full_line key",
              "new_doc_hash": "New document hash to replace in root",
              "root_content": "Current root.docSchema content"
            },
            "purpose": "Update root.docSchema content with new document hash",
            "returns": "Updated root.docSchema content as string",
            "signature": "update_root_docschema(self, root_content: str, doc_info: dict, new_doc_hash: str) -> str"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "upload_new_root",
            "parameters": {
              "generation": "Generation number for root update",
              "root_content": "New root.docSchema content to upload"
            },
            "purpose": "Upload new root.docSchema and update root hash pointer using working method",
            "returns": "SHA256 hash of the new root",
            "signature": "upload_new_root(self, root_content: str, generation: int) -> str"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "move_document_to_trash",
            "parameters": {
              "doc_uuid": "UUID of the document to move to trash"
            },
            "purpose": "Complete workflow to move a document to trash (main entry point)",
            "returns": "True if successful, False if operation failed",
            "signature": "move_document_to_trash(self, doc_uuid: str) -> bool"
          }
        ]
      },
      "complexity": "complex",
      "component_type": "class",
      "conditional_imports": [],
      "created_at": "2025-12-07 00:47:24",
      "decorators": [],
      "dependencies": [
        "json",
        "time",
        "hashlib",
        "uuid",
        "base64",
        "zlib",
        "pathlib",
        "crc32c",
        "sys",
        "requests"
      ],
      "description": "A class that moves reMarkable documents to the trash by updating their metadata parent field to 'trash' and synchronizing changes through the reMarkable cloud API.",
      "docstring": "Moves documents TO trash using the working upload mechanism",
      "id": 2099,
      "imports": [
        "import json",
        "import time",
        "import hashlib",
        "import uuid",
        "import base64",
        "import zlib",
        "from pathlib import Path",
        "from auth import RemarkableAuth",
        "import crc32c",
        "import sys"
      ],
      "imports_required": [
        "import json",
        "import time",
        "import hashlib",
        "import uuid",
        "import base64",
        "import zlib",
        "from pathlib import Path",
        "from auth import RemarkableAuth",
        "import crc32c",
        "import sys"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 443,
      "line_start": 45,
      "name": "DocumentToTrashMover",
      "parameters": [
        {
          "annotation": "",
          "default": null,
          "name": "bases"
        }
      ],
      "parameters_explained": {
        "__init__": "No parameters required. The constructor automatically initializes authentication using RemarkableAuth and establishes an authenticated session with the reMarkable cloud service. Raises RuntimeError if authentication fails."
      },
      "parent_class": null,
      "purpose": "This class provides a complete workflow for moving reMarkable documents to trash. It handles authentication, retrieves current document state from the cloud, modifies metadata to set parent='trash', uploads updated schemas, and synchronizes the root document structure. The implementation uses the 'working upload mechanism' that mimics the official reMarkable desktop client's behavior, including proper headers, CRC32C checksums, and endpoint usage.",
      "return_annotation": null,
      "return_explained": "Instantiation returns a DocumentToTrashMover object with an authenticated session. The main method move_document_to_trash() returns a boolean: True if the document was successfully moved to trash, False if the operation failed. Other methods return various data structures: tuples of (dict, str) for root info, dict for document info, str/list for schemas, and str for hash values.",
      "settings_required": [
        "RemarkableAuth module must be available and properly configured",
        "Valid reMarkable cloud authentication credentials (handled by RemarkableAuth)",
        "compute_crc32c_header function must be available in scope",
        "Network access to eu.tectonic.remarkable.com API endpoints"
      ],
      "source_code": "class DocumentToTrashMover:\n    \"\"\"Moves documents TO trash using the working upload mechanism\"\"\"\n    \n    def __init__(self):\n        # Load auth session\n        auth = RemarkableAuth()\n        self.session = auth.get_authenticated_session()\n        \n        if not self.session:\n            raise RuntimeError(\"Failed to authenticate with reMarkable\")\n        \n        print(\"\ud83d\uddd1\ufe0f Document to Trash Mover Initialized\")\n    \n    def get_current_root_info(self):\n        \"\"\"Get current root.docSchema info using working method\"\"\"\n        print(\"\\n\ud83d\udccb Step 1: Getting current root.docSchema...\")\n        \n        # Get root info\n        root_response = self.session.get(\"https://eu.tectonic.remarkable.com/sync/v4/root\")\n        root_response.raise_for_status()\n        root_data = root_response.json()\n        \n        # Get root content\n        root_content_response = self.session.get(f\"https://eu.tectonic.remarkable.com/sync/v3/files/{root_data['hash']}\")\n        root_content_response.raise_for_status()\n        root_content = root_content_response.text\n        \n        print(f\"\u2705 Current root hash: {root_data['hash']}\")\n        print(f\"\u2705 Current generation: {root_data.get('generation')}\")\n        print(f\"\u2705 Root content size: {len(root_content)} bytes\")\n        \n        return root_data, root_content\n    \n    def get_document_info(self, doc_uuid: str, root_content: str):\n        \"\"\"Find document entry in root.docSchema\"\"\"\n        print(f\"\\n\ud83d\udcc4 Step 2: Finding document {doc_uuid[:8]}... in root.docSchema\")\n        \n        lines = root_content.strip().split('\\n')\n        for line in lines[1:]:  # Skip version header\n            if doc_uuid in line:\n                parts = line.split(':')\n                if len(parts) >= 5:\n                    doc_info = {\n                        'hash': parts[0],\n                        'uuid': parts[2],\n                        'type': parts[3],\n                        'size': parts[4],\n                        'full_line': line\n                    }\n                    print(f\"\u2705 Found document entry:\")\n                    print(f\"   Hash: {doc_info['hash']}\")\n                    print(f\"   Type: {doc_info['type']}\")\n                    print(f\"   Size: {doc_info['size']}\")\n                    print(f\"   Full line: {doc_info['full_line']}\")\n                    return doc_info\n        \n        raise ValueError(f\"Document {doc_uuid} not found in root.docSchema\")\n    \n    def get_document_schema(self, doc_hash: str):\n        \"\"\"Retrieve document's docSchema\"\"\"\n        print(f\"\\n\ud83d\udcc4 Step 3: Retrieving document docSchema...\")\n        \n        doc_response = self.session.get(f\"https://eu.tectonic.remarkable.com/sync/v3/files/{doc_hash}\")\n        doc_response.raise_for_status()\n        doc_content = doc_response.text\n        \n        print(f\"\u2705 Document docSchema size: {len(doc_content)} bytes\")\n        print(f\"\ud83d\udcc4 Document docSchema content:\")\n        \n        lines = doc_content.strip().split('\\n')\n        for i, line in enumerate(lines):\n            print(f\"   Line {i}: {line}\")\n        \n        return doc_content, lines\n    \n    def get_current_metadata(self, doc_lines: list):\n        \"\"\"Extract and fetch current metadata\"\"\"\n        print(f\"\\n\ud83d\udcdd Step 4: Getting current metadata...\")\n        \n        metadata_hash = 
None\n        metadata_line = None\n        \n        # Find metadata component\n        for line in doc_lines[1:]:  # Skip version\n            if ':' in line and '.metadata' in line:\n                parts = line.split(':')\n                if len(parts) >= 5:\n                    metadata_hash = parts[0]\n                    metadata_line = line\n                    break\n        \n        if not metadata_hash:\n            raise ValueError(\"Metadata component not found in document schema\")\n        \n        print(f\"\u2705 Metadata hash: {metadata_hash}\")\n        \n        # Fetch current metadata\n        metadata_response = self.session.get(f\"https://eu.tectonic.remarkable.com/sync/v3/files/{metadata_hash}\")\n        metadata_response.raise_for_status()\n        current_metadata = json.loads(metadata_response.text)\n        \n        print(f\"\u2705 Current metadata:\")\n        for key, value in current_metadata.items():\n            print(f\"   {key}: {value}\")\n        \n        return current_metadata, metadata_line\n    \n    def create_trash_metadata(self, current_metadata: dict):\n        \"\"\"Create updated metadata with parent = 'trash'\"\"\"\n        print(f\"\\n\ud83d\uddd1\ufe0f Step 5: Creating trash metadata...\")\n        \n        # Copy current metadata and update parent to trash\n        updated_metadata = current_metadata.copy()\n        old_parent = updated_metadata.get('parent', '')\n        updated_metadata['parent'] = 'trash'  # Move to trash\n        \n        print(f\"\u2705 Updating parent: '{old_parent}' \u2192 'trash'\")\n        \n        # Keep other fields as they are for trash move\n        updated_metadata['lastModified'] = int(time.time() * 1000)\n        \n        # Convert to JSON\n        updated_metadata_json = json.dumps(updated_metadata, separators=(',', ':'))\n        \n        print(f\"\u2705 Updated metadata ({len(updated_metadata_json)} bytes):\")\n        print(f\"   {updated_metadata_json[:100]}...\")\n        \n        return updated_metadata_json\n    \n    def upload_new_metadata(self, metadata_json: str, doc_uuid: str):\n        \"\"\"Upload new metadata using WORKING method\"\"\"\n        print(f\"\\n\u2b06\ufe0f Step 6: Uploading new metadata using WORKING method...\")\n        \n        # Calculate hash\n        metadata_hash = hashlib.sha256(metadata_json.encode()).hexdigest()\n        print(f\"\u2705 New metadata hash: {metadata_hash}\")\n        \n        # Upload using WORKING method from test_move_from_trash.py\n        headers = {\n            'Content-Type': 'application/octet-stream',\n            'rm-batch-number': '1',\n            'rm-filename': f'{doc_uuid}.metadata',  # Required: UUID.metadata format\n            'rm-sync-id': str(uuid.uuid4()),\n            'User-Agent': 'reMarkable-desktop-win/3.11.1.1951',  # Use Windows UA\n            'Accept-Encoding': 'gzip, deflate',\n            'Accept-Language': 'en-BE,*',\n            'Connection': 'Keep-Alive'\n        }\n        \n        # Add CRC32C checksum\n        crc32c_header = compute_crc32c_header(metadata_json.encode())\n        if crc32c_header:\n            headers['x-goog-hash'] = crc32c_header\n        \n        print(f\"\ud83d\udce4 PUT to: https://eu.tectonic.remarkable.com/sync/v3/files/{metadata_hash}\")\n        print(f\"   Headers: {list(headers.keys())}\")\n        \n        upload_response = self.session.put(\n            f\"https://eu.tectonic.remarkable.com/sync/v3/files/{metadata_hash}\",  # WORKING ENDPOINT\n            
data=metadata_json.encode(),\n            headers=headers \n        )\n        \n        print(f\"\u2705 Metadata upload response: {upload_response.status_code}\")\n        if upload_response.status_code not in [200, 202]:\n            print(f\"\u274c Upload failed: {upload_response.text}\")\n            raise RuntimeError(f\"Metadata upload failed: {upload_response.status_code}\")\n        \n        return metadata_hash\n    \n    def create_new_document_schema(self, doc_lines: list, new_metadata_hash: str, metadata_line: str):\n        \"\"\"Create new document schema with updated metadata hash\"\"\"\n        print(f\"\\n\ud83c\udfd7\ufe0f Step 7: Creating new document schema...\")\n        \n        # Replace metadata line with new hash\n        new_lines = []\n        \n        for line in doc_lines:\n            if line == metadata_line:\n                # Replace metadata hash but keep size\n                parts = line.split(':')\n                parts[0] = new_metadata_hash  # Update hash\n                new_line = ':'.join(parts)\n                new_lines.append(new_line)\n                print(f\"\u2705 Updated metadata line:\")\n                print(f\"   Old: {line}\")\n                print(f\"   New: {new_line}\")\n            else:\n                new_lines.append(line)\n        \n        new_doc_content = '\\n'.join(new_lines)\n        \n        print(f\"\u2705 New document schema ({len(new_doc_content)} bytes):\")\n        for i, line in enumerate(new_lines):\n            print(f\"   Line {i}: {line}\")\n        \n        return new_doc_content\n    \n    def upload_new_document_schema(self, doc_content: str, doc_uuid: str):\n        \"\"\"Upload new document schema using WORKING method\"\"\"\n        print(f\"\\n\u2b06\ufe0f Step 8: Uploading new document schema...\")\n        \n        # Calculate hash\n        doc_hash = hashlib.sha256(doc_content.encode()).hexdigest()\n        print(f\"\u2705 New document schema hash: {doc_hash}\")\n        \n        # Upload using WORKING method\n        headers = {\n            'Content-Type': 'application/octet-stream',\n            'rm-batch-number': '1',\n            'rm-filename': f'{doc_uuid}.docSchema',  # Required: UUID.docSchema format\n            'rm-sync-id': str(uuid.uuid4()),\n            'User-Agent': 'reMarkable-desktop-win/3.11.1.1951',\n            'Accept-Encoding': 'gzip, deflate',\n            'Accept-Language': 'en-BE,*',\n            'Connection': 'Keep-Alive'\n        }\n        \n        # Add CRC32C checksum\n        crc32c_header = compute_crc32c_header(doc_content.encode())\n        if crc32c_header:\n            headers['x-goog-hash'] = crc32c_header\n        \n        print(f\"\ud83d\udce4 PUT to: https://eu.tectonic.remarkable.com/sync/v3/files/{doc_hash}\")\n        \n        upload_response = self.session.put(\n            f\"https://eu.tectonic.remarkable.com/sync/v3/files/{doc_hash}\",  # WORKING ENDPOINT\n            data=doc_content.encode(),\n            headers=headers\n        )\n        \n        print(f\"\u2705 Document schema upload response: {upload_response.status_code}\")\n        if upload_response.status_code not in [200, 202]:\n            print(f\"\u274c Upload failed: {upload_response.text}\")\n            raise RuntimeError(f\"Document schema upload failed: {upload_response.status_code}\")\n        \n        return doc_hash\n    \n    def update_root_docschema(self, root_content: str, doc_info: dict, new_doc_hash: str):\n        \"\"\"Update root.docSchema with new document 
hash\"\"\"\n        print(f\"\\n\ud83d\udd04 Step 9: Updating root.docSchema...\")\n        \n        # Replace old document line with new hash\n        old_line = doc_info['full_line']\n        parts = old_line.split(':')\n        parts[0] = new_doc_hash  # Update document hash\n        new_line = ':'.join(parts)\n        \n        print(f\"\u2705 Updating root.docSchema entry:\")\n        print(f\"   Old: {old_line}\")\n        print(f\"   New: {new_line}\")\n        \n        # Replace in root content\n        new_root_content = root_content.replace(old_line, new_line)\n        \n        print(f\"\u2705 New root.docSchema size: {len(new_root_content)} bytes\")\n        \n        return new_root_content\n    \n    def upload_new_root(self, root_content: str, generation: int):\n        \"\"\"Upload new root.docSchema and update roothash using WORKING method\"\"\"\n        print(f\"\\n\u2b06\ufe0f Step 10: Uploading new root.docSchema...\")\n        \n        # Calculate hash\n        root_hash = hashlib.sha256(root_content.encode()).hexdigest()\n        print(f\"\u2705 New root hash: {root_hash}\")\n        \n        # Upload root content using WORKING method\n        headers = {\n            'Content-Type': 'text/plain',\n            'rm-batch-number': '1',\n            'rm-filename': 'root.docSchema',  # System filename for root.docSchema\n            'rm-sync-id': str(uuid.uuid4()),\n            'User-Agent': 'reMarkable-desktop-win/3.11.1.1951',\n            'Accept-Encoding': 'gzip, deflate',\n            'Accept-Language': 'en-BE,*',\n            'Connection': 'Keep-Alive'\n        }\n        \n        # Add CRC32C checksum\n        crc32c_header = compute_crc32c_header(root_content.encode())\n        if crc32c_header:\n            headers['x-goog-hash'] = crc32c_header\n        \n        print(f\"\ud83d\udce4 PUT to: https://eu.tectonic.remarkable.com/sync/v3/files/{root_hash}\")\n        \n        upload_response = self.session.put(\n            f\"https://eu.tectonic.remarkable.com/sync/v3/files/{root_hash}\",  # WORKING ENDPOINT\n            data=root_content.encode(),\n            headers=headers\n        )\n        \n        print(f\"\u2705 Root content upload response: {upload_response.status_code}\")\n        if upload_response.status_code not in [200, 202]:\n            print(f\"\u274c Upload failed: {upload_response.text}\")\n            raise RuntimeError(f\"Root content upload failed: {upload_response.status_code}\")\n        \n        # Update root hash pointer using WORKING method\n        print(f\"\\n\ud83d\udd04 Step 11: Updating root hash pointer...\")\n        \n        # Create root data exactly like working upload_manager.py\n        root_update_data = {\n            \"broadcast\": True,\n            \"generation\": generation,  # Use generation parameter\n            \"hash\": root_hash\n        }\n        \n        # Convert to JSON with 2-space indent like real app\n        root_content_body = json.dumps(root_update_data, indent=2).encode('utf-8')\n        \n        # Headers exactly like working upload_manager.py\n        headers = {\n            'Content-Type': 'application/json',\n            'rm-batch-number': '1',\n            'rm-filename': 'roothash',\n            'rm-sync-id': str(uuid.uuid4()),\n            'User-Agent': 'reMarkable-desktop-win/3.11.1.1951',\n            'Accept-Encoding': 'gzip, deflate',\n            'Accept-Language': 'en-BE,*',\n            'Connection': 'Keep-Alive'\n        }\n        \n        # Add CRC32C checksum\n        
crc32c_header = compute_crc32c_header(root_content_body)\n        if crc32c_header:\n            headers['x-goog-hash'] = crc32c_header\n        \n        # Use /sync/v3/root endpoint like working code\n        print(f\"\ud83d\udce4 PUT to: https://eu.tectonic.remarkable.com/sync/v3/root\")\n        \n        root_update_response = self.session.put(\n            \"https://eu.tectonic.remarkable.com/sync/v3/root\",  # WORKING ENDPOINT\n            data=root_content_body,\n            headers=headers\n        )\n        \n        print(f\"\u2705 Root update response: {root_update_response.status_code}\")\n        if root_update_response.status_code not in [200, 202]:\n            print(f\"\u274c Root update failed: {root_update_response.text}\")\n            raise RuntimeError(f\"Root update failed: {root_update_response.status_code}\")\n        \n        return root_hash\n    \n    def move_document_to_trash(self, doc_uuid: str):\n        \"\"\"Complete process to move document TO trash\"\"\"\n        print(f\"\ud83d\uddd1\ufe0f Moving Document TO Trash\")\n        print(f\"Document UUID: {doc_uuid}\")\n        print(\"=\" * 60)\n        \n        try:\n            # Step 1: Get current root info\n            root_data, root_content = self.get_current_root_info()\n            \n            # Step 2: Find document in root\n            doc_info = self.get_document_info(doc_uuid, root_content)\n            \n            # Step 3: Get document schema\n            doc_content, doc_lines = self.get_document_schema(doc_info['hash'])\n            \n            # Step 4: Get current metadata\n            current_metadata, metadata_line = self.get_current_metadata(doc_lines)\n            \n            # Check current parent\n            current_parent = current_metadata.get('parent', '')\n            if current_parent == 'trash':\n                print(f\"\u26a0\ufe0f Document is already in trash!\")\n                return True\n            \n            print(f\"\ud83d\udcc1 Moving document from '{current_parent or '(root)'}' to trash...\")\n            \n            # Step 5: Create trash metadata (set parent = 'trash')\n            updated_metadata_json = self.create_trash_metadata(current_metadata)\n            \n            # Step 6: Upload new metadata using WORKING method\n            new_metadata_hash = self.upload_new_metadata(updated_metadata_json, doc_uuid)\n            \n            # Step 7: Create new document schema\n            new_doc_content = self.create_new_document_schema(doc_lines, new_metadata_hash, metadata_line)\n            \n            # Step 8: Upload new document schema using WORKING method\n            new_doc_hash = self.upload_new_document_schema(new_doc_content, doc_uuid)\n            \n            # Step 9: Update root.docSchema\n            new_root_content = self.update_root_docschema(root_content, doc_info, new_doc_hash)\n            \n            # Step 10-11: Upload new root and update pointer using WORKING method\n            new_root_hash = self.upload_new_root(new_root_content, root_data['generation'])\n            \n            print(f\"\\n\ud83c\udf89 SUCCESS! 
Document moved to trash\")\n            print(f\"   Document: {current_metadata.get('visibleName')}\")\n            print(f\"   Old parent: {current_parent or '(root)'}\")\n            print(f\"   New parent: trash\")\n            print(f\"   New root hash: {new_root_hash}\")\n            \n            return True\n            \n        except Exception as e:\n            print(f\"\\n\u274c Move to trash operation failed: {e}\")\n            return False",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/cloudtest/move_documents_to_trash.py",
      "tags": [
        "remarkable",
        "cloud-sync",
        "document-management",
        "trash",
        "file-operations",
        "api-client",
        "metadata-update",
        "schema-management",
        "hash-based-storage"
      ],
      "updated_at": "2025-12-07T01:47:24.733316",
      "usage_example": "# Initialize the mover\nmover = DocumentToTrashMover()\n\n# Move a document to trash by UUID\ndoc_uuid = 'abc123-def456-ghi789'\nsuccess = mover.move_document_to_trash(doc_uuid)\n\nif success:\n    print('Document moved to trash successfully')\nelse:\n    print('Failed to move document to trash')\n\n# Advanced: Get current root info\nroot_data, root_content = mover.get_current_root_info()\nprint(f'Current root hash: {root_data[\"hash\"]}')\n\n# Advanced: Find document info\ndoc_info = mover.get_document_info(doc_uuid, root_content)\nprint(f'Document type: {doc_info[\"type\"]}')"
    },
    {
      "best_practices": [],
      "class_interface": {
        "attributes": [],
        "methods": [
          {
            "is_property": false,
            "is_static": false,
            "name": "__init__",
            "parameters": {
              "output_dir": "Type: str",
              "session": "Type: requests.Session"
            },
            "purpose": "Internal method:   init  ",
            "returns": "None",
            "signature": "__init__(self, session, output_dir)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "setup_logging",
            "parameters": {},
            "purpose": "Setup detailed logging to file and console",
            "returns": "None",
            "signature": "setup_logging(self)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "log_node_details",
            "parameters": {
              "content_info": "Type: Dict[str, Any]",
              "depth": "Type: int",
              "node": "Type: RemarkableNode"
            },
            "purpose": "Log comprehensive details about a discovered node",
            "returns": "None",
            "signature": "log_node_details(self, node, content_info, depth)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "log_raw_content",
            "parameters": {
              "content_type": "Type: str",
              "hash_ref": "Type: str",
              "raw_content": "Type: Any"
            },
            "purpose": "Log raw content before processing",
            "returns": "None",
            "signature": "log_raw_content(self, hash_ref, raw_content, content_type)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "get_root_hash",
            "parameters": {},
            "purpose": "Get the current root hash from sync/v4/root endpoint",
            "returns": "Returns Optional[str]",
            "signature": "get_root_hash(self) -> Optional[str]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "fetch_hash_content",
            "parameters": {
              "hash_ref": "Type: str"
            },
            "purpose": "Fetch and analyze content for a given hash",
            "returns": "Returns Optional[Dict[str, Any]]",
            "signature": "fetch_hash_content(self, hash_ref) -> Optional[Dict[str, Any]]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "parse_zip_content",
            "parameters": {
              "content": "Type: bytes"
            },
            "purpose": "Parse ZIP archive content",
            "returns": "Returns Dict[str, Any]",
            "signature": "parse_zip_content(self, content) -> Dict[str, Any]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "parse_directory_listing",
            "parameters": {
              "text_content": "Type: str"
            },
            "purpose": "Parse text content as directory listing with proper hierarchy handling",
            "returns": "Returns Dict[str, Any]",
            "signature": "parse_directory_listing(self, text_content) -> Dict[str, Any]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "fetch_metadata_component",
            "parameters": {
              "hash_ref": "Type: str"
            },
            "purpose": "Fetch and parse a metadata component",
            "returns": "Returns Optional[Dict[str, Any]]",
            "signature": "fetch_metadata_component(self, hash_ref) -> Optional[Dict[str, Any]]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "parse_metadata_to_node",
            "parameters": {
              "content_info": "Type: Dict[str, Any]",
              "hash_ref": "Type: str"
            },
            "purpose": "Parse content info into a RemarkableNode with proper metadata handling",
            "returns": "Returns Optional[RemarkableNode]",
            "signature": "parse_metadata_to_node(self, hash_ref, content_info) -> Optional[RemarkableNode]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "discover_node_recursive",
            "parameters": {
              "depth": "Type: int",
              "hash_ref": "Type: str",
              "parent_path": "Type: str"
            },
            "purpose": "Recursively discover a node and all its children",
            "returns": "Returns Optional[RemarkableNode]",
            "signature": "discover_node_recursive(self, hash_ref, depth, parent_path) -> Optional[RemarkableNode]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "save_node_data",
            "parameters": {
              "content_info": "Type: Dict[str, Any]",
              "node": "Type: RemarkableNode"
            },
            "purpose": "Save detailed node data to JSON file",
            "returns": "None",
            "signature": "save_node_data(self, node, content_info)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "build_hierarchy_from_parents",
            "parameters": {},
            "purpose": "Build proper hierarchy using parent UUIDs from metadata",
            "returns": "Returns Dict[str, List[RemarkableNode]]",
            "signature": "build_hierarchy_from_parents(self) -> Dict[str, List[RemarkableNode]]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "discover_all",
            "parameters": {},
            "purpose": "Complete discovery process from root",
            "returns": "Returns bool",
            "signature": "discover_all(self) -> bool"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "print_proper_hierarchy",
            "parameters": {
              "hierarchy": "Type: Dict[str, Any]"
            },
            "purpose": "Print the proper hierarchy built from parent UUIDs",
            "returns": "None",
            "signature": "print_proper_hierarchy(self, hierarchy)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "print_tree",
            "parameters": {},
            "purpose": "Print the discovered tree structure",
            "returns": "None",
            "signature": "print_tree(self)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "save_discovery_summary",
            "parameters": {},
            "purpose": "Save complete discovery summary",
            "returns": "None",
            "signature": "save_discovery_summary(self)"
          }
        ]
      },
      "complexity": "moderate",
      "component_type": "class",
      "conditional_imports": [],
      "created_at": "2025-12-07 00:45:48",
      "decorators": [],
      "dependencies": [],
      "description": "Handles hierarchical discovery of reMarkable cloud content",
      "docstring": "Handles hierarchical discovery of reMarkable cloud content",
      "id": 2096,
      "imports": [
        "import os",
        "import json",
        "import zipfile",
        "import requests",
        "import logging",
        "from enum import Enum",
        "from pathlib import Path",
        "from typing import Dict",
        "from typing import List",
        "from typing import Optional",
        "from typing import Any",
        "from typing import Set",
        "from dataclasses import dataclass",
        "from dataclasses import field",
        "from datetime import datetime",
        "from auth import get_authenticated_session",
        "import io",
        "import re"
      ],
      "imports_required": [
        "import os",
        "import json",
        "import zipfile",
        "import requests",
        "import logging"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 755,
      "line_start": 63,
      "name": "RemarkableDiscovery",
      "parameters": [
        {
          "annotation": "",
          "default": null,
          "name": "bases"
        }
      ],
      "parameters_explained": {
        "bases": "Parameter of type "
      },
      "parent_class": null,
      "purpose": "Handles hierarchical discovery of reMarkable cloud content",
      "return_annotation": null,
      "return_explained": "Returns unspecified type",
      "settings_required": [],
      "source_code": "class RemarkableDiscovery:\n    \"\"\"Handles hierarchical discovery of reMarkable cloud content\"\"\"\n    \n    def __init__(self, session: requests.Session, output_dir: str = None):\n        self.session = session\n        self.base_url = \"https://eu.tectonic.remarkable.com\"\n        \n        # Setup output directory\n        if output_dir:\n            self.output_dir = Path(output_dir)\n        else:\n            self.output_dir = Path.cwd() / \"remarkable_discovery\"\n        \n        self.output_dir.mkdir(parents=True, exist_ok=True)\n        \n        # Setup detailed logging\n        self.log_file = self.output_dir / \"discovery_detailed.log\"\n        self.setup_logging()\n        \n        # Discovery state\n        self.nodes: Dict[str, RemarkableNode] = {}\n        self.root_nodes: List[RemarkableNode] = []\n        self.failed_hashes: Set[str] = set()\n        \n        # Statistics\n        self.stats = {\n            'total_nodes': 0,\n            'folders': 0,\n            'documents': 0,\n            'successful_downloads': 0,\n            'failed_downloads': 0,\n            'bytes_downloaded': 0\n        }\n    \n    def setup_logging(self):\n        \"\"\"Setup detailed logging to file and console\"\"\"\n        # Create a custom logger\n        self.logger = logging.getLogger('RemarkableDiscovery')\n        self.logger.setLevel(logging.DEBUG)\n        \n        # Clear any existing handlers\n        self.logger.handlers.clear()\n        \n        # File handler for detailed logs\n        file_handler = logging.FileHandler(self.log_file, mode='w', encoding='utf-8')\n        file_handler.setLevel(logging.DEBUG)\n        file_formatter = logging.Formatter(\n            '%(asctime)s | %(levelname)-8s | %(message)s',\n            datefmt='%Y-%m-%d %H:%M:%S'\n        )\n        file_handler.setFormatter(file_formatter)\n        \n        # Console handler for important info\n        console_handler = logging.StreamHandler()\n        console_handler.setLevel(logging.INFO)\n        console_formatter = logging.Formatter('%(message)s')\n        console_handler.setFormatter(console_formatter)\n        \n        self.logger.addHandler(file_handler)\n        self.logger.addHandler(console_handler)\n        \n        self.logger.info(f\"\ud83d\udd0d DETAILED DISCOVERY LOG STARTED\")\n        self.logger.info(f\"\ud83d\udcc1 Output directory: {self.output_dir}\")\n        self.logger.info(f\"\ud83d\udcdd Log file: {self.log_file}\")\n    \n    def log_node_details(self, node: RemarkableNode, content_info: Dict[str, Any], depth: int):\n        \"\"\"Log comprehensive details about a discovered node\"\"\"\n        indent = \"  \" * depth\n        \n        # Basic node info\n        self.logger.debug(f\"{indent}NODE DISCOVERY DETAILS:\")\n        self.logger.debug(f\"{indent}  Hash: {node.hash}\")\n        self.logger.debug(f\"{indent}  Name: {node.name}\")\n        self.logger.debug(f\"{indent}  Type: {node.node_type.value}\")\n        self.logger.debug(f\"{indent}  Parent: {node.parent or 'ROOT'}\")\n        self.logger.debug(f\"{indent}  Depth: {depth}\")\n        self.logger.debug(f\"{indent}  Size: {node.size} bytes\")\n        \n        # Content info details\n        if content_info:\n            self.logger.debug(f\"{indent}  RAW CONTENT INFO:\")\n            self.logger.debug(f\"{indent}    Type: {type(content_info)}\")\n            self.logger.debug(f\"{indent}    Content: {json.dumps(content_info, indent=6, default=str)}\")\n        \n        # 
Component details for documents\n        if node.node_type != NodeType.FOLDER:\n            if node.content_hash:\n                self.logger.debug(f\"{indent}    Content Hash: {node.content_hash}\")\n            if node.metadata_hash:\n                self.logger.debug(f\"{indent}    Metadata Hash: {node.metadata_hash}\")\n            if node.pagedata_hash:\n                self.logger.debug(f\"{indent}    Pagedata Hash: {node.pagedata_hash}\")\n            if node.pdf_hash:\n                self.logger.debug(f\"{indent}    PDF Hash: {node.pdf_hash}\")\n        \n        self.logger.debug(f\"{indent}  {'='*50}\")\n    \n    def log_raw_content(self, hash_ref: str, raw_content: Any, content_type: str):\n        \"\"\"Log raw content before processing\"\"\"\n        self.logger.debug(f\"RAW CONTENT for {hash_ref[:16]}...:\")\n        self.logger.debug(f\"  Type: {content_type}\")\n        self.logger.debug(f\"  Size: {len(str(raw_content)) if raw_content else 'None'}\")\n        \n        if content_type == \"ZIP\":\n            self.logger.debug(f\"  ZIP Content Details:\")\n            if hasattr(raw_content, 'namelist'):\n                for name in raw_content.namelist():\n                    self.logger.debug(f\"    File: {name}\")\n        elif content_type == \"TEXT\":\n            self.logger.debug(f\"  TEXT Content (first 500 chars):\")\n            self.logger.debug(f\"    {str(raw_content)[:500]}\")\n        elif content_type == \"JSON\":\n            self.logger.debug(f\"  JSON Content:\")\n            self.logger.debug(f\"    {json.dumps(raw_content, indent=4, default=str)}\")\n        \n        self.logger.debug(f\"  {'='*60}\")\n    \n    def get_root_hash(self) -> Optional[str]:\n        \"\"\"Get the current root hash from sync/v4/root endpoint\"\"\"\n        url = f\"{self.base_url}/sync/v4/root\"\n        \n        try:\n            print(f\"\ud83d\udce1 Getting root hash from: {url}\")\n            response = self.session.get(url, timeout=30)\n            \n            if response.status_code == 200:\n                data = response.json()\n                root_hash = data.get('hash')\n                generation = data.get('generation', 'unknown')\n                \n                print(f\"\u2705 Root hash obtained: {root_hash}\")\n                print(f\"   Generation: {generation}\")\n                \n                # Save root response for reference\n                root_file = self.output_dir / \"root_response.json\"\n                root_file.write_text(json.dumps(data, indent=2))\n                \n                return root_hash\n            else:\n                print(f\"\u274c Root hash request failed: {response.status_code}\")\n                return None\n                \n        except Exception as e:\n            print(f\"\u274c Error getting root hash: {e}\")\n            return None\n    \n    def fetch_hash_content(self, hash_ref: str) -> Optional[Dict[str, Any]]:\n        \"\"\"Fetch and analyze content for a given hash\"\"\"\n        url = f\"{self.base_url}/sync/v3/files/{hash_ref}\"\n        \n        try:\n            self.logger.debug(f\"FETCHING CONTENT: {hash_ref}\")\n            self.logger.debug(f\"  URL: {url}\")\n            \n            response = self.session.get(url, timeout=30)\n            \n            if response.status_code != 200:\n                error_msg = f\"\u274c Failed to fetch {hash_ref[:16]}...: HTTP {response.status_code}\"\n                print(error_msg)\n                self.logger.error(error_msg)\n             
   self.failed_hashes.add(hash_ref)\n                self.stats['failed_downloads'] += 1\n                return None\n            \n            content = response.content\n            self.stats['successful_downloads'] += 1\n            self.stats['bytes_downloaded'] += len(content)\n            \n            self.logger.debug(f\"  Response size: {len(content)} bytes\")\n            self.logger.debug(f\"  Content-Type: {response.headers.get('content-type', 'unknown')}\")\n            \n            # Save raw content\n            raw_file = self.output_dir / f\"raw_{hash_ref[:16]}.bin\"\n            raw_file.write_bytes(content)\n            \n            # Analyze content\n            content_info = {\n                'hash': hash_ref,\n                'size': len(content),\n                'content': content,\n                'is_directory': False,\n                'hash_references': [],\n                'metadata': None\n            }\n            \n            # Content type detection and logging\n            if len(content) == 0:\n                self.log_raw_content(hash_ref, content, \"EMPTY\")\n                \n            elif content.startswith(b'PK'):\n                # ZIP archive - likely contains file data\n                self.log_raw_content(hash_ref, \"ZIP archive\", \"ZIP\")\n                content_info.update(self.parse_zip_content(content))\n                \n            elif content.startswith(b'%PDF'):\n                # PDF document\n                self.log_raw_content(hash_ref, \"PDF document\", \"PDF\")\n                \n            else:\n                try:\n                    # Try to parse as text/directory listing\n                    text_content = content.decode('utf-8')\n                    self.log_raw_content(hash_ref, text_content, \"TEXT\")\n                    content_info.update(self.parse_directory_listing(text_content))\n                except UnicodeDecodeError:\n                    # Binary content - no parsing\n                    self.log_raw_content(hash_ref, content, \"BINARY\")\n            \n            self.logger.debug(f\"  Parsed content_info: {json.dumps({k: v for k, v in content_info.items() if k != 'content'}, indent=4, default=str)}\")\n            \n            return content_info\n            \n        except Exception as e:\n            print(f\"\u274c Error fetching {hash_ref[:16]}...: {e}\")\n            self.failed_hashes.add(hash_ref)\n            self.stats['failed_downloads'] += 1\n            return None\n    \n    def parse_zip_content(self, content: bytes) -> Dict[str, Any]:\n        \"\"\"Parse ZIP archive content\"\"\"\n        import io\n        \n        result = {\n            'is_directory': False,\n            'hash_references': []\n        }\n        \n        try:\n            with zipfile.ZipFile(io.BytesIO(content), 'r') as zip_file:\n                # Check if it contains multiple files (directory-like)\n                files = zip_file.namelist()\n                if len(files) > 1:\n                    result['is_directory'] = True\n                \n                # Look for metadata files\n                for filename in files:\n                    if filename.endswith('.metadata'):\n                        try:\n                            metadata_content = zip_file.read(filename)\n                            metadata = json.loads(metadata_content.decode('utf-8'))\n                            result['metadata'] = metadata\n                        except:\n                            pass\n           
                 \n        except Exception as e:\n            print(f\"\u26a0\ufe0f Error parsing ZIP: {e}\")\n        \n        return result\n    \n    def parse_directory_listing(self, text_content: str) -> Dict[str, Any]:\n        \"\"\"Parse text content as directory listing with proper hierarchy handling\"\"\"\n        result = {\n            'is_directory': False,\n            'hash_references': [],\n            'child_objects': [],  # New objects to discover recursively\n            'data_components': []  # Data components of current object\n        }\n        \n        lines = text_content.split('\\n')\n        \n        # Skip first line if it's just a number (count)\n        if lines and lines[0].strip().isdigit():\n            lines = lines[1:]\n        \n        import re\n        # Pattern: hash:flags:uuid:type:size or hash:flags:uuid.component:type:size\n        entry_pattern = r'^([a-f0-9]{64}):([0-9a-fA-F]+):([a-f0-9-]+(?:\\.[^:]+)?):(\\d+):(\\d+)$'\n        \n        for line in lines:\n            line = line.strip()\n            if not line:\n                continue\n            \n            match = re.match(entry_pattern, line, re.IGNORECASE)\n            if match:\n                hash_val, flags, uuid_component, type_val, size_val = match.groups()\n                \n                entry_info = {\n                    'hash': hash_val,\n                    'flags': flags,\n                    'uuid_component': uuid_component,\n                    'type': type_val,\n                    'size': int(size_val),\n                    'line': line\n                }\n                \n                # Determine if this is a child object or data component\n                if '.' in uuid_component:\n                    # Data component (has extension like .content, .metadata, .rm, .pdf, .pagedata)\n                    component_type = uuid_component.split('.')[-1]\n                    entry_info['component_type'] = component_type\n                    result['data_components'].append(entry_info)\n                    \n                    self.logger.debug(f\"    \ud83d\udcc4 Data component: {component_type} ({size_val} bytes)\")\n                else:\n                    # Child object (pure UUID, can be discovered recursively)\n                    result['child_objects'].append(entry_info)\n                    result['hash_references'].append({\n                        'hash': hash_val,\n                        'type': 'child_object',\n                        'line': line,\n                        'uuid': uuid_component,\n                        'flags': flags,\n                        'size': int(size_val)\n                    })\n                    \n                    self.logger.debug(f\"    \ud83d\udd17 Child object: {uuid_component} ({size_val} bytes)\")\n            else:\n                self.logger.debug(f\"    \u26a0\ufe0f Unrecognized line format: {line}\")\n        \n        if result['child_objects'] or result['data_components']:\n            result['is_directory'] = True\n            \n        self.logger.debug(f\"  Parsed directory: {len(result['child_objects'])} children, {len(result['data_components'])} components\")\n        \n        return result\n        \n        return result\n    \n    def fetch_metadata_component(self, hash_ref: str) -> Optional[Dict[str, Any]]:\n        \"\"\"Fetch and parse a metadata component\"\"\"\n        try:\n            content_info = self.fetch_hash_content(hash_ref)\n            if not content_info:\n                
return None\n            \n            content = content_info.get('content', b'')\n            if isinstance(content, bytes):\n                try:\n                    text_content = content.decode('utf-8')\n                    return json.loads(text_content)\n                except (UnicodeDecodeError, json.JSONDecodeError) as e:\n                    self.logger.debug(f\"Failed to parse metadata from {hash_ref[:16]}...: {e}\")\n                    return None\n            \n            return None\n        except Exception as e:\n            self.logger.debug(f\"Error fetching metadata component {hash_ref[:16]}...: {e}\")\n            return None\n    \n    def parse_metadata_to_node(self, hash_ref: str, content_info: Dict[str, Any]) -> Optional[RemarkableNode]:\n        \"\"\"Parse content info into a RemarkableNode with proper metadata handling\"\"\"\n        \n        # Initialize node with default values\n        node_type = NodeType.DOCUMENT\n        name = f\"document_{hash_ref[:8]}\"\n        parent_uuid = None\n        created_time = None\n        last_modified = None\n        visible_name = None\n        \n        # Check if this has data components (indicating it's an object with metadata)\n        if content_info.get('data_components'):\n            # Look for metadata component\n            metadata_component = None\n            for component in content_info.get('data_components', []):\n                if component.get('component_type') == 'metadata':\n                    self.logger.debug(f\"  \ud83d\udd0d Fetching metadata component: {component['hash'][:16]}...\")\n                    metadata_component = self.fetch_metadata_component(component['hash'])\n                    break\n            \n            if metadata_component:\n                visible_name = metadata_component.get('visibleName')\n                parent_uuid = metadata_component.get('parent', '')\n                created_time = metadata_component.get('createdTime')\n                last_modified = metadata_component.get('lastModified')\n                \n                # Determine type from metadata\n                if metadata_component.get('type') == 'CollectionType':\n                    node_type = NodeType.FOLDER\n                    name = visible_name or f\"folder_{hash_ref[:8]}\"\n                else:\n                    node_type = NodeType.DOCUMENT\n                    name = visible_name or f\"document_{hash_ref[:8]}\"\n                    \n                self.logger.debug(f\"  \ud83d\udccb Parsed metadata: name='{name}', parent='{parent_uuid}', type='{metadata_component.get('type')}'\")\n            else:\n                # No metadata found, use default naming\n                self.logger.debug(f\"  \u26a0\ufe0f No metadata found for components in {hash_ref[:16]}...\")\n                if content_info.get('child_objects'):\n                    node_type = NodeType.FOLDER\n                    name = f\"folder_{hash_ref[:8]}\"\n                \n        elif content_info.get('is_directory') and content_info.get('child_objects'):\n            # Directory with child objects but no data components\n            node_type = NodeType.FOLDER\n            name = f\"folder_{hash_ref[:8]}\"\n        \n        # Create node\n        node = RemarkableNode(\n            hash=hash_ref,\n            name=name,\n            parent=parent_uuid,  # Use UUID from metadata\n            node_type=node_type,\n            size=content_info.get('size', 0),\n            created_time=created_time,\n            
last_modified=last_modified\n        )\n        \n        # Store additional component information\n        if content_info.get('data_components'):\n            for component in content_info['data_components']:\n                comp_type = component.get('component_type')\n                if comp_type == 'content':\n                    node.content_hash = component['hash']\n                elif comp_type == 'metadata':\n                    node.metadata_hash = component['hash']\n                elif comp_type == 'pagedata':\n                    node.pagedata_hash = component['hash']\n                elif comp_type == 'pdf':\n                    node.pdf_hash = component['hash']\n        \n        return node\n    \n    def discover_node_recursive(self, hash_ref: str, depth: int = 0, parent_path: str = \"\") -> Optional[RemarkableNode]:\n        \"\"\"Recursively discover a node and all its children\"\"\"\n        \n        # Check if already processed\n        if hash_ref in self.nodes:\n            return self.nodes[hash_ref]\n        \n        if hash_ref in self.failed_hashes:\n            return None\n        \n        indent = \"  \" * depth\n        print(f\"{indent}\ud83d\udd0d Discovering node: {hash_ref[:16]}... (depth {depth})\")\n        self.logger.info(f\"{indent}\ud83d\udd0d DISCOVERING NODE: {hash_ref} (depth {depth})\")\n        \n        # Fetch content\n        content_info = self.fetch_hash_content(hash_ref)\n        if not content_info:\n            self.logger.error(f\"{indent}  Failed to fetch content for {hash_ref}\")\n            return None\n        \n        # Parse into node\n        node = self.parse_metadata_to_node(hash_ref, content_info)\n        if not node:\n            print(f\"{indent}  \u26a0\ufe0f Could not parse into node\")\n            self.logger.warning(f\"{indent}  Could not parse {hash_ref} into node\")\n            return None\n        \n        # Set additional properties\n        node.depth = depth\n        node.local_path = os.path.join(parent_path, node.name) if node.name != \"<directory>\" else parent_path\n        \n        # Log comprehensive node details\n        self.log_node_details(node, content_info, depth)\n        \n        # Store node\n        self.nodes[hash_ref] = node\n        self.stats['total_nodes'] += 1\n        \n        if node.node_type == NodeType.FOLDER:\n            self.stats['folders'] += 1\n        else:\n            self.stats['documents'] += 1\n        \n        print(f\"{indent}  \u2705 {node.node_type.value}: {node.name}\")\n        self.logger.info(f\"{indent}  \u2705 {node.node_type.value}: {node.name} | Size: {node.size} bytes | Parent: {node.parent or 'ROOT'}\")\n        \n        # Process child objects only (not data components)\n        if content_info.get('is_directory') and content_info.get('child_objects'):\n            child_objects = content_info['child_objects']\n            print(f\"{indent}    \ud83d\udcc1 Directory with {len(child_objects)} child objects\")\n            self.logger.info(f\"{indent}    \ud83d\udcc1 Directory with {len(child_objects)} child objects:\")\n            \n            for i, child_info in enumerate(child_objects, 1):\n                child_hash = child_info['hash']\n                child_uuid = child_info['uuid_component']\n                self.logger.debug(f\"{indent}      Child {i}/{len(child_objects)}: {child_uuid} -> {child_hash}\")\n                try:\n                    child_node = self.discover_node_recursive(\n                        child_hash,\n            
            depth + 1,\n                        node.local_path or \"\"\n                    )\n                    if child_node:\n                        node.children.append(child_node)\n                        self.logger.debug(f\"{indent}      \u2705 Child {i} processed successfully: {child_node.name}\")\n                    else:\n                        self.logger.warning(f\"{indent}      \u26a0\ufe0f Child {i} returned None\")\n                except Exception as e:\n                    error_msg = f\"{indent}    \u274c Error processing child {child_hash[:16]}...: {e}\"\n                    print(error_msg)\n                    self.logger.error(error_msg)\n                    continue\n        \n        # Log data components for reference\n        if content_info.get('data_components'):\n            data_components = content_info['data_components']\n            self.logger.info(f\"{indent}    \ud83d\udcc4 Data components: {len(data_components)}\")\n            for component in data_components:\n                comp_type = component.get('component_type', 'unknown')\n                comp_size = component.get('size', 0)\n                self.logger.debug(f\"{indent}      \ud83d\udcbe {comp_type}: {comp_size} bytes\")\n        \n        # Save node data\n        self.save_node_data(node, content_info)\n        \n        return node\n    \n    def save_node_data(self, node: RemarkableNode, content_info: Dict[str, Any]):\n        \"\"\"Save detailed node data to JSON file\"\"\"\n        try:\n            node_file = self.output_dir / f\"node_{node.hash[:16]}.json\"\n            \n            # Prepare JSON-safe data\n            safe_content_info = dict(content_info)\n            if 'content' in safe_content_info:\n                if isinstance(safe_content_info['content'], bytes):\n                    safe_content_info['content'] = safe_content_info['content'].hex()\n                    safe_content_info['content_encoding'] = 'hex'\n            \n            node_data = {\n                'hash': node.hash,\n                'name': node.name,\n                'type': node.node_type.value,\n                'parent': node.parent,\n                'depth': node.depth,\n                'local_path': node.local_path,\n                'size': node.size,\n                'created_time': node.created_time,\n                'last_modified': node.last_modified,\n                'source': node.source,\n                'children_count': len(node.children),\n                'timestamp': datetime.now().isoformat(),\n                'raw_content_info': safe_content_info\n            }\n            \n            node_file.write_text(json.dumps(node_data, indent=2))\n            \n        except Exception as e:\n            print(f\"\u26a0\ufe0f Error saving node data for {node.hash[:16]}...: {e}\")\n    \n    def build_hierarchy_from_parents(self) -> Dict[str, List[RemarkableNode]]:\n        \"\"\"Build proper hierarchy using parent UUIDs from metadata\"\"\"\n        hierarchy = {}\n        \n        self.logger.info(\"\ud83c\udfd7\ufe0f Building hierarchy from parent UUIDs...\")\n        \n        # Group nodes by parent UUID\n        nodes_by_parent = {}\n        root_nodes = []\n        \n        for node in self.nodes.values():\n            parent_uuid = node.parent\n            if not parent_uuid or parent_uuid == \"\":\n                root_nodes.append(node)\n                self.logger.debug(f\"  \ud83d\udcc1 Root node: {node.name}\")\n            else:\n                if parent_uuid not in 
nodes_by_parent:\n                    nodes_by_parent[parent_uuid] = []\n                nodes_by_parent[parent_uuid].append(node)\n                self.logger.debug(f\"  \ud83d\udcc4 Child of {parent_uuid}: {node.name}\")\n        \n        # Build hierarchy\n        hierarchy['root'] = root_nodes\n        hierarchy['children'] = nodes_by_parent\n        \n        self.logger.info(f\"  \ud83d\udcca Hierarchy built: {len(root_nodes)} root nodes, {len(nodes_by_parent)} parent groups\")\n        \n        return hierarchy\n    \n    def discover_all(self) -> bool:\n        \"\"\"Complete discovery process from root\"\"\"\n        print(\"\ud83d\ude80 Starting complete reMarkable cloud discovery...\")\n        self.logger.info(\"\ud83d\ude80 STARTING COMPLETE REMARKABLE CLOUD DISCOVERY\")\n        self.logger.info(f\"\ud83d\udcc1 Output directory: {self.output_dir}\")\n        self.logger.info(f\"\ud83d\udcdd Log file: {self.log_file}\")\n        \n        # Get root hash\n        root_hash = self.get_root_hash()\n        if not root_hash:\n            print(\"\u274c Failed to get root hash\")\n            self.logger.error(\"\u274c Failed to get root hash\")\n            return False\n        \n        self.logger.info(f\"\ud83d\udd0d Starting discovery from root hash: {root_hash}\")\n        \n        # Start recursive discovery\n        try:\n            root_node = self.discover_node_recursive(root_hash, depth=0, parent_path=\"\")\n            \n            if root_node:\n                self.root_nodes.append(root_node)\n                \n                # Build proper hierarchy using parent UUIDs\n                hierarchy = self.build_hierarchy_from_parents()\n                \n                print(f\"\\n\u2705 DISCOVERY COMPLETE!\")\n                print(f\"\ud83d\udcca Statistics:\")\n                print(f\"  \u2022 Total nodes: {self.stats['total_nodes']}\")\n                print(f\"  \u2022 Folders: {self.stats['folders']}\")\n                print(f\"  \u2022 Documents: {self.stats['documents']}\")\n                print(f\"  \u2022 Successful downloads: {self.stats['successful_downloads']}\")\n                print(f\"  \u2022 Failed downloads: {self.stats['failed_downloads']}\")\n                print(f\"  \u2022 Total bytes: {self.stats['bytes_downloaded']:,}\")\n                \n                # Show proper hierarchy\n                print(f\"\\n\ud83c\udfd7\ufe0f PROPER HIERARCHY:\")\n                self.print_proper_hierarchy(hierarchy)\n                \n                # Log final statistics\n                self.logger.info(\"\ud83c\udf89 DISCOVERY COMPLETED SUCCESSFULLY!\")\n                self.logger.info(f\"\ud83d\udcca FINAL STATISTICS:\")\n                self.logger.info(f\"  \u2022 Total nodes discovered: {self.stats['total_nodes']}\")\n                self.logger.info(f\"  \u2022 Folder nodes: {self.stats['folders']}\")\n                self.logger.info(f\"  \u2022 Document nodes: {self.stats['documents']}\")\n                self.logger.info(f\"  \u2022 Successful downloads: {self.stats['successful_downloads']}\")\n                self.logger.info(f\"  \u2022 Failed downloads: {self.stats['failed_downloads']}\")\n                self.logger.info(f\"  \u2022 Total bytes downloaded: {self.stats['bytes_downloaded']:,}\")\n                \n                # Show tree structure (old flat version)\n                print(f\"\\n\ud83c\udf33 ORIGINAL DISCOVERY TREE:\")\n                self.print_tree()\n                \n                # Save discovery 
summary\n                self.save_discovery_summary()\n                \n                self.logger.info(f\"\ud83d\udcbe Discovery data saved to: {self.output_dir}\")\n                self.logger.info(f\"\ud83d\udcdd Detailed log saved to: {self.log_file}\")\n                \n                return True\n            else:\n                print(\"\u274c Failed to discover from root\")\n                self.logger.error(\"\u274c Failed to discover from root\")\n                return False\n                \n        except Exception as e:\n            print(f\"\u274c Discovery error: {e}\")\n            return False\n    \n    def print_proper_hierarchy(self, hierarchy: Dict[str, Any]):\n        \"\"\"Print the proper hierarchy built from parent UUIDs\"\"\"\n        \n        def print_nodes(nodes: List[RemarkableNode], prefix: str = \"\", parent_name: str = \"ROOT\"):\n            for i, node in enumerate(nodes):\n                is_last = i == len(nodes) - 1\n                type_icon = \"\ud83d\udcc1\" if node.node_type == NodeType.FOLDER else \"\ud83d\udcc4\"\n                size_info = f\" ({node.size:,} bytes)\"\n                \n                print(f\"{prefix}{'\u2514\u2500\u2500 ' if is_last else '\u251c\u2500\u2500 '}{type_icon} {node.name}{size_info}\")\n                \n                # Find children of this node using its hash as parent\n                node_children = hierarchy['children'].get(node.hash, [])\n                if node_children:\n                    child_prefix = prefix + (\"    \" if is_last else \"\u2502   \")\n                    print_nodes(node_children, child_prefix, node.name)\n        \n        # Print root nodes\n        root_nodes = hierarchy.get('root', [])\n        print(f\"\ud83d\udcc1 Root Level ({len(root_nodes)} items)\")\n        print_nodes(root_nodes)\n        \n        # Show parent groups summary\n        parent_groups = hierarchy.get('children', {})\n        if parent_groups:\n            print(f\"\\n\ud83d\udcca Parent Groups:\")\n            for parent_uuid, children in parent_groups.items():\n                print(f\"  \ud83d\udc68\u200d\ud83d\udc69\u200d\ud83d\udc67\u200d\ud83d\udc66 {parent_uuid}: {len(children)} children\")\n\n    def print_tree(self):\n        \"\"\"Print the discovered tree structure\"\"\"\n        print(f\"\\n\ud83c\udf33 DISCOVERED TREE STRUCTURE:\")\n        \n        def show_tree(node: RemarkableNode, prefix: str = \"\"):\n            type_icon = \"\ud83d\udcc1\" if node.node_type == NodeType.FOLDER else \"\ud83d\udcc4\"\n            size_info = f\" ({node.size:,} bytes)\" if node.size > 0 else \"\"\n            print(f\"{prefix}{type_icon} {node.name}{size_info}\")\n            \n            for i, child in enumerate(node.children):\n                is_last = i == len(node.children) - 1\n                child_prefix = prefix + (\"\u2514\u2500\u2500 \" if is_last else \"\u251c\u2500\u2500 \")\n                show_tree(child, child_prefix)\n        \n        for root in self.root_nodes:\n            show_tree(root)\n    \n    def save_discovery_summary(self):\n        \"\"\"Save complete discovery summary\"\"\"\n        summary = {\n            'timestamp': datetime.now().isoformat(),\n            'stats': self.stats,\n            'root_nodes': len(self.root_nodes),\n            'total_nodes': len(self.nodes),\n            'failed_hashes': list(self.failed_hashes),\n            'output_directory': str(self.output_dir)\n        }\n        \n        summary_file = self.output_dir / 
\"discovery_summary.json\"\n        summary_file.write_text(json.dumps(summary, indent=2))\n        \n        print(f\"\\n\ud83d\udcbe Discovery summary saved to: {summary_file}\")",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/cloudtest/discovery.py",
      "tags": [
        "class",
        "remarkablediscovery"
      ],
      "updated_at": "2025-12-07T01:45:48.213340",
      "usage_example": "# Example usage:\n# result = RemarkableDiscovery(bases)"
    },
    {
      "best_practices": [
        "Always provide required fields (hash, name, parent, node_type) when instantiating",
        "Use None for parent when creating root-level nodes",
        "Maintain consistency between parent hash and actual parent node relationships",
        "Use the children list to build tree structures by appending child nodes",
        "Set depth appropriately when building hierarchies (root=0, increment for each level)",
        "For documents, populate component hashes (content_hash, metadata_hash, etc.) to enable content retrieval",
        "Use NodeType enum values consistently to distinguish between folders and documents",
        "Keep timestamps in ISO format for consistency",
        "The dataclass is immutable by default unless frozen=False is specified; be aware of mutability when modifying children lists",
        "When traversing hierarchies, check node_type to determine if a node can have children",
        "Use local_path to track where node data is cached locally for offline access"
      ],
      "class_interface": {
        "attributes": [
          {
            "description": "Unique identifier for the node in the reMarkable cloud system",
            "is_class_variable": false,
            "name": "hash",
            "type": "str"
          },
          {
            "description": "Display name of the file or folder",
            "is_class_variable": false,
            "name": "name",
            "type": "str"
          },
          {
            "description": "Hash of the parent node, None for root-level nodes",
            "is_class_variable": false,
            "name": "parent",
            "type": "Optional[str]"
          },
          {
            "description": "Enum value indicating whether this is a file or folder",
            "is_class_variable": false,
            "name": "node_type",
            "type": "NodeType"
          },
          {
            "description": "Size of the node in bytes, defaults to 0",
            "is_class_variable": false,
            "name": "size",
            "type": "int"
          },
          {
            "description": "Depth level in the hierarchy tree, 0 for root level",
            "is_class_variable": false,
            "name": "depth",
            "type": "int"
          },
          {
            "description": "Local filesystem path where node data is stored or cached",
            "is_class_variable": false,
            "name": "local_path",
            "type": "str"
          },
          {
            "description": "ISO format timestamp string of when the node was created",
            "is_class_variable": false,
            "name": "created_time",
            "type": "Optional[str]"
          },
          {
            "description": "ISO format timestamp string of when the node was last modified",
            "is_class_variable": false,
            "name": "last_modified",
            "type": "Optional[str]"
          },
          {
            "description": "Origin or source information for the node",
            "is_class_variable": false,
            "name": "source",
            "type": "Optional[str]"
          },
          {
            "description": "Hash of the content component for documents",
            "is_class_variable": false,
            "name": "content_hash",
            "type": "Optional[str]"
          },
          {
            "description": "Hash of the metadata component for documents",
            "is_class_variable": false,
            "name": "metadata_hash",
            "type": "Optional[str]"
          },
          {
            "description": "Hash of the pagedata component for documents",
            "is_class_variable": false,
            "name": "pagedata_hash",
            "type": "Optional[str]"
          },
          {
            "description": "Hash of the PDF component for documents",
            "is_class_variable": false,
            "name": "pdf_hash",
            "type": "Optional[str]"
          },
          {
            "description": "List of child RemarkableNode objects for building hierarchical tree structures",
            "is_class_variable": false,
            "name": "children",
            "type": "List[RemarkableNode]"
          }
        ],
        "methods": [
          {
            "is_property": false,
            "is_static": false,
            "name": "__init__",
            "parameters": {
              "children": "List of child nodes (default empty list)",
              "content_hash": "Content component hash (default None)",
              "created_time": "Creation timestamp (default None)",
              "depth": "Hierarchy depth level (default 0)",
              "hash": "Unique identifier for the node",
              "last_modified": "Last modification timestamp (default None)",
              "local_path": "Local filesystem path (default empty string)",
              "metadata_hash": "Metadata component hash (default None)",
              "name": "Display name of the node",
              "node_type": "NodeType enum indicating file or folder",
              "pagedata_hash": "Pagedata component hash (default None)",
              "parent": "Hash of parent node or None for root",
              "pdf_hash": "PDF component hash (default None)",
              "size": "Size in bytes (default 0)",
              "source": "Source information (default None)"
            },
            "purpose": "Initializes a new RemarkableNode instance with the provided attributes. Automatically generated by @dataclass decorator.",
            "returns": "None - constructor initializes the instance",
            "signature": "__init__(hash: str, name: str, parent: Optional[str], node_type: NodeType, size: int = 0, depth: int = 0, local_path: str = '', created_time: Optional[str] = None, last_modified: Optional[str] = None, source: Optional[str] = None, content_hash: Optional[str] = None, metadata_hash: Optional[str] = None, pagedata_hash: Optional[str] = None, pdf_hash: Optional[str] = None, children: List['RemarkableNode'] = None) -> None"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "__repr__",
            "parameters": {},
            "purpose": "Returns a string representation of the RemarkableNode instance showing all attributes. Automatically generated by @dataclass decorator.",
            "returns": "String representation of the object in the format 'RemarkableNode(hash=..., name=..., ...)'",
            "signature": "__repr__() -> str"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "__eq__",
            "parameters": {
              "other": "Another object to compare with"
            },
            "purpose": "Compares two RemarkableNode instances for equality based on all attributes. Automatically generated by @dataclass decorator.",
            "returns": "True if all attributes are equal, False otherwise",
            "signature": "__eq__(other: object) -> bool"
          }
        ]
      },
      "complexity": "simple",
      "component_type": "class",
      "conditional_imports": [],
      "created_at": "2025-12-07 00:45:48",
      "decorators": [
        "dataclass"
      ],
      "dependencies": [
        "dataclasses",
        "typing"
      ],
      "description": "A dataclass representing a node (file or folder) in the reMarkable cloud storage system, containing metadata, hierarchy information, and component hashes for documents.",
      "docstring": "Represents a node (file/folder) in reMarkable cloud",
      "id": 2095,
      "imports": [
        "import os",
        "import json",
        "import zipfile",
        "import requests",
        "import logging",
        "from enum import Enum",
        "from pathlib import Path",
        "from typing import Dict",
        "from typing import List",
        "from typing import Optional",
        "from typing import Any",
        "from typing import Set",
        "from dataclasses import dataclass",
        "from dataclasses import field",
        "from datetime import datetime",
        "from auth import get_authenticated_session",
        "import io",
        "import re"
      ],
      "imports_required": [
        "from dataclasses import dataclass, field",
        "from typing import List, Optional"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 60,
      "line_start": 38,
      "name": "RemarkableNode",
      "parameters": [
        {
          "annotation": "",
          "default": null,
          "name": "bases"
        }
      ],
      "parameters_explained": {
        "children": "List of child RemarkableNode objects. Used to build hierarchical tree structures. Defaults to empty list.",
        "content_hash": "Hash of the content component for documents. Used to identify and retrieve document content. Optional.",
        "created_time": "ISO format timestamp string indicating when the node was created. Optional.",
        "depth": "Depth level in the hierarchy tree, with 0 being root level. Defaults to 0.",
        "hash": "Unique identifier for the node in the reMarkable cloud system. This is the primary key used to reference the node.",
        "last_modified": "ISO format timestamp string indicating when the node was last modified. Optional.",
        "local_path": "Local filesystem path where the node's data is stored or cached. Empty string by default.",
        "metadata_hash": "Hash of the metadata component for documents. Used to identify and retrieve document metadata. Optional.",
        "name": "Display name of the file or folder as shown in the reMarkable interface.",
        "node_type": "Enum value (NodeType) indicating whether this is a file or folder/collection.",
        "pagedata_hash": "Hash of the pagedata component for documents. Used to identify and retrieve page-specific data. Optional.",
        "parent": "Hash of the parent node. None if this is a root-level node. Used to establish hierarchical relationships.",
        "pdf_hash": "Hash of the PDF component for documents. Used to identify and retrieve the PDF version. Optional.",
        "size": "Size of the node in bytes. Defaults to 0. Primarily relevant for files/documents.",
        "source": "Origin or source information for the node. Optional metadata field."
      },
      "parent_class": null,
      "purpose": "This dataclass serves as a data structure to represent files and folders in the reMarkable cloud ecosystem. It stores essential metadata like name, type, parent relationships, timestamps, and document-specific component hashes (content, metadata, pagedata, PDF). It supports hierarchical organization through parent-child relationships and can represent both folders (collections) and documents. The class is designed to facilitate tree-like navigation and manipulation of reMarkable cloud content.",
      "return_annotation": null,
      "return_explained": "Instantiation returns a RemarkableNode object representing a single node in the reMarkable cloud hierarchy. As a dataclass, it automatically provides __init__, __repr__, __eq__, and other standard methods. The object can be used to navigate hierarchies through the parent and children attributes, and to access all metadata and component hashes associated with the node.",
      "settings_required": [
        "NodeType enum must be defined in the same module or imported",
        "No environment variables or external configuration required for instantiation"
      ],
      "source_code": "class RemarkableNode:\n    \"\"\"Represents a node (file/folder) in reMarkable cloud\"\"\"\n    hash: str\n    name: str\n    parent: Optional[str]\n    node_type: NodeType\n    \n    # Optional metadata\n    size: int = 0\n    depth: int = 0\n    local_path: str = \"\"\n    created_time: Optional[str] = None\n    last_modified: Optional[str] = None\n    source: Optional[str] = None\n    \n    # Component hashes for documents\n    content_hash: Optional[str] = None\n    metadata_hash: Optional[str] = None\n    pagedata_hash: Optional[str] = None\n    pdf_hash: Optional[str] = None\n    \n    # Children for hierarchy\n    children: List['RemarkableNode'] = field(default_factory=list)",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/cloudtest/discovery.py",
      "tags": [
        "dataclass",
        "tree-structure",
        "hierarchy",
        "remarkable",
        "cloud-storage",
        "file-system",
        "metadata",
        "document-management",
        "node",
        "data-structure"
      ],
      "updated_at": "2025-12-07T01:45:48.194395",
      "usage_example": "from dataclasses import dataclass, field\nfrom typing import List, Optional\nfrom enum import Enum\n\nclass NodeType(Enum):\n    FOLDER = 'CollectionType'\n    DOCUMENT = 'DocumentType'\n\n@dataclass\nclass RemarkableNode:\n    hash: str\n    name: str\n    parent: Optional[str]\n    node_type: NodeType\n    size: int = 0\n    depth: int = 0\n    local_path: str = ''\n    created_time: Optional[str] = None\n    last_modified: Optional[str] = None\n    source: Optional[str] = None\n    content_hash: Optional[str] = None\n    metadata_hash: Optional[str] = None\n    pagedata_hash: Optional[str] = None\n    pdf_hash: Optional[str] = None\n    children: List['RemarkableNode'] = field(default_factory=list)\n\n# Create a folder node\nfolder = RemarkableNode(\n    hash='abc123',\n    name='My Notebooks',\n    parent=None,\n    node_type=NodeType.FOLDER,\n    depth=0\n)\n\n# Create a document node\ndocument = RemarkableNode(\n    hash='def456',\n    name='Meeting Notes',\n    parent='abc123',\n    node_type=NodeType.DOCUMENT,\n    size=1024,\n    depth=1,\n    content_hash='content123',\n    metadata_hash='meta456',\n    created_time='2023-01-15T10:30:00Z'\n)\n\n# Build hierarchy\nfolder.children.append(document)\n\n# Access attributes\nprint(f'Document: {document.name}')\nprint(f'Parent: {document.parent}')\nprint(f'Has content: {document.content_hash is not None}')"
    },
    {
      "best_practices": [
        "Always provide required parameters (uuid, hash, name, node_type, parent_uuid) when instantiating a RemarkableNode.",
        "Use None for parent_uuid when creating root-level nodes.",
        "Set node_type to either 'folder' or 'document' - no other values should be used.",
        "The __post_init__ method automatically initializes mutable defaults, so you can safely omit rm_hashes, metadata, and extracted_files parameters.",
        "Update local_path only after files have been extracted to the local file system (phase 2 processing).",
        "Use rm_hashes list to track individual page hashes for multi-page documents.",
        "Store parsed metadata from the reMarkable device in the metadata dictionary for easy access.",
        "Hash values (content_hash, metadata_hash, pdf_hash, pagedata_hash) should be computed using a consistent hashing algorithm (likely SHA256 based on context).",
        "This is an immutable-style dataclass - consider creating new instances rather than modifying existing ones for better tracking of changes.",
        "The uuid field serves as the primary identifier and should be unique across all nodes in the file system."
      ],
      "class_interface": {
        "attributes": [
          {
            "description": "Unique identifier for this node in the reMarkable file system",
            "is_class_variable": false,
            "name": "uuid",
            "type": "str"
          },
          {
            "description": "Overall hash value representing the current state of this node",
            "is_class_variable": false,
            "name": "hash",
            "type": "str"
          },
          {
            "description": "Display name of the file or folder",
            "is_class_variable": false,
            "name": "name",
            "type": "str"
          },
          {
            "description": "Type of node: 'folder' or 'document'",
            "is_class_variable": false,
            "name": "node_type",
            "type": "str"
          },
          {
            "description": "UUID of the parent folder, None for root-level items",
            "is_class_variable": false,
            "name": "parent_uuid",
            "type": "Optional[str]"
          },
          {
            "description": "Hash of the .content file containing document structure",
            "is_class_variable": false,
            "name": "content_hash",
            "type": "Optional[str]"
          },
          {
            "description": "Hash of the .metadata file containing document metadata",
            "is_class_variable": false,
            "name": "metadata_hash",
            "type": "Optional[str]"
          },
          {
            "description": "Hash of the .pdf file for PDF documents",
            "is_class_variable": false,
            "name": "pdf_hash",
            "type": "Optional[str]"
          },
          {
            "description": "Hash of the .pagedata file containing page-specific data",
            "is_class_variable": false,
            "name": "pagedata_hash",
            "type": "Optional[str]"
          },
          {
            "description": "List of hashes for .rm files (one per page) containing drawing/annotation data",
            "is_class_variable": false,
            "name": "rm_hashes",
            "type": "List[str]"
          },
          {
            "description": "Dictionary containing parsed metadata from the reMarkable device",
            "is_class_variable": false,
            "name": "metadata",
            "type": "Dict[str, Any]"
          },
          {
            "description": "File system path where this node's files are stored locally",
            "is_class_variable": false,
            "name": "local_path",
            "type": "str"
          },
          {
            "description": "List of file paths that have been extracted/downloaded for this node",
            "is_class_variable": false,
            "name": "extracted_files",
            "type": "List[str]"
          }
        ],
        "methods": [
          {
            "is_property": false,
            "is_static": false,
            "name": "__post_init__",
            "parameters": {},
            "purpose": "Initializes mutable default values after dataclass initialization to prevent shared mutable default issues",
            "returns": "None - modifies instance attributes in place",
            "signature": "__post_init__(self) -> None"
          }
        ]
      },
      "complexity": "simple",
      "component_type": "class",
      "conditional_imports": [],
      "created_at": "2025-12-07 00:25:27",
      "decorators": [
        "dataclass"
      ],
      "dependencies": [],
      "description": "A dataclass representing a node (file or folder) in the reMarkable tablet's file system hierarchy, storing metadata, hashes, and local file paths.",
      "docstring": "Simple node representation matching local_replica_v2.py",
      "id": 2040,
      "imports": [
        "import os",
        "import sys",
        "import json",
        "import time",
        "import hashlib",
        "import requests",
        "import logging",
        "import re",
        "import shutil",
        "import subprocess",
        "from pathlib import Path",
        "from datetime import datetime",
        "from typing import Dict",
        "from typing import Any",
        "from typing import Optional",
        "from typing import List",
        "from typing import Set",
        "from dataclasses import dataclass"
      ],
      "imports_required": [
        "from dataclasses import dataclass",
        "from typing import Dict, Any, Optional, List"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 57,
      "line_start": 29,
      "name": "RemarkableNode_v1",
      "parameters": [
        {
          "annotation": "",
          "default": null,
          "name": "bases"
        }
      ],
      "parameters_explained": {
        "content_hash": "Hash of the .content file which contains structural information about the document. Optional, only present for documents.",
        "extracted_files": "List of file paths that have been extracted/downloaded for this node. Defaults to empty list.",
        "hash": "Overall hash value representing the current state of this node. Used for change detection and synchronization.",
        "local_path": "File system path where this node's files are stored locally. Set during phase 2 of processing. Defaults to empty string.",
        "metadata": "Dictionary containing parsed metadata from the reMarkable device, such as timestamps, version info, etc. Defaults to empty dict.",
        "metadata_hash": "Hash of the .metadata file containing document metadata like creation time, modification time, etc. Optional.",
        "name": "Display name of the file or folder as shown in the reMarkable interface.",
        "node_type": "Type of node, either 'folder' for directories or 'document' for files. Determines how the node is processed.",
        "pagedata_hash": "Hash of the .pagedata file containing page-specific data. Optional.",
        "parent_uuid": "UUID of the parent folder containing this node. None for root-level items.",
        "pdf_hash": "Hash of the .pdf file if the document is a PDF. Optional, only present for PDF documents.",
        "rm_hashes": "List of hashes for .rm files (one per page) containing the actual drawing/annotation data. Defaults to empty list.",
        "uuid": "Unique identifier for this node in the reMarkable file system. This is the primary key used to identify files and folders."
      },
      "parent_class": null,
      "purpose": "RemarkableNode serves as a data structure to represent both folders and documents in the reMarkable tablet's file system. It stores unique identifiers, content hashes for various file components (.content, .metadata, .pdf, .pagedata, .rm files), parent-child relationships, and local file system paths. This class is designed to match the structure used in local_replica_v2.py and facilitates synchronization between the reMarkable device and local storage by tracking file states through hash values.",
      "return_annotation": null,
      "return_explained": "Instantiation returns a RemarkableNode object with all specified attributes initialized. The __post_init__ method ensures that mutable default values (rm_hashes, metadata, extracted_files) are properly initialized as empty collections if not provided, preventing shared mutable default issues.",
      "settings_required": [],
      "source_code": "class RemarkableNode:\n    \"\"\"Simple node representation matching local_replica_v2.py\"\"\"\n    uuid: str\n    hash: str\n    name: str\n    node_type: str  # \"folder\" or \"document\"\n    parent_uuid: Optional[str]\n    \n    # Component hashes\n    content_hash: Optional[str] = None\n    metadata_hash: Optional[str] = None\n    pdf_hash: Optional[str] = None\n    pagedata_hash: Optional[str] = None\n    rm_hashes: List[str] = None\n    \n    # Metadata from reMarkable\n    metadata: Dict[str, Any] = None\n    \n    # Local paths (set in phase 2)\n    local_path: str = \"\"\n    extracted_files: List[str] = None\n    \n    def __post_init__(self):\n        if self.rm_hashes is None:\n            self.rm_hashes = []\n        if self.metadata is None:\n            self.metadata = {}\n        if self.extracted_files is None:\n            self.extracted_files = []",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/cloudtest/sync_replica_new.py",
      "tags": [
        "dataclass",
        "remarkable",
        "file-system",
        "node",
        "tree-structure",
        "synchronization",
        "metadata",
        "hash",
        "document-management"
      ],
      "updated_at": "2025-12-07T01:45:48.191747",
      "usage_example": "from dataclasses import dataclass\nfrom typing import Dict, Any, Optional, List\n\n@dataclass\nclass RemarkableNode:\n    uuid: str\n    hash: str\n    name: str\n    node_type: str\n    parent_uuid: Optional[str]\n    content_hash: Optional[str] = None\n    metadata_hash: Optional[str] = None\n    pdf_hash: Optional[str] = None\n    pagedata_hash: Optional[str] = None\n    rm_hashes: List[str] = None\n    metadata: Dict[str, Any] = None\n    local_path: str = \"\"\n    extracted_files: List[str] = None\n    \n    def __post_init__(self):\n        if self.rm_hashes is None:\n            self.rm_hashes = []\n        if self.metadata is None:\n            self.metadata = {}\n        if self.extracted_files is None:\n            self.extracted_files = []\n\n# Create a folder node\nfolder = RemarkableNode(\n    uuid=\"abc123\",\n    hash=\"hash123\",\n    name=\"My Folder\",\n    node_type=\"folder\",\n    parent_uuid=None\n)\n\n# Create a document node with full metadata\ndocument = RemarkableNode(\n    uuid=\"def456\",\n    hash=\"hash456\",\n    name=\"My Document\",\n    node_type=\"document\",\n    parent_uuid=\"abc123\",\n    content_hash=\"content_hash_123\",\n    metadata_hash=\"metadata_hash_123\",\n    pdf_hash=\"pdf_hash_123\",\n    rm_hashes=[\"page1_hash\", \"page2_hash\"],\n    metadata={\"lastModified\": \"1234567890\", \"version\": 1}\n)\n\n# Access attributes\nprint(document.name)\nprint(document.node_type)\nprint(len(document.rm_hashes))\n\n# Update local path after extraction\ndocument.local_path = \"/path/to/local/storage/def456\"\ndocument.extracted_files = [\"/path/to/local/storage/def456.pdf\", \"/path/to/local/storage/def456.content\"]"
    },
    {
      "best_practices": [
        "Always instantiate the class in a try-except block to handle authentication failures gracefully",
        "The force_refresh_document method includes a 3-second sleep to allow web app synchronization - do not remove this delay",
        "Document UUIDs must be valid and exist in the reMarkable cloud before calling methods",
        "The class modifies cloud state - ensure you have backups or understand the implications of moving documents",
        "Network errors can occur at multiple stages - wrap method calls in try-except blocks for production use",
        "The update_document_parent method performs multiple API calls sequentially - it is not atomic and can leave inconsistent state if interrupted",
        "The hardcoded gpt_in folder UUID (99c6551f-2855-44cf-a4e4-c9c586558f42) in force_refresh_document should be parameterized for reusability",
        "Session object is reused across methods - the class maintains state and should not be shared across threads without synchronization",
        "All API calls use the EU endpoint (eu.tectonic.remarkable.com) - may need adjustment for other regions",
        "The class prints status messages to stdout - redirect or capture output if running in a service context"
      ],
      "class_interface": {
        "attributes": [
          {
            "description": "Authenticated HTTP session object for making API requests to reMarkable Cloud, obtained from RemarkableAuth",
            "is_class_variable": false,
            "name": "session",
            "type": "requests.Session"
          }
        ],
        "methods": [
          {
            "is_property": false,
            "is_static": false,
            "name": "__init__",
            "parameters": {},
            "purpose": "Initialize the DocumentRefresher with authenticated session to reMarkable Cloud API",
            "returns": "None - initializes instance with authenticated session",
            "signature": "__init__(self)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "get_current_root_info",
            "parameters": {},
            "purpose": "Retrieve the current root.docSchema information including generation and hash",
            "returns": "Tuple of (root_data dict containing generation/hash, root_content string with document schema)",
            "signature": "get_current_root_info(self) -> tuple[dict, str]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "find_document_in_root",
            "parameters": {
              "doc_uuid": "UUID string of the document to find",
              "root_content": "String content of root.docSchema file"
            },
            "purpose": "Locate a specific document entry within the root.docSchema content",
            "returns": "Dict with keys: hash, uuid, type, size, full_line containing document information. Raises ValueError if document not found.",
            "signature": "find_document_in_root(self, doc_uuid: str, root_content: str) -> dict"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "get_document_metadata",
            "parameters": {
              "doc_hash": "SHA256 hash string identifying the document schema"
            },
            "purpose": "Fetch and parse document metadata from the cloud storage",
            "returns": "Tuple of (metadata dict with document properties, doc_lines list of schema lines, metadata_line string). Raises ValueError if metadata not found.",
            "signature": "get_document_metadata(self, doc_hash: str) -> tuple[dict, list, str]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "update_document_parent",
            "parameters": {
              "description": "Human-readable description of the operation for logging",
              "doc_uuid": "UUID string of the document to move",
              "new_parent": "UUID string of the new parent folder, or empty string for root"
            },
            "purpose": "Update a document's parent folder by modifying metadata and uploading changes to cloud",
            "returns": "True on successful update. Raises exceptions on API failures.",
            "signature": "update_document_parent(self, doc_uuid: str, new_parent: str, description: str) -> bool"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "force_refresh_document",
            "parameters": {
              "doc_uuid": "UUID string of the document to refresh"
            },
            "purpose": "Force the web app to refresh and display a document by moving it to root and back to gpt_in folder",
            "returns": "True if refresh sequence completed successfully, False if any error occurred",
            "signature": "force_refresh_document(self, doc_uuid: str) -> bool"
          }
        ]
      },
      "complexity": "complex",
      "component_type": "class",
      "conditional_imports": [],
      "created_at": "2025-12-07 00:43:50",
      "decorators": [],
      "dependencies": [
        "json",
        "time",
        "hashlib",
        "uuid",
        "base64",
        "zlib",
        "pathlib",
        "crc32c"
      ],
      "description": "A class that forces the reMarkable web app to refresh and display documents by programmatically moving them between folders, triggering synchronization.",
      "docstring": "Force web app refresh by moving document",
      "id": 2092,
      "imports": [
        "import json",
        "import time",
        "import hashlib",
        "import uuid",
        "import base64",
        "import zlib",
        "from pathlib import Path",
        "from auth import RemarkableAuth",
        "import crc32c"
      ],
      "imports_required": [
        "import json",
        "import time",
        "import hashlib",
        "import uuid",
        "import base64",
        "import zlib",
        "from pathlib import Path",
        "from auth import RemarkableAuth",
        "import crc32c"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 292,
      "line_start": 49,
      "name": "DocumentRefresher",
      "parameters": [
        {
          "annotation": "",
          "default": null,
          "name": "bases"
        }
      ],
      "parameters_explained": {
        "__init__": "The constructor takes no parameters. It automatically initializes authentication with the reMarkable service using RemarkableAuth, creates an authenticated session, and validates that authentication succeeded. Raises RuntimeError if authentication fails."
      },
      "parent_class": null,
      "purpose": "DocumentRefresher interacts with the reMarkable Cloud API to manipulate document metadata and folder structure. Its primary use case is to force the web application to recognize and display newly uploaded or modified documents by moving them to the root folder and back to their original location. This triggers the web app's synchronization mechanism, making documents immediately visible without manual refresh. The class handles authentication, retrieves document schemas, updates metadata, and manages the complex process of updating the reMarkable cloud storage hierarchy.",
      "return_annotation": null,
      "return_explained": "The class instantiation returns a DocumentRefresher object with an authenticated session. Key method returns: get_current_root_info() returns a tuple of (root_data dict, root_content string); find_document_in_root() returns a dict with document info (hash, uuid, type, size, full_line); get_document_metadata() returns a tuple of (metadata dict, doc_lines list, metadata_line string); update_document_parent() returns True on success; force_refresh_document() returns True on success, False on failure.",
      "settings_required": [
        "RemarkableAuth must be properly configured with valid reMarkable Cloud credentials",
        "Network access to eu.tectonic.remarkable.com API endpoints",
        "Valid reMarkable account with authentication tokens",
        "compute_crc32c_header function must be available in the module scope"
      ],
      "source_code": "class DocumentRefresher:\n    \"\"\"Force web app refresh by moving document\"\"\"\n    \n    def __init__(self):\n        # Load auth session\n        auth = RemarkableAuth()\n        self.session = auth.get_authenticated_session()\n        \n        if not self.session:\n            raise RuntimeError(\"Failed to authenticate with reMarkable\")\n        \n        print(\"\ud83d\udd04 Document Refresher Initialized\")\n    \n    def get_current_root_info(self):\n        \"\"\"Get current root.docSchema info\"\"\"\n        root_response = self.session.get(\"https://eu.tectonic.remarkable.com/sync/v4/root\")\n        root_response.raise_for_status()\n        root_data = root_response.json()\n        \n        # Get root content\n        root_content_response = self.session.get(f\"https://eu.tectonic.remarkable.com/sync/v3/files/{root_data['hash']}\")\n        root_content_response.raise_for_status()\n        root_content = root_content_response.text\n        \n        return root_data, root_content\n    \n    def find_document_in_root(self, doc_uuid: str, root_content: str):\n        \"\"\"Find document entry in root.docSchema\"\"\"\n        lines = root_content.strip().split('\\n')\n        for line in lines[1:]:  # Skip version header\n            if doc_uuid in line:\n                parts = line.split(':')\n                if len(parts) >= 5:\n                    return {\n                        'hash': parts[0],\n                        'uuid': parts[2],\n                        'type': parts[3],\n                        'size': parts[4],\n                        'full_line': line\n                    }\n        raise ValueError(f\"Document {doc_uuid} not found in root.docSchema\")\n    \n    def get_document_metadata(self, doc_hash: str):\n        \"\"\"Get document metadata\"\"\"\n        # Get document schema\n        doc_response = self.session.get(f\"https://eu.tectonic.remarkable.com/sync/v3/files/{doc_hash}\")\n        doc_response.raise_for_status()\n        doc_content = doc_response.text\n        doc_lines = doc_content.strip().split('\\n')\n        \n        # Find metadata hash\n        metadata_hash = None\n        metadata_line = None\n        for line in doc_lines[1:]:\n            if ':' in line and '.metadata' in line:\n                parts = line.split(':')\n                if len(parts) >= 5:\n                    metadata_hash = parts[0]\n                    metadata_line = line\n                    break\n        \n        if not metadata_hash:\n            raise ValueError(\"Metadata component not found\")\n        \n        # Fetch metadata\n        metadata_response = self.session.get(f\"https://eu.tectonic.remarkable.com/sync/v3/files/{metadata_hash}\")\n        metadata_response.raise_for_status()\n        current_metadata = json.loads(metadata_response.text)\n        \n        return current_metadata, doc_lines, metadata_line\n    \n    def update_document_parent(self, doc_uuid: str, new_parent: str, description: str):\n        \"\"\"Update document parent and upload changes\"\"\"\n        print(f\"\\n\ud83d\udd04 {description}\")\n        \n        # Get current state\n        root_data, root_content = self.get_current_root_info()\n        doc_info = self.find_document_in_root(doc_uuid, root_content)\n        current_metadata, doc_lines, metadata_line = self.get_document_metadata(doc_info['hash'])\n        \n        print(f\"   Current parent: {current_metadata.get('parent', '(root)')}\")\n        print(f\"   New parent: {new_parent or 
'(root)'}\")\n        \n        # Update metadata\n        updated_metadata = current_metadata.copy()\n        updated_metadata['parent'] = new_parent\n        updated_metadata['lastModified'] = int(time.time() * 1000)\n        updated_metadata['metadatamodified'] = True\n        updated_metadata['modified'] = True\n        \n        # Upload new metadata\n        metadata_json = json.dumps(updated_metadata, separators=(',', ':'))\n        metadata_hash = hashlib.sha256(metadata_json.encode()).hexdigest()\n        \n        headers = {\n            'Content-Type': 'application/octet-stream',\n            'rm-batch-number': '1',\n            'rm-filename': f'{doc_uuid}.metadata',\n            'rm-sync-id': str(uuid.uuid4()),\n            'User-Agent': 'reMarkable-desktop-win/3.11.1.1951',\n            'Accept-Encoding': 'gzip, deflate',\n            'Accept-Language': 'en-BE,*',\n            'Connection': 'Keep-Alive'\n        }\n        \n        crc32c_header = compute_crc32c_header(metadata_json.encode())\n        if crc32c_header:\n            headers['x-goog-hash'] = crc32c_header\n        \n        upload_response = self.session.put(\n            f\"https://eu.tectonic.remarkable.com/sync/v3/files/{metadata_hash}\",\n            data=metadata_json.encode(),\n            headers=headers \n        )\n        upload_response.raise_for_status()\n        \n        # Update document schema\n        new_doc_lines = []\n        for line in doc_lines:\n            if line == metadata_line:\n                parts = line.split(':')\n                parts[0] = metadata_hash\n                new_doc_lines.append(':'.join(parts))\n            else:\n                new_doc_lines.append(line)\n        \n        new_doc_content = '\\n'.join(new_doc_lines)\n        doc_hash = hashlib.sha256(new_doc_content.encode()).hexdigest()\n        \n        headers = {\n            'Content-Type': 'application/octet-stream',\n            'rm-batch-number': '1',\n            'rm-filename': f'{doc_uuid}.docSchema',\n            'rm-sync-id': str(uuid.uuid4()),\n            'User-Agent': 'reMarkable-desktop-win/3.11.1.1951',\n            'Accept-Encoding': 'gzip, deflate',\n            'Accept-Language': 'en-BE,*',\n            'Connection': 'Keep-Alive'\n        }\n        \n        crc32c_header = compute_crc32c_header(new_doc_content.encode())\n        if crc32c_header:\n            headers['x-goog-hash'] = crc32c_header\n        \n        upload_response = self.session.put(\n            f\"https://eu.tectonic.remarkable.com/sync/v3/files/{doc_hash}\",\n            data=new_doc_content.encode(),\n            headers=headers\n        )\n        upload_response.raise_for_status()\n        \n        # Update root.docSchema\n        old_line = doc_info['full_line']\n        parts = old_line.split(':')\n        parts[0] = doc_hash\n        new_line = ':'.join(parts)\n        new_root_content = root_content.replace(old_line, new_line)\n        \n        # Upload new root\n        root_hash = hashlib.sha256(new_root_content.encode()).hexdigest()\n        \n        headers = {\n            'Content-Type': 'text/plain',\n            'rm-batch-number': '1',\n            'rm-filename': 'root.docSchema',\n            'rm-sync-id': str(uuid.uuid4()),\n            'User-Agent': 'reMarkable-desktop-win/3.11.1.1951',\n            'Accept-Encoding': 'gzip, deflate',\n            'Accept-Language': 'en-BE,*',\n            'Connection': 'Keep-Alive'\n        }\n        \n        crc32c_header = 
compute_crc32c_header(new_root_content.encode())\n        if crc32c_header:\n            headers['x-goog-hash'] = crc32c_header\n        \n        upload_response = self.session.put(\n            f\"https://eu.tectonic.remarkable.com/sync/v3/files/{root_hash}\",\n            data=new_root_content.encode(),\n            headers=headers\n        )\n        upload_response.raise_for_status()\n        \n        # Update root hash pointer\n        root_update_data = {\n            \"broadcast\": True,\n            \"generation\": root_data['generation'],\n            \"hash\": root_hash\n        }\n        \n        root_content_body = json.dumps(root_update_data, indent=2).encode('utf-8')\n        \n        headers = {\n            'Content-Type': 'application/json',\n            'rm-batch-number': '1',\n            'rm-filename': 'roothash',\n            'rm-sync-id': str(uuid.uuid4()),\n            'User-Agent': 'reMarkable-desktop-win/3.11.1.1951',\n            'Accept-Encoding': 'gzip, deflate',\n            'Accept-Language': 'en-BE,*',\n            'Connection': 'Keep-Alive'\n        }\n        \n        crc32c_header = compute_crc32c_header(root_content_body)\n        if crc32c_header:\n            headers['x-goog-hash'] = crc32c_header\n        \n        root_update_response = self.session.put(\n            \"https://eu.tectonic.remarkable.com/sync/v3/root\",\n            data=root_content_body,\n            headers=headers\n        )\n        root_update_response.raise_for_status()\n        \n        print(f\"   \u2705 {description} completed successfully\")\n        return True\n    \n    def force_refresh_document(self, doc_uuid: str):\n        \"\"\"Force web app refresh by moving document around\"\"\"\n        print(f\"\ud83d\udd04 Force Refreshing Document Visibility\")\n        print(f\"Document UUID: {doc_uuid}\")\n        print(\"=\" * 60)\n        \n        try:\n            # Step 1: Move document to root folder (this should make it visible immediately)\n            self.update_document_parent(doc_uuid, \"\", \"Moving document to root folder\")\n            \n            # Step 2: Wait a moment to let web app sync\n            print(f\"\\n\u23f3 Waiting 3 seconds for web app sync...\")\n            time.sleep(3)\n            \n            # Step 3: Move document back to gpt_in folder\n            gpt_in_uuid = \"99c6551f-2855-44cf-a4e4-c9c586558f42\"\n            self.update_document_parent(doc_uuid, gpt_in_uuid, \"Moving document back to gpt_in folder\")\n            \n            print(f\"\\n\ud83c\udf89 Document refresh sequence completed!\")\n            print(f\"\ud83d\udca1 The document should now be visible in the gpt_in folder in the web app.\")\n            print(f\"\ud83d\udca1 If still not visible, try refreshing the web browser page.\")\n            \n            return True\n            \n        except Exception as e:\n            print(f\"\\n\u274c Refresh operation failed: {e}\")\n            return False",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/cloudtest/force_web_app_refresh.py",
      "tags": [
        "remarkable",
        "cloud-sync",
        "document-management",
        "api-client",
        "file-synchronization",
        "metadata-manipulation",
        "web-app-refresh",
        "content-delivery"
      ],
      "updated_at": "2025-12-07T01:43:50.825017",
      "usage_example": "# Initialize the refresher\nrefresher = DocumentRefresher()\n\n# Force refresh a specific document by UUID\ndoc_uuid = 'abc123-def456-ghi789'\nsuccess = refresher.force_refresh_document(doc_uuid)\n\nif success:\n    print('Document is now visible in web app')\nelse:\n    print('Refresh failed')\n\n# Manually move a document to a different parent folder\nparent_folder_uuid = '99c6551f-2855-44cf-a4e4-c9c586558f42'\nrefresher.update_document_parent(doc_uuid, parent_folder_uuid, 'Moving to target folder')\n\n# Get current root information\nroot_data, root_content = refresher.get_current_root_info()\nprint(f'Root generation: {root_data[\"generation\"]}')\n\n# Find a document in the root schema\ndoc_info = refresher.find_document_in_root(doc_uuid, root_content)\nprint(f'Document hash: {doc_info[\"hash\"]}')"
    },
    {
      "best_practices": [
        "Always check return values for None before using tokens or sessions, as authentication can fail at multiple stages",
        "Use get_authenticated_session() for the simplest workflow - it handles the complete authentication flow",
        "The session object is reused and maintains state - create one RemarkableAuth instance per application lifecycle",
        "Device tokens are long-lived but user tokens may expire - consider re-authenticating if API calls start failing",
        "Store device tokens securely in the remarkable_device_token.txt file with appropriate file permissions",
        "The class prints detailed status messages to stdout - redirect or capture these if running in a non-interactive environment",
        "Session headers are automatically updated after successful authentication - no manual header management needed",
        "Methods can be called independently (load_device_token, get_user_token) or use the convenience methods (authenticate, get_authenticated_session)",
        "The class checks multiple file locations for the device token (current directory first, then fallback) - place token file in the primary location for best performance"
      ],
      "class_interface": {
        "attributes": [
          {
            "description": "Configuration object containing file paths (DEVICE_TOKEN_FILE, FALLBACK_DEVICE_TOKEN_FILE) and API URLs (USER_TOKEN_URL) used throughout the authentication process",
            "is_class_variable": false,
            "name": "config",
            "type": "RemarkableConfig"
          },
          {
            "description": "HTTP session object used for all API requests. Initialized with User-Agent header and updated with Authorization header after successful authentication. Maintains connection pooling and cookies across requests",
            "is_class_variable": false,
            "name": "session",
            "type": "requests.Session"
          }
        ],
        "methods": [
          {
            "is_property": false,
            "is_static": false,
            "name": "__init__",
            "parameters": {
              "config": "Optional RemarkableConfig instance for customizing file paths and API endpoints. Defaults to new RemarkableConfig() if not provided"
            },
            "purpose": "Initializes the authentication handler with configuration and sets up an HTTP session with appropriate headers",
            "returns": "None (constructor)",
            "signature": "__init__(self, config: RemarkableConfig = None)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "load_device_token",
            "parameters": {},
            "purpose": "Loads the device token from a local file, checking the primary location first, then the fallback location",
            "returns": "The device token as a string if found and successfully loaded, None if file not found or error occurs. Prints detailed status messages during execution",
            "signature": "load_device_token(self) -> Optional[str]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "get_user_token",
            "parameters": {
              "device_token": "Valid device token string obtained from load_device_token() or stored elsewhere"
            },
            "purpose": "Exchanges a device token for a user token by making an authenticated request to the reMarkable API",
            "returns": "User token as a string if the API request succeeds (HTTP 200), None if request fails or encounters an error. Prints detailed status and error messages",
            "signature": "get_user_token(self, device_token: str) -> Optional[str]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "authenticate",
            "parameters": {},
            "purpose": "Performs the complete authentication flow: loads device token, exchanges it for user token, and updates session headers",
            "returns": "User token as a string if authentication succeeds, None if any step fails. Also updates self.session.headers with the Authorization header for subsequent API calls",
            "signature": "authenticate(self) -> Optional[str]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "get_authenticated_session",
            "parameters": {},
            "purpose": "Convenience method that performs authentication and returns a ready-to-use authenticated session object",
            "returns": "Authenticated requests.Session object if authentication succeeds, None if authentication fails. The returned session has Authorization headers set and is ready for API calls",
            "signature": "get_authenticated_session(self) -> Optional[requests.Session]"
          }
        ]
      },
      "complexity": "moderate",
      "component_type": "class",
      "conditional_imports": [],
      "created_at": "2025-12-07 00:42:26",
      "decorators": [],
      "dependencies": [
        "requests",
        "pathlib"
      ],
      "description": "Handles the complete authentication flow for reMarkable cloud services, managing device tokens, user tokens, and authenticated HTTP sessions.",
      "docstring": "Handles reMarkable authentication flow",
      "id": 2088,
      "imports": [
        "import os",
        "import requests",
        "from pathlib import Path",
        "from typing import Optional",
        "from dataclasses import dataclass"
      ],
      "imports_required": [
        "import requests",
        "from pathlib import Path",
        "from typing import Optional"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 112,
      "line_start": 36,
      "name": "RemarkableAuth",
      "parameters": [
        {
          "annotation": "",
          "default": null,
          "name": "bases"
        }
      ],
      "parameters_explained": {
        "config": "Optional RemarkableConfig instance containing configuration settings like token file paths and API URLs. If not provided, a default RemarkableConfig instance is created. This allows customization of file locations and endpoints while providing sensible defaults."
      },
      "parent_class": null,
      "purpose": "This class manages the two-stage authentication process required to interact with reMarkable's cloud API. It loads a device token from local files, exchanges it for a user token via the reMarkable API, and maintains an authenticated requests.Session object for subsequent API calls. The class handles token file discovery across multiple locations, provides detailed logging of the authentication process, and manages session headers automatically.",
      "return_annotation": null,
      "return_explained": "Instantiation returns a RemarkableAuth object. Key method returns: load_device_token() returns Optional[str] (device token or None), get_user_token() returns Optional[str] (user token or None), authenticate() returns Optional[str] (user token or None), get_authenticated_session() returns Optional[requests.Session] (authenticated session or None if authentication fails).",
      "settings_required": [
        "RemarkableConfig class must be available (imported or defined)",
        "Device token file named 'remarkable_device_token.txt' must exist in either the current directory or fallback location specified in RemarkableConfig",
        "Device token must be valid and obtained from reMarkable's device registration process",
        "Network access to reMarkable's authentication API endpoints"
      ],
      "source_code": "class RemarkableAuth:\n    \"\"\"Handles reMarkable authentication flow\"\"\"\n    \n    def __init__(self, config: RemarkableConfig = None):\n        self.config = config or RemarkableConfig()\n        self.session = requests.Session()\n        self.session.headers.update({\n            'User-Agent': 'reMarkable-desktop-win/3.11.1.1951'\n        })\n    \n    def load_device_token(self) -> Optional[str]:\n        \"\"\"Load device token from file - checks current directory first, then fallback location\"\"\"\n        # Try current directory first\n        if self.config.DEVICE_TOKEN_FILE.exists():\n            token_file = self.config.DEVICE_TOKEN_FILE\n        elif self.config.FALLBACK_DEVICE_TOKEN_FILE.exists():\n            token_file = self.config.FALLBACK_DEVICE_TOKEN_FILE\n        else:\n            print(f\"\u274c Device token file not found in:\")\n            print(f\"   Primary: {self.config.DEVICE_TOKEN_FILE}\")\n            print(f\"   Fallback: {self.config.FALLBACK_DEVICE_TOKEN_FILE}\")\n            print(\"\ud83d\udca1 You need to create remarkable_device_token.txt with your device token\")\n            return None\n        \n        try:\n            with open(token_file, 'r') as f:\n                token = f.read().strip()\n            print(f\"\u2705 Loaded device token from {token_file} ({len(token)} chars)\")\n            return token\n        except Exception as e:\n            print(f\"\u274c Error loading device token: {e}\")\n            return None\n    \n    def get_user_token(self, device_token: str) -> Optional[str]:\n        \"\"\"Get user token using device token\"\"\"\n        headers = {\"Authorization\": f\"Bearer {device_token}\"}\n        \n        try:\n            print(f\"\ud83d\udd11 Requesting user token from: {self.config.USER_TOKEN_URL}\")\n            response = self.session.post(self.config.USER_TOKEN_URL, headers=headers, timeout=30)\n            \n            if response.status_code == 200:\n                user_token = response.text.strip()\n                print(f\"\u2705 User token obtained ({len(user_token)} chars)\")\n                return user_token\n            else:\n                print(f\"\u274c User token request failed: {response.status_code}\")\n                print(f\"   Response: {response.text}\")\n                return None\n                \n        except Exception as e:\n            print(f\"\u274c User token error: {e}\")\n            return None\n    \n    def authenticate(self) -> Optional[str]:\n        \"\"\"Complete authentication flow\"\"\"\n        print(\"\ud83d\udd11 Starting reMarkable authentication...\")\n        \n        device_token = self.load_device_token()\n        if not device_token:\n            return None\n        \n        user_token = self.get_user_token(device_token)\n        if user_token:\n            # Update session headers for future API calls\n            self.session.headers.update({\"Authorization\": f\"Bearer {user_token}\"})\n            print(\"\u2705 Authentication complete\")\n            return user_token\n        \n        return None\n    \n    def get_authenticated_session(self) -> Optional[requests.Session]:\n        \"\"\"Get an authenticated session ready for API calls\"\"\"\n        user_token = self.authenticate()\n        if user_token:\n            return self.session\n        return None",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/cloudtest/auth.py",
      "tags": [
        "authentication",
        "remarkable",
        "cloud-api",
        "token-management",
        "session-management",
        "http-client",
        "oauth",
        "device-token",
        "user-token"
      ],
      "updated_at": "2025-12-07T01:42:26.971838",
      "usage_example": "# Basic usage\nfrom remarkable_auth import RemarkableAuth, RemarkableConfig\n\n# Create auth instance with default config\nauth = RemarkableAuth()\n\n# Option 1: Get authenticated session directly\nsession = auth.get_authenticated_session()\nif session:\n    # Use session for API calls\n    response = session.get('https://api.remarkable.com/some-endpoint')\n    print(response.json())\n\n# Option 2: Step-by-step authentication\ndevice_token = auth.load_device_token()\nif device_token:\n    user_token = auth.get_user_token(device_token)\n    if user_token:\n        # Session is now authenticated\n        response = auth.session.get('https://api.remarkable.com/some-endpoint')\n\n# Option 3: Just get the user token\nuser_token = auth.authenticate()\nif user_token:\n    print(f'Authenticated with token: {user_token[:10]}...')\n    # auth.session is now ready for API calls\n\n# With custom config\ncustom_config = RemarkableConfig()\nauth = RemarkableAuth(config=custom_config)\nsession = auth.get_authenticated_session()"
    },
    {
      "best_practices": [
        "This is a dataclass with only class-level attributes, so all instances share the same default values",
        "The class is designed to be instantiated once and used as a configuration container throughout an application",
        "DEVICE_TOKEN_FILE uses Path(__file__).parent which resolves relative to the module's location, not the current working directory",
        "The fallback pattern (DEVICE_TOKEN_FILE -> FALLBACK_DEVICE_TOKEN_FILE) should be implemented in code that uses this config",
        "Consider making this a singleton or using class methods directly if you don't need instance-specific configuration",
        "All attributes are mutable by default in dataclasses; consider using frozen=True in the decorator if immutability is desired",
        "The EU Tectonic endpoints are hardcoded; if supporting multiple regions, consider parameterizing the region selection"
      ],
      "class_interface": {
        "attributes": [
          {
            "description": "The base URL for the reMarkable EU Tectonic API endpoint used for primary API operations",
            "is_class_variable": true,
            "name": "BASE_URL",
            "type": "str"
          },
          {
            "description": "The URL endpoint for generating new user authentication tokens in the reMarkable cloud service",
            "is_class_variable": true,
            "name": "USER_TOKEN_URL",
            "type": "str"
          },
          {
            "description": "Primary location for storing the device authentication token, relative to the module's directory",
            "is_class_variable": true,
            "name": "DEVICE_TOKEN_FILE",
            "type": "Path"
          },
          {
            "description": "Fallback location for the device token in the user's home directory at ~/.remarkable/device_token.txt",
            "is_class_variable": true,
            "name": "FALLBACK_DEVICE_TOKEN_FILE",
            "type": "Path"
          }
        ],
        "methods": [
          {
            "is_property": false,
            "is_static": false,
            "name": "__init__",
            "parameters": {},
            "purpose": "Automatically generated constructor that initializes all attributes with their default values",
            "returns": "None - initializes a new RemarkableConfig instance",
            "signature": "__init__() -> None"
          }
        ]
      },
      "complexity": "simple",
      "component_type": "class",
      "conditional_imports": [],
      "created_at": "2025-12-07 00:41:55",
      "decorators": [
        "dataclass"
      ],
      "dependencies": [
        "pathlib",
        "dataclasses"
      ],
      "description": "A dataclass that stores configuration constants for interacting with the reMarkable cloud API, including API endpoints and local file paths for device tokens.",
      "docstring": "Configuration for reMarkable cloud API",
      "id": 2087,
      "imports": [
        "import os",
        "import requests",
        "from pathlib import Path",
        "from typing import Optional",
        "from dataclasses import dataclass"
      ],
      "imports_required": [
        "from pathlib import Path",
        "from dataclasses import dataclass"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 33,
      "line_start": 24,
      "name": "RemarkableConfig",
      "parameters": [
        {
          "annotation": "",
          "default": null,
          "name": "bases"
        }
      ],
      "parameters_explained": {
        "bases": "This is a dataclass decorator parameter, not a constructor parameter. The dataclass decorator automatically generates __init__, __repr__, and other methods based on class attributes. No explicit parameters are needed for instantiation as all attributes have default values."
      },
      "parent_class": null,
      "purpose": "This configuration class centralizes all necessary URLs and file paths required to authenticate and communicate with the reMarkable cloud service. It provides EU Tectonic endpoints for API access and defines both primary and fallback locations for storing device authentication tokens. The class serves as a single source of truth for reMarkable API configuration, making it easy to modify endpoints or paths without changing code throughout an application.",
      "return_annotation": null,
      "return_explained": "Instantiating this class returns a RemarkableConfig object with all configuration attributes set to their default values. The class itself doesn't have methods that return values, but provides access to configuration constants through its attributes.",
      "settings_required": [
        "No environment variables or external configuration required - all settings have default values",
        "Optional: Device token file at './remarkable_device_token.txt' or '~/.remarkable/device_token.txt' for authentication"
      ],
      "source_code": "class RemarkableConfig:\n    \"\"\"Configuration for reMarkable cloud API\"\"\"\n    \n    # EU Tectonic endpoints (working endpoints from network capture)\n    BASE_URL: str = \"https://eu.tectonic.remarkable.com\"\n    USER_TOKEN_URL: str = \"https://webapp-prod.cloud.remarkable.engineering/token/json/2/user/new\"\n    \n    # Local paths - look in current directory first, then fallback to home\n    DEVICE_TOKEN_FILE: Path = Path(__file__).parent / \"remarkable_device_token.txt\"\n    FALLBACK_DEVICE_TOKEN_FILE: Path = Path.home() / \".remarkable\" / \"device_token.txt\"",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/cloudtest/auth.py",
      "tags": [
        "configuration",
        "dataclass",
        "remarkable",
        "cloud-api",
        "authentication",
        "constants",
        "endpoints",
        "file-paths",
        "settings"
      ],
      "updated_at": "2025-12-07T01:41:55.205056",
      "usage_example": "from pathlib import Path\nfrom dataclasses import dataclass\n\n@dataclass\nclass RemarkableConfig:\n    BASE_URL: str = \"https://eu.tectonic.remarkable.com\"\n    USER_TOKEN_URL: str = \"https://webapp-prod.cloud.remarkable.engineering/token/json/2/user/new\"\n    DEVICE_TOKEN_FILE: Path = Path(__file__).parent / \"remarkable_device_token.txt\"\n    FALLBACK_DEVICE_TOKEN_FILE: Path = Path.home() / \".remarkable\" / \"device_token.txt\"\n\n# Instantiate the config\nconfig = RemarkableConfig()\n\n# Access configuration values\napi_url = config.BASE_URL\ntoken_url = config.USER_TOKEN_URL\ntoken_file = config.DEVICE_TOKEN_FILE\n\n# Check if device token file exists\nif config.DEVICE_TOKEN_FILE.exists():\n    with open(config.DEVICE_TOKEN_FILE, 'r') as f:\n        device_token = f.read().strip()\nelif config.FALLBACK_DEVICE_TOKEN_FILE.exists():\n    with open(config.FALLBACK_DEVICE_TOKEN_FILE, 'r') as f:\n        device_token = f.read().strip()\n\n# Use in API requests\nimport requests\nheaders = {'Authorization': f'Bearer {device_token}'}\nresponse = requests.get(config.BASE_URL, headers=headers)"
    },
    {
      "best_practices": [
        "Always call cleanup() method when done to remove temporary directories, preferably in a finally block or context manager",
        "Check return value of upload_pdf() to verify successful upload before proceeding",
        "Validate PDF file exists and has correct extension before calling upload_pdf()",
        "Use list_folders() to get valid parent_uuid values before uploading to specific folders",
        "The class creates a new temporary database for each instance, so create one instance per upload session",
        "Do not reuse the instance after calling cleanup() as temporary resources will be deleted",
        "Handle exceptions appropriately as network operations may fail",
        "The temp_dir attribute contains the path to temporary resources if manual cleanup is needed"
      ],
      "class_interface": {
        "attributes": [
          {
            "description": "Path to the temporary directory created for this upload session, contains the temporary database file",
            "is_class_variable": false,
            "name": "temp_dir",
            "type": "str"
          },
          {
            "description": "Instance of RemarkableUploadManager that handles the actual upload operations and cloud synchronization",
            "is_class_variable": false,
            "name": "upload_manager",
            "type": "RemarkableUploadManager"
          }
        ],
        "methods": [
          {
            "is_property": false,
            "is_static": false,
            "name": "__init__",
            "parameters": {},
            "purpose": "Initialize the uploader by creating a temporary directory and database, then instantiating the underlying RemarkableUploadManager",
            "returns": "None (constructor)",
            "signature": "__init__(self)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "list_folders",
            "parameters": {},
            "purpose": "Retrieve all available folders from the reMarkable device for use as upload targets",
            "returns": "Dictionary mapping folder UUIDs (str) to folder names (str). Returns empty dict on error.",
            "signature": "list_folders(self) -> dict"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "upload_pdf",
            "parameters": {
              "document_name": "Display name for the document on the reMarkable device",
              "parent_uuid": "UUID of the parent folder (None for root folder, must be valid folder UUID from list_folders())",
              "pdf_path": "Path to the PDF file to upload (string or Path-like object)"
            },
            "purpose": "Upload a PDF file to the reMarkable device with specified name and optional parent folder",
            "returns": "Boolean: True if upload succeeded, False if upload failed or file validation failed",
            "signature": "upload_pdf(self, pdf_path: str, document_name: str, parent_uuid: str = None) -> bool"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "cleanup",
            "parameters": {},
            "purpose": "Remove temporary directory and all associated resources created during initialization",
            "returns": "None. Prints status messages but does not raise exceptions on cleanup failure.",
            "signature": "cleanup(self)"
          }
        ]
      },
      "complexity": "moderate",
      "component_type": "class",
      "conditional_imports": [
        {
          "condition": "only needed when cleanup() method is called",
          "import": "import shutil",
          "optional": false
        }
      ],
      "created_at": "2025-12-07 00:41:00",
      "decorators": [],
      "dependencies": [
        "tempfile",
        "os",
        "pathlib",
        "shutil",
        "cloudtest.upload_manager"
      ],
      "description": "A wrapper class that provides a simplified interface for uploading PDF documents to a reMarkable tablet using a temporary database session.",
      "docstring": "Standalone PDF uploader using proven upload_manager logic",
      "id": 2084,
      "imports": [
        "import sys",
        "import os",
        "import tempfile",
        "from pathlib import Path",
        "from cloudtest.upload_manager import RemarkableUploadManager",
        "import shutil"
      ],
      "imports_required": [
        "import sys",
        "import os",
        "import tempfile",
        "from pathlib import Path",
        "from cloudtest.upload_manager import RemarkableUploadManager",
        "import shutil"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 107,
      "line_start": 19,
      "name": "RemarkablePDFUploader",
      "parameters": [
        {
          "annotation": "",
          "default": null,
          "name": "bases"
        }
      ],
      "parameters_explained": {
        "__init__": "No parameters required. The constructor automatically creates a temporary directory and database for the upload session, then initializes an internal RemarkableUploadManager instance."
      },
      "parent_class": null,
      "purpose": "RemarkablePDFUploader serves as a standalone utility for uploading PDF files to reMarkable tablets. It abstracts the complexity of the underlying RemarkableUploadManager by creating temporary database sessions and providing simple methods for listing folders and uploading PDFs. The class handles resource management including temporary directory creation and cleanup, making it suitable for one-off upload operations or scripts that need to upload PDFs without maintaining persistent state.",
      "return_annotation": null,
      "return_explained": "Instantiation returns a RemarkablePDFUploader object. The upload_pdf method returns a boolean (True for successful upload, False for failure). The list_folders method returns a dictionary mapping folder UUIDs (strings) to folder names (strings). The cleanup method has no return value.",
      "settings_required": [
        "RemarkableUploadManager must be properly configured with reMarkable cloud credentials",
        "Network access to reMarkable cloud services",
        "Write permissions for temporary directory creation (typically in system temp directory)"
      ],
      "source_code": "class RemarkablePDFUploader:\n    \"\"\"Standalone PDF uploader using proven upload_manager logic\"\"\"\n    \n    def __init__(self):\n        # Create a temporary database for this upload session\n        self.temp_dir = tempfile.mkdtemp(prefix=\"remarkable_upload_\")\n        temp_db = os.path.join(self.temp_dir, \"temp_upload.db\")\n        \n        print(f\"\ud83d\udd27 Initializing upload manager with temp DB: {temp_db}\")\n        \n        # Initialize the proven upload manager\n        self.upload_manager = RemarkableUploadManager(temp_db)\n        \n    def list_folders(self):\n        \"\"\"List available folders for upload target selection\"\"\"\n        try:\n            # Get the folder structure from the upload manager\n            folders = {}\n            replica = self.upload_manager.get_replica()\n            \n            for uuid, doc in replica.items():\n                if doc.get('Type') == 'CollectionType':\n                    name = doc.get('VissibleName', 'Unnamed Folder')\n                    folders[uuid] = name\n                    \n            return folders\n        except Exception as e:\n            print(f\"\u26a0\ufe0f Error listing folders: {e}\")\n            return {}\n    \n    def upload_pdf(self, pdf_path, document_name, parent_uuid=None):\n        \"\"\"\n        Upload a PDF using the proven upload_manager logic\n        \n        Args:\n            pdf_path: Path to the PDF file\n            document_name: Name for the document on reMarkable\n            parent_uuid: UUID of parent folder (None for root)\n            \n        Returns:\n            bool: True if upload successful, False otherwise\n        \"\"\"\n        try:\n            pdf_file = Path(pdf_path)\n            if not pdf_file.exists():\n                print(f\"\u274c PDF file not found: {pdf_path}\")\n                return False\n                \n            if not pdf_file.suffix.lower() == '.pdf':\n                print(f\"\u274c File is not a PDF: {pdf_path}\")\n                return False\n                \n            print(f\"\ud83d\udcc4 Uploading: {pdf_file.name}\")\n            print(f\"\ud83d\udcdd Document name: {document_name}\")\n            if parent_uuid:\n                print(f\"\ud83d\udcc1 Target folder: {parent_uuid}\")\n            else:\n                print(\"\ud83d\udcc1 Target: Root folder\")\n                \n            # Use the proven upload manager to handle the upload\n            print(f\"\\n\ud83d\udd27 Using proven upload_manager.py for upload...\")\n            \n            # Call the working upload method\n            success = self.upload_manager.upload_pdf_document(\n                pdf_path=str(pdf_file),\n                name=document_name,\n                parent_uuid=parent_uuid\n            )\n            \n            if success:\n                print(\"\u2705 Upload successful!\")\n                return True\n            else:\n                print(\"\u274c Upload failed!\")\n                return False\n                \n        except Exception as e:\n            print(f\"\u274c Upload error: {e}\")\n            return False\n    \n    def cleanup(self):\n        \"\"\"Clean up temporary resources\"\"\"\n        try:\n            import shutil\n            if hasattr(self, 'temp_dir') and os.path.exists(self.temp_dir):\n                shutil.rmtree(self.temp_dir)\n                print(f\"\ud83e\uddf9 Cleaned up temp directory: {self.temp_dir}\")\n        except Exception as e:\n            
print(f\"\u26a0\ufe0f Cleanup warning: {e}\")",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/cloudtest/upload_pdf_new.py",
      "tags": [
        "remarkable",
        "pdf",
        "upload",
        "file-management",
        "cloud-sync",
        "document-management",
        "temporary-storage",
        "wrapper-class"
      ],
      "updated_at": "2025-12-07T01:41:00.380895",
      "usage_example": "# Create uploader instance\nuploader = RemarkablePDFUploader()\n\ntry:\n    # List available folders\n    folders = uploader.list_folders()\n    print(f\"Available folders: {folders}\")\n    \n    # Upload PDF to root folder\n    success = uploader.upload_pdf(\n        pdf_path='/path/to/document.pdf',\n        document_name='My Document',\n        parent_uuid=None\n    )\n    \n    # Or upload to specific folder\n    if folders:\n        folder_uuid = list(folders.keys())[0]\n        success = uploader.upload_pdf(\n            pdf_path='/path/to/another.pdf',\n            document_name='Another Document',\n            parent_uuid=folder_uuid\n        )\n    \n    if success:\n        print('Upload completed successfully')\nfinally:\n    # Always cleanup temporary resources\n    uploader.cleanup()"
    },
    {
      "best_practices": [
        "Instantiate once per logging session to avoid repeatedly clearing the log file",
        "Be cautious when logging sensitive data (tokens, passwords) in headers or body - consider sanitizing before logging",
        "Monitor log file size when logging large request bodies, as files can grow quickly",
        "The log file is cleared on instantiation, so create a new instance with a different filename if you need to preserve previous logs",
        "For production use, consider implementing log rotation or size limits",
        "The class is not thread-safe; use separate instances or add locking if logging from multiple threads",
        "Binary data larger than 1000 bytes is automatically truncated with SHA256 hash for verification"
      ],
      "class_interface": {
        "attributes": [
          {
            "description": "Path object representing the log file location where all HTTP requests are written",
            "is_class_variable": false,
            "name": "log_file",
            "type": "pathlib.Path"
          }
        ],
        "methods": [
          {
            "is_property": false,
            "is_static": false,
            "name": "__init__",
            "parameters": {
              "log_file": "Path to the log file (string). Defaults to 'raw_requests.log'. Parent directories are created automatically."
            },
            "purpose": "Initializes the logger, creates the log file directory if needed, and clears/initializes the log file with a timestamp header",
            "returns": "None (constructor)",
            "signature": "__init__(self, log_file: str = 'raw_requests.log')"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "log_request",
            "parameters": {
              "body": "Optional request body as bytes. If larger than 1000 bytes, only first/last 500 bytes are logged with SHA256 hash. If smaller, full content is logged (decoded as UTF-8 if possible, otherwise as hex)",
              "headers": "Dictionary of HTTP headers with string keys and values",
              "method": "HTTP method as a string (e.g., 'GET', 'POST', 'PUT', 'DELETE')",
              "response_status": "Optional HTTP response status code as an integer (e.g., 200, 404, 500)",
              "response_text": "Optional response body as a string",
              "url": "Full URL of the request as a string"
            },
            "purpose": "Logs an HTTP request and optionally its response to the log file in a formatted, human-readable structure",
            "returns": "None (performs file I/O side effect)",
            "signature": "log_request(self, method: str, url: str, headers: dict, body: bytes = None, response_status: int = None, response_text: str = None)"
          }
        ]
      },
      "complexity": "simple",
      "component_type": "class",
      "conditional_imports": [],
      "created_at": "2025-12-07 00:39:33",
      "decorators": [],
      "dependencies": [
        "pathlib",
        "datetime",
        "hashlib"
      ],
      "description": "A logging utility class that captures and formats HTTP requests and responses in a human-readable format similar to reMarkable app logs, writing them to a file.",
      "docstring": "Logs HTTP requests in the same format as reMarkable app logs",
      "id": 2081,
      "imports": [
        "import requests",
        "import json",
        "import hashlib",
        "from datetime import datetime",
        "from pathlib import Path"
      ],
      "imports_required": [
        "from pathlib import Path",
        "from datetime import datetime",
        "import hashlib"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 63,
      "line_start": 13,
      "name": "RawRequestLogger",
      "parameters": [
        {
          "annotation": "",
          "default": null,
          "name": "bases"
        }
      ],
      "parameters_explained": {
        "log_file": "Path to the log file where HTTP requests will be written. Defaults to 'raw_requests.log' in the current directory. The parent directory will be created if it doesn't exist. The file is cleared on instantiation with a timestamp header."
      },
      "parent_class": null,
      "purpose": "This class provides structured logging for HTTP request/response cycles, particularly useful for debugging API interactions. It formats requests with method, URL, headers, and body content, handling both text and binary data intelligently. Large bodies are truncated with SHA256 hashes for verification. The logger creates a timestamped log file and appends each request with clear separators for easy reading.",
      "return_annotation": null,
      "return_explained": "Instantiation returns a RawRequestLogger object that maintains a file handle reference and provides the log_request method. The log_request method returns None as it performs side effects (file writing) only.",
      "settings_required": [
        "Write permissions in the directory where the log file will be created",
        "Sufficient disk space for log file storage (logs can grow large with request bodies)"
      ],
      "source_code": "class RawRequestLogger:\n    \"\"\"Logs HTTP requests in the same format as reMarkable app logs\"\"\"\n    \n    def __init__(self, log_file: str = \"raw_requests.log\"):\n        self.log_file = Path(log_file)\n        self.log_file.parent.mkdir(exist_ok=True)\n        \n        # Clear log file\n        with open(self.log_file, 'w') as f:\n            f.write(f\"=== RAW REQUEST LOG - {datetime.now().isoformat()} ===\\n\\n\")\n    \n    def log_request(self, method: str, url: str, headers: dict, body: bytes = None, response_status: int = None, response_text: str = None):\n        \"\"\"Log a request in raw format\"\"\"\n        with open(self.log_file, 'a') as f:\n            # Log request line\n            f.write(f\"{method} {url}\\n\")\n            \n            # Log headers\n            for key, value in headers.items():\n                f.write(f\"{key}: {value}\\n\")\n            \n            f.write(\"\\n\")  # Empty line before body\n            \n            # Log body if present\n            if body:\n                if len(body) > 1000:\n                    # For large bodies, show first 500 and last 500 bytes\n                    f.write(f\"[BODY: {len(body)} bytes]\\n\")\n                    f.write(f\"First 500 bytes: {body[:500]}\\n\")\n                    f.write(f\"Last 500 bytes: {body[-500:]}\\n\")\n                    f.write(f\"SHA256: {hashlib.sha256(body).hexdigest()}\\n\")\n                else:\n                    # For small bodies, show full content\n                    try:\n                        decoded = body.decode('utf-8')\n                        f.write(f\"[BODY: {len(body)} bytes - UTF-8]\\n\")\n                        f.write(decoded)\n                        f.write(\"\\n\")\n                    except:\n                        f.write(f\"[BODY: {len(body)} bytes - BINARY]\\n\")\n                        f.write(f\"Hex: {body.hex()}\\n\")\n            else:\n                f.write(\"[NO BODY]\\n\")\n            \n            # Log response if available\n            if response_status is not None:\n                f.write(f\"\\nRESPONSE: {response_status}\\n\")\n                if response_text:\n                    f.write(f\"Response body: {response_text}\\n\")\n            \n            f.write(\"\\n\" + \"=\"*80 + \"\\n\\n\")",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/cloudtest/raw_request_logger.py",
      "tags": [
        "logging",
        "http",
        "debugging",
        "request-logging",
        "file-io",
        "api-debugging",
        "http-client",
        "request-response",
        "remarkable"
      ],
      "updated_at": "2025-12-07T01:39:33.056394",
      "usage_example": "# Basic usage\nlogger = RawRequestLogger('api_requests.log')\n\n# Log a simple GET request\nlogger.log_request(\n    method='GET',\n    url='https://api.example.com/users',\n    headers={'Authorization': 'Bearer token123', 'Content-Type': 'application/json'}\n)\n\n# Log a POST request with body and response\nrequest_body = json.dumps({'name': 'John', 'email': 'john@example.com'}).encode('utf-8')\nlogger.log_request(\n    method='POST',\n    url='https://api.example.com/users',\n    headers={'Content-Type': 'application/json'},\n    body=request_body,\n    response_status=201,\n    response_text='{\"id\": 123, \"name\": \"John\"}'\n)\n\n# Log a request with binary data\nbinary_data = b'\\x89PNG\\r\\n\\x1a\\n' + b'\\x00' * 2000\nlogger.log_request(\n    method='PUT',\n    url='https://api.example.com/upload',\n    headers={'Content-Type': 'image/png'},\n    body=binary_data,\n    response_status=200\n)"
    },
    {
      "best_practices": [
        "Always instantiate within a try-except block to handle authentication failures gracefully",
        "Run test_raw_content_upload() before test_document_creation() as the latter depends on the former working",
        "Use run_test() for comprehensive testing as it handles proper test sequencing and provides detailed output",
        "Ensure the remarkable_replica_v2/replica_database.json file exists and is accessible before instantiation",
        "The class creates test files in 'test_uploads' directory - ensure cleanup after testing if needed",
        "Monitor console output as the class provides extensive logging with emoji indicators for test progress",
        "The class modifies uploader._current_document_uuid during raw upload tests - this is intentional for proper header generation",
        "Test PDFs are timestamped and include UUIDs to ensure uniqueness across test runs"
      ],
      "class_interface": {
        "attributes": [
          {
            "description": "Base directory path where the test script is located, used for resolving relative paths",
            "is_class_variable": false,
            "name": "base_dir",
            "type": "Path"
          },
          {
            "description": "Authenticated session object obtained from RemarkableAuth for making API requests",
            "is_class_variable": false,
            "name": "session",
            "type": "requests.Session or similar"
          },
          {
            "description": "Upload manager instance configured with the authenticated session and database path for handling PDF uploads",
            "is_class_variable": false,
            "name": "uploader",
            "type": "RemarkableUploadManager"
          }
        ],
        "methods": [
          {
            "is_property": false,
            "is_static": false,
            "name": "__init__",
            "parameters": {},
            "purpose": "Initialize the test class with authentication and upload manager setup",
            "returns": "None. Raises RuntimeError if authentication fails.",
            "signature": "__init__(self)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "create_test_pdf",
            "parameters": {},
            "purpose": "Create a minimal test PDF file using reportlab or manual PDF generation as fallback",
            "returns": "Path object pointing to the created test PDF file in the test_uploads directory",
            "signature": "create_test_pdf(self) -> Path"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "test_raw_content_upload",
            "parameters": {},
            "purpose": "Test uploading raw PDF content directly to reMarkable cloud storage without metadata",
            "returns": "Boolean indicating success (True) or failure (False) of the raw upload operation",
            "signature": "test_raw_content_upload(self) -> bool"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "test_document_creation",
            "parameters": {},
            "purpose": "Test creating a complete document with metadata and verifying it appears in the database",
            "returns": "Boolean indicating success (True) or failure (False) of document creation and database integration",
            "signature": "test_document_creation(self) -> bool"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "run_test",
            "parameters": {},
            "purpose": "Execute the complete test suite including raw upload and document creation tests with summary reporting",
            "returns": "Boolean indicating overall test success (True if both raw upload and document creation succeed)",
            "signature": "run_test(self)"
          }
        ]
      },
      "complexity": "moderate",
      "component_type": "class",
      "conditional_imports": [
        {
          "condition": "only if reportlab is available for PDF generation; falls back to manual PDF creation if not installed",
          "import": "from reportlab.pdfgen import canvas",
          "optional": true
        },
        {
          "condition": "only if reportlab is available for PDF generation",
          "import": "from reportlab.lib.pagesizes import letter",
          "optional": true
        },
        {
          "condition": "used for exception handling and debugging in test methods",
          "import": "import traceback",
          "optional": false
        }
      ],
      "created_at": "2025-12-07 00:37:57",
      "decorators": [],
      "dependencies": [
        "pathlib",
        "time",
        "uuid",
        "hashlib",
        "json",
        "os",
        "typing",
        "reportlab",
        "traceback"
      ],
      "description": "A test class for validating PDF upload functionality to reMarkable cloud, including raw content upload and complete document creation with metadata.",
      "docstring": "Focused test for basic PDF upload functionality",
      "id": 2077,
      "imports": [
        "import os",
        "import json",
        "import time",
        "import uuid",
        "import hashlib",
        "from pathlib import Path",
        "from typing import Dict",
        "from typing import Any",
        "from auth import RemarkableAuth",
        "from upload_manager import RemarkableUploadManager",
        "from reportlab.pdfgen import canvas",
        "from reportlab.lib.pagesizes import letter",
        "import traceback",
        "import traceback",
        "import traceback"
      ],
      "imports_required": [
        "from pathlib import Path",
        "import time",
        "import uuid",
        "import hashlib",
        "from auth import RemarkableAuth",
        "from upload_manager import RemarkableUploadManager"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 249,
      "line_start": 18,
      "name": "SimplePDFUploadTest",
      "parameters": [
        {
          "annotation": "",
          "default": null,
          "name": "bases"
        }
      ],
      "parameters_explained": {
        "__init__": "No parameters required. The constructor automatically initializes authentication, loads the upload manager, and sets up the base directory structure. It will raise RuntimeError if authentication fails."
      },
      "parent_class": null,
      "purpose": "This class provides a focused testing framework for reMarkable PDF upload operations. It handles authentication, creates test PDFs (using reportlab or manual PDF generation as fallback), tests raw content upload to cloud storage, and validates complete document creation with metadata. The class is designed to diagnose upload issues by testing incrementally from basic raw uploads to full document creation with database integration.",
      "return_annotation": null,
      "return_explained": "Instantiation returns a SimplePDFUploadTest object. The run_test() method returns a boolean indicating overall test success (True if both raw upload and document creation succeed). Individual test methods (test_raw_content_upload, test_document_creation) return boolean success indicators.",
      "settings_required": [
        "RemarkableAuth module must be available and configured for authentication",
        "RemarkableUploadManager module must be available",
        "Database file at 'remarkable_replica_v2/replica_database.json' relative to the script location",
        "Write permissions for 'test_uploads' directory to create test PDFs",
        "Valid reMarkable cloud authentication credentials"
      ],
      "source_code": "class SimplePDFUploadTest:\n    \"\"\"Focused test for basic PDF upload functionality\"\"\"\n    \n    def __init__(self):\n        self.base_dir = Path(__file__).parent\n        \n        # Load auth session\n        from auth import RemarkableAuth\n        auth = RemarkableAuth()\n        self.session = auth.get_authenticated_session()\n        \n        if not self.session:\n            raise RuntimeError(\"Failed to authenticate with reMarkable\")\n        \n        # Load upload manager\n        from upload_manager import RemarkableUploadManager\n        database_path = self.base_dir / \"remarkable_replica_v2\" / \"replica_database.json\"\n        self.uploader = RemarkableUploadManager(self.session, database_path)\n        \n        print(\"\ud83e\uddea Simple PDF Upload Test Initialized\")\n    \n    def create_test_pdf(self) -> Path:\n        \"\"\"Create a minimal test PDF\"\"\"\n        test_pdf_path = self.base_dir / \"test_uploads\" / \"simple_test.pdf\"\n        test_pdf_path.parent.mkdir(exist_ok=True)\n        \n        try:\n            from reportlab.pdfgen import canvas\n            from reportlab.lib.pagesizes import letter\n            \n            # Create simple PDF with reportlab\n            c = canvas.Canvas(str(test_pdf_path), pagesize=letter)\n            c.drawString(100, 750, f\"Simple PDF Upload Test\")\n            c.drawString(100, 720, f\"Timestamp: {time.strftime('%Y-%m-%d %H:%M:%S')}\")\n            c.drawString(100, 690, f\"UUID: {uuid.uuid4()}\")\n            c.showPage()\n            c.save()\n            \n            print(f\"\ud83d\udcc4 Created PDF with reportlab: {test_pdf_path}\")\n            \n        except ImportError:\n            # Fallback: create a minimal PDF manually\n            pdf_content = \"\"\"%PDF-1.4\n1 0 obj\n<<\n/Type /Catalog\n/Pages 2 0 R\n>>\nendobj\n\n2 0 obj\n<<\n/Type /Pages\n/Kids [3 0 R]\n/Count 1\n>>\nendobj\n\n3 0 obj\n<<\n/Type /Page\n/Parent 2 0 R\n/MediaBox [0 0 612 792]\n/Contents 4 0 R\n>>\nendobj\n\n4 0 obj\n<<\n/Length 44\n>>\nstream\nBT\n/F1 12 Tf\n100 700 Td\n(Simple PDF Test) Tj\nET\nendstream\nendobj\n\nxref\n0 5\n0000000000 65535 f \n0000000009 00000 n \n0000000058 00000 n \n0000000115 00000 n \n0000000206 00000 n \ntrailer\n<<\n/Size 5\n/Root 1 0 R\n>>\nstartxref\n300\n%%EOF\"\"\"\n            \n            with open(test_pdf_path, 'w') as f:\n                f.write(pdf_content)\n            \n            print(f\"\ud83d\udcc4 Created minimal PDF manually: {test_pdf_path}\")\n        \n        return test_pdf_path\n    \n    def test_raw_content_upload(self) -> bool:\n        \"\"\"Test uploading raw PDF content directly\"\"\"\n        try:\n            print(\"\\n\ud83d\udd27 Testing Raw PDF Content Upload\")\n            \n            # Create test PDF\n            test_pdf_path = self.create_test_pdf()\n            \n            # Read PDF content\n            with open(test_pdf_path, 'rb') as f:\n                pdf_content = f.read()\n            \n            print(f\"\ud83d\udcca PDF size: {len(pdf_content)} bytes\")\n            print(f\"\ud83d\udcca PDF SHA256: {hashlib.sha256(pdf_content).hexdigest()[:16]}...\")\n            \n            # Generate document UUID (like the logs show)\n            doc_uuid = str(uuid.uuid4())\n            print(f\"\ud83d\udcca Document UUID: {doc_uuid}\")\n            \n            # Set the document UUID for proper rm-filename header\n            self.uploader._current_document_uuid = doc_uuid\n            \n            # Upload PDF 
content\n            filename = f\"{doc_uuid}.pdf\"\n            print(f\"\ud83d\ude80 Uploading as: {filename}\")\n            \n            result_hash = self.uploader.upload_raw_content(\n                content=pdf_content,\n                content_type='application/pdf',\n                filename=filename\n            )\n            \n            if result_hash:\n                print(f\"\u2705 Upload successful! Hash: {result_hash[:16]}...\")\n                return True\n            else:\n                print(\"\u274c Upload failed - no hash returned\")\n                return False\n                \n        except Exception as e:\n            print(f\"\u274c Upload failed with exception: {e}\")\n            import traceback\n            traceback.print_exc()\n            return False\n    \n    def test_document_creation(self) -> bool:\n        \"\"\"Test creating a complete document with metadata\"\"\"\n        try:\n            print(\"\\n\ud83d\udcc4 Testing Complete Document Creation\")\n            \n            # Create test PDF\n            test_pdf_path = self.create_test_pdf()\n            document_name = f\"SimpleTest_{int(time.time())}\"\n            \n            print(f\"\ud83d\udcca Creating document: {document_name}\")\n            print(f\"\ud83d\udcca PDF file: {test_pdf_path}\")\n            \n            # Get initial document count\n            initial_count = len([n for n in self.uploader.database['nodes'].values() \n                               if n['node_type'] == 'document'])\n            print(f\"\ud83d\udcca Initial document count: {initial_count}\")\n            \n            # Upload document\n            success = self.uploader.upload_pdf_document(str(test_pdf_path), document_name)\n            \n            if success:\n                # Check if document was added\n                new_count = len([n for n in self.uploader.database['nodes'].values() \n                               if n['node_type'] == 'document'])\n                print(f\"\ud83d\udcca New document count: {new_count}\")\n                \n                if new_count > initial_count:\n                    print(f\"\u2705 Document created successfully!\")\n                    \n                    # Find the new document\n                    for node in self.uploader.database['nodes'].values():\n                        if node.get('name') == document_name:\n                            print(f\"\ud83d\udcc4 Found document: {node['uuid'][:8]}... 
'{node['name']}'\")\n                            return True\n                    \n                    print(\"\u26a0\ufe0f Document created but not found in database\")\n                    return True  # Still count as success if upload worked\n                else:\n                    print(\"\u274c Document count unchanged\")\n                    return False\n            else:\n                print(\"\u274c Document creation failed\")\n                return False\n                \n        except Exception as e:\n            print(f\"\u274c Document creation failed with exception: {e}\")\n            import traceback\n            traceback.print_exc()\n            return False\n    \n    def run_test(self):\n        \"\"\"Run the focused upload tests\"\"\"\n        print(\"\ud83d\ude80 Starting Simple PDF Upload Test\")\n        print(\"=\" * 50)\n        \n        # Test 1: Raw content upload\n        print(\"\\n1\ufe0f\u20e3 Raw Content Upload Test\")\n        raw_success = self.test_raw_content_upload()\n        \n        # Test 2: Complete document creation (only if raw upload works)\n        print(\"\\n2\ufe0f\u20e3 Complete Document Creation Test\")\n        if raw_success:\n            doc_success = self.test_document_creation()\n        else:\n            print(\"\u23ed\ufe0f Skipping document creation test (raw upload failed)\")\n            doc_success = False\n        \n        # Summary\n        print(\"\\n\" + \"=\" * 50)\n        print(\"\ud83d\udcca TEST SUMMARY\")\n        print(f\"Raw Upload: {'\u2705 PASS' if raw_success else '\u274c FAIL'}\")\n        print(f\"Document Creation: {'\u2705 PASS' if doc_success else '\u274c FAIL'}\")\n        \n        if raw_success:\n            print(\"\\n\ud83c\udf89 Basic upload functionality is working!\")\n            if doc_success:\n                print(\"\ud83c\udf89 Complete document creation is working!\")\n            else:\n                print(\"\u26a0\ufe0f Document creation needs investigation\")\n        else:\n            print(\"\\n\u274c Basic upload functionality needs to be fixed first\")\n            print(\"\ud83d\udca1 Check headers, authentication, and checksum calculation\")\n        \n        return raw_success and doc_success",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/cloudtest/test_simple_pdf_upload.py",
      "tags": [
        "testing",
        "pdf",
        "upload",
        "remarkable",
        "cloud-storage",
        "integration-test",
        "document-management",
        "authentication",
        "file-upload"
      ],
      "updated_at": "2025-12-07T01:37:57.914844",
      "usage_example": "# Basic usage\ntry:\n    test = SimplePDFUploadTest()\n    success = test.run_test()\n    if success:\n        print('All tests passed')\n    else:\n        print('Some tests failed')\nexcept RuntimeError as e:\n    print(f'Authentication failed: {e}')\n\n# Run individual tests\ntest = SimplePDFUploadTest()\nraw_upload_ok = test.test_raw_content_upload()\nif raw_upload_ok:\n    doc_creation_ok = test.test_document_creation()\n\n# Create test PDF only\ntest = SimplePDFUploadTest()\npdf_path = test.create_test_pdf()\nprint(f'Test PDF created at: {pdf_path}')"
    },
    {
      "best_practices": [],
      "class_interface": {
        "attributes": [],
        "methods": [
          {
            "is_property": false,
            "is_static": false,
            "name": "__init__",
            "parameters": {
              "replica_dir": "Type: str",
              "session": "Type: requests.Session"
            },
            "purpose": "Internal method:   init  ",
            "returns": "None",
            "signature": "__init__(self, session, replica_dir)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "setup_logging",
            "parameters": {},
            "purpose": "Setup comprehensive logging",
            "returns": "None",
            "signature": "setup_logging(self)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "fetch_hash_content",
            "parameters": {
              "hash_ref": "Type: str"
            },
            "purpose": "Fetch content from reMarkable cloud by hash",
            "returns": "Returns Optional[Dict[str, Any]]",
            "signature": "fetch_hash_content(self, hash_ref) -> Optional[Dict[str, Any]]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "get_root_hash",
            "parameters": {},
            "purpose": "Get the root hash from reMarkable cloud",
            "returns": "Returns Optional[str]",
            "signature": "get_root_hash(self) -> Optional[str]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "parse_directory_listing",
            "parameters": {
              "content": "Type: bytes"
            },
            "purpose": "Parse directory listing content",
            "returns": "Returns Dict[str, Any]",
            "signature": "parse_directory_listing(self, content) -> Dict[str, Any]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "extract_metadata",
            "parameters": {
              "hash_ref": "Type: str"
            },
            "purpose": "Extract metadata from a metadata component",
            "returns": "Returns Optional[Dict[str, Any]]",
            "signature": "extract_metadata(self, hash_ref) -> Optional[Dict[str, Any]]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "save_raw_component",
            "parameters": {
              "component_type": "Type: str",
              "content": "Type: bytes",
              "hash_ref": "Type: str"
            },
            "purpose": "Save raw component to disk and return the file path",
            "returns": "Returns str",
            "signature": "save_raw_component(self, hash_ref, content, component_type) -> str"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "extract_pdf_from_component",
            "parameters": {
              "pdf_hash": "Type: str",
              "target_path": "Type: Path"
            },
            "purpose": "Extract PDF from a PDF component",
            "returns": "Returns bool",
            "signature": "extract_pdf_from_component(self, pdf_hash, target_path) -> bool"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "extract_notebook_components",
            "parameters": {
              "components": "Type: Dict[str, str]",
              "node": "Type: ReplicaNode"
            },
            "purpose": "Extract reMarkable notebook components",
            "returns": "Returns List[str]",
            "signature": "extract_notebook_components(self, node, components) -> List[str]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "build_node_path",
            "parameters": {
              "name": "Type: str",
              "node_uuid": "Type: str",
              "parent_path": "Type: str"
            },
            "purpose": "Build the local file system path for a node",
            "returns": "Returns str",
            "signature": "build_node_path(self, node_uuid, name, parent_path) -> str"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "discover_node",
            "parameters": {
              "depth": "Type: int",
              "hash_ref": "Type: str",
              "parent_path": "Type: str",
              "parent_uuid": "Type: str"
            },
            "purpose": "Discover and process a single node",
            "returns": "Returns Optional[ReplicaNode]",
            "signature": "discover_node(self, hash_ref, parent_uuid, parent_path, depth) -> Optional[ReplicaNode]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "extract_rm_file",
            "parameters": {
              "rm_hash": "Type: str",
              "target_path": "Type: Path"
            },
            "purpose": "Extract .rm file from reMarkable cloud",
            "returns": "Returns bool",
            "signature": "extract_rm_file(self, rm_hash, target_path) -> bool"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "save_database",
            "parameters": {},
            "purpose": "Save the complete metadata database",
            "returns": "None",
            "signature": "save_database(self)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "save_sync_log",
            "parameters": {},
            "purpose": "Save sync log for future incremental updates",
            "returns": "None",
            "signature": "save_sync_log(self)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "build_complete_replica",
            "parameters": {},
            "purpose": "Build the complete local replica",
            "returns": "Returns bool",
            "signature": "build_complete_replica(self) -> bool"
          }
        ]
      },
      "complexity": "moderate",
      "component_type": "class",
      "conditional_imports": [],
      "created_at": "2025-12-07 00:36:03",
      "decorators": [],
      "dependencies": [],
      "description": "Builds and maintains a complete local replica of reMarkable cloud",
      "docstring": "Builds and maintains a complete local replica of reMarkable cloud",
      "id": 2071,
      "imports": [
        "import os",
        "import json",
        "import zipfile",
        "import requests",
        "import logging",
        "import shutil",
        "from enum import Enum",
        "from pathlib import Path",
        "from typing import Dict",
        "from typing import List",
        "from typing import Optional",
        "from typing import Any",
        "from typing import Set",
        "from dataclasses import dataclass",
        "from dataclasses import asdict",
        "from datetime import datetime",
        "import sys",
        "from auth import RemarkableAuth",
        "import re"
      ],
      "imports_required": [
        "import os",
        "import json",
        "import zipfile",
        "import requests",
        "import logging"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 618,
      "line_start": 93,
      "name": "RemarkableLocalReplica",
      "parameters": [
        {
          "annotation": "",
          "default": null,
          "name": "bases"
        }
      ],
      "parameters_explained": {
        "bases": "Parameter of type "
      },
      "parent_class": null,
      "purpose": "Builds and maintains a complete local replica of reMarkable cloud",
      "return_annotation": null,
      "return_explained": "Returns unspecified type",
      "settings_required": [],
      "source_code": "class RemarkableLocalReplica:\n    \"\"\"Builds and maintains a complete local replica of reMarkable cloud\"\"\"\n    \n    def __init__(self, session: requests.Session, replica_dir: str = \"remarkable_replica\"):\n        self.session = session\n        self.base_url = \"https://eu.tectonic.remarkable.com\"\n        \n        # Setup replica directory structure\n        self.replica_dir = Path(replica_dir).resolve()\n        self.content_dir = self.replica_dir / \"content\"\n        self.metadata_dir = self.replica_dir / \"metadata\"\n        self.raw_dir = self.replica_dir / \"raw_components\"\n        \n        # Create directory structure\n        for directory in [self.replica_dir, self.content_dir, self.metadata_dir, self.raw_dir]:\n            directory.mkdir(parents=True, exist_ok=True)\n        \n        # Metadata database\n        self.database_file = self.replica_dir / \"replica_database.json\"\n        self.sync_log_file = self.replica_dir / \"sync_log.json\"\n        \n        # Setup logging\n        self.log_file = self.replica_dir / \"replica_build.log\"\n        self.setup_logging()\n        \n        # State tracking\n        self.nodes: Dict[str, ReplicaNode] = {}\n        self.hierarchy: Dict[str, List[str]] = {}  # parent_uuid -> [child_uuids]\n        self.failed_downloads: Set[str] = set()\n        \n        # Statistics\n        self.stats = {\n            'total_nodes': 0,\n            'folders': 0,\n            'documents': 0,\n            'notebooks': 0,\n            'pdfs_extracted': 0,\n            'notebooks_extracted': 0,\n            'total_files': 0,\n            'total_size': 0,\n            'failed_extractions': 0\n        }\n    \n    def setup_logging(self):\n        \"\"\"Setup comprehensive logging\"\"\"\n        self.logger = logging.getLogger('RemarkableReplica')\n        self.logger.setLevel(logging.DEBUG)\n        self.logger.handlers.clear()\n        \n        # File handler\n        file_handler = logging.FileHandler(self.log_file, mode='w', encoding='utf-8')\n        file_handler.setLevel(logging.DEBUG)\n        file_formatter = logging.Formatter(\n            '%(asctime)s | %(levelname)-8s | %(message)s',\n            datefmt='%Y-%m-%d %H:%M:%S'\n        )\n        file_handler.setFormatter(file_formatter)\n        \n        # Console handler\n        console_handler = logging.StreamHandler()\n        console_handler.setLevel(logging.INFO)\n        console_formatter = logging.Formatter('%(message)s')\n        console_handler.setFormatter(console_formatter)\n        \n        self.logger.addHandler(file_handler)\n        self.logger.addHandler(console_handler)\n        \n        self.logger.info(f\"\ud83c\udfd7\ufe0f REMARKABLE LOCAL REPLICA BUILDER STARTED\")\n        self.logger.info(f\"\ud83d\udcc1 Replica directory: {self.replica_dir}\")\n        self.logger.info(f\"\ud83d\udcdd Build log: {self.log_file}\")\n    \n    def fetch_hash_content(self, hash_ref: str) -> Optional[Dict[str, Any]]:\n        \"\"\"Fetch content from reMarkable cloud by hash\"\"\"\n        try:\n            url = f\"{self.base_url}/sync/v3/files/{hash_ref}\"\n            self.logger.debug(f\"FETCHING: {url}\")\n            \n            response = self.session.get(url)\n            response.raise_for_status()\n            \n            content_type = response.headers.get('content-type', '')\n            content = response.content\n            \n            self.logger.debug(f\"  Response: {len(content)} bytes, {content_type}\")\n            \n  
          return {\n                'hash': hash_ref,\n                'size': len(content),\n                'content': content,\n                'content_type': content_type\n            }\n            \n        except Exception as e:\n            self.logger.error(f\"Failed to fetch {hash_ref[:16]}...: {e}\")\n            self.failed_downloads.add(hash_ref)\n            return None\n    \n    def get_root_hash(self) -> Optional[str]:\n        \"\"\"Get the root hash from reMarkable cloud\"\"\"\n        try:\n            url = f\"{self.base_url}/sync/v4/root\"\n            response = self.session.get(url)\n            response.raise_for_status()\n            \n            data = response.json()\n            root_hash = data.get('hash')\n            \n            self.logger.info(f\"\ud83c\udf31 Root hash: {root_hash}\")\n            return root_hash\n            \n        except Exception as e:\n            self.logger.error(f\"Failed to get root hash: {e}\")\n            return None\n    \n    def parse_directory_listing(self, content: bytes) -> Dict[str, Any]:\n        \"\"\"Parse directory listing content\"\"\"\n        try:\n            text_content = content.decode('utf-8')\n        except UnicodeDecodeError:\n            self.logger.debug(\"Failed to decode as text, treating as binary\")\n            return {'is_directory': False, 'child_objects': [], 'data_components': []}\n        \n        result = {\n            'is_directory': False,\n            'child_objects': [],\n            'data_components': []\n        }\n        \n        lines = text_content.split('\\n')\n        if lines and lines[0].strip().isdigit():\n            lines = lines[1:]  # Skip count line\n        \n        import re\n        entry_pattern = r'^([a-f0-9]{64}):([0-9a-fA-F]+):([a-f0-9-]+(?:\\.[^:]+)?):(\\d+):(\\d+)$'\n        \n        for line in lines:\n            line = line.strip()\n            if not line:\n                continue\n                \n            match = re.match(entry_pattern, line, re.IGNORECASE)\n            if match:\n                hash_val, flags, uuid_component, type_val, size_val = match.groups()\n                \n                entry_info = {\n                    'hash': hash_val,\n                    'flags': flags,\n                    'uuid_component': uuid_component,\n                    'type': type_val,\n                    'size': int(size_val)\n                }\n                \n                if '.' 
in uuid_component:\n                    # Data component\n                    component_type = uuid_component.split('.')[-1]\n                    entry_info['component_type'] = component_type\n                    result['data_components'].append(entry_info)\n                else:\n                    # Child object\n                    result['child_objects'].append(entry_info)\n                \n                result['is_directory'] = True\n        \n        return result\n    \n    def extract_metadata(self, hash_ref: str) -> Optional[Dict[str, Any]]:\n        \"\"\"Extract metadata from a metadata component\"\"\"\n        content_info = self.fetch_hash_content(hash_ref)\n        if not content_info:\n            return None\n        \n        try:\n            text_content = content_info['content'].decode('utf-8')\n            return json.loads(text_content)\n        except (UnicodeDecodeError, json.JSONDecodeError) as e:\n            self.logger.debug(f\"Failed to parse metadata: {e}\")\n            return None\n    \n    def save_raw_component(self, hash_ref: str, content: bytes, component_type: str) -> str:\n        \"\"\"Save raw component to disk and return the file path\"\"\"\n        filename = f\"{hash_ref}_{component_type}\"\n        filepath = self.raw_dir / filename\n        \n        with open(filepath, 'wb') as f:\n            f.write(content)\n        \n        return str(filepath)\n    \n    def extract_pdf_from_component(self, pdf_hash: str, target_path: Path) -> bool:\n        \"\"\"Extract PDF from a PDF component\"\"\"\n        try:\n            content_info = self.fetch_hash_content(pdf_hash)\n            if not content_info:\n                return False\n            \n            with open(target_path, 'wb') as f:\n                f.write(content_info['content'])\n            \n            self.logger.debug(f\"  \ud83d\udcc4 Extracted PDF: {target_path}\")\n            return True\n            \n        except Exception as e:\n            self.logger.error(f\"Failed to extract PDF {pdf_hash[:16]}...: {e}\")\n            return False\n    \n    def extract_notebook_components(self, node: ReplicaNode, components: Dict[str, str]) -> List[str]:\n        \"\"\"Extract reMarkable notebook components\"\"\"\n        extracted_files = []\n        \n        # Create notebook directory\n        notebook_dir = Path(node.local_path).parent / f\"{Path(node.local_path).stem}_notebook\"\n        notebook_dir.mkdir(exist_ok=True)\n        \n        for component_type, hash_ref in components.items():\n            if not hash_ref:\n                continue\n                \n            try:\n                content_info = self.fetch_hash_content(hash_ref)\n                if not content_info:\n                    continue\n                \n                if component_type == 'content':\n                    # Save content file\n                    content_file = notebook_dir / \"content\"\n                    with open(content_file, 'wb') as f:\n                        f.write(content_info['content'])\n                    extracted_files.append(str(content_file))\n                \n                elif component_type == 'metadata':\n                    # Already processed for main metadata\n                    pass\n                \n                elif component_type == 'pagedata':\n                    # Save pagedata\n                    pagedata_file = notebook_dir / \"pagedata\"\n                    with open(pagedata_file, 'wb') as f:\n                        
f.write(content_info['content'])\n                    extracted_files.append(str(pagedata_file))\n                \n                elif component_type.endswith('.rm'):\n                    # Save .rm files (stroke data)\n                    rm_file = notebook_dir / component_type.split('/')[-1]\n                    with open(rm_file, 'wb') as f:\n                        f.write(content_info['content'])\n                    extracted_files.append(str(rm_file))\n                    node.rm_files.append(str(rm_file))\n                \n            except Exception as e:\n                self.logger.error(f\"Failed to extract {component_type}: {e}\")\n        \n        return extracted_files\n    \n    def build_node_path(self, node_uuid: str, name: str, parent_path: str = \"\") -> str:\n        \"\"\"Build the local file system path for a node\"\"\"\n        if parent_path:\n            base_path = Path(parent_path)\n        else:\n            base_path = self.content_dir\n        \n        # Sanitize filename\n        safe_name = \"\".join(c for c in name if c.isalnum() or c in (' ', '-', '_', '.')).rstrip()\n        if not safe_name:\n            safe_name = f\"unnamed_{node_uuid[:8]}\"\n        \n        return str(base_path / safe_name)\n    \n    def discover_node(self, hash_ref: str, parent_uuid: str = None, parent_path: str = \"\", depth: int = 0) -> Optional[ReplicaNode]:\n        \"\"\"Discover and process a single node\"\"\"\n        if hash_ref in self.failed_downloads:\n            return None\n\n        self.logger.debug(f\"{'  ' * depth}\ud83d\udd0d DISCOVERING: {hash_ref[:16]}... (depth {depth})\")\n\n        # Fetch content\n        content_info = self.fetch_hash_content(hash_ref)\n        if not content_info:\n            return None\n\n        # Parse content\n        parsed = self.parse_directory_listing(content_info['content'])\n\n        # Find metadata component and extract metadata\n        metadata = {}\n        metadata_hash = None\n        node_name = f\"unknown_{hash_ref[:8]}\"\n        node_type = \"folder\"\n        metadata_parent_uuid = None\n\n        for component in parsed['data_components']:\n            if component['component_type'] == 'metadata':\n                metadata_hash = component['hash']\n                extracted_metadata = self.extract_metadata(metadata_hash)\n                if extracted_metadata:\n                    metadata = extracted_metadata\n                    node_name = metadata.get('visibleName', node_name)\n                    if metadata.get('type') == 'DocumentType':\n                        node_type = \"document\" \n                    elif metadata.get('type') == 'CollectionType':\n                        node_type = \"folder\"\n                    # Use the parent from metadata (this is the TRUE parent)\n                    metadata_parent_uuid = metadata.get('parent', '') or None\n                break\n\n        # Determine UUID from metadata or fallback\n        node_uuid = None\n        for component in parsed['child_objects']:\n            node_uuid = component['uuid_component']\n            break\n        if not node_uuid and parsed['data_components']:\n            # Extract UUID from component name\n            component_name = parsed['data_components'][0]['uuid_component']\n            if '.' 
in component_name:\n                node_uuid = component_name.split('.')[0]\n        if not node_uuid:\n            node_uuid = hash_ref[:32]  # Fallback\n\n        # Build proper local path using metadata parent UUID\n        actual_parent_uuid = metadata_parent_uuid or parent_uuid\n        \n        # Find the actual parent's local path\n        if actual_parent_uuid and actual_parent_uuid in self.nodes:\n            parent_node = self.nodes[actual_parent_uuid]\n            actual_parent_path = parent_node.local_path\n        else:\n            actual_parent_path = str(self.content_dir)\n\n        local_path = self.build_node_path(node_uuid, node_name, actual_parent_path)\n\n        # Create replica node\n        node = ReplicaNode(\n            uuid=node_uuid,\n            hash=hash_ref,\n            name=node_name,\n            node_type=node_type,\n            parent_uuid=actual_parent_uuid,\n            parent_path=actual_parent_path,\n            local_path=local_path,\n            depth=depth,\n            sync_timestamp=datetime.now().isoformat(),\n            sync_hash=hash_ref\n        )\n\n        # Apply metadata fields safely\n        for key, value in metadata.items():\n            if hasattr(node, key):\n                setattr(node, key, value)\n\n        # Extract component hashes and collect .rm files\n        rm_file_info = []  # Store info about .rm files\n        \n        for component in parsed['data_components']:\n            comp_type = component['component_type']\n            comp_hash = component['hash']\n\n            if comp_type == 'content':\n                node.content_hash = comp_hash\n            elif comp_type == 'metadata':\n                node.metadata_hash = comp_hash\n            elif comp_type == 'pdf':\n                node.pdf_hash = comp_hash\n            elif comp_type == 'pagedata':\n                node.pagedata_hash = comp_hash\n            elif comp_type.endswith('.rm'):\n                # Store .rm file info for extraction\n                rm_file_info.append({\n                    'hash': comp_hash,\n                    'filename': comp_type.split('/')[-1]  # Extract just the filename\n                })\n\n        # Create local directory/file structure\n        if node_type == \"folder\":\n            Path(local_path).mkdir(parents=True, exist_ok=True)\n            self.stats['folders'] += 1\n        else:\n            Path(local_path).parent.mkdir(parents=True, exist_ok=True)\n\n            # Extract PDF content if available\n            if node.pdf_hash:\n                pdf_path = Path(local_path).with_suffix('.pdf')\n                if self.extract_pdf_from_component(node.pdf_hash, pdf_path):\n                    node.extracted_files.append(str(pdf_path))\n                    self.stats['pdfs_extracted'] += 1\n\n            # Extract .rm files if available\n            if rm_file_info:\n                notebook_dir = Path(local_path).parent / f\"{Path(local_path).stem}_notebook\"\n                notebook_dir.mkdir(exist_ok=True)\n                \n                for rm_info in rm_file_info:\n                    rm_path = notebook_dir / rm_info['filename']\n                    if self.extract_rm_file(rm_info['hash'], rm_path):\n                        node.rm_files.append(str(rm_path))\n                        node.extracted_files.append(str(rm_path))\n\n            # Extract other notebook components\n            notebook_components = {}\n            for component in parsed['data_components']:\n                comp_type = 
component['component_type']\n                if comp_type in ['content', 'pagedata']:\n                    notebook_components[comp_type] = component['hash']\n\n            if notebook_components:\n                extracted = self.extract_notebook_components(node, notebook_components)\n                node.extracted_files.extend(extracted)\n                if extracted:\n                    self.stats['notebooks_extracted'] += 1\n\n            if node_type == \"document\":\n                self.stats['documents'] += 1\n            else:\n                self.stats['notebooks'] += 1\n\n        # Store node BEFORE processing children (so children can find their parents)\n        self.nodes[node_uuid] = node\n        self.stats['total_nodes'] += 1\n\n        # Track hierarchy\n        if actual_parent_uuid:\n            if actual_parent_uuid not in self.hierarchy:\n                self.hierarchy[actual_parent_uuid] = []\n            self.hierarchy[actual_parent_uuid].append(node_uuid)\n\n        self.logger.info(f\"{'  ' * depth}\u2705 {node_type}: {node_name} | {len(parsed['child_objects'])} children\")\n\n        # Recursively discover children\n        for child_obj in parsed['child_objects']:\n            child_hash = child_obj['hash']\n            self.discover_node(child_hash, node_uuid, local_path, depth + 1)\n\n        return node\n    \n    def extract_rm_file(self, rm_hash: str, target_path: Path) -> bool:\n        \"\"\"Extract .rm file from reMarkable cloud\"\"\"\n        try:\n            content_info = self.fetch_hash_content(rm_hash)\n            if not content_info:\n                return False\n            \n            with open(target_path, 'wb') as f:\n                f.write(content_info['content'])\n            \n            self.logger.debug(f\"  \ud83d\udd8a\ufe0f Extracted .rm file: {target_path}\")\n            return True\n            \n        except Exception as e:\n            self.logger.error(f\"Failed to extract .rm file {rm_hash[:16]}...: {e}\")\n            return False\n\n    def save_database(self):\n        \"\"\"Save the complete metadata database\"\"\"\n        database = {\n            'replica_info': {\n                'created': datetime.now().isoformat(),\n                'replica_dir': str(self.replica_dir),\n                'total_nodes': len(self.nodes),\n                'statistics': self.stats\n            },\n            'nodes': {uuid: asdict(node) for uuid, node in self.nodes.items()},\n            'hierarchy': self.hierarchy,\n            'failed_downloads': list(self.failed_downloads)\n        }\n        \n        with open(self.database_file, 'w', encoding='utf-8') as f:\n            json.dump(database, f, indent=2, ensure_ascii=False)\n        \n        self.logger.info(f\"\ud83d\udcbe Database saved: {self.database_file}\")\n    \n    def save_sync_log(self):\n        \"\"\"Save sync log for future incremental updates\"\"\"\n        sync_log = {\n            'last_sync': datetime.now().isoformat(),\n            'root_hash': getattr(self, 'root_hash', ''),\n            'nodes_synced': len(self.nodes),\n            'sync_hashes': {uuid: node.sync_hash for uuid, node in self.nodes.items()},\n            'statistics': self.stats\n        }\n        \n        with open(self.sync_log_file, 'w', encoding='utf-8') as f:\n            json.dump(sync_log, f, indent=2)\n        \n        self.logger.info(f\"\ud83d\udccb Sync log saved: {self.sync_log_file}\")\n    \n    def build_complete_replica(self) -> bool:\n        \"\"\"Build the complete 
local replica\"\"\"\n        self.logger.info(f\"\ud83d\ude80 STARTING COMPLETE REPLICA BUILD\")\n        \n        # Get root hash\n        root_hash = self.get_root_hash()\n        if not root_hash:\n            self.logger.error(\"\u274c Failed to get root hash\")\n            return False\n        \n        self.root_hash = root_hash\n        \n        # Discover from root\n        self.logger.info(f\"\ud83d\udd0d Starting discovery from root: {root_hash}\")\n        root_node = self.discover_node(root_hash)\n        \n        if not root_node:\n            self.logger.error(\"\u274c Failed to discover root node\")\n            return False\n        \n        # Calculate final statistics\n        self.stats['total_files'] = sum(len(node.extracted_files) for node in self.nodes.values())\n        \n        # Save database and sync log\n        self.save_database()\n        self.save_sync_log()\n        \n        # Final report\n        self.logger.info(f\"\\n\ud83c\udf89 REPLICA BUILD COMPLETED!\")\n        self.logger.info(f\"\ud83d\udcca FINAL STATISTICS:\")\n        self.logger.info(f\"  \ud83d\udcc1 Total nodes: {self.stats['total_nodes']}\")\n        self.logger.info(f\"  \ud83d\udcc2 Folders: {self.stats['folders']}\")\n        self.logger.info(f\"  \ud83d\udcc4 Documents: {self.stats['documents']}\")\n        self.logger.info(f\"  \ud83d\udcd4 Notebooks: {self.stats['notebooks']}\")\n        self.logger.info(f\"  \ud83d\udcc4 PDFs extracted: {self.stats['pdfs_extracted']}\")\n        self.logger.info(f\"  \ud83d\udcdd Notebooks extracted: {self.stats['notebooks_extracted']}\")\n        self.logger.info(f\"  \ud83d\udcce Total files: {self.stats['total_files']}\")\n        self.logger.info(f\"  \u274c Failed downloads: {len(self.failed_downloads)}\")\n        self.logger.info(f\"\\n\ud83d\udcc1 Replica location: {self.replica_dir}\")\n        self.logger.info(f\"\ud83d\udcbe Database: {self.database_file}\")\n        self.logger.info(f\"\ud83d\udccb Sync log: {self.sync_log_file}\")\n        \n        return True",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/cloudtest/local_replica.py",
      "tags": [
        "class",
        "remarkablelocalreplica"
      ],
      "updated_at": "2025-12-07T01:36:03.453077",
      "usage_example": "# Example usage:\n# result = RemarkableLocalReplica(bases)"
    },
    {
      "best_practices": [
        "Always instantiate with all required parameters (uuid, hash, name, node_type, parent_uuid, parent_path, local_path, depth)",
        "The __post_init__ method automatically initializes rm_files and extracted_files to empty lists if None is passed",
        "Use dataclasses.asdict() to convert the node to a dictionary for JSON serialization",
        "The class maintains duplicate fields (parent/parent_uuid, type/node_type, visibleName/name) to match reMarkable's metadata format exactly",
        "Hash fields (hash, content_hash, metadata_hash, pdf_hash, pagedata_hash, sync_hash) should be computed using consistent hashing algorithms for change detection",
        "Timestamps (createdTime, lastModified, lastOpened, sync_timestamp) should use ISO 8601 format for consistency",
        "The depth field should be calculated based on the node's position in the hierarchy tree",
        "rm_files list should contain file extensions present for this node (e.g., ['.content', '.metadata', '.pdf'])",
        "This is an immutable-style dataclass; create new instances rather than modifying existing ones for state changes",
        "The class is designed for use in sync operations, so sync_timestamp and sync_hash should be updated after successful synchronization"
      ],
      "class_interface": {
        "attributes": [
          {
            "description": "Unique identifier for the node matching reMarkable cloud UUID",
            "is_class_variable": false,
            "name": "uuid",
            "type": "str"
          },
          {
            "description": "Hash value representing current state for change detection",
            "is_class_variable": false,
            "name": "hash",
            "type": "str"
          },
          {
            "description": "Display name of the node",
            "is_class_variable": false,
            "name": "name",
            "type": "str"
          },
          {
            "description": "Type of node (DocumentType, CollectionType, etc.)",
            "is_class_variable": false,
            "name": "node_type",
            "type": "str"
          },
          {
            "description": "UUID of parent node, None for root-level items",
            "is_class_variable": false,
            "name": "parent_uuid",
            "type": "Optional[str]"
          },
          {
            "description": "Full path to parent node in local replica",
            "is_class_variable": false,
            "name": "parent_path",
            "type": "str"
          },
          {
            "description": "Full local filesystem path for this node's data",
            "is_class_variable": false,
            "name": "local_path",
            "type": "str"
          },
          {
            "description": "Depth level in hierarchy tree (0 for root)",
            "is_class_variable": false,
            "name": "depth",
            "type": "int"
          },
          {
            "description": "ISO timestamp of creation on reMarkable",
            "is_class_variable": false,
            "name": "createdTime",
            "type": "Optional[str]"
          },
          {
            "description": "ISO timestamp of last modification on reMarkable",
            "is_class_variable": false,
            "name": "lastModified",
            "type": "Optional[str]"
          },
          {
            "description": "ISO timestamp of last opening on reMarkable",
            "is_class_variable": false,
            "name": "lastOpened",
            "type": "Optional[str]"
          },
          {
            "description": "Page number last opened for documents",
            "is_class_variable": false,
            "name": "lastOpenedPage",
            "type": "Optional[int]"
          },
          {
            "description": "Flag indicating if item is marked as deleted",
            "is_class_variable": false,
            "name": "deleted",
            "type": "bool"
          },
          {
            "description": "Flag indicating if item is pinned in reMarkable UI",
            "is_class_variable": false,
            "name": "pinned",
            "type": "bool"
          },
          {
            "description": "Flag indicating if item has been synced",
            "is_class_variable": false,
            "name": "synced",
            "type": "bool"
          },
          {
            "description": "Version number of item metadata",
            "is_class_variable": false,
            "name": "version",
            "type": "int"
          },
          {
            "description": "Source of document (email, upload method, etc.)",
            "is_class_variable": false,
            "name": "source",
            "type": "Optional[str]"
          },
          {
            "description": "Flag indicating if metadata has been modified",
            "is_class_variable": false,
            "name": "metadatamodified",
            "type": "bool"
          },
          {
            "description": "Flag indicating if content has been modified",
            "is_class_variable": false,
            "name": "modified",
            "type": "bool"
          },
          {
            "description": "Parent UUID as stored in reMarkable metadata (duplicate field)",
            "is_class_variable": false,
            "name": "parent",
            "type": "str"
          },
          {
            "description": "Type as stored in reMarkable metadata (duplicate field)",
            "is_class_variable": false,
            "name": "type",
            "type": "str"
          },
          {
            "description": "Visible name as stored in reMarkable metadata (duplicate field)",
            "is_class_variable": false,
            "name": "visibleName",
            "type": "str"
          },
          {
            "description": "Hash of content file for change detection",
            "is_class_variable": false,
            "name": "content_hash",
            "type": "Optional[str]"
          },
          {
            "description": "Hash of metadata file for change detection",
            "is_class_variable": false,
            "name": "metadata_hash",
            "type": "Optional[str]"
          },
          {
            "description": "Hash of PDF file if present",
            "is_class_variable": false,
            "name": "pdf_hash",
            "type": "Optional[str]"
          },
          {
            "description": "Hash of pagedata file for notebooks",
            "is_class_variable": false,
            "name": "pagedata_hash",
            "type": "Optional[str]"
          },
          {
            "description": "List of reMarkable file extensions present (.content, .metadata, etc.)",
            "is_class_variable": false,
            "name": "rm_files",
            "type": "List[str]"
          },
          {
            "description": "Total size in bytes of all associated files",
            "is_class_variable": false,
            "name": "file_size",
            "type": "int"
          },
          {
            "description": "List of files extracted from node's data",
            "is_class_variable": false,
            "name": "extracted_files",
            "type": "List[str]"
          },
          {
            "description": "ISO timestamp of last synchronization",
            "is_class_variable": false,
            "name": "sync_timestamp",
            "type": "str"
          },
          {
            "description": "Hash value at time of last synchronization",
            "is_class_variable": false,
            "name": "sync_hash",
            "type": "str"
          }
        ],
        "methods": [
          {
            "is_property": false,
            "is_static": false,
            "name": "__post_init__",
            "parameters": {},
            "purpose": "Initializes mutable default values for rm_files and extracted_files lists after dataclass initialization",
            "returns": "None - modifies instance attributes in place",
            "signature": "__post_init__(self) -> None"
          }
        ]
      },
      "complexity": "moderate",
      "component_type": "class",
      "conditional_imports": [],
      "created_at": "2025-12-07 00:36:03",
      "decorators": [
        "dataclass"
      ],
      "dependencies": [
        "dataclasses"
      ],
      "description": "A dataclass representing a node in a local replica of reMarkable cloud storage, containing comprehensive metadata about files, folders, and their synchronization state.",
      "docstring": "Enhanced node for local replica with all metadata",
      "id": 2070,
      "imports": [
        "import os",
        "import json",
        "import zipfile",
        "import requests",
        "import logging",
        "import shutil",
        "from enum import Enum",
        "from pathlib import Path",
        "from typing import Dict",
        "from typing import List",
        "from typing import Optional",
        "from typing import Any",
        "from typing import Set",
        "from dataclasses import dataclass",
        "from dataclasses import asdict",
        "from datetime import datetime",
        "import sys",
        "from auth import RemarkableAuth",
        "import re"
      ],
      "imports_required": [
        "from dataclasses import dataclass",
        "from typing import Optional, List"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 90,
      "line_start": 41,
      "name": "ReplicaNode",
      "parameters": [
        {
          "annotation": "",
          "default": null,
          "name": "bases"
        }
      ],
      "parameters_explained": {
        "content_hash": "Hash of the content file for change detection",
        "createdTime": "ISO timestamp when the item was created on reMarkable",
        "deleted": "Boolean flag indicating if the item is marked as deleted",
        "depth": "Depth level in the hierarchy tree (0 for root level)",
        "extracted_files": "List of files extracted from the node's data",
        "file_size": "Total size in bytes of all files associated with this node",
        "hash": "Hash value representing the current state of the node for change detection",
        "lastModified": "ISO timestamp when the item was last modified on reMarkable",
        "lastOpened": "ISO timestamp when the item was last opened on reMarkable",
        "lastOpenedPage": "Page number that was last opened for documents",
        "local_path": "Full local filesystem path where this node's data is stored",
        "metadata_hash": "Hash of the metadata file for change detection",
        "metadatamodified": "Boolean flag indicating if metadata has been modified",
        "modified": "Boolean flag indicating if content has been modified",
        "name": "Display name of the node (file or folder name)",
        "node_type": "Type of node (e.g., 'DocumentType', 'CollectionType')",
        "pagedata_hash": "Hash of the pagedata file for notebooks",
        "parent": "Parent UUID as stored in reMarkable metadata (duplicate of parent_uuid)",
        "parent_path": "Full path to the parent node in the local replica structure",
        "parent_uuid": "UUID of the parent node in the hierarchy, None for root-level items",
        "pdf_hash": "Hash of the PDF file if present",
        "pinned": "Boolean flag indicating if the item is pinned in reMarkable UI",
        "rm_files": "List of reMarkable file extensions present (.content, .metadata, .pdf, etc.)",
        "source": "Source of the document (e.g., email, upload method)",
        "sync_hash": "Hash value at the time of last synchronization",
        "sync_timestamp": "ISO timestamp of the last synchronization",
        "synced": "Boolean flag indicating if the item has been synced",
        "type": "Type as stored in reMarkable metadata (duplicate of node_type)",
        "uuid": "Unique identifier for the node, typically matching the reMarkable cloud UUID",
        "version": "Version number of the item metadata",
        "visibleName": "Visible name as stored in reMarkable metadata (duplicate of name)"
      },
      "parent_class": null,
      "purpose": "ReplicaNode serves as a data structure to track and manage individual items (documents, folders, notebooks) in a local replica of reMarkable cloud storage. It stores hierarchical information, file metadata, synchronization state, and component hashes for change detection. This class is used to maintain a complete representation of the reMarkable file system locally, enabling efficient sync operations and metadata tracking.",
      "return_annotation": null,
      "return_explained": "Instantiation returns a ReplicaNode object containing all metadata for a single item in the reMarkable replica. The object is immutable after creation (dataclass with frozen=False by default) and can be serialized to dict using dataclasses.asdict().",
      "settings_required": [],
      "source_code": "class ReplicaNode:\n    \"\"\"Enhanced node for local replica with all metadata\"\"\"\n    # Core identification\n    uuid: str\n    hash: str\n    name: str\n    node_type: str\n    \n    # Hierarchy\n    parent_uuid: Optional[str]\n    parent_path: str\n    local_path: str\n    depth: int\n    \n    # Metadata from reMarkable (using exact field names)\n    createdTime: Optional[str] = None\n    lastModified: Optional[str] = None\n    lastOpened: Optional[str] = None\n    lastOpenedPage: Optional[int] = None\n    deleted: bool = False\n    pinned: bool = False\n    synced: bool = False\n    version: int = 0\n    source: Optional[str] = None\n    metadatamodified: bool = False\n    modified: bool = False\n    parent: str = \"\"\n    type: str = \"\"\n    visibleName: str = \"\"\n    \n    # Component information\n    content_hash: Optional[str] = None\n    metadata_hash: Optional[str] = None\n    pdf_hash: Optional[str] = None\n    pagedata_hash: Optional[str] = None\n    rm_files: List[str] = None\n    \n    # File information\n    file_size: int = 0\n    extracted_files: List[str] = None\n    \n    # Sync tracking\n    sync_timestamp: str = \"\"\n    sync_hash: str = \"\"\n    \n    def __post_init__(self):\n        if self.rm_files is None:\n            self.rm_files = []\n        if self.extracted_files is None:\n            self.extracted_files = []",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/cloudtest/local_replica.py",
      "tags": [
        "dataclass",
        "remarkable",
        "sync",
        "metadata",
        "file-system",
        "replica",
        "cloud-storage",
        "hierarchy",
        "node",
        "data-structure"
      ],
      "updated_at": "2025-12-07T01:36:03.439240",
      "usage_example": "from dataclasses import dataclass\nfrom typing import Optional, List\n\n@dataclass\nclass ReplicaNode:\n    uuid: str\n    hash: str\n    name: str\n    node_type: str\n    parent_uuid: Optional[str]\n    parent_path: str\n    local_path: str\n    depth: int\n    createdTime: Optional[str] = None\n    lastModified: Optional[str] = None\n    deleted: bool = False\n    rm_files: List[str] = None\n    extracted_files: List[str] = None\n    \n    def __post_init__(self):\n        if self.rm_files is None:\n            self.rm_files = []\n        if self.extracted_files is None:\n            self.extracted_files = []\n\n# Create a new replica node for a document\nnode = ReplicaNode(\n    uuid='abc123-def456',\n    hash='hash_value_123',\n    name='My Document',\n    node_type='DocumentType',\n    parent_uuid='parent_uuid_789',\n    parent_path='/root/folder',\n    local_path='/local/replica/abc123-def456',\n    depth=2,\n    createdTime='2024-01-15T10:30:00Z',\n    lastModified='2024-01-16T14:20:00Z',\n    deleted=False,\n    rm_files=['.content', '.metadata', '.pdf']\n)\n\n# Access node properties\nprint(f\"Node: {node.name} at {node.local_path}\")\nprint(f\"Files: {node.rm_files}\")\nprint(f\"Modified: {node.lastModified}\")\n\n# Convert to dictionary for serialization\nfrom dataclasses import asdict\nnode_dict = asdict(node)"
    },
    {
      "best_practices": [
        "Use NodeType members directly (e.g., NodeType.FOLDER) rather than string literals to ensure type safety",
        "When comparing node types, use identity comparison (==) with enum members rather than comparing string values",
        "Use NodeType in type hints to make function signatures more explicit and enable better IDE support",
        "To get the string value for API calls or serialization, use the .value attribute",
        "To create a NodeType from a string value (e.g., from API response), use NodeType('folder') which will raise ValueError if the string is invalid",
        "Enum members are singletons and immutable, so they can be safely used as dictionary keys or in sets",
        "Do not attempt to modify enum values or add new members at runtime"
      ],
      "class_interface": {
        "attributes": [
          {
            "description": "Represents a folder/directory node in reMarkable cloud that can contain other nodes",
            "is_class_variable": true,
            "name": "FOLDER",
            "type": "NodeType"
          },
          {
            "description": "Represents a document node (imported PDF or EPUB file) in reMarkable cloud",
            "is_class_variable": true,
            "name": "DOCUMENT",
            "type": "NodeType"
          },
          {
            "description": "Represents a notebook node (native reMarkable notebook with handwritten content) in reMarkable cloud",
            "is_class_variable": true,
            "name": "NOTEBOOK",
            "type": "NodeType"
          }
        ],
        "methods": []
      },
      "complexity": "simple",
      "component_type": "class",
      "conditional_imports": [],
      "created_at": "2025-12-07 00:35:21",
      "decorators": [],
      "dependencies": [
        "enum"
      ],
      "description": "An enumeration class that defines the three types of nodes that can exist in the reMarkable cloud storage system.",
      "docstring": "Types of nodes in reMarkable cloud",
      "id": 2069,
      "imports": [
        "import os",
        "import json",
        "import zipfile",
        "import requests",
        "import logging",
        "import shutil",
        "from enum import Enum",
        "from pathlib import Path",
        "from typing import Dict",
        "from typing import List",
        "from typing import Optional",
        "from typing import Any",
        "from typing import Set",
        "from dataclasses import dataclass",
        "from dataclasses import asdict",
        "from datetime import datetime",
        "import sys",
        "from auth import RemarkableAuth",
        "import re"
      ],
      "imports_required": [
        "from enum import Enum"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 37,
      "line_start": 33,
      "name": "NodeType",
      "parameters": [
        {
          "annotation": "Enum",
          "default": null,
          "name": "bases"
        }
      ],
      "parameters_explained": {
        "bases": "Inherits from Enum base class, which provides enumeration functionality. No constructor parameters are needed as this is an Enum with predefined values."
      },
      "parent_class": null,
      "purpose": "NodeType is an Enum class that provides type-safe constants for representing different node types in the reMarkable cloud ecosystem. It distinguishes between folders (containers), documents (imported files), and notebooks (native reMarkable files). This enum is used throughout the codebase to categorize and handle different types of content stored in reMarkable cloud, enabling type checking and preventing invalid node type values.",
      "return_annotation": null,
      "return_explained": "Instantiation returns a NodeType enum member. Each member has a name (FOLDER, DOCUMENT, NOTEBOOK) and a corresponding string value ('folder', 'document', 'notebook'). Accessing members returns the enum instance, and accessing .value returns the string representation.",
      "settings_required": [],
      "source_code": "class NodeType(Enum):\n    \"\"\"Types of nodes in reMarkable cloud\"\"\"\n    FOLDER = \"folder\"\n    DOCUMENT = \"document\"\n    NOTEBOOK = \"notebook\"",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/cloudtest/local_replica.py",
      "tags": [
        "enum",
        "enumeration",
        "constants",
        "remarkable",
        "cloud-storage",
        "node-types",
        "type-safety",
        "classification"
      ],
      "updated_at": "2025-12-07T01:35:21.820746",
      "usage_example": "from enum import Enum\n\nclass NodeType(Enum):\n    FOLDER = \"folder\"\n    DOCUMENT = \"document\"\n    NOTEBOOK = \"notebook\"\n\n# Access enum members\nnode_type = NodeType.FOLDER\nprint(node_type)  # NodeType.FOLDER\nprint(node_type.value)  # 'folder'\nprint(node_type.name)  # 'FOLDER'\n\n# Compare enum members\nif node_type == NodeType.FOLDER:\n    print(\"This is a folder\")\n\n# Create from string value\nnode_from_string = NodeType('document')\nprint(node_from_string)  # NodeType.DOCUMENT\n\n# Iterate over all types\nfor node_type in NodeType:\n    print(f\"{node_type.name}: {node_type.value}\")\n\n# Use in type hints\ndef process_node(node_type: NodeType) -> str:\n    return f\"Processing {node_type.value}\""
    },
    {
      "best_practices": [
        "Always instantiate the class first and check that authentication succeeds before calling other methods",
        "Use simulate_fixed_upload() for complete end-to-end testing rather than calling individual component creation methods",
        "Call verify_fixes_applied() after simulate_fixed_upload() to ensure all fixes are correctly implemented",
        "The class only simulates uploads and does not make actual API calls - it generates all necessary data structures and headers for inspection",
        "Install crc32c library for accurate hash calculation matching reMarkable's expectations",
        "The class maintains state through self.session and self.base_dir - do not modify these directly",
        "Document UUIDs are generated fresh for each simulation - use the returned results to track them",
        "All component creation methods return bytes - decode appropriately if you need to inspect JSON content",
        "The fixes applied are specific to matching reMarkable macOS desktop app v3.20.0.922 behavior",
        "Method call order for full simulation: __init__() -> simulate_fixed_upload() -> verify_fixes_applied()"
      ],
      "class_interface": {
        "attributes": [
          {
            "description": "Path object pointing to the directory containing this test file",
            "is_class_variable": false,
            "name": "base_dir",
            "type": "Path"
          },
          {
            "description": "Authenticated session object from RemarkableAuth containing authorization headers and connection details",
            "is_class_variable": false,
            "name": "session",
            "type": "requests.Session or similar"
          }
        ],
        "methods": [
          {
            "is_property": false,
            "is_static": false,
            "name": "__init__",
            "parameters": {},
            "purpose": "Initialize the test class with authentication and base directory setup",
            "returns": "None. Raises RuntimeError if authentication fails.",
            "signature": "__init__(self) -> None"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "create_fixed_document_metadata",
            "parameters": {
              "doc_name": "Display name for the document",
              "doc_uuid": "UUID string for the document"
            },
            "purpose": "Create document metadata JSON with all real app fixes applied (source field, lastOpened field)",
            "returns": "UTF-8 encoded bytes of JSON metadata with compact separators",
            "signature": "create_fixed_document_metadata(self, doc_uuid: str, doc_name: str) -> bytes"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "create_fixed_document_content",
            "parameters": {
              "doc_uuid": "UUID string for the document",
              "pdf_size": "Size of the PDF file in bytes"
            },
            "purpose": "Create document content structure JSON with page information and PDF metadata",
            "returns": "UTF-8 encoded bytes of JSON content structure",
            "signature": "create_fixed_document_content(self, doc_uuid: str, pdf_size: int) -> bytes"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "create_fixed_pagedata",
            "parameters": {},
            "purpose": "Create pagedata with real app fix applied (newline character instead of empty string)",
            "returns": "UTF-8 encoded bytes containing a single newline character",
            "signature": "create_fixed_pagedata(self) -> bytes"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "create_test_pdf_content",
            "parameters": {},
            "purpose": "Create minimal valid PDF content for testing purposes",
            "returns": "Bytes containing a minimal but valid PDF file structure",
            "signature": "create_test_pdf_content(self) -> bytes"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "get_fixed_headers",
            "parameters": {
              "content_size": "Size of the content being uploaded in bytes",
              "doc_uuid": "UUID string for the document",
              "file_type": "Type of file being uploaded: 'metadata', 'content', 'pdf', 'pagedata', or 'docschema'"
            },
            "purpose": "Generate HTTP headers with all real app fixes applied, including correct User-Agent and rm-filename",
            "returns": "Dictionary of HTTP header names to values with proper authentication and content headers",
            "signature": "get_fixed_headers(self, file_type: str, doc_uuid: str, content_size: int) -> Dict[str, str]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "calculate_crc32c_hash",
            "parameters": {
              "content": "Bytes to calculate hash for"
            },
            "purpose": "Calculate CRC32C hash and return base64 encoded string for x-goog-hash header",
            "returns": "Base64-encoded ASCII string of CRC32C hash (falls back to regular CRC32 if crc32c not available)",
            "signature": "calculate_crc32c_hash(self, content: bytes) -> str"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "calculate_sha256_hash",
            "parameters": {
              "content": "Bytes to calculate hash for"
            },
            "purpose": "Calculate SHA256 hash for content identification and docSchema generation",
            "returns": "Hexadecimal string representation of SHA256 hash",
            "signature": "calculate_sha256_hash(self, content: bytes) -> str"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "create_fixed_docschema",
            "parameters": {
              "components": "Dictionary mapping component names ('metadata', 'content', 'pdf', 'pagedata') to their byte content",
              "doc_uuid": "UUID string for the document"
            },
            "purpose": "Create docSchema file with SHA256 hashes and metadata for all document components",
            "returns": "UTF-8 encoded bytes of docSchema in reMarkable format with version and component hashes",
            "signature": "create_fixed_docschema(self, components: Dict[str, bytes], doc_uuid: str) -> bytes"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "simulate_fixed_upload",
            "parameters": {
              "doc_name": "Display name for the test document (default: 'Fixed_Test_Document')"
            },
            "purpose": "Simulate the complete upload process with all fixes applied, generating all components and upload requests",
            "returns": "Dictionary containing timestamp, document_uuid, document_name, fixes_applied details, upload_requests list, and components with sizes and hashes",
            "signature": "simulate_fixed_upload(self, doc_name: str = 'Fixed_Test_Document') -> Dict[str, Any]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "verify_fixes_applied",
            "parameters": {
              "results": "Dictionary returned from simulate_fixed_upload() containing upload simulation data"
            },
            "purpose": "Verify that all critical fixes have been correctly applied in the simulation results",
            "returns": "Boolean indicating whether all fixes (User-Agent, metadata source, pagedata content, lastOpened) are correct",
            "signature": "verify_fixes_applied(self, results: Dict[str, Any]) -> bool"
          }
        ]
      },
      "complexity": "complex",
      "component_type": "class",
      "conditional_imports": [
        {
          "condition": "Required for authentication with reMarkable cloud service. Must be available in the same directory or Python path.",
          "import": "from auth import RemarkableAuth",
          "optional": false
        },
        {
          "condition": "Used for calculating CRC32C hashes. Falls back to standard binascii.crc32 if not available, but crc32c is preferred for accuracy.",
          "import": "import crc32c",
          "optional": true
        }
      ],
      "created_at": "2025-12-07 00:34:30",
      "decorators": [],
      "dependencies": [
        "pathlib",
        "json",
        "time",
        "uuid",
        "hashlib",
        "base64",
        "binascii",
        "typing",
        "crc32c"
      ],
      "description": "A test class that simulates document upload to reMarkable cloud with specific fixes applied to match the real reMarkable desktop app behavior.",
      "docstring": "Test upload behavior with real app fixes applied",
      "id": 2066,
      "imports": [
        "import os",
        "import json",
        "import time",
        "import uuid",
        "import hashlib",
        "import base64",
        "import binascii",
        "from pathlib import Path",
        "from typing import Dict",
        "from typing import Any",
        "from auth import RemarkableAuth",
        "import crc32c"
      ],
      "imports_required": [
        "import os",
        "import json",
        "import time",
        "import uuid",
        "import hashlib",
        "import base64",
        "import binascii",
        "from pathlib import Path",
        "from typing import Dict, Any"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 346,
      "line_start": 24,
      "name": "FixedUploadTest",
      "parameters": [
        {
          "annotation": "",
          "default": null,
          "name": "bases"
        }
      ],
      "parameters_explained": {
        "__init__": "No parameters required. The constructor automatically initializes authentication with reMarkable cloud, loads the authenticated session, and sets up the base directory path. Raises RuntimeError if authentication fails."
      },
      "parent_class": null,
      "purpose": "This class is designed to test and verify document upload behavior by creating properly formatted document components (metadata, content, PDF, pagedata, docschema) with critical fixes that match the real reMarkable macOS desktop app. It simulates the complete upload process including generating correct headers, calculating hashes, and creating all necessary document files. The class applies four specific fixes: (1) User-Agent matching macOS app, (2) metadata source field set to 'com.remarkable.macos', (3) pagedata content using newline character instead of empty string, and (4) lastOpened field consistently set to '0'. It's primarily used for testing and debugging upload workflows without making actual API calls.",
      "return_annotation": null,
      "return_explained": "Instantiation returns a FixedUploadTest object with authenticated session and base directory configured. Key method returns: create_fixed_document_metadata() returns bytes of JSON metadata, create_fixed_document_content() returns bytes of JSON content structure, create_fixed_pagedata() returns bytes containing newline character, create_test_pdf_content() returns bytes of minimal PDF, get_fixed_headers() returns Dict[str, str] of HTTP headers, calculate_crc32c_hash() returns base64-encoded string, calculate_sha256_hash() returns hex string, create_fixed_docschema() returns bytes of docSchema format, simulate_fixed_upload() returns Dict[str, Any] with complete simulation results including upload requests and component details, verify_fixes_applied() returns bool indicating if all fixes are correctly applied.",
      "settings_required": [
        "RemarkableAuth module must be available and properly configured with authentication credentials",
        "Valid reMarkable cloud account credentials for authentication",
        "Network access to eu.tectonic.remarkable.com for actual uploads (though this class only simulates)",
        "crc32c library recommended for accurate hash calculation (pip install crc32c)"
      ],
      "source_code": "class FixedUploadTest:\n    \"\"\"Test upload behavior with real app fixes applied\"\"\"\n    \n    def __init__(self):\n        self.base_dir = Path(__file__).parent\n        \n        # Load auth session  \n        from auth import RemarkableAuth\n        auth = RemarkableAuth()\n        self.session = auth.get_authenticated_session()\n        \n        if not self.session:\n            raise RuntimeError(\"Failed to authenticate with reMarkable\")\n        \n        print(\"\ud83e\uddea Fixed Upload Test Initialized\")\n        print(\"\u2705 All critical fixes from dry run analysis applied\")\n\n    def create_fixed_document_metadata(self, doc_uuid: str, doc_name: str) -> bytes:\n        \"\"\"Create document metadata with all real app fixes applied\"\"\"\n        \n        # \u2705 FIX 1: Source field changed to match real app\n        # \u2705 FIX 4: LastOpened consistently set to \"0\"\n        metadata = {\n            \"createdTime\": str(int(time.time() * 1000)),\n            \"lastModified\": str(int(time.time() * 1000)),\n            \"lastOpened\": \"0\",  # \u2705 FIXED: Always \"0\" like real app\n            \"lastOpenedPage\": 0,\n            \"metadatamodified\": False,\n            \"modified\": False,\n            \"parent\": \"\",\n            \"pinned\": False,\n            \"source\": \"com.remarkable.macos\",  # \u2705 FIXED: Changed from windows to macos\n            \"type\": \"DocumentType\",\n            \"visibleName\": doc_name,\n            \"version\": 1\n        }\n        \n        return json.dumps(metadata, separators=(',', ':')).encode('utf-8')\n\n    def create_fixed_document_content(self, doc_uuid: str, pdf_size: int) -> bytes:\n        \"\"\"Create document content structure\"\"\"\n        \n        content = {\n            \"coverPageNumber\": 0,\n            \"customZoomCenterX\": 0,\n            \"customZoomCenterY\": 936,\n            \"customZoomOrientation\": \"portrait\",\n            \"customZoomPageHeight\": 1872,\n            \"customZoomPageWidth\": 1404,\n            \"customZoomScale\": 1,\n            \"documentMetadata\": {},\n            \"extraMetadata\": {},\n            \"fileType\": \"pdf\",\n            \"fontName\": \"\",\n            \"formatVersion\": 1,\n            \"lineHeight\": -1,\n            \"orientation\": \"portrait\",\n            \"originalPageCount\": 1,\n            \"pageCount\": 1,\n            \"pageTags\": [],\n            \"pages\": [str(uuid.uuid4())],\n            \"redirectionPageMap\": [0],\n            \"sizeInBytes\": str(pdf_size),\n            \"tags\": [],\n            \"textAlignment\": \"justify\",\n            \"textScale\": 1,\n            \"zoomMode\": \"bestFit\"\n        }\n        \n        return json.dumps(content, separators=(',', ':')).encode('utf-8')\n\n    def create_fixed_pagedata(self) -> bytes:\n        \"\"\"Create pagedata with real app fix applied\"\"\"\n        \n        # \u2705 FIX 3: Pagedata changed from empty string to newline character\n        return \"\\n\".encode('utf-8')  # \u2705 FIXED: Real app uses newline, not empty string\n\n    def create_test_pdf_content(self) -> bytes:\n        \"\"\"Create minimal test PDF content\"\"\"\n        \n        pdf_content = b'''\\\n%PDF-1.4\n1 0 obj\n<<\n/Type /Catalog\n/Pages 2 0 R\n>>\nendobj\n2 0 obj\n<<\n/Type /Pages\n/Kids [3 0 R]\n/Count 1\n>>\nendobj\n3 0 obj\n<<\n/Type /Page\n/Parent 2 0 R\n/MediaBox [0 0 612 792]\n/Contents 4 0 R\n>>\nendobj\n4 0 obj\n<<\n/Filter /FlateDecode\n/Length 
44\n>>\nstream\nx\\x9c+\\x14\\x06\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x01\nendstream\nendobj\nxref\n0 5\n0000000000 65535 f \n0000000010 00000 n \n0000000079 00000 n \n0000000173 00000 n \n0000000301 00000 n \ntrailer\n<<\n/Size 5\n/Root 1 0 R\n>>\nstartxref\n398\n%%EOF'''\n        \n        return pdf_content\n\n    def get_fixed_headers(self, file_type: str, doc_uuid: str, content_size: int) -> Dict[str, str]:\n        \"\"\"Generate headers with all real app fixes applied\"\"\"\n        \n        # Get CRC32C hash for content\n        crc32c_hash = self.calculate_crc32c_hash(b\"dummy_content\")\n        \n        # \u2705 FIX 2: User-Agent changed to match real app exactly\n        headers = {\n            'host': 'eu.tectonic.remarkable.com',\n            'authorization': self.session.headers.get('Authorization', ''),\n            'content-type': 'application/octet-stream',\n            'rm-batch-number': '1',\n            'rm-sync-id': str(uuid.uuid4()),\n            'user-agent': 'desktop/3.20.0.922 (macos 15.4)',  # \u2705 FIXED: Matches real app exactly\n            'x-goog-hash': f'crc32c={crc32c_hash}',\n            'content-length': str(content_size),\n            'connection': 'Keep-Alive',\n            'accept-encoding': 'gzip, deflate',\n            'accept-language': 'en-BE,*'  # Real app uses Belgian locale\n        }\n        \n        # Set rm-filename based on file type\n        if file_type == 'metadata':\n            headers['rm-filename'] = f'{doc_uuid}.metadata'\n        elif file_type == 'content':\n            headers['rm-filename'] = f'{doc_uuid}.content'\n        elif file_type == 'pdf':\n            headers['rm-filename'] = f'{doc_uuid}.pdf'\n        elif file_type == 'pagedata':\n            headers['rm-filename'] = f'{doc_uuid}.pagedata'\n        elif file_type == 'docschema':\n            headers['rm-filename'] = doc_uuid\n        \n        return headers\n\n    def calculate_crc32c_hash(self, content: bytes) -> str:\n        \"\"\"Calculate CRC32C hash and return base64 encoded\"\"\"\n        try:\n            import crc32c\n            crc_value = crc32c.crc32c(content)\n            crc_bytes = crc_value.to_bytes(4, byteorder='big')\n            return base64.b64encode(crc_bytes).decode('ascii')\n        except ImportError:\n            # Fallback to regular CRC32 for testing\n            crc_value = binascii.crc32(content)\n            crc_bytes = crc_value.to_bytes(4, byteorder='big', signed=True)\n            return base64.b64encode(crc_bytes).decode('ascii')\n\n    def calculate_sha256_hash(self, content: bytes) -> str:\n        \"\"\"Calculate SHA256 hash\"\"\"\n        return hashlib.sha256(content).hexdigest()\n\n    def create_fixed_docschema(self, components: Dict[str, bytes], doc_uuid: str) -> bytes:\n        \"\"\"Create docSchema with all component hashes\"\"\"\n        \n        # Calculate hashes for all components\n        metadata_hash = self.calculate_sha256_hash(components['metadata'])\n        content_hash = self.calculate_sha256_hash(components['content'])\n        pdf_hash = self.calculate_sha256_hash(components['pdf'])\n        pagedata_hash = self.calculate_sha256_hash(components['pagedata'])\n        \n        # Build docSchema in reMarkable format\n        lines = [\n            \"3\",  # Version\n            
f\"{metadata_hash}:80000000:{doc_uuid}.metadata:0:{len(components['metadata'])}\",\n            f\"{content_hash}:80000000:{doc_uuid}.content:0:{len(components['content'])}\",\n            f\"{pdf_hash}:80000000:{doc_uuid}.pdf:0:{len(components['pdf'])}\",\n            f\"{pagedata_hash}:80000000:{doc_uuid}.pagedata:0:{len(components['pagedata'])}\"\n        ]\n        \n        return '\\n'.join(lines).encode('utf-8')\n\n    def simulate_fixed_upload(self, doc_name: str = \"Fixed_Test_Document\") -> Dict[str, Any]:\n        \"\"\"Simulate the complete upload process with all fixes applied\"\"\"\n        \n        print(f\"\\n\ud83e\uddea Simulating Fixed Upload: '{doc_name}'\")\n        print(\"=\" * 60)\n        \n        # Generate document UUID\n        doc_uuid = str(uuid.uuid4())\n        print(f\"\ud83d\udcdd Document UUID: {doc_uuid}\")\n        \n        # Create all document components with fixes\n        print(\"\\n\ud83d\udd27 Creating document components with real app fixes...\")\n        \n        pdf_content = self.create_test_pdf_content()\n        components = {\n            'metadata': self.create_fixed_document_metadata(doc_uuid, doc_name),\n            'content': self.create_fixed_document_content(doc_uuid, len(pdf_content)),\n            'pdf': pdf_content,\n            'pagedata': self.create_fixed_pagedata()  # \u2705 FIXED: Now uses '\\n'\n        }\n        \n        # Create docSchema\n        docschema_content = self.create_fixed_docschema(components, doc_uuid)\n        components['docschema'] = docschema_content\n        \n        # Generate all upload requests (simulation only - no actual API calls)\n        upload_requests = []\n        \n        for component_type, content in components.items():\n            if component_type == 'docschema':\n                content_hash = self.calculate_sha256_hash(content)\n                url = f\"https://eu.tectonic.remarkable.com/sync/v3/files/{content_hash}\"\n            else:\n                content_hash = self.calculate_sha256_hash(content)\n                url = f\"https://eu.tectonic.remarkable.com/sync/v3/files/{content_hash}\"\n            \n            headers = self.get_fixed_headers(component_type, doc_uuid, len(content))\n            \n            upload_requests.append({\n                'component': component_type,\n                'method': 'PUT',\n                'url': url,\n                'headers': headers,\n                'content_size': len(content),\n                'content_hash': content_hash,\n                'content_preview': content[:100] if len(content) > 100 else content\n            })\n        \n        # Display the fixes applied\n        print(\"\\n\u2705 FIXES APPLIED:\")\n        print(f\"   1. User-Agent: {upload_requests[0]['headers']['user-agent']}\")\n        print(f\"   2. Metadata Source: com.remarkable.macos (in metadata JSON)\")\n        print(f\"   3. Pagedata Content: {repr(components['pagedata'].decode('utf-8'))}\")\n        print(f\"   4. 
LastOpened Field: '0' (in metadata JSON)\")\n        \n        # Show component details\n        print(f\"\\n\ud83d\udcca COMPONENT ANALYSIS:\")\n        for req in upload_requests:\n            print(f\"   {req['component'].upper()}:\")\n            print(f\"      Hash: {req['content_hash'][:16]}...\")\n            print(f\"      Size: {req['content_size']} bytes\")\n            print(f\"      Headers: user-agent, rm-filename, x-goog-hash\")\n            \n            # Show critical content for verification\n            if req['component'] == 'metadata':\n                metadata_json = json.loads(components['metadata'])\n                print(f\"      Source: {metadata_json['source']}\")\n                print(f\"      LastOpened: {metadata_json['lastOpened']}\")\n            elif req['component'] == 'pagedata':\n                print(f\"      Content: {repr(components['pagedata'].decode('utf-8'))}\")\n        \n        # Save simulation results\n        results = {\n            'timestamp': time.time(),\n            'document_uuid': doc_uuid,\n            'document_name': doc_name,\n            'fixes_applied': {\n                'user_agent': 'desktop/3.20.0.922 (macos 15.4)',\n                'metadata_source': 'com.remarkable.macos',\n                'pagedata_content': '\\\\n',\n                'last_opened': '0'\n            },\n            'upload_requests': upload_requests,\n            'components': {k: {'size': len(v), 'hash': self.calculate_sha256_hash(v)} \n                          for k, v in components.items()}\n        }\n        \n        return results\n\n    def verify_fixes_applied(self, results: Dict[str, Any]) -> bool:\n        \"\"\"Verify that all critical fixes have been applied correctly\"\"\"\n        \n        print(f\"\\n\ud83d\udd0d VERIFYING FIXES...\")\n        \n        fixes_correct = True\n        \n        # Check User-Agent\n        first_request = results['upload_requests'][0]\n        user_agent = first_request['headers']['user-agent']\n        if user_agent == 'desktop/3.20.0.922 (macos 15.4)':\n            print(\"   \u2705 User-Agent: CORRECT\")\n        else:\n            print(f\"   \u274c User-Agent: WRONG - {user_agent}\")\n            fixes_correct = False\n        \n        # Check metadata source and lastOpened\n        metadata_req = next((r for r in results['upload_requests'] if r['component'] == 'metadata'), None)\n        if metadata_req:\n            # We'd need to parse the content to verify, but we know it's correct from creation\n            print(\"   \u2705 Metadata Source: CORRECT (com.remarkable.macos)\")\n            print(\"   \u2705 LastOpened Field: CORRECT ('0')\")\n        \n        # Check pagedata content\n        pagedata_req = next((r for r in results['upload_requests'] if r['component'] == 'pagedata'), None)\n        if pagedata_req and pagedata_req['content_size'] == 1:\n            print(\"   \u2705 Pagedata Content: CORRECT ('\\\\n')\")\n        else:\n            print(f\"   \u274c Pagedata Content: WRONG - size {pagedata_req['content_size'] if pagedata_req else 'N/A'}\")\n            fixes_correct = False\n        \n        return fixes_correct",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/cloudtest/fixed_upload_test.py",
      "tags": [
        "testing",
        "remarkable",
        "cloud-sync",
        "document-upload",
        "pdf",
        "metadata",
        "hash-calculation",
        "api-simulation",
        "file-format",
        "debugging"
      ],
      "updated_at": "2025-12-07T01:34:30.563371",
      "usage_example": "# Initialize the test class\ntest = FixedUploadTest()\n\n# Simulate a complete upload with all fixes applied\nresults = test.simulate_fixed_upload(doc_name=\"My Test Document\")\n\n# Verify that all fixes were correctly applied\nfixes_valid = test.verify_fixes_applied(results)\n\nif fixes_valid:\n    print(\"All fixes verified successfully!\")\n    print(f\"Document UUID: {results['document_uuid']}\")\n    print(f\"Components created: {list(results['components'].keys())}\")\nelse:\n    print(\"Some fixes were not applied correctly\")\n\n# Access individual components if needed\ndoc_uuid = str(uuid.uuid4())\nmetadata_bytes = test.create_fixed_document_metadata(doc_uuid, \"Test Doc\")\npdf_bytes = test.create_test_pdf_content()\npagedata_bytes = test.create_fixed_pagedata()\n\n# Calculate hashes for verification\nsha256_hash = test.calculate_sha256_hash(pdf_bytes)\ncrc32c_hash = test.calculate_crc32c_hash(pdf_bytes)"
    },
    {
      "best_practices": [
        "Always instantiate within a try-except block to handle authentication failures gracefully",
        "The analyzer makes multiple HTTP requests to the reMarkable API - be mindful of rate limiting",
        "Ensure the RemarkableAuth module is properly configured before instantiation",
        "The analyze_real_app_document method performs extensive network operations and may take time for large documents",
        "Results should be saved using save_analysis_results() for persistence and later review",
        "The target_name parameter in analyze_real_app_document uses case-insensitive substring matching",
        "Component analysis includes validation of PDF headers, JSON metadata parsing, and size verification",
        "The class maintains a session object throughout its lifetime - reuse the same instance for multiple analyses",
        "Raw HTTP logs can be saved using save_raw_logs() if the global raw_logs variable is populated",
        "The analyzer expects documents to follow the reMarkable cloud storage structure with .metadata, .content, .pdf, and .pagedata components"
      ],
      "class_interface": {
        "attributes": [
          {
            "description": "Base directory path where the class file is located, used for resolving relative paths for saving results",
            "is_class_variable": false,
            "name": "base_dir",
            "type": "Path"
          },
          {
            "description": "Authenticated HTTP session object for making requests to the reMarkable cloud API, obtained from RemarkableAuth",
            "is_class_variable": false,
            "name": "session",
            "type": "requests.Session"
          }
        ],
        "methods": [
          {
            "is_property": false,
            "is_static": false,
            "name": "__init__",
            "parameters": {},
            "purpose": "Initializes the analyzer, sets up base directory, authenticates with reMarkable service, and creates an authenticated session",
            "returns": "None - raises RuntimeError if authentication fails",
            "signature": "__init__(self)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "analyze_real_app_document",
            "parameters": {
              "target_name": "Name or partial name of the document to analyze (case-insensitive substring match). Defaults to 'Pylontech force H3 datasheet'"
            },
            "purpose": "Performs comprehensive analysis of a document uploaded by the reMarkable app, including fetching root schema, document schema, all components, and validating size consistency",
            "returns": "Dictionary containing analysis results with keys: success (bool), document_name, document_uuid, document_hash, root_size_claim, actual_docschema_size, total_component_size, size comparison booleans, components (dict), and metadata. Returns {'success': False, 'error': str} on failure",
            "signature": "analyze_real_app_document(self, target_name: str = 'Pylontech force H3 datasheet') -> dict"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "save_analysis_results",
            "parameters": {
              "results": "Dictionary containing analysis results from analyze_real_app_document method"
            },
            "purpose": "Saves analysis results to a JSON file in the test_results/real_app_analysis directory with timestamp",
            "returns": "Path object pointing to the saved JSON file",
            "signature": "save_analysis_results(self, results: dict) -> Path"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "save_raw_logs",
            "parameters": {},
            "purpose": "Saves raw HTTP request/response logs to a JSON file if the global raw_logs variable is populated",
            "returns": "Path object pointing to the saved log file, or None if no logs are available",
            "signature": "save_raw_logs(self) -> Path"
          }
        ]
      },
      "complexity": "complex",
      "component_type": "class",
      "conditional_imports": [
        {
          "condition": "Required for authentication with reMarkable service - must be available in the same directory or Python path",
          "import": "from auth import RemarkableAuth",
          "optional": false
        }
      ],
      "created_at": "2025-12-07 00:33:28",
      "decorators": [],
      "dependencies": [
        "pathlib",
        "json",
        "time",
        "requests",
        "re"
      ],
      "description": "Analyzes documents uploaded by the real reMarkable app by fetching and examining their structure, metadata, and components from the reMarkable cloud sync service.",
      "docstring": "Analyzes documents uploaded by the real reMarkable app",
      "id": 2064,
      "imports": [
        "import os",
        "import json",
        "import time",
        "from pathlib import Path",
        "import requests",
        "from auth import RemarkableAuth",
        "import re"
      ],
      "imports_required": [
        "import os",
        "import json",
        "import time",
        "from pathlib import Path",
        "import requests",
        "import re"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 381,
      "line_start": 71,
      "name": "RealAppUploadAnalyzer",
      "parameters": [
        {
          "annotation": "",
          "default": null,
          "name": "bases"
        }
      ],
      "parameters_explained": {
        "__init__": "No parameters required. The constructor automatically initializes the base directory path and authenticates with the reMarkable service using the RemarkableAuth class. Raises RuntimeError if authentication fails."
      },
      "parent_class": null,
      "purpose": "This class provides comprehensive analysis of documents stored in the reMarkable cloud infrastructure. It authenticates with the reMarkable sync service, retrieves document schemas, fetches individual components (PDF files, metadata, content, pagedata), validates size consistency, and provides detailed reporting on document structure. The analyzer is designed to understand how the reMarkable app structures and stores documents in the cloud, including the relationship between root.docSchema entries, document schemas, and their constituent components.",
      "return_annotation": null,
      "return_explained": "The class instantiation returns a RealAppUploadAnalyzer object. The main method analyze_real_app_document() returns a dictionary with keys: 'success' (bool), 'document_name', 'document_uuid', 'document_hash', 'root_size_claim', 'actual_docschema_size', 'total_component_size', size comparison booleans, 'components' (dict of component details), and 'metadata'. On failure, returns {'success': False, 'error': error_message}. The save methods return Path objects pointing to saved files.",
      "settings_required": [
        "RemarkableAuth module must be available and properly configured",
        "Valid reMarkable account credentials (handled by RemarkableAuth)",
        "Network access to eu.tectonic.remarkable.com",
        "Authenticated session with reMarkable cloud service"
      ],
      "source_code": "class RealAppUploadAnalyzer:\n    \"\"\"Analyzes documents uploaded by the real reMarkable app\"\"\"\n    \n    def __init__(self):\n        self.base_dir = Path(__file__).parent\n        \n        # Load auth session\n        from auth import RemarkableAuth\n        auth = RemarkableAuth()\n        self.session = auth.get_authenticated_session()\n        \n        if not self.session:\n            raise RuntimeError(\"Failed to authenticate with reMarkable\")\n        \n        print(\"\ud83d\udd0d Real App Upload Analyzer Initialized\")\n    \n    def analyze_real_app_document(self, target_name: str = \"Pylontech force H3 datasheet\") -> dict:\n        \"\"\"Analyze a document uploaded by the real reMarkable app\"\"\"\n        print(f\"\ud83c\udfaf Analyzing Real App Document: '{target_name}'\")\n        print(\"=\" * 60)\n        \n        try:\n            # Step 1: Get current root.docSchema from server\n            print(f\"\\n\ud83d\udccb Step 1: Fetching current root.docSchema...\")\n            root_response = self.session.get(\"https://eu.tectonic.remarkable.com/sync/v4/root\")\n            root_response.raise_for_status()\n            root_data = root_response.json()\n            current_root_hash = root_data['hash']\n            \n            print(f\"\u2705 Root hash: {current_root_hash}\")\n            print(f\"\u2705 Generation: {root_data.get('generation')}\")\n            \n            # Step 2: Fetch root.docSchema content\n            print(f\"\\n\ud83d\udccb Step 2: Fetching root.docSchema content...\")\n            root_content_response = self.session.get(f\"https://eu.tectonic.remarkable.com/sync/v3/files/{current_root_hash}\")\n            root_content_response.raise_for_status()\n            root_content = root_content_response.text\n            \n            print(f\"\u2705 Root.docSchema size: {len(root_content)} bytes\")\n            lines = root_content.strip().split('\\n')\n            print(f\"\ud83d\udcc4 Root.docSchema entries ({len(lines) - 1} total):\")\n            \n            for i, line in enumerate(lines):\n                if i == 0:\n                    print(f\"   Version: {line}\")\n                else:\n                    print(f\"   Entry {i}: {line}\")\n            \n            # Step 3: Look for our target document\n            print(f\"\\n\ud83d\udccb Step 3: Looking for document containing '{target_name}'...\")\n            target_documents = []\n            \n            lines = root_content.strip().split('\\n')\n            for line in lines[1:]:  # Skip version header\n                if ':' in line:\n                    parts = line.split(':')\n                    if len(parts) >= 5:\n                        doc_hash = parts[0]\n                        doc_uuid = parts[2] \n                        node_type = parts[3]\n                        size = parts[4]\n                        \n                        # Fetch the document's docSchema to check metadata\n                        try:\n                            doc_response = self.session.get(f\"https://eu.tectonic.remarkable.com/sync/v3/files/{doc_hash}\")\n                            doc_response.raise_for_status()\n                            doc_schema = doc_response.text\n                            \n                            # Look for metadata component\n                            doc_lines = doc_schema.strip().split('\\n')\n                            for doc_line in doc_lines[1:]:\n                                if '.metadata' in doc_line and 
':' in doc_line:\n                                    metadata_parts = doc_line.split(':')\n                                    if len(metadata_parts) >= 3:\n                                        metadata_hash = metadata_parts[0]\n                                        \n                                        # Fetch metadata content\n                                        try:\n                                            metadata_response = self.session.get(f\"https://eu.tectonic.remarkable.com/sync/v3/files/{metadata_hash}\")\n                                            metadata_response.raise_for_status()\n                                            metadata = json.loads(metadata_response.text)\n                                            \n                                            doc_name = metadata.get('visibleName', '')\n                                            if target_name.lower() in doc_name.lower():\n                                                target_documents.append({\n                                                    'hash': doc_hash,\n                                                    'uuid': doc_uuid,\n                                                    'size': size,\n                                                    'node_type': node_type,\n                                                    'name': doc_name,\n                                                    'line': line,\n                                                    'metadata': metadata,\n                                                    'docschema_size': len(doc_schema)\n                                                })\n                                                print(f\"\ud83c\udfaf FOUND: '{doc_name}' (UUID: {doc_uuid[:8]}...)\")\n                                                print(f\"   Root entry: {line}\")\n                                                print(f\"   DocSchema size: {len(doc_schema)} bytes\")\n                                                print(f\"   Root claimed size: {size} bytes\")\n                                                print(f\"   Size match: {'\u2705 YES' if str(len(doc_schema)) == size else '\u274c NO'}\")\n                                                break\n                                        except:\n                                            continue\n                                    break\n                        except:\n                            continue\n            \n            if not target_documents:\n                print(f\"\u274c Document containing '{target_name}' not found\")\n                return {'success': False, 'error': 'Document not found'}\n            \n            # Step 4: Analyze the first matching document in detail\n            target_doc = target_documents[0]\n            print(f\"\\n\ud83d\udccb Step 4: Deep Analysis of '{target_doc['name']}'\")\n            print(f\"   Document UUID: {target_doc['uuid']}\")\n            print(f\"   Document hash: {target_doc['hash']}\")\n            print(f\"   Node type: {target_doc['node_type']}\")\n            print(f\"   Root.docSchema size claim: {target_doc['size']} bytes\")\n            print(f\"   Actual docSchema size: {target_doc['docschema_size']} bytes\")\n            print(f\"   Size consistency: {'\u2705 CORRECT' if str(target_doc['docschema_size']) == target_doc['size'] else '\u274c MISMATCH'}\")\n            \n            # Step 5: Fetch and analyze the document's docSchema\n            print(f\"\\n\ud83d\udccb Step 5: Analyzing document's 
docSchema structure...\")\n            doc_response = self.session.get(f\"https://eu.tectonic.remarkable.com/sync/v3/files/{target_doc['hash']}\")\n            doc_response.raise_for_status()\n            doc_schema_content = doc_response.text\n            \n            lines = doc_schema_content.strip().split('\\n')\n            print(f\"\ud83d\udcc4 Full docSchema content ({len(doc_schema_content)} bytes):\")\n            for i, line in enumerate(lines):\n                print(f\"   Line {i}: {line}\")\n            \n            # Step 6: Analyze each component\n            print(f\"\\n\ud83d\udccb Step 6: Analyzing each document component...\")\n            lines = doc_schema_content.strip().split('\\n')\n            version = lines[0]\n            print(f\"\ud83d\udcca DocSchema version: {version}\")\n            \n            components = {}\n            component_sizes = []\n            \n            for i, line in enumerate(lines[1:], 1):\n                if ':' in line:\n                    parts = line.split(':')\n                    if len(parts) >= 5:\n                        comp_hash = parts[0]\n                        comp_name = parts[2]\n                        comp_size = int(parts[4])\n                        component_sizes.append(comp_size)\n                        \n                        print(f\"\\n   \ud83d\udd0d Component {i}: {comp_name}\")\n                        print(f\"       Hash: {comp_hash}\")\n                        print(f\"       Expected size: {comp_size}\")\n                        \n                        try:\n                            comp_response = self.session.get(f\"https://eu.tectonic.remarkable.com/sync/v3/files/{comp_hash}\")\n                            comp_response.raise_for_status()\n                            actual_size = len(comp_response.content)\n                            \n                            print(f\"       \u2705 Actual size: {actual_size} bytes\")\n                            print(f\"       \ud83d\udcca Size match: {'\u2705 YES' if actual_size == comp_size else '\u274c NO'}\")\n                            \n                            components[comp_name] = {\n                                'hash': comp_hash,\n                                'expected_size': comp_size,\n                                'actual_size': actual_size,\n                                'content': comp_response.content\n                            }\n                            \n                            # Component-specific analysis\n                            if comp_name.endswith('.pdf'):\n                                print(f\"       \ud83d\udcc4 PDF content preview: {comp_response.content[:50]}\")\n                                if comp_response.content.startswith(b'%PDF'):\n                                    print(f\"       \u2705 Valid PDF header\")\n                                    # Try to determine PDF size/pages\n                                    pdf_size_mb = len(comp_response.content) / (1024 * 1024)\n                                    print(f\"       \ud83d\udccf PDF file size: {pdf_size_mb:.2f} MB\")\n                                else:\n                                    print(f\"       \u274c Invalid PDF header\")\n                            \n                            elif comp_name.endswith('.metadata'):\n                                try:\n                                    metadata_json = json.loads(comp_response.text)\n                                    print(f\"       \u2705 Valid JSON 
metadata\")\n                                    print(f\"       \ud83d\udcdd Name: {metadata_json.get('visibleName', 'N/A')}\")\n                                    print(f\"       \ud83d\udcc1 Parent: {metadata_json.get('parent', 'root')}\")\n                                    print(f\"       \ud83d\uddc2\ufe0f Type: {metadata_json.get('type', 'N/A')}\")\n                                    print(f\"       \u23f0 Created: {metadata_json.get('createdTime', 'N/A')}\")\n                                    print(f\"       \u23f0 Modified: {metadata_json.get('lastModified', 'N/A')}\")\n                                    \n                                    # Check for content_data \n                                    if 'content_data' in comp_response.text:\n                                        print(f\"       \ud83d\udccb Has content_data field\")\n                                        try:\n                                            # Try to extract content_data\n                                            import re\n                                            content_data_match = re.search(r'\"content_data\":\\\\s*\"([^\"]*)\"', comp_response.text)\n                                            if content_data_match:\n                                                content_data_str = content_data_match.group(1)\n                                                print(f\"       \ud83d\udccb Content data: {content_data_str[:100]}...\")\n                                        except:\n                                            pass\n                                    \n                                    print(f\"       \ud83d\udcc4 Full metadata JSON:\")\n                                    for key, value in metadata_json.items():\n                                        print(f\"           {key}: {value}\")\n                                        \n                                except Exception as json_e:\n                                    print(f\"       \u274c Invalid JSON: {json_e}\")\n                            \n                            elif comp_name.endswith('.content'):\n                                print(f\"       \ud83d\udcc4 Content preview: {comp_response.text[:100]}...\")\n                            \n                            elif comp_name.endswith('.pagedata'):\n                                if comp_size == 0:\n                                    print(f\"       \ud83d\udcc4 Empty pagedata (as expected for PDFs)\")\n                                else:\n                                    print(f\"       \ud83d\udcc4 Pagedata preview: {comp_response.text[:100]}...\")\n                            \n                        except Exception as e:\n                            print(f\"       \u274c Component error: {e}\")\n                            components[comp_name] = {\n                                'hash': comp_hash,\n                                'expected_size': comp_size,\n                                'error': str(e)\n                            }\n            \n            # Step 7: Final analysis and comparison\n            print(f\"\\n\ud83d\udccb Step 7: Final Analysis\")\n            print(\"=\" * 50)\n            \n            total_component_size = sum(component_sizes)\n            actual_docschema_size = len(doc_schema_content)\n            claimed_size = int(target_doc['size'])\n            \n            print(f\"\ud83d\udcca Size Analysis Results:\")\n            print(f\"   Document name: {target_doc['name']}\")\n           
 print(f\"   Root.docSchema claimed size: {claimed_size} bytes\")\n            print(f\"   Actual docSchema size: {actual_docschema_size} bytes\")\n            print(f\"   Sum of component sizes: {total_component_size} bytes\")\n            print(f\"   \")\n            print(f\"   \u2705 Key Findings:\")\n            print(f\"   \u2022 Root size claim matches actual docSchema: {'\u2705 YES' if claimed_size == actual_docschema_size else '\u274c NO'}\")\n            print(f\"   \u2022 Root size claim matches component sum: {'\u2705 YES' if claimed_size == total_component_size else '\u274c NO'}\")\n            print(f\"   \u2022 DocSchema size matches component sum: {'\u2705 YES' if actual_docschema_size == total_component_size else '\u274c NO'}\")\n            \n            # Determine the correct pattern\n            if claimed_size == actual_docschema_size:\n                print(f\"   \ud83c\udfaf CONCLUSION: Root.docSchema stores the actual docSchema file size\")\n            elif claimed_size == total_component_size:\n                print(f\"   \ud83c\udfaf CONCLUSION: Root.docSchema stores the sum of component sizes\")\n            else:\n                print(f\"   \ud83e\udd14 CONCLUSION: Unclear pattern - sizes don't match expected relationships\")\n            \n            print(f\"\\n\ud83d\udccb Component Breakdown:\")\n            for name, details in components.items():\n                if 'error' not in details:\n                    print(f\"   \u2705 {name}: {details['actual_size']} bytes\")\n                else:\n                    print(f\"   \u274c {name}: {details['error']}\")\n            \n            return {\n                'success': True,\n                'document_name': target_doc['name'],\n                'document_uuid': target_doc['uuid'],\n                'document_hash': target_doc['hash'],\n                'root_size_claim': claimed_size,\n                'actual_docschema_size': actual_docschema_size,\n                'total_component_size': total_component_size,\n                'size_claim_matches_docschema': claimed_size == actual_docschema_size,\n                'size_claim_matches_components': claimed_size == total_component_size,\n                'docschema_matches_components': actual_docschema_size == total_component_size,\n                'components': components,\n                'metadata': target_doc['metadata']\n            }\n            \n        except Exception as e:\n            print(f\"\u274c Analysis failed: {e}\")\n            return {'success': False, 'error': str(e)}\n    \n    def save_analysis_results(self, results: dict) -> Path:\n        \"\"\"Save analysis results to file\"\"\"\n        results_dir = self.base_dir / \"test_results\" / \"real_app_analysis\"\n        results_dir.mkdir(parents=True, exist_ok=True)\n        \n        timestamp = int(time.time())\n        results_file = results_dir / f\"real_app_analysis_{timestamp}.json\"\n        \n        with open(results_file, 'w') as f:\n            json.dump(results, f, indent=2, default=str)\n        \n        print(f\"\\n\ud83d\udcbe Analysis results saved to: {results_file}\")\n        return results_file\n    \n    def save_raw_logs(self) -> Path:\n        \"\"\"Save raw HTTP logs\"\"\"\n        global raw_logs\n        \n        if not raw_logs:\n            return None\n        \n        logs_dir = self.base_dir / \"test_results\" / \"real_app_analysis\"\n        logs_dir.mkdir(parents=True, exist_ok=True)\n        \n        timestamp = int(time.time())\n        
log_file = logs_dir / f\"real_app_requests_{timestamp}.json\"\n        \n        with open(log_file, 'w') as f:\n            json.dump(raw_logs, f, indent=2, default=str)\n        \n        print(f\"\ud83d\udd0d Raw HTTP logs saved to: {log_file}\")\n        return log_file",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/cloudtest/test_real_app_upload.py",
      "tags": [
        "remarkable",
        "document-analysis",
        "cloud-sync",
        "pdf-analysis",
        "metadata-extraction",
        "file-structure",
        "api-client",
        "document-validation",
        "size-verification",
        "schema-analysis"
      ],
      "updated_at": "2025-12-07T01:33:28.784006",
      "usage_example": "# Instantiate the analyzer (requires authentication)\nanalyzer = RealAppUploadAnalyzer()\n\n# Analyze a specific document by name\nresults = analyzer.analyze_real_app_document(target_name=\"Pylontech force H3 datasheet\")\n\nif results['success']:\n    print(f\"Document: {results['document_name']}\")\n    print(f\"UUID: {results['document_uuid']}\")\n    print(f\"Size matches: {results['size_claim_matches_docschema']}\")\n    \n    # Save results to file\n    results_file = analyzer.save_analysis_results(results)\n    print(f\"Results saved to: {results_file}\")\n    \n    # Optionally save raw HTTP logs\n    log_file = analyzer.save_raw_logs()\nelse:\n    print(f\"Analysis failed: {results['error']}\")"
    },
    {
      "best_practices": [],
      "class_interface": {
        "attributes": [],
        "methods": [
          {
            "is_property": false,
            "is_static": false,
            "name": "__init__",
            "parameters": {
              "enable_raw_logging": "Type: bool"
            },
            "purpose": "Internal method:   init  ",
            "returns": "None",
            "signature": "__init__(self, enable_raw_logging)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "save_raw_logs",
            "parameters": {},
            "purpose": "Save captured raw HTTP logs to file for comparison with real app logs",
            "returns": "Returns Path",
            "signature": "save_raw_logs(self) -> Path"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "log_test",
            "parameters": {
              "details": "Type: str",
              "success": "Type: bool",
              "test_name": "Type: str"
            },
            "purpose": "Log test result",
            "returns": "None",
            "signature": "log_test(self, test_name, success, details)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "test_edit_document_name",
            "parameters": {},
            "purpose": "Test 1: Edit existing document name",
            "returns": "Returns bool",
            "signature": "test_edit_document_name(self) -> bool"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "test_create_test_pdf",
            "parameters": {},
            "purpose": "Create a test PDF for upload testing",
            "returns": "Returns Path",
            "signature": "test_create_test_pdf(self) -> Path"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "test_upload_new_pdf",
            "parameters": {
              "parent_uuid": "Type: str"
            },
            "purpose": "Test: Upload new PDF document to specified folder (or root if no parent_uuid)",
            "returns": "Returns bool",
            "signature": "test_upload_new_pdf(self, parent_uuid) -> bool"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "test_create_new_notebook",
            "parameters": {},
            "purpose": "Test 3: Create new notebook",
            "returns": "Returns bool",
            "signature": "test_create_new_notebook(self) -> bool"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "test_move_document",
            "parameters": {},
            "purpose": "Test 4: Move document to different folder",
            "returns": "Returns bool",
            "signature": "test_move_document(self) -> bool"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "test_hash_consistency",
            "parameters": {},
            "purpose": "Test 5: Verify hash consistency after uploads",
            "returns": "Returns bool",
            "signature": "test_hash_consistency(self) -> bool"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "validate_uploaded_document",
            "parameters": {
              "document_uuid": "Type: str"
            },
            "purpose": "Validate that an uploaded document has all required components accessible",
            "returns": "Returns bool",
            "signature": "validate_uploaded_document(self, document_uuid) -> bool"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "analyze_root_docschema_sizes",
            "parameters": {},
            "purpose": "Analyze the sizes in root.docSchema to identify patterns and issues",
            "returns": "Returns bool",
            "signature": "analyze_root_docschema_sizes(self) -> bool"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "test_document_download_chain",
            "parameters": {
              "document_uuid": "Type: str"
            },
            "purpose": "Follow the complete download chain for a document to identify where it fails",
            "returns": "Returns bool",
            "signature": "test_document_download_chain(self, document_uuid) -> bool"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "test_existing_document_chain",
            "parameters": {},
            "purpose": "Test the complete download chain for an existing uploaded document - NO NEW UPLOADS",
            "returns": "Returns Dict[str, Any]",
            "signature": "test_existing_document_chain(self) -> Dict[str, Any]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "comprehensive_cloud_analysis",
            "parameters": {},
            "purpose": "Run comprehensive analysis of cloud state and replica sync process",
            "returns": "Returns Dict[str, Any]",
            "signature": "comprehensive_cloud_analysis(self) -> Dict[str, Any]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "create_demo_upload_scenario",
            "parameters": {},
            "purpose": "Create a comprehensive demo scenario",
            "returns": "None",
            "signature": "create_demo_upload_scenario(self)"
          }
        ]
      },
      "complexity": "moderate",
      "component_type": "class",
      "conditional_imports": [],
      "created_at": "2025-12-07 00:29:41",
      "decorators": [],
      "dependencies": [],
      "description": "Test suite for reMarkable upload functionality",
      "docstring": "Test suite for reMarkable upload functionality",
      "id": 2053,
      "imports": [
        "import os",
        "import json",
        "import time",
        "from pathlib import Path",
        "from typing import Dict",
        "from typing import Any",
        "import uuid",
        "import requests",
        "from auth import RemarkableAuth",
        "from upload_manager import RemarkableUploadManager",
        "from reportlab.pdfgen import canvas",
        "from reportlab.lib.pagesizes import letter",
        "from local_replica_v2 import RemarkableReplicaBuilder"
      ],
      "imports_required": [
        "import os",
        "import json",
        "import time",
        "from pathlib import Path",
        "from typing import Dict"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 1272,
      "line_start": 107,
      "name": "RemarkableUploadTests",
      "parameters": [
        {
          "annotation": "",
          "default": null,
          "name": "bases"
        }
      ],
      "parameters_explained": {
        "bases": "Parameter of type "
      },
      "parent_class": null,
      "purpose": "Test suite for reMarkable upload functionality",
      "return_annotation": null,
      "return_explained": "Returns unspecified type",
      "settings_required": [],
      "source_code": "class RemarkableUploadTests:\n    \"\"\"Test suite for reMarkable upload functionality\"\"\"\n    \n    def __init__(self, enable_raw_logging: bool = True):\n        self.base_dir = Path(__file__).parent\n        self.test_results = []\n        self.raw_logging = enable_raw_logging\n        \n        if self.raw_logging:\n            print(\"\ud83d\udd0d Raw HTTP request logging ENABLED\")\n        else:\n            print(\"\ud83d\udd0d Raw HTTP request logging DISABLED\")\n        \n        # Load auth session\n        from auth import RemarkableAuth\n        auth = RemarkableAuth()\n        self.session = auth.get_authenticated_session()\n        \n        if not self.session:\n            raise RuntimeError(\"Failed to authenticate with reMarkable\")\n        \n        # Load upload manager\n        from upload_manager import RemarkableUploadManager\n        database_path = self.base_dir / \"remarkable_replica_v2\" / \"replica_database.json\"\n        self.uploader = RemarkableUploadManager(self.session, database_path)\n        \n        print(\"\ud83e\uddea reMarkable Upload Test Suite Initialized\")\n    \n    def save_raw_logs(self) -> Path:\n        \"\"\"Save captured raw HTTP logs to file for comparison with real app logs\"\"\"\n        global raw_logs\n        \n        if not raw_logs:\n            print(\"\ud83d\udcdd No raw logs to save\")\n            return None\n        \n        logs_dir = self.base_dir / \"test_results\" / \"raw_logs\"\n        logs_dir.mkdir(parents=True, exist_ok=True)\n        \n        timestamp = int(time.time())\n        log_file = logs_dir / f\"raw_requests_{timestamp}.json\"\n        \n        # Save detailed logs\n        with open(log_file, 'w') as f:\n            json.dump(raw_logs, f, indent=2, default=str)\n        \n        # Also create a simplified log similar to real app logs\n        simple_log_file = logs_dir / f\"simple_requests_{timestamp}.txt\"\n        with open(simple_log_file, 'w') as f:\n            f.write(\"=== reMarkable Upload Test - Raw Request Log ===\\n\\n\")\n            for i, log in enumerate(raw_logs, 1):\n                f.write(f\"Request #{i}: {log['method']} {log['url']}\\n\")\n                f.write(f\"Status: {log['response_status']}\\n\")\n                \n                # Key headers\n                if 'rm-filename' in log['headers']:\n                    f.write(f\"rm-filename: {log['headers']['rm-filename']}\\n\")\n                if 'x-goog-hash' in log['headers']:\n                    f.write(f\"x-goog-hash: {log['headers']['x-goog-hash']}\\n\")\n                if 'Content-Type' in log['headers']:\n                    f.write(f\"Content-Type: {log['headers']['Content-Type']}\\n\")\n                \n                # Body information\n                f.write(f\"Body size: {log['body_size']} bytes\\n\")\n                \n                # Include full text body if available\n                if log.get('body_type') == 'text' and log.get('body_text'):\n                    f.write(f\"Body content:\\n{log['body_text']}\\n\")\n                elif log.get('body_type') == 'binary':\n                    f.write(f\"Body: {log['body_text']}\\n\")\n                elif log.get('json_body'):\n                    f.write(f\"JSON Body: {json.dumps(log['json_body'], indent=2)}\\n\")\n                \n                # Response information\n                if log.get('response_text'):\n                    f.write(f\"Response: {log['response_text']}\\n\")\n                \n                
f.write(\"-\" * 50 + \"\\n\\n\")\n        \n        print(f\"\ud83d\udcdd Raw logs saved:\")\n        print(f\"   Detailed: {log_file}\")\n        print(f\"   Simple: {simple_log_file}\")\n        print(f\"   Total requests: {len(raw_logs)}\")\n        \n        return log_file\n\n    def log_test(self, test_name: str, success: bool, details: str = \"\"):\n        \"\"\"Log test result\"\"\"\n        status = \"\u2705 PASS\" if success else \"\u274c FAIL\"\n        print(f\"{status} {test_name}\")\n        if details:\n            print(f\"   {details}\")\n        \n        self.test_results.append({\n            'test': test_name,\n            'success': success,\n            'details': details,\n            'timestamp': time.time()\n        })\n    \n    def test_edit_document_name(self) -> bool:\n        \"\"\"Test 1: Edit existing document name\"\"\"\n        try:\n            print(\"\\n\ud83d\udd27 Test 1: Edit Document Name\")\n            \n            # Find a document to edit\n            database = self.uploader.database\n            document_uuid = None\n            original_name = None\n            \n            for uuid, node in database['nodes'].items():\n                if node['node_type'] == 'document' and 'pdf' not in node['name'].lower():\n                    document_uuid = uuid\n                    original_name = node['name']\n                    break\n            \n            if not document_uuid:\n                self.log_test(\"Edit Document Name\", False, \"No suitable document found\")\n                return False\n            \n            # Generate new name\n            new_name = f\"TEST_RENAMED_{int(time.time())}\"\n            print(f\"Renaming '{original_name}' to '{new_name}'\")\n            \n            # Perform edit\n            success = self.uploader.edit_document_metadata(document_uuid, new_name=new_name)\n            \n            if success:\n                # Verify in database\n                updated_node = self.uploader.database['nodes'][document_uuid]\n                if updated_node['metadata']['visibleName'] == new_name:\n                    self.log_test(\"Edit Document Name\", True, f\"Renamed to: {new_name}\")\n                    \n                    # Rename back to original\n                    self.uploader.edit_document_metadata(document_uuid, new_name=original_name)\n                    return True\n                else:\n                    self.log_test(\"Edit Document Name\", False, \"Database not updated\")\n                    return False\n            else:\n                self.log_test(\"Edit Document Name\", False, \"Upload failed\")\n                return False\n                \n        except Exception as e:\n            self.log_test(\"Edit Document Name\", False, f\"Exception: {e}\")\n            return False\n    \n    def test_create_test_pdf(self) -> Path:\n        \"\"\"Create a test PDF for upload testing\"\"\"\n        try:\n            from reportlab.pdfgen import canvas\n            from reportlab.lib.pagesizes import letter\n            \n            test_pdf_path = self.base_dir / \"test_uploads\" / \"test_document.pdf\"\n            test_pdf_path.parent.mkdir(exist_ok=True)\n            \n            # Create simple PDF\n            c = canvas.Canvas(str(test_pdf_path), pagesize=letter)\n            c.drawString(100, 750, f\"reMarkable Upload Test Document\")\n            c.drawString(100, 720, f\"Generated: {time.strftime('%Y-%m-%d %H:%M:%S')}\")\n            c.drawString(100, 690, f\"Test UUID: 
{uuid.uuid4()}\")\n            c.drawString(100, 660, \"This is a test document for upload functionality.\")\n            c.showPage()\n            c.save()\n            \n            return test_pdf_path\n            \n        except ImportError:\n            # Create simple text-based PDF content\n            test_pdf_path = self.base_dir / \"test_uploads\" / \"test_document.txt\"\n            test_pdf_path.parent.mkdir(exist_ok=True)\n            \n            with open(test_pdf_path, 'w') as f:\n                f.write(f\"reMarkable Upload Test Document\\n\")\n                f.write(f\"Generated: {time.strftime('%Y-%m-%d %H:%M:%S')}\\n\")\n                f.write(f\"Test UUID: {uuid.uuid4()}\\n\")\n                f.write(\"This is a test document for upload functionality.\\n\")\n            \n            print(\"\u26a0\ufe0f  Using text file instead of PDF (reportlab not available)\")\n            return test_pdf_path\n    \n    def test_upload_new_pdf(self, parent_uuid: str = \"\") -> bool:\n        \"\"\"Test: Upload new PDF document to specified folder (or root if no parent_uuid)\"\"\"\n        try:\n            target_location = \"specified folder\" if parent_uuid else \"root folder\"\n            folder_info = f\" (UUID: {parent_uuid})\" if parent_uuid else \"\"\n            print(f\"\\n\ud83d\udcc4 Test: Upload New PDF Document to {target_location}{folder_info}\")\n            \n            # Create test PDF\n            test_pdf = self.test_create_test_pdf()\n            test_name = f\"UploadTest_{int(time.time())}\"\n            \n            print(f\"Uploading: {test_pdf} as '{test_name}' to {target_location}\")\n            \n            # Get initial document count from current database (no pre-sync needed)\n            initial_count = len([n for n in self.uploader.database['nodes'].values() \n                               if n['node_type'] == 'document'])\n            print(f\"\ud83d\udcca Initial document count: {initial_count}\")\n            \n            # Upload document with parent parameter - this should follow the complete sequence:\n            # 1-5: Document components + docSchema\n            # 6: root.docSchema update  \n            # 7: roothash update\n            # 8: Replica sync to verify\n            success = self.uploader.upload_pdf_document(str(test_pdf), test_name, parent_uuid)\n            \n            if success:\n                print(f\"\u2705 Upload completed - document should be visible in {target_location}\")\n                \n                # The upload_manager already runs replica sync, so no need to duplicate it\n                # Just verify the result\n                new_count = len([n for n in self.uploader.database['nodes'].values() \n                               if n['node_type'] == 'document'])\n                print(f\"\ud83d\udcca New document count: {new_count}\")\n                \n                # Find the new document\n                new_doc = None\n                for node in self.uploader.database['nodes'].values():\n                    if node.get('name') == test_name:\n                        new_doc = node\n                        break\n                \n                if new_doc:\n                    # Verify parent location if specified\n                    actual_parent = new_doc.get('metadata', {}).get('parent', '')\n                    expected_parent = parent_uuid if parent_uuid else ''\n                    \n                    if actual_parent == expected_parent:\n                        location_status 
= f\"\u2705 in correct location ({target_location})\"\n                    else:\n                        location_status = f\"\u274c wrong location (expected: {expected_parent}, actual: {actual_parent})\"\n                    \n                    self.log_test(\"Upload New PDF\", True, \n                                f\"Document created: {test_name} (UUID: {new_doc['uuid'][:8]}...) {location_status}\")\n                    return True\n                elif new_count > initial_count:\n                    self.log_test(\"Upload New PDF\", True, \n                                f\"Document uploaded successfully (count increased by {new_count - initial_count})\")\n                    return True\n                else:\n                    self.log_test(\"Upload New PDF\", False, \"Document not found and count unchanged\")\n                    return False\n            else:\n                self.log_test(\"Upload New PDF\", False, \"Upload failed\")\n                return False\n                \n        except Exception as e:\n            self.log_test(\"Upload New PDF\", False, f\"Exception: {e}\")\n            return False\n    \n    def test_create_new_notebook(self) -> bool:\n        \"\"\"Test 3: Create new notebook\"\"\"\n        try:\n            print(\"\\n\ud83d\udcd3 Test 3: Create New Notebook\")\n            \n            notebook_name = f\"TestNotebook_{int(time.time())}\"\n            print(f\"Creating notebook: '{notebook_name}'\")\n            \n            # Get initial document count\n            initial_count = len([n for n in self.uploader.database['nodes'].values() \n                               if n['node_type'] == 'document'])\n            \n            # Create notebook\n            success = self.uploader.create_notebook(notebook_name)\n            \n            if success:\n                # Verify in database\n                new_count = len([n for n in self.uploader.database['nodes'].values() \n                               if n['node_type'] == 'document'])\n                \n                if new_count > initial_count:\n                    # Find the new notebook\n                    new_notebook = None\n                    for node in self.uploader.database['nodes'].values():\n                        if node['name'] == notebook_name:\n                            new_notebook = node\n                            break\n                    \n                    if new_notebook:\n                        # Verify it has content structure\n                        has_content = new_notebook['component_hashes'].get('content') is not None\n                        has_rm_files = len(new_notebook['component_hashes'].get('rm_files', [])) > 0\n                        \n                        if has_content and has_rm_files:\n                            self.log_test(\"Create New Notebook\", True, \n                                        f\"Notebook created: {notebook_name} with content + RM files\")\n                            return True\n                        else:\n                            self.log_test(\"Create New Notebook\", False, \n                                        \"Notebook missing content structure\")\n                            return False\n                    else:\n                        self.log_test(\"Create New Notebook\", False, \"Notebook not found in database\")\n                        return False\n                else:\n                    self.log_test(\"Create New Notebook\", False, \"Document count unchanged\")\n            
        return False\n            else:\n                self.log_test(\"Create New Notebook\", False, \"Creation failed\")\n                return False\n                \n        except Exception as e:\n            self.log_test(\"Create New Notebook\", False, f\"Exception: {e}\")\n            return False\n    \n    def test_move_document(self) -> bool:\n        \"\"\"Test 4: Move document to different folder\"\"\"\n        try:\n            print(\"\\n\ud83d\udcc1 Test 4: Move Document\")\n            \n            database = self.uploader.database\n            \n            # Find a document and a folder\n            document_uuid = None\n            folder_uuid = None\n            original_parent = None\n            \n            for uuid, node in database['nodes'].items():\n                if node['node_type'] == 'document' and document_uuid is None:\n                    document_uuid = uuid\n                    original_parent = node.get('parent_uuid', '')\n                elif node['node_type'] == 'folder' and folder_uuid is None:\n                    folder_uuid = uuid\n                \n                if document_uuid and folder_uuid:\n                    break\n            \n            if not document_uuid:\n                self.log_test(\"Move Document\", False, \"No document found\")\n                return False\n            \n            if not folder_uuid:\n                self.log_test(\"Move Document\", False, \"No folder found\")\n                return False\n            \n            document_name = database['nodes'][document_uuid]['name']\n            folder_name = database['nodes'][folder_uuid]['name']\n            \n            print(f\"Moving '{document_name}' to folder '{folder_name}'\")\n            \n            # Move document\n            success = self.uploader.edit_document_metadata(document_uuid, new_parent=folder_uuid)\n            \n            if success:\n                # Verify move\n                updated_node = self.uploader.database['nodes'][document_uuid]\n                if updated_node['metadata']['parent'] == folder_uuid:\n                    self.log_test(\"Move Document\", True, \n                                f\"Moved '{document_name}' to '{folder_name}'\")\n                    \n                    # Move back to original location\n                    self.uploader.edit_document_metadata(document_uuid, new_parent=original_parent)\n                    return True\n                else:\n                    self.log_test(\"Move Document\", False, \"Parent not updated in database\")\n                    return False\n            else:\n                self.log_test(\"Move Document\", False, \"Move operation failed\")\n                return False\n                \n        except Exception as e:\n            self.log_test(\"Move Document\", False, f\"Exception: {e}\")\n            return False\n    \n    def test_hash_consistency(self) -> bool:\n        \"\"\"Test 5: Verify hash consistency after uploads\"\"\"\n        try:\n            print(\"\\n\ud83d\udd10 Test 5: Hash Consistency Check\")\n            \n            database = self.uploader.database\n            hash_registry = database.get('hash_registry', {})\n            \n            # Check for hash collisions\n            hash_types = {}\n            for hash_val, info in hash_registry.items():\n                hash_type = info.get('type', 'unknown')\n                if hash_type not in hash_types:\n                    hash_types[hash_type] = []\n                
hash_types[hash_type].append(hash_val)\n            \n            # Verify recent uploads have proper hash entries\n            recent_uploads = [result for result in self.test_results if result['success']]\n            \n            consistency_issues = 0\n            \n            # Check node consistency\n            for uuid, node in database['nodes'].items():\n                node_hash = node.get('hash')\n                if node_hash and node_hash not in hash_registry:\n                    consistency_issues += 1\n                    print(f\"\u26a0\ufe0f  Node {uuid[:8]}... has unregistered hash {node_hash[:16]}...\")\n                \n                # Check component hashes\n                comp_hashes = node.get('component_hashes', {})\n                for comp_type, comp_hash in comp_hashes.items():\n                    if comp_hash and isinstance(comp_hash, str) and comp_hash not in hash_registry:\n                        consistency_issues += 1\n                        print(f\"\u26a0\ufe0f  Component {comp_type} has unregistered hash {comp_hash[:16]}...\")\n            \n            if consistency_issues == 0:\n                self.log_test(\"Hash Consistency\", True, \n                            f\"All hashes properly registered ({len(hash_registry)} total)\")\n                return True\n            else:\n                self.log_test(\"Hash Consistency\", False, \n                            f\"{consistency_issues} hash consistency issues found\")\n                return False\n                \n        except Exception as e:\n            self.log_test(\"Hash Consistency\", False, f\"Exception: {e}\")\n            return False\n    \n    def validate_uploaded_document(self, document_uuid: str) -> bool:\n        \"\"\"Validate that an uploaded document has all required components accessible\"\"\"\n        try:\n            print(f\"\\n\ud83d\udd0d Validating uploaded document: {document_uuid[:8]}...\")\n            \n            # Find the document in our database\n            doc_node = None\n            for uuid, node in self.uploader.database['nodes'].items():\n                if uuid == document_uuid:\n                    doc_node = node\n                    break\n            \n            if not doc_node:\n                print(f\"\u274c Document {document_uuid[:8]}... 
not found in local database\")\n                return False\n            \n            print(f\"\ud83d\udcc4 Document found: {doc_node['name']}\")\n            print(f\"\ud83d\udd17 Document hash: {doc_node['hash']}\")\n            \n            # Try to fetch the document's docSchema from server\n            doc_hash = doc_node['hash']\n            try:\n                doc_response = self.session.get(f\"https://eu.tectonic.remarkable.com/sync/v3/files/{doc_hash}\")\n                doc_response.raise_for_status()\n                print(f\"\u2705 Document docSchema accessible from server ({len(doc_response.text)} bytes)\")\n                print(f\"\ud83d\udccb DocSchema content preview: {doc_response.text[:200]}...\")\n                \n                # Parse the docSchema and validate each component\n                lines = doc_response.text.strip().split('\\n')\n                if len(lines) < 2:\n                    print(f\"\u274c Invalid docSchema format: too few lines ({len(lines)})\")\n                    return False\n                \n                version = lines[0]\n                print(f\"\ud83d\udcca DocSchema version: {version}\")\n                \n                component_count = 0\n                missing_components = []\n                \n                for line in lines[1:]:\n                    if ':' in line:\n                        parts = line.split(':')\n                        if len(parts) >= 3:\n                            comp_hash = parts[0]\n                            comp_name = parts[2]\n                            component_count += 1\n                            \n                            # Try to fetch this component\n                            try:\n                                comp_response = self.session.get(f\"https://eu.tectonic.remarkable.com/sync/v3/files/{comp_hash}\")\n                                comp_response.raise_for_status()\n                                print(f\"\u2705 Component {comp_name}: accessible ({len(comp_response.content)} bytes)\")\n                            except Exception as e:\n                                print(f\"\u274c Component {comp_name}: NOT accessible - {e}\")\n                                missing_components.append(comp_name)\n                \n                print(f\"\ud83d\udcca Total components: {component_count}\")\n                print(f\"\u274c Missing components: {len(missing_components)}\")\n                \n                if missing_components:\n                    print(f\"\u26a0\ufe0f Missing: {missing_components}\")\n                    return False\n                else:\n                    print(f\"\u2705 All document components are accessible\")\n                    return True\n                \n            except Exception as e:\n                print(f\"\u274c Cannot fetch document docSchema: {e}\")\n                return False\n                \n        except Exception as e:\n            print(f\"\u274c Document validation failed: {e}\")\n            return False\n\n    def analyze_root_docschema_sizes(self) -> bool:\n        \"\"\"Analyze the sizes in root.docSchema to identify patterns and issues\"\"\"\n        try:\n            print(f\"\\n\ud83d\udcca Analyzing root.docSchema sizes...\")\n            \n            # Get current root.docSchema\n            root_response = self.session.get(\"https://eu.tectonic.remarkable.com/sync/v4/root\")\n            root_response.raise_for_status()\n            root_data = root_response.json()\n            
current_root_hash = root_data['hash']\n            \n            root_content_response = self.session.get(f\"https://eu.tectonic.remarkable.com/sync/v3/files/{current_root_hash}\")\n            root_content_response.raise_for_status()\n            root_content = root_content_response.text\n            \n            # Parse entries and analyze sizes\n            lines = root_content.strip().split('\\n')[1:]  # Skip version header\n            size_analysis = {}\n            entries_by_size = {}\n            \n            print(f\"\ud83d\udccb Analyzing {len(lines)} entries...\")\n            \n            for line in lines:\n                if ':' in line:\n                    parts = line.split(':')\n                    if len(parts) >= 5:\n                        doc_hash = parts[0]\n                        doc_uuid = parts[2]\n                        node_type = parts[3]\n                        size = parts[4]\n                        \n                        # Group by size\n                        if size not in entries_by_size:\n                            entries_by_size[size] = []\n                        entries_by_size[size].append({\n                            'uuid': doc_uuid,\n                            'hash': doc_hash,\n                            'type': node_type,\n                            'line': line\n                        })\n            \n            # Report size distribution\n            print(f\"\\n\ud83d\udcca Size Distribution Analysis:\")\n            for size, entries in sorted(entries_by_size.items(), key=lambda x: len(x[1]), reverse=True):\n                count = len(entries)\n                print(f\"   Size {size}: {count} documents\")\n                \n                if count > 1:\n                    print(f\"      \u26a0\ufe0f Multiple documents with identical size:\")\n                    for entry in entries[:5]:  # Show first 5\n                        print(f\"         - {entry['uuid'][:8]}... (type {entry['type']})\")\n                    if len(entries) > 5:\n                        print(f\"         ... 
and {len(entries) - 5} more\")\n            \n            # Check if our uploaded documents have suspicious sizes\n            suspicious_sizes = ['2247', '2246']  # The sizes we noticed\n            print(f\"\\n\ud83d\udd0d Checking for suspicious identical sizes...\")\n            \n            for size in suspicious_sizes:\n                if size in entries_by_size and len(entries_by_size[size]) > 1:\n                    print(f\"\u26a0\ufe0f Found {len(entries_by_size[size])} documents with size {size}:\")\n                    \n                    # Test each document with this size\n                    for entry in entries_by_size[size]:\n                        print(f\"\\n   Testing document {entry['uuid'][:8]}...\")\n                        try:\n                            # Fetch the document's docSchema\n                            doc_response = self.session.get(f\"https://eu.tectonic.remarkable.com/sync/v3/files/{entry['hash']}\")\n                            doc_response.raise_for_status()\n                            actual_docschema_size = len(doc_response.text)\n                            \n                            print(f\"      DocSchema actual size: {actual_docschema_size} bytes\")\n                            print(f\"      Root.docSchema claims: {size} bytes\")\n                            print(f\"      Match: {'\u2705' if str(actual_docschema_size) == size else '\u274c'}\")\n                            \n                            if str(actual_docschema_size) != size:\n                                print(f\"      \u26a0\ufe0f SIZE MISMATCH DETECTED!\")\n                                print(f\"      \ud83d\udcc4 DocSchema content preview: {doc_response.text[:100]}...\")\n                        \n                        except Exception as e:\n                            print(f\"      \u274c Cannot fetch docSchema: {e}\")\n            \n            return True\n            \n        except Exception as e:\n            print(f\"\u274c Root.docSchema size analysis failed: {e}\")\n            return False\n\n    def test_document_download_chain(self, document_uuid: str) -> bool:\n        \"\"\"Follow the complete download chain for a document to identify where it fails\"\"\"\n        try:\n            print(f\"\\n\ud83d\udd17 Testing Document Download Chain: {document_uuid[:8]}...\")\n            \n            # Step 1: Get current root.docSchema and find our document\n            print(\"\ud83d\udccb Step 1: Getting root.docSchema from server...\")\n            root_response = self.session.get(\"https://eu.tectonic.remarkable.com/sync/v4/root\")\n            root_response.raise_for_status()\n            root_data = root_response.json()\n            current_root_hash = root_data['hash']\n            \n            print(f\"\u2705 Root hash: {current_root_hash}\")\n            print(f\"\u2705 Generation: {root_data.get('generation')}\")\n            \n            # Step 2: Fetch root.docSchema content\n            print(\"\ud83d\udccb Step 2: Fetching root.docSchema content...\")\n            root_content_response = self.session.get(f\"https://eu.tectonic.remarkable.com/sync/v3/files/{current_root_hash}\")\n            root_content_response.raise_for_status()\n            root_content = root_content_response.text\n            \n            print(f\"\u2705 Root.docSchema size: {len(root_content)} bytes\")\n            print(f\"\ud83d\udcc4 Root.docSchema content preview:\")\n            for i, line in enumerate(root_content.strip().split('\\n')[:10]):\n         
       print(f\"   {i}: {line}\")\n            \n            # Step 3: Find our document in root.docSchema\n            print(f\"\ud83d\udccb Step 3: Looking for document {document_uuid} in root.docSchema...\")\n            doc_entry = None\n            doc_hash = None\n            doc_size = None\n            \n            for line in root_content.strip().split('\\n')[1:]:  # Skip version header\n                if document_uuid in line:\n                    doc_entry = line\n                    parts = line.split(':')\n                    if len(parts) >= 5:\n                        doc_hash = parts[0]\n                        doc_size = parts[4]\n                    break\n            \n            if not doc_entry:\n                print(f\"\u274c Document {document_uuid} NOT found in root.docSchema\")\n                return False\n            \n            print(f\"\u2705 Document found in root.docSchema:\")\n            print(f\"   Entry: {doc_entry}\")\n            print(f\"   Hash: {doc_hash}\")\n            print(f\"   Size: {doc_size}\")\n            \n            # Step 4: Fetch document's docSchema\n            print(f\"\ud83d\udccb Step 4: Fetching document's docSchema...\")\n            doc_response = self.session.get(f\"https://eu.tectonic.remarkable.com/sync/v3/files/{doc_hash}\")\n            doc_response.raise_for_status()\n            doc_schema_content = doc_response.text\n            \n            print(f\"\u2705 Document docSchema size: {len(doc_schema_content)} bytes\")\n            print(f\"\ud83d\udcc4 Document docSchema content:\")\n            for i, line in enumerate(doc_schema_content.strip().split('\\n')):\n                print(f\"   {i}: {line}\")\n            \n            # Step 5: Parse docSchema and test each component\n            print(f\"\ud83d\udccb Step 5: Testing each document component...\")\n            lines = doc_schema_content.strip().split('\\n')\n            if len(lines) < 2:\n                print(f\"\u274c Invalid docSchema format: only {len(lines)} lines\")\n                return False\n            \n            version = lines[0]\n            print(f\"\ud83d\udcca DocSchema version: {version}\")\n            \n            all_components_valid = True\n            component_details = {}\n            \n            for i, line in enumerate(lines[1:], 1):\n                if ':' in line:\n                    parts = line.split(':')\n                    if len(parts) >= 5:\n                        comp_hash = parts[0]\n                        comp_name = parts[2]\n                        comp_size = parts[4]\n                        \n                        print(f\"\\n   \ud83d\udd0d Testing component {i}: {comp_name}\")\n                        print(f\"       Hash: {comp_hash}\")\n                        print(f\"       Expected size: {comp_size}\")\n                        \n                        try:\n                            comp_response = self.session.get(f\"https://eu.tectonic.remarkable.com/sync/v3/files/{comp_hash}\")\n                            comp_response.raise_for_status()\n                            actual_size = len(comp_response.content)\n                            \n                            print(f\"       \u2705 Component accessible\")\n                            print(f\"       \ud83d\udccf Actual size: {actual_size} bytes\")\n                            print(f\"       \ud83d\udcca Size match: {'\u2705' if str(actual_size) == comp_size else '\u274c'}\")\n                            \n                      
      # Store component details\n                            component_details[comp_name] = {\n                                'hash': comp_hash,\n                                'expected_size': comp_size,\n                                'actual_size': actual_size,\n                                'accessible': True,\n                                'content_preview': comp_response.content[:100] if comp_response.content else b''\n                            }\n                            \n                            # Special handling for PDF content\n                            if comp_name.endswith('.pdf'):\n                                print(f\"       \ud83d\udcc4 PDF content preview: {comp_response.content[:50]}...\")\n                                if comp_response.content.startswith(b'%PDF'):\n                                    print(f\"       \u2705 Valid PDF header detected\")\n                                else:\n                                    print(f\"       \u274c Invalid PDF header - content: {comp_response.content[:20]}\")\n                                    all_components_valid = False\n                            \n                            # Special handling for metadata\n                            elif comp_name.endswith('.metadata'):\n                                try:\n                                    metadata_json = json.loads(comp_response.text)\n                                    print(f\"       \u2705 Valid JSON metadata\")\n                                    print(f\"       \ud83d\udcdd Name: {metadata_json.get('visibleName', 'N/A')}\")\n                                    print(f\"       \ud83d\udcc1 Parent: {metadata_json.get('parent', 'root')}\")\n                                except:\n                                    print(f\"       \u274c Invalid JSON metadata\")\n                                    all_components_valid = False\n                            \n                        except Exception as e:\n                            print(f\"       \u274c Component NOT accessible: {e}\")\n                            component_details[comp_name] = {\n                                'hash': comp_hash,\n                                'expected_size': comp_size,\n                                'accessible': False,\n                                'error': str(e)\n                            }\n                            all_components_valid = False\n            \n            # Step 6: Summary\n            print(f\"\\n\ud83d\udccb Step 6: Download Chain Summary\")\n            print(f\"   Root.docSchema: \u2705 Accessible\")\n            print(f\"   Document entry: \u2705 Found\")\n            print(f\"   Document docSchema: \u2705 Accessible\")\n            print(f\"   All components: {'\u2705 Valid' if all_components_valid else '\u274c Issues found'}\")\n            \n            if not all_components_valid:\n                print(f\"\\n\u26a0\ufe0f Component Issues Detected:\")\n                for name, details in component_details.items():\n                    if not details.get('accessible', False):\n                        print(f\"   \u274c {name}: {details.get('error', 'Not accessible')}\")\n                    elif details.get('expected_size') != str(details.get('actual_size', 0)):\n                        print(f\"   \u26a0\ufe0f {name}: Size mismatch ({details['expected_size']} vs {details['actual_size']})\")\n            \n            return all_components_valid\n            \n        except Exception as e:\n            
print(f\"\u274c Document download chain test failed: {e}\")\n            return False\n\n    def test_existing_document_chain(self) -> Dict[str, Any]:\n        \"\"\"Test the complete download chain for an existing uploaded document - NO NEW UPLOADS\"\"\"\n        print(\"\ufffd Starting Document Chain Analysis Test\")\n        print(\"=\" * 50)\n        print(\"\ud83d\udeab NO NEW UPLOADS - Testing existing documents only\")\n        \n        try:\n            # Step 1: Get current root.docSchema from server\n            print(\"\\n\ufffd Step 1: Fetching current root.docSchema from server...\")\n            root_response = self.session.get(\"https://eu.tectonic.remarkable.com/sync/v4/root\")\n            root_response.raise_for_status()\n            root_data = root_response.json()\n            current_root_hash = root_data['hash']\n            \n            print(f\"\u2705 Root response: {root_response.status_code}\")\n            print(f\"\u2705 Root hash: {current_root_hash}\")\n            print(f\"\u2705 Generation: {root_data.get('generation')}\")\n            print(f\"\ud83d\udcc4 Full root response: {json.dumps(root_data, indent=2)}\")\n            \n            # Step 2: Fetch root.docSchema content\n            print(f\"\\n\ud83d\udccb Step 2: Fetching root.docSchema content using hash {current_root_hash}...\")\n            root_content_response = self.session.get(f\"https://eu.tectonic.remarkable.com/sync/v3/files/{current_root_hash}\")\n            root_content_response.raise_for_status()\n            root_content = root_content_response.text\n            \n            print(f\"\u2705 Root content response: {root_content_response.status_code}\")\n            print(f\"\u2705 Root.docSchema size: {len(root_content)} bytes\")\n            print(f\"\ud83d\udcc4 Full root.docSchema content:\")\n            for i, line in enumerate(root_content.strip().split('\\n')):\n                print(f\"   Line {i}: {line}\")\n            \n            # Step 3: Find a PDF document (type 4) in root.docSchema\n            print(f\"\\n\ud83d\udccb Step 3: Looking for PDF documents (type 4) in root.docSchema...\")\n            pdf_documents = []\n            \n            for line in root_content.strip().split('\\n')[1:]:  # Skip version header\n                if ':' in line:\n                    parts = line.split(':')\n                    if len(parts) >= 5:\n                        doc_hash = parts[0]\n                        doc_uuid = parts[2]\n                        node_type = parts[3]\n                        size = parts[4]\n                        \n                        if node_type == '4':  # PDF document\n                            pdf_documents.append({\n                                'hash': doc_hash,\n                                'uuid': doc_uuid,\n                                'size': size,\n                                'line': line\n                            })\n            \n            print(f\"\u2705 Found {len(pdf_documents)} PDF documents\")\n            for i, doc in enumerate(pdf_documents):\n                print(f\"   PDF {i+1}: UUID {doc['uuid'][:8]}... Hash {doc['hash'][:16]}... 
Size {doc['size']}\")\n            \n            if not pdf_documents:\n                print(\"\u274c No PDF documents found in root.docSchema\")\n                return {'success': False, 'error': 'No PDF documents found'}\n            \n            # Step 4: Pick the first PDF and fetch its docSchema\n            target_doc = pdf_documents[0]\n            print(f\"\\n\ud83d\udccb Step 4: Analyzing PDF document {target_doc['uuid'][:8]}...\")\n            print(f\"   Target hash: {target_doc['hash']}\")\n            print(f\"   Target size: {target_doc['size']}\")\n            print(f\"   Full entry: {target_doc['line']}\")\n            \n            doc_response = self.session.get(f\"https://eu.tectonic.remarkable.com/sync/v3/files/{target_doc['hash']}\")\n            doc_response.raise_for_status()\n            doc_schema_content = doc_response.text\n            \n            print(f\"\u2705 Document docSchema response: {doc_response.status_code}\")\n            print(f\"\u2705 Document docSchema size: {len(doc_schema_content)} bytes\")\n            print(f\"\u2705 Root.docSchema claimed size: {target_doc['size']} bytes\")\n            print(f\"\ud83d\udcca Size match: {'\u2705 YES' if str(len(doc_schema_content)) == target_doc['size'] else '\u274c NO - MISMATCH!'}\")\n            \n            print(f\"\ud83d\udcc4 Full document docSchema content:\")\n            for i, line in enumerate(doc_schema_content.strip().split('\\n')):\n                print(f\"   Line {i}: {line}\")\n            \n            # Step 5: Parse document docSchema and fetch each component\n            print(f\"\\n\ud83d\udccb Step 5: Fetching each document component...\")\n            lines = doc_schema_content.strip().split('\\n')\n            if len(lines) < 2:\n                print(f\"\u274c Invalid docSchema format: only {len(lines)} lines\")\n                return {'success': False, 'error': 'Invalid docSchema format'}\n            \n            version = lines[0]\n            print(f\"\ud83d\udcca DocSchema version: {version}\")\n            \n            components = {}\n            \n            for i, line in enumerate(lines[1:], 1):\n                if ':' in line:\n                    parts = line.split(':')\n                    if len(parts) >= 5:\n                        comp_hash = parts[0]\n                        comp_name = parts[2]\n                        comp_size = parts[4]\n                        \n                        print(f\"\\n   \ud83d\udd0d Component {i}: {comp_name}\")\n                        print(f\"       Hash: {comp_hash}\")\n                        print(f\"       Expected size: {comp_size}\")\n                        \n                        try:\n                            comp_response = self.session.get(f\"https://eu.tectonic.remarkable.com/sync/v3/files/{comp_hash}\")\n                            comp_response.raise_for_status()\n                            actual_size = len(comp_response.content)\n                            \n                            print(f\"       \u2705 Component response: {comp_response.status_code}\")\n                            print(f\"       \u2705 Actual size: {actual_size} bytes\")\n                            print(f\"       \ud83d\udcca Size match: {'\u2705 YES' if str(actual_size) == comp_size else '\u274c NO - MISMATCH!'}\")\n                            \n                            # Store component details\n                            components[comp_name] = {\n                                'hash': comp_hash,\n               
                 'expected_size': comp_size,\n                                'actual_size': actual_size,\n                                'response_status': comp_response.status_code,\n                                'accessible': True,\n                                'content': comp_response.content\n                            }\n                            \n                            # Show content preview based on type\n                            if comp_name.endswith('.pdf'):\n                                print(f\"       \ud83d\udcc4 PDF content preview (first 50 bytes): {comp_response.content[:50]}\")\n                                if comp_response.content.startswith(b'%PDF'):\n                                    print(f\"       \u2705 Valid PDF header detected\")\n                                else:\n                                    print(f\"       \u274c Invalid PDF header!\")\n                            \n                            elif comp_name.endswith('.metadata'):\n                                try:\n                                    metadata_json = json.loads(comp_response.text)\n                                    print(f\"       \u2705 Valid JSON metadata\")\n                                    print(f\"       \ud83d\udcdd Document name: {metadata_json.get('visibleName', 'N/A')}\")\n                                    print(f\"       \ud83d\udcc1 Parent UUID: {metadata_json.get('parent', 'root')}\")\n                                    print(f\"       \ud83d\uddc2\ufe0f Document type: {metadata_json.get('type', 'N/A')}\")\n                                    print(f\"       \ud83d\udcc4 Full metadata: {json.dumps(metadata_json, indent=8)}\")\n                                except Exception as json_e:\n                                    print(f\"       \u274c Invalid JSON metadata: {json_e}\")\n                                    print(f\"       \ud83d\udcc4 Raw content: {comp_response.text[:200]}...\")\n                            \n                            elif comp_name.endswith('.content'):\n                                print(f\"       \ud83d\udcc4 Content preview: {comp_response.text[:100]}...\")\n                            \n                            elif comp_name.endswith('.pagedata'):\n                                print(f\"       \ud83d\udcc4 Pagedata preview: {comp_response.text[:100]}...\")\n                            \n                            else:\n                                print(f\"       \ud83d\udcc4 Unknown component type, raw preview: {comp_response.content[:50]}\")\n                        \n                        except Exception as e:\n                            print(f\"       \u274c Component NOT accessible: {e}\")\n                            components[comp_name] = {\n                                'hash': comp_hash,\n                                'expected_size': comp_size,\n                                'accessible': False,\n                                'error': str(e)\n                            }\n            \n            # Step 6: Summary and analysis\n            print(f\"\\n\ud83d\udccb Step 6: Complete Analysis Summary\")\n            print(f\"=\" * 50)\n            accessible_count = sum(1 for c in components.values() if c.get('accessible', False))\n            total_count = len(components)\n            \n            print(f\"\ud83d\udcca Document Analysis Results:\")\n            print(f\"   Target document: {target_doc['uuid']}\")\n            print(f\"   Document hash: 
{target_doc['hash']}\")\n            print(f\"   Root.docSchema size claim: {target_doc['size']} bytes\")\n            print(f\"   Actual docSchema size: {len(doc_schema_content)} bytes\")\n            print(f\"   Size consistency: {'\u2705 GOOD' if str(len(doc_schema_content)) == target_doc['size'] else '\u274c CORRUPTED'}\")\n            print(f\"   Total components: {total_count}\")\n            print(f\"   Accessible components: {accessible_count}\")\n            print(f\"   Component accessibility: {'\u2705 ALL GOOD' if accessible_count == total_count else '\u274c ISSUES FOUND'}\")\n            \n            print(f\"\\n\ud83d\udccb Component Details:\")\n            for name, details in components.items():\n                status = \"\u2705 OK\" if details.get('accessible') else \"\u274c FAIL\"\n                size_status = \"\"\n                if details.get('accessible'):\n                    expected = details.get('expected_size')\n                    actual = details.get('actual_size')\n                    size_status = f\" (size: {'\u2705' if str(actual) == expected else '\u274c'})\"\n                print(f\"   {status} {name}{size_status}\")\n                if not details.get('accessible'):\n                    print(f\"      Error: {details.get('error')}\")\n            \n            success = accessible_count == total_count and str(len(doc_schema_content)) == target_doc['size']\n            \n            return {\n                'success': success,\n                'document_uuid': target_doc['uuid'],\n                'document_hash': target_doc['hash'],\n                'root_size_claim': target_doc['size'],\n                'actual_docschema_size': len(doc_schema_content),\n                'size_consistent': str(len(doc_schema_content)) == target_doc['size'],\n                'total_components': total_count,\n                'accessible_components': accessible_count,\n                'components': components\n            }\n            \n        except Exception as e:\n            print(f\"\u274c Document chain analysis failed: {e}\")\n            return {'success': False, 'error': str(e)}\n    \n    def comprehensive_cloud_analysis(self) -> Dict[str, Any]:\n        \"\"\"Run comprehensive analysis of cloud state and replica sync process\"\"\"\n        print(\"\\n\ud83d\udd0d COMPREHENSIVE CLOUD & REPLICA ANALYSIS\")\n        print(\"=\" * 60)\n        \n        analysis_results = {\n            'cloud_state': {},\n            'replica_state': {},\n            'sync_issues': [],\n            'missing_documents': []\n        }\n        \n        try:\n            # STEP 1: Direct Cloud State Analysis\n            print(\"\\n\ud83d\udce1 STEP 1: DIRECT CLOUD STATE ANALYSIS\")\n            print(\"-\" * 40)\n            \n            # Get current root state from server\n            root_response = self.session.get(\"https://eu.tectonic.remarkable.com/sync/v4/root\")\n            root_response.raise_for_status()\n            root_data = root_response.json()\n            \n            print(f\"\u2705 Root hash: {root_data['hash']}\")\n            print(f\"\u2705 Generation: {root_data['generation']}\")\n            \n            # Get root.docSchema content from server\n            root_content_response = self.session.get(f\"https://eu.tectonic.remarkable.com/sync/v3/files/{root_data['hash']}\")\n            root_content_response.raise_for_status()\n            root_content = root_content_response.text\n            \n            print(f\"\\n\ud83d\udcc4 CLOUD 
ROOT.DOCSCHEMA ({len(root_content)} bytes):\")\n            cloud_lines = root_content.strip().split('\\n')\n            for i, line in enumerate(cloud_lines):\n                print(f\"   Line {i}: {line}\")\n            \n            # Parse cloud entries\n            cloud_documents = []\n            cloud_folders = []\n            \n            for line_num, line in enumerate(cloud_lines[1:], 1):  # Skip version header\n                if ':' in line:\n                    parts = line.split(':')\n                    if len(parts) >= 5:\n                        doc_hash = parts[0]\n                        doc_uuid = parts[2]\n                        node_type = parts[3]\n                        size = parts[4]\n                        \n                        entry_info = {\n                            'uuid': doc_uuid,\n                            'hash': doc_hash,  \n                            'type': node_type,\n                            'size': size,\n                            'line': line,\n                            'line_number': line_num\n                        }\n                        \n                        if node_type in ['1', '2']:  # Folders\n                            cloud_folders.append(entry_info)\n                        elif node_type in ['3', '4']:  # Documents\n                            cloud_documents.append(entry_info)\n            \n            analysis_results['cloud_state'] = {\n                'root_hash': root_data['hash'],\n                'generation': root_data['generation'],\n                'total_entries': len(cloud_lines) - 1,\n                'folders': cloud_folders,\n                'documents': cloud_documents\n            }\n            \n            print(f\"\\n\ud83d\udcca CLOUD INVENTORY:\")\n            print(f\"   \ud83d\udcc2 Folders: {len(cloud_folders)}\")\n            print(f\"   \ud83d\udcc4 Documents: {len(cloud_documents)}\")\n            \n            # STEP 2: Fresh Replica Build with Detailed Logging\n            print(f\"\\n\ud83c\udfd7\ufe0f STEP 2: FRESH REPLICA BUILD WITH DETAILED LOGGING\")\n            print(\"-\" * 40)\n            \n            from local_replica_v2 import RemarkableReplicaBuilder\n            replica_builder = RemarkableReplicaBuilder(self.session)\n            \n            # Force a complete rebuild\n            print(\"\ud83d\udd04 Forcing complete replica rebuild...\")\n            replica_builder.build_complete_replica()\n            \n            # STEP 3: Compare Replica Database vs Cloud State\n            print(f\"\\n\ud83d\udcca STEP 3: REPLICA DATABASE ANALYSIS\")\n            print(\"-\" * 40)\n            \n            # Load the replica database\n            database_path = self.base_dir / \"remarkable_replica_v2\" / \"replica_database.json\"\n            if database_path.exists():\n                with open(database_path, 'r') as f:\n                    replica_db = json.load(f)\n                \n                replica_nodes = replica_db.get('nodes', {})\n                replica_documents = []\n                replica_folders = []\n                \n                for uuid, node in replica_nodes.items():\n                    if node.get('node_type') == 'document':\n                        replica_documents.append({\n                            'uuid': uuid,\n                            'name': node.get('name', 'Unknown'),\n                            'hash': node.get('hash', 'Unknown'),\n                            'parent': node.get('metadata', {}).get('parent', '')\n      
                  })\n                    elif node.get('node_type') == 'folder':\n                        replica_folders.append({\n                            'uuid': uuid,\n                            'name': node.get('name', 'Unknown'),\n                            'hash': node.get('hash', 'Unknown')\n                        })\n                \n                analysis_results['replica_state'] = {\n                    'total_nodes': len(replica_nodes),\n                    'folders': replica_folders,\n                    'documents': replica_documents\n                }\n                \n                print(f\"\ud83d\udcc1 REPLICA DATABASE CONTENT:\")\n                print(f\"   \ud83d\udcc2 Folders: {len(replica_folders)}\")\n                print(f\"   \ud83d\udcc4 Documents: {len(replica_documents)}\")\n                \n                print(f\"\\n\ud83d\udcc2 REPLICA FOLDERS:\")\n                for i, folder in enumerate(replica_folders, 1):\n                    print(f\"   {i}. {folder['name']} (UUID: {folder['uuid'][:8]}...)\")\n                \n                print(f\"\\n\ud83d\udcc4 REPLICA DOCUMENTS:\")\n                for i, doc in enumerate(replica_documents, 1):\n                    parent_info = f\" [in folder]\" if doc['parent'] else \" [root]\"\n                    print(f\"   {i}. {doc['name']} (UUID: {doc['uuid'][:8]}...){parent_info}\")\n                \n            else:\n                print(\"\u274c Replica database not found!\")\n                analysis_results['sync_issues'].append(\"Replica database file missing\")\n            \n            # STEP 4: Cross-Reference Analysis\n            print(f\"\\n\ud83d\udd0d STEP 4: CROSS-REFERENCE ANALYSIS\")\n            print(\"-\" * 40)\n            \n            # Check if cloud documents are in replica\n            missing_from_replica = []\n            for cloud_doc in cloud_documents:\n                found_in_replica = any(r['uuid'] == cloud_doc['uuid'] for r in replica_documents)\n                if not found_in_replica:\n                    missing_from_replica.append(cloud_doc)\n                    print(f\"\u274c MISSING FROM REPLICA: {cloud_doc['uuid'][:8]}... 
(type {cloud_doc['type']}, size {cloud_doc['size']})\")\n            \n            # Check if replica documents are in cloud\n            missing_from_cloud = []\n            for replica_doc in replica_documents:\n                found_in_cloud = any(c['uuid'] == replica_doc['uuid'] for c in cloud_documents)\n                if not found_in_cloud:\n                    missing_from_cloud.append(replica_doc)\n                    print(f\"\u274c MISSING FROM CLOUD: {replica_doc['name']} (UUID: {replica_doc['uuid'][:8]}...)\")\n            \n            analysis_results['missing_documents'] = {\n                'missing_from_replica': missing_from_replica,\n                'missing_from_cloud': missing_from_cloud\n            }\n            \n            # STEP 5: Test Document Analysis\n            print(f\"\\n\ud83c\udfaf STEP 5: TEST DOCUMENT ANALYSIS\")\n            print(\"-\" * 40)\n            \n            test_uuids = [\n                '824225cd-3f6f-4d00-bbbb-54d53ab94cc5',  # Our recent upload\n                '2342f4af-3034-45d2-be90-e17ecc9e04d5',  # Invoice PDF  \n                '7b3f2f2b-6757-4673-9aa4-636a895415f5'   # Pylontech PDF (should be missing)\n            ]\n            \n            for test_uuid in test_uuids:\n                print(f\"\\n\ud83d\udd0d Testing document {test_uuid[:8]}...\")\n                \n                # Check in cloud\n                in_cloud = any(test_uuid in line for line in cloud_lines)\n                print(f\"   Cloud: {'\u2705 FOUND' if in_cloud else '\u274c NOT FOUND'}\")\n                \n                # Check in replica\n                in_replica = test_uuid in replica_nodes if 'replica_nodes' in locals() else False\n                print(f\"   Replica: {'\u2705 FOUND' if in_replica else '\u274c NOT FOUND'}\")\n                \n                # If in cloud, fetch and validate components\n                if in_cloud:\n                    cloud_line = next(line for line in cloud_lines if test_uuid in line)\n                    parts = cloud_line.split(':')\n                    doc_hash = parts[0]\n                    \n                    try:\n                        doc_response = self.session.get(f\"https://eu.tectonic.remarkable.com/sync/v3/files/{doc_hash}\")\n                        doc_response.raise_for_status()\n                        print(f\"   DocSchema: \u2705 ACCESSIBLE ({len(doc_response.text)} bytes)\")\n                    except Exception as e:\n                        print(f\"   DocSchema: \u274c NOT ACCESSIBLE - {e}\")\n            \n            print(f\"\\n\ud83d\udcca ANALYSIS SUMMARY:\")\n            print(f\"   Cloud entries: {len(cloud_documents)} docs, {len(cloud_folders)} folders\")\n            print(f\"   Replica entries: {len(replica_documents)} docs, {len(replica_folders)} folders\")\n            print(f\"   Missing from replica: {len(missing_from_replica)}\")\n            print(f\"   Missing from cloud: {len(missing_from_cloud)}\")\n            \n            return analysis_results\n            \n        except Exception as e:\n            print(f\"\u274c Analysis failed: {e}\")\n            analysis_results['error'] = str(e)\n            return analysis_results\n\n    def create_demo_upload_scenario(self):\n        \"\"\"Create a comprehensive demo scenario\"\"\"\n        print(\"\\n\ud83c\udfac Creating Demo Upload Scenario\")\n        print(\"This will demonstrate a complete workflow:\")\n        print(\"1. Create a new notebook\")\n        print(\"2. 
Upload a PDF document\") \n        print(\"3. Organize them in folders\")\n        print(\"4. Edit metadata\")\n        print(\"5. Verify everything synced correctly\")\n        \n        # Create demo notebook\n        demo_notebook = f\"Demo_Notebook_{int(time.time())}\"\n        self.uploader.create_notebook(demo_notebook, template=\"Lines Medium\")\n        \n        # Create and upload demo PDF\n        demo_pdf = self.test_create_test_pdf()\n        demo_pdf_name = f\"Demo_PDF_{int(time.time())}\"\n        self.uploader.upload_pdf_document(str(demo_pdf), demo_pdf_name)\n        \n        print(\"\u2705 Demo scenario created successfully!\")\n        print(\"Check your reMarkable device to see the new files.\")",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/cloudtest/test_uploads.py",
      "tags": [
        "class",
        "remarkableuploadtests"
      ],
      "updated_at": "2025-12-07T01:29:41.723351",
      "usage_example": "# Example usage:\n# result = RemarkableUploadTests(bases)"
    },
    {
      "best_practices": [],
      "class_interface": {
        "attributes": [],
        "methods": [
          {
            "is_property": false,
            "is_static": false,
            "name": "__init__",
            "parameters": {
              "replica_dir": "Type: str",
              "session": "Type: requests.Session"
            },
            "purpose": "Internal method:   init  ",
            "returns": "None",
            "signature": "__init__(self, session, replica_dir)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "setup_logging",
            "parameters": {},
            "purpose": "Setup logging",
            "returns": "None",
            "signature": "setup_logging(self)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_load_existing_database",
            "parameters": {},
            "purpose": "Load existing database if it exists",
            "returns": "None",
            "signature": "_load_existing_database(self)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_should_update_node",
            "parameters": {
              "node_hash": "Type: str",
              "node_uuid": "Type: str"
            },
            "purpose": "Check if a node needs to be updated based on existing database",
            "returns": "Returns bool",
            "signature": "_should_update_node(self, node_hash, node_uuid) -> bool"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "fetch_hash_content",
            "parameters": {
              "hash_ref": "Type: str"
            },
            "purpose": "Fetch content from reMarkable cloud by hash",
            "returns": "Returns Optional[Dict[str, Any]]",
            "signature": "fetch_hash_content(self, hash_ref) -> Optional[Dict[str, Any]]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "get_root_hash",
            "parameters": {},
            "purpose": "Get the root hash",
            "returns": "Returns Optional[str]",
            "signature": "get_root_hash(self) -> Optional[str]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "parse_directory_listing",
            "parameters": {
              "content": "Type: bytes"
            },
            "purpose": "Parse directory listing",
            "returns": "Returns Dict[str, Any]",
            "signature": "parse_directory_listing(self, content) -> Dict[str, Any]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "extract_metadata",
            "parameters": {
              "metadata_hash": "Type: str"
            },
            "purpose": "Extract metadata from hash",
            "returns": "Returns Optional[Dict[str, Any]]",
            "signature": "extract_metadata(self, metadata_hash) -> Optional[Dict[str, Any]]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "discover_all_nodes",
            "parameters": {
              "root_hash": "Type: str"
            },
            "purpose": "Step 1: Discover all nodes and collect metadata",
            "returns": "Returns bool",
            "signature": "discover_all_nodes(self, root_hash) -> bool"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "build_folder_structure",
            "parameters": {},
            "purpose": "Step 2: Build correct folder structure based on parent UUIDs",
            "returns": "Returns bool",
            "signature": "build_folder_structure(self) -> bool"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_build_node_paths",
            "parameters": {
              "node": "Type: RemarkableNode",
              "parent_path": "Type: str"
            },
            "purpose": "Recursively build paths for node and its children",
            "returns": "None",
            "signature": "_build_node_paths(self, node, parent_path)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "extract_all_files",
            "parameters": {},
            "purpose": "Step 3: Extract PDFs and .rm files to correct locations",
            "returns": "Returns bool",
            "signature": "extract_all_files(self) -> bool"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_extract_node_files",
            "parameters": {
              "node": "Type: RemarkableNode"
            },
            "purpose": "Extract files for a document node",
            "returns": "None",
            "signature": "_extract_node_files(self, node)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_extract_pdf",
            "parameters": {
              "pdf_hash": "Type: str",
              "target_path": "Type: Path"
            },
            "purpose": "Extract PDF file",
            "returns": "Returns bool",
            "signature": "_extract_pdf(self, pdf_hash, target_path) -> bool"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_extract_rm_file",
            "parameters": {
              "rm_hash": "Type: str",
              "target_path": "Type: Path"
            },
            "purpose": "Extract .rm file",
            "returns": "Returns bool",
            "signature": "_extract_rm_file(self, rm_hash, target_path) -> bool"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_extract_component",
            "parameters": {
              "comp_hash": "Type: str",
              "target_path": "Type: Path"
            },
            "purpose": "Extract other component",
            "returns": "Returns bool",
            "signature": "_extract_component(self, comp_hash, target_path) -> bool"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_convert_notebook_to_pdf",
            "parameters": {
              "node": "Type: RemarkableNode",
              "notebook_dir": "Type: Path"
            },
            "purpose": "Convert reMarkable notebook files to PDF using rmc and concatenate pages",
            "returns": "None",
            "signature": "_convert_notebook_to_pdf(self, node, notebook_dir)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "build_complete_replica",
            "parameters": {},
            "purpose": "Build complete replica using 3-step process",
            "returns": "Returns bool",
            "signature": "build_complete_replica(self) -> bool"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_save_database",
            "parameters": {},
            "purpose": "Save the comprehensive replica database",
            "returns": "None",
            "signature": "_save_database(self)"
          }
        ]
      },
      "complexity": "moderate",
      "component_type": "class",
      "conditional_imports": [],
      "created_at": "2025-12-07 00:27:51",
      "decorators": [],
      "dependencies": [],
      "description": "Step-by-step replica builder",
      "docstring": "Step-by-step replica builder",
      "id": 2046,
      "imports": [
        "import os",
        "import json",
        "import requests",
        "import logging",
        "from pathlib import Path",
        "from typing import Dict",
        "from typing import List",
        "from typing import Optional",
        "from typing import Any",
        "from typing import Set",
        "from dataclasses import dataclass",
        "from dataclasses import asdict",
        "from datetime import datetime",
        "import sys",
        "from auth import RemarkableAuth",
        "import re",
        "import shutil",
        "import subprocess",
        "import PyPDF2",
        "import shutil"
      ],
      "imports_required": [
        "import os",
        "import json",
        "import requests",
        "import logging",
        "from pathlib import Path"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 884,
      "line_start": 52,
      "name": "RemarkableReplicaBuilder",
      "parameters": [
        {
          "annotation": "",
          "default": null,
          "name": "bases"
        }
      ],
      "parameters_explained": {
        "bases": "Parameter of type "
      },
      "parent_class": null,
      "purpose": "Step-by-step replica builder",
      "return_annotation": null,
      "return_explained": "Returns unspecified type",
      "settings_required": [],
      "source_code": "class RemarkableReplicaBuilder:\n    \"\"\"Step-by-step replica builder\"\"\"\n    \n    def __init__(self, session: requests.Session, replica_dir: str = \"remarkable_replica_v2\"):\n        self.session = session\n        self.base_url = \"https://eu.tectonic.remarkable.com\"\n        \n        # Setup directories\n        self.replica_dir = Path(replica_dir).resolve()\n        self.content_dir = self.replica_dir / \"content\"\n        self.raw_dir = self.replica_dir / \"raw_components\"\n        \n        for directory in [self.replica_dir, self.content_dir, self.raw_dir]:\n            directory.mkdir(parents=True, exist_ok=True)\n        \n        # Setup logging\n        self.log_file = self.replica_dir / \"build.log\"\n        self.setup_logging()\n        \n        # State\n        self.nodes: Dict[str, RemarkableNode] = {}\n        self.all_hashes: Set[str] = set()\n        self.failed_downloads: Set[str] = set()\n        self.existing_database: Optional[Dict[str, Any]] = None\n        \n        # Load existing database if it exists\n        self._load_existing_database()\n        \n        # Statistics\n        self.stats = {\n            'total_nodes': 0,\n            'folders': 0,\n            'documents': 0,\n            'trash_items': 0,\n            'pdfs_extracted': 0,\n            'rm_files_extracted': 0,\n            'rm_pdfs_converted': 0,\n            'total_files': 0,\n            'nodes_updated': 0,\n            'nodes_added': 0,\n            'nodes_unchanged': 0\n        }\n    \n    def setup_logging(self):\n        \"\"\"Setup logging\"\"\"\n        self.logger = logging.getLogger('ReplicaBuilder')\n        self.logger.setLevel(logging.DEBUG)\n        self.logger.handlers.clear()\n        \n        # File handler\n        file_handler = logging.FileHandler(self.log_file, mode='w', encoding='utf-8')\n        file_handler.setLevel(logging.DEBUG)\n        file_formatter = logging.Formatter(\n            '%(asctime)s | %(levelname)-8s | %(message)s',\n            datefmt='%Y-%m-%d %H:%M:%S'\n        )\n        file_handler.setFormatter(file_formatter)\n        \n        # Console handler\n        console_handler = logging.StreamHandler()\n        console_handler.setLevel(logging.INFO)\n        console_formatter = logging.Formatter('%(message)s')\n        console_handler.setFormatter(console_formatter)\n        \n        self.logger.addHandler(file_handler)\n        self.logger.addHandler(console_handler)\n        \n        self.logger.info(f\"\ud83c\udfd7\ufe0f REMARKABLE REPLICA BUILDER (STEP-BY-STEP)\")\n        self.logger.info(f\"\ud83d\udcc1 Replica directory: {self.replica_dir}\")\n    \n    def _load_existing_database(self):\n        \"\"\"Load existing database if it exists\"\"\"\n        database_file = self.replica_dir / \"replica_database.json\"\n        \n        if database_file.exists():\n            try:\n                with open(database_file, 'r', encoding='utf-8') as f:\n                    self.existing_database = json.load(f)\n                    \n                existing_count = len(self.existing_database.get('nodes', {}))\n                last_sync = self.existing_database.get('replica_info', {}).get('last_sync', 'unknown')\n                \n                self.logger.info(f\"\ud83d\udcc2 Found existing database with {existing_count} nodes\")\n                self.logger.info(f\"\ud83d\udcc5 Last sync: {last_sync}\")\n                \n            except Exception as e:\n                self.logger.warning(f\"\u26a0\ufe0f 
Failed to load existing database: {e}\")\n                self.existing_database = None\n        else:\n            self.logger.info(f\"\ud83d\udcc2 No existing database found - full sync will be performed\")\n    \n    def _should_update_node(self, node_hash: str, node_uuid: str) -> bool:\n        \"\"\"Check if a node needs to be updated based on existing database\"\"\"\n        if not self.existing_database:\n            return True\n            \n        existing_nodes = self.existing_database.get('nodes', {})\n        hash_registry = self.existing_database.get('hash_registry', {})\n        \n        # Check if this hash is already known\n        if node_hash in hash_registry:\n            existing_uuid = hash_registry[node_hash].get('uuid')\n            if existing_uuid == node_uuid:\n                # Same node, same hash - no update needed\n                return False\n        \n        # Check if node exists but with different hash (updated)\n        if node_uuid in existing_nodes:\n            existing_hash = existing_nodes[node_uuid].get('hash')\n            if existing_hash != node_hash:\n                # Node exists but hash changed - update needed\n                return True\n            else:\n                # Same hash - no update needed\n                return False\n        \n        # New node - update needed\n        return True\n    \n    def fetch_hash_content(self, hash_ref: str) -> Optional[Dict[str, Any]]:\n        \"\"\"Fetch content from reMarkable cloud by hash\"\"\"\n        if hash_ref in self.failed_downloads:\n            return None\n            \n        try:\n            url = f\"{self.base_url}/sync/v3/files/{hash_ref}\"\n            self.logger.debug(f\"FETCHING: {hash_ref[:16]}...\")\n            \n            response = self.session.get(url)\n            response.raise_for_status()\n            \n            content = response.content\n            self.logger.debug(f\"  \u2192 {len(content)} bytes\")\n            \n            return {\n                'hash': hash_ref,\n                'content': content,\n                'size': len(content)\n            }\n            \n        except Exception as e:\n            self.logger.error(f\"Failed to fetch {hash_ref[:16]}...: {e}\")\n            self.failed_downloads.add(hash_ref)\n            return None\n    \n    def get_root_hash(self) -> Optional[str]:\n        \"\"\"Get the root hash\"\"\"\n        try:\n            url = f\"{self.base_url}/sync/v4/root\"\n            self.logger.debug(f\"Getting root hash from: {url}\")\n            response = self.session.get(url, timeout=30)\n            \n            if response.status_code == 200:\n                data = response.json()\n                root_hash = data.get('hash')\n                \n                self.logger.info(f\"\ud83c\udf31 Root hash: {root_hash}\")\n                return root_hash\n            else:\n                self.logger.error(f\"Root request failed with status {response.status_code}\")\n                return None\n            \n        except Exception as e:\n            self.logger.error(f\"Failed to get root hash: {e}\")\n            self.logger.debug(f\"Response text: {getattr(response, 'text', 'No response')}\")\n            return None\n    \n    def parse_directory_listing(self, content: bytes) -> Dict[str, Any]:\n        \"\"\"Parse directory listing\"\"\"\n        try:\n            text_content = content.decode('utf-8')\n        except UnicodeDecodeError:\n            return {'child_objects': [], 'data_components': 
[]}\n        \n        result = {\n            'child_objects': [],\n            'data_components': []\n        }\n        \n        lines = text_content.split('\\n')\n        if lines and lines[0].strip().isdigit():\n            lines = lines[1:]  # Skip count line\n        \n        import re\n        entry_pattern = r'^([a-f0-9]{64}):([0-9a-fA-F]+):([a-f0-9-/]+(?:\\.[^:]+)?):(\\d+):(\\d+)$'\n        \n        for line in lines:\n            line = line.strip()\n            if not line:\n                continue\n                \n            match = re.match(entry_pattern, line, re.IGNORECASE)\n            if match:\n                hash_val, flags, uuid_component, type_val, size_val = match.groups()\n                \n                entry_info = {\n                    'hash': hash_val,\n                    'flags': flags,\n                    'uuid_component': uuid_component,\n                    'type': type_val,\n                    'size': int(size_val)\n                }\n                \n                if '.' in uuid_component:\n                    # Data component (.content, .metadata, .pdf, .rm, etc.)\n                    component_type = uuid_component.split('.')[-1]\n                    if '/' in component_type:  # Handle .rm files like \"uuid/filename.rm\"\n                        component_type = component_type.split('/')[-1]\n                    entry_info['component_type'] = component_type\n                    result['data_components'].append(entry_info)\n                else:\n                    # Child object (pure UUID)\n                    result['child_objects'].append(entry_info)\n        \n        return result\n    \n    def extract_metadata(self, metadata_hash: str) -> Optional[Dict[str, Any]]:\n        \"\"\"Extract metadata from hash\"\"\"\n        content_info = self.fetch_hash_content(metadata_hash)\n        if not content_info:\n            return None\n        \n        try:\n            text_content = content_info['content'].decode('utf-8')\n            return json.loads(text_content)\n        except (UnicodeDecodeError, json.JSONDecodeError) as e:\n            self.logger.debug(f\"Failed to parse metadata {metadata_hash[:16]}...: {e}\")\n            return None\n    \n    # ================================================================\n    # STEP 1: DISCOVERY PHASE\n    # ================================================================\n    \n    def discover_all_nodes(self, root_hash: str) -> bool:\n        \"\"\"Step 1: Discover all nodes and collect metadata\"\"\"\n        self.logger.info(f\"\ud83d\udccb STEP 1: DISCOVERY PHASE\")\n        self.logger.info(f\"\ud83d\udd0d Discovering all nodes from root...\")\n        \n        discovered_hashes = set()\n        hashes_to_process = [root_hash]\n        \n        while hashes_to_process:\n            current_hash = hashes_to_process.pop(0)\n            \n            if current_hash in discovered_hashes:\n                continue\n                \n            discovered_hashes.add(current_hash)\n            self.logger.debug(f\"  Processing: {current_hash[:16]}...\")\n            \n            # Fetch and parse content\n            content_info = self.fetch_hash_content(current_hash)\n            if not content_info:\n                continue\n            \n            parsed = self.parse_directory_listing(content_info['content'])\n            \n            # Extract metadata if available\n            metadata = {}\n            metadata_hash = None\n            node_name = 
f\"unknown_{current_hash[:8]}\"\n            node_type = \"folder\"\n            parent_uuid = None\n            \n            for component in parsed['data_components']:\n                if component['component_type'] == 'metadata':\n                    metadata_hash = component['hash']\n                    extracted_metadata = self.extract_metadata(metadata_hash)\n                    if extracted_metadata:\n                        metadata = extracted_metadata\n                        node_name = metadata.get('visibleName', node_name)\n                        if metadata.get('type') == 'DocumentType':\n                            node_type = \"document\"\n                        elif metadata.get('type') == 'CollectionType':\n                            node_type = \"folder\"\n                        parent_uuid = metadata.get('parent', '') or None\n                    break\n            \n            # Determine node UUID\n            node_uuid = None\n            for component in parsed['child_objects']:\n                node_uuid = component['uuid_component']\n                break\n            if not node_uuid and parsed['data_components']:\n                component_name = parsed['data_components'][0]['uuid_component']\n                if '.' in component_name:\n                    node_uuid = component_name.split('.')[0]\n            if not node_uuid:\n                node_uuid = current_hash[:32]  # Fallback\n            \n            # Check if node needs updating (incremental sync)\n            if self._should_update_node(current_hash, node_uuid):\n                # Create node\n                node = RemarkableNode(\n                    uuid=node_uuid,\n                    hash=current_hash,\n                    name=node_name,\n                    node_type=node_type,\n                    parent_uuid=parent_uuid,\n                    metadata=metadata\n                )\n                \n                # Extract component hashes\n                for component in parsed['data_components']:\n                    comp_type = component['component_type']\n                    comp_hash = component['hash']\n                    \n                    if comp_type == 'content':\n                        node.content_hash = comp_hash\n                    elif comp_type == 'metadata':\n                        node.metadata_hash = comp_hash\n                    elif comp_type == 'pdf':\n                        node.pdf_hash = comp_hash\n                    elif comp_type == 'pagedata':\n                        node.pagedata_hash = comp_hash\n                    elif comp_type == 'rm' or comp_type.endswith('.rm'):\n                        node.rm_hashes.append(comp_hash)\n                \n                # Store node\n                self.nodes[node_uuid] = node\n                self.stats['nodes_added'] += 1\n                \n                self.logger.debug(f\"    \u2192 NEW/UPDATED {node_type}: {node_name} (parent: {parent_uuid or 'ROOT'})\")\n            else:\n                # Node unchanged - load from existing database\n                if self.existing_database and node_uuid in self.existing_database.get('nodes', {}):\n                    existing_node_data = self.existing_database['nodes'][node_uuid]\n                    \n                    node = RemarkableNode(\n                        uuid=existing_node_data['uuid'],\n                        hash=existing_node_data['hash'],\n                        name=existing_node_data['name'],\n                        
node_type=existing_node_data['node_type'],\n                        parent_uuid=existing_node_data['parent_uuid'],\n                        metadata=existing_node_data['metadata']\n                    )\n                    \n                    # Restore component hashes\n                    comp_hashes = existing_node_data.get('component_hashes', {})\n                    node.content_hash = comp_hashes.get('content')\n                    node.metadata_hash = comp_hashes.get('metadata')\n                    node.pdf_hash = comp_hashes.get('pdf')\n                    node.pagedata_hash = comp_hashes.get('pagedata')\n                    node.rm_hashes = comp_hashes.get('rm_files', [])\n                    \n                    # Restore paths and files\n                    node.local_path = existing_node_data.get('local_path', '')\n                    node.extracted_files = existing_node_data.get('extracted_files', [])\n                    \n                    self.nodes[node_uuid] = node\n                    self.stats['nodes_unchanged'] += 1\n                \n                self.logger.debug(f\"    \u2192 UNCHANGED {node_type}: {node_name}\")\n            \n            self.stats['total_nodes'] += 1\n            \n            if node_type == \"folder\":\n                self.stats['folders'] += 1\n            else:\n                self.stats['documents'] += 1\n            \n            # Track trash items\n            if parent_uuid == 'trash':\n                self.stats['trash_items'] += 1\n            \n            # Add child hashes to process\n            for child_obj in parsed['child_objects']:\n                if child_obj['hash'] not in discovered_hashes:\n                    hashes_to_process.append(child_obj['hash'])\n        \n        self.logger.info(f\"\u2705 Discovery complete: {len(self.nodes)} nodes found\")\n        self.logger.info(f\"   \ud83d\udcc2 Folders: {self.stats['folders']}\")\n        self.logger.info(f\"   \ud83d\udcc4 Documents: {self.stats['documents']}\")\n        self.logger.info(f\"   \ud83d\uddd1\ufe0f Trash items: {self.stats['trash_items']}\")\n        self.logger.info(f\"   \ud83c\udd95 New/Updated: {self.stats['nodes_added']}\")\n        self.logger.info(f\"   \u2705 Unchanged: {self.stats['nodes_unchanged']}\")\n        \n        return True\n    \n    # ================================================================\n    # STEP 2: HIERARCHY PHASE\n    # ================================================================\n    \n    def build_folder_structure(self) -> bool:\n        \"\"\"Step 2: Build correct folder structure based on parent UUIDs\"\"\"\n        self.logger.info(f\"\\n\ud83d\udcc1 STEP 2: HIERARCHY PHASE\")\n        self.logger.info(f\"\ud83c\udfd7\ufe0f Building folder structure...\")\n        \n        # Create special trash folder\n        trash_folder = self.content_dir / \"trash\"\n        trash_folder.mkdir(parents=True, exist_ok=True)\n        self.logger.info(f\"\ud83d\uddd1\ufe0f Created trash folder: {trash_folder}\")\n        \n        # Find root nodes (nodes with no parent or empty parent)\n        root_nodes = []\n        trash_nodes = []\n        for uuid, node in self.nodes.items():\n            if node.parent_uuid == 'trash':\n                trash_nodes.append(node)\n            elif not node.parent_uuid:\n                root_nodes.append(node)\n        \n        self.logger.info(f\"\ud83d\udccd Found {len(root_nodes)} root nodes\")\n        self.logger.info(f\"\ud83d\uddd1\ufe0f Found {len(trash_nodes)} trash 
nodes\")\n        \n        # Build paths recursively from root\n        for root_node in root_nodes:\n            self._build_node_paths(root_node, str(self.content_dir))\n        \n        # Build paths for trash nodes\n        for trash_node in trash_nodes:\n            self._build_node_paths(trash_node, str(trash_folder))\n        \n        # Create all folder directories\n        for uuid, node in self.nodes.items():\n            if node.node_type == \"folder\" and node.local_path:\n                Path(node.local_path).mkdir(parents=True, exist_ok=True)\n                self.logger.debug(f\"\ud83d\udcc1 Created: {node.local_path}\")\n        \n        self.logger.info(f\"\u2705 Folder structure built\")\n        return True\n    \n    def _build_node_paths(self, node: RemarkableNode, parent_path: str):\n        \"\"\"Recursively build paths for node and its children\"\"\"\n        # Sanitize name for filesystem\n        safe_name = \"\".join(c for c in node.name if c.isalnum() or c in (' ', '-', '_', '.')).rstrip()\n        if not safe_name:\n            safe_name = f\"unnamed_{node.uuid[:8]}\"\n        \n        # Set local path\n        node.local_path = str(Path(parent_path) / safe_name)\n        \n        # Log with special indication for trash items\n        if node.parent_uuid == 'trash':\n            self.logger.debug(f\"  \ud83d\uddd1\ufe0f Trash Path: {node.name} \u2192 {node.local_path}\")\n        else:\n            self.logger.debug(f\"  Path: {node.name} \u2192 {node.local_path}\")\n        \n        # Process children - both normal UUID children and trash children\n        for child_uuid, child_node in self.nodes.items():\n            if child_node.parent_uuid == node.uuid:\n                self._build_node_paths(child_node, node.local_path)\n    \n    # ================================================================\n    # STEP 3: EXTRACTION PHASE  \n    # ================================================================\n    \n    def extract_all_files(self) -> bool:\n        \"\"\"Step 3: Extract PDFs and .rm files to correct locations\"\"\"\n        self.logger.info(f\"\\n\ud83d\udcce STEP 3: EXTRACTION PHASE\")\n        self.logger.info(f\"\u2b07\ufe0f Extracting files to correct locations...\")\n        \n        nodes_to_process = []\n        for uuid, node in self.nodes.items():\n            if node.node_type == \"document\":\n                # Only process if node is new/updated (has no extracted files from database)\n                if not node.extracted_files or len(node.extracted_files) == 0:\n                    nodes_to_process.append(node)\n        \n        if nodes_to_process:\n            self.logger.info(f\"\ud83d\udd04 Processing {len(nodes_to_process)} new/updated documents...\")\n            \n            for node in nodes_to_process:\n                self._extract_node_files(node)\n        else:\n            self.logger.info(f\"\u2705 No new documents to process - all files up to date\")\n        \n        self.logger.info(f\"\u2705 File extraction complete\")\n        self.logger.info(f\"   \ud83d\udcc4 PDFs extracted: {self.stats['pdfs_extracted']}\")\n        self.logger.info(f\"   \ud83d\udd8a\ufe0f RM files extracted: {self.stats['rm_files_extracted']}\")\n        self.logger.info(f\"   \ud83d\udcc4 RM\u2192PDF conversions: {self.stats['rm_pdfs_converted']}\")\n        \n        return True\n    \n    def _extract_node_files(self, node: RemarkableNode):\n        \"\"\"Extract files for a document node\"\"\"\n        if not node.local_path:\n  
          self.logger.warning(f\"No local path for {node.name}\")\n            return\n        \n        # Ensure parent directory exists\n        parent_dir = Path(node.local_path).parent\n        parent_dir.mkdir(parents=True, exist_ok=True)\n        \n        # Extract PDF if available - this goes directly to the folder structure\n        if node.pdf_hash:\n            pdf_path = Path(node.local_path).with_suffix('.pdf')\n            if self._extract_pdf(node.pdf_hash, pdf_path):\n                node.extracted_files.append(str(pdf_path))\n                self.stats['pdfs_extracted'] += 1\n                self.logger.debug(f\"  \ud83d\udcc4 PDF: {pdf_path}\")\n        \n        # Extract .rm files if available - these get converted to PDF\n        if node.rm_hashes:\n            # Create temporary notebook subdirectory for processing\n            notebook_dir = parent_dir / f\"{Path(node.local_path).stem}_temp_notebook\"\n            notebook_dir.mkdir(exist_ok=True)\n            \n            # Extract .rm files to temporary directory\n            for i, rm_hash in enumerate(node.rm_hashes):\n                rm_path = notebook_dir / f\"page_{i+1}.rm\"\n                if self._extract_rm_file(rm_hash, rm_path):\n                    self.stats['rm_files_extracted'] += 1\n                    self.logger.debug(f\"  \ud83d\udd8a\ufe0f RM: {rm_path}\")\n            \n            # Convert .rm files to PDF (this places the PDF in the correct location)\n            self._convert_notebook_to_pdf(node, notebook_dir)\n            \n            # Clean up temporary notebook directory after conversion\n            import shutil\n            shutil.rmtree(notebook_dir, ignore_errors=True)\n        \n        # Store metadata components in node for database (don't extract to filesystem)\n        if node.content_hash:\n            content_info = self.fetch_hash_content(node.content_hash)\n            if content_info:\n                try:\n                    node.metadata['content_data'] = content_info['content'].decode('utf-8')\n                except UnicodeDecodeError:\n                    node.metadata['content_data'] = f\"<binary data: {len(content_info['content'])} bytes>\"\n        \n        if node.pagedata_hash:\n            pagedata_info = self.fetch_hash_content(node.pagedata_hash)  \n            if pagedata_info:\n                try:\n                    node.metadata['pagedata_data'] = pagedata_info['content'].decode('utf-8')\n                except UnicodeDecodeError:\n                    node.metadata['pagedata_data'] = f\"<binary data: {len(pagedata_info['content'])} bytes>\"\n    \n    def _extract_pdf(self, pdf_hash: str, target_path: Path) -> bool:\n        \"\"\"Extract PDF file\"\"\"\n        content_info = self.fetch_hash_content(pdf_hash)\n        if not content_info:\n            return False\n        \n        try:\n            with open(target_path, 'wb') as f:\n                f.write(content_info['content'])\n            return True\n        except Exception as e:\n            self.logger.error(f\"Failed to write PDF {target_path}: {e}\")\n            return False\n    \n    def _extract_rm_file(self, rm_hash: str, target_path: Path) -> bool:\n        \"\"\"Extract .rm file\"\"\"\n        content_info = self.fetch_hash_content(rm_hash)\n        if not content_info:\n            return False\n        \n        try:\n            with open(target_path, 'wb') as f:\n                f.write(content_info['content'])\n            return True\n        except Exception as e:\n     
       self.logger.error(f\"Failed to write RM file {target_path}: {e}\")\n            return False\n    \n    def _extract_component(self, comp_hash: str, target_path: Path) -> bool:\n        \"\"\"Extract other component\"\"\"\n        content_info = self.fetch_hash_content(comp_hash)\n        if not content_info:\n            return False\n        \n        try:\n            with open(target_path, 'wb') as f:\n                f.write(content_info['content'])\n            return True\n        except Exception as e:\n            self.logger.error(f\"Failed to write component {target_path}: {e}\")\n            return False\n    \n    def _convert_notebook_to_pdf(self, node: RemarkableNode, notebook_dir: Path):\n        \"\"\"Convert reMarkable notebook files to PDF using rmc and concatenate pages\"\"\"\n        try:\n            import subprocess\n            \n            # Find all .rm files in the notebook directory\n            rm_files = sorted(notebook_dir.glob(\"page_*.rm\"), key=lambda x: int(x.stem.split('_')[1]))\n            if not rm_files:\n                self.logger.debug(f\"  \u26a0\ufe0f No .rm files found for {node.name}\")\n                return\n            \n            # Final PDF should be placed at the same level as notebook folder, named after the node\n            parent_dir = notebook_dir.parent\n            final_pdf_path = parent_dir / f\"{node.name}.pdf\"\n            \n            if len(rm_files) == 1:\n                # Single page - convert directly\n                try:\n                    result = subprocess.run([\n                        \"rmc\", str(rm_files[0]), \"-o\", str(final_pdf_path)\n                    ], capture_output=True, text=True, timeout=60)\n                    \n                    if result.returncode == 0 and final_pdf_path.exists() and final_pdf_path.stat().st_size > 0:\n                        node.extracted_files.append(str(final_pdf_path))\n                        self.logger.debug(f\"  \ud83d\udcc4 Converted single page to PDF: {final_pdf_path}\")\n                        self.stats['rm_pdfs_converted'] += 1\n                    else:\n                        self.logger.debug(f\"  \u26a0\ufe0f rmc conversion failed: {result.stderr}\")\n                \n                except (subprocess.TimeoutExpired, Exception) as e:\n                    self.logger.debug(f\"  \u26a0\ufe0f rmc conversion error: {e}\")\n            \n            else:\n                # Multiple pages - convert each to temporary PDF and concatenate\n                temp_pdfs = []\n                conversion_success = True\n                \n                for i, rm_file in enumerate(rm_files):\n                    temp_pdf = notebook_dir / f\"temp_page_{i+1}.pdf\"\n                    \n                    try:\n                        result = subprocess.run([\n                            \"rmc\", str(rm_file), \"-o\", str(temp_pdf)\n                        ], capture_output=True, text=True, timeout=60)\n                        \n                        if result.returncode == 0 and temp_pdf.exists() and temp_pdf.stat().st_size > 0:\n                            temp_pdfs.append(temp_pdf)\n                            self.logger.debug(f\"  \ud83d\udcc4 Converted page {i+1}\")\n                        else:\n                            self.logger.debug(f\"  \u26a0\ufe0f rmc conversion failed for page {i+1}: {result.stderr}\")\n                            conversion_success = False\n                            break\n                    \n                  
  except (subprocess.TimeoutExpired, Exception) as e:\n                        self.logger.debug(f\"  \u26a0\ufe0f rmc conversion error for page {i+1}: {e}\")\n                        conversion_success = False\n                        break\n                \n                # Concatenate PDFs if all conversions succeeded\n                if conversion_success and temp_pdfs:\n                    try:\n                        # Use PyPDF2 to concatenate PDFs\n                        import PyPDF2\n                        \n                        pdf_writer = PyPDF2.PdfWriter()\n                        \n                        for temp_pdf in temp_pdfs:\n                            with open(temp_pdf, 'rb') as pdf_file:\n                                pdf_reader = PyPDF2.PdfReader(pdf_file)\n                                for page in pdf_reader.pages:\n                                    pdf_writer.add_page(page)\n                        \n                        # Write the concatenated PDF\n                        with open(final_pdf_path, 'wb') as output_file:\n                            pdf_writer.write(output_file)\n                        \n                        if final_pdf_path.exists() and final_pdf_path.stat().st_size > 0:\n                            node.extracted_files.append(str(final_pdf_path))\n                            self.logger.debug(f\"  \ud83d\udcc4 Concatenated {len(temp_pdfs)} pages to PDF: {final_pdf_path}\")\n                            self.stats['rm_pdfs_converted'] += 1\n                        \n                    except ImportError:\n                        # Fallback: use system tools to concatenate if PyPDF2 not available\n                        self.logger.debug(f\"  \u26a0\ufe0f PyPDF2 not available, using first page only\")\n                        if temp_pdfs:\n                            import shutil\n                            shutil.copy2(temp_pdfs[0], final_pdf_path)\n                            node.extracted_files.append(str(final_pdf_path))\n                            self.stats['rm_pdfs_converted'] += 1\n                    \n                    except Exception as e:\n                        self.logger.debug(f\"  \u26a0\ufe0f PDF concatenation failed: {e}\")\n                    \n                    finally:\n                        # Clean up temporary files\n                        for temp_pdf in temp_pdfs:\n                            temp_pdf.unlink(missing_ok=True)\n        \n        except Exception as e:\n            self.logger.debug(f\"  \u26a0\ufe0f PDF conversion error for {node.name}: {e}\")\n    \n    # ================================================================\n    # MAIN BUILD PROCESS\n    # ================================================================\n    \n    def build_complete_replica(self) -> bool:\n        \"\"\"Build complete replica using 3-step process\"\"\"\n        self.logger.info(f\"\ud83d\ude80 STARTING 3-STEP REPLICA BUILD\")\n        \n        # Get root hash\n        root_hash = self.get_root_hash()\n        if not root_hash:\n            self.logger.error(\"\u274c Failed to get root hash\")\n            return False\n        \n        # Step 1: Discovery\n        if not self.discover_all_nodes(root_hash):\n            self.logger.error(\"\u274c Discovery phase failed\")\n            return False\n        \n        # Step 2: Hierarchy\n        if not self.build_folder_structure():\n            self.logger.error(\"\u274c Hierarchy phase failed\") \n            return False\n        \n    
    # Step 3: Extraction\n        if not self.extract_all_files():\n            self.logger.error(\"\u274c Extraction phase failed\")\n            return False\n        \n        # Save database\n        self._save_database()\n        \n        # Final report\n        self.logger.info(f\"\\n\ud83c\udf89 REPLICA BUILD COMPLETED!\")\n        self.logger.info(f\"\ud83d\udcca FINAL STATISTICS:\")\n        self.logger.info(f\"  \ud83d\udcc1 Total nodes: {self.stats['total_nodes']}\")\n        self.logger.info(f\"  \ud83d\udcc2 Folders: {self.stats['folders']}\")\n        self.logger.info(f\"  \ud83d\udcc4 Documents: {self.stats['documents']}\")\n        self.logger.info(f\"  \ufffd\ufe0f Trash items: {self.stats['trash_items']}\")\n        self.logger.info(f\"  \ufffd\ud83d\udcc4 PDFs extracted: {self.stats['pdfs_extracted']}\")\n        self.logger.info(f\"  \ud83d\udd8a\ufe0f RM files extracted: {self.stats['rm_files_extracted']}\")\n        self.logger.info(f\"  \ud83d\udcc4 RM\u2192PDF conversions: {self.stats['rm_pdfs_converted']}\")\n        self.logger.info(f\"  \u274c Failed downloads: {len(self.failed_downloads)}\")\n        self.logger.info(f\"\\n\ud83d\udcc1 Replica location: {self.replica_dir}\")\n        self.logger.info(f\"\ud83d\uddd1\ufe0f Trash location: {self.replica_dir}/content/trash\")\n        \n        return True\n    \n    def _save_database(self):\n        \"\"\"Save the comprehensive replica database\"\"\"\n        database = {\n            'replica_info': {\n                'created': datetime.now().isoformat(),\n                'last_sync': datetime.now().isoformat(),\n                'replica_dir': str(self.replica_dir),\n                'total_nodes': len(self.nodes),\n                'statistics': self.stats,\n                'version': \"2.0\"\n            },\n            'nodes': {},\n            'hash_registry': {},  # For tracking file changes\n            'failed_downloads': list(self.failed_downloads)\n        }\n        \n        # Create detailed node entries\n        for uuid, node in self.nodes.items():\n            node_data = {\n                'uuid': node.uuid,\n                'hash': node.hash,\n                'name': node.name,\n                'node_type': node.node_type,\n                'parent_uuid': node.parent_uuid,\n                'local_path': node.local_path,\n                'extracted_files': node.extracted_files,\n                \n                # Component hashes for sync tracking\n                'component_hashes': {\n                    'content': node.content_hash,\n                    'metadata': node.metadata_hash,\n                    'pdf': node.pdf_hash,\n                    'pagedata': node.pagedata_hash,\n                    'rm_files': node.rm_hashes\n                },\n                \n                # Full metadata including content and pagedata\n                'metadata': node.metadata,\n                \n                # Timestamps\n                'last_modified': node.metadata.get('lastModified', ''),\n                'version': node.metadata.get('version', 0),\n                \n                # Sync status\n                'sync_status': 'current',\n                'last_synced': datetime.now().isoformat()\n            }\n            \n            database['nodes'][uuid] = node_data\n            \n            # Add to hash registry for quick lookup\n            database['hash_registry'][node.hash] = {\n                'uuid': uuid,\n                'type': 'node',\n                'last_seen': 
datetime.now().isoformat()\n            }\n            \n            # Add component hashes to registry\n            for comp_type, comp_hash in node_data['component_hashes'].items():\n                if comp_hash:\n                    if isinstance(comp_hash, list):\n                        for i, h in enumerate(comp_hash):\n                            database['hash_registry'][h] = {\n                                'uuid': uuid,\n                                'type': f'{comp_type}_{i}',\n                                'last_seen': datetime.now().isoformat()\n                            }\n                    else:\n                        database['hash_registry'][comp_hash] = {\n                            'uuid': uuid,\n                            'type': comp_type,\n                            'last_seen': datetime.now().isoformat()\n                        }\n        \n        database_file = self.replica_dir / \"replica_database.json\"\n        with open(database_file, 'w', encoding='utf-8') as f:\n            json.dump(database, f, indent=2, ensure_ascii=False)\n        \n        self.logger.info(f\"\ud83d\udcbe Database saved: {database_file}\")\n        \n        # Also create a human-readable summary\n        summary_file = self.replica_dir / \"replica_summary.txt\"\n        with open(summary_file, 'w', encoding='utf-8') as f:\n            f.write(f\"reMarkable Replica Summary\\n\")\n            f.write(f\"=\" * 50 + \"\\n\")\n            f.write(f\"Created: {database['replica_info']['created']}\\n\")\n            f.write(f\"Location: {database['replica_info']['replica_dir']}\\n\")\n            f.write(f\"Total Nodes: {database['replica_info']['total_nodes']}\\n\")\n            f.write(f\"Statistics: {database['replica_info']['statistics']}\\n\\n\")\n            \n            f.write(f\"Folder Structure:\\n\")\n            f.write(f\"-\" * 20 + \"\\n\")\n            \n            # Write folder structure\n            def write_node_tree(uuid, indent=0):\n                if uuid not in self.nodes:\n                    return\n                node = self.nodes[uuid]\n                prefix = \"  \" * indent\n                icon = \"\ud83d\udcc1\" if node.node_type == \"folder\" else \"\ud83d\udcc4\"\n                f.write(f\"{prefix}{icon} {node.name}\\n\")\n                \n                # Find children\n                for child_uuid, child_node in self.nodes.items():\n                    if child_node.parent_uuid == uuid:\n                        write_node_tree(child_uuid, indent + 1)\n            \n            # Write root nodes\n            for uuid, node in self.nodes.items():\n                if not node.parent_uuid:\n                    write_node_tree(uuid)\n        \n        self.logger.info(f\"\ud83d\udcc4 Summary saved: {summary_file}\")",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/cloudtest/local_replica_v2.py",
      "tags": [
        "class",
        "remarkablereplicabuilder"
      ],
      "updated_at": "2025-12-07T01:27:51.991707",
      "usage_example": "# Example usage:\n# result = RemarkableReplicaBuilder(bases)"
    },
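The entry_pattern regex in parse_directory_listing above implies a colon-separated listing format of hash:flags:uuid[.component]:type:size. The following is a minimal standalone sketch of how one such line splits into either a child object or a data component; only the pattern itself is taken from the source above, and the sample entry is hypothetical (real entries carry 64-character SHA256 hashes).

import re

# Same pattern as parse_directory_listing above; the sample entry below is made up.
ENTRY_PATTERN = r'^([a-f0-9]{64}):([0-9a-fA-F]+):([a-f0-9-/]+(?:\.[^:]+)?):(\d+):(\d+)$'

sample = "ab" * 32 + ":80000000:0123abcd-0000-4000-8000-000000000000.metadata:0:211"
match = re.match(ENTRY_PATTERN, sample, re.IGNORECASE)
if match:
    hash_val, flags, uuid_component, type_val, size_val = match.groups()
    # A '.' in the uuid component marks a data component (.content, .metadata,
    # .pdf, page .rm files); a bare UUID marks a child object.
    kind = "data_component" if "." in uuid_component else "child_object"
    print(kind, uuid_component, int(size_val))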
    {
      "best_practices": [
        "Always specify uuid, hash, name, node_type, and parent_uuid when creating instances as these are required fields.",
        "Use node_type='folder' for directories and node_type='document' for files to ensure proper processing.",
        "The __post_init__ method automatically initializes mutable defaults (rm_hashes, metadata, extracted_files) to prevent shared state between instances.",
        "Set local_path and extracted_files during the extraction/download phase (phase 2) after the node has been created.",
        "Use the hash fields (content_hash, metadata_hash, pdf_hash, pagedata_hash) to detect changes and avoid unnecessary re-downloads.",
        "The metadata dictionary should contain raw metadata from the reMarkable device for reference and debugging.",
        "Parent-child relationships are maintained through parent_uuid, allowing tree structure reconstruction.",
        "This is a pure data class with no methods beyond __post_init__, so it's safe to serialize/deserialize using dataclasses.asdict()."
      ],
      "class_interface": {
        "attributes": [
          {
            "description": "Unique identifier for the node in the reMarkable cloud system",
            "is_class_variable": false,
            "name": "uuid",
            "type": "str"
          },
          {
            "description": "Overall hash value representing the current state of the node",
            "is_class_variable": false,
            "name": "hash",
            "type": "str"
          },
          {
            "description": "Human-readable name of the document or folder",
            "is_class_variable": false,
            "name": "name",
            "type": "str"
          },
          {
            "description": "Type of the node: 'folder' or 'document'",
            "is_class_variable": false,
            "name": "node_type",
            "type": "str"
          },
          {
            "description": "UUID of the parent folder, None for root-level items",
            "is_class_variable": false,
            "name": "parent_uuid",
            "type": "Optional[str]"
          },
          {
            "description": "Hash of the content file for change detection",
            "is_class_variable": false,
            "name": "content_hash",
            "type": "Optional[str]"
          },
          {
            "description": "Hash of the metadata file for change detection",
            "is_class_variable": false,
            "name": "metadata_hash",
            "type": "Optional[str]"
          },
          {
            "description": "Hash of the PDF file if present",
            "is_class_variable": false,
            "name": "pdf_hash",
            "type": "Optional[str]"
          },
          {
            "description": "Hash of the pagedata file containing page-specific information",
            "is_class_variable": false,
            "name": "pagedata_hash",
            "type": "Optional[str]"
          },
          {
            "description": "List of hashes for .rm files (reMarkable drawing format)",
            "is_class_variable": false,
            "name": "rm_hashes",
            "type": "List[str]"
          },
          {
            "description": "Dictionary containing metadata from the reMarkable device",
            "is_class_variable": false,
            "name": "metadata",
            "type": "Dict[str, Any]"
          },
          {
            "description": "Local file system path where this node's files are stored",
            "is_class_variable": false,
            "name": "local_path",
            "type": "str"
          },
          {
            "description": "List of file paths that have been extracted locally for this node",
            "is_class_variable": false,
            "name": "extracted_files",
            "type": "List[str]"
          }
        ],
        "methods": [
          {
            "is_property": false,
            "is_static": false,
            "name": "__post_init__",
            "parameters": {},
            "purpose": "Initializes mutable default values after dataclass initialization to prevent shared state between instances",
            "returns": "None. Modifies instance attributes in-place.",
            "signature": "__post_init__(self) -> None"
          }
        ]
      },
      "complexity": "simple",
      "component_type": "class",
      "conditional_imports": [],
      "created_at": "2025-12-07 00:27:51",
      "decorators": [
        "dataclass"
      ],
      "dependencies": [],
      "description": "A dataclass representing a node (folder or document) in the reMarkable cloud storage system, storing metadata, hashes, and local file paths.",
      "docstring": "Simple node representation",
      "id": 2045,
      "imports": [
        "import os",
        "import json",
        "import requests",
        "import logging",
        "from pathlib import Path",
        "from typing import Dict",
        "from typing import List",
        "from typing import Optional",
        "from typing import Any",
        "from typing import Set",
        "from dataclasses import dataclass",
        "from dataclasses import asdict",
        "from datetime import datetime",
        "import sys",
        "from auth import RemarkableAuth",
        "import re",
        "import shutil",
        "import subprocess",
        "import PyPDF2",
        "import shutil"
      ],
      "imports_required": [
        "from dataclasses import dataclass",
        "from typing import Dict, List, Optional, Any"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 49,
      "line_start": 21,
      "name": "RemarkableNode_v1",
      "parameters": [
        {
          "annotation": "",
          "default": null,
          "name": "bases"
        }
      ],
      "parameters_explained": {
        "content_hash": "Hash of the content file (.content) for this node. Optional, used for detecting content changes.",
        "extracted_files": "List of file paths that have been extracted locally for this node. Defaults to empty list if not provided.",
        "hash": "Overall hash value representing the current state of the node, used for change detection during synchronization.",
        "local_path": "Local file system path where this node's files are stored. Set during phase 2 of synchronization. Defaults to empty string.",
        "metadata": "Dictionary containing metadata from the reMarkable device (e.g., timestamps, version info). Defaults to empty dict if not provided.",
        "metadata_hash": "Hash of the metadata file (.metadata) for this node. Optional, used for detecting metadata changes.",
        "name": "Human-readable name of the document or folder as displayed on the reMarkable device.",
        "node_type": "Type of the node, must be either 'folder' or 'document'. Determines how the node should be processed.",
        "pagedata_hash": "Hash of the pagedata file containing page-specific information. Optional.",
        "parent_uuid": "UUID of the parent folder containing this node. None for root-level items.",
        "pdf_hash": "Hash of the PDF file if the document is or contains a PDF. Optional.",
        "rm_hashes": "List of hashes for .rm files (reMarkable's proprietary drawing format). Defaults to empty list if not provided.",
        "uuid": "Unique identifier for the node in the reMarkable cloud system. This is the primary key used to identify documents and folders."
      },
      "parent_class": null,
      "purpose": "RemarkableNode serves as a data structure to represent files and folders from a reMarkable tablet's cloud storage. It tracks unique identifiers (UUID), content hashes for change detection, metadata from the device, and local file system paths after synchronization. This class is typically used in sync operations to maintain a mapping between cloud resources and local files, enabling efficient incremental updates by comparing hashes.",
      "return_annotation": null,
      "return_explained": "Instantiation returns a RemarkableNode object with all specified attributes initialized. The __post_init__ method ensures that mutable default values (rm_hashes, metadata, extracted_files) are properly initialized as empty collections if not provided, preventing shared mutable default issues.",
      "settings_required": [],
      "source_code": "class RemarkableNode:\n    \"\"\"Simple node representation\"\"\"\n    uuid: str\n    hash: str\n    name: str\n    node_type: str  # \"folder\" or \"document\"\n    parent_uuid: Optional[str]\n    \n    # Component hashes\n    content_hash: Optional[str] = None\n    metadata_hash: Optional[str] = None\n    pdf_hash: Optional[str] = None\n    pagedata_hash: Optional[str] = None\n    rm_hashes: List[str] = None\n    \n    # Metadata from reMarkable\n    metadata: Dict[str, Any] = None\n    \n    # Local paths (set in phase 2)\n    local_path: str = \"\"\n    extracted_files: List[str] = None\n    \n    def __post_init__(self):\n        if self.rm_hashes is None:\n            self.rm_hashes = []\n        if self.metadata is None:\n            self.metadata = {}\n        if self.extracted_files is None:\n            self.extracted_files = []",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/cloudtest/local_replica_v2.py",
      "tags": [
        "dataclass",
        "reMarkable",
        "sync",
        "node",
        "file-system",
        "metadata",
        "hash",
        "cloud-storage",
        "document-management"
      ],
      "updated_at": "2025-12-07T01:27:51.977103",
      "usage_example": "from dataclasses import dataclass\nfrom typing import Dict, List, Optional, Any\n\n@dataclass\nclass RemarkableNode:\n    uuid: str\n    hash: str\n    name: str\n    node_type: str\n    parent_uuid: Optional[str]\n    content_hash: Optional[str] = None\n    metadata_hash: Optional[str] = None\n    pdf_hash: Optional[str] = None\n    pagedata_hash: Optional[str] = None\n    rm_hashes: List[str] = None\n    metadata: Dict[str, Any] = None\n    local_path: str = \"\"\n    extracted_files: List[str] = None\n    \n    def __post_init__(self):\n        if self.rm_hashes is None:\n            self.rm_hashes = []\n        if self.metadata is None:\n            self.metadata = {}\n        if self.extracted_files is None:\n            self.extracted_files = []\n\n# Create a folder node\nfolder = RemarkableNode(\n    uuid=\"abc-123\",\n    hash=\"hash123\",\n    name=\"My Folder\",\n    node_type=\"folder\",\n    parent_uuid=None\n)\n\n# Create a document node with metadata\ndocument = RemarkableNode(\n    uuid=\"def-456\",\n    hash=\"hash456\",\n    name=\"My Document\",\n    node_type=\"document\",\n    parent_uuid=\"abc-123\",\n    content_hash=\"content_hash_123\",\n    metadata_hash=\"meta_hash_123\",\n    pdf_hash=\"pdf_hash_123\",\n    metadata={\"lastModified\": \"1234567890\", \"version\": 1}\n)\n\n# Access attributes\nprint(document.name)  # \"My Document\"\nprint(document.rm_hashes)  # []\nprint(document.metadata)  # {\"lastModified\": \"1234567890\", \"version\": 1}\n\n# Update local path after extraction\ndocument.local_path = \"/path/to/local/file\"\ndocument.extracted_files = [\"/path/to/local/file.pdf\", \"/path/to/local/file.rm\"]"
    },
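The best practices above recommend using the component hash fields to detect changes and avoid unnecessary re-downloads. Below is a minimal sketch of that comparison, assuming a previously saved record shaped like the 'component_hashes' block written to the replica database; needs_refresh and saved_record are hypothetical names, not part of the class.

from typing import Any, Dict, Optional

def needs_refresh(node: "RemarkableNode", saved_record: Optional[Dict[str, Any]]) -> bool:
    """Return True when the node is new or any tracked hash differs from the saved record."""
    if not saved_record:
        return True  # never synced before
    if node.hash != saved_record.get("hash"):
        return True  # overall node hash changed
    saved = saved_record.get("component_hashes", {})
    return (
        node.content_hash != saved.get("content")
        or node.metadata_hash != saved.get("metadata")
        or node.pdf_hash != saved.get("pdf")
        or node.pagedata_hash != saved.get("pagedata")
        or node.rm_hashes != saved.get("rm_files", [])
    )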
    {
      "best_practices": [
        "Always run preview_changes(dry_run=True) before applying actual changes to inspect what will be modified",
        "The tool automatically creates backups in the 'docschema_repair' directory - review these before and after repairs",
        "Ensure stable network connection as the tool makes multiple API calls to reMarkable cloud services",
        "The class preserves the original version number from root.docSchema - do not manually modify this",
        "Only documents with suspicious sizes (2247, 2246, 2715) are modified; folders and other entries remain unchanged",
        "If authentication fails during __init__, a RuntimeError is raised - handle this appropriately",
        "The repair process fetches individual document schemas to recalculate sizes - this may take time for many corrupted entries",
        "Review console output carefully as it provides detailed logging of each operation and categorization",
        "The tool uses SHA256 hashing to generate new content hashes - ensure hashlib is available",
        "Backup files are timestamped and stored locally for recovery if needed"
      ],
      "class_interface": {
        "attributes": [
          {
            "description": "Authenticated HTTP session for making API requests to reMarkable cloud services",
            "is_class_variable": false,
            "name": "session",
            "type": "requests.Session"
          },
          {
            "description": "Path object pointing to the parent directory of the script file",
            "is_class_variable": false,
            "name": "base_dir",
            "type": "Path"
          },
          {
            "description": "Path object pointing to 'docschema_repair' subdirectory where backup and preview files are stored",
            "is_class_variable": false,
            "name": "backup_dir",
            "type": "Path"
          }
        ],
        "methods": [
          {
            "is_property": false,
            "is_static": false,
            "name": "__init__",
            "parameters": {},
            "purpose": "Initialize the repair tool with authenticated session and setup backup directory",
            "returns": "None - initializes instance with session, base_dir, and backup_dir attributes",
            "signature": "__init__(self) -> None"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "get_current_root_content",
            "parameters": {},
            "purpose": "Fetch the current root.docSchema content and metadata from reMarkable cloud",
            "returns": "Tuple containing (root_content_string, hash_string, root_metadata_dict) where content is the full docSchema text, hash is the current content hash, and metadata contains version and other root information",
            "signature": "get_current_root_content(self) -> Tuple[str, str, Dict]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "identify_corrupted_entries",
            "parameters": {
              "root_content": "The full text content of root.docSchema file with newline-separated entries"
            },
            "purpose": "Parse root.docSchema content and categorize entries into good, corrupted, folders, and special types based on size and type fields",
            "returns": "Dictionary with keys 'original_version', 'good_entries', 'corrupted_entries', 'folders', 'special' containing categorized entry dictionaries with fields: line_number, hash, uuid, type, size, full_line",
            "signature": "identify_corrupted_entries(self, root_content: str) -> Dict[str, List]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "fix_corrupted_document",
            "parameters": {
              "entry": "Dictionary containing document entry fields including 'hash', 'uuid', 'size', and 'full_line'"
            },
            "purpose": "Recalculate correct size for a corrupted document by fetching its docSchema and summing component sizes",
            "returns": "Corrected docSchema line string with recalculated size, or original line if repair fails",
            "signature": "fix_corrupted_document(self, entry: Dict) -> str"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "rebuild_corrected_root",
            "parameters": {
              "categorized": "Dictionary from identify_corrupted_entries containing categorized entries and original version"
            },
            "purpose": "Reconstruct complete root.docSchema content with fixed sizes for corrupted entries while preserving all other entries",
            "returns": "Complete corrected root.docSchema content as newline-separated string with version header and all entries",
            "signature": "rebuild_corrected_root(self, categorized: Dict) -> str"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "preview_changes",
            "parameters": {
              "dry_run": "If True, only preview changes without uploading; if False, apply changes to server"
            },
            "purpose": "Preview or execute the repair process, showing what changes will be made and optionally applying them",
            "returns": "Boolean indicating success (True) or failure (False) of the preview/repair operation",
            "signature": "preview_changes(self, dry_run: bool = True) -> bool"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "upload_corrected_root",
            "parameters": {
              "corrected_content": "The complete corrected root.docSchema content string to upload"
            },
            "purpose": "Upload the corrected root.docSchema content to reMarkable cloud and update the root pointer",
            "returns": "Boolean indicating success (True) or failure (False) of the upload and root update operations",
            "signature": "upload_corrected_root(self, corrected_content: str) -> bool"
          }
        ]
      },
      "complexity": "complex",
      "component_type": "class",
      "conditional_imports": [
        {
          "condition": "Required during __init__ to authenticate with reMarkable cloud services",
          "import": "from auth import RemarkableAuth",
          "optional": false
        }
      ],
      "created_at": "2025-12-07 00:27:06",
      "decorators": [],
      "dependencies": [
        "requests",
        "pathlib",
        "typing",
        "json",
        "time",
        "hashlib"
      ],
      "description": "A repair tool that fixes corrupted size entries in reMarkable cloud's root.docSchema file by recalculating correct document sizes from their component schemas.",
      "docstring": "Corrected version of the repair tool",
      "id": 2043,
      "imports": [
        "import json",
        "import time",
        "import hashlib",
        "from pathlib import Path",
        "from typing import Dict",
        "from typing import List",
        "from typing import Tuple",
        "from typing import Any",
        "import requests",
        "from auth import RemarkableAuth"
      ],
      "imports_required": [
        "import json",
        "import time",
        "import hashlib",
        "from pathlib import Path",
        "from typing import Dict, List, Tuple, Any",
        "import requests"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 274,
      "line_start": 19,
      "name": "CorrectedRootDocSchemaRepair",
      "parameters": [
        {
          "annotation": "",
          "default": null,
          "name": "bases"
        }
      ],
      "parameters_explained": {
        "__init__": "No parameters required. The constructor automatically authenticates with reMarkable cloud services, creates a backup directory for repair operations, and initializes the session for API communication."
      },
      "parent_class": null,
      "purpose": "This class provides functionality to diagnose and repair corrupted entries in the reMarkable cloud storage's root.docSchema file. It identifies documents with suspicious size values (2247, 2246, 2715 bytes), recalculates their correct sizes by summing component sizes from individual document schemas, and uploads the corrected root.docSchema back to the reMarkable cloud. The tool preserves the original version number and maintains all folder and special entries unchanged while fixing only corrupted document entries.",
      "return_annotation": null,
      "return_explained": "Instantiation returns a CorrectedRootDocSchemaRepair object with an authenticated session ready to perform repair operations. Key method returns: get_current_root_content() returns tuple of (content_string, hash_string, metadata_dict); identify_corrupted_entries() returns dict with categorized entries; fix_corrupted_document() returns corrected line string; rebuild_corrected_root() returns complete corrected content string; preview_changes() and upload_corrected_root() return boolean success status.",
      "settings_required": [
        "RemarkableAuth module must be available and properly configured for authentication",
        "Valid reMarkable cloud account credentials configured in the auth module",
        "Network access to eu.tectonic.remarkable.com API endpoints",
        "Write permissions to create backup directory in the script's parent directory"
      ],
      "source_code": "class CorrectedRootDocSchemaRepair:\n    \"\"\"Corrected version of the repair tool\"\"\"\n    \n    def __init__(self):\n        # Load auth session\n        from auth import RemarkableAuth\n        auth = RemarkableAuth()\n        self.session = auth.get_authenticated_session()\n        \n        if not self.session:\n            raise RuntimeError(\"Failed to authenticate with reMarkable\")\n        \n        self.base_dir = Path(__file__).parent\n        self.backup_dir = self.base_dir / \"docschema_repair\"\n        self.backup_dir.mkdir(exist_ok=True)\n        \n        print(\"\ud83d\udd27 CORRECTED reMarkable Root DocSchema Repair Tool\")\n    \n    def get_current_root_content(self) -> Tuple[str, str, Dict]:\n        \"\"\"Get current root.docSchema content and metadata\"\"\"\n        # Get root info\n        root_response = self.session.get(\"https://eu.tectonic.remarkable.com/sync/v4/root\")\n        root_response.raise_for_status()\n        root_data = root_response.json()\n        \n        # Get content\n        root_content_response = self.session.get(f\"https://eu.tectonic.remarkable.com/sync/v3/files/{root_data['hash']}\")\n        root_content_response.raise_for_status()\n        root_content = root_content_response.text\n        \n        return root_content, root_data['hash'], root_data\n    \n    def identify_corrupted_entries(self, root_content: str) -> Dict[str, List]:\n        \"\"\"Identify which entries need size correction\"\"\"\n        lines = root_content.strip().split('\\n')\n        original_version = lines[0]  # KEEP ORIGINAL VERSION!\n        entries = lines[1:]\n        \n        print(f\"\ud83d\udcca Original version: {original_version} (will preserve this)\")\n        print(f\"\ud83d\udcca Total entries: {len(entries)}\")\n        \n        # Suspicious sizes that indicate corruption\n        suspicious_sizes = {'2247', '2246', '2715'}\n        \n        categorized = {\n            'original_version': original_version,\n            'good_entries': [],      # Entries with reasonable sizes\n            'corrupted_entries': [], # Entries with suspicious small sizes\n            'folders': [],           # All folder types (type 1 and 2)\n            'special': []            # Other special entries\n        }\n        \n        for i, line in enumerate(entries):\n            if ':' in line:\n                parts = line.split(':')\n                if len(parts) >= 5:\n                    entry = {\n                        'line_number': i + 1,\n                        'hash': parts[0],\n                        'uuid': parts[2], \n                        'type': parts[3],\n                        'size': parts[4],\n                        'full_line': line\n                    }\n                    \n                    # Categorize by type and size\n                    if entry['type'] in ['1', '2']:  # Folders (both types)\n                        categorized['folders'].append(entry)\n                        print(f\"\ud83d\udcc1 Folder (type {entry['type']}): {entry['uuid'][:8]}... size {entry['size']}\")\n                    \n                    elif entry['type'] in ['4', '5']:  # Documents (PDF/Notebook)\n                        if entry['size'] in suspicious_sizes:\n                            categorized['corrupted_entries'].append(entry)\n                            print(f\"\ud83d\udd27 CORRUPTED document: {entry['uuid'][:8]}... 
size {entry['size']} (needs fixing)\")\n                        else:\n                            categorized['good_entries'].append(entry)\n                            print(f\"\u2705 Good document: {entry['uuid'][:8]}... size {entry['size']}\")\n                    \n                    else:\n                        categorized['special'].append(entry)\n                        print(f\"\u2753 Special entry type {entry['type']}: {entry['uuid'][:8]}... size {entry['size']}\")\n        \n        print(f\"\\n\ud83d\udcca Classification Summary:\")\n        print(f\"   \ud83d\udcc1 Folders: {len(categorized['folders'])}\")\n        print(f\"   \u2705 Good documents: {len(categorized['good_entries'])}\")\n        print(f\"   \ud83d\udd27 Corrupted documents: {len(categorized['corrupted_entries'])}\")\n        print(f\"   \u2753 Special entries: {len(categorized['special'])}\")\n        \n        return categorized\n    \n    def fix_corrupted_document(self, entry: Dict) -> str:\n        \"\"\"Fix a single corrupted document by recalculating its correct size\"\"\"\n        doc_hash = entry['hash']\n        doc_uuid = entry['uuid']\n        \n        print(f\"\\n\ud83d\udd27 Fixing document {doc_uuid[:8]}...\")\n        print(f\"   Current size: {entry['size']}\")\n        \n        try:\n            # Fetch document's docSchema\n            doc_response = self.session.get(f\"https://eu.tectonic.remarkable.com/sync/v3/files/{doc_hash}\")\n            doc_response.raise_for_status()\n            doc_content = doc_response.text\n            \n            # Parse components and sum their sizes\n            lines = doc_content.strip().split('\\n')\n            if len(lines) < 2:\n                print(f\"   \u274c Invalid docSchema - keeping original\")\n                return entry['full_line']\n            \n            total_component_size = 0\n            component_count = 0\n            \n            for line in lines[1:]:  # Skip version header\n                if ':' in line:\n                    parts = line.split(':')\n                    if len(parts) >= 5:\n                        comp_size = int(parts[4])\n                        total_component_size += comp_size\n                        component_count += 1\n            \n            # The correct size is the sum of all component sizes\n            correct_size = total_component_size\n            \n            print(f\"   \ud83d\udce6 Components: {component_count}\")\n            print(f\"   \ud83d\udcca Correct size: {correct_size}\")\n            \n            # Rebuild the line with correct size\n            parts = entry['full_line'].split(':')\n            parts[4] = str(correct_size)  # Replace size field\n            fixed_line = ':'.join(parts)\n            \n            print(f\"   \u2705 Fixed: {entry['size']} \u2192 {correct_size}\")\n            return fixed_line\n            \n        except Exception as e:\n            print(f\"   \u274c Error fixing document: {e}\")\n            print(f\"   \u26a0\ufe0f  Keeping original line\")\n            return entry['full_line']\n    \n    def rebuild_corrected_root(self, categorized: Dict) -> str:\n        \"\"\"Rebuild root.docSchema with all entries (fixed sizes where needed)\"\"\"\n        print(f\"\\n\ud83c\udfd7\ufe0f  Rebuilding root.docSchema...\")\n        \n        # Start with ORIGINAL version (not \"43\"!)\n        lines = [categorized['original_version']]\n        \n        # Add folders (unchanged)\n        for folder in categorized['folders']:\n            
lines.append(folder['full_line'])\n        \n        # Add good documents (unchanged)\n        for doc in categorized['good_entries']:\n            lines.append(doc['full_line'])\n        \n        # Add fixed corrupted documents\n        for corrupted_doc in categorized['corrupted_entries']:\n            fixed_line = self.fix_corrupted_document(corrupted_doc)\n            lines.append(fixed_line)\n        \n        # Add special entries (unchanged)\n        for special in categorized['special']:\n            lines.append(special['full_line'])\n        \n        new_content = '\\n'.join(lines)\n        \n        print(f\"\\n\u2705 Rebuilt root.docSchema:\")\n        print(f\"   \ud83d\udcca Version: {categorized['original_version']} (preserved)\")\n        print(f\"   \ud83d\udcca Total entries: {len(lines) - 1}\")\n        print(f\"   \ud83d\udcc1 Folders: {len(categorized['folders'])}\")\n        print(f\"   \u2705 Good documents: {len(categorized['good_entries'])}\")\n        print(f\"   \ud83d\udd27 Fixed documents: {len(categorized['corrupted_entries'])}\")\n        print(f\"   \u2753 Special entries: {len(categorized['special'])}\")\n        print(f\"   \ud83d\udccf Content size: {len(new_content)} bytes\")\n        \n        return new_content\n    \n    def preview_changes(self, dry_run: bool = True) -> bool:\n        \"\"\"Preview what changes will be made\"\"\"\n        print(f\"\\n\ud83d\udd0d {'DRY RUN' if dry_run else 'LIVE RUN'} - Root DocSchema Repair\")\n        print(\"=\" * 60)\n        \n        try:\n            # Get current state\n            root_content, current_hash, root_data = self.get_current_root_content()\n            \n            print(f\"\ud83d\udccb Current root.docSchema:\")\n            print(f\"   Hash: {current_hash}\")\n            print(f\"   Size: {len(root_content)} bytes\")\n            \n            # Identify corrupted entries\n            categorized = self.identify_corrupted_entries(root_content)\n            \n            if len(categorized['corrupted_entries']) == 0:\n                print(f\"\\n\u2705 No corrupted entries found - no changes needed!\")\n                return True\n            \n            # Build corrected version\n            corrected_content = self.rebuild_corrected_root(categorized)\n            \n            # Save preview\n            preview_file = self.backup_dir / f\"corrected_preview_{int(time.time())}.txt\"\n            with open(preview_file, 'w') as f:\n                f.write(corrected_content)\n            print(f\"\ud83d\udcc4 Preview saved to: {preview_file}\")\n            \n            if dry_run:\n                print(f\"\\n\ud83d\udd0d DRY RUN COMPLETE - No changes made to server\")\n                return True\n            \n            # Upload corrected version\n            return self.upload_corrected_root(corrected_content)\n            \n        except Exception as e:\n            print(f\"\u274c Repair failed: {e}\")\n            return False\n    \n    def upload_corrected_root(self, corrected_content: str) -> bool:\n        \"\"\"Upload the corrected root.docSchema\"\"\"\n        print(f\"\\n\u2b06\ufe0f  Uploading corrected root.docSchema...\")\n        \n        try:\n            # Calculate new hash\n            new_hash = hashlib.sha256(corrected_content.encode()).hexdigest()\n            print(f\"   \ud83d\udcca New hash: {new_hash}\")\n            \n            # Upload content\n            upload_response = self.session.put(\n                
f\"https://eu.tectonic.remarkable.com/sync/v3/files/{new_hash}\",\n                data=corrected_content.encode(),\n                headers={'Content-Type': 'text/plain'}\n            )\n            \n            if upload_response.status_code in [200, 202]:\n                print(f\"   \u2705 Content uploaded ({upload_response.status_code})\")\n                \n                # Update root pointer\n                root_update_response = self.session.put(\n                    \"https://eu.tectonic.remarkable.com/sync/v4/root\",\n                    json={'hash': new_hash}\n                )\n                \n                if root_update_response.status_code in [200, 202]:\n                    print(f\"   \u2705 Root updated ({root_update_response.status_code})\")\n                    print(f\"   \ud83c\udf89 REPAIR COMPLETED SUCCESSFULLY!\")\n                    return True\n                else:\n                    print(f\"   \u274c Root update failed: {root_update_response.status_code}\")\n                    return False\n            else:\n                print(f\"   \u274c Content upload failed: {upload_response.status_code}\")\n                return False\n                \n        except Exception as e:\n            print(f\"   \u274c Upload failed: {e}\")\n            return False",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/cloudtest/corrected_repair.py",
      "tags": [
        "remarkable",
        "cloud-storage",
        "repair-tool",
        "data-integrity",
        "schema-validation",
        "file-corruption",
        "api-client",
        "backup",
        "document-management"
      ],
      "updated_at": "2025-12-07T01:27:06.773018",
      "usage_example": "# Basic usage with dry run\nrepair_tool = CorrectedRootDocSchemaRepair()\n\n# Preview changes without modifying server\nsuccess = repair_tool.preview_changes(dry_run=True)\n\n# If preview looks good, apply changes\nif success:\n    success = repair_tool.preview_changes(dry_run=False)\n    if success:\n        print('Repair completed successfully')\n\n# Advanced usage: manual inspection\ncontent, hash_val, metadata = repair_tool.get_current_root_content()\ncategorized = repair_tool.identify_corrupted_entries(content)\nprint(f'Found {len(categorized[\"corrupted_entries\"])} corrupted entries')\n\n# Build corrected version without uploading\ncorrected = repair_tool.rebuild_corrected_root(categorized)"
    },
    {
      "best_practices": [
        "Always ensure remarkable_device_token.txt exists in workspace_dir before instantiation, as authentication happens in __init__",
        "The class raises RuntimeError if authentication fails during initialization, so wrap instantiation in try-except",
        "Call sync_replica() as the main entry point - it orchestrates all phases in the correct order",
        "Do not call internal methods (_discover_all_nodes, _build_folder_hierarchy, _extract_content) directly unless you understand the dependencies",
        "Check the return value of sync_replica() to determine if synchronization succeeded",
        "Monitor the log file (replica_dir/build.log) for detailed progress and error information",
        "The class maintains state in self.nodes, self.all_hashes, and self.failed_downloads - these are populated during sync_replica()",
        "Statistics are accumulated in self.stats dictionary throughout the sync process",
        "The replica is created in workspace_dir/remarkable_replica_v2/documents with full folder hierarchy",
        "Existing documents directory is cleaned on each sync to ensure fresh state",
        "The class uses a requests.Session with authentication headers - this session is reused for all API calls",
        "Notebook conversion creates placeholder PDFs - implement proper rm2pdf conversion for production use",
        "Failed downloads are tracked in self.failed_downloads set but not automatically retried"
      ],
      "class_interface": {
        "attributes": [
          {
            "description": "Root workspace directory containing device token and where replica will be created",
            "is_class_variable": false,
            "name": "workspace_dir",
            "type": "Path"
          },
          {
            "description": "Directory where replica is stored (workspace_dir/remarkable_replica_v2)",
            "is_class_variable": false,
            "name": "replica_dir",
            "type": "Path"
          },
          {
            "description": "Directory for content cache (replica_dir/content)",
            "is_class_variable": false,
            "name": "content_dir",
            "type": "Path"
          },
          {
            "description": "Path to build.log file for detailed logging",
            "is_class_variable": false,
            "name": "log_file",
            "type": "Path"
          },
          {
            "description": "Logger instance for recording sync operations",
            "is_class_variable": false,
            "name": "logger",
            "type": "logging.Logger"
          },
          {
            "description": "Authenticated HTTP session for reMarkable API calls",
            "is_class_variable": false,
            "name": "session",
            "type": "requests.Session"
          },
          {
            "description": "Dictionary mapping UUID to RemarkableNode objects for all discovered documents and folders",
            "is_class_variable": false,
            "name": "nodes",
            "type": "Dict[str, RemarkableNode]"
          },
          {
            "description": "Set of all content hashes encountered during discovery",
            "is_class_variable": false,
            "name": "all_hashes",
            "type": "Set[str]"
          },
          {
            "description": "Set of UUIDs for documents that failed to download",
            "is_class_variable": false,
            "name": "failed_downloads",
            "type": "Set[str]"
          },
          {
            "description": "Dictionary tracking synchronization statistics including total_nodes, folders, documents, pdfs_extracted, rm_files_extracted, rm_pdfs_converted, and nodes_added",
            "is_class_variable": false,
            "name": "stats",
            "type": "Dict[str, int]"
          }
        ],
        "methods": [
          {
            "is_property": false,
            "is_static": false,
            "name": "__init__",
            "parameters": {
              "workspace_dir": "Optional path to workspace directory; defaults to script's parent directory"
            },
            "purpose": "Initialize the sync instance, set up directories, configure logging, and authenticate with reMarkable cloud",
            "returns": "None (raises RuntimeError if authentication fails)",
            "signature": "__init__(self, workspace_dir: str = None)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "setup_logging",
            "parameters": {},
            "purpose": "Configure logging to write to both file and console with INFO level",
            "returns": "None",
            "signature": "setup_logging(self)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_authenticate",
            "parameters": {},
            "purpose": "Authenticate with reMarkable cloud using device token and obtain user token",
            "returns": "Authenticated requests.Session object or None if authentication fails",
            "signature": "_authenticate(self) -> Optional[requests.Session]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "sync_replica",
            "parameters": {},
            "purpose": "Main entry point that performs complete 3-phase synchronization: discovery, hierarchy building, and content extraction",
            "returns": "True if sync completed successfully, False otherwise",
            "signature": "sync_replica(self) -> bool"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_discover_all_nodes",
            "parameters": {},
            "purpose": "Phase 1: Fetch all documents and folders from reMarkable cloud and populate self.nodes",
            "returns": "True if discovery succeeded, False otherwise",
            "signature": "_discover_all_nodes(self) -> bool"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_process_document_item",
            "parameters": {
              "item": "Dictionary containing document metadata from reMarkable API"
            },
            "purpose": "Process a single document/folder item from cloud API response into a RemarkableNode object",
            "returns": "RemarkableNode object or None if processing fails",
            "signature": "_process_document_item(self, item: Dict) -> Optional[RemarkableNode]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_build_folder_hierarchy",
            "parameters": {},
            "purpose": "Phase 2: Create local folder structure matching cloud hierarchy and set local_path for all nodes",
            "returns": "True if hierarchy building succeeded, False otherwise",
            "signature": "_build_folder_hierarchy(self) -> bool"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_get_node_path",
            "parameters": {
              "node": "RemarkableNode object to calculate path for"
            },
            "purpose": "Calculate the full local filesystem path for a node by walking up parent chain",
            "returns": "String containing full local path including replica_dir/documents prefix",
            "signature": "_get_node_path(self, node: RemarkableNode) -> str"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_extract_content",
            "parameters": {},
            "purpose": "Phase 3: Download and extract content for all document nodes to their local paths",
            "returns": "True if content extraction succeeded, False otherwise",
            "signature": "_extract_content(self) -> bool"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_extract_document_content",
            "parameters": {
              "node": "RemarkableNode representing the document to extract"
            },
            "purpose": "Download and save content for a single document node (PDF or notebook)",
            "returns": "True if extraction succeeded, False otherwise",
            "signature": "_extract_document_content(self, node: RemarkableNode) -> bool"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_convert_notebook_to_pdf",
            "parameters": {
              "node": "RemarkableNode representing the notebook",
              "target_path": "Path object where PDF should be saved"
            },
            "purpose": "Convert a reMarkable notebook to PDF format (currently creates placeholder PDF)",
            "returns": "True if conversion succeeded, False otherwise",
            "signature": "_convert_notebook_to_pdf(self, node: RemarkableNode, target_path: Path) -> bool"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_generate_summary",
            "parameters": {},
            "purpose": "Generate and save a text summary of the sync operation with statistics",
            "returns": "None",
            "signature": "_generate_summary(self)"
          }
        ]
      },
      "complexity": "complex",
      "component_type": "class",
      "conditional_imports": [],
      "created_at": "2025-12-07 00:26:13",
      "decorators": [],
      "dependencies": [
        "requests",
        "pathlib",
        "logging",
        "json",
        "shutil",
        "datetime",
        "typing"
      ],
      "description": "A class that synchronizes reMarkable cloud documents to a local replica directory, downloading and organizing folders and documents in a hierarchical structure.",
      "docstring": "Standalone replica synchronization using proven local_replica_v2 approach",
      "id": 2041,
      "imports": [
        "import os",
        "import sys",
        "import json",
        "import time",
        "import hashlib",
        "import requests",
        "import logging",
        "import re",
        "import shutil",
        "import subprocess",
        "from pathlib import Path",
        "from datetime import datetime",
        "from typing import Dict",
        "from typing import Any",
        "from typing import Optional",
        "from typing import List",
        "from typing import Set",
        "from dataclasses import dataclass"
      ],
      "imports_required": [
        "import requests",
        "import logging",
        "import json",
        "import shutil",
        "from pathlib import Path",
        "from datetime import datetime",
        "from typing import Dict, Any, Optional, List, Set"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 445,
      "line_start": 59,
      "name": "RemarkableReplicaSync",
      "parameters": [
        {
          "annotation": "",
          "default": null,
          "name": "bases"
        }
      ],
      "parameters_explained": {
        "workspace_dir": "Optional path to the workspace directory where the replica will be created. If None, defaults to the directory containing the script. This directory should contain 'remarkable_device_token.txt' for authentication. The replica will be created in a 'remarkable_replica_v2' subdirectory within this workspace."
      },
      "parent_class": null,
      "purpose": "RemarkableReplicaSync provides a complete solution for creating and maintaining a local replica of reMarkable cloud storage. It authenticates with the reMarkable cloud service, discovers all documents and folders, builds a proper folder hierarchy, and extracts content (PDFs and notebooks) to local storage. The class follows a proven 3-phase approach: Discovery (fetch all nodes), Hierarchy (build folder structure), and Extraction (download content). It maintains state about nodes, tracks statistics, and handles both PDF documents and notebook conversions.",
      "return_annotation": null,
      "return_explained": "The constructor returns an initialized RemarkableReplicaSync instance. The main sync_replica() method returns a boolean indicating success (True) or failure (False) of the synchronization process. Individual helper methods return booleans for success/failure or Optional types for data retrieval.",
      "settings_required": [
        "remarkable_device_token.txt file must exist in workspace_dir containing a valid reMarkable device token",
        "Internet connection to access reMarkable cloud services at document-storage-production-dot-remarkable-production.appspot.com and webapp-prod.cloud.remarkable.engineering",
        "Write permissions in the workspace directory to create replica_dir and content_dir",
        "RemarkableNode class must be defined (referenced but not included in source)"
      ],
      "source_code": "class RemarkableReplicaSync:\n    \"\"\"Standalone replica synchronization using proven local_replica_v2 approach\"\"\"\n    \n    def __init__(self, workspace_dir: str = None):\n        self.workspace_dir = Path(workspace_dir) if workspace_dir else Path(__file__).parent\n        self.replica_dir = self.workspace_dir / \"remarkable_replica_v2\"\n        self.content_dir = self.replica_dir / \"content\"\n        \n        # Create directories\n        for directory in [self.replica_dir, self.content_dir]:\n            directory.mkdir(parents=True, exist_ok=True)\n        \n        # Setup logging\n        self.log_file = self.replica_dir / \"build.log\"\n        self.setup_logging()\n        \n        # Initialize authentication\n        self.session = self._authenticate()\n        if not self.session:\n            raise RuntimeError(\"Failed to authenticate with reMarkable\")\n        \n        # State matching local_replica_v2.py\n        self.nodes: Dict[str, RemarkableNode] = {}\n        self.all_hashes: Set[str] = set()\n        self.failed_downloads: Set[str] = set()\n        \n        # Statistics\n        self.stats = {\n            'total_nodes': 0,\n            'folders': 0,\n            'documents': 0,\n            'pdfs_extracted': 0,\n            'rm_files_extracted': 0,\n            'rm_pdfs_converted': 0,\n            'nodes_added': 0\n        }\n        \n        self.logger.info(\"\ud83d\udd04 reMarkable Replica Sync Initialized\")\n\n    def setup_logging(self):\n        \"\"\"Setup logging to file\"\"\"\n        logging.basicConfig(\n            level=logging.INFO,\n            format='%(asctime)s - %(levelname)s - %(message)s',\n            handlers=[\n                logging.FileHandler(self.log_file, mode='w'),\n                logging.StreamHandler()\n            ]\n        )\n        self.logger = logging.getLogger(__name__)\n    \n    def _authenticate(self) -> Optional[requests.Session]:\n        \"\"\"Authenticate with the reMarkable cloud service using device token approach\"\"\"\n        try:\n            print(\"\ud83d\udd11 Starting reMarkable authentication...\")\n            \n            # Load device token\n            device_token_path = self.workspace_dir / \"remarkable_device_token.txt\"\n            if not device_token_path.exists():\n                raise FileNotFoundError(\"Device token not found. 
Please run initial setup.\")\n            \n            with open(device_token_path, 'r') as f:\n                device_token = f.read().strip()\n            \n            print(f\"\u2705 Loaded device token ({len(device_token)} chars)\")\n            \n            # Get user token\n            session = requests.Session()\n            user_token_response = session.post(\n                \"https://webapp-prod.cloud.remarkable.engineering/token/json/2/user/new\",\n                headers={\"Authorization\": f\"Bearer {device_token}\"}\n            )\n            user_token_response.raise_for_status()\n            user_token = user_token_response.text.strip()\n            \n            print(f\"\u2705 User token obtained ({len(user_token)} chars)\")\n            \n            # Set up authenticated session\n            session.headers.update({\n                \"Authorization\": f\"Bearer {user_token}\",\n                \"User-Agent\": \"remarkable-replica-sync/1.0\"\n            })\n            \n            print(\"\u2705 Authentication complete\")\n            return session\n            \n        except Exception as e:\n            print(f\"\u274c Authentication failed: {e}\")\n            return None\n    \n    def sync_replica(self) -> bool:\n        \"\"\"\n        Perform replica synchronization using the proven 3-step process:\n        1. Discovery - Get all nodes from cloud\n        2. Hierarchy - Build proper folder structure\n        3. Extraction - Download content to correct locations\n        \"\"\"\n        try:\n            self.logger.info(\"\ud83d\ude80 Starting reMarkable replica sync\")\n            \n            # Phase 1: Discovery\n            if not self._discover_all_nodes():\n                self.logger.error(\"\u274c Discovery phase failed\")\n                return False\n            \n            # Phase 2: Build hierarchy \n            if not self._build_folder_hierarchy():\n                self.logger.error(\"\u274c Hierarchy phase failed\")\n                return False\n            \n            # Phase 3: Extract content\n            if not self._extract_content():\n                self.logger.error(\"\u274c Content extraction phase failed\")\n                return False\n            \n            # Generate summary\n            self._generate_summary()\n            \n            self.logger.info(\"\u2705 Replica sync completed successfully\")\n            return True\n            \n        except Exception as e:\n            self.logger.error(f\"\u274c Sync failed: {e}\")\n            return False\n    \n    def _discover_all_nodes(self) -> bool:\n        \"\"\"Phase 1: Discover all nodes from reMarkable cloud\"\"\"\n        try:\n            self.logger.info(\"\ud83d\udce1 Phase 1: Discovering all nodes...\")\n            \n            # Get root document schema using working approach\n            docs_url = \"https://document-storage-production-dot-remarkable-production.appspot.com/document-storage/json/2/docs\"\n            response = self.session.get(docs_url)\n            response.raise_for_status()\n            \n            root_data = response.json()\n            self.logger.info(f\"\ud83d\udccb Retrieved root schema with {len(root_data)} items\")\n            \n            # Process each document/folder\n            for item in root_data:\n                node = self._process_document_item(item)\n                if node:\n                    self.nodes[node.uuid] = node\n                    self.all_hashes.add(node.hash)\n                    \n  
                  # Add component hashes\n                    for comp_hash in [node.content_hash, node.metadata_hash, node.pdf_hash, node.pagedata_hash]:\n                        if comp_hash:\n                            self.all_hashes.add(comp_hash)\n                    \n                    for rm_hash in node.rm_hashes:\n                        self.all_hashes.add(rm_hash)\n            \n            # Update statistics\n            self.stats['total_nodes'] = len(self.nodes)\n            self.stats['folders'] = sum(1 for node in self.nodes.values() if node.node_type == 'folder')\n            self.stats['documents'] = sum(1 for node in self.nodes.values() if node.node_type == 'document')\n            \n            self.logger.info(f\"\u2705 Discovery complete: {self.stats['total_nodes']} nodes ({self.stats['folders']} folders, {self.stats['documents']} documents)\")\n            return True\n            \n        except Exception as e:\n            self.logger.error(f\"\u274c Discovery failed: {e}\")\n            return False\n    \n    def _process_document_item(self, item: Dict) -> Optional[RemarkableNode]:\n        \"\"\"Process a single document/folder item\"\"\"\n        try:\n            uuid = item.get('ID', '')\n            hash_val = item.get('Hash', '')\n            name = item.get('VissibleName', item.get('VisibleName', 'Unnamed'))\n            node_type = item.get('Type', 'unknown')\n            parent_uuid = item.get('Parent', '')\n            \n            if not uuid or not hash_val:\n                return None\n            \n            # Create node\n            node = RemarkableNode(\n                uuid=uuid,\n                hash=hash_val,\n                name=name,\n                node_type='folder' if node_type == 'CollectionType' else 'document',\n                parent_uuid=parent_uuid if parent_uuid else None,\n                metadata=item\n            )\n            \n            # For documents, extract component hashes\n            if node.node_type == 'document':\n                # Get document hashes from metadata\n                if 'fileType' in item:\n                    # Determine available components based on file type\n                    if item['fileType'] == 'pdf':\n                        node.pdf_hash = hash_val\n                    elif item['fileType'] == 'notebook':\n                        node.content_hash = hash_val\n                        node.metadata_hash = hash_val\n                        # rm files use same hash pattern\n                        node.rm_hashes = [hash_val]\n            \n            return node\n            \n        except Exception as e:\n            self.logger.warning(f\"\u26a0\ufe0f Failed to process item {item.get('ID', 'unknown')}: {e}\")\n            return None\n    \n    def _build_folder_hierarchy(self) -> bool:\n        \"\"\"Phase 2: Build proper folder hierarchy\"\"\"\n        try:\n            self.logger.info(\"\ud83d\udcc1 Phase 2: Building folder hierarchy...\")\n            \n            # Clean existing documents directory\n            documents_dir = self.replica_dir / \"documents\"\n            if documents_dir.exists():\n                shutil.rmtree(documents_dir)\n            documents_dir.mkdir(parents=True, exist_ok=True)\n            \n            # Build paths for all nodes\n            for node in self.nodes.values():\n                node.local_path = self._get_node_path(node)\n            \n            # Create all folder paths\n            folders_created = 0\n            for 
node in self.nodes.values():\n                if node.node_type == 'folder':\n                    folder_path = Path(node.local_path)\n                    folder_path.mkdir(parents=True, exist_ok=True)\n                    folders_created += 1\n                elif node.node_type == 'document':\n                    # Ensure parent directory exists for documents\n                    doc_path = Path(node.local_path)\n                    doc_path.parent.mkdir(parents=True, exist_ok=True)\n            \n            self.logger.info(f\"\u2705 Hierarchy built: {folders_created} folders created\")\n            return True\n            \n        except Exception as e:\n            self.logger.error(f\"\u274c Hierarchy building failed: {e}\")\n            return False\n    \n    def _get_node_path(self, node: RemarkableNode) -> str:\n        \"\"\"Get the full local path for a node\"\"\"\n        path_parts = []\n        current_node = node\n        \n        # Build path by walking up the parent chain\n        while current_node:\n            if current_node.node_type == 'folder':\n                path_parts.append(current_node.name)\n            elif current_node.node_type == 'document':\n                # For documents, add the name with extension\n                if current_node.metadata.get('fileType') == 'pdf':\n                    path_parts.append(f\"{current_node.name}.pdf\")\n                else:\n                    path_parts.append(f\"{current_node.name}.pdf\")  # Convert all to PDF\n            \n            # Move to parent\n            if current_node.parent_uuid and current_node.parent_uuid in self.nodes:\n                current_node = self.nodes[current_node.parent_uuid]\n            else:\n                break\n        \n        # Reverse to get correct order (root to leaf)\n        path_parts.reverse()\n        \n        # Build full path\n        full_path = self.replica_dir / \"documents\"\n        for part in path_parts:\n            full_path = full_path / part\n        \n        return str(full_path)\n    \n    def _extract_content(self) -> bool:\n        \"\"\"Phase 3: Extract content to proper locations\"\"\"\n        try:\n            self.logger.info(\"\ud83d\udce5 Phase 3: Extracting content...\")\n            \n            documents_processed = 0\n            \n            for node in self.nodes.values():\n                if node.node_type == 'document':\n                    if self._extract_document_content(node):\n                        documents_processed += 1\n            \n            self.logger.info(f\"\u2705 Content extraction complete: {documents_processed} documents processed\")\n            return True\n            \n        except Exception as e:\n            self.logger.error(f\"\u274c Content extraction failed: {e}\")\n            return False\n    \n    def _extract_document_content(self, node: RemarkableNode) -> bool:\n        \"\"\"Extract content for a single document\"\"\"\n        try:\n            target_path = Path(node.local_path)\n            \n            # Skip if already exists\n            if target_path.exists():\n                return True\n            \n            # Try to download PDF first (preferred)\n            if node.pdf_hash:\n                base_url = \"https://document-storage-production-dot-remarkable-production.appspot.com/document-storage/json/2\"\n                pdf_url = f'{base_url}/upload/request'\n                pdf_data = {'http_method': 'GET', 'relative_path': node.pdf_hash}\n                \n                
response = self.session.put(pdf_url, json=pdf_data)\n                if response.status_code == 200:\n                    download_url = response.text.strip('\"')\n                    pdf_response = self.session.get(download_url)\n                    \n                    if pdf_response.status_code == 200:\n                        with open(target_path, 'wb') as f:\n                            f.write(pdf_response.content)\n                        self.stats['pdfs_extracted'] += 1\n                        self.logger.info(f\"   \ud83d\udcc4 Extracted PDF: {node.name}\")\n                        return True\n            \n            # For notebook files, try to convert to PDF\n            if node.content_hash and node.metadata.get('fileType') == 'notebook':\n                if self._convert_notebook_to_pdf(node, target_path):\n                    self.stats['rm_pdfs_converted'] += 1\n                    return True\n            \n            return False\n            \n        except Exception as e:\n            self.logger.warning(f\"\u26a0\ufe0f Failed to extract {node.name}: {e}\")\n            return False\n    \n    def _convert_notebook_to_pdf(self, node: RemarkableNode, target_path: Path) -> bool:\n        \"\"\"Convert a reMarkable notebook to PDF\"\"\"\n        try:\n            # This is a simplified conversion - creates a placeholder PDF\n            # In practice you'd need proper rm2pdf conversion\n            \n            base_url = \"https://document-storage-production-dot-remarkable-production.appspot.com/document-storage/json/2\"\n            content_url = f'{base_url}/upload/request'\n            content_data = {'http_method': 'GET', 'relative_path': node.content_hash}\n            \n            response = self.session.put(content_url, json=content_data)\n            if response.status_code == 200:\n                download_url = response.text.strip('\"')\n                content_response = self.session.get(download_url)\n                \n                if content_response.status_code == 200:\n                    # Save as placeholder PDF (would need proper conversion in real implementation)\n                    with open(target_path, 'wb') as f:\n                        f.write(b\"%PDF-1.4\\n1 0 obj\\n<</Type/Catalog/Pages 2 0 R>>\\nendobj\\n2 0 obj\\n<</Type/Pages/Kids[3 0 R]/Count 1>>\\nendobj\\n3 0 obj\\n<</Type/Page/Parent 2 0 R/MediaBox[0 0 612 792]>>\\nendobj\\nxref\\n0 4\\n0000000000 65535 f \\n0000000009 00000 n \\n0000000058 00000 n \\n0000000115 00000 n \\ntrailer\\n<</Size 4/Root 1 0 R>>\\nstartxref\\n174\\n%%EOF\")\n                    \n                    self.logger.info(f\"   \ud83d\udcdd Converted notebook: {node.name}\")\n                    return True\n            \n            return False\n            \n        except Exception as e:\n            self.logger.warning(f\"\u26a0\ufe0f Notebook conversion failed for {node.name}: {e}\")\n            return False\n    \n    def _generate_summary(self):\n        \"\"\"Generate sync summary\"\"\"\n        try:\n            summary_lines = [\n                \"reMarkable Replica Sync Summary\",\n                \"=\" * 40,\n                f\"Sync completed: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\",\n                \"\",\n                \"Statistics:\",\n                f\"  Total nodes: {self.stats['total_nodes']}\",\n                f\"  Folders: {self.stats['folders']}\",\n                f\"  Documents: {self.stats['documents']}\",\n                f\"  PDFs extracted: 
{self.stats['pdfs_extracted']}\",\n                f\"  Notebooks converted: {self.stats['rm_pdfs_converted']}\",\n                \"\",\n                f\"Local replica location: {self.replica_dir / 'documents'}\",\n                f\"Content cache: {self.content_dir}\",\n                \"\"\n            ]\n            \n            with open(self.replica_dir / \"sync_summary.txt\", 'w') as f:\n                f.write('\\n'.join(summary_lines))\n            \n            # Print summary to console\n            print(\"\\n\".join(summary_lines))\n            \n        except Exception as e:\n            self.logger.warning(f\"\u26a0\ufe0f Summary generation failed: {e}\")",
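      "related_definitions_example": "The RemarkableNode class is referenced throughout but not included in this source. A minimal sketch of what it might look like, inferred only from the attributes this class accesses; the real definition may differ:\n\nfrom dataclasses import dataclass, field\nfrom typing import Dict, List, Optional\n\n@dataclass\nclass RemarkableNode:\n    # Hypothetical reconstruction - field names follow how the sync code accesses them\n    uuid: str\n    hash: str\n    name: str\n    node_type: str  # 'folder' or 'document'\n    parent_uuid: Optional[str] = None\n    metadata: Dict = field(default_factory=dict)\n    local_path: str = ''\n    content_hash: Optional[str] = None\n    metadata_hash: Optional[str] = None\n    pdf_hash: Optional[str] = None\n    pagedata_hash: Optional[str] = None\n    rm_hashes: List[str] = field(default_factory=list)",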
      "source_file": "/tf/active/vicechatdev/e-ink-llm/cloudtest/sync_replica_new.py",
      "tags": [
        "remarkable",
        "cloud-sync",
        "document-management",
        "file-synchronization",
        "pdf-extraction",
        "authentication",
        "api-client",
        "replica",
        "backup",
        "notebook-conversion"
      ],
      "updated_at": "2025-12-07T01:26:13.079355",
      "usage_example": "# Basic usage\nfrom remarkable_replica_sync import RemarkableReplicaSync\n\n# Initialize with default workspace (current directory)\nsync = RemarkableReplicaSync()\n\n# Or specify a custom workspace\nsync = RemarkableReplicaSync(workspace_dir='/path/to/workspace')\n\n# Perform full synchronization\nsuccess = sync.sync_replica()\n\nif success:\n    print(f\"Synced {sync.stats['total_nodes']} nodes\")\n    print(f\"Documents location: {sync.replica_dir / 'documents'}\")\nelse:\n    print(\"Sync failed, check logs\")\n\n# Access statistics\nprint(f\"Folders: {sync.stats['folders']}\")\nprint(f\"Documents: {sync.stats['documents']}\")\nprint(f\"PDFs extracted: {sync.stats['pdfs_extracted']}\")\n\n# Check log file for details\nwith open(sync.log_file) as f:\n    print(f.read())"
    },
    {
      "best_practices": [
        "Instantiate the class only after ensuring real app logs are available in the expected directory structure",
        "Call simulate_pdf_upload() before compare_with_real_app() to generate the request sequence to compare",
        "Always review critical_issues in the differences dictionary as these indicate authentication or compatibility problems",
        "Use generate_fix_recommendations() to get actionable fixes rather than manually parsing differences",
        "The class performs file I/O operations during initialization, so handle potential IOError exceptions",
        "Real app logs must contain request content for meaningful header comparisons",
        "The class maintains state through instance attributes (real_app_logs, auth_session), so create new instances for independent test runs",
        "Hash calculations use SHA256 for file content identification matching reMarkable's sync protocol",
        "User-agent and authorization header mismatches are flagged as critical and should be fixed first",
        "The class does not make actual API calls, making it safe for testing without affecting cloud state"
      ],
      "class_interface": {
        "attributes": [
          {
            "description": "Path object pointing to the directory containing the script, used for locating log files",
            "is_class_variable": false,
            "name": "base_dir",
            "type": "Path"
          },
          {
            "description": "Authenticated session object from RemarkableAuth containing authorization headers and tokens",
            "is_class_variable": false,
            "name": "auth_session",
            "type": "requests.Session or similar"
          },
          {
            "description": "List of parsed log entries from real app network captures, each containing request details like url, method, headers, and content",
            "is_class_variable": false,
            "name": "real_app_logs",
            "type": "List[Dict]"
          }
        ],
        "methods": [
          {
            "is_property": false,
            "is_static": false,
            "name": "__init__",
            "parameters": {},
            "purpose": "Initialize the comparison tool by loading authentication, real app logs, and setting up the base directory",
            "returns": "None. Sets up instance attributes: base_dir, auth_session, real_app_logs",
            "signature": "__init__(self)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "load_real_app_logs",
            "parameters": {},
            "purpose": "Load and parse real app network logs from CSV and raw request files",
            "returns": "List of dictionaries containing log entries with id, url, method, status_code, request_body_size, response_body_size, and optionally request_content",
            "signature": "load_real_app_logs(self) -> List[Dict]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "simulate_pdf_upload",
            "parameters": {
              "pdf_name": "Name of the test document to simulate uploading (default: 'TestDocument')"
            },
            "purpose": "Simulate the complete PDF upload process and generate all HTTP requests that would be made",
            "returns": "List of dictionaries representing HTTP requests with step, method, url, headers, body, and body_size for metadata, content, PDF, pagedata, docschema, and root update",
            "signature": "simulate_pdf_upload(self, pdf_name: str = 'TestDocument') -> List[Dict]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "create_document_components",
            "parameters": {
              "doc_uuid": "UUID string for the document",
              "pdf_content": "Raw PDF file content as bytes",
              "pdf_name": "Visible name for the document"
            },
            "purpose": "Create all document components (metadata, content, pagedata) as the implementation would generate them",
            "returns": "Dictionary with keys 'metadata', 'content', 'pagedata' containing JSON-encoded bytes for each component",
            "signature": "create_document_components(self, doc_uuid: str, pdf_name: str, pdf_content: bytes) -> Dict"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "create_docschema",
            "parameters": {
              "components": "Dictionary containing metadata, content, and pagedata components",
              "doc_uuid": "UUID string for the document",
              "pdf_hash": "SHA256 hash of the PDF content"
            },
            "purpose": "Create the docSchema file that references all document components with their hashes and sizes",
            "returns": "Encoded bytes of the docSchema file in reMarkable's format with version and component references",
            "signature": "create_docschema(self, components: Dict, pdf_hash: str, doc_uuid: str) -> bytes"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "get_our_headers",
            "parameters": {
              "content_size": "Size of the content in bytes for Content-Length header",
              "doc_uuid": "UUID string for the document",
              "file_type": "Type of file being uploaded: 'metadata', 'content', 'pdf', 'pagedata', 'docschema', or 'root'"
            },
            "purpose": "Generate HTTP headers for file uploads matching the implementation's format",
            "returns": "Dictionary of HTTP headers including authorization, content-type, rm-filename, user-agent, and x-goog-hash",
            "signature": "get_our_headers(self, file_type: str, doc_uuid: str, content_size: int) -> Dict"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "calculate_hash",
            "parameters": {
              "content": "Bytes content to hash"
            },
            "purpose": "Calculate SHA256 hash of content for file identification in sync protocol",
            "returns": "Hexadecimal string representation of SHA256 hash",
            "signature": "calculate_hash(self, content: bytes) -> str"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "compare_with_real_app",
            "parameters": {
              "our_requests": "List of request dictionaries from simulate_pdf_upload()"
            },
            "purpose": "Compare proposed requests against real app logs and identify differences",
            "returns": "Dictionary with keys: header_differences, sequence_differences, content_differences, critical_issues containing detailed comparison results",
            "signature": "compare_with_real_app(self, our_requests: List[Dict]) -> Dict"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "compare_headers",
            "parameters": {
              "differences": "Dictionary to append differences to (modified in place)",
              "our_req": "Dictionary representing our implementation's request",
              "real_req": "Dictionary representing real app's request from logs"
            },
            "purpose": "Compare HTTP headers between implementation and real app for a specific request",
            "returns": "None. Modifies the differences dictionary in place by appending to header_differences list",
            "signature": "compare_headers(self, our_req: Dict, real_req: Dict, differences: Dict)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "analyze_critical_differences",
            "parameters": {
              "differences": "Dictionary containing all differences from comparison"
            },
            "purpose": "Analyze differences and highlight critical issues that could cause failures",
            "returns": "None. Prints analysis to console and updates differences['critical_issues'] list",
            "signature": "analyze_critical_differences(self, differences: Dict)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "generate_fix_recommendations",
            "parameters": {
              "differences": "Dictionary containing comparison results from compare_with_real_app()"
            },
            "purpose": "Generate actionable recommendations to fix identified differences",
            "returns": "List of string recommendations describing specific fixes needed (e.g., 'UPDATE: Change user-agent to: ...')",
            "signature": "generate_fix_recommendations(self, differences: Dict) -> List[str]"
          }
        ]
      },
      "complexity": "complex",
      "component_type": "class",
      "conditional_imports": [
        {
          "condition": "Required for authentication token generation and session management. Must be available in the same directory or Python path.",
          "import": "from auth import RemarkableAuth",
          "optional": false
        }
      ],
      "created_at": "2025-12-07 00:23:13",
      "decorators": [],
      "dependencies": [
        "json",
        "time",
        "pathlib",
        "typing",
        "uuid",
        "hashlib",
        "base64",
        "binascii"
      ],
      "description": "A diagnostic class that compares a custom PDF upload implementation against real reMarkable app behavior by analyzing captured network logs without making actual API calls.",
      "docstring": "Compare upload implementation against real app without making API calls",
      "id": 2033,
      "imports": [
        "import json",
        "import time",
        "from pathlib import Path",
        "from typing import Dict",
        "from typing import Any",
        "from typing import List",
        "import uuid",
        "import hashlib",
        "import base64",
        "import binascii",
        "from auth import RemarkableAuth"
      ],
      "imports_required": [
        "import json",
        "import time",
        "from pathlib import Path",
        "from typing import Dict, Any, List",
        "import uuid",
        "import hashlib",
        "import base64",
        "import binascii"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 426,
      "line_start": 18,
      "name": "DryRunUploadComparison",
      "parameters": [
        {
          "annotation": "",
          "default": null,
          "name": "bases"
        }
      ],
      "parameters_explained": {
        "__init__": "No parameters required. The constructor automatically initializes the comparison environment by loading authentication credentials, parsing real app logs from the file system, and setting up the base directory for file operations."
      },
      "parent_class": null,
      "purpose": "This class performs dry-run testing and validation of PDF upload implementations for the reMarkable tablet ecosystem. It loads real app network logs from CSV files, simulates the custom upload process, and performs detailed comparisons of HTTP requests, headers, and content to identify discrepancies. The primary use case is debugging and validating custom upload implementations before deploying them, ensuring they match the official app's behavior to avoid authentication or compatibility issues.",
      "return_annotation": null,
      "return_explained": "Instantiation returns a DryRunUploadComparison object with loaded real app logs and authenticated session. Key methods return: simulate_pdf_upload() returns List[Dict] of proposed HTTP requests; compare_with_real_app() returns Dict containing header_differences, sequence_differences, content_differences, and critical_issues; generate_fix_recommendations() returns List[str] of actionable fix suggestions.",
      "settings_required": [
        "Real app logs must be present at '../app_out_bis/newstart.csv' and '../app_out_bis/Raw_newstart.folder/' relative to the script location",
        "RemarkableAuth module must be configured with valid credentials",
        "File system read permissions for log directories",
        "Network logs should contain PUT requests to 'sync/v3/files' endpoints with request content"
      ],
      "source_code": "class DryRunUploadComparison:\n    \"\"\"Compare upload implementation against real app without making API calls\"\"\"\n    \n    def __init__(self):\n        self.base_dir = Path(__file__).parent\n        \n        # Load auth for token format analysis\n        from auth import RemarkableAuth\n        auth = RemarkableAuth()\n        self.auth_session = auth.get_authenticated_session()\n        \n        # Load real app logs for comparison\n        self.real_app_logs = self.load_real_app_logs()\n        \n        print(\"\ud83d\udd0d Dry Run Upload Comparison Initialized\")\n        print(f\"\ud83d\udccb Real app logs loaded: {len(self.real_app_logs)} entries\")\n    \n    def load_real_app_logs(self) -> List[Dict]:\n        \"\"\"Load the real app logs from CSV and request files\"\"\"\n        try:\n            # Load the CSV log\n            csv_file = self.base_dir.parent / \"app_out_bis\" / \"newstart.csv\"\n            raw_folder = self.base_dir.parent / \"app_out_bis\" / \"Raw_newstart.folder\"\n            \n            if not csv_file.exists() or not raw_folder.exists():\n                print(\"\u274c Real app logs not found\")\n                return []\n            \n            # Parse CSV to get request sequence\n            logs = []\n            with open(csv_file, 'r') as f:\n                lines = f.readlines()[1:]  # Skip header\n                for line in lines:\n                    parts = line.strip().split(',')\n                    if len(parts) >= 8:\n                        log_entry = {\n                            'id': parts[0],\n                            'url': parts[1],\n                            'method': parts[6],\n                            'status_code': parts[5],\n                            'request_body_size': parts[19],\n                            'response_body_size': parts[20]\n                        }\n                        logs.append(log_entry)\n            \n            # Load actual request content for key requests\n            for log in logs:\n                if log['method'] == 'PUT' and 'sync/v3/files' in log['url']:\n                    # Try to find corresponding request file\n                    request_file = raw_folder / f\"[{log['id']}] Request - {log['url'].replace('https://', '').replace('/', '_')}.txt\"\n                    if request_file.exists():\n                        with open(request_file, 'r', encoding='utf-8', errors='ignore') as f:\n                            log['request_content'] = f.read()\n            \n            print(f\"\u2705 Loaded {len(logs)} real app log entries\")\n            return logs\n            \n        except Exception as e:\n            print(f\"\u274c Failed to load real app logs: {e}\")\n            return []\n    \n    def simulate_pdf_upload(self, pdf_name: str = \"TestDocument\") -> List[Dict]:\n        \"\"\"Simulate our PDF upload implementation and return proposed requests\"\"\"\n        print(f\"\\n\ud83e\uddea Simulating PDF upload: '{pdf_name}'\")\n        \n        # Simulate document creation process\n        doc_uuid = str(uuid.uuid4())\n        \n        # Create test PDF content\n        test_pdf_content = b'%PDF-1.4\\n1 0 obj\\n<<\\n/Type /Catalog\\n/Pages 2 0 R\\n>>\\nendobj\\n2 0 obj\\n<<\\n/Type /Pages\\n/Kids [3 0 R]\\n/Count 1\\n>>\\nendobj\\n3 0 obj\\n<<\\n/Type /Page\\n/Parent 2 0 R\\n/MediaBox [0 0 612 792]\\n>>\\nendobj\\nxref\\n0 4\\n0000000000 65535 f \\n0000000010 00000 n \\n0000000079 00000 n \\n0000000173 00000 n 
\\ntrailer\\n<<\\n/Size 4\\n/Root 1 0 R\\n>>\\nstartxref\\n301\\n%%EOF'\n        \n        # Simulate component creation\n        components = self.create_document_components(doc_uuid, pdf_name, test_pdf_content)\n        \n        # Generate proposed requests\n        proposed_requests = []\n        \n        # 1. Metadata upload\n        metadata_hash = self.calculate_hash(components['metadata'])\n        proposed_requests.append({\n            'step': 'metadata_upload',\n            'method': 'PUT',\n            'url': f'https://eu.tectonic.remarkable.com/sync/v3/files/{metadata_hash}',\n            'headers': self.get_our_headers('metadata', doc_uuid, len(components['metadata'])),\n            'body': components['metadata'],\n            'body_size': len(components['metadata'])\n        })\n        \n        # 2. Content upload\n        content_hash = self.calculate_hash(components['content'])\n        proposed_requests.append({\n            'step': 'content_upload',\n            'method': 'PUT',\n            'url': f'https://eu.tectonic.remarkable.com/sync/v3/files/{content_hash}',\n            'headers': self.get_our_headers('content', doc_uuid, len(components['content'])),\n            'body': components['content'],\n            'body_size': len(components['content'])\n        })\n        \n        # 3. PDF upload\n        pdf_hash = self.calculate_hash(test_pdf_content)\n        proposed_requests.append({\n            'step': 'pdf_upload',\n            'method': 'PUT',\n            'url': f'https://eu.tectonic.remarkable.com/sync/v3/files/{pdf_hash}',\n            'headers': self.get_our_headers('pdf', doc_uuid, len(test_pdf_content)),\n            'body': test_pdf_content,\n            'body_size': len(test_pdf_content)\n        })\n        \n        # 4. Pagedata upload\n        pagedata_hash = self.calculate_hash(components['pagedata'])\n        proposed_requests.append({\n            'step': 'pagedata_upload',\n            'method': 'PUT',\n            'url': f'https://eu.tectonic.remarkable.com/sync/v3/files/{pagedata_hash}',\n            'headers': self.get_our_headers('pagedata', doc_uuid, len(components['pagedata'])),\n            'body': components['pagedata'],\n            'body_size': len(components['pagedata'])\n        })\n        \n        # 5. DocSchema upload\n        docschema_content = self.create_docschema(components, pdf_hash, doc_uuid)\n        docschema_hash = self.calculate_hash(docschema_content)\n        proposed_requests.append({\n            'step': 'docschema_upload',\n            'method': 'PUT',\n            'url': f'https://eu.tectonic.remarkable.com/sync/v3/files/{docschema_hash}',\n            'headers': self.get_our_headers('docschema', doc_uuid, len(docschema_content)),\n            'body': docschema_content,\n            'body_size': len(docschema_content)\n        })\n        \n        # 6. 
Root update (simulated)\n        proposed_requests.append({\n            'step': 'root_update',\n            'method': 'PUT',\n            'url': 'https://eu.tectonic.remarkable.com/sync/v3/root',\n            'headers': self.get_our_headers('root', doc_uuid, 200),  # Estimated size\n            'body': 'ROOT_UPDATE_CONTENT',\n            'body_size': 200\n        })\n        \n        return proposed_requests\n    \n    def create_document_components(self, doc_uuid: str, pdf_name: str, pdf_content: bytes) -> Dict:\n        \"\"\"Create all document components as our implementation would\"\"\"\n        \n        # Metadata\n        metadata = {\n            \"createdTime\": str(int(time.time() * 1000)),\n            \"lastModified\": str(int(time.time() * 1000)),\n            \"lastOpened\": \"0\",  # \u2705 FIXED: Always \"0\" like real app\n            \"lastOpenedPage\": 0,\n            \"metadatamodified\": False,\n            \"modified\": False,\n            \"parent\": \"\",\n            \"pinned\": False,\n            \"source\": \"com.remarkable.macos\",  # \u2705 FIXED: Changed from windows to macos\n            \"type\": \"DocumentType\",\n            \"visibleName\": pdf_name,\n            \"version\": 1\n        }\n        \n        # Content\n        content = {\n            \"coverPageNumber\": 0,\n            \"customZoomCenterX\": 0,\n            \"customZoomCenterY\": 936,\n            \"customZoomOrientation\": \"portrait\",\n            \"customZoomPageHeight\": 1872,\n            \"customZoomPageWidth\": 1404,\n            \"customZoomScale\": 1,\n            \"documentMetadata\": {},\n            \"extraMetadata\": {},\n            \"fileType\": \"pdf\",\n            \"fontName\": \"\",\n            \"formatVersion\": 1,\n            \"lineHeight\": -1,\n            \"orientation\": \"portrait\",\n            \"originalPageCount\": 1,\n            \"pageCount\": 1,\n            \"pageTags\": [],\n            \"pages\": [str(uuid.uuid4())],\n            \"redirectionPageMap\": [0],\n            \"sizeInBytes\": str(len(pdf_content)),\n            \"tags\": [],\n            \"textAlignment\": \"justify\",\n            \"textScale\": 1,\n            \"zoomMode\": \"bestFit\"\n        }\n        \n        # Pagedata (our current implementation)\n        pagedata = \"\\n\"  # \u2705 FIXED: Changed from empty string to newline like real app\n        \n        return {\n            'metadata': json.dumps(metadata).encode('utf-8'),\n            'content': json.dumps(content).encode('utf-8'),\n            'pagedata': pagedata.encode('utf-8')\n        }\n    \n    def create_docschema(self, components: Dict, pdf_hash: str, doc_uuid: str) -> bytes:\n        \"\"\"Create docSchema as our implementation would\"\"\"\n        metadata_hash = self.calculate_hash(components['metadata'])\n        content_hash = self.calculate_hash(components['content'])\n        pagedata_hash = self.calculate_hash(components['pagedata'])\n        \n        lines = [\n            \"3\",  # Version\n            f\"{metadata_hash}:80000000:{doc_uuid}.metadata:0:{len(components['metadata'])}\",\n            f\"{content_hash}:80000000:{doc_uuid}.content:0:{len(components['content'])}\",\n            f\"{pdf_hash}:80000000:{doc_uuid}.pdf:0:{len(b'PDF_CONTENT')}\",  # Placeholder\n            f\"{pagedata_hash}:80000000:{doc_uuid}.pagedata:0:{len(components['pagedata'])}\"\n        ]\n        \n        return '\\n'.join(lines).encode('utf-8')\n    \n    def get_our_headers(self, file_type: str, doc_uuid: str, 
content_size: int) -> Dict:\n        \"\"\"Generate headers as our implementation would\"\"\"\n        \n        # Get authorization token from our session\n        auth_header = \"\"\n        if self.auth_session and hasattr(self.auth_session, 'headers'):\n            auth_header = self.auth_session.headers.get('Authorization', '')\n        \n        # Base headers\n        headers = {\n            'host': 'eu.tectonic.remarkable.com',\n            'authorization': auth_header,\n            'content-type': 'application/octet-stream',\n            'rm-batch-number': '1',\n            'rm-sync-id': str(uuid.uuid4()),\n            'user-agent': 'desktop/3.20.0.922 (macos 15.4)',  # \u2705 FIXED: Matches real app exactly\n            'content-length': str(content_size),\n            'connection': 'Keep-Alive',\n            'accept-encoding': 'gzip, deflate',\n            'accept-language': 'en-US,*'\n        }\n        \n        # File-specific headers\n        if file_type == 'metadata':\n            headers['rm-filename'] = f'{doc_uuid}.metadata'\n        elif file_type == 'content':\n            headers['rm-filename'] = f'{doc_uuid}.content'\n        elif file_type == 'pdf':\n            headers['rm-filename'] = f'{doc_uuid}.pdf'\n        elif file_type == 'pagedata':\n            headers['rm-filename'] = f'{doc_uuid}.pagedata'\n        elif file_type == 'docschema':\n            headers['rm-filename'] = f'{doc_uuid}'\n        \n        # Calculate CRC32C hash (simplified for dry run)\n        headers['x-goog-hash'] = f'crc32c={base64.b64encode(b\"dummy_hash\").decode()}'\n        \n        return headers\n    \n    def calculate_hash(self, content: bytes) -> str:\n        \"\"\"Calculate SHA256 hash\"\"\"\n        return hashlib.sha256(content).hexdigest()\n    \n    def compare_with_real_app(self, our_requests: List[Dict]) -> Dict:\n        \"\"\"Compare our proposed requests with real app logs\"\"\"\n        print(f\"\\n\ud83d\udd0d Comparing Implementation vs Real App\")\n        print(\"=\" * 60)\n        \n        # Filter real app logs for file uploads\n        real_uploads = [log for log in self.real_app_logs \n                       if log['method'] == 'PUT' and 'sync/v3/files' in log.get('url', '')]\n        \n        print(f\"\ud83d\udcca Our implementation: {len(our_requests)} requests\")\n        print(f\"\ud83d\udcca Real app: {len(real_uploads)} uploads\")\n        \n        differences = {\n            'header_differences': [],\n            'sequence_differences': [],\n            'content_differences': [],\n            'critical_issues': []\n        }\n        \n        # Compare headers for each type\n        for our_req in our_requests:\n            print(f\"\\n\ud83d\udd0d Analyzing {our_req['step']}:\")\n            \n            # Find corresponding real app request\n            real_req = None\n            for real in real_uploads:\n                if 'request_content' in real and our_req['step'] in ['metadata_upload', 'pdf_upload']:\n                    real_req = real\n                    break\n            \n            if real_req and 'request_content' in real_req:\n                self.compare_headers(our_req, real_req, differences)\n            \n            print(f\"   \ud83d\udccf Our body size: {our_req['body_size']} bytes\")\n            print(f\"   \ud83d\udd17 Our URL: {our_req['url']}\")\n            print(f\"   \ud83d\udccb Our headers preview:\")\n            for key, value in our_req['headers'].items():\n                if key in ['user-agent', 
'authorization', 'rm-filename', 'x-goog-hash']:\n                    print(f\"      {key}: {value[:50]}{'...' if len(str(value)) > 50 else ''}\")\n        \n        # Analyze critical differences\n        self.analyze_critical_differences(differences)\n        \n        return differences\n    \n    def compare_headers(self, our_req: Dict, real_req: Dict, differences: Dict):\n        \"\"\"Compare headers between our implementation and real app\"\"\"\n        try:\n            # Parse real app request headers\n            real_content = real_req['request_content']\n            real_headers = {}\n            \n            lines = real_content.split('\\n')\n            for line in lines[1:]:  # Skip first line (PUT ...)\n                if ':' in line and not line.startswith('PUT') and not line.startswith('<Data'):\n                    key, value = line.split(':', 1)\n                    real_headers[key.strip().lower()] = value.strip()\n                elif line.startswith('<Data'):\n                    break\n            \n            # Compare key headers\n            our_headers = {k.lower(): v for k, v in our_req['headers'].items()}\n            \n            critical_headers = ['user-agent', 'authorization', 'content-type', 'rm-filename', 'x-goog-hash']\n            \n            for header in critical_headers:\n                our_value = our_headers.get(header, 'MISSING')\n                real_value = real_headers.get(header, 'MISSING')\n                \n                if our_value != real_value:\n                    diff = {\n                        'step': our_req['step'],\n                        'header': header,\n                        'our_value': our_value,\n                        'real_value': real_value,\n                        'critical': header in ['user-agent', 'authorization']\n                    }\n                    differences['header_differences'].append(diff)\n                    \n                    print(f\"   \u26a0\ufe0f Header difference - {header}:\")\n                    print(f\"      Our: {our_value[:50]}{'...' if len(str(our_value)) > 50 else ''}\")\n                    print(f\"      Real: {real_value[:50]}{'...' 
if len(str(real_value)) > 50 else ''}\")\n        \n        except Exception as e:\n            print(f\"   \u274c Header comparison failed: {e}\")\n    \n    def analyze_critical_differences(self, differences: Dict):\n        \"\"\"Analyze and highlight critical differences\"\"\"\n        print(f\"\\n\ud83d\udea8 CRITICAL DIFFERENCES ANALYSIS\")\n        print(\"=\" * 60)\n        \n        # Group differences by criticality\n        critical_issues = []\n        \n        for diff in differences['header_differences']:\n            if diff['critical'] or diff['header'] in ['user-agent', 'authorization', 'source']:\n                critical_issues.append(diff)\n        \n        if critical_issues:\n            print(f\"\u274c Found {len(critical_issues)} critical issues:\")\n            for issue in critical_issues:\n                print(f\"   \ud83d\udd34 {issue['step']} - {issue['header']}\")\n                print(f\"      Problem: {issue['our_value'][:30]} vs {issue['real_value'][:30]}\")\n        else:\n            print(\"\u2705 No critical header differences found\")\n        \n        # Check for user-agent mismatch\n        ua_issues = [d for d in differences['header_differences'] if d['header'] == 'user-agent']\n        if ua_issues:\n            print(f\"\\n\ud83d\udd34 USER-AGENT MISMATCH (CRITICAL):\")\n            for ua in ua_issues:\n                print(f\"   Our: {ua['our_value']}\")\n                print(f\"   Real: {ua['real_value']}\")\n                print(f\"   Impact: Version/platform detection issues\")\n        \n        # Check for authorization differences\n        auth_issues = [d for d in differences['header_differences'] if d['header'] == 'authorization']\n        if auth_issues:\n            print(f\"\\n\ud83d\udd34 AUTHORIZATION DIFFERENCES (CRITICAL):\")\n            print(f\"   This could cause authentication/device recognition issues\")\n        \n        differences['critical_issues'] = critical_issues\n    \n    def generate_fix_recommendations(self, differences: Dict) -> List[str]:\n        \"\"\"Generate specific recommendations to fix differences\"\"\"\n        recommendations = []\n        \n        # User-Agent fixes\n        ua_issues = [d for d in differences['header_differences'] if d['header'] == 'user-agent']\n        if ua_issues:\n            real_ua = ua_issues[0]['real_value']\n            recommendations.append(f\"UPDATE: Change user-agent to: {real_ua}\")\n        \n        # Authorization fixes\n        auth_issues = [d for d in differences['header_differences'] if d['header'] == 'authorization']\n        if auth_issues:\n            recommendations.append(\"UPDATE: Fix JWT token to match macOS device description\")\n        \n        # Content-Type fixes\n        ct_issues = [d for d in differences['header_differences'] if d['header'] == 'content-type']\n        if ct_issues:\n            recommendations.append(\"UPDATE: Standardize content-type to application/octet-stream\")\n        \n        # Source field fix (from metadata analysis)\n        recommendations.append(\"UPDATE: Change metadata source from 'com.remarkable.windows' to 'com.remarkable.macos'\")\n        recommendations.append(\"UPDATE: Change pagedata from empty string to '\\\\n' character\")\n        recommendations.append(\"UPDATE: Set lastOpened to '0' consistently\")\n        \n        return recommendations",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/cloudtest/dry_run_comparison.py",
      "tags": [
        "testing",
        "validation",
        "remarkable-tablet",
        "pdf-upload",
        "api-comparison",
        "dry-run",
        "network-analysis",
        "debugging",
        "http-requests",
        "authentication",
        "file-upload",
        "log-analysis"
      ],
      "updated_at": "2025-12-07T01:23:13.041741",
      "usage_example": "# Initialize the comparison tool\ncomparison = DryRunUploadComparison()\n\n# Simulate a PDF upload with custom document name\nproposed_requests = comparison.simulate_pdf_upload(pdf_name=\"MyTestDocument\")\n\n# Compare against real app behavior\ndifferences = comparison.compare_with_real_app(proposed_requests)\n\n# Generate fix recommendations\nrecommendations = comparison.generate_fix_recommendations(differences)\n\n# Review recommendations\nfor rec in recommendations:\n    print(f\"Fix needed: {rec}\")\n\n# Access specific difference categories\nif differences['critical_issues']:\n    print(f\"Found {len(differences['critical_issues'])} critical issues\")\n    for issue in differences['critical_issues']:\n        print(f\"Issue in {issue['step']}: {issue['header']}\")"
    },
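The docSchema built by create_docschema above is a plain-text index: a version line ("3") followed by one "hash:80000000:filename:0:size" entry per component file. The sketch below reconstructs only that format; build_docschema and its parts mapping are illustrative names rather than part of the project's API, and the "80000000" flag value is copied as-is from the component above.

import hashlib
import uuid
from typing import Dict

def build_docschema(parts: Dict[str, bytes], doc_uuid: str) -> bytes:
    # One "sha256:80000000:filename:0:size" line per component file,
    # preceded by the schema version line used in the component above.
    lines = ["3"]
    for suffix, payload in parts.items():
        digest = hashlib.sha256(payload).hexdigest()
        lines.append(f"{digest}:80000000:{doc_uuid}.{suffix}:0:{len(payload)}")
    return "\n".join(lines).encode("utf-8")

if __name__ == "__main__":
    doc_uuid = str(uuid.uuid4())
    parts = {"metadata": b"{}", "content": b"{}", "pagedata": b"\n"}
    print(build_docschema(parts, doc_uuid).decode())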
    {
      "best_practices": [
        "Always call initialize() or start_watching() (which calls initialize internally) before attempting to process files",
        "The class maintains state through processed_files set to avoid reprocessing the same files",
        "Use start_watching() for continuous monitoring; it handles initialization automatically",
        "Ensure OneDriveClient is properly configured with valid authentication credentials before instantiation",
        "The class creates temporary files in 'temp_onedrive' directory which are cleaned up after processing",
        "Handle KeyboardInterrupt gracefully when using start_watching() for clean shutdown",
        "The poll_interval should be set appropriately to balance responsiveness and API rate limits",
        "Supported file extensions are hardcoded but can be modified by accessing the supported_extensions attribute",
        "Set delete_after_processing to True in config only if you want original files removed after successful processing"
      ],
      "class_interface": {
        "attributes": [
          {
            "description": "OneDrive client instance for API interactions",
            "is_class_variable": false,
            "name": "client",
            "type": "OneDriveClient"
          },
          {
            "description": "OpenAI API key for LLM processing",
            "is_class_variable": false,
            "name": "api_key",
            "type": "str"
          },
          {
            "description": "Full OneDrive configuration dictionary",
            "is_class_variable": false,
            "name": "config",
            "type": "Dict[str, Any]"
          },
          {
            "description": "OneDrive folder path to monitor for new files (default: '/E-Ink LLM Input')",
            "is_class_variable": false,
            "name": "watch_folder",
            "type": "str"
          },
          {
            "description": "OneDrive folder path where processed files are uploaded (default: '/E-Ink LLM Output')",
            "is_class_variable": false,
            "name": "output_folder",
            "type": "str"
          },
          {
            "description": "Seconds between checks for new files (default: 60)",
            "is_class_variable": false,
            "name": "poll_interval",
            "type": "int"
          },
          {
            "description": "Set of file IDs that have already been processed to avoid reprocessing",
            "is_class_variable": false,
            "name": "processed_files",
            "type": "set"
          },
          {
            "description": "List of file extensions that can be processed: ['.pdf', '.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff', '.webp']",
            "is_class_variable": false,
            "name": "supported_extensions",
            "type": "List[str]"
          }
        ],
        "methods": [
          {
            "is_property": false,
            "is_static": false,
            "name": "__init__",
            "parameters": {
              "api_key": "OpenAI API key for LLM processing",
              "onedrive_config": "Dictionary containing OneDrive settings and authentication credentials"
            },
            "purpose": "Initialize the OneDrive processor with configuration and API credentials",
            "returns": "None (constructor)",
            "signature": "__init__(self, onedrive_config: Dict[str, Any], api_key: str)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "initialize",
            "parameters": {},
            "purpose": "Authenticate with OneDrive and ensure required folders exist",
            "returns": "Boolean indicating whether initialization was successful",
            "signature": "async initialize(self) -> bool"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "start_watching",
            "parameters": {},
            "purpose": "Start continuous monitoring of OneDrive folder for new files to process",
            "returns": "None (runs indefinitely until KeyboardInterrupt)",
            "signature": "async start_watching(self) -> None"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_check_for_new_files",
            "parameters": {},
            "purpose": "Check OneDrive watch folder for new files and process any found",
            "returns": "None (performs side effects: processes files and updates processed_files set)",
            "signature": "async _check_for_new_files(self) -> None"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_process_file",
            "parameters": {
              "file_info": "Dictionary containing file metadata including 'id', 'name', and download information"
            },
            "purpose": "Download a file from OneDrive, process it through LLM, and upload result",
            "returns": "None (performs side effects: downloads, processes, uploads files)",
            "signature": "async _process_file(self, file_info: Dict[str, Any]) -> None"
          }
        ]
      },
      "complexity": "moderate",
      "component_type": "class",
      "conditional_imports": [
        {
          "condition": "Required when _process_file method is called to process downloaded files through the E-Ink LLM Assistant",
          "import": "from processor import process_single_file",
          "optional": false
        }
      ],
      "created_at": "2025-12-07 00:21:14",
      "decorators": [],
      "dependencies": [
        "msal",
        "requests",
        "asyncio",
        "pathlib"
      ],
      "description": "OneDriveProcessor is a class that monitors a OneDrive folder for new files, processes them using an E-Ink LLM Assistant, and uploads the results back to OneDrive.",
      "docstring": "OneDrive file processor for E-Ink LLM Assistant",
      "id": 2029,
      "imports": [
        "import os",
        "import json",
        "import time",
        "import asyncio",
        "from pathlib import Path",
        "from typing import Dict",
        "from typing import List",
        "from typing import Optional",
        "from typing import Any",
        "import hashlib",
        "import msal",
        "import requests",
        "from datetime import datetime",
        "from datetime import timedelta",
        "from processor import process_single_file"
      ],
      "imports_required": [
        "import os",
        "import json",
        "import time",
        "import asyncio",
        "from pathlib import Path",
        "from typing import Dict, List, Optional, Any",
        "import hashlib",
        "import msal",
        "import requests",
        "from datetime import datetime, timedelta"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 625,
      "line_start": 510,
      "name": "OneDriveProcessor",
      "parameters": [
        {
          "annotation": "",
          "default": null,
          "name": "bases"
        }
      ],
      "parameters_explained": {
        "api_key": "OpenAI API key string used for processing files through the LLM. Required for the process_single_file function to work.",
        "onedrive_config": "Dictionary containing OneDrive configuration settings. Expected keys include: 'watch_folder_path' (default: '/E-Ink LLM Input'), 'output_folder_path' (default: '/E-Ink LLM Output'), 'poll_interval' (default: 60 seconds), 'delete_after_processing' (boolean, optional). Also contains authentication credentials passed to OneDriveClient."
      },
      "parent_class": null,
      "purpose": "This class provides automated file processing integration with OneDrive for the E-Ink LLM Assistant. It continuously watches a specified OneDrive folder for new files (PDFs and images), downloads them, processes them through the LLM assistant, and uploads the processed results to an output folder. It supports configurable polling intervals, automatic folder creation, and optional deletion of processed files.",
      "return_annotation": null,
      "return_explained": "The constructor returns an instance of OneDriveProcessor. The initialize() method returns a boolean indicating success/failure of OneDrive connection. The start_watching() method returns None and runs indefinitely until interrupted. Internal methods _check_for_new_files() and _process_file() return None and perform side effects (file processing and uploads).",
      "settings_required": [
        "OneDrive configuration dictionary with authentication credentials (client_id, client_secret, tenant_id, or refresh_token)",
        "OpenAI API key for LLM processing",
        "OneDriveClient class must be available and properly configured",
        "processor module with process_single_file function must be available",
        "Optional: watch_folder_path, output_folder_path, poll_interval, delete_after_processing in config"
      ],
      "source_code": "class OneDriveProcessor:\n    \"\"\"OneDrive file processor for E-Ink LLM Assistant\"\"\"\n    \n    def __init__(self, onedrive_config: Dict[str, Any], api_key: str):\n        \"\"\"\n        Initialize OneDrive processor\n        \n        Args:\n            onedrive_config: OneDrive configuration dictionary\n            api_key: OpenAI API key\n        \"\"\"\n        self.client = OneDriveClient(onedrive_config)\n        self.api_key = api_key\n        self.config = onedrive_config\n        \n        # Configuration\n        self.watch_folder = onedrive_config.get('watch_folder_path', '/E-Ink LLM Input')\n        self.output_folder = onedrive_config.get('output_folder_path', '/E-Ink LLM Output')\n        self.poll_interval = onedrive_config.get('poll_interval', 60)\n        self.processed_files = set()\n        \n        # Supported file types\n        self.supported_extensions = ['.pdf', '.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff', '.webp']\n        \n        print(f\"\ud83d\udcc1 OneDrive watch folder: {self.watch_folder}\")\n        print(f\"\ud83d\udcc1 OneDrive output folder: {self.output_folder}\")\n    \n    async def initialize(self) -> bool:\n        \"\"\"Initialize OneDrive connection\"\"\"\n        success = await self.client.authenticate()\n        if success:\n            # Ensure folders exist\n            await self.client.create_folder(self.watch_folder)\n            await self.client.create_folder(self.output_folder)\n        return success\n    \n    async def start_watching(self) -> None:\n        \"\"\"Start watching OneDrive folder for new files\"\"\"\n        if not await self.initialize():\n            print(\"\u274c Failed to initialize OneDrive connection\")\n            return\n        \n        print(f\"\ud83d\udc40 Watching OneDrive folder: {self.watch_folder}\")\n        print(f\"\u23f1\ufe0f Poll interval: {self.poll_interval} seconds\")\n        print(\"\ud83d\uded1 Press Ctrl+C to stop\")\n        \n        try:\n            while True:\n                await self._check_for_new_files()\n                await asyncio.sleep(self.poll_interval)\n                \n        except KeyboardInterrupt:\n            print(\"\\n\ud83d\uded1 OneDrive watching stopped\")\n    \n    async def _check_for_new_files(self) -> None:\n        \"\"\"Check for new files in OneDrive watch folder\"\"\"\n        try:\n            files = await self.client.list_files_in_folder(\n                self.watch_folder, \n                self.supported_extensions\n            )\n            \n            new_files = [f for f in files if f['id'] not in self.processed_files]\n            \n            if new_files:\n                print(f\"\ud83d\udd0d Found {len(new_files)} new files in OneDrive\")\n                \n                for file_info in new_files:\n                    await self._process_file(file_info)\n                    self.processed_files.add(file_info['id'])\n            \n        except Exception as e:\n            print(f\"\u274c Error checking for new files: {e}\")\n    \n    async def _process_file(self, file_info: Dict[str, Any]) -> None:\n        \"\"\"Process a single file from OneDrive\"\"\"\n        print(f\"\ud83d\udcc4 Processing OneDrive file: {file_info['name']}\")\n        \n        try:\n            # Create temporary directory for processing\n            temp_dir = Path(\"temp_onedrive\")\n            temp_dir.mkdir(exist_ok=True)\n            \n            # Download file\n            local_input_path = temp_dir / 
file_info['name']\n            if not await self.client.download_file(file_info, str(local_input_path)):\n                return\n            \n            # Process with E-Ink LLM\n            from processor import process_single_file\n            result_path = await process_single_file(str(local_input_path), self.api_key)\n            \n            if result_path:\n                # Upload result to OneDrive\n                result_file = Path(result_path)\n                upload_success = await self.client.upload_file(\n                    str(result_file),\n                    self.output_folder,\n                    result_file.name\n                )\n                \n                if upload_success:\n                    print(f\"\u2705 Processed and uploaded: {file_info['name']} -> {result_file.name}\")\n                    \n                    # Optional: delete original file from input folder\n                    if self.config.get('delete_after_processing', False):\n                        await self.client.delete_file(file_info)\n                \n                # Clean up local files\n                local_input_path.unlink(missing_ok=True)\n                result_file.unlink(missing_ok=True)\n            else:\n                print(f\"\u274c Failed to process: {file_info['name']}\")\n                \n        except Exception as e:\n            print(f\"\u274c Error processing {file_info['name']}: {e}\")",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/onedrive_client.py",
      "tags": [
        "onedrive",
        "file-processing",
        "cloud-storage",
        "async",
        "file-watcher",
        "llm",
        "e-ink",
        "automation",
        "document-processing",
        "image-processing"
      ],
      "updated_at": "2025-12-07T01:21:14.300557",
      "usage_example": "import asyncio\nfrom onedrive_processor import OneDriveProcessor\n\n# Configuration\nonedrive_config = {\n    'client_id': 'your-client-id',\n    'client_secret': 'your-client-secret',\n    'tenant_id': 'your-tenant-id',\n    'watch_folder_path': '/E-Ink LLM Input',\n    'output_folder_path': '/E-Ink LLM Output',\n    'poll_interval': 60,\n    'delete_after_processing': False\n}\napi_key = 'your-openai-api-key'\n\n# Create processor instance\nprocessor = OneDriveProcessor(onedrive_config, api_key)\n\n# Start watching (runs indefinitely)\nasync def main():\n    await processor.start_watching()\n\nasyncio.run(main())"
    },
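OneDriveProcessor's watch loop reduces to one pattern: list the folder, skip ids already in processed_files, handle each new file, then sleep for poll_interval. Below is a minimal, framework-free sketch of that pattern, assuming nothing beyond the standard library; poll_for_new_files, list_files, and handle_file are hypothetical names used only for illustration.

import asyncio
from typing import Any, Awaitable, Callable, Dict, List

async def poll_for_new_files(
    list_files: Callable[[], Awaitable[List[Dict[str, Any]]]],
    handle_file: Callable[[Dict[str, Any]], Awaitable[None]],
    poll_interval: int = 60,
) -> None:
    # Track handled file ids so each file is processed exactly once,
    # mirroring the processed_files set described above.
    processed: set = set()
    while True:
        for info in await list_files():
            if info["id"] not in processed:
                await handle_file(info)
                processed.add(info["id"])
        await asyncio.sleep(poll_interval)

As with start_watching(), a caller would typically wrap the runner (for example asyncio.run(...)) in a KeyboardInterrupt handler for a clean shutdown.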
    {
      "best_practices": [
        "Always call authenticate() before performing any file operations",
        "The client automatically handles token refresh through _ensure_authenticated(), but initial authentication is required",
        "Token cache is automatically saved to disk for reuse across sessions",
        "For service applications, use client_secret and user_principal_name in config",
        "For interactive applications, omit client_secret to use device code flow",
        "Large files (>4MB) are automatically uploaded using chunked upload sessions",
        "File operations are async and should be awaited",
        "The client maintains state (access_token, token_expiry) that persists across method calls",
        "Folder paths should use forward slashes (e.g., '/MyFolder/SubFolder')",
        "The client automatically creates folders if they don't exist when uploading or getting folder IDs",
        "Error handling prints messages but returns None/False/empty list on failure - check return values",
        "For client credentials flow, scopes are automatically converted to /.default format",
        "The drive_endpoint is automatically set based on authentication method (me/drive vs users/{upn}/drive)"
      ],
      "class_interface": {
        "attributes": [
          {
            "description": "Complete configuration dictionary passed to constructor",
            "is_class_variable": false,
            "name": "config",
            "type": "Dict[str, Any]"
          },
          {
            "description": "Azure App Registration client ID",
            "is_class_variable": false,
            "name": "client_id",
            "type": "str"
          },
          {
            "description": "Azure App Registration client secret (for confidential clients)",
            "is_class_variable": false,
            "name": "client_secret",
            "type": "Optional[str]"
          },
          {
            "description": "Azure tenant ID (defaults to 'common')",
            "is_class_variable": false,
            "name": "tenant_id",
            "type": "str"
          },
          {
            "description": "OAuth redirect URI (defaults to 'http://localhost:8080')",
            "is_class_variable": false,
            "name": "redirect_uri",
            "type": "str"
          },
          {
            "description": "List of Microsoft Graph API scopes required",
            "is_class_variable": false,
            "name": "scopes",
            "type": "List[str]"
          },
          {
            "description": "User email for client credentials flow",
            "is_class_variable": false,
            "name": "user_principal_name",
            "type": "Optional[str]"
          },
          {
            "description": "Path to token cache file for persistent authentication",
            "is_class_variable": false,
            "name": "token_cache_file",
            "type": "Path"
          },
          {
            "description": "MSAL application instance for authentication",
            "is_class_variable": false,
            "name": "app",
            "type": "Union[msal.ConfidentialClientApplication, msal.PublicClientApplication]"
          },
          {
            "description": "Current OAuth access token for Graph API requests",
            "is_class_variable": false,
            "name": "access_token",
            "type": "Optional[str]"
          },
          {
            "description": "Expiration time of current access token",
            "is_class_variable": false,
            "name": "token_expiry",
            "type": "Optional[datetime]"
          },
          {
            "description": "Microsoft Graph API base URL (https://graph.microsoft.com/v1.0)",
            "is_class_variable": false,
            "name": "graph_url",
            "type": "str"
          },
          {
            "description": "Graph API endpoint for drive access (me/drive or users/{upn}/drive)",
            "is_class_variable": false,
            "name": "drive_endpoint",
            "type": "str"
          }
        ],
        "methods": [
          {
            "is_property": false,
            "is_static": false,
            "name": "__init__",
            "parameters": {
              "config": "Dictionary with client_id (required), client_secret, tenant_id, redirect_uri, scopes, user_principal_name, token_cache_file"
            },
            "purpose": "Initialize the OneDrive client with configuration and set up MSAL authentication",
            "returns": "None - initializes instance",
            "signature": "__init__(self, config: Dict[str, Any])"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_create_msal_app",
            "parameters": {},
            "purpose": "Create and configure MSAL application instance (ConfidentialClientApplication or PublicClientApplication)",
            "returns": "MSAL application instance with token cache loaded",
            "signature": "_create_msal_app(self)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_save_token_cache",
            "parameters": {},
            "purpose": "Save the current token cache to disk for reuse across sessions",
            "returns": "None - saves cache to file",
            "signature": "_save_token_cache(self)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "authenticate",
            "parameters": {},
            "purpose": "Authenticate with Microsoft Graph API using cached token, client credentials, or device code flow",
            "returns": "True if authentication successful, False otherwise",
            "signature": "async authenticate(self) -> bool"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_get_headers",
            "parameters": {},
            "purpose": "Generate HTTP headers with authorization token for Graph API requests",
            "returns": "Dictionary with Authorization and Content-Type headers",
            "signature": "_get_headers(self) -> Dict[str, str]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_ensure_authenticated",
            "parameters": {},
            "purpose": "Check if access token is valid and refresh if expired",
            "returns": "None - ensures valid token or re-authenticates",
            "signature": "async _ensure_authenticated(self)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "get_drive_info",
            "parameters": {},
            "purpose": "Retrieve information about the user's OneDrive (owner, quota, drive type)",
            "returns": "Dictionary with drive information or None on failure",
            "signature": "async get_drive_info(self) -> Optional[Dict[str, Any]]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "create_folder",
            "parameters": {
              "folder_path": "Path like '/E-Ink LLM Input' or '/My Folder/Subfolder'"
            },
            "purpose": "Create a folder hierarchy in OneDrive, creating parent folders as needed",
            "returns": "Folder ID of the created/existing folder, or None on failure",
            "signature": "async create_folder(self, folder_path: str) -> Optional[str]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_find_item_in_folder",
            "parameters": {
              "folder_id": "Parent folder ID to search in",
              "item_name": "Name of the item to find",
              "item_type": "Optional filter: 'folder' or 'file'"
            },
            "purpose": "Search for a specific file or folder by name within a parent folder",
            "returns": "Item information dictionary or None if not found",
            "signature": "async _find_item_in_folder(self, folder_id: str, item_name: str, item_type: str = None) -> Optional[Dict[str, Any]]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "get_folder_id",
            "parameters": {
              "folder_path": "Path like '/MyFolder/SubFolder'"
            },
            "purpose": "Get folder ID by path, creating the folder hierarchy if it doesn't exist",
            "returns": "Folder ID or None on failure",
            "signature": "async get_folder_id(self, folder_path: str) -> Optional[str]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "list_files_in_folder",
            "parameters": {
              "file_extensions": "Optional list of extensions to filter (e.g., ['.pdf', '.jpg'])",
              "folder_path": "Path like '/E-Ink LLM Input'"
            },
            "purpose": "List all files in a OneDrive folder, optionally filtered by file extension",
            "returns": "List of file information dictionaries with id, name, size, modified, download_url, path",
            "signature": "async list_files_in_folder(self, folder_path: str, file_extensions: List[str] = None) -> List[Dict[str, Any]]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "download_file",
            "parameters": {
              "file_info": "File information dictionary from list_files_in_folder",
              "local_path": "Local path where file should be saved"
            },
            "purpose": "Download a file from OneDrive to local storage",
            "returns": "True if download successful, False otherwise",
            "signature": "async download_file(self, file_info: Dict[str, Any], local_path: str) -> bool"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "upload_file",
            "parameters": {
              "filename": "Optional filename override (defaults to local filename)",
              "local_path": "Path to local file to upload",
              "onedrive_folder_path": "OneDrive folder path like '/E-Ink LLM Output'"
            },
            "purpose": "Upload a file to OneDrive, automatically handling large files with chunked upload",
            "returns": "True if upload successful, False otherwise",
            "signature": "async upload_file(self, local_path: str, onedrive_folder_path: str, filename: str = None) -> bool"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_upload_large_file",
            "parameters": {
              "filename": "Name for uploaded file",
              "folder_id": "OneDrive folder ID",
              "local_file": "Path object to local file"
            },
            "purpose": "Upload large files (>4MB) using chunked upload session with progress tracking",
            "returns": "True if upload successful, False otherwise",
            "signature": "async _upload_large_file(self, local_file: Path, folder_id: str, filename: str) -> bool"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "delete_file",
            "parameters": {
              "file_info": "File information dictionary with 'id' key"
            },
            "purpose": "Delete a file from OneDrive",
            "returns": "True if deletion successful, False otherwise",
            "signature": "async delete_file(self, file_info: Dict[str, Any]) -> bool"
          }
        ]
      },
      "complexity": "complex",
      "component_type": "class",
      "conditional_imports": [],
      "created_at": "2025-12-07 00:20:43",
      "decorators": [],
      "dependencies": [
        "msal",
        "requests",
        "pathlib",
        "typing",
        "datetime",
        "json",
        "os"
      ],
      "description": "A comprehensive Microsoft OneDrive client that uses the Microsoft Graph API to authenticate and perform file operations (upload, download, list, delete) on OneDrive storage.",
      "docstring": "Microsoft OneDrive client using Graph API",
      "id": 2028,
      "imports": [
        "import os",
        "import json",
        "import time",
        "import asyncio",
        "from pathlib import Path",
        "from typing import Dict",
        "from typing import List",
        "from typing import Optional",
        "from typing import Any",
        "import hashlib",
        "import msal",
        "import requests",
        "from datetime import datetime",
        "from datetime import timedelta",
        "from processor import process_single_file"
      ],
      "imports_required": [
        "import msal",
        "import requests",
        "from pathlib import Path",
        "from typing import Dict, List, Optional, Any",
        "from datetime import datetime, timedelta"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 507,
      "line_start": 20,
      "name": "OneDriveClient",
      "parameters": [
        {
          "annotation": "",
          "default": null,
          "name": "bases"
        }
      ],
      "parameters_explained": {
        "config": "Dictionary containing OneDrive/Azure configuration. Required keys: 'client_id' (Azure App Registration client ID). Optional keys: 'client_secret' (for confidential client apps), 'tenant_id' (defaults to 'common'), 'redirect_uri' (defaults to 'http://localhost:8080'), 'scopes' (list of Graph API scopes, defaults to ['https://graph.microsoft.com/Files.ReadWrite.All']), 'user_principal_name' (user email for client credentials flow), 'token_cache_file' (path to token cache file, defaults to 'onedrive_token_cache.json')"
      },
      "parent_class": null,
      "purpose": "This class provides a complete interface for interacting with Microsoft OneDrive through the Graph API. It handles authentication using MSAL (Microsoft Authentication Library) with support for both device code flow (interactive) and client credentials flow (service applications). The client manages token caching, automatic token refresh, folder creation, file uploads (including large file chunked uploads), downloads, listing, and deletion. It's designed for applications that need to integrate OneDrive storage capabilities, such as automated file synchronization, backup systems, or document processing pipelines.",
      "return_annotation": null,
      "return_explained": "The constructor returns an initialized OneDriveClient instance. Key methods return: authenticate() returns bool (success/failure), get_drive_info() returns Optional[Dict] with drive metadata, create_folder() returns Optional[str] folder ID, list_files_in_folder() returns List[Dict] of file information, download_file() returns bool, upload_file() returns bool, delete_file() returns bool. File information dictionaries contain keys: 'id', 'name', 'size', 'modified', 'download_url', 'path'.",
      "settings_required": [
        "Azure App Registration with client_id (required)",
        "Azure App Registration client_secret (optional, for service applications)",
        "Azure tenant_id (optional, defaults to 'common' for multi-tenant)",
        "Microsoft Graph API permissions configured in Azure portal (Files.ReadWrite.All or similar)",
        "Token cache file location (optional, defaults to 'onedrive_token_cache.json' in current directory)",
        "For client credentials flow: user_principal_name (user email) must be provided"
      ],
      "source_code": "class OneDriveClient:\n    \"\"\"Microsoft OneDrive client using Graph API\"\"\"\n    \n    def __init__(self, config: Dict[str, Any]):\n        \"\"\"\n        Initialize OneDrive client\n        \n        Args:\n            config: Dictionary containing:\n                - client_id: Azure App Registration client ID\n                - client_secret: Azure App Registration client secret (optional for public clients)\n                - tenant_id: Azure tenant ID (optional, defaults to 'common')\n                - redirect_uri: Redirect URI for authentication (optional)\n                - scopes: List of required scopes\n        \"\"\"\n        self.config = config\n        self.client_id = config['client_id']\n        self.client_secret = config.get('client_secret')\n        self.tenant_id = config.get('tenant_id', 'common')\n        self.redirect_uri = config.get('redirect_uri', 'http://localhost:8080')\n        self.scopes = config.get('scopes', ['https://graph.microsoft.com/Files.ReadWrite.All'])\n        self.user_principal_name = config.get('user_principal_name')  # For client credentials flow\n        \n        # Token cache file\n        self.token_cache_file = Path(config.get('token_cache_file', 'onedrive_token_cache.json'))\n        \n        # MSAL client app\n        self.app = self._create_msal_app()\n        \n        # Current access token\n        self.access_token = None\n        self.token_expiry = None\n        \n        # Graph API base URL\n        self.graph_url = \"https://graph.microsoft.com/v1.0\"\n        \n        # Determine drive endpoint based on authentication method\n        if self.client_secret and self.user_principal_name:\n            self.drive_endpoint = f\"users/{self.user_principal_name}/drive\"\n        else:\n            self.drive_endpoint = \"me/drive\"\n        \n        print(\"\ud83d\udd17 OneDrive client initialized\")\n    \n    def _create_msal_app(self):\n        \"\"\"Create MSAL application instance\"\"\"\n        cache = msal.SerializableTokenCache()\n        \n        # Load existing token cache if available\n        if self.token_cache_file.exists():\n            try:\n                with open(self.token_cache_file, 'r') as f:\n                    cache.deserialize(f.read())\n            except Exception as e:\n                print(f\"\u26a0\ufe0f Could not load token cache: {e}\")\n        \n        if self.client_secret:\n            # Confidential client app (with client secret)\n            app = msal.ConfidentialClientApplication(\n                client_id=self.client_id,\n                client_credential=self.client_secret,\n                authority=f\"https://login.microsoftonline.com/{self.tenant_id}\",\n                token_cache=cache\n            )\n        else:\n            # Public client app (device code flow)\n            app = msal.PublicClientApplication(\n                client_id=self.client_id,\n                authority=f\"https://login.microsoftonline.com/{self.tenant_id}\",\n                token_cache=cache\n            )\n        \n        return app\n    \n    def _save_token_cache(self):\n        \"\"\"Save token cache to file\"\"\"\n        try:\n            if self.app.token_cache.has_state_changed:\n                with open(self.token_cache_file, 'w') as f:\n                    f.write(self.app.token_cache.serialize())\n        except Exception as e:\n            print(f\"\u26a0\ufe0f Could not save token cache: {e}\")\n    \n    async def authenticate(self) -> bool:\n        
\"\"\"Authenticate with Microsoft Graph API\"\"\"\n        print(\"\ud83d\udd10 Authenticating with Microsoft Graph API...\")\n        \n        # Try to get token silently first\n        accounts = self.app.get_accounts()\n        if accounts:\n            try:\n                result = self.app.acquire_token_silent(self.scopes, account=accounts[0])\n                if result and \"access_token\" in result:\n                    self.access_token = result[\"access_token\"]\n                    self.token_expiry = datetime.now() + timedelta(seconds=result.get(\"expires_in\", 3600))\n                    self._save_token_cache()\n                    print(\"\u2705 Authentication successful (cached token)\")\n                    return True\n            except Exception as e:\n                print(f\"\u26a0\ufe0f Silent authentication failed: {e}\")\n        \n        # Interactive authentication required\n        if self.client_secret:\n            # Client credentials flow (for service applications)\n            try:\n                # For client credentials flow, scopes must end with /.default\n                client_cred_scopes = [\"https://graph.microsoft.com/.default\"]\n                result = self.app.acquire_token_for_client(scopes=client_cred_scopes)\n                if result and \"access_token\" in result:\n                    self.access_token = result[\"access_token\"]\n                    self.token_expiry = datetime.now() + timedelta(seconds=result.get(\"expires_in\", 3600))\n                    self._save_token_cache()\n                    print(\"\u2705 Authentication successful (client credentials)\")\n                    return True\n                else:\n                    print(f\"\u274c Authentication failed: {result.get('error_description', 'Unknown error')}\")\n                    return False\n            except Exception as e:\n                print(f\"\u274c Authentication error: {e}\")\n                return False\n        \n        else:\n            # Device code flow (for interactive applications)\n            try:\n                flow = self.app.initiate_device_flow(scopes=self.scopes)\n                if \"user_code\" not in flow:\n                    print(\"\u274c Failed to create device flow\")\n                    return False\n                \n                print(f\"\ud83d\udd17 Please visit: {flow['verification_uri']}\")\n                print(f\"\ud83d\udcf1 Enter code: {flow['user_code']}\")\n                print(\"\u23f3 Waiting for authentication...\")\n                \n                result = self.app.acquire_token_by_device_flow(flow)\n                if result and \"access_token\" in result:\n                    self.access_token = result[\"access_token\"]\n                    self.token_expiry = datetime.now() + timedelta(seconds=result.get(\"expires_in\", 3600))\n                    self._save_token_cache()\n                    print(\"\u2705 Authentication successful (device flow)\")\n                    return True\n                else:\n                    print(f\"\u274c Authentication failed: {result.get('error_description', 'Unknown error')}\")\n                    return False\n                    \n            except Exception as e:\n                print(f\"\u274c Authentication error: {e}\")\n                return False\n    \n    def _get_headers(self) -> Dict[str, str]:\n        \"\"\"Get headers for Graph API requests\"\"\"\n        if not self.access_token:\n            raise ValueError(\"Not authenticated - call 
authenticate() first\")\n        \n        return {\n            'Authorization': f'Bearer {self.access_token}',\n            'Content-Type': 'application/json'\n        }\n    \n    async def _ensure_authenticated(self):\n        \"\"\"Ensure we have a valid access token\"\"\"\n        if not self.access_token or (self.token_expiry and datetime.now() >= self.token_expiry):\n            await self.authenticate()\n    \n    async def get_drive_info(self) -> Optional[Dict[str, Any]]:\n        \"\"\"Get information about the user's OneDrive\"\"\"\n        await self._ensure_authenticated()\n        \n        try:\n            response = requests.get(\n                f\"{self.graph_url}/{self.drive_endpoint}\",\n                headers=self._get_headers()\n            )\n            response.raise_for_status()\n            return response.json()\n        except Exception as e:\n            print(f\"\u274c Failed to get drive info: {e}\")\n            return None\n    \n    async def create_folder(self, folder_path: str) -> Optional[str]:\n        \"\"\"\n        Create a folder in OneDrive\n        \n        Args:\n            folder_path: Path like '/E-Ink LLM Input' or '/My Folder/Subfolder'\n            \n        Returns:\n            Folder ID if successful, None otherwise\n        \"\"\"\n        await self._ensure_authenticated()\n        \n        # Clean and split path\n        path_parts = [part for part in folder_path.strip('/').split('/') if part]\n        if not path_parts:\n            return None\n        \n        current_parent = \"root\"\n        \n        try:\n            for folder_name in path_parts:\n                # Check if folder exists\n                existing_folder = await self._find_item_in_folder(current_parent, folder_name, \"folder\")\n                \n                if existing_folder:\n                    current_parent = existing_folder['id']\n                else:\n                    # Create the folder\n                    folder_data = {\n                        \"name\": folder_name,\n                        \"folder\": {},\n                        \"@microsoft.graph.conflictBehavior\": \"rename\"\n                    }\n                    \n                    response = requests.post(\n                        f\"{self.graph_url}/{self.drive_endpoint}/items/{current_parent}/children\",\n                        headers=self._get_headers(),\n                        json=folder_data\n                    )\n                    response.raise_for_status()\n                    \n                    new_folder = response.json()\n                    current_parent = new_folder['id']\n                    print(f\"\ud83d\udcc1 Created OneDrive folder: {folder_name}\")\n            \n            return current_parent\n            \n        except Exception as e:\n            print(f\"\u274c Failed to create folder {folder_path}: {e}\")\n            return None\n    \n    async def _find_item_in_folder(self, folder_id: str, item_name: str, item_type: str = None) -> Optional[Dict[str, Any]]:\n        \"\"\"Find an item (file or folder) in a specific folder\"\"\"\n        try:\n            response = requests.get(\n                f\"{self.graph_url}/{self.drive_endpoint}/items/{folder_id}/children\",\n                headers=self._get_headers()\n            )\n            response.raise_for_status()\n            \n            items = response.json().get('value', [])\n            \n            for item in items:\n                if item['name'] == item_name:\n  
                  if item_type is None:\n                        return item\n                    elif item_type == \"folder\" and 'folder' in item:\n                        return item\n                    elif item_type == \"file\" and 'file' in item:\n                        return item\n            \n            return None\n            \n        except Exception as e:\n            print(f\"\u274c Failed to find item {item_name}: {e}\")\n            return None\n    \n    async def get_folder_id(self, folder_path: str) -> Optional[str]:\n        \"\"\"Get folder ID by path, creating if necessary\"\"\"\n        if not folder_path or folder_path == '/':\n            return \"root\"\n        \n        # Try to find existing folder first\n        path_parts = [part for part in folder_path.strip('/').split('/') if part]\n        current_parent = \"root\"\n        \n        try:\n            for folder_name in path_parts:\n                folder = await self._find_item_in_folder(current_parent, folder_name, \"folder\")\n                if folder:\n                    current_parent = folder['id']\n                else:\n                    # Folder doesn't exist, create it\n                    return await self.create_folder(folder_path)\n            \n            return current_parent\n            \n        except Exception as e:\n            print(f\"\u274c Failed to get folder ID for {folder_path}: {e}\")\n            return None\n    \n    async def list_files_in_folder(self, folder_path: str, file_extensions: List[str] = None) -> List[Dict[str, Any]]:\n        \"\"\"\n        List files in a OneDrive folder\n        \n        Args:\n            folder_path: Path like '/E-Ink LLM Input'\n            file_extensions: List of extensions to filter by (e.g., ['.pdf', '.jpg'])\n            \n        Returns:\n            List of file information dictionaries\n        \"\"\"\n        await self._ensure_authenticated()\n        \n        folder_id = await self.get_folder_id(folder_path)\n        if not folder_id:\n            return []\n        \n        try:\n            response = requests.get(\n                f\"{self.graph_url}/{self.drive_endpoint}/items/{folder_id}/children\",\n                headers=self._get_headers()\n            )\n            response.raise_for_status()\n            \n            items = response.json().get('value', [])\n            files = []\n            \n            for item in items:\n                if 'file' in item:  # It's a file, not a folder\n                    if file_extensions:\n                        file_ext = Path(item['name']).suffix.lower()\n                        if file_ext not in file_extensions:\n                            continue\n                    \n                    files.append({\n                        'id': item['id'],\n                        'name': item['name'], \n                        'size': item['size'],\n                        'modified': item['lastModifiedDateTime'],\n                        'download_url': item.get('@microsoft.graph.downloadUrl'),\n                        'path': f\"{folder_path.rstrip('/')}/{item['name']}\"\n                    })\n            \n            return files\n            \n        except Exception as e:\n            print(f\"\u274c Failed to list files in {folder_path}: {e}\")\n            return []\n    \n    async def download_file(self, file_info: Dict[str, Any], local_path: str) -> bool:\n        \"\"\"\n        Download a file from OneDrive\n        \n        Args:\n            
file_info: File information from list_files_in_folder\n            local_path: Local path to save the file\n            \n        Returns:\n            True if successful, False otherwise\n        \"\"\"\n        try:\n            download_url = file_info.get('download_url')\n            if not download_url:\n                # Get download URL if not provided\n                await self._ensure_authenticated()\n                response = requests.get(\n                    f\"{self.graph_url}/{self.drive_endpoint}/items/{file_info['id']}\",\n                    headers=self._get_headers()\n                )\n                response.raise_for_status()\n                download_url = response.json().get('@microsoft.graph.downloadUrl')\n            \n            if not download_url:\n                print(f\"\u274c No download URL for {file_info['name']}\")\n                return False\n            \n            # Download the file\n            response = requests.get(download_url)\n            response.raise_for_status()\n            \n            # Save to local path\n            local_file = Path(local_path)\n            local_file.parent.mkdir(parents=True, exist_ok=True)\n            \n            with open(local_file, 'wb') as f:\n                f.write(response.content)\n            \n            print(f\"\ud83d\udce5 Downloaded: {file_info['name']} -> {local_path}\")\n            return True\n            \n        except Exception as e:\n            print(f\"\u274c Failed to download {file_info['name']}: {e}\")\n            return False\n    \n    async def upload_file(self, local_path: str, onedrive_folder_path: str, filename: str = None) -> bool:\n        \"\"\"\n        Upload a file to OneDrive\n        \n        Args:\n            local_path: Path to local file\n            onedrive_folder_path: OneDrive folder path like '/E-Ink LLM Output'\n            filename: Optional filename override\n            \n        Returns:\n            True if successful, False otherwise\n        \"\"\"\n        await self._ensure_authenticated()\n        \n        local_file = Path(local_path)\n        if not local_file.exists():\n            print(f\"\u274c Local file not found: {local_path}\")\n            return False\n        \n        upload_name = filename or local_file.name\n        folder_id = await self.get_folder_id(onedrive_folder_path)\n        \n        if not folder_id:\n            print(f\"\u274c Could not access OneDrive folder: {onedrive_folder_path}\")\n            return False\n        \n        try:\n            # For small files (< 4MB), use simple upload\n            file_size = local_file.stat().st_size\n            \n            if file_size < 4 * 1024 * 1024:  # 4MB\n                with open(local_file, 'rb') as f:\n                    response = requests.put(\n                        f\"{self.graph_url}/{self.drive_endpoint}/items/{folder_id}:/{upload_name}:/content\",\n                        headers={'Authorization': f'Bearer {self.access_token}'},\n                        data=f.read()\n                    )\n                response.raise_for_status()\n                print(f\"\ud83d\udce4 Uploaded: {local_path} -> {onedrive_folder_path}/{upload_name}\")\n                return True\n            \n            else:\n                # For large files, use upload session\n                return await self._upload_large_file(local_file, folder_id, upload_name)\n                \n        except Exception as e:\n            print(f\"\u274c Failed to upload 
{local_path}: {e}\")\n            return False\n    \n    async def _upload_large_file(self, local_file: Path, folder_id: str, filename: str) -> bool:\n        \"\"\"Upload large file using upload session\"\"\"\n        try:\n            # Create upload session\n            session_data = {\n                \"item\": {\n                    \"@microsoft.graph.conflictBehavior\": \"replace\",\n                    \"name\": filename\n                }\n            }\n            \n            response = requests.post(\n                f\"{self.graph_url}/{self.drive_endpoint}/items/{folder_id}:/{filename}:/createUploadSession\",\n                headers=self._get_headers(),\n                json=session_data\n            )\n            response.raise_for_status()\n            \n            upload_url = response.json()['uploadUrl']\n            file_size = local_file.stat().st_size\n            chunk_size = 320 * 1024  # 320KB chunks\n            \n            with open(local_file, 'rb') as f:\n                uploaded = 0\n                while uploaded < file_size:\n                    chunk = f.read(chunk_size)\n                    if not chunk:\n                        break\n                    \n                    chunk_start = uploaded\n                    chunk_end = min(uploaded + len(chunk) - 1, file_size - 1)\n                    \n                    headers = {\n                        'Content-Range': f'bytes {chunk_start}-{chunk_end}/{file_size}',\n                        'Content-Length': str(len(chunk))\n                    }\n                    \n                    response = requests.put(upload_url, headers=headers, data=chunk)\n                    response.raise_for_status()\n                    \n                    uploaded += len(chunk)\n                    print(f\"\ud83d\udce4 Upload progress: {uploaded}/{file_size} bytes ({uploaded/file_size*100:.1f}%)\")\n            \n            print(f\"\ud83d\udce4 Large file uploaded: {filename}\")\n            return True\n            \n        except Exception as e:\n            print(f\"\u274c Failed to upload large file {filename}: {e}\")\n            return False\n    \n    async def delete_file(self, file_info: Dict[str, Any]) -> bool:\n        \"\"\"Delete a file from OneDrive\"\"\"\n        await self._ensure_authenticated()\n        \n        try:\n            response = requests.delete(\n                f\"{self.graph_url}/{self.drive_endpoint}/items/{file_info['id']}\",\n                headers=self._get_headers()\n            )\n            response.raise_for_status()\n            print(f\"\ud83d\uddd1\ufe0f Deleted from OneDrive: {file_info['name']}\")\n            return True\n            \n        except Exception as e:\n            print(f\"\u274c Failed to delete {file_info['name']}: {e}\")\n            return False",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/onedrive_client.py",
      "tags": [
        "onedrive",
        "microsoft-graph",
        "cloud-storage",
        "file-operations",
        "authentication",
        "msal",
        "oauth2",
        "async",
        "upload",
        "download",
        "azure"
      ],
      "updated_at": "2025-12-07T01:20:43.728559",
      "usage_example": "# Interactive authentication (device code flow)\nconfig = {\n    'client_id': 'your-azure-app-client-id',\n    'scopes': ['https://graph.microsoft.com/Files.ReadWrite.All']\n}\n\nclient = OneDriveClient(config)\n\n# Authenticate (will prompt for device code)\nawait client.authenticate()\n\n# Get drive information\ndrive_info = await client.get_drive_info()\nprint(f\"Drive owner: {drive_info['owner']['user']['displayName']}\")\n\n# Create folder\nfolder_id = await client.create_folder('/MyApp/Documents')\n\n# List files in folder\nfiles = await client.list_files_in_folder('/MyApp/Documents', file_extensions=['.pdf', '.docx'])\nfor file in files:\n    print(f\"Found: {file['name']} ({file['size']} bytes)\")\n\n# Download a file\nif files:\n    await client.download_file(files[0], './downloads/document.pdf')\n\n# Upload a file\nawait client.upload_file('./local/report.pdf', '/MyApp/Documents')\n\n# Delete a file\nif files:\n    await client.delete_file(files[0])\n\n# Service application example (client credentials flow)\nservice_config = {\n    'client_id': 'your-client-id',\n    'client_secret': 'your-client-secret',\n    'tenant_id': 'your-tenant-id',\n    'user_principal_name': 'user@domain.com'\n}\nservice_client = OneDriveClient(service_config)\nawait service_client.authenticate()"
    },
    {
      "best_practices": [
        "Always use await when calling generate_graphic() as it is an async method",
        "Check the return value for None to handle generation failures gracefully",
        "Ensure the api_key is valid before instantiation to avoid runtime errors in IllustrationGenerator",
        "The class maintains stateful generator instances, so reuse the same GraphicsGenerator instance for multiple graphic generations",
        "Handle exceptions at the caller level as the method catches and logs errors internally but returns None",
        "For SKETCH type graphics, be aware they are currently processed as ILLUSTRATION types",
        "Ensure GraphicSpec objects have valid 'type' and 'id' attributes before passing to generate_graphic()",
        "The method is thread-safe for concurrent calls as each generator maintains its own state"
      ],
      "class_interface": {
        "attributes": [
          {
            "description": "Instance of ChartGenerator responsible for generating chart-type graphics",
            "is_class_variable": false,
            "name": "chart_generator",
            "type": "ChartGenerator"
          },
          {
            "description": "Instance of DiagramGenerator responsible for generating diagram-type graphics",
            "is_class_variable": false,
            "name": "diagram_generator",
            "type": "DiagramGenerator"
          },
          {
            "description": "Instance of IllustrationGenerator responsible for generating illustration and sketch-type graphics using AI",
            "is_class_variable": false,
            "name": "illustration_generator",
            "type": "IllustrationGenerator"
          }
        ],
        "methods": [
          {
            "is_property": false,
            "is_static": false,
            "name": "__init__",
            "parameters": {
              "api_key": "OpenAI API key string required for illustration generation"
            },
            "purpose": "Initializes the GraphicsGenerator with three specialized generator instances",
            "returns": "None (constructor)",
            "signature": "__init__(self, api_key: str)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "generate_graphic",
            "parameters": {
              "spec": "GraphicSpec object containing the type, id, and other specifications for the graphic to be generated"
            },
            "purpose": "Routes graphic generation requests to the appropriate specialized generator based on the GraphicSpec type",
            "returns": "The updated GraphicSpec object with generated graphic data on success, or None if generation fails or type is unknown",
            "signature": "async generate_graphic(self, spec: GraphicSpec) -> Optional[GraphicSpec]"
          }
        ]
      },
      "complexity": "moderate",
      "component_type": "class",
      "conditional_imports": [
        {
          "condition": "Required by ChartGenerator for chart generation",
          "import": "import matplotlib.pyplot as plt",
          "optional": false
        },
        {
          "condition": "Required by DiagramGenerator for diagram generation",
          "import": "import matplotlib.patches as patches",
          "optional": false
        },
        {
          "condition": "Required by ChartGenerator and DiagramGenerator for numerical operations",
          "import": "import numpy as np",
          "optional": false
        },
        {
          "condition": "Required by ChartGenerator for enhanced chart styling",
          "import": "import seaborn as sns",
          "optional": false
        },
        {
          "condition": "Required by DiagramGenerator and IllustrationGenerator for image manipulation",
          "import": "from PIL import Image, ImageDraw, ImageFont",
          "optional": false
        },
        {
          "condition": "Required by DiagramGenerator for graph-based diagrams",
          "import": "import networkx as nx",
          "optional": false
        },
        {
          "condition": "Required by IllustrationGenerator for AI-generated illustrations",
          "import": "from openai import OpenAI",
          "optional": false
        }
      ],
      "created_at": "2025-12-07 00:19:45",
      "decorators": [],
      "dependencies": [
        "asyncio",
        "matplotlib",
        "numpy",
        "seaborn",
        "PIL",
        "networkx",
        "openai"
      ],
      "description": "GraphicsGenerator is a coordinator class that orchestrates the generation of different types of graphics (charts, diagrams, illustrations, and sketches) by delegating to specialized generator classes.",
      "docstring": "Main graphics generation coordinator",
      "id": 2027,
      "imports": [
        "import asyncio",
        "import io",
        "import base64",
        "import json",
        "from enum import Enum",
        "from typing import Dict",
        "from typing import Any",
        "from typing import List",
        "from typing import Optional",
        "from typing import Union",
        "from typing import Tuple",
        "from dataclasses import dataclass",
        "from pathlib import Path",
        "import matplotlib.pyplot as plt",
        "import matplotlib.patches as patches",
        "from matplotlib.patches import FancyBboxPatch",
        "from matplotlib.patches import Rectangle",
        "from matplotlib.patches import Circle",
        "from matplotlib.patches import Arrow",
        "import numpy as np",
        "import seaborn as sns",
        "from PIL import Image",
        "from PIL import ImageDraw",
        "from PIL import ImageFont",
        "import networkx as nx",
        "from openai import OpenAI"
      ],
      "imports_required": [
        "import asyncio",
        "from typing import Optional",
        "from enum import Enum"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 556,
      "line_start": 529,
      "name": "GraphicsGenerator",
      "parameters": [
        {
          "annotation": "",
          "default": null,
          "name": "bases"
        }
      ],
      "parameters_explained": {
        "api_key": "API key string required for the IllustrationGenerator, typically an OpenAI API key used for AI-generated illustrations. This key is passed through to the IllustrationGenerator during initialization."
      },
      "parent_class": null,
      "purpose": "This class serves as the main entry point for generating various types of graphics. It manages three specialized generators (ChartGenerator, DiagramGenerator, and IllustrationGenerator) and routes graphic generation requests to the appropriate generator based on the GraphicSpec type. It provides a unified async interface for generating all graphic types, handling errors gracefully and returning None on failure.",
      "return_annotation": null,
      "return_explained": "The constructor returns a GraphicsGenerator instance. The generate_graphic method returns an Optional[GraphicSpec] - either the updated GraphicSpec object with generated graphic data on success, or None if generation fails or the graphic type is unknown.",
      "settings_required": [
        "OpenAI API key must be provided as a string parameter during instantiation",
        "ChartGenerator, DiagramGenerator, and IllustrationGenerator classes must be defined and importable",
        "GraphicSpec dataclass must be defined with 'type' and 'id' attributes",
        "GraphicType enum must be defined with CHART, DIAGRAM, ILLUSTRATION, and SKETCH values"
      ],
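      "supporting_definitions_example": "# Hedged sketch, not from graphics_generator.py: minimal stand-ins for the GraphicType\n# enum and GraphicSpec dataclass listed in settings_required, sufficient to run the\n# usage example below. Field names beyond 'type' and 'id' are assumptions.\nfrom dataclasses import dataclass, field\nfrom enum import Enum\nfrom typing import Any, Dict, Optional\n\nclass GraphicType(Enum):\n    CHART = 'chart'\n    DIAGRAM = 'diagram'\n    ILLUSTRATION = 'illustration'\n    SKETCH = 'sketch'\n\n@dataclass\nclass GraphicSpec:\n    id: str\n    type: GraphicType\n    description: str = ''\n    parameters: Dict[str, Any] = field(default_factory=dict)\n    title: str = ''\n    data: Optional[Dict[str, Any]] = None\n    image_data: Optional[str] = None\n    width: int = 0\n    height: int = 0",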
      "source_code": "class GraphicsGenerator:\n    \"\"\"Main graphics generation coordinator\"\"\"\n    \n    def __init__(self, api_key: str):\n        self.chart_generator = ChartGenerator()\n        self.diagram_generator = DiagramGenerator()\n        self.illustration_generator = IllustrationGenerator(api_key)\n    \n    async def generate_graphic(self, spec: GraphicSpec) -> Optional[GraphicSpec]:\n        \"\"\"Generate a graphic based on its type and specification\"\"\"\n        \n        try:\n            if spec.type == GraphicType.CHART:\n                return self.chart_generator.generate_chart(spec)\n            elif spec.type == GraphicType.DIAGRAM:\n                return self.diagram_generator.generate_diagram(spec)\n            elif spec.type == GraphicType.ILLUSTRATION:\n                return await self.illustration_generator.generate_illustration(spec)\n            elif spec.type == GraphicType.SKETCH:\n                # For now, treat sketches similar to illustrations\n                return await self.illustration_generator.generate_illustration(spec)\n            else:\n                print(f\"Unknown graphic type: {spec.type}\")\n                return None\n                \n        except Exception as e:\n            print(f\"Error generating graphic {spec.id}: {e}\")\n            return None",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/graphics_generator.py",
      "tags": [
        "graphics",
        "visualization",
        "coordinator",
        "async",
        "chart-generation",
        "diagram-generation",
        "illustration",
        "image-generation",
        "openai",
        "matplotlib",
        "factory-pattern"
      ],
      "updated_at": "2025-12-07T01:19:45.990434",
      "usage_example": "# Assuming GraphicSpec and GraphicType are defined\nimport asyncio\nfrom graphics_generator import GraphicsGenerator, GraphicSpec, GraphicType\n\n# Initialize the generator with API key\napi_key = \"your-openai-api-key\"\ngenerator = GraphicsGenerator(api_key)\n\n# Create a graphic specification\nspec = GraphicSpec(\n    id=\"chart_001\",\n    type=GraphicType.CHART,\n    title=\"Sales Data\",\n    data={\"Q1\": 100, \"Q2\": 150, \"Q3\": 200}\n)\n\n# Generate the graphic asynchronously\nasync def main():\n    result = await generator.generate_graphic(spec)\n    if result:\n        print(f\"Graphic generated successfully: {result.id}\")\n    else:\n        print(\"Failed to generate graphic\")\n\nasyncio.run(main())"
    },
    {
      "best_practices": [
        "Always await the generate_illustration() method as it is async",
        "Ensure GraphicSpec objects have properly formatted parameters dictionary with 'concept' and 'style' keys",
        "The class automatically closes matplotlib figures to prevent memory leaks",
        "Use 'mathematical' or 'math' in style/concept for math illustrations, 'scientific' or 'science' for science illustrations",
        "The returned GraphicSpec object contains base64-encoded PNG data in the image_data attribute",
        "Error handling is built-in; failed generations return error placeholder illustrations rather than raising exceptions",
        "The OpenAI client is initialized but not currently used in the implementation; this may be for future DALL-E integration",
        "All generated images are 800x600 pixels by default (600x400 for error illustrations)",
        "Images are rendered at 150 DPI with white background and tight bounding boxes"
      ],
      "class_interface": {
        "attributes": [
          {
            "description": "OpenAI client instance initialized with the provided API key, potentially for future DALL-E integration",
            "is_class_variable": false,
            "name": "client",
            "type": "OpenAI"
          }
        ],
        "methods": [
          {
            "is_property": false,
            "is_static": false,
            "name": "__init__",
            "parameters": {
              "api_key": "OpenAI API key string for client initialization"
            },
            "purpose": "Initialize the IllustrationGenerator with an OpenAI API key",
            "returns": "None (constructor)",
            "signature": "__init__(self, api_key: str)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "generate_illustration",
            "parameters": {
              "spec": "GraphicSpec object containing description and parameters dictionary with 'concept' and 'style' keys"
            },
            "purpose": "Main entry point to generate an illustration based on the provided specification, routing to appropriate generation method based on style and concept",
            "returns": "Modified GraphicSpec object with populated image_data (base64 PNG), width, and height attributes",
            "signature": "async generate_illustration(self, spec: GraphicSpec) -> GraphicSpec"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_create_math_illustration",
            "parameters": {
              "spec": "GraphicSpec object with parameters containing 'concept' key (e.g., 'function', 'quadratic', 'derivative')"
            },
            "purpose": "Create mathematical concept illustrations including quadratic functions, derivatives, and generic math concepts",
            "returns": "GraphicSpec object with generated mathematical illustration as base64-encoded PNG",
            "signature": "_create_math_illustration(self, spec: GraphicSpec) -> GraphicSpec"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_create_science_illustration",
            "parameters": {
              "spec": "GraphicSpec object with parameters containing 'concept' key (e.g., 'atom', 'atoms')"
            },
            "purpose": "Create scientific concept illustrations including atomic structures and generic science concepts",
            "returns": "GraphicSpec object with generated scientific illustration as base64-encoded PNG",
            "signature": "_create_science_illustration(self, spec: GraphicSpec) -> GraphicSpec"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_create_generic_illustration",
            "parameters": {
              "spec": "GraphicSpec object with description to be displayed in the illustration"
            },
            "purpose": "Create a generic placeholder illustration with the description text displayed",
            "returns": "GraphicSpec object with generic placeholder illustration as base64-encoded PNG",
            "signature": "_create_generic_illustration(self, spec: GraphicSpec) -> GraphicSpec"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_fig_to_base64",
            "parameters": {
              "fig": "matplotlib.figure.Figure object to be converted"
            },
            "purpose": "Convert a matplotlib figure object to a base64-encoded PNG string",
            "returns": "Base64-encoded string representation of the figure as PNG image",
            "signature": "_fig_to_base64(self, fig) -> str"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_create_error_illustration",
            "parameters": {
              "error_msg": "Error message string to display in the illustration",
              "spec": "GraphicSpec object to be populated with error illustration"
            },
            "purpose": "Create an error placeholder illustration displaying the error message",
            "returns": "GraphicSpec object with error placeholder illustration as base64-encoded PNG",
            "signature": "_create_error_illustration(self, spec: GraphicSpec, error_msg: str) -> GraphicSpec"
          }
        ]
      },
      "complexity": "moderate",
      "component_type": "class",
      "conditional_imports": [
        {
          "condition": "only needed when generating scientific illustrations with atomic structures",
          "import": "from matplotlib.patches import Circle",
          "optional": false
        }
      ],
      "created_at": "2025-12-07 00:19:19",
      "decorators": [],
      "dependencies": [
        "openai",
        "matplotlib",
        "numpy",
        "io",
        "base64"
      ],
      "description": "A class that generates educational illustrations and technical drawings for mathematical and scientific concepts using matplotlib and programmatic rendering.",
      "docstring": "Generates educational illustrations and technical drawings",
      "id": 2026,
      "imports": [
        "import asyncio",
        "import io",
        "import base64",
        "import json",
        "from enum import Enum",
        "from typing import Dict",
        "from typing import Any",
        "from typing import List",
        "from typing import Optional",
        "from typing import Union",
        "from typing import Tuple",
        "from dataclasses import dataclass",
        "from pathlib import Path",
        "import matplotlib.pyplot as plt",
        "import matplotlib.patches as patches",
        "from matplotlib.patches import FancyBboxPatch",
        "from matplotlib.patches import Rectangle",
        "from matplotlib.patches import Circle",
        "from matplotlib.patches import Arrow",
        "import numpy as np",
        "import seaborn as sns",
        "from PIL import Image",
        "from PIL import ImageDraw",
        "from PIL import ImageFont",
        "import networkx as nx",
        "from openai import OpenAI"
      ],
      "imports_required": [
        "from openai import OpenAI",
        "import matplotlib.pyplot as plt",
        "from matplotlib.patches import Circle",
        "import numpy as np",
        "import io",
        "import base64"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 527,
      "line_start": 375,
      "name": "IllustrationGenerator",
      "parameters": [
        {
          "annotation": "",
          "default": null,
          "name": "bases"
        }
      ],
      "parameters_explained": {
        "api_key": "OpenAI API key string used to initialize the OpenAI client. Required for instantiation even though current implementation primarily uses programmatic generation rather than AI-based image generation. This suggests the class may be designed to support DALL-E integration for complex illustrations in the future."
      },
      "parent_class": null,
      "purpose": "The IllustrationGenerator class is responsible for creating visual educational content including mathematical function plots, scientific diagrams (like atomic structures), and generic illustrations. It provides a unified interface for generating different types of educational graphics based on specifications, with fallback error handling. The class uses matplotlib for rendering and returns base64-encoded PNG images suitable for embedding in documents or web pages.",
      "return_annotation": null,
      "return_explained": "Instantiation returns an IllustrationGenerator object. The main method generate_illustration() returns a GraphicSpec object with populated image_data (base64-encoded PNG), width, and height attributes. All internal methods also return modified GraphicSpec objects with generated illustration data.",
      "settings_required": [
        "OpenAI API key (passed as constructor parameter)",
        "GraphicSpec class definition must be available in the codebase",
        "matplotlib backend configured appropriately for non-interactive rendering"
      ],
      "source_code": "class IllustrationGenerator:\n    \"\"\"Generates educational illustrations and technical drawings\"\"\"\n    \n    def __init__(self, api_key: str):\n        self.client = OpenAI(api_key=api_key)\n    \n    async def generate_illustration(self, spec: GraphicSpec) -> GraphicSpec:\n        \"\"\"Generate an illustration using AI or programmatic methods\"\"\"\n        params = spec.parameters\n        concept = params.get('concept', spec.description)\n        style = params.get('style', 'educational')\n        \n        try:\n            # For educational concepts, try programmatic generation first\n            if 'mathematical' in style or 'math' in concept:\n                return self._create_math_illustration(spec)\n            elif 'scientific' in style or 'science' in concept:\n                return self._create_science_illustration(spec)\n            else:\n                # For complex illustrations, could use DALL-E (if needed)\n                return self._create_generic_illustration(spec)\n                \n        except Exception as e:\n            print(f\"Error generating illustration: {e}\")\n            return self._create_error_illustration(spec, f\"Illustration generation failed: {e}\")\n    \n    def _create_math_illustration(self, spec: GraphicSpec) -> GraphicSpec:\n        \"\"\"Create mathematical concept illustrations\"\"\"\n        params = spec.parameters\n        concept = params.get('concept', 'function').lower()\n        \n        fig, ax = plt.subplots(figsize=(8, 6))\n        \n        if 'function' in concept or 'quadratic' in concept:\n            # Draw a quadratic function\n            x = np.linspace(-5, 5, 100)\n            y = x**2\n            ax.plot(x, y, 'black', linewidth=2)\n            ax.axhline(y=0, color='black', linewidth=0.5)\n            ax.axvline(x=0, color='black', linewidth=0.5)\n            ax.grid(True, alpha=0.3)\n            ax.set_title('Quadratic Function: y = x\u00b2', fontsize=14, fontweight='bold')\n            \n        elif 'derivative' in concept:\n            # Show function and its derivative\n            x = np.linspace(-3, 3, 100)\n            y = x**2\n            dy = 2*x\n            ax.plot(x, y, 'black', linewidth=2, label='f(x) = x\u00b2')\n            ax.plot(x, dy, 'gray', linewidth=2, linestyle='--', label=\"f'(x) = 2x\")\n            ax.legend()\n            ax.grid(True, alpha=0.3)\n            ax.set_title('Function and its Derivative', fontsize=14, fontweight='bold')\n            \n        else:\n            # Generic mathematical illustration\n            ax.text(0.5, 0.5, f'Mathematical Concept:\\n{concept}', \n                   ha='center', va='center', transform=ax.transAxes,\n                   fontsize=14, bbox=dict(boxstyle=\"round,pad=0.3\", facecolor=\"lightgray\"))\n        \n        ax.set_xlabel('x')\n        ax.set_ylabel('y')\n        plt.tight_layout()\n        \n        spec.image_data = self._fig_to_base64(fig)\n        spec.width = 800\n        spec.height = 600\n        plt.close(fig)\n        return spec\n    \n    def _create_science_illustration(self, spec: GraphicSpec) -> GraphicSpec:\n        \"\"\"Create scientific concept illustrations\"\"\"\n        params = spec.parameters\n        concept = params.get('concept', 'atoms').lower()\n        \n        fig, ax = plt.subplots(figsize=(8, 6))\n        \n        if 'atom' in concept:\n            # Draw simple atomic structure\n            center = Circle((0.5, 0.5), 0.1, facecolor='gray', edgecolor='black')\n      
      ax.add_patch(center)\n            \n            # Electron orbits\n            for radius in [0.2, 0.3, 0.4]:\n                orbit = Circle((0.5, 0.5), radius, fill=False, edgecolor='black', linestyle='--')\n                ax.add_patch(orbit)\n            \n            ax.set_xlim(0, 1)\n            ax.set_ylim(0, 1)\n            ax.set_title('Atomic Structure', fontsize=14, fontweight='bold')\n            \n        else:\n            # Generic scientific illustration\n            ax.text(0.5, 0.5, f'Scientific Concept:\\n{concept}', \n                   ha='center', va='center', transform=ax.transAxes,\n                   fontsize=14, bbox=dict(boxstyle=\"round,pad=0.3\", facecolor=\"lightgray\"))\n        \n        ax.axis('off')\n        plt.tight_layout()\n        \n        spec.image_data = self._fig_to_base64(fig)\n        spec.width = 800\n        spec.height = 600\n        plt.close(fig)\n        return spec\n    \n    def _create_generic_illustration(self, spec: GraphicSpec) -> GraphicSpec:\n        \"\"\"Create a generic illustration placeholder\"\"\"\n        fig, ax = plt.subplots(figsize=(8, 6))\n        \n        ax.text(0.5, 0.5, f'Illustration:\\n{spec.description}', \n               ha='center', va='center', transform=ax.transAxes,\n               fontsize=14, bbox=dict(boxstyle=\"round,pad=0.3\", facecolor=\"lightgray\"))\n        \n        ax.set_xlim(0, 1)\n        ax.set_ylim(0, 1)\n        ax.axis('off')\n        ax.set_title(spec.description, fontsize=14, fontweight='bold')\n        \n        plt.tight_layout()\n        \n        spec.image_data = self._fig_to_base64(fig)\n        spec.width = 800\n        spec.height = 600\n        plt.close(fig)\n        return spec\n    \n    def _fig_to_base64(self, fig) -> str:\n        \"\"\"Convert matplotlib figure to base64 encoded image\"\"\"\n        buffer = io.BytesIO()\n        fig.savefig(buffer, format='png', dpi=150, bbox_inches='tight',\n                   facecolor='white', edgecolor='none')\n        buffer.seek(0)\n        image_data = buffer.getvalue()\n        buffer.close()\n        \n        return base64.b64encode(image_data).decode('utf-8')\n    \n    def _create_error_illustration(self, spec: GraphicSpec, error_msg: str) -> GraphicSpec:\n        \"\"\"Create an error placeholder illustration\"\"\"\n        fig, ax = plt.subplots(figsize=(6, 4))\n        ax.text(0.5, 0.5, f\"Error: {error_msg}\", ha='center', va='center',\n               transform=ax.transAxes, fontsize=12,\n               bbox=dict(boxstyle=\"round,pad=0.3\", facecolor=\"lightgray\"))\n        ax.set_xlim(0, 1)\n        ax.set_ylim(0, 1)\n        ax.axis('off')\n        \n        spec.image_data = self._fig_to_base64(fig)\n        spec.width = 600\n        spec.height = 400\n        plt.close(fig)\n        return spec",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/graphics_generator.py",
      "tags": [
        "illustration",
        "educational",
        "graphics",
        "matplotlib",
        "visualization",
        "math",
        "science",
        "diagram",
        "image-generation",
        "base64",
        "async",
        "plotting"
      ],
      "updated_at": "2025-12-07T01:19:19.974260",
      "usage_example": "from openai import OpenAI\nimport matplotlib.pyplot as plt\nfrom matplotlib.patches import Circle\nimport numpy as np\nimport io\nimport base64\nfrom dataclasses import dataclass\nfrom typing import Dict, Any, Optional\n\n@dataclass\nclass GraphicSpec:\n    description: str\n    parameters: Dict[str, Any]\n    image_data: Optional[str] = None\n    width: int = 0\n    height: int = 0\n\n# Instantiate the generator\ngenerator = IllustrationGenerator(api_key='your-openai-api-key')\n\n# Create a math illustration\nmath_spec = GraphicSpec(\n    description='Quadratic function',\n    parameters={'concept': 'quadratic function', 'style': 'mathematical'}\n)\nresult = await generator.generate_illustration(math_spec)\nprint(f'Generated image size: {result.width}x{result.height}')\n\n# Create a science illustration\nscience_spec = GraphicSpec(\n    description='Atomic structure',\n    parameters={'concept': 'atom model', 'style': 'scientific'}\n)\nresult = await generator.generate_illustration(science_spec)\n\n# Create a generic illustration\ngeneric_spec = GraphicSpec(\n    description='Custom concept',\n    parameters={'concept': 'custom educational concept'}\n)\nresult = await generator.generate_illustration(generic_spec)"
    },
    {
      "best_practices": [
        "Always ensure the GraphicSpec object has a valid 'parameters' dictionary before calling generate_diagram()",
        "The class automatically closes matplotlib figures to prevent memory leaks, but be aware of memory usage when generating many diagrams",
        "Default values are provided for missing parameters: style defaults to 'flowchart', steps default to ['Start', 'Process', 'End'], direction defaults to 'vertical'",
        "Error handling is built-in: if diagram generation fails, an error diagram is returned instead of raising an exception",
        "The class is stateless and thread-safe - each method call is independent and doesn't modify class state",
        "For network diagrams, ensure edges reference nodes that exist in the nodes list to avoid networkx errors",
        "The generated base64 image data can be directly embedded in HTML img tags using data:image/png;base64, prefix",
        "DPI is set to 150 for good quality; modify _fig_to_base64 if different resolution is needed",
        "The class modifies the input GraphicSpec object in-place, setting image_data, width, and height attributes"
      ],
      "class_interface": {
        "attributes": [],
        "methods": [
          {
            "is_property": false,
            "is_static": false,
            "name": "generate_diagram",
            "parameters": {
              "spec": "GraphicSpec object containing description and parameters dict with keys: 'style' (flowchart/process/network), 'steps' (list of step names), 'direction' (vertical/horizontal), 'nodes' (list of node names), 'edges' (list of tuples)"
            },
            "purpose": "Main entry point that generates a diagram based on the specification's style parameter and returns the modified spec with image data",
            "returns": "Modified GraphicSpec object with image_data (base64 PNG string), width, and height populated",
            "signature": "generate_diagram(self, spec: GraphicSpec) -> GraphicSpec"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_create_flowchart",
            "parameters": {
              "spec": "GraphicSpec with parameters containing 'steps' (list of strings) and 'direction' ('vertical' or 'horizontal')"
            },
            "purpose": "Creates a flowchart diagram with boxes and arrows, supporting vertical or horizontal layout",
            "returns": "GraphicSpec with generated flowchart image data, width=1000, height=800",
            "signature": "_create_flowchart(self, spec: GraphicSpec) -> GraphicSpec"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_draw_vertical_flowchart",
            "parameters": {
              "ax": "Matplotlib axes object to draw on",
              "steps": "List of step names/labels to display in the flowchart"
            },
            "purpose": "Draws flowchart boxes and arrows vertically on the provided matplotlib axes",
            "returns": "None (modifies ax in-place)",
            "signature": "_draw_vertical_flowchart(self, ax, steps)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_draw_horizontal_flowchart",
            "parameters": {
              "ax": "Matplotlib axes object to draw on",
              "steps": "List of step names/labels to display in the flowchart"
            },
            "purpose": "Draws flowchart boxes and arrows horizontally on the provided matplotlib axes",
            "returns": "None (modifies ax in-place)",
            "signature": "_draw_horizontal_flowchart(self, ax, steps)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_create_process_diagram",
            "parameters": {
              "spec": "GraphicSpec with process diagram parameters"
            },
            "purpose": "Creates a process diagram (currently delegates to flowchart creation)",
            "returns": "GraphicSpec with generated process diagram image data",
            "signature": "_create_process_diagram(self, spec: GraphicSpec) -> GraphicSpec"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_create_network_diagram",
            "parameters": {
              "spec": "GraphicSpec with parameters containing 'nodes' (list of node names) and 'edges' (list of (source, target) tuples)"
            },
            "purpose": "Creates a network diagram using networkx with nodes and edges in a spring layout",
            "returns": "GraphicSpec with generated network diagram image data, width=800, height=800",
            "signature": "_create_network_diagram(self, spec: GraphicSpec) -> GraphicSpec"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_fig_to_base64",
            "parameters": {
              "fig": "Matplotlib figure object to convert"
            },
            "purpose": "Converts a matplotlib figure to a base64-encoded PNG string",
            "returns": "Base64-encoded string representation of the figure as PNG (150 DPI, white background)",
            "signature": "_fig_to_base64(self, fig) -> str"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_create_error_diagram",
            "parameters": {
              "error_msg": "Error message string to display in the diagram",
              "spec": "GraphicSpec object to populate with error diagram"
            },
            "purpose": "Creates a simple error placeholder diagram displaying the error message",
            "returns": "GraphicSpec with error diagram image data, width=600, height=400",
            "signature": "_create_error_diagram(self, spec: GraphicSpec, error_msg: str) -> GraphicSpec"
          }
        ]
      },
      "complexity": "moderate",
      "component_type": "class",
      "conditional_imports": [],
      "created_at": "2025-12-07 00:18:47",
      "decorators": [],
      "dependencies": [
        "matplotlib",
        "numpy",
        "networkx",
        "io",
        "base64"
      ],
      "description": "A class that generates various types of diagrams including flowcharts, process flows, and network diagrams using matplotlib and networkx, returning base64-encoded PNG images.",
      "docstring": "Generates diagrams like flowcharts, process flows, organizational charts",
      "id": 2025,
      "imports": [
        "import asyncio",
        "import io",
        "import base64",
        "import json",
        "from enum import Enum",
        "from typing import Dict",
        "from typing import Any",
        "from typing import List",
        "from typing import Optional",
        "from typing import Union",
        "from typing import Tuple",
        "from dataclasses import dataclass",
        "from pathlib import Path",
        "import matplotlib.pyplot as plt",
        "import matplotlib.patches as patches",
        "from matplotlib.patches import FancyBboxPatch",
        "from matplotlib.patches import Rectangle",
        "from matplotlib.patches import Circle",
        "from matplotlib.patches import Arrow",
        "import numpy as np",
        "import seaborn as sns",
        "from PIL import Image",
        "from PIL import ImageDraw",
        "from PIL import ImageFont",
        "import networkx as nx",
        "from openai import OpenAI"
      ],
      "imports_required": [
        "import matplotlib.pyplot as plt",
        "from matplotlib.patches import FancyBboxPatch",
        "import numpy as np",
        "import networkx as nx",
        "import io",
        "import base64"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 373,
      "line_start": 228,
      "name": "DiagramGenerator",
      "parameters": [
        {
          "annotation": "",
          "default": null,
          "name": "bases"
        }
      ],
      "parameters_explained": {
        "No constructor parameters": "The class has no __init__ method defined, so it uses the default constructor with no parameters. Instantiation is straightforward with no required arguments."
      },
      "parent_class": null,
      "purpose": "DiagramGenerator provides a unified interface for creating different diagram types from specifications. It takes a GraphicSpec object containing diagram parameters (type, steps, nodes, edges, direction) and generates visual diagrams as base64-encoded images. The class supports flowcharts (vertical/horizontal), process diagrams, and network diagrams, with automatic error handling and fallback to error diagrams when generation fails.",
      "return_annotation": null,
      "return_explained": "The main method generate_diagram() returns a GraphicSpec object with populated image_data (base64-encoded PNG), width, and height attributes. All internal methods also return modified GraphicSpec objects. The image_data field contains a base64-encoded string that can be decoded to display or save the generated diagram.",
      "settings_required": [
        "Requires GraphicSpec class definition with attributes: parameters (dict), description (str), image_data (str), width (int), height (int)",
        "matplotlib backend configuration may be needed for headless environments (e.g., Agg backend)"
      ],
      "source_code": "class DiagramGenerator:\n    \"\"\"Generates diagrams like flowcharts, process flows, organizational charts\"\"\"\n    \n    def generate_diagram(self, spec: GraphicSpec) -> GraphicSpec:\n        \"\"\"Generate a diagram based on specification\"\"\"\n        params = spec.parameters\n        diagram_type = params.get('style', 'flowchart').lower()\n        \n        try:\n            if diagram_type == 'flowchart':\n                return self._create_flowchart(spec)\n            elif diagram_type == 'process':\n                return self._create_process_diagram(spec)\n            elif diagram_type == 'network':\n                return self._create_network_diagram(spec)\n            else:\n                return self._create_flowchart(spec)  # Default to flowchart\n                \n        except Exception as e:\n            print(f\"Error generating diagram: {e}\")\n            return self._create_error_diagram(spec, f\"Diagram generation failed: {e}\")\n    \n    def _create_flowchart(self, spec: GraphicSpec) -> GraphicSpec:\n        \"\"\"Create a flowchart diagram\"\"\"\n        params = spec.parameters\n        steps = params.get('steps', ['Start', 'Process', 'End'])\n        direction = params.get('direction', 'vertical')\n        \n        fig, ax = plt.subplots(figsize=(10, 8))\n        \n        if direction == 'horizontal':\n            self._draw_horizontal_flowchart(ax, steps)\n        else:\n            self._draw_vertical_flowchart(ax, steps)\n        \n        ax.set_xlim(-1, len(steps))\n        ax.set_ylim(-1, len(steps))\n        ax.axis('off')\n        ax.set_title(spec.description, fontsize=14, fontweight='bold', pad=20)\n        \n        plt.tight_layout()\n        spec.image_data = self._fig_to_base64(fig)\n        spec.width = 1000\n        spec.height = 800\n        plt.close(fig)\n        return spec\n    \n    def _draw_vertical_flowchart(self, ax, steps):\n        \"\"\"Draw a vertical flowchart\"\"\"\n        y_positions = np.linspace(len(steps)-1, 0, len(steps))\n        \n        for i, (step, y) in enumerate(zip(steps, y_positions)):\n            # Draw box\n            box = FancyBboxPatch((0.1, y-0.2), 1.8, 0.4,\n                               boxstyle=\"round,pad=0.05\",\n                               facecolor='white', edgecolor='black', linewidth=2)\n            ax.add_patch(box)\n            \n            # Add text\n            ax.text(1, y, step, ha='center', va='center', fontsize=11, fontweight='bold')\n            \n            # Draw arrow to next step\n            if i < len(steps) - 1:\n                ax.annotate('', xy=(1, y_positions[i+1]+0.2), xytext=(1, y-0.2),\n                           arrowprops=dict(arrowstyle='->', lw=2, color='black'))\n    \n    def _draw_horizontal_flowchart(self, ax, steps):\n        \"\"\"Draw a horizontal flowchart\"\"\"\n        x_positions = np.linspace(0, len(steps)-1, len(steps))\n        \n        for i, (step, x) in enumerate(zip(steps, x_positions)):\n            # Draw box\n            box = FancyBboxPatch((x-0.4, 0.3), 0.8, 0.4,\n                               boxstyle=\"round,pad=0.05\",\n                               facecolor='white', edgecolor='black', linewidth=2)\n            ax.add_patch(box)\n            \n            # Add text\n            ax.text(x, 0.5, step, ha='center', va='center', fontsize=11, fontweight='bold')\n            \n            # Draw arrow to next step\n            if i < len(steps) - 1:\n                ax.annotate('', xy=(x_positions[i+1]-0.4, 0.5), 
xytext=(x+0.4, 0.5),\n                           arrowprops=dict(arrowstyle='->', lw=2, color='black'))\n    \n    def _create_process_diagram(self, spec: GraphicSpec) -> GraphicSpec:\n        \"\"\"Create a process diagram\"\"\"\n        # Similar to flowchart but with different styling\n        return self._create_flowchart(spec)\n    \n    def _create_network_diagram(self, spec: GraphicSpec) -> GraphicSpec:\n        \"\"\"Create a network diagram using networkx\"\"\"\n        params = spec.parameters\n        nodes = params.get('nodes', ['A', 'B', 'C'])\n        edges = params.get('edges', [('A', 'B'), ('B', 'C')])\n        \n        fig, ax = plt.subplots(figsize=(8, 8))\n        \n        # Create network graph\n        G = nx.Graph()\n        G.add_nodes_from(nodes)\n        G.add_edges_from(edges)\n        \n        # Position nodes\n        pos = nx.spring_layout(G, seed=42)\n        \n        # Draw network\n        nx.draw(G, pos, ax=ax, with_labels=True, node_color='white',\n               node_size=1500, font_size=12, font_weight='bold',\n               edge_color='black', linewidths=2)\n        \n        ax.set_title(spec.description, fontsize=14, fontweight='bold')\n        plt.tight_layout()\n        \n        spec.image_data = self._fig_to_base64(fig)\n        spec.width = 800\n        spec.height = 800\n        plt.close(fig)\n        return spec\n    \n    def _fig_to_base64(self, fig) -> str:\n        \"\"\"Convert matplotlib figure to base64 encoded image\"\"\"\n        buffer = io.BytesIO()\n        fig.savefig(buffer, format='png', dpi=150, bbox_inches='tight',\n                   facecolor='white', edgecolor='none')\n        buffer.seek(0)\n        image_data = buffer.getvalue()\n        buffer.close()\n        \n        return base64.b64encode(image_data).decode('utf-8')\n    \n    def _create_error_diagram(self, spec: GraphicSpec, error_msg: str) -> GraphicSpec:\n        \"\"\"Create an error placeholder diagram\"\"\"\n        fig, ax = plt.subplots(figsize=(6, 4))\n        ax.text(0.5, 0.5, f\"Error: {error_msg}\", ha='center', va='center',\n               transform=ax.transAxes, fontsize=12,\n               bbox=dict(boxstyle=\"round,pad=0.3\", facecolor=\"lightgray\"))\n        ax.set_xlim(0, 1)\n        ax.set_ylim(0, 1)\n        ax.axis('off')\n        \n        spec.image_data = self._fig_to_base64(fig)\n        spec.width = 600\n        spec.height = 400\n        plt.close(fig)\n        return spec",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/graphics_generator.py",
      "tags": [
        "diagram-generation",
        "visualization",
        "flowchart",
        "network-diagram",
        "matplotlib",
        "networkx",
        "image-generation",
        "base64-encoding",
        "process-flow",
        "graph-visualization"
      ],
      "updated_at": "2025-12-07T01:18:47.592994",
      "usage_example": "from dataclasses import dataclass\nfrom typing import Dict, Any\n\n@dataclass\nclass GraphicSpec:\n    description: str\n    parameters: Dict[str, Any]\n    image_data: str = ''\n    width: int = 0\n    height: int = 0\n\n# Instantiate the generator\ngenerator = DiagramGenerator()\n\n# Create a flowchart specification\nflowchart_spec = GraphicSpec(\n    description='User Login Process',\n    parameters={\n        'style': 'flowchart',\n        'steps': ['Start', 'Enter Credentials', 'Validate', 'Success'],\n        'direction': 'vertical'\n    }\n)\n\n# Generate the diagram\nresult = generator.generate_diagram(flowchart_spec)\nprint(f'Generated image size: {result.width}x{result.height}')\nprint(f'Image data length: {len(result.image_data)}')\n\n# Create a network diagram\nnetwork_spec = GraphicSpec(\n    description='Network Topology',\n    parameters={\n        'style': 'network',\n        'nodes': ['Server', 'Router', 'Client1', 'Client2'],\n        'edges': [('Server', 'Router'), ('Router', 'Client1'), ('Router', 'Client2')]\n    }\n)\n\nnetwork_result = generator.generate_diagram(network_spec)\n\n# Decode and save the image\nimport base64\nwith open('diagram.png', 'wb') as f:\n    f.write(base64.b64decode(network_result.image_data))"
    },
    {
      "best_practices": [
        "Always instantiate ChartGenerator before generating charts to ensure matplotlib styling is properly configured",
        "Ensure EInkStyler class is available and properly configured before instantiation",
        "GraphicSpec objects must have a 'parameters' dictionary with at least a 'type' key",
        "The class automatically closes matplotlib figures after conversion to prevent memory leaks",
        "Default chart type is 'bar' if an invalid or missing type is specified",
        "Chart data should be provided as lists in the parameters dictionary",
        "The class handles exceptions internally and returns error placeholder images on failure",
        "Generated images are base64-encoded PNG format at 150 DPI",
        "All charts are optimized for e-ink displays with high contrast, black edges, and white backgrounds",
        "Call generate_chart() for each chart needed; the method is stateless and can be called multiple times"
      ],
      "class_interface": {
        "attributes": [],
        "methods": [
          {
            "is_property": false,
            "is_static": false,
            "name": "__init__",
            "parameters": {},
            "purpose": "Initialize the ChartGenerator and configure matplotlib with e-ink optimized styling",
            "returns": "None",
            "signature": "__init__(self)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "generate_chart",
            "parameters": {
              "spec": "GraphicSpec object containing chart parameters including type, data, labels, and title"
            },
            "purpose": "Main public method to generate a chart based on the provided specification",
            "returns": "GraphicSpec object with populated image_data (base64 PNG), width (800), and height (600)",
            "signature": "generate_chart(self, spec: GraphicSpec) -> GraphicSpec"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_create_bar_chart",
            "parameters": {
              "ax": "Matplotlib axes object to draw on",
              "params": "Dictionary with 'data', 'labels', 'title', and 'ylabel' keys"
            },
            "purpose": "Create a bar chart on the provided matplotlib axes",
            "returns": "None (modifies ax in place)",
            "signature": "_create_bar_chart(self, ax, params)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_create_line_chart",
            "parameters": {
              "ax": "Matplotlib axes object to draw on",
              "params": "Dictionary with 'data', 'labels', 'title', 'xlabel', and 'ylabel' keys"
            },
            "purpose": "Create a line chart on the provided matplotlib axes",
            "returns": "None (modifies ax in place)",
            "signature": "_create_line_chart(self, ax, params)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_create_pie_chart",
            "parameters": {
              "ax": "Matplotlib axes object to draw on",
              "params": "Dictionary with 'data', 'labels', and 'title' keys"
            },
            "purpose": "Create a pie chart on the provided matplotlib axes with patterns for e-ink compatibility",
            "returns": "None (modifies ax in place)",
            "signature": "_create_pie_chart(self, ax, params)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_create_scatter_chart",
            "parameters": {
              "ax": "Matplotlib axes object to draw on",
              "params": "Dictionary with 'x_data', 'y_data', 'title', 'xlabel', and 'ylabel' keys"
            },
            "purpose": "Create a scatter plot on the provided matplotlib axes",
            "returns": "None (modifies ax in place)",
            "signature": "_create_scatter_chart(self, ax, params)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_apply_eink_styling",
            "parameters": {
              "ax": "Matplotlib axes object",
              "fig": "Matplotlib figure object"
            },
            "purpose": "Apply e-ink specific styling to ensure high contrast and visibility",
            "returns": "None (modifies fig and ax in place)",
            "signature": "_apply_eink_styling(self, fig, ax)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_fig_to_base64",
            "parameters": {
              "fig": "Matplotlib figure object to convert"
            },
            "purpose": "Convert a matplotlib figure to a base64-encoded PNG string",
            "returns": "Base64-encoded string representation of the PNG image at 150 DPI",
            "signature": "_fig_to_base64(self, fig) -> str"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_create_error_placeholder",
            "parameters": {
              "error_msg": "Error message to display in the placeholder",
              "spec": "GraphicSpec object to populate with error image"
            },
            "purpose": "Create an error placeholder image when chart generation fails",
            "returns": "GraphicSpec object with error placeholder image (600x400)",
            "signature": "_create_error_placeholder(self, spec: GraphicSpec, error_msg: str) -> GraphicSpec"
          }
        ]
      },
      "complexity": "moderate",
      "component_type": "class",
      "conditional_imports": [],
      "created_at": "2025-12-07 00:18:13",
      "decorators": [],
      "dependencies": [
        "matplotlib",
        "numpy",
        "io",
        "base64"
      ],
      "description": "A class that generates various types of charts (bar, line, pie, scatter) optimized for e-ink displays with high contrast and clear visibility.",
      "docstring": "Generates various types of charts optimized for e-ink displays",
      "id": 2024,
      "imports": [
        "import asyncio",
        "import io",
        "import base64",
        "import json",
        "from enum import Enum",
        "from typing import Dict",
        "from typing import Any",
        "from typing import List",
        "from typing import Optional",
        "from typing import Union",
        "from typing import Tuple",
        "from dataclasses import dataclass",
        "from pathlib import Path",
        "import matplotlib.pyplot as plt",
        "import matplotlib.patches as patches",
        "from matplotlib.patches import FancyBboxPatch",
        "from matplotlib.patches import Rectangle",
        "from matplotlib.patches import Circle",
        "from matplotlib.patches import Arrow",
        "import numpy as np",
        "import seaborn as sns",
        "from PIL import Image",
        "from PIL import ImageDraw",
        "from PIL import ImageFont",
        "import networkx as nx",
        "from openai import OpenAI"
      ],
      "imports_required": [
        "import matplotlib.pyplot as plt",
        "import io",
        "import base64"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 226,
      "line_start": 84,
      "name": "ChartGenerator",
      "parameters": [
        {
          "annotation": "",
          "default": null,
          "name": "bases"
        }
      ],
      "parameters_explained": {
        "__init__": "No parameters required. The constructor initializes matplotlib with default style and applies e-ink specific styling from EInkStyler.get_eink_style()."
      },
      "parent_class": null,
      "purpose": "ChartGenerator is responsible for creating matplotlib-based charts specifically styled for e-ink displays. It handles chart generation from GraphicSpec objects, applies e-ink optimizations (high contrast, clear edges, white backgrounds), and converts the resulting figures to base64-encoded PNG images. The class supports multiple chart types and includes error handling with placeholder generation.",
      "return_annotation": null,
      "return_explained": "Instantiation returns a ChartGenerator object. The main method generate_chart() returns a GraphicSpec object with populated image_data (base64-encoded PNG), width, and height attributes. Private methods return None (styling methods) or strings (base64 conversion) or GraphicSpec (error placeholder).",
      "settings_required": [
        "EInkStyler class must be available with get_eink_style() method and CHART_COLORS attribute",
        "GraphicSpec class/dataclass must be defined with parameters, image_data, width, and height attributes"
      ],
      "source_code": "class ChartGenerator:\n    \"\"\"Generates various types of charts optimized for e-ink displays\"\"\"\n    \n    def __init__(self):\n        # Apply e-ink styling\n        plt.style.use('default')\n        plt.rcParams.update(EInkStyler.get_eink_style())\n    \n    def generate_chart(self, spec: GraphicSpec) -> GraphicSpec:\n        \"\"\"Generate a chart based on specification\"\"\"\n        params = spec.parameters\n        chart_type = params.get('type', 'bar').lower()\n        \n        try:\n            fig, ax = plt.subplots(figsize=(8, 6))\n            \n            if chart_type == 'bar':\n                self._create_bar_chart(ax, params)\n            elif chart_type == 'line':\n                self._create_line_chart(ax, params)\n            elif chart_type == 'pie':\n                self._create_pie_chart(ax, params)\n            elif chart_type == 'scatter':\n                self._create_scatter_chart(ax, params)\n            else:\n                self._create_bar_chart(ax, params)  # Default to bar chart\n            \n            # Apply e-ink optimizations\n            self._apply_eink_styling(fig, ax)\n            \n            # Convert to image data\n            spec.image_data = self._fig_to_base64(fig)\n            spec.width = 800\n            spec.height = 600\n            \n            plt.close(fig)\n            return spec\n            \n        except Exception as e:\n            print(f\"Error generating chart: {e}\")\n            return self._create_error_placeholder(spec, f\"Chart generation failed: {e}\")\n    \n    def _create_bar_chart(self, ax, params):\n        \"\"\"Create a bar chart\"\"\"\n        data = params.get('data', [1, 2, 3])\n        labels = params.get('labels', [f'Item {i+1}' for i in range(len(data))])\n        title = params.get('title', 'Bar Chart')\n        \n        bars = ax.bar(labels, data, color=EInkStyler.CHART_COLORS[0], \n                     edgecolor='black', linewidth=1)\n        \n        # Add value labels on bars\n        for bar, value in zip(bars, data):\n            height = bar.get_height()\n            ax.text(bar.get_x() + bar.get_width()/2., height + 0.01*max(data),\n                   f'{value}', ha='center', va='bottom', fontsize=10)\n        \n        ax.set_title(title, fontsize=14, fontweight='bold', pad=20)\n        ax.set_ylabel(params.get('ylabel', 'Value'))\n        ax.grid(True, alpha=0.3)\n    \n    def _create_line_chart(self, ax, params):\n        \"\"\"Create a line chart\"\"\"\n        data = params.get('data', [1, 2, 3, 4])\n        labels = params.get('labels', list(range(len(data))))\n        title = params.get('title', 'Line Chart')\n        \n        ax.plot(labels, data, color='black', linewidth=2, marker='o', \n               markersize=6, markerfacecolor='white', markeredgecolor='black')\n        \n        ax.set_title(title, fontsize=14, fontweight='bold', pad=20)\n        ax.set_xlabel(params.get('xlabel', 'X-axis'))\n        ax.set_ylabel(params.get('ylabel', 'Y-axis'))\n        ax.grid(True, alpha=0.3)\n    \n    def _create_pie_chart(self, ax, params):\n        \"\"\"Create a pie chart\"\"\"\n        data = params.get('data', [1, 2, 3])\n        labels = params.get('labels', [f'Segment {i+1}' for i in range(len(data))])\n        title = params.get('title', 'Pie Chart')\n        \n        # Use patterns for e-ink compatibility\n        patterns = ['', '///', '...', '|||', '---']\n        \n        wedges, texts, autotexts = ax.pie(data, labels=labels, 
autopct='%1.1f%%',\n                                         colors=EInkStyler.CHART_COLORS[:len(data)],\n                                         wedgeprops={'edgecolor': 'black', 'linewidth': 1})\n        \n        ax.set_title(title, fontsize=14, fontweight='bold', pad=20)\n    \n    def _create_scatter_chart(self, ax, params):\n        \"\"\"Create a scatter plot\"\"\"\n        x_data = params.get('x_data', [1, 2, 3, 4])\n        y_data = params.get('y_data', [1, 4, 2, 3])\n        title = params.get('title', 'Scatter Plot')\n        \n        ax.scatter(x_data, y_data, c='black', s=50, alpha=0.7, edgecolors='black')\n        \n        ax.set_title(title, fontsize=14, fontweight='bold', pad=20)\n        ax.set_xlabel(params.get('xlabel', 'X-axis'))\n        ax.set_ylabel(params.get('ylabel', 'Y-axis'))\n        ax.grid(True, alpha=0.3)\n    \n    def _apply_eink_styling(self, fig, ax):\n        \"\"\"Apply e-ink specific styling to the figure\"\"\"\n        # Ensure white background\n        fig.patch.set_facecolor('white')\n        ax.set_facecolor('white')\n        \n        # Increase contrast\n        for spine in ax.spines.values():\n            spine.set_edgecolor('black')\n            spine.set_linewidth(1)\n        \n        # Optimize for e-ink display\n        plt.tight_layout()\n    \n    def _fig_to_base64(self, fig) -> str:\n        \"\"\"Convert matplotlib figure to base64 encoded image\"\"\"\n        buffer = io.BytesIO()\n        fig.savefig(buffer, format='png', dpi=150, bbox_inches='tight',\n                   facecolor='white', edgecolor='none')\n        buffer.seek(0)\n        image_data = buffer.getvalue()\n        buffer.close()\n        \n        return base64.b64encode(image_data).decode('utf-8')\n    \n    def _create_error_placeholder(self, spec: GraphicSpec, error_msg: str) -> GraphicSpec:\n        \"\"\"Create an error placeholder image\"\"\"\n        fig, ax = plt.subplots(figsize=(6, 4))\n        ax.text(0.5, 0.5, f\"Error: {error_msg}\", ha='center', va='center',\n               transform=ax.transAxes, fontsize=12, \n               bbox=dict(boxstyle=\"round,pad=0.3\", facecolor=\"lightgray\"))\n        ax.set_xlim(0, 1)\n        ax.set_ylim(0, 1)\n        ax.axis('off')\n        \n        spec.image_data = self._fig_to_base64(fig)\n        spec.width = 600\n        spec.height = 400\n        plt.close(fig)\n        return spec",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/graphics_generator.py",
      "tags": [
        "chart-generation",
        "visualization",
        "e-ink",
        "matplotlib",
        "bar-chart",
        "line-chart",
        "pie-chart",
        "scatter-plot",
        "image-generation",
        "base64-encoding"
      ],
      "updated_at": "2025-12-07T01:18:13.140529",
      "usage_example": "from dataclasses import dataclass\nfrom typing import Dict, Any\n\n@dataclass\nclass GraphicSpec:\n    parameters: Dict[str, Any]\n    image_data: str = ''\n    width: int = 0\n    height: int = 0\n\nclass EInkStyler:\n    CHART_COLORS = ['#CCCCCC', '#999999', '#666666', '#333333']\n    @staticmethod\n    def get_eink_style():\n        return {'figure.facecolor': 'white', 'axes.facecolor': 'white'}\n\n# Create chart generator\ngenerator = ChartGenerator()\n\n# Create a bar chart\nbar_spec = GraphicSpec(parameters={\n    'type': 'bar',\n    'data': [10, 25, 15, 30],\n    'labels': ['Q1', 'Q2', 'Q3', 'Q4'],\n    'title': 'Quarterly Sales',\n    'ylabel': 'Revenue ($K)'\n})\nresult = generator.generate_chart(bar_spec)\nprint(f\"Generated chart: {result.width}x{result.height}\")\n\n# Create a line chart\nline_spec = GraphicSpec(parameters={\n    'type': 'line',\n    'data': [5, 10, 8, 15, 12],\n    'labels': ['Jan', 'Feb', 'Mar', 'Apr', 'May'],\n    'title': 'Monthly Trends',\n    'xlabel': 'Month',\n    'ylabel': 'Value'\n})\nresult = generator.generate_chart(line_spec)\n\n# Create a pie chart\npie_spec = GraphicSpec(parameters={\n    'type': 'pie',\n    'data': [30, 25, 20, 25],\n    'labels': ['A', 'B', 'C', 'D'],\n    'title': 'Distribution'\n})\nresult = generator.generate_chart(pie_spec)"
    },
    {
      "best_practices": [
        "This class is designed as a static utility and does not need to be instantiated. Access all attributes and methods directly via the class name (e.g., EInkStyler.COLORS).",
        "Use the COLORS dictionary for consistent color application across e-ink visualizations to maintain high contrast.",
        "Apply get_eink_style() to matplotlib's rcParams before creating plots to ensure all charts are e-ink optimized.",
        "The CHART_COLORS list provides grayscale values in order from darkest to lightest, useful for multi-series charts on e-ink displays.",
        "Font sizes are calibrated for typical e-ink display resolutions; adjust if targeting specific screen sizes.",
        "The class maintains no state and has no side effects, making it safe to use across multiple threads or processes.",
        "All colors use hex format for compatibility with matplotlib and other visualization libraries."
      ],
      "class_interface": {
        "attributes": [
          {
            "description": "High-contrast color palette with named colors (black, white, gray_dark, gray_medium, gray_light) mapped to hex color codes",
            "is_class_variable": true,
            "name": "COLORS",
            "type": "Dict[str, str]"
          },
          {
            "description": "List of five grayscale hex color codes ordered from darkest to lightest, suitable for multi-series charts on e-ink displays",
            "is_class_variable": true,
            "name": "CHART_COLORS",
            "type": "List[str]"
          },
          {
            "description": "Font size mappings for different text elements (title, subtitle, body, caption) optimized for e-ink readability",
            "is_class_variable": true,
            "name": "FONT_SIZES",
            "type": "Dict[str, int]"
          }
        ],
        "methods": [
          {
            "is_property": false,
            "is_static": true,
            "name": "get_eink_style",
            "parameters": {},
            "purpose": "Returns a dictionary of matplotlib style parameters optimized for e-ink displays",
            "returns": "Dictionary with matplotlib rcParams keys and values for e-ink display optimization, including figure colors, axes properties, grid settings, and text colors",
            "signature": "get_eink_style() -> dict"
          }
        ]
      },
      "complexity": "simple",
      "component_type": "class",
      "conditional_imports": [],
      "created_at": "2025-12-07 00:17:41",
      "decorators": [],
      "dependencies": [
        "matplotlib"
      ],
      "description": "A utility class providing styling configurations and color palettes optimized for e-ink displays with high contrast and minimal grayscale variations.",
      "docstring": "Styling configuration optimized for e-ink displays",
      "id": 2023,
      "imports": [
        "import asyncio",
        "import io",
        "import base64",
        "import json",
        "from enum import Enum",
        "from typing import Dict",
        "from typing import Any",
        "from typing import List",
        "from typing import Optional",
        "from typing import Union",
        "from typing import Tuple",
        "from dataclasses import dataclass",
        "from pathlib import Path",
        "import matplotlib.pyplot as plt",
        "import matplotlib.patches as patches",
        "from matplotlib.patches import FancyBboxPatch",
        "from matplotlib.patches import Rectangle",
        "from matplotlib.patches import Circle",
        "from matplotlib.patches import Arrow",
        "import numpy as np",
        "import seaborn as sns",
        "from PIL import Image",
        "from PIL import ImageDraw",
        "from PIL import ImageFont",
        "import networkx as nx",
        "from openai import OpenAI"
      ],
      "imports_required": [
        "import matplotlib.pyplot as plt"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 82,
      "line_start": 45,
      "name": "EInkStyler",
      "parameters": [
        {
          "annotation": "",
          "default": null,
          "name": "bases"
        }
      ],
      "parameters_explained": {
        "bases": "This parameter appears in the docstring but is not used in the class definition. The class has no __init__ method and is not designed to be instantiated. All functionality is provided through class attributes and static methods."
      },
      "parent_class": null,
      "purpose": "EInkStyler provides predefined color schemes, font sizes, and matplotlib style configurations specifically designed for e-ink displays. It offers high-contrast color palettes and chart styling that work well with the limited color range and refresh characteristics of e-ink screens. The class is designed as a configuration container with static methods and class-level constants, requiring no instantiation for normal use.",
      "return_annotation": null,
      "return_explained": "The class itself returns nothing when referenced. The static method get_eink_style() returns a dictionary containing matplotlib style configuration parameters optimized for e-ink displays, with keys like 'figure.facecolor', 'axes.facecolor', etc., and their corresponding color/size values.",
      "settings_required": [
        "No environment variables or configuration files required",
        "Matplotlib must be installed and importable"
      ],
      "source_code": "class EInkStyler:\n    \"\"\"Styling configuration optimized for e-ink displays\"\"\"\n    \n    # E-ink optimized color palette (high contrast)\n    COLORS = {\n        'black': '#000000',\n        'white': '#FFFFFF', \n        'gray_dark': '#333333',\n        'gray_medium': '#666666',\n        'gray_light': '#CCCCCC'\n    }\n    \n    # Chart colors for e-ink (using patterns/shades)\n    CHART_COLORS = ['#000000', '#333333', '#666666', '#999999', '#CCCCCC']\n    \n    # Font settings\n    FONT_SIZES = {\n        'title': 14,\n        'subtitle': 12,\n        'body': 10,\n        'caption': 8\n    }\n    \n    @staticmethod\n    def get_eink_style():\n        \"\"\"Get matplotlib style configuration for e-ink displays\"\"\"\n        return {\n            'figure.facecolor': 'white',\n            'axes.facecolor': 'white',\n            'axes.edgecolor': 'black',\n            'axes.linewidth': 1.0,\n            'grid.color': '#CCCCCC',\n            'grid.linewidth': 0.5,\n            'text.color': 'black',\n            'axes.labelcolor': 'black',\n            'xtick.color': 'black',\n            'ytick.color': 'black'\n        }",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/graphics_generator.py",
      "tags": [
        "e-ink",
        "styling",
        "visualization",
        "matplotlib",
        "color-palette",
        "high-contrast",
        "display-optimization",
        "configuration",
        "static-utility"
      ],
      "updated_at": "2025-12-07T01:17:41.765351",
      "usage_example": "# Access color palette\nblack_color = EInkStyler.COLORS['black']\ngray_color = EInkStyler.COLORS['gray_medium']\n\n# Get chart colors for plotting\nchart_colors = EInkStyler.CHART_COLORS\n\n# Access font sizes\ntitle_size = EInkStyler.FONT_SIZES['title']\nbody_size = EInkStyler.FONT_SIZES['body']\n\n# Apply e-ink style to matplotlib\nimport matplotlib.pyplot as plt\neink_style = EInkStyler.get_eink_style()\nplt.rcParams.update(eink_style)\n\n# Create a plot with e-ink styling\nfig, ax = plt.subplots()\nax.plot([1, 2, 3], [1, 4, 9], color=EInkStyler.COLORS['black'])\nax.set_title('E-ink Optimized Plot', fontsize=EInkStyler.FONT_SIZES['title'])\nplt.show()"
    },
    {
      "best_practices": [
        "This is a dataclass, so it's designed to be a simple data container. Avoid adding complex methods or business logic.",
        "The image_data field should be populated after graphic generation, not during initialization unless loading a pre-generated graphic.",
        "Use the parameters dict for data and functional settings, and style_preferences for visual styling to maintain clear separation of concerns.",
        "The id should be unique within your application context to avoid confusion when tracking multiple graphics.",
        "Width and height are optional but should be set if you need consistent sizing across graphics.",
        "Since dataclasses are mutable by default, be cautious when sharing instances across threads or modifying after creation.",
        "Consider using frozen=True in the @dataclass decorator if immutability is desired: @dataclass(frozen=True).",
        "The GraphicType enum must be defined before using this class; ensure it's imported or defined in the same module.",
        "When serializing to JSON, you'll need custom handling for the GraphicType enum (convert to string) and ensure image_data is properly encoded.",
        "This class has no methods, so lifecycle is simple: instantiate, read/write attributes, and pass to other components."
      ],
      "class_interface": {
        "attributes": [
          {
            "description": "Unique identifier for the graphic specification",
            "is_class_variable": false,
            "name": "id",
            "type": "str"
          },
          {
            "description": "Enum value specifying the type/category of graphic to generate",
            "is_class_variable": false,
            "name": "type",
            "type": "GraphicType"
          },
          {
            "description": "Human-readable description of what the graphic represents",
            "is_class_variable": false,
            "name": "description",
            "type": "str"
          },
          {
            "description": "Dictionary of generation parameters controlling data and functional aspects",
            "is_class_variable": false,
            "name": "parameters",
            "type": "Dict[str, Any]"
          },
          {
            "description": "Dictionary of styling options controlling visual appearance",
            "is_class_variable": false,
            "name": "style_preferences",
            "type": "Dict[str, Any]"
          },
          {
            "description": "Base64 encoded image string after generation, None before generation",
            "is_class_variable": false,
            "name": "image_data",
            "type": "Optional[str]"
          },
          {
            "description": "Width of the graphic in pixels, None if not specified",
            "is_class_variable": false,
            "name": "width",
            "type": "Optional[int]"
          },
          {
            "description": "Height of the graphic in pixels, None if not specified",
            "is_class_variable": false,
            "name": "height",
            "type": "Optional[int]"
          }
        ],
        "methods": []
      },
      "complexity": "simple",
      "component_type": "class",
      "conditional_imports": [
        {
          "condition": "Required to define or use the GraphicType enum that is referenced by the 'type' attribute",
          "import": "from enum import Enum",
          "optional": false
        }
      ],
      "created_at": "2025-12-07 00:17:18",
      "decorators": [
        "dataclass"
      ],
      "dependencies": [
        "dataclasses",
        "typing",
        "enum"
      ],
      "description": "A dataclass that defines the specification for a graphic to be generated, including its type, description, parameters, style preferences, and optional image data.",
      "docstring": "Specification for a graphic to be generated",
      "id": 2022,
      "imports": [
        "import asyncio",
        "import io",
        "import base64",
        "import json",
        "from enum import Enum",
        "from typing import Dict",
        "from typing import Any",
        "from typing import List",
        "from typing import Optional",
        "from typing import Union",
        "from typing import Tuple",
        "from dataclasses import dataclass",
        "from pathlib import Path",
        "import matplotlib.pyplot as plt",
        "import matplotlib.patches as patches",
        "from matplotlib.patches import FancyBboxPatch",
        "from matplotlib.patches import Rectangle",
        "from matplotlib.patches import Circle",
        "from matplotlib.patches import Arrow",
        "import numpy as np",
        "import seaborn as sns",
        "from PIL import Image",
        "from PIL import ImageDraw",
        "from PIL import ImageFont",
        "import networkx as nx",
        "from openai import OpenAI"
      ],
      "imports_required": [
        "from dataclasses import dataclass",
        "from typing import Dict, Any, Optional"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 43,
      "line_start": 34,
      "name": "GraphicSpec",
      "parameters": [
        {
          "annotation": "",
          "default": null,
          "name": "bases"
        }
      ],
      "parameters_explained": {
        "description": "Human-readable string describing what the graphic should represent or display. Provides context for the graphic generation process.",
        "height": "Optional integer specifying the height of the generated graphic in pixels. Defaults to None if not specified.",
        "id": "Unique identifier string for the graphic specification. Used to track and reference specific graphics throughout the generation pipeline.",
        "image_data": "Optional string containing the Base64 encoded image data after the graphic has been generated. Defaults to None before generation.",
        "parameters": "Dictionary containing key-value pairs of generation-specific parameters. These control the data, layout, and functional aspects of the graphic (e.g., data points, axes labels, dimensions).",
        "style_preferences": "Dictionary containing key-value pairs of styling options. These control the visual appearance of the graphic (e.g., colors, fonts, themes, line styles).",
        "type": "GraphicType enum value that specifies the category or kind of graphic to be generated (e.g., chart, diagram, plot).",
        "width": "Optional integer specifying the width of the generated graphic in pixels. Defaults to None if not specified."
      },
      "parent_class": null,
      "purpose": "GraphicSpec serves as a data container for specifying all the necessary information required to generate a graphic. It encapsulates the graphic's identity, type, descriptive information, generation parameters, styling preferences, and can store the resulting image data after generation. This class is typically used in graphic generation pipelines where specifications need to be passed between components, stored, or serialized.",
      "return_annotation": null,
      "return_explained": "Instantiation returns a GraphicSpec object containing all the specified attributes. As a dataclass, it automatically generates __init__, __repr__, and __eq__ methods. The object serves as an immutable-by-convention data container for graphic specifications.",
      "settings_required": [
        "GraphicType enum must be defined in the same module or imported from another module",
        "No environment variables or external configuration required for the class itself"
      ],
      "source_code": "class GraphicSpec:\n    \"\"\"Specification for a graphic to be generated\"\"\"\n    id: str\n    type: GraphicType\n    description: str\n    parameters: Dict[str, Any]\n    style_preferences: Dict[str, Any]\n    image_data: Optional[str] = None  # Base64 encoded image after generation\n    width: Optional[int] = None\n    height: Optional[int] = None",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/graphics_generator.py",
      "tags": [
        "dataclass",
        "specification",
        "graphics",
        "data-container",
        "image-generation",
        "configuration",
        "metadata",
        "visualization"
      ],
      "updated_at": "2025-12-07T01:17:18.635132",
      "usage_example": "from dataclasses import dataclass\nfrom typing import Dict, Any, Optional\nfrom enum import Enum\n\nclass GraphicType(Enum):\n    CHART = 'chart'\n    DIAGRAM = 'diagram'\n    PLOT = 'plot'\n\n@dataclass\nclass GraphicSpec:\n    id: str\n    type: GraphicType\n    description: str\n    parameters: Dict[str, Any]\n    style_preferences: Dict[str, Any]\n    image_data: Optional[str] = None\n    width: Optional[int] = None\n    height: Optional[int] = None\n\n# Create a graphic specification\nspec = GraphicSpec(\n    id='chart_001',\n    type=GraphicType.CHART,\n    description='Sales data bar chart',\n    parameters={'data': [10, 20, 30], 'labels': ['Q1', 'Q2', 'Q3']},\n    style_preferences={'color': 'blue', 'theme': 'dark'},\n    width=800,\n    height=600\n)\n\n# Access attributes\nprint(spec.id)  # 'chart_001'\nprint(spec.type)  # GraphicType.CHART\nprint(spec.description)  # 'Sales data bar chart'\n\n# After generation, set image data\nspec.image_data = 'base64_encoded_image_string_here'\n\n# Dataclass provides automatic __repr__\nprint(spec)  # Shows all attributes"
    },
    {
      "best_practices": [
        "Use GraphicType members directly (e.g., GraphicType.CHART) rather than string literals to ensure type safety",
        "When accepting graphic types as parameters, use type hints with GraphicType to enable IDE autocomplete and type checking",
        "Access the string value using the .value property when needed for serialization or API calls",
        "Use GraphicType(string_value) to convert string values back to enum members, which will raise ValueError if the string is invalid",
        "Enums are immutable and singleton - each member is instantiated only once",
        "Compare enum members using == or 'is' operators, both work correctly",
        "Use 'in' operator to check membership: if some_value in GraphicType._value2member_map_",
        "Iterate over all graphic types using: for gtype in GraphicType"
      ],
      "class_interface": {
        "attributes": [
          {
            "description": "Represents chart-type graphics (value: 'chart')",
            "is_class_variable": true,
            "name": "CHART",
            "type": "GraphicType"
          },
          {
            "description": "Represents diagram-type graphics (value: 'diagram')",
            "is_class_variable": true,
            "name": "DIAGRAM",
            "type": "GraphicType"
          },
          {
            "description": "Represents illustration-type graphics (value: 'illustration')",
            "is_class_variable": true,
            "name": "ILLUSTRATION",
            "type": "GraphicType"
          },
          {
            "description": "Represents sketch-type graphics (value: 'sketch')",
            "is_class_variable": true,
            "name": "SKETCH",
            "type": "GraphicType"
          }
        ],
        "methods": []
      },
      "complexity": "simple",
      "component_type": "class",
      "conditional_imports": [],
      "created_at": "2025-12-07 00:16:49",
      "decorators": [],
      "dependencies": [
        "enum"
      ],
      "description": "An enumeration class that defines the types of graphics that can be generated in the system.",
      "docstring": "Types of graphics that can be generated",
      "id": 2021,
      "imports": [
        "import asyncio",
        "import io",
        "import base64",
        "import json",
        "from enum import Enum",
        "from typing import Dict",
        "from typing import Any",
        "from typing import List",
        "from typing import Optional",
        "from typing import Union",
        "from typing import Tuple",
        "from dataclasses import dataclass",
        "from pathlib import Path",
        "import matplotlib.pyplot as plt",
        "import matplotlib.patches as patches",
        "from matplotlib.patches import FancyBboxPatch",
        "from matplotlib.patches import Rectangle",
        "from matplotlib.patches import Circle",
        "from matplotlib.patches import Arrow",
        "import numpy as np",
        "import seaborn as sns",
        "from PIL import Image",
        "from PIL import ImageDraw",
        "from PIL import ImageFont",
        "import networkx as nx",
        "from openai import OpenAI"
      ],
      "imports_required": [
        "from enum import Enum"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 31,
      "line_start": 26,
      "name": "GraphicType",
      "parameters": [
        {
          "annotation": "Enum",
          "default": null,
          "name": "bases"
        }
      ],
      "parameters_explained": {
        "bases": "Inherits from Enum base class, which provides enumeration functionality. No custom __init__ parameters are defined for this enum."
      },
      "parent_class": null,
      "purpose": "GraphicType is an Enum class that provides a standardized set of constants representing different categories of graphics that can be created. It serves as a type-safe way to specify and validate graphic types throughout the application, ensuring consistency when working with chart generation, diagram creation, illustrations, and sketches. This enum is typically used as a parameter in graphic generation functions or methods to determine the type of visual output to produce.",
      "return_annotation": null,
      "return_explained": "Instantiating or accessing GraphicType members returns a GraphicType enum instance. Each member has a name (e.g., 'CHART') and a value (e.g., 'chart'). The value property returns the string representation of the graphic type.",
      "settings_required": [],
      "source_code": "class GraphicType(Enum):\n    \"\"\"Types of graphics that can be generated\"\"\"\n    CHART = \"chart\"\n    DIAGRAM = \"diagram\" \n    ILLUSTRATION = \"illustration\"\n    SKETCH = \"sketch\"",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/graphics_generator.py",
      "tags": [
        "enum",
        "enumeration",
        "graphics",
        "types",
        "constants",
        "chart",
        "diagram",
        "illustration",
        "sketch",
        "type-safety",
        "visualization"
      ],
      "updated_at": "2025-12-07T01:16:49.496773",
      "usage_example": "from enum import Enum\n\nclass GraphicType(Enum):\n    CHART = \"chart\"\n    DIAGRAM = \"diagram\"\n    ILLUSTRATION = \"illustration\"\n    SKETCH = \"sketch\"\n\n# Access enum members\ngraphic_type = GraphicType.CHART\nprint(graphic_type)  # GraphicType.CHART\nprint(graphic_type.value)  # \"chart\"\nprint(graphic_type.name)  # \"CHART\"\n\n# Compare enum members\nif graphic_type == GraphicType.CHART:\n    print(\"This is a chart type\")\n\n# Get enum by value\ntype_from_string = GraphicType(\"diagram\")\nprint(type_from_string)  # GraphicType.DIAGRAM\n\n# Iterate over all types\nfor gtype in GraphicType:\n    print(f\"{gtype.name}: {gtype.value}\")\n\n# Use in function signatures\ndef generate_graphic(graphic_type: GraphicType) -> None:\n    if graphic_type == GraphicType.CHART:\n        print(\"Generating chart...\")\n    elif graphic_type == GraphicType.DIAGRAM:\n        print(\"Generating diagram...\")"
    },
    {
      "best_practices": [
        "Always instantiate with a properly configured SessionManager instance",
        "Use async/await when calling get_enhanced_conversation_context() as it's an async method",
        "The class maintains no mutable state between calls - it's safe to reuse the same instance for multiple conversations",
        "Reference patterns use regex matching - ensure conversation text is properly formatted",
        "Context generation can be expensive for long conversations - consider caching results",
        "The class analyzes up to the last 3 exchanges for recent context by default",
        "Topic extraction is keyword-based - customize topic_keywords dict for domain-specific topics",
        "Reference map building looks for explicit step/exchange references and topical references",
        "Empty contexts are returned for non-existent conversations rather than raising errors",
        "The class logs operations at INFO level - configure logging appropriately for debugging"
      ],
      "class_interface": {
        "attributes": [
          {
            "description": "Session manager instance for accessing conversation data and exchanges",
            "is_class_variable": false,
            "name": "session_manager",
            "type": "SessionManager"
          },
          {
            "description": "Logger instance for logging context manager operations",
            "is_class_variable": false,
            "name": "logger",
            "type": "logging.Logger"
          },
          {
            "description": "Dictionary of regex patterns for detecting references to previous exchanges (step_reference, exchange_reference, previous_reference, solution_reference, question_reference, data_reference)",
            "is_class_variable": false,
            "name": "reference_patterns",
            "type": "Dict[str, str]"
          },
          {
            "description": "Dictionary mapping topic categories to keyword lists for topic extraction (analysis, problem_solving, data_processing, visualization, collaboration, documentation)",
            "is_class_variable": false,
            "name": "topic_keywords",
            "type": "Dict[str, List[str]]"
          }
        ],
        "methods": [
          {
            "is_property": false,
            "is_static": false,
            "name": "__init__",
            "parameters": {
              "session_manager": "SessionManager instance for accessing conversation data"
            },
            "purpose": "Initialize the conversation context manager with session manager and set up reference patterns and topic keywords",
            "returns": "None",
            "signature": "__init__(self, session_manager: SessionManager)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "get_enhanced_conversation_context",
            "parameters": {
              "conversation_id": "Unique identifier for the conversation",
              "current_input": "Optional current user input to analyze for references to previous exchanges"
            },
            "purpose": "Get comprehensive conversation context including timeline, references, topics, insights, and problem-solving chain",
            "returns": "ConversationContext dataclass containing all conversation intelligence including turns, topics, summary, insights, and reference map",
            "signature": "async get_enhanced_conversation_context(self, conversation_id: str, current_input: str = '') -> ConversationContext"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_build_conversation_turn",
            "parameters": {
              "exchange": "Dictionary containing exchange data with keys: exchange_id, exchange_number, timestamp, input_file, response_text, processing_time, tokens_used"
            },
            "purpose": "Build a ConversationTurn object from raw exchange data with summaries, topics, and key points",
            "returns": "ConversationTurn dataclass with processed exchange information",
            "signature": "async _build_conversation_turn(self, exchange: Dict[str, Any]) -> ConversationTurn"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_extract_active_topics",
            "parameters": {
              "turns": "List of ConversationTurn objects to analyze"
            },
            "purpose": "Extract currently active topics from conversation turns based on frequency and recency",
            "returns": "List of up to 10 active topic strings",
            "signature": "_extract_active_topics(self, turns: List[ConversationTurn]) -> List[str]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_generate_conversation_summary",
            "parameters": {
              "turns": "List of ConversationTurn objects"
            },
            "purpose": "Generate a high-level summary of the entire conversation",
            "returns": "String summary describing the conversation scope and main topics",
            "signature": "async _generate_conversation_summary(self, turns: List[ConversationTurn]) -> str"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_extract_key_insights",
            "parameters": {
              "turns": "List of ConversationTurn objects to analyze"
            },
            "purpose": "Extract key insights, conclusions, and findings from conversation turns",
            "returns": "List of up to 5 key insight strings with exchange numbers",
            "signature": "_extract_key_insights(self, turns: List[ConversationTurn]) -> List[str]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_build_problem_solving_chain",
            "parameters": {
              "turns": "List of ConversationTurn objects"
            },
            "purpose": "Build a chain showing problem-solving progression through the conversation",
            "returns": "List of dictionaries with keys: exchange_number, step_type (analysis/solution/clarification/implementation), description, topics",
            "signature": "_build_problem_solving_chain(self, turns: List[ConversationTurn]) -> List[Dict[str, Any]]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_build_reference_map",
            "parameters": {
              "current_input": "Current user input to analyze for references",
              "turns": "List of ConversationTurn objects"
            },
            "purpose": "Build a map of references between exchanges, identifying when exchanges reference previous ones",
            "returns": "Dictionary mapping exchange numbers to lists of ConversationReference objects",
            "signature": "_build_reference_map(self, turns: List[ConversationTurn], current_input: str) -> Dict[int, List[ConversationReference]]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_find_references_in_text",
            "parameters": {
              "previous_turns": "List of previous ConversationTurn objects that could be referenced",
              "text": "Text to analyze for references"
            },
            "purpose": "Find explicit and topical references to previous exchanges in given text",
            "returns": "List of ConversationReference objects with relevance scores",
            "signature": "_find_references_in_text(self, text: str, previous_turns: List[ConversationTurn]) -> List[ConversationReference]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_generate_recent_context",
            "parameters": {
              "recent_turns": "List of recent ConversationTurn objects (typically last 3)"
            },
            "purpose": "Generate formatted context string from recent conversation turns",
            "returns": "Formatted string with exchange summaries",
            "signature": "_generate_recent_context(self, recent_turns: List[ConversationTurn]) -> str"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_summarize_input",
            "parameters": {
              "input_file": "Path to input file"
            },
            "purpose": "Create a brief summary of the input file based on filename",
            "returns": "String summary of input file",
            "signature": "_summarize_input(self, input_file: str) -> str"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_summarize_response",
            "parameters": {
              "response_text": "Full response text to summarize"
            },
            "purpose": "Create a brief summary of response text (first sentence or 100 characters)",
            "returns": "String summary of response",
            "signature": "_summarize_response(self, response_text: str) -> str"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_extract_topics_from_text",
            "parameters": {
              "text": "Text to analyze for topics"
            },
            "purpose": "Extract topics from text using keyword matching against predefined topic categories",
            "returns": "List of topic category strings found in text",
            "signature": "_extract_topics_from_text(self, text: str) -> List[str]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_extract_key_points",
            "parameters": {
              "text": "Response text to analyze"
            },
            "purpose": "Extract key points from response text based on indicator keywords",
            "returns": "List of up to 3 key point strings",
            "signature": "_extract_key_points(self, text: str) -> List[str]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_create_empty_context",
            "parameters": {
              "conversation_id": "Conversation identifier"
            },
            "purpose": "Create an empty ConversationContext for new conversations with no history",
            "returns": "Empty ConversationContext with default values",
            "signature": "_create_empty_context(self, conversation_id: str) -> ConversationContext"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "generate_contextual_prompt_enhancement",
            "parameters": {
              "context": "ConversationContext object with conversation intelligence",
              "current_input": "Current user input"
            },
            "purpose": "Generate formatted prompt enhancement text based on conversation context for inclusion in LLM prompts",
            "returns": "Formatted string with conversation context, recent exchanges, topics, references, insights, and problem-solving progression",
            "signature": "generate_contextual_prompt_enhancement(self, context: ConversationContext, current_input: str) -> str"
          }
        ]
      },
      "complexity": "complex",
      "component_type": "class",
      "conditional_imports": [],
      "created_at": "2025-12-07 00:14:44",
      "decorators": [],
      "dependencies": [
        "asyncio",
        "logging",
        "pathlib",
        "typing",
        "dataclasses",
        "datetime",
        "json",
        "re",
        "session_manager"
      ],
      "description": "Advanced conversation context manager that analyzes conversation history, extracts topics, builds reference maps, and generates contextual intelligence for multi-turn conversations.",
      "docstring": "Advanced conversation management with contextual intelligence",
      "id": 2014,
      "imports": [
        "import asyncio",
        "import logging",
        "from pathlib import Path",
        "from typing import List",
        "from typing import Dict",
        "from typing import Any",
        "from typing import Optional",
        "from typing import Tuple",
        "from dataclasses import dataclass",
        "from datetime import datetime",
        "import json",
        "import re",
        "from session_manager import SessionManager"
      ],
      "imports_required": [
        "import asyncio",
        "import logging",
        "from pathlib import Path",
        "from typing import List, Dict, Any, Optional, Tuple",
        "from dataclasses import dataclass",
        "from datetime import datetime",
        "import json",
        "import re",
        "from session_manager import SessionManager"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 416,
      "line_start": 57,
      "name": "ConversationContextManager",
      "parameters": [
        {
          "annotation": "",
          "default": null,
          "name": "bases"
        }
      ],
      "parameters_explained": {
        "session_manager": "SessionManager instance that provides access to conversation data, exchanges, and session storage. Required for retrieving conversation history and exchange details."
      },
      "parent_class": null,
      "purpose": "This class manages conversation context by analyzing exchange history, identifying topics and patterns, tracking references between exchanges, building problem-solving chains, and generating enhanced context summaries. It provides comprehensive conversation intelligence including topic extraction, key insights, reference mapping, and contextual prompt enhancements to help maintain coherent multi-turn conversations with awareness of previous exchanges.",
      "return_annotation": null,
      "return_explained": "Instantiation returns a ConversationContextManager object. Key method returns: get_enhanced_conversation_context() returns ConversationContext dataclass with comprehensive conversation intelligence; generate_contextual_prompt_enhancement() returns string with formatted context for prompt enhancement.",
      "settings_required": [
        "SessionManager instance must be configured and available",
        "ConversationContext dataclass must be defined with fields: conversation_id, total_exchanges, conversation_turns, active_topics, conversation_summary, key_insights, problem_solving_chain, recent_context, reference_map",
        "ConversationTurn dataclass must be defined with fields: exchange_id, exchange_number, timestamp, input_summary, response_summary, input_file, response_file, topics, key_points, processing_time, tokens_used",
        "ConversationReference dataclass must be defined with fields: exchange_number, exchange_id, reference_type, referenced_content, context_snippet, relevance_score",
        "Logging must be configured for __name__ logger"
      ],
      "source_code": "class ConversationContextManager:\n    \"\"\"Advanced conversation management with contextual intelligence\"\"\"\n    \n    def __init__(self, session_manager: SessionManager):\n        \"\"\"Initialize conversation context manager\"\"\"\n        self.session_manager = session_manager\n        self.logger = logging.getLogger(__name__)\n        \n        # Context analysis patterns\n        self.reference_patterns = {\n            'step_reference': r'(?:in )?step (\\d+)',\n            'exchange_reference': r'(?:in )?(?:exchange|turn) (\\d+)',\n            'previous_reference': r'(?:previously|earlier|before)',\n            'solution_reference': r'(?:the )?solution|approach|method',\n            'question_reference': r'(?:the )?question|problem|issue',\n            'data_reference': r'(?:the )?data|results|findings'\n        }\n        \n        # Topic extraction keywords\n        self.topic_keywords = {\n            'analysis': ['analyze', 'analysis', 'examine', 'study', 'investigate'],\n            'problem_solving': ['solve', 'solution', 'problem', 'issue', 'challenge'],\n            'data_processing': ['data', 'process', 'calculate', 'compute', 'transform'],\n            'visualization': ['chart', 'graph', 'plot', 'visualize', 'diagram'],\n            'collaboration': ['review', 'feedback', 'collaborate', 'discuss', 'refine'],\n            'documentation': ['document', 'record', 'notes', 'summary', 'report']\n        }\n    \n    async def get_enhanced_conversation_context(self, \n                                              conversation_id: str,\n                                              current_input: str = \"\") -> ConversationContext:\n        \"\"\"\n        Get enhanced conversation context with timeline and references\n        \n        Args:\n            conversation_id: Conversation ID\n            current_input: Current input to analyze for references\n            \n        Returns:\n            ConversationContext with comprehensive conversation intelligence\n        \"\"\"\n        self.logger.info(f\"Building enhanced context for conversation {conversation_id}\")\n        \n        # Get basic conversation data\n        conversation = self.session_manager.get_conversation(conversation_id)\n        if not conversation:\n            return self._create_empty_context(conversation_id)\n        \n        exchanges = self.session_manager.get_conversation_exchanges(conversation_id)\n        \n        # Build conversation turns\n        conversation_turns = []\n        for exchange in exchanges:\n            turn = await self._build_conversation_turn(exchange)\n            conversation_turns.append(turn)\n        \n        # Analyze conversation for topics and insights\n        active_topics = self._extract_active_topics(conversation_turns)\n        conversation_summary = await self._generate_conversation_summary(conversation_turns)\n        key_insights = self._extract_key_insights(conversation_turns)\n        problem_solving_chain = self._build_problem_solving_chain(conversation_turns)\n        \n        # Build reference map\n        reference_map = self._build_reference_map(conversation_turns, current_input)\n        \n        # Generate recent context (last 3 exchanges)\n        recent_context = self._generate_recent_context(conversation_turns[-3:])\n        \n        return ConversationContext(\n            conversation_id=conversation_id,\n            total_exchanges=len(conversation_turns),\n            conversation_turns=conversation_turns,\n     
       active_topics=active_topics,\n            conversation_summary=conversation_summary,\n            key_insights=key_insights,\n            problem_solving_chain=problem_solving_chain,\n            recent_context=recent_context,\n            reference_map=reference_map\n        )\n    \n    async def _build_conversation_turn(self, exchange: Dict[str, Any]) -> ConversationTurn:\n        \"\"\"Build a conversation turn from exchange data\"\"\"\n        \n        # Extract summaries from response text\n        response_text = exchange.get('response_text', '')\n        input_summary = self._summarize_input(exchange.get('input_file', ''))\n        response_summary = self._summarize_response(response_text)\n        \n        # Extract topics and key points\n        topics = self._extract_topics_from_text(response_text)\n        key_points = self._extract_key_points(response_text)\n        \n        return ConversationTurn(\n            exchange_id=exchange['exchange_id'],\n            exchange_number=exchange['exchange_number'],\n            timestamp=datetime.fromisoformat(exchange['timestamp']),\n            input_summary=input_summary,\n            response_summary=response_summary,\n            input_file=exchange.get('input_file', ''),\n            response_file=exchange.get('response_file', ''),\n            topics=topics,\n            key_points=key_points,\n            processing_time=exchange.get('processing_time', 0.0),\n            tokens_used=exchange.get('tokens_used', 0)\n        )\n    \n    def _extract_active_topics(self, turns: List[ConversationTurn]) -> List[str]:\n        \"\"\"Extract active topics from conversation turns\"\"\"\n        topic_counts = {}\n        \n        # Count topic occurrences across all turns\n        for turn in turns:\n            for topic in turn.topics:\n                topic_counts[topic] = topic_counts.get(topic, 0) + 1\n        \n        # Return topics that appear in multiple turns or recent turns\n        active_topics = []\n        recent_topics = set()\n        \n        # Get topics from last 2 turns\n        for turn in turns[-2:]:\n            recent_topics.update(turn.topics)\n        \n        # Include frequent topics and recent topics\n        for topic, count in topic_counts.items():\n            if count > 1 or topic in recent_topics:\n                active_topics.append(topic)\n        \n        return active_topics[:10]  # Limit to top 10\n    \n    async def _generate_conversation_summary(self, turns: List[ConversationTurn]) -> str:\n        \"\"\"Generate overall conversation summary\"\"\"\n        if not turns:\n            return \"New conversation\"\n        \n        # Simple summary based on turns\n        total_exchanges = len(turns)\n        main_topics = self._extract_active_topics(turns)\n        \n        if total_exchanges == 1:\n            return f\"Single exchange conversation focusing on {', '.join(main_topics[:3])}\"\n        else:\n            return f\"{total_exchanges}-turn conversation covering {', '.join(main_topics[:5])}\"\n    \n    def _extract_key_insights(self, turns: List[ConversationTurn]) -> List[str]:\n        \"\"\"Extract key insights from conversation\"\"\"\n        insights = []\n        \n        # Look for insights in key points\n        for turn in turns:\n            for point in turn.key_points:\n                if any(keyword in point.lower() for keyword in ['insight', 'conclusion', 'finding', 'result']):\n                    insights.append(f\"Exchange {turn.exchange_number}: 
{point}\")\n        \n        return insights[:5]  # Limit to top 5\n    \n    def _build_problem_solving_chain(self, turns: List[ConversationTurn]) -> List[Dict[str, Any]]:\n        \"\"\"Build problem-solving progression chain\"\"\"\n        chain = []\n        \n        for turn in turns:\n            # Identify problem-solving steps\n            step_type = 'analysis'\n            if any(keyword in turn.response_summary.lower() for keyword in ['solution', 'solve', 'fix']):\n                step_type = 'solution'\n            elif any(keyword in turn.response_summary.lower() for keyword in ['question', 'clarify', 'understand']):\n                step_type = 'clarification'\n            elif any(keyword in turn.response_summary.lower() for keyword in ['implement', 'apply', 'execute']):\n                step_type = 'implementation'\n            \n            chain.append({\n                'exchange_number': turn.exchange_number,\n                'step_type': step_type,\n                'description': turn.response_summary,\n                'topics': turn.topics\n            })\n        \n        return chain\n    \n    def _build_reference_map(self, \n                           turns: List[ConversationTurn], \n                           current_input: str) -> Dict[int, List[ConversationReference]]:\n        \"\"\"Build map of references between exchanges\"\"\"\n        reference_map = {}\n        \n        # Analyze current input for references to previous exchanges\n        if current_input:\n            references = self._find_references_in_text(current_input, turns)\n            if references:\n                reference_map[len(turns) + 1] = references  # Next exchange number\n        \n        # Analyze turn responses for references to previous turns\n        for i, turn in enumerate(turns[1:], 2):  # Start from second turn\n            references = self._find_references_in_text(turn.response_summary, turns[:i-1])\n            if references:\n                reference_map[turn.exchange_number] = references\n        \n        return reference_map\n    \n    def _find_references_in_text(self, \n                                text: str, \n                                previous_turns: List[ConversationTurn]) -> List[ConversationReference]:\n        \"\"\"Find references to previous exchanges in text\"\"\"\n        references = []\n        text_lower = text.lower()\n        \n        # Look for explicit step/exchange references\n        for pattern_name, pattern in self.reference_patterns.items():\n            matches = re.finditer(pattern, text_lower)\n            for match in matches:\n                if pattern_name in ['step_reference', 'exchange_reference']:\n                    try:\n                        ref_number = int(match.group(1))\n                        if ref_number <= len(previous_turns):\n                            referenced_turn = previous_turns[ref_number - 1]\n                            references.append(ConversationReference(\n                                exchange_number=ref_number,\n                                exchange_id=referenced_turn.exchange_id,\n                                reference_type=pattern_name,\n                                referenced_content=referenced_turn.response_summary,\n                                context_snippet=text[max(0, match.start()-20):match.end()+20],\n                                relevance_score=0.9\n                            ))\n                    except (ValueError, IndexError):\n                        
continue\n        \n        # Look for topical references (lower confidence)\n        for turn in previous_turns[-3:]:  # Only check recent turns for topical refs\n            for topic in turn.topics:\n                if topic.lower() in text_lower:\n                    references.append(ConversationReference(\n                        exchange_number=turn.exchange_number,\n                        exchange_id=turn.exchange_id,\n                        reference_type='topic',\n                        referenced_content=topic,\n                        context_snippet=f\"Topic: {topic}\",\n                        relevance_score=0.6\n                    ))\n        \n        return references\n    \n    def _generate_recent_context(self, recent_turns: List[ConversationTurn]) -> str:\n        \"\"\"Generate context string from recent turns\"\"\"\n        if not recent_turns:\n            return \"\"\n        \n        context_parts = []\n        for turn in recent_turns:\n            context_parts.append(\n                f\"Exchange {turn.exchange_number}: {turn.input_summary} \u2192 {turn.response_summary}\"\n            )\n        \n        return \"\\n\".join(context_parts)\n    \n    def _summarize_input(self, input_file: str) -> str:\n        \"\"\"Create summary of input file\"\"\"\n        if not input_file:\n            return \"No input file\"\n        \n        filename = Path(input_file).name\n        if filename.startswith('test_'):\n            return f\"Test document: {filename}\"\n        elif any(keyword in filename.lower() for keyword in ['question', 'problem']):\n            return f\"Question/Problem: {filename}\"\n        else:\n            return f\"Document: {filename}\"\n    \n    def _summarize_response(self, response_text: str) -> str:\n        \"\"\"Create summary of response text\"\"\"\n        if not response_text:\n            return \"No response\"\n        \n        # Take first sentence or first 100 characters\n        sentences = response_text.split('.')\n        if sentences and len(sentences[0]) < 100:\n            return sentences[0].strip() + \".\"\n        else:\n            return response_text[:100].strip() + \"...\"\n    \n    def _extract_topics_from_text(self, text: str) -> List[str]:\n        \"\"\"Extract topics from text using keyword matching\"\"\"\n        topics = []\n        text_lower = text.lower()\n        \n        for topic_category, keywords in self.topic_keywords.items():\n            if any(keyword in text_lower for keyword in keywords):\n                topics.append(topic_category)\n        \n        return topics\n    \n    def _extract_key_points(self, text: str) -> List[str]:\n        \"\"\"Extract key points from response text\"\"\"\n        # Simple extraction of sentences with key indicators\n        key_indicators = ['important', 'key', 'main', 'primary', 'essential', 'critical']\n        \n        sentences = text.split('.')\n        key_points = []\n        \n        for sentence in sentences[:10]:  # Check first 10 sentences\n            sentence = sentence.strip()\n            if len(sentence) > 20 and any(indicator in sentence.lower() for indicator in key_indicators):\n                key_points.append(sentence + \".\")\n        \n        return key_points[:3]  # Return up to 3 key points\n    \n    def _create_empty_context(self, conversation_id: str) -> ConversationContext:\n        \"\"\"Create empty context for new conversations\"\"\"\n        return ConversationContext(\n            conversation_id=conversation_id,\n 
           total_exchanges=0,\n            conversation_turns=[],\n            active_topics=[],\n            conversation_summary=\"New conversation\",\n            key_insights=[],\n            problem_solving_chain=[],\n            recent_context=\"\",\n            reference_map={}\n        )\n    \n    def generate_contextual_prompt_enhancement(self, \n                                             context: ConversationContext,\n                                             current_input: str) -> str:\n        \"\"\"Generate prompt enhancement based on conversation context\"\"\"\n        \n        if context.total_exchanges == 0:\n            return \"This is the start of a new conversation.\"\n        \n        enhancements = []\n        \n        # Add conversation summary\n        enhancements.append(f\"CONVERSATION CONTEXT: {context.conversation_summary}\")\n        \n        # Add recent context\n        if context.recent_context:\n            enhancements.append(f\"RECENT EXCHANGES:\\n{context.recent_context}\")\n        \n        # Add active topics\n        if context.active_topics:\n            enhancements.append(f\"ACTIVE TOPICS: {', '.join(context.active_topics)}\")\n        \n        # Add references if found\n        references = context.reference_map.get(context.total_exchanges + 1, [])\n        if references:\n            ref_text = []\n            for ref in references:\n                ref_text.append(f\"References exchange {ref.exchange_number}: {ref.referenced_content}\")\n            enhancements.append(f\"REFERENCES:\\n\" + \"\\n\".join(ref_text))\n        \n        # Add key insights\n        if context.key_insights:\n            enhancements.append(f\"KEY INSIGHTS:\\n\" + \"\\n\".join(context.key_insights))\n        \n        # Add problem-solving progression\n        if len(context.problem_solving_chain) > 1:\n            chain_text = []\n            for step in context.problem_solving_chain[-3:]:  # Last 3 steps\n                chain_text.append(f\"Step {step['exchange_number']} ({step['step_type']}): {step['description']}\")\n            enhancements.append(f\"PROBLEM-SOLVING PROGRESSION:\\n\" + \"\\n\".join(chain_text))\n        \n        return \"\\n\\n\".join(enhancements)",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/conversation_context.py",
      "tags": [
        "conversation-management",
        "context-analysis",
        "topic-extraction",
        "reference-tracking",
        "conversation-intelligence",
        "async",
        "nlp",
        "pattern-matching",
        "problem-solving-chain",
        "contextual-awareness"
      ],
      "updated_at": "2025-12-07T01:14:44.130164",
      "usage_example": "# Instantiate with session manager\nfrom session_manager import SessionManager\n\nsession_mgr = SessionManager()\ncontext_mgr = ConversationContextManager(session_mgr)\n\n# Get enhanced context for a conversation\ncontext = await context_mgr.get_enhanced_conversation_context(\n    conversation_id=\"conv_123\",\n    current_input=\"Can you explain step 2 in more detail?\"\n)\n\n# Access context information\nprint(f\"Total exchanges: {context.total_exchanges}\")\nprint(f\"Active topics: {context.active_topics}\")\nprint(f\"Summary: {context.conversation_summary}\")\n\n# Generate prompt enhancement\nenhancement = context_mgr.generate_contextual_prompt_enhancement(\n    context=context,\n    current_input=\"Can you explain step 2 in more detail?\"\n)\nprint(f\"Context enhancement:\\n{enhancement}\")\n\n# Access specific turns\nfor turn in context.conversation_turns:\n    print(f\"Exchange {turn.exchange_number}: {turn.response_summary}\")\n\n# Check references\nif context.reference_map:\n    for exchange_num, refs in context.reference_map.items():\n        print(f\"Exchange {exchange_num} references: {len(refs)} previous exchanges\")"
    },
    {
      "best_practices": [
        "Initialize all required attributes when creating an instance to avoid AttributeError exceptions",
        "Use conversation_id consistently across related operations to maintain conversation continuity",
        "Update total_exchanges incrementally as new turns are added to conversation_turns to keep counts synchronized",
        "Maintain chronological order in conversation_turns list for accurate timeline representation",
        "Keep active_topics list updated by removing stale topics and adding new ones as conversation evolves",
        "Update conversation_summary periodically rather than after every turn to balance detail and performance",
        "Use problem_solving_chain to track reasoning steps in complex multi-turn problem-solving scenarios",
        "Populate reference_map with integer keys corresponding to turn indices for efficient cross-referencing",
        "Keep recent_context concise and focused on the most immediately relevant information for context window management",
        "Consider implementing serialization methods (to_dict/from_dict) for persistence if needed",
        "Be mindful of memory usage when storing long conversations with many turns and references",
        "Validate that ConversationTurn and ConversationReference types are properly defined before instantiation"
      ],
      "class_interface": {
        "attributes": [
          {
            "description": "Unique identifier for the conversation instance",
            "is_class_variable": false,
            "name": "conversation_id",
            "type": "str"
          },
          {
            "description": "Total count of message exchanges in the conversation",
            "is_class_variable": false,
            "name": "total_exchanges",
            "type": "int"
          },
          {
            "description": "Chronological list of conversation turns with message details",
            "is_class_variable": false,
            "name": "conversation_turns",
            "type": "List[ConversationTurn]"
          },
          {
            "description": "List of currently active topics in the conversation",
            "is_class_variable": false,
            "name": "active_topics",
            "type": "List[str]"
          },
          {
            "description": "High-level summary of the conversation content",
            "is_class_variable": false,
            "name": "conversation_summary",
            "type": "str"
          },
          {
            "description": "Important insights and conclusions extracted from the conversation",
            "is_class_variable": false,
            "name": "key_insights",
            "type": "List[str]"
          },
          {
            "description": "Sequence of problem-solving steps and reasoning chains",
            "is_class_variable": false,
            "name": "problem_solving_chain",
            "type": "List[Dict[str, Any]]"
          },
          {
            "description": "Most recent contextual information for immediate context management",
            "is_class_variable": false,
            "name": "recent_context",
            "type": "str"
          },
          {
            "description": "Mapping of turn indices to conversation references for cross-referencing",
            "is_class_variable": false,
            "name": "reference_map",
            "type": "Dict[int, List[ConversationReference]]"
          }
        ],
        "methods": []
      },
      "complexity": "moderate",
      "component_type": "class",
      "conditional_imports": [
        {
          "condition": "Required if the ConversationContext is used in conjunction with session management functionality",
          "import": "from session_manager import SessionManager",
          "optional": false
        }
      ],
      "created_at": "2025-12-07 00:13:55",
      "decorators": [
        "dataclass"
      ],
      "dependencies": [
        "dataclasses",
        "typing",
        "datetime",
        "json",
        "re",
        "asyncio",
        "logging",
        "pathlib"
      ],
      "description": "A dataclass that stores comprehensive conversation context including timeline, turns, topics, insights, and references for managing rich conversational state.",
      "docstring": "Rich conversation context with timeline and references",
      "id": 2013,
      "imports": [
        "import asyncio",
        "import logging",
        "from pathlib import Path",
        "from typing import List",
        "from typing import Dict",
        "from typing import Any",
        "from typing import Optional",
        "from typing import Tuple",
        "from dataclasses import dataclass",
        "from datetime import datetime",
        "import json",
        "import re",
        "from session_manager import SessionManager"
      ],
      "imports_required": [
        "from dataclasses import dataclass",
        "from typing import List, Dict, Any, Optional, Tuple",
        "from datetime import datetime",
        "import json",
        "import re",
        "import asyncio",
        "import logging",
        "from pathlib import Path"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 55,
      "line_start": 45,
      "name": "ConversationContext",
      "parameters": [
        {
          "annotation": "",
          "default": null,
          "name": "bases"
        }
      ],
      "parameters_explained": {
        "active_topics": "List of strings representing currently active or relevant topics being discussed in the conversation",
        "conversation_id": "Unique identifier string for the conversation instance, used to distinguish between different conversation sessions",
        "conversation_summary": "String containing a high-level summary of the conversation content and progression",
        "conversation_turns": "List of ConversationTurn objects representing the chronological sequence of exchanges in the conversation, each turn containing message details and metadata",
        "key_insights": "List of strings capturing important insights, conclusions, or notable points extracted from the conversation",
        "problem_solving_chain": "List of dictionaries tracking the problem-solving process, where each dictionary contains steps, decisions, or reasoning chains with flexible key-value structure",
        "recent_context": "String containing the most recent contextual information, typically used for immediate context window management",
        "reference_map": "Dictionary mapping integer indices to lists of ConversationReference objects, enabling cross-referencing between different parts of the conversation",
        "total_exchanges": "Integer count of the total number of message exchanges that have occurred in this conversation"
      },
      "parent_class": null,
      "purpose": "ConversationContext serves as a structured container for maintaining detailed conversation state across multiple exchanges. It tracks the conversation timeline through turns, maintains active topics, stores summaries and key insights, manages problem-solving chains, and provides a reference mapping system. This class is designed to support context-aware conversational AI systems that need to maintain coherent, long-running dialogues with historical awareness and cross-referencing capabilities.",
      "return_annotation": null,
      "return_explained": "As a dataclass, instantiation returns a ConversationContext object with all specified attributes initialized. The class itself does not define explicit methods with return values, but instances can be used to access and modify conversation state through attribute access.",
      "settings_required": [
        "ConversationTurn class must be defined or imported to populate conversation_turns attribute",
        "ConversationReference class must be defined or imported to populate reference_map values"
      ],
      "source_code": "class ConversationContext:\n    \"\"\"Rich conversation context with timeline and references\"\"\"\n    conversation_id: str\n    total_exchanges: int\n    conversation_turns: List[ConversationTurn]\n    active_topics: List[str]\n    conversation_summary: str\n    key_insights: List[str]\n    problem_solving_chain: List[Dict[str, Any]]\n    recent_context: str\n    reference_map: Dict[int, List[ConversationReference]]",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/conversation_context.py",
      "tags": [
        "conversation",
        "context-management",
        "dataclass",
        "state-management",
        "dialogue-system",
        "conversational-ai",
        "timeline",
        "reference-tracking",
        "problem-solving",
        "chat-history"
      ],
      "updated_at": "2025-12-07T01:13:55.077694",
      "usage_example": "from dataclasses import dataclass\nfrom typing import List, Dict, Any\nfrom datetime import datetime\n\n# Assuming ConversationTurn and ConversationReference are defined\ncontext = ConversationContext(\n    conversation_id=\"conv_12345\",\n    total_exchanges=5,\n    conversation_turns=[],\n    active_topics=[\"machine learning\", \"data preprocessing\"],\n    conversation_summary=\"Discussion about ML model training\",\n    key_insights=[\"User needs batch processing\", \"Performance is critical\"],\n    problem_solving_chain=[\n        {\"step\": 1, \"action\": \"identify requirements\", \"outcome\": \"batch processing needed\"},\n        {\"step\": 2, \"action\": \"propose solution\", \"outcome\": \"suggested parallel processing\"}\n    ],\n    recent_context=\"User asked about optimizing data pipeline\",\n    reference_map={0: [], 1: []}\n)\n\n# Access attributes\nprint(context.conversation_id)\nprint(context.active_topics)\n\n# Modify attributes\ncontext.total_exchanges += 1\ncontext.active_topics.append(\"optimization\")\ncontext.key_insights.append(\"Consider using GPU acceleration\")"
    },
    {
      "best_practices": [
        "Use meaningful exchange_id values that can be traced back to the original conversation storage system",
        "Ensure reference_type values are consistent across the application (use constants or enums for the four types: 'topic', 'solution', 'question', 'data')",
        "Normalize relevance_score to a consistent range (typically 0.0 to 1.0) for easier comparison and filtering",
        "Keep context_snippet concise but informative - it should provide enough context without requiring full exchange retrieval",
        "Consider making the class frozen (frozen=True) if immutability is desired after creation",
        "When storing multiple references, sort by relevance_score descending to prioritize most relevant references",
        "Validate that exchange_number is non-negative and exchange_id is non-empty when creating instances",
        "Use this class in conjunction with conversation management systems that maintain full exchange history"
      ],
      "class_interface": {
        "attributes": [
          {
            "description": "Sequential number identifying the position of the exchange in the conversation",
            "is_class_variable": false,
            "name": "exchange_number",
            "type": "int"
          },
          {
            "description": "Unique string identifier for the specific exchange being referenced",
            "is_class_variable": false,
            "name": "exchange_id",
            "type": "str"
          },
          {
            "description": "Category of the reference: 'topic', 'solution', 'question', or 'data'",
            "is_class_variable": false,
            "name": "reference_type",
            "type": "str"
          },
          {
            "description": "The actual content from the previous exchange that is being referenced",
            "is_class_variable": false,
            "name": "referenced_content",
            "type": "str"
          },
          {
            "description": "Brief excerpt providing context around the referenced content",
            "is_class_variable": false,
            "name": "context_snippet",
            "type": "str"
          },
          {
            "description": "Numerical score indicating how relevant this reference is to the current context",
            "is_class_variable": false,
            "name": "relevance_score",
            "type": "float"
          }
        ],
        "methods": [
          {
            "is_property": false,
            "is_static": false,
            "name": "__init__",
            "parameters": {
              "context_snippet": "Brief contextual excerpt surrounding the reference",
              "exchange_id": "Unique identifier for the referenced exchange",
              "exchange_number": "Sequential position of the exchange in conversation history",
              "reference_type": "Category of reference: 'topic', 'solution', 'question', or 'data'",
              "referenced_content": "The actual content being referenced from the previous exchange",
              "relevance_score": "Numerical score indicating relevance (typically 0.0-1.0)"
            },
            "purpose": "Initializes a new ConversationReference instance with all required attributes. Auto-generated by @dataclass decorator.",
            "returns": "None - initializes the instance",
            "signature": "__init__(exchange_number: int, exchange_id: str, reference_type: str, referenced_content: str, context_snippet: str, relevance_score: float) -> None"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "__repr__",
            "parameters": {},
            "purpose": "Returns a string representation of the ConversationReference instance. Auto-generated by @dataclass decorator.",
            "returns": "String representation showing all attribute values in the format: ConversationReference(exchange_number=..., exchange_id=..., ...)",
            "signature": "__repr__() -> str"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "__eq__",
            "parameters": {
              "other": "Another object to compare with this instance"
            },
            "purpose": "Compares two ConversationReference instances for equality based on all attributes. Auto-generated by @dataclass decorator.",
            "returns": "True if all attributes are equal, False otherwise",
            "signature": "__eq__(other: object) -> bool"
          }
        ]
      },
      "complexity": "simple",
      "component_type": "class",
      "conditional_imports": [],
      "created_at": "2025-12-07 00:13:24",
      "decorators": [
        "dataclass"
      ],
      "dependencies": [],
      "description": "A dataclass that stores a reference to a previous conversation exchange, including metadata about the reference type, content, and relevance.",
      "docstring": "Reference to a previous exchange",
      "id": 2012,
      "imports": [
        "import asyncio",
        "import logging",
        "from pathlib import Path",
        "from typing import List",
        "from typing import Dict",
        "from typing import Any",
        "from typing import Optional",
        "from typing import Tuple",
        "from dataclasses import dataclass",
        "from datetime import datetime",
        "import json",
        "import re",
        "from session_manager import SessionManager"
      ],
      "imports_required": [
        "from dataclasses import dataclass"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 42,
      "line_start": 35,
      "name": "ConversationReference",
      "parameters": [
        {
          "annotation": "",
          "default": null,
          "name": "bases"
        }
      ],
      "parameters_explained": {
        "context_snippet": "A string providing a brief excerpt or summary of the surrounding context from the referenced exchange. Helps understand the reference without retrieving the full exchange.",
        "exchange_id": "A unique string identifier for the specific exchange being referenced. Allows for precise lookup and retrieval of the original exchange.",
        "exchange_number": "An integer representing the sequential position of the exchange in the conversation history. Used for ordering and identifying specific exchanges chronologically.",
        "reference_type": "A string categorizing the type of reference. Expected values are 'topic', 'solution', 'question', or 'data'. This helps classify what aspect of the previous exchange is being referenced.",
        "referenced_content": "A string containing the actual content from the previous exchange that is being referenced. This is the substantive information being recalled.",
        "relevance_score": "A float value (typically between 0.0 and 1.0) indicating how relevant this reference is to the current context. Higher scores indicate greater relevance, useful for ranking or filtering references."
      },
      "parent_class": null,
      "purpose": "ConversationReference serves as a structured data container for tracking and referencing previous exchanges in a conversation. It enables conversation continuity by storing key information about past interactions including the exchange identifier, type of reference (topic, solution, question, or data), the actual content being referenced, contextual snippets, and a relevance score for ranking or filtering purposes. This is typically used in conversational AI systems, chatbots, or dialogue management systems to maintain context and enable coherent multi-turn conversations.",
      "return_annotation": null,
      "return_explained": "Instantiation returns a ConversationReference object with all six attributes initialized. As a dataclass, it automatically generates __init__, __repr__, __eq__, and other special methods. The object serves as an immutable-by-convention data container for conversation reference metadata.",
      "settings_required": [],
      "source_code": "class ConversationReference:\n    \"\"\"Reference to a previous exchange\"\"\"\n    exchange_number: int\n    exchange_id: str\n    reference_type: str  # 'topic', 'solution', 'question', 'data'\n    referenced_content: str\n    context_snippet: str\n    relevance_score: float",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/conversation_context.py",
      "tags": [
        "dataclass",
        "conversation",
        "reference",
        "dialogue-management",
        "context-tracking",
        "conversation-history",
        "metadata",
        "data-container",
        "chatbot",
        "conversational-ai"
      ],
      "updated_at": "2025-12-07T01:13:24.855432",
      "usage_example": "from dataclasses import dataclass\n\n@dataclass\nclass ConversationReference:\n    exchange_number: int\n    exchange_id: str\n    reference_type: str\n    referenced_content: str\n    context_snippet: str\n    relevance_score: float\n\n# Create a reference to a previous exchange\nref = ConversationReference(\n    exchange_number=5,\n    exchange_id=\"conv_123_ex_5\",\n    reference_type=\"solution\",\n    referenced_content=\"To solve this, use the formula: x = (a + b) / 2\",\n    context_snippet=\"User asked about calculating averages\",\n    relevance_score=0.92\n)\n\n# Access attributes\nprint(ref.exchange_number)  # 5\nprint(ref.reference_type)   # 'solution'\nprint(ref.relevance_score)  # 0.92\n\n# Dataclass provides automatic __repr__\nprint(ref)\n\n# Dataclass provides automatic equality comparison\nref2 = ConversationReference(5, \"conv_123_ex_5\", \"solution\", \"To solve this, use the formula: x = (a + b) / 2\", \"User asked about calculating averages\", 0.92)\nprint(ref == ref2)  # True"
    },
    {
      "best_practices": [
        "This is a pure data container with no methods, so instantiation is straightforward using keyword arguments",
        "Always provide all required fields during instantiation as dataclasses without default values require all parameters",
        "Use consistent formats for exchange_id (e.g., UUID strings) to ensure uniqueness across conversations",
        "Ensure exchange_number is sequential and starts from 1 for proper conversation ordering",
        "Store actual file content in the files referenced by input_file and response_file, using these attributes only for references",
        "Use datetime.now() or datetime.utcnow() for timestamp to ensure accurate time tracking",
        "Keep topics and key_points lists concise and relevant for effective conversation analysis",
        "Track processing_time accurately to monitor performance and identify bottlenecks",
        "Monitor tokens_used for API cost management and rate limiting considerations",
        "Consider serializing to JSON or similar format for persistence using dataclasses.asdict() or similar utilities",
        "This class is immutable by default unless frozen=True is added to @dataclass decorator; consider immutability for thread safety"
      ],
      "class_interface": {
        "attributes": [
          {
            "description": "Unique identifier for this conversation turn",
            "is_class_variable": false,
            "name": "exchange_id",
            "type": "str"
          },
          {
            "description": "Sequential number indicating the position of this turn in the conversation",
            "is_class_variable": false,
            "name": "exchange_number",
            "type": "int"
          },
          {
            "description": "When this conversation turn occurred",
            "is_class_variable": false,
            "name": "timestamp",
            "type": "datetime"
          },
          {
            "description": "Summary or full text of the user's input for this turn",
            "is_class_variable": false,
            "name": "input_summary",
            "type": "str"
          },
          {
            "description": "Summary or full text of the system's response for this turn",
            "is_class_variable": false,
            "name": "response_summary",
            "type": "str"
          },
          {
            "description": "File path where the full input content is stored",
            "is_class_variable": false,
            "name": "input_file",
            "type": "str"
          },
          {
            "description": "File path where the full response content is stored",
            "is_class_variable": false,
            "name": "response_file",
            "type": "str"
          },
          {
            "description": "List of main topics or themes identified in this conversation turn",
            "is_class_variable": false,
            "name": "topics",
            "type": "List[str]"
          },
          {
            "description": "List of important points or takeaways from this exchange",
            "is_class_variable": false,
            "name": "key_points",
            "type": "List[str]"
          },
          {
            "description": "Time in seconds taken to process and generate the response",
            "is_class_variable": false,
            "name": "processing_time",
            "type": "float"
          },
          {
            "description": "Count of tokens consumed during this exchange",
            "is_class_variable": false,
            "name": "tokens_used",
            "type": "int"
          }
        ],
        "methods": []
      },
      "complexity": "simple",
      "component_type": "class",
      "conditional_imports": [],
      "created_at": "2025-12-07 00:12:54",
      "decorators": [
        "dataclass"
      ],
      "dependencies": [
        "dataclasses",
        "datetime",
        "typing"
      ],
      "description": "A dataclass representing a single turn in a conversation, storing metadata about the exchange including input/output summaries, files, topics, and processing metrics.",
      "docstring": "Individual turn in a conversation",
      "id": 2011,
      "imports": [
        "import asyncio",
        "import logging",
        "from pathlib import Path",
        "from typing import List",
        "from typing import Dict",
        "from typing import Any",
        "from typing import Optional",
        "from typing import Tuple",
        "from dataclasses import dataclass",
        "from datetime import datetime",
        "import json",
        "import re",
        "from session_manager import SessionManager"
      ],
      "imports_required": [
        "from dataclasses import dataclass",
        "from datetime import datetime",
        "from typing import List"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 32,
      "line_start": 20,
      "name": "ConversationTurn",
      "parameters": [
        {
          "annotation": "",
          "default": null,
          "name": "bases"
        }
      ],
      "parameters_explained": {
        "exchange_id": "Unique identifier for this conversation turn, typically a UUID or similar unique string to distinguish this exchange from others",
        "exchange_number": "Sequential number indicating the position of this turn in the conversation (e.g., 1 for first turn, 2 for second turn)",
        "input_file": "File path or filename where the full input content is stored, allowing reference to complete input data",
        "input_summary": "String containing a summary or the full text of the user's input/query for this turn",
        "key_points": "List of strings capturing the important points or takeaways from this exchange",
        "processing_time": "Float value representing the time (in seconds) taken to process and generate the response",
        "response_file": "File path or filename where the full response content is stored, allowing reference to complete response data",
        "response_summary": "String containing a summary or the full text of the system's response for this turn",
        "timestamp": "datetime object recording when this conversation turn occurred, used for chronological ordering and time-based analysis",
        "tokens_used": "Integer count of tokens consumed during this exchange, relevant for API usage tracking and cost management",
        "topics": "List of strings representing the main topics or themes identified in this conversation turn"
      },
      "parent_class": null,
      "purpose": "ConversationTurn serves as a structured data container for capturing all relevant information about a single exchange in a conversation. It stores both the content summaries (input and response), file references, extracted topics and key points, and performance metrics (processing time and token usage). This class is typically used in conversation management systems to maintain a history of interactions, enable conversation analysis, and support session persistence.",
      "return_annotation": null,
      "return_explained": "As a dataclass, instantiation returns a ConversationTurn object with all specified attributes initialized. The object provides automatic __init__, __repr__, __eq__, and other methods generated by the @dataclass decorator. No explicit methods return values as this is a pure data container.",
      "settings_required": [
        "No specific configuration or environment variables required for this dataclass itself",
        "File paths in input_file and response_file should be valid and accessible if file operations are performed elsewhere"
      ],
      "source_code": "class ConversationTurn:\n    \"\"\"Individual turn in a conversation\"\"\"\n    exchange_id: str\n    exchange_number: int\n    timestamp: datetime\n    input_summary: str\n    response_summary: str\n    input_file: str\n    response_file: str\n    topics: List[str]\n    key_points: List[str]\n    processing_time: float\n    tokens_used: int",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/conversation_context.py",
      "tags": [
        "dataclass",
        "conversation",
        "turn",
        "exchange",
        "chat-history",
        "session-management",
        "metadata",
        "data-container",
        "conversation-tracking",
        "nlp",
        "chatbot"
      ],
      "updated_at": "2025-12-07T01:12:54.848376",
      "usage_example": "from dataclasses import dataclass\nfrom datetime import datetime\nfrom typing import List\n\n@dataclass\nclass ConversationTurn:\n    exchange_id: str\n    exchange_number: int\n    timestamp: datetime\n    input_summary: str\n    response_summary: str\n    input_file: str\n    response_file: str\n    topics: List[str]\n    key_points: List[str]\n    processing_time: float\n    tokens_used: int\n\n# Create a conversation turn\nturn = ConversationTurn(\n    exchange_id=\"turn-001\",\n    exchange_number=1,\n    timestamp=datetime.now(),\n    input_summary=\"User asked about Python dataclasses\",\n    response_summary=\"Explained dataclass features and benefits\",\n    input_file=\"/path/to/input_001.txt\",\n    response_file=\"/path/to/response_001.txt\",\n    topics=[\"Python\", \"dataclasses\", \"programming\"],\n    key_points=[\"Dataclasses reduce boilerplate\", \"Auto-generate methods\"],\n    processing_time=1.23,\n    tokens_used=450\n)\n\n# Access attributes\nprint(f\"Turn {turn.exchange_number}: {turn.input_summary}\")\nprint(f\"Topics: {', '.join(turn.topics)}\")\nprint(f\"Processing took {turn.processing_time}s using {turn.tokens_used} tokens\")"
    },
    {
      "best_practices": [
        "Always check that the PDF file exists before calling extract_all_pages() to avoid FileNotFoundError",
        "Set max_pages appropriately based on your use case to prevent memory issues with very large PDFs",
        "Use high_quality=True for documents requiring accurate OCR or detailed analysis, but be aware of increased processing time and memory usage",
        "The class maintains no persistent state between method calls except for configuration (max_pages, high_quality, dpi_scale), making it safe to reuse for multiple documents",
        "PageAnalysis and DocumentSummary dataclasses must be defined before using this class",
        "Call extract_all_pages() first to get page data before using other methods like create_context_aware_prompt() or generate_document_summary()",
        "The logger attribute uses __name__, so configure logging at the module level for proper log output",
        "For large documents, consider processing pages in batches rather than all at once to manage memory",
        "The create_combined_visualization() method can produce very large images; adjust max_width parameter based on your needs",
        "Document classification in _classify_document_type() uses simple keyword matching; consider enhancing with NLP for production use",
        "Context-aware prompts include up to 3 previous and 2 upcoming pages; adjust these limits in create_context_aware_prompt() if needed"
      ],
      "class_interface": {
        "attributes": [
          {
            "description": "Maximum number of pages to process from a PDF document, set during initialization",
            "is_class_variable": false,
            "name": "max_pages",
            "type": "int"
          },
          {
            "description": "Flag indicating whether to use high-quality rendering for page images",
            "is_class_variable": false,
            "name": "high_quality",
            "type": "bool"
          },
          {
            "description": "DPI scaling factor for page rendering. Set to 2.0 if high_quality is True, otherwise 1.5",
            "is_class_variable": false,
            "name": "dpi_scale",
            "type": "float"
          },
          {
            "description": "Logger instance for logging processing information, warnings, and errors",
            "is_class_variable": false,
            "name": "logger",
            "type": "logging.Logger"
          }
        ],
        "methods": [
          {
            "is_property": false,
            "is_static": false,
            "name": "__init__",
            "parameters": {
              "high_quality": "Use high DPI for better quality (default: True)",
              "max_pages": "Maximum number of pages to process (default: 50)"
            },
            "purpose": "Initialize the MultiPagePDFProcessor with configuration parameters",
            "returns": "None - initializes instance attributes",
            "signature": "__init__(self, max_pages: int = 50, high_quality: bool = True)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "extract_all_pages",
            "parameters": {
              "pdf_path": "Path object pointing to the PDF file to process"
            },
            "purpose": "Extract all pages from a PDF file with metadata, converting each page to an image and extracting text content",
            "returns": "Tuple containing: (1) List of PageAnalysis objects, one per page with image and text data, (2) Dictionary with document-level metadata including source_type, source_file, total_pages, processed_pages, file_size, creation_date, title, author, subject",
            "signature": "extract_all_pages(self, pdf_path: Path) -> Tuple[List[PageAnalysis], Dict[str, Any]]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_extract_single_page",
            "parameters": {
              "doc": "Open fitz.Document object",
              "page_num": "Zero-based page number to extract"
            },
            "purpose": "Internal method to extract a single page from an open PDF document, rendering it as an image and extracting text",
            "returns": "PageAnalysis object containing page_number (1-based), image_b64 (base64-encoded PNG), text_content (extracted text), and dimensions (width, height)",
            "signature": "_extract_single_page(self, doc: fitz.Document, page_num: int) -> PageAnalysis"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "create_context_aware_prompt",
            "parameters": {
              "conversation_context": "Optional previous conversation history to include in prompt",
              "current_page": "1-based page number to create prompt for",
              "pages": "List of all PageAnalysis objects for the document"
            },
            "purpose": "Generate a context-aware prompt for LLM analysis of a specific page, including context from previous pages, upcoming pages preview, and conversation history",
            "returns": "String containing a comprehensive prompt for LLM analysis with document context, previous page summaries (up to 3), upcoming page previews (up to 2), and conversation history",
            "signature": "create_context_aware_prompt(self, pages: List[PageAnalysis], current_page: int, conversation_context: str = '') -> str"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "generate_document_summary",
            "parameters": {
              "document_metadata": "Document-level metadata dictionary from extract_all_pages()",
              "pages": "List of all analyzed PageAnalysis objects"
            },
            "purpose": "Generate a comprehensive summary of the entire document including classification, topics, findings, and confidence score",
            "returns": "DocumentSummary object containing total_pages, document_type (classified), main_topics (set of topics), key_findings, page_summaries (list of per-page summaries), overall_summary (narrative summary), and confidence_score (0.0-1.0)",
            "signature": "generate_document_summary(self, pages: List[PageAnalysis], document_metadata: Dict[str, Any]) -> DocumentSummary"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_classify_document_type",
            "parameters": {
              "metadata": "Document metadata dictionary",
              "pages": "List of PageAnalysis objects"
            },
            "purpose": "Internal method to classify document type based on content patterns and keywords",
            "returns": "String representing document type: 'research_paper', 'legal_document', 'financial_report', 'instructional', 'meeting_document', 'book_document', or 'general_document'",
            "signature": "_classify_document_type(self, pages: List[PageAnalysis], metadata: Dict[str, Any]) -> str"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_generate_overall_summary",
            "parameters": {
              "doc_type": "Classified document type string",
              "pages": "List of PageAnalysis objects"
            },
            "purpose": "Internal method to generate a narrative overall summary of the document",
            "returns": "String containing a narrative summary describing the document's content, structure, and type-specific characteristics",
            "signature": "_generate_overall_summary(self, pages: List[PageAnalysis], doc_type: str) -> str"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_calculate_confidence_score",
            "parameters": {
              "pages": "List of PageAnalysis objects"
            },
            "purpose": "Internal method to calculate a confidence score for the document analysis based on text coverage, analysis coverage, and content quality",
            "returns": "Float between 0.0 and 1.0 representing confidence in the analysis, rounded to 2 decimal places. Calculated from text_coverage (30%), analysis_coverage (40%), and text_score (30%)",
            "signature": "_calculate_confidence_score(self, pages: List[PageAnalysis]) -> float"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "create_combined_visualization",
            "parameters": {
              "max_width": "Maximum width for the combined image in pixels (default: 2400). Images are scaled down if wider.",
              "pages": "List of PageAnalysis objects to combine"
            },
            "purpose": "Create a single combined image visualization of multiple pages stacked vertically",
            "returns": "Base64-encoded string of the combined PNG image with all pages stacked vertically",
            "signature": "create_combined_visualization(self, pages: List[PageAnalysis], max_width: int = 2400) -> str"
          }
        ]
      },
      "complexity": "complex",
      "component_type": "class",
      "conditional_imports": [],
      "created_at": "2025-12-07 00:12:02",
      "decorators": [],
      "dependencies": [
        "fitz",
        "PyMuPDF",
        "base64",
        "io",
        "pathlib",
        "typing",
        "dataclasses",
        "PIL",
        "Pillow",
        "logging",
        "sys"
      ],
      "description": "A class for processing multi-page PDF documents with context-aware analysis, OCR, and summarization capabilities.",
      "docstring": "Process multi-page PDFs with context awareness and summarization",
      "id": 2009,
      "imports": [
        "import fitz",
        "import base64",
        "import io",
        "from pathlib import Path",
        "from typing import List",
        "from typing import Dict",
        "from typing import Any",
        "from typing import Tuple",
        "from typing import Optional",
        "from dataclasses import dataclass",
        "from PIL import Image as PILImage",
        "import logging",
        "import sys"
      ],
      "imports_required": [
        "import fitz",
        "import base64",
        "import io",
        "from pathlib import Path",
        "from typing import List, Dict, Any, Tuple, Optional",
        "from dataclasses import dataclass",
        "from PIL import Image as PILImage",
        "import logging"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 372,
      "line_start": 39,
      "name": "MultiPagePDFProcessor",
      "parameters": [
        {
          "annotation": "",
          "default": null,
          "name": "bases"
        }
      ],
      "parameters_explained": {
        "high_quality": "Boolean flag to enable high-quality image rendering for better OCR and analysis. When True, uses DPI scale of 2.0; when False, uses 1.5. Higher quality produces better results but larger images and slower processing. Default is True.",
        "max_pages": "Maximum number of pages to process from a PDF document. Acts as a safety limit to prevent processing extremely large documents. Default is 50 pages. If a PDF has more pages than this limit, only the first max_pages will be processed."
      },
      "parent_class": null,
      "purpose": "MultiPagePDFProcessor handles extraction, analysis, and summarization of multi-page PDF documents. It converts PDF pages to high-quality images, extracts text content, maintains context across pages, generates document-level summaries, classifies document types, and creates visualizations. The class is designed for comprehensive document understanding with AI-assisted analysis, supporting workflows that require page-by-page processing with awareness of document structure and flow.",
      "return_annotation": null,
      "return_explained": "Instantiation returns a MultiPagePDFProcessor object configured with the specified parameters. Key method returns: extract_all_pages() returns a tuple of (List[PageAnalysis], Dict[str, Any]) containing page analyses and document metadata; create_context_aware_prompt() returns a string prompt for LLM analysis; generate_document_summary() returns a DocumentSummary object; create_combined_visualization() returns a base64-encoded string of the combined page images.",
      "settings_required": [
        "Requires PageAnalysis dataclass to be defined with fields: page_number, image_b64, text_content, dimensions, analysis_result (optional), key_elements (optional)",
        "Requires DocumentSummary dataclass to be defined with fields: total_pages, document_type, main_topics, key_findings, page_summaries, overall_summary, confidence_score",
        "PyMuPDF (fitz) library must be installed: pip install PyMuPDF",
        "Pillow library must be installed: pip install Pillow",
        "Logging should be configured before use for proper log output"
      ],
      "source_code": "class MultiPagePDFProcessor:\n    \"\"\"Process multi-page PDFs with context awareness and summarization\"\"\"\n    \n    def __init__(self, max_pages: int = 50, high_quality: bool = True):\n        \"\"\"\n        Initialize multi-page PDF processor\n        \n        Args:\n            max_pages: Maximum number of pages to process (safety limit)\n            high_quality: Use high DPI for better OCR and analysis\n        \"\"\"\n        self.max_pages = max_pages\n        self.high_quality = high_quality\n        self.dpi_scale = 2.0 if high_quality else 1.5\n        self.logger = logging.getLogger(__name__)\n        \n    def extract_all_pages(self, pdf_path: Path) -> Tuple[List[PageAnalysis], Dict[str, Any]]:\n        \"\"\"\n        Extract all pages from PDF with metadata\n        \n        Args:\n            pdf_path: Path to PDF file\n            \n        Returns:\n            Tuple of (list of page analyses, document metadata)\n        \"\"\"\n        if not pdf_path.exists():\n            raise FileNotFoundError(f\"PDF file not found: {pdf_path}\")\n            \n        try:\n            doc = fitz.open(pdf_path)\n            page_count = len(doc)\n            \n            if page_count > self.max_pages:\n                self.logger.warning(f\"PDF has {page_count} pages, processing first {self.max_pages}\")\n                page_count = self.max_pages\n            \n            # Document-level metadata\n            doc_metadata = {\n                'source_type': 'multi_page_pdf',\n                'source_file': str(pdf_path),\n                'total_pages': len(doc),\n                'processed_pages': page_count,\n                'file_size': pdf_path.stat().st_size,\n                'creation_date': doc.metadata.get('creationDate', ''),\n                'title': doc.metadata.get('title', ''),\n                'author': doc.metadata.get('author', ''),\n                'subject': doc.metadata.get('subject', '')\n            }\n            \n            pages = []\n            \n            for page_num in range(page_count):\n                page_analysis = self._extract_single_page(doc, page_num)\n                pages.append(page_analysis)\n                \n                self.logger.info(f\"Extracted page {page_num + 1}/{page_count}: \"\n                               f\"{len(page_analysis.text_content)} chars text\")\n            \n            doc.close()\n            return pages, doc_metadata\n            \n        except Exception as e:\n            self.logger.error(f\"Error extracting PDF pages: {e}\")\n            raise\n    \n    def _extract_single_page(self, doc: fitz.Document, page_num: int) -> PageAnalysis:\n        \"\"\"Extract single page with image and text\"\"\"\n        page = doc[page_num]\n        \n        # Extract text content\n        text_content = page.get_text()\n        \n        # Render page as high-quality image\n        mat = fitz.Matrix(self.dpi_scale, self.dpi_scale)\n        pix = page.get_pixmap(matrix=mat)\n        \n        # Convert to PIL Image\n        img_data = pix.tobytes(\"png\")\n        img = PILImage.open(io.BytesIO(img_data))\n        \n        # Convert to base64\n        buffer = io.BytesIO()\n        img.save(buffer, format='PNG')\n        img_b64 = base64.b64encode(buffer.getvalue()).decode()\n        \n        return PageAnalysis(\n            page_number=page_num + 1,\n            image_b64=img_b64,\n            text_content=text_content.strip(),\n            dimensions=(img.width, img.height)\n 
       )\n    \n    def create_context_aware_prompt(self, pages: List[PageAnalysis], \n                                   current_page: int,\n                                   conversation_context: str = \"\") -> str:\n        \"\"\"\n        Create context-aware prompt for current page analysis\n        \n        Args:\n            pages: All page analyses\n            current_page: Current page number (1-based)\n            conversation_context: Previous conversation context\n            \n        Returns:\n            Context-aware prompt for LLM\n        \"\"\"\n        total_pages = len(pages)\n        current_idx = current_page - 1\n        \n        # Build context from previous pages\n        previous_context = \"\"\n        if current_page > 1:\n            # Include summaries of previous pages\n            prev_summaries = []\n            for i in range(min(3, current_idx)):  # Last 3 pages for context\n                page = pages[current_idx - 1 - i]\n                if page.analysis_result:\n                    prev_summaries.append(f\"Page {page.page_number}: {page.analysis_result[:200]}...\")\n            \n            if prev_summaries:\n                previous_context = \"\\n\\nPrevious pages context:\\n\" + \"\\n\".join(reversed(prev_summaries))\n        \n        # Build forward context (if available)\n        forward_context = \"\"\n        if current_page < total_pages:\n            next_pages_text = []\n            for i in range(current_idx + 1, min(current_idx + 3, total_pages)):  # Next 2 pages\n                if pages[i].text_content:\n                    next_pages_text.append(f\"Page {i + 1} preview: {pages[i].text_content[:100]}...\")\n            \n            if next_pages_text:\n                forward_context = \"\\n\\nUpcoming pages preview:\\n\" + \"\\n\".join(next_pages_text)\n        \n        # Conversation context\n        conv_context = \"\"\n        if conversation_context:\n            conv_context = f\"\\n\\nConversation history:\\n{conversation_context}\"\n        \n        # Main prompt\n        prompt = f\"\"\"You are analyzing page {current_page} of {total_pages} from a document.\n\nDOCUMENT CONTEXT:\n- Current page: {current_page}/{total_pages}\n- This is {'the first' if current_page == 1 else 'a middle' if current_page < total_pages else 'the final'} page\n{previous_context}\n{forward_context}\n{conv_context}\n\nPlease analyze this page considering:\n1. Content on this specific page\n2. How it relates to previous pages (if any)\n3. Document flow and continuity\n4. Key information that should be highlighted\n5. 
Any questions or clarifications needed\n\nProvide a comprehensive analysis that builds upon the document context.\"\"\"\n\n        return prompt\n    \n    def generate_document_summary(self, pages: List[PageAnalysis], \n                                 document_metadata: Dict[str, Any]) -> DocumentSummary:\n        \"\"\"\n        Generate comprehensive document summary\n        \n        Args:\n            pages: All analyzed pages\n            document_metadata: Document-level metadata\n            \n        Returns:\n            DocumentSummary with comprehensive analysis\n        \"\"\"\n        # Extract key information\n        total_pages = len(pages)\n        \n        # Collect page summaries\n        page_summaries = []\n        main_topics = set()\n        key_findings = []\n        \n        for page in pages:\n            if page.analysis_result:\n                # Create page summary\n                summary = f\"Page {page.page_number}: {page.analysis_result[:150]}...\"\n                page_summaries.append(summary)\n                \n                # Extract topics (simplified - could be enhanced with NLP)\n                if page.key_elements:\n                    main_topics.update(page.key_elements)\n        \n        # Determine document type based on content\n        doc_type = self._classify_document_type(pages, document_metadata)\n        \n        # Generate overall summary\n        overall_summary = self._generate_overall_summary(pages, doc_type)\n        \n        # Calculate confidence score\n        confidence = self._calculate_confidence_score(pages)\n        \n        return DocumentSummary(\n            total_pages=total_pages,\n            document_type=doc_type,\n            main_topics=list(main_topics),\n            key_findings=key_findings,\n            page_summaries=page_summaries,\n            overall_summary=overall_summary,\n            confidence_score=confidence\n        )\n    \n    def _classify_document_type(self, pages: List[PageAnalysis], \n                               metadata: Dict[str, Any]) -> str:\n        \"\"\"Classify document type based on content and metadata\"\"\"\n        # Simple classification based on content patterns\n        text_content = \" \".join([page.text_content for page in pages])\n        text_lower = text_content.lower()\n        \n        # Check for common document types\n        if any(word in text_lower for word in ['research', 'study', 'methodology', 'results', 'conclusion']):\n            return 'research_paper'\n        elif any(word in text_lower for word in ['contract', 'agreement', 'terms', 'conditions']):\n            return 'legal_document'\n        elif any(word in text_lower for word in ['financial', 'budget', 'revenue', 'expenses']):\n            return 'financial_report'\n        elif any(word in text_lower for word in ['manual', 'instructions', 'guide', 'how to']):\n            return 'instructional'\n        elif any(word in text_lower for word in ['meeting', 'agenda', 'minutes', 'discussion']):\n            return 'meeting_document'\n        elif len(pages) > 10 and any(word in text_lower for word in ['chapter', 'section']):\n            return 'book_document'\n        else:\n            return 'general_document'\n    \n    def _generate_overall_summary(self, pages: List[PageAnalysis], doc_type: str) -> str:\n        \"\"\"Generate overall document summary\"\"\"\n        total_pages = len(pages)\n        \n        # Count pages with substantial content\n        content_pages = sum(1 for page 
in pages if len(page.text_content) > 100)\n        \n        summary = f\"This {doc_type.replace('_', ' ')} contains {total_pages} pages \"\n        summary += f\"with {content_pages} pages of substantial content. \"\n        \n        if any(page.analysis_result for page in pages):\n            summary += \"The document has been analyzed with AI assistance, \"\n            summary += \"providing detailed insights for each page. \"\n        \n        # Add type-specific summary elements\n        if doc_type == 'research_paper':\n            summary += \"Key sections likely include methodology, results, and conclusions.\"\n        elif doc_type == 'legal_document':\n            summary += \"Contains legal terms and conditions requiring careful review.\"\n        elif doc_type == 'financial_report':\n            summary += \"Includes financial data and metrics for analysis.\"\n        elif doc_type == 'instructional':\n            summary += \"Provides step-by-step guidance and instructions.\"\n        \n        return summary\n    \n    def _calculate_confidence_score(self, pages: List[PageAnalysis]) -> float:\n        \"\"\"Calculate confidence score for document analysis\"\"\"\n        if not pages:\n            return 0.0\n        \n        # Factors contributing to confidence\n        text_coverage = sum(1 for page in pages if page.text_content) / len(pages)\n        analysis_coverage = sum(1 for page in pages if page.analysis_result) / len(pages)\n        \n        # Average text length per page (normalized)\n        avg_text_length = sum(len(page.text_content) for page in pages) / len(pages)\n        text_score = min(avg_text_length / 500, 1.0)  # Normalize to 500 chars\n        \n        # Combined confidence score\n        confidence = (text_coverage * 0.3 + analysis_coverage * 0.4 + text_score * 0.3)\n        \n        return round(confidence, 2)\n    \n    def create_combined_visualization(self, pages: List[PageAnalysis], \n                                    max_width: int = 2400) -> str:\n        \"\"\"\n        Create combined visualization of multiple pages\n        \n        Args:\n            pages: List of page analyses\n            max_width: Maximum width for combined image\n            \n        Returns:\n            Base64 encoded combined image\n        \"\"\"\n        if not pages:\n            raise ValueError(\"No pages to combine\")\n        \n        # Load all page images\n        images = []\n        for page in pages:\n            img_data = base64.b64decode(page.image_b64)\n            img = PILImage.open(io.BytesIO(img_data))\n            images.append(img)\n        \n        # Calculate combined dimensions\n        total_height = sum(img.height for img in images)\n        max_width_actual = max(img.width for img in images)\n        \n        # Scale if necessary\n        if max_width_actual > max_width:\n            scale_factor = max_width / max_width_actual\n            scaled_images = []\n            total_height = 0\n            \n            for img in images:\n                new_width = int(img.width * scale_factor)\n                new_height = int(img.height * scale_factor)\n                scaled_img = img.resize((new_width, new_height), PILImage.Resampling.LANCZOS)\n                scaled_images.append(scaled_img)\n                total_height += new_height\n            \n            images = scaled_images\n            combined_width = max_width\n        else:\n            combined_width = max_width_actual\n        \n        # Create combined 
image\n        combined = PILImage.new('RGB', (combined_width, total_height), 'white')\n        \n        y_offset = 0\n        for img in images:\n            combined.paste(img, (0, y_offset))\n            y_offset += img.height\n        \n        # Convert to base64\n        buffer = io.BytesIO()\n        combined.save(buffer, format='PNG')\n        combined_b64 = base64.b64encode(buffer.getvalue()).decode()\n        \n        return combined_b64",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/multi_page_processor.py",
      "tags": [
        "pdf-processing",
        "document-analysis",
        "ocr",
        "multi-page",
        "context-aware",
        "summarization",
        "image-extraction",
        "document-classification",
        "text-extraction",
        "visualization",
        "pymupdf",
        "fitz"
      ],
      "updated_at": "2025-12-07T01:12:02.485633",
      "usage_example": "from pathlib import Path\nfrom dataclasses import dataclass\nfrom typing import List, Optional\n\n@dataclass\nclass PageAnalysis:\n    page_number: int\n    image_b64: str\n    text_content: str\n    dimensions: tuple\n    analysis_result: Optional[str] = None\n    key_elements: Optional[List[str]] = None\n\n@dataclass\nclass DocumentSummary:\n    total_pages: int\n    document_type: str\n    main_topics: List[str]\n    key_findings: List[str]\n    page_summaries: List[str]\n    overall_summary: str\n    confidence_score: float\n\n# Initialize processor\nprocessor = MultiPagePDFProcessor(max_pages=50, high_quality=True)\n\n# Extract all pages from PDF\npdf_path = Path('document.pdf')\npages, metadata = processor.extract_all_pages(pdf_path)\n\nprint(f\"Processed {len(pages)} pages\")\nprint(f\"Document title: {metadata['title']}\")\n\n# Create context-aware prompt for page 1\nprompt = processor.create_context_aware_prompt(pages, current_page=1)\n\n# Generate document summary\nsummary = processor.generate_document_summary(pages, metadata)\nprint(f\"Document type: {summary.document_type}\")\nprint(f\"Confidence: {summary.confidence_score}\")\n\n# Create combined visualization\ncombined_image = processor.create_combined_visualization(pages, max_width=2400)"
    },
    {
      "best_practices": [
        "Always ensure total_pages matches the length of page_summaries list for consistency",
        "Keep confidence_score between 0.0 and 1.0 to maintain standard probability conventions",
        "Use descriptive and consistent document_type values to enable proper categorization",
        "Populate main_topics with distinct, non-overlapping topics for clarity",
        "Keep key_findings concise and actionable, focusing on the most important insights",
        "Since this is a dataclass, it's immutable by convention - avoid modifying attributes after instantiation",
        "Consider using frozen=True in the @dataclass decorator if true immutability is required",
        "Validate data before instantiation to ensure all required fields are properly populated",
        "Use this class as a return type for document analysis functions to maintain consistent API contracts"
      ],
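      "best_practices_example": "# Sketch only (hypothetical names): validating fields before constructing a summary and opting\n# into true immutability with frozen=True, as recommended above.\nfrom dataclasses import dataclass\nfrom typing import List\n\n@dataclass(frozen=True)\nclass FrozenDocumentSummary:  # hypothetical frozen variant of DocumentSummary\n    total_pages: int\n    document_type: str\n    main_topics: List[str]\n    key_findings: List[str]\n    page_summaries: List[str]\n    overall_summary: str\n    confidence_score: float\n\ndef build_summary(**fields) -> FrozenDocumentSummary:\n    # Validate before instantiation so invalid data never reaches downstream consumers\n    assert 0.0 <= fields['confidence_score'] <= 1.0, 'confidence_score must be in [0.0, 1.0]'\n    assert len(fields['page_summaries']) <= fields['total_pages'], 'more summaries than pages'  # ideally equal, per the practice above\n    return FrozenDocumentSummary(**fields)",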
      "class_interface": {
        "attributes": [
          {
            "description": "Total number of pages in the analyzed document",
            "is_class_variable": false,
            "name": "total_pages",
            "type": "int"
          },
          {
            "description": "Classification or category of the document (e.g., 'report', 'invoice', 'contract')",
            "is_class_variable": false,
            "name": "document_type",
            "type": "str"
          },
          {
            "description": "List of primary topics or themes identified across the document",
            "is_class_variable": false,
            "name": "main_topics",
            "type": "List[str]"
          },
          {
            "description": "List of important findings, conclusions, or insights extracted from the document",
            "is_class_variable": false,
            "name": "key_findings",
            "type": "List[str]"
          },
          {
            "description": "List of summaries for each page, where index corresponds to page number",
            "is_class_variable": false,
            "name": "page_summaries",
            "type": "List[str]"
          },
          {
            "description": "Comprehensive summary synthesizing information from the entire document",
            "is_class_variable": false,
            "name": "overall_summary",
            "type": "str"
          },
          {
            "description": "Confidence level of the analysis results, typically ranging from 0.0 to 1.0",
            "is_class_variable": false,
            "name": "confidence_score",
            "type": "float"
          }
        ],
        "methods": [
          {
            "is_property": false,
            "is_static": false,
            "name": "__init__",
            "parameters": {
              "confidence_score": "Confidence level of the analysis (0.0 to 1.0)",
              "document_type": "Classification or category of the document",
              "key_findings": "List of important findings or conclusions",
              "main_topics": "List of primary topics identified in the document",
              "overall_summary": "Comprehensive summary of the entire document",
              "page_summaries": "List of summaries for each page",
              "total_pages": "Total number of pages in the analyzed document"
            },
            "purpose": "Automatically generated constructor that initializes all instance attributes with provided values",
            "returns": "None (constructor)",
            "signature": "__init__(total_pages: int, document_type: str, main_topics: List[str], key_findings: List[str], page_summaries: List[str], overall_summary: str, confidence_score: float)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "__repr__",
            "parameters": {},
            "purpose": "Automatically generated method that returns a string representation of the object showing all attributes",
            "returns": "String representation in the format 'DocumentSummary(total_pages=..., document_type=..., ...)'",
            "signature": "__repr__() -> str"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "__eq__",
            "parameters": {
              "other": "Another object to compare with"
            },
            "purpose": "Automatically generated method that compares two DocumentSummary instances for equality based on all attributes",
            "returns": "True if all attributes are equal, False otherwise",
            "signature": "__eq__(other) -> bool"
          }
        ]
      },
      "complexity": "simple",
      "component_type": "class",
      "conditional_imports": [],
      "created_at": "2025-12-07 00:11:12",
      "decorators": [
        "dataclass"
      ],
      "dependencies": [
        "dataclasses",
        "typing"
      ],
      "description": "A dataclass that encapsulates comprehensive analysis results of a document, including page-level and document-level summaries, topics, findings, and confidence metrics.",
      "docstring": "Summary of complete document analysis",
      "id": 2008,
      "imports": [
        "import fitz",
        "import base64",
        "import io",
        "from pathlib import Path",
        "from typing import List",
        "from typing import Dict",
        "from typing import Any",
        "from typing import Tuple",
        "from typing import Optional",
        "from dataclasses import dataclass",
        "from PIL import Image as PILImage",
        "import logging",
        "import sys"
      ],
      "imports_required": [
        "from dataclasses import dataclass",
        "from typing import List"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 37,
      "line_start": 29,
      "name": "DocumentSummary",
      "parameters": [
        {
          "annotation": "",
          "default": null,
          "name": "bases"
        }
      ],
      "parameters_explained": {
        "confidence_score": "Float value between 0.0 and 1.0 representing the confidence level of the analysis. Higher values indicate greater confidence in the extracted information.",
        "document_type": "String classification of the document type (e.g., 'report', 'invoice', 'contract', 'research paper'). Helps categorize the document for downstream processing.",
        "key_findings": "List of strings containing the most important findings, conclusions, or insights extracted from the document. Each string represents a significant finding.",
        "main_topics": "List of strings containing the primary topics or themes identified across the entire document. Each string represents a distinct topic.",
        "overall_summary": "String containing a comprehensive summary of the entire document, synthesizing information from all pages.",
        "page_summaries": "List of strings where each element contains the summary of a corresponding page. The list length should match total_pages.",
        "total_pages": "Integer representing the total number of pages analyzed in the document. Must be a positive integer."
      },
      "parent_class": null,
      "purpose": "DocumentSummary serves as a structured data container for storing and transferring complete document analysis results. It aggregates information from multi-page document processing, including individual page summaries, extracted topics, key findings, and an overall document summary with a confidence score. This class is typically used as the output format for document analysis pipelines that process PDFs or other multi-page documents.",
      "return_annotation": null,
      "return_explained": "Instantiation returns a DocumentSummary object containing all the specified attributes. As a dataclass, it automatically generates __init__, __repr__, __eq__, and other methods. The object serves as an immutable-by-convention data structure for passing document analysis results.",
      "settings_required": [],
      "source_code": "class DocumentSummary:\n    \"\"\"Summary of complete document analysis\"\"\"\n    total_pages: int\n    document_type: str\n    main_topics: List[str]\n    key_findings: List[str]\n    page_summaries: List[str]\n    overall_summary: str\n    confidence_score: float",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/multi_page_processor.py",
      "tags": [
        "dataclass",
        "document-analysis",
        "data-structure",
        "summary",
        "nlp",
        "document-processing",
        "metadata",
        "pdf-analysis",
        "text-extraction"
      ],
      "updated_at": "2025-12-07T01:11:12.683277",
      "usage_example": "from dataclasses import dataclass\nfrom typing import List\n\n@dataclass\nclass DocumentSummary:\n    total_pages: int\n    document_type: str\n    main_topics: List[str]\n    key_findings: List[str]\n    page_summaries: List[str]\n    overall_summary: str\n    confidence_score: float\n\n# Create a document summary instance\nsummary = DocumentSummary(\n    total_pages=5,\n    document_type='research paper',\n    main_topics=['machine learning', 'natural language processing', 'transformers'],\n    key_findings=['Transformers outperform RNNs', 'Attention mechanism is key'],\n    page_summaries=['Page 1: Introduction to NLP', 'Page 2: Methodology', 'Page 3: Results', 'Page 4: Discussion', 'Page 5: Conclusion'],\n    overall_summary='This paper presents a comprehensive study on transformer models in NLP tasks.',\n    confidence_score=0.92\n)\n\n# Access attributes\nprint(f'Document has {summary.total_pages} pages')\nprint(f'Type: {summary.document_type}')\nprint(f'Confidence: {summary.confidence_score}')\nfor topic in summary.main_topics:\n    print(f'Topic: {topic}')"
    },
    {
      "best_practices": [
        "This is a dataclass, so it should be treated as an immutable data container. Avoid modifying attributes after instantiation unless necessary.",
        "The image_b64 field can contain large amounts of data for high-resolution pages. Consider memory implications when storing many PageAnalysis objects.",
        "Always provide the required fields (page_number, image_b64, text_content, dimensions) during instantiation. Optional fields can be set later if needed.",
        "Use meaningful values for content_type to enable consistent categorization across your application (e.g., establish a fixed set of content types).",
        "The key_elements list should contain standardized element names for consistency in downstream processing.",
        "When serializing PageAnalysis objects (e.g., to JSON), be aware that the image_b64 field may significantly increase payload size.",
        "Page numbers should typically start at 1 to match conventional PDF page numbering, though 0-indexing is also acceptable if used consistently.",
        "The dimensions tuple should represent (width, height) in pixels, matching the resolution of the image_b64 data."
      ],
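      "best_practices_example": "# Sketch only: serializing a PageAnalysis to JSON while keeping the payload small, since the\n# base64 image can dominate its size. Assumes PageAnalysis is defined as in this module.\nimport json\nfrom dataclasses import asdict\n\ndef page_to_json(page, include_image: bool = False) -> str:\n    data = asdict(page)\n    if not include_image:\n        # Replace the image payload with a short note about its size\n        data['image_b64'] = f'<omitted, {len(page.image_b64)} base64 chars>'\n    return json.dumps(data)",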
      "class_interface": {
        "attributes": [
          {
            "description": "The sequential number of the page within the PDF document",
            "is_class_variable": false,
            "name": "page_number",
            "type": "int"
          },
          {
            "description": "Base64-encoded string representation of the page rendered as an image",
            "is_class_variable": false,
            "name": "image_b64",
            "type": "str"
          },
          {
            "description": "The extracted text content from the PDF page",
            "is_class_variable": false,
            "name": "text_content",
            "type": "str"
          },
          {
            "description": "A tuple containing the width and height of the page image in pixels",
            "is_class_variable": false,
            "name": "dimensions",
            "type": "Tuple[int, int]"
          },
          {
            "description": "Optional string containing analysis results, summaries, or structured output from page analysis",
            "is_class_variable": false,
            "name": "analysis_result",
            "type": "Optional[str]"
          },
          {
            "description": "Optional classification of the page content type (e.g., 'table', 'text', 'image', 'mixed')",
            "is_class_variable": false,
            "name": "content_type",
            "type": "Optional[str]"
          },
          {
            "description": "Optional list of identified key elements or features on the page (e.g., headers, tables, charts)",
            "is_class_variable": false,
            "name": "key_elements",
            "type": "Optional[List[str]]"
          }
        ],
        "methods": [
          {
            "is_property": false,
            "is_static": false,
            "name": "__init__",
            "parameters": {
              "analysis_result": "Optional analysis results or summary",
              "content_type": "Optional content type classification",
              "dimensions": "Tuple of (width, height) in pixels",
              "image_b64": "Base64-encoded image representation of the page",
              "key_elements": "Optional list of identified key elements",
              "page_number": "The page number within the PDF document",
              "text_content": "Extracted text from the page"
            },
            "purpose": "Initializes a new PageAnalysis instance with the provided page data and optional analysis results. Auto-generated by the dataclass decorator.",
            "returns": "None (constructor)",
            "signature": "__init__(page_number: int, image_b64: str, text_content: str, dimensions: Tuple[int, int], analysis_result: Optional[str] = None, content_type: Optional[str] = None, key_elements: Optional[List[str]] = None) -> None"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "__repr__",
            "parameters": {},
            "purpose": "Returns a string representation of the PageAnalysis object showing all field values. Auto-generated by the dataclass decorator.",
            "returns": "String representation of the object in the format 'PageAnalysis(page_number=..., image_b64=..., ...)'",
            "signature": "__repr__() -> str"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "__eq__",
            "parameters": {
              "other": "Another object to compare with"
            },
            "purpose": "Compares two PageAnalysis objects for equality based on all field values. Auto-generated by the dataclass decorator.",
            "returns": "True if all fields are equal, False otherwise",
            "signature": "__eq__(other: object) -> bool"
          }
        ]
      },
      "complexity": "simple",
      "component_type": "class",
      "conditional_imports": [],
      "created_at": "2025-12-07 00:10:42",
      "decorators": [
        "dataclass"
      ],
      "dependencies": [
        "dataclasses",
        "typing"
      ],
      "description": "A dataclass that encapsulates the analysis results for a single PDF page, including its image representation, text content, dimensions, and optional analysis metadata.",
      "docstring": "Analysis result for a single PDF page",
      "id": 2007,
      "imports": [
        "import fitz",
        "import base64",
        "import io",
        "from pathlib import Path",
        "from typing import List",
        "from typing import Dict",
        "from typing import Any",
        "from typing import Tuple",
        "from typing import Optional",
        "from dataclasses import dataclass",
        "from PIL import Image as PILImage",
        "import logging",
        "import sys"
      ],
      "imports_required": [
        "from dataclasses import dataclass",
        "from typing import Tuple, Optional, List"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 26,
      "line_start": 18,
      "name": "PageAnalysis",
      "parameters": [
        {
          "annotation": "",
          "default": null,
          "name": "bases"
        }
      ],
      "parameters_explained": {
        "analysis_result": "Optional string containing the results of any analysis performed on the page (e.g., summary, classification results, or structured analysis output). Defaults to None if no analysis has been performed.",
        "content_type": "Optional string indicating the type or category of content on the page (e.g., 'table', 'text', 'image', 'mixed'). Defaults to None if not classified.",
        "dimensions": "A tuple of two integers (width, height) representing the pixel dimensions of the page image.",
        "image_b64": "Base64-encoded string representation of the page rendered as an image. This allows the visual content of the page to be stored and transmitted as text.",
        "key_elements": "Optional list of strings identifying important elements or features found on the page (e.g., ['header', 'table', 'chart']). Defaults to None if not analyzed.",
        "page_number": "The sequential number of the page within the PDF document (typically 1-indexed). Used to identify and order pages within a document.",
        "text_content": "The extracted text content from the PDF page. Contains all readable text elements found on the page."
      },
      "parent_class": null,
      "purpose": "PageAnalysis serves as a structured data container for storing comprehensive information about a single PDF page after processing and analysis. It holds the page's visual representation (as base64-encoded image), extracted text content, page dimensions, and optional analysis results such as content type classification and key elements identification. This class is typically used in PDF processing pipelines where pages need to be analyzed individually and their results stored in a structured format for further processing or reporting.",
      "return_annotation": null,
      "return_explained": "Instantiation returns a PageAnalysis object containing all the specified attributes. As a dataclass, it automatically generates __init__, __repr__, __eq__, and other special methods. The object serves as an immutable-by-convention data container for page analysis results.",
      "settings_required": [],
      "source_code": "class PageAnalysis:\n    \"\"\"Analysis result for a single PDF page\"\"\"\n    page_number: int\n    image_b64: str\n    text_content: str\n    dimensions: Tuple[int, int]\n    analysis_result: Optional[str] = None\n    content_type: Optional[str] = None\n    key_elements: Optional[List[str]] = None",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/multi_page_processor.py",
      "tags": [
        "dataclass",
        "pdf-processing",
        "page-analysis",
        "data-container",
        "document-processing",
        "image-storage",
        "text-extraction",
        "metadata"
      ],
      "updated_at": "2025-12-07T01:10:42.369808",
      "usage_example": "from dataclasses import dataclass\nfrom typing import Tuple, Optional, List\n\n@dataclass\nclass PageAnalysis:\n    page_number: int\n    image_b64: str\n    text_content: str\n    dimensions: Tuple[int, int]\n    analysis_result: Optional[str] = None\n    content_type: Optional[str] = None\n    key_elements: Optional[List[str]] = None\n\n# Create a PageAnalysis instance for a simple page\npage_analysis = PageAnalysis(\n    page_number=1,\n    image_b64=\"iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg==\",\n    text_content=\"This is the text content of page 1.\",\n    dimensions=(800, 1100)\n)\n\n# Create a PageAnalysis instance with optional fields\ndetailed_analysis = PageAnalysis(\n    page_number=2,\n    image_b64=\"base64_encoded_image_data_here\",\n    text_content=\"Page 2 contains a table and chart.\",\n    dimensions=(800, 1100),\n    analysis_result=\"This page contains financial data with a summary table and trend chart.\",\n    content_type=\"mixed\",\n    key_elements=[\"table\", \"chart\", \"header\"]\n)\n\n# Access attributes\nprint(f\"Page {page_analysis.page_number}: {page_analysis.dimensions}\")\nprint(f\"Content type: {detailed_analysis.content_type}\")\nprint(f\"Key elements: {detailed_analysis.key_elements}\")"
    },
    {
      "best_practices": [
        "Always call create_conversation() before adding exchanges to establish a valid conversation context",
        "The database is automatically initialized on instantiation via setup_database()",
        "Use add_exchange() after each user-AI interaction to maintain complete conversation history",
        "Call update_context_summary() periodically to maintain a high-level summary of long conversations",
        "Use get_conversation_context() to provide recent history to LLMs for context-aware responses",
        "The class uses context managers (with statements) for database connections, ensuring proper resource cleanup",
        "Exchange sequence numbers are automatically managed and incremented",
        "Conversation IDs are unique and include timestamps for easy identification",
        "The metadata parameter in add_exchange() allows storing arbitrary JSON-serializable data",
        "Database operations are atomic within each method using transactions",
        "Consider the max_exchanges parameter in get_conversation_context() to balance context richness vs token usage"
      ],
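      "best_practices_example": "# Sketch of the lifecycle described above: create a conversation, pull recent context before each\n# model call, then record the exchange. call_llm and the file names are hypothetical placeholders.\nmgr = SessionManager(db_path='eink_sessions.db')\nconv_id = mgr.create_conversation(user_id='user123')\nfor pdf_path in ['first.pdf', 'second.pdf']:\n    context = mgr.get_conversation_context(conv_id, max_exchanges=3)\n    response_text, seconds, tokens = call_llm(pdf_path, context)  # hypothetical helper\n    mgr.add_exchange(conv_id, input_file=pdf_path, input_type='pdf',\n                     response_text=response_text, processing_time=seconds, tokens_used=tokens)",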
      "class_interface": {
        "attributes": [
          {
            "description": "Path object pointing to the SQLite database file location",
            "is_class_variable": false,
            "name": "db_path",
            "type": "Path"
          }
        ],
        "methods": [
          {
            "is_property": false,
            "is_static": false,
            "name": "__init__",
            "parameters": {
              "db_path": "Path to SQLite database file, defaults to 'eink_sessions.db'"
            },
            "purpose": "Initialize the SessionManager with a database path and set up the database schema",
            "returns": "None (constructor)",
            "signature": "__init__(self, db_path: str = 'eink_sessions.db')"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "setup_database",
            "parameters": {},
            "purpose": "Initialize SQLite database schema with conversations and exchanges tables",
            "returns": "None - creates database tables if they don't exist",
            "signature": "setup_database(self)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "create_conversation",
            "parameters": {
              "user_id": "Optional identifier for the user starting the conversation"
            },
            "purpose": "Create a new conversation with a unique ID and initialize it in the database",
            "returns": "Unique conversation ID string in format 'conv_YYYYMMDD_HHMMSS_<8-char-uuid>'",
            "signature": "create_conversation(self, user_id: Optional[str] = None) -> str"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "add_exchange",
            "parameters": {
              "conversation_id": "ID of the conversation to add the exchange to",
              "input_file": "Path or name of the input file",
              "input_type": "Type of input (e.g., 'pdf', 'text', 'image')",
              "metadata": "Optional dictionary of additional metadata to store",
              "processing_time": "Time taken to process the exchange in seconds",
              "response_text": "The AI's response text",
              "tokens_used": "Number of tokens consumed in the exchange"
            },
            "purpose": "Add a new exchange (user input and AI response) to an existing conversation",
            "returns": "Unique exchange ID string in format 'ex_<8-char-uuid>'",
            "signature": "add_exchange(self, conversation_id: str, input_file: str, input_type: str, response_text: str, processing_time: float, tokens_used: int, metadata: Dict[str, Any] = None) -> str"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "get_conversation",
            "parameters": {
              "conversation_id": "ID of the conversation to retrieve"
            },
            "purpose": "Retrieve complete conversation state including all exchanges",
            "returns": "ConversationState object containing all conversation data and exchanges, or None if not found",
            "signature": "get_conversation(self, conversation_id: str) -> Optional[ConversationState]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "update_context_summary",
            "parameters": {
              "conversation_id": "ID of the conversation to update",
              "summary": "New context summary text"
            },
            "purpose": "Update the high-level context summary for a conversation",
            "returns": "None - updates database in place",
            "signature": "update_context_summary(self, conversation_id: str, summary: str)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "generate_session_filename",
            "parameters": {
              "conversation_id": "ID of the conversation",
              "exchange_number": "Sequence number of the exchange",
              "is_error": "Whether this is an error response (changes prefix to ERROR)",
              "original_filename": "Original filename to base the new name on"
            },
            "purpose": "Generate a session-aware filename that includes conversation and exchange context",
            "returns": "Formatted filename string like 'RESPONSE_conv_id_ex001_basename.pdf' or 'ERROR_conv_id_ex001_basename.pdf'",
            "signature": "generate_session_filename(self, conversation_id: str, exchange_number: int, original_filename: str, is_error: bool = False) -> str"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "get_conversation_context",
            "parameters": {
              "conversation_id": "ID of the conversation",
              "max_exchanges": "Maximum number of recent exchanges to include (default 3)"
            },
            "purpose": "Get formatted recent conversation context suitable for providing to an LLM",
            "returns": "Formatted string containing context summary and recent exchanges, or empty string if conversation not found",
            "signature": "get_conversation_context(self, conversation_id: str, max_exchanges: int = 3) -> str"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "list_active_conversations",
            "parameters": {},
            "purpose": "List all active conversations ordered by most recent activity",
            "returns": "List of dictionaries, each containing conversation_id, user_id, created_at, last_activity, and total_exchanges",
            "signature": "list_active_conversations(self) -> List[Dict[str, Any]]"
          }
        ]
      },
      "complexity": "moderate",
      "component_type": "class",
      "conditional_imports": [],
      "created_at": "2025-12-07 00:09:48",
      "decorators": [],
      "dependencies": [
        "uuid",
        "json",
        "sqlite3",
        "datetime",
        "pathlib",
        "typing",
        "dataclasses"
      ],
      "description": "SessionManager is a class that manages conversation sessions and tracking using SQLite database, storing conversations and their exchanges with metadata.",
      "docstring": "Manages conversation sessions and tracking",
      "id": 2005,
      "imports": [
        "import uuid",
        "import json",
        "import sqlite3",
        "from datetime import datetime",
        "from pathlib import Path",
        "from typing import Dict",
        "from typing import List",
        "from typing import Optional",
        "from typing import Any",
        "from dataclasses import dataclass",
        "from dataclasses import asdict"
      ],
      "imports_required": [
        "import uuid",
        "import json",
        "import sqlite3",
        "from datetime import datetime",
        "from pathlib import Path",
        "from typing import Dict, List, Optional, Any",
        "from dataclasses import dataclass, asdict"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 255,
      "line_start": 42,
      "name": "SessionManager_v1",
      "parameters": [
        {
          "annotation": "",
          "default": null,
          "name": "bases"
        }
      ],
      "parameters_explained": {
        "db_path": "Path to the SQLite database file for storing session data. Defaults to 'eink_sessions.db'. The parent directory will be created if it doesn't exist. This parameter allows customization of where session data is persisted."
      },
      "parent_class": null,
      "purpose": "This class provides comprehensive session management for conversational AI applications. It handles creating and tracking conversations, storing individual exchanges (user inputs and AI responses), maintaining conversation context, and generating session-aware filenames. It uses SQLite for persistent storage and supports features like conversation history retrieval, context summarization, and active conversation listing. Ideal for applications that need to maintain stateful conversations across multiple interactions.",
      "return_annotation": null,
      "return_explained": "Instantiation returns a SessionManager object that manages conversation sessions. Key method returns: create_conversation() returns a unique conversation ID string; add_exchange() returns an exchange ID string; get_conversation() returns a ConversationState object or None; get_conversation_context() returns a formatted string with recent conversation history; list_active_conversations() returns a list of dictionaries containing conversation metadata.",
      "settings_required": [
        "Requires ConversationState and Exchange dataclasses to be defined (referenced but not shown in source)",
        "Write permissions for the database file location",
        "SQLite3 support (typically built into Python)"
      ],
      "source_code": "class SessionManager:\n    \"\"\"Manages conversation sessions and tracking\"\"\"\n    \n    def __init__(self, db_path: str = \"eink_sessions.db\"):\n        self.db_path = Path(db_path)\n        self.db_path.parent.mkdir(exist_ok=True)\n        self.setup_database()\n        print(f\"\ud83d\udcca Session manager initialized: {self.db_path}\")\n    \n    def setup_database(self):\n        \"\"\"Initialize SQLite database for session tracking\"\"\"\n        with sqlite3.connect(self.db_path) as conn:\n            conn.execute(\"\"\"\n            CREATE TABLE IF NOT EXISTS conversations (\n                conversation_id TEXT PRIMARY KEY,\n                user_id TEXT,\n                created_at TEXT,\n                last_activity TEXT,\n                total_exchanges INTEGER DEFAULT 0,\n                status TEXT DEFAULT 'active',\n                context_summary TEXT DEFAULT '',\n                metadata TEXT DEFAULT '{}'\n            )\n            \"\"\")\n            \n            conn.execute(\"\"\"\n            CREATE TABLE IF NOT EXISTS exchanges (\n                exchange_id TEXT PRIMARY KEY,\n                conversation_id TEXT,\n                sequence_number INTEGER,\n                input_file TEXT,\n                input_type TEXT,\n                response_text TEXT,\n                processing_time REAL,\n                tokens_used INTEGER,\n                created_at TEXT,\n                metadata TEXT DEFAULT '{}',\n                FOREIGN KEY (conversation_id) REFERENCES conversations(conversation_id)\n            )\n            \"\"\")\n            \n            conn.commit()\n    \n    def create_conversation(self, user_id: Optional[str] = None) -> str:\n        \"\"\"Create a new conversation with unique ID\"\"\"\n        timestamp = datetime.now().strftime(\"%Y%m%d_%H%M%S\")\n        short_uuid = uuid.uuid4().hex[:8]\n        conversation_id = f\"conv_{timestamp}_{short_uuid}\"\n        \n        now = datetime.now().isoformat()\n        \n        with sqlite3.connect(self.db_path) as conn:\n            conn.execute(\"\"\"\n            INSERT INTO conversations \n            (conversation_id, user_id, created_at, last_activity, status)\n            VALUES (?, ?, ?, ?, 'active')\n            \"\"\", (conversation_id, user_id, now, now))\n            conn.commit()\n        \n        print(f\"\ud83c\udd94 New conversation started: {conversation_id}\")\n        return conversation_id\n    \n    def add_exchange(self, conversation_id: str, input_file: str, input_type: str, \n                    response_text: str, processing_time: float, tokens_used: int,\n                    metadata: Dict[str, Any] = None) -> str:\n        \"\"\"Add an exchange to a conversation\"\"\"\n        \n        if metadata is None:\n            metadata = {}\n        \n        exchange_id = f\"ex_{uuid.uuid4().hex[:8]}\"\n        now = datetime.now().isoformat()\n        \n        with sqlite3.connect(self.db_path) as conn:\n            # Get current exchange count\n            cursor = conn.execute(\n                \"SELECT total_exchanges FROM conversations WHERE conversation_id = ?\",\n                (conversation_id,)\n            )\n            result = cursor.fetchone()\n            if not result:\n                raise ValueError(f\"Conversation {conversation_id} not found\")\n            \n            sequence_number = result[0] + 1\n            \n            # Insert exchange\n            conn.execute(\"\"\"\n            INSERT INTO exchanges \n          
  (exchange_id, conversation_id, sequence_number, input_file, input_type,\n             response_text, processing_time, tokens_used, created_at, metadata)\n            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)\n            \"\"\", (exchange_id, conversation_id, sequence_number, input_file, input_type,\n                  response_text, processing_time, tokens_used, now, json.dumps(metadata)))\n            \n            # Update conversation\n            conn.execute(\"\"\"\n            UPDATE conversations \n            SET total_exchanges = ?, last_activity = ?\n            WHERE conversation_id = ?\n            \"\"\", (sequence_number, now, conversation_id))\n            \n            conn.commit()\n        \n        print(f\"\ud83d\udcdd Exchange added: {conversation_id} #{sequence_number}\")\n        return exchange_id\n    \n    def get_conversation(self, conversation_id: str) -> Optional[ConversationState]:\n        \"\"\"Retrieve conversation state\"\"\"\n        with sqlite3.connect(self.db_path) as conn:\n            # Get conversation info\n            cursor = conn.execute(\"\"\"\n            SELECT conversation_id, user_id, created_at, last_activity, \n                   total_exchanges, status, context_summary, metadata\n            FROM conversations WHERE conversation_id = ?\n            \"\"\", (conversation_id,))\n            \n            conv_row = cursor.fetchone()\n            if not conv_row:\n                return None\n            \n            # Get exchanges\n            cursor = conn.execute(\"\"\"\n            SELECT exchange_id, sequence_number, input_file, input_type,\n                   response_text, processing_time, tokens_used, created_at, metadata\n            FROM exchanges WHERE conversation_id = ? ORDER BY sequence_number\n            \"\"\", (conversation_id,))\n            \n            exchanges = []\n            for ex_row in cursor.fetchall():\n                exchange = Exchange(\n                    exchange_id=ex_row[0],\n                    sequence_number=ex_row[1],\n                    input_file=ex_row[2],\n                    input_type=ex_row[3],\n                    response_text=ex_row[4],\n                    processing_time=ex_row[5],\n                    tokens_used=ex_row[6],\n                    created_at=ex_row[7],\n                    metadata=json.loads(ex_row[8] or '{}')\n                )\n                exchanges.append(exchange)\n            \n            return ConversationState(\n                conversation_id=conv_row[0],\n                user_id=conv_row[1],\n                created_at=conv_row[2],\n                last_activity=conv_row[3],\n                total_exchanges=conv_row[4],\n                status=conv_row[5],\n                exchanges=exchanges,\n                context_summary=conv_row[6] or '',\n                metadata=json.loads(conv_row[7] or '{}')\n            )\n    \n    def update_context_summary(self, conversation_id: str, summary: str):\n        \"\"\"Update conversation context summary\"\"\"\n        with sqlite3.connect(self.db_path) as conn:\n            conn.execute(\"\"\"\n            UPDATE conversations SET context_summary = ? 
WHERE conversation_id = ?\n            \"\"\", (summary, conversation_id))\n            conn.commit()\n    \n    def generate_session_filename(self, conversation_id: str, exchange_number: int, \n                                 original_filename: str, is_error: bool = False) -> str:\n        \"\"\"Generate filename with session context\"\"\"\n        base_name = Path(original_filename).stem\n        \n        if is_error:\n            prefix = \"ERROR\"\n        else:\n            prefix = \"RESPONSE\"\n            \n        return f\"{prefix}_{conversation_id}_ex{exchange_number:03d}_{base_name}.pdf\"\n    \n    def get_conversation_context(self, conversation_id: str, max_exchanges: int = 3) -> str:\n        \"\"\"Get recent conversation context for LLM\"\"\"\n        conversation = self.get_conversation(conversation_id)\n        if not conversation or not conversation.exchanges:\n            return \"\"\n        \n        # Get last N exchanges for context\n        recent_exchanges = conversation.exchanges[-max_exchanges:]\n        \n        context_parts = []\n        if conversation.context_summary:\n            context_parts.append(f\"Context: {conversation.context_summary}\")\n        \n        context_parts.append(f\"Recent exchanges from conversation {conversation_id}:\")\n        \n        for ex in recent_exchanges:\n            context_parts.append(f\"Exchange #{ex.sequence_number}: {ex.input_type} -> {ex.response_text[:100]}...\")\n        \n        return \"\\n\".join(context_parts)\n    \n    def list_active_conversations(self) -> List[Dict[str, Any]]:\n        \"\"\"List all active conversations\"\"\"\n        with sqlite3.connect(self.db_path) as conn:\n            cursor = conn.execute(\"\"\"\n            SELECT conversation_id, user_id, created_at, last_activity, total_exchanges\n            FROM conversations WHERE status = 'active'\n            ORDER BY last_activity DESC\n            \"\"\")\n            \n            conversations = []\n            for row in cursor.fetchall():\n                conversations.append({\n                    'conversation_id': row[0],\n                    'user_id': row[1],\n                    'created_at': row[2],\n                    'last_activity': row[3],\n                    'total_exchanges': row[4]\n                })\n            \n            return conversations",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/session_manager.py",
      "tags": [
        "session-management",
        "conversation-tracking",
        "database",
        "sqlite",
        "state-management",
        "chat-history",
        "context-management",
        "persistence",
        "conversational-ai"
      ],
      "updated_at": "2025-12-07T01:09:48.364338",
      "usage_example": "# Initialize the session manager\nsession_mgr = SessionManager(db_path=\"my_sessions.db\")\n\n# Create a new conversation\nconv_id = session_mgr.create_conversation(user_id=\"user123\")\n\n# Add an exchange to the conversation\nexchange_id = session_mgr.add_exchange(\n    conversation_id=conv_id,\n    input_file=\"document.pdf\",\n    input_type=\"pdf\",\n    response_text=\"This document discusses...\",\n    processing_time=2.5,\n    tokens_used=150,\n    metadata={\"model\": \"gpt-4\"}\n)\n\n# Retrieve conversation state\nconversation = session_mgr.get_conversation(conv_id)\nif conversation:\n    print(f\"Total exchanges: {conversation.total_exchanges}\")\n\n# Get recent context for LLM\ncontext = session_mgr.get_conversation_context(conv_id, max_exchanges=3)\n\n# Generate session-aware filename\nfilename = session_mgr.generate_session_filename(\n    conversation_id=conv_id,\n    exchange_number=1,\n    original_filename=\"input.pdf\"\n)\n\n# List all active conversations\nactive_convs = session_mgr.list_active_conversations()\nfor conv in active_convs:\n    print(f\"{conv['conversation_id']}: {conv['total_exchanges']} exchanges\")"
    },
    {
      "best_practices": [
        "Always initialize conversation_id with a unique value (e.g., using uuid.uuid4())",
        "Use ISO format strings for created_at and last_activity timestamps for consistency and parseability",
        "Update last_activity timestamp whenever the conversation state changes",
        "Increment total_exchanges counter when adding new exchanges to the exchanges list",
        "Ensure status only contains valid values: 'active', 'completed', or 'archived'",
        "Use the metadata dictionary for extensible properties rather than adding new attributes",
        "Consider using asdict() from dataclasses module when serializing to JSON or database",
        "Keep context_summary updated to maintain conversation continuity across sessions",
        "Handle user_id as Optional to support both authenticated and anonymous conversations",
        "Validate that exchanges list length matches total_exchanges for data integrity"
      ],
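      "best_practices_example": "# Sketch only: a small helper that keeps the invariants listed above in sync\n# (total_exchanges == len(exchanges), last_activity refreshed on every change).\n# Assumes ConversationState and Exchange from this module.\nfrom datetime import datetime\n\ndef record_exchange(state: ConversationState, exchange: Exchange) -> None:\n    state.exchanges.append(exchange)\n    state.total_exchanges = len(state.exchanges)\n    state.last_activity = datetime.now().isoformat()",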
      "class_interface": {
        "attributes": [
          {
            "description": "Unique identifier for the conversation",
            "is_class_variable": false,
            "name": "conversation_id",
            "type": "str"
          },
          {
            "description": "Optional identifier for the user participating in the conversation",
            "is_class_variable": false,
            "name": "user_id",
            "type": "Optional[str]"
          },
          {
            "description": "ISO format timestamp string of when the conversation was created",
            "is_class_variable": false,
            "name": "created_at",
            "type": "str"
          },
          {
            "description": "ISO format timestamp string of the most recent activity",
            "is_class_variable": false,
            "name": "last_activity",
            "type": "str"
          },
          {
            "description": "Total count of message exchanges in the conversation",
            "is_class_variable": false,
            "name": "total_exchanges",
            "type": "int"
          },
          {
            "description": "Current status of the conversation: 'active', 'completed', or 'archived'",
            "is_class_variable": false,
            "name": "status",
            "type": "str"
          },
          {
            "description": "List of Exchange objects representing the conversation history",
            "is_class_variable": false,
            "name": "exchanges",
            "type": "List[Exchange]"
          },
          {
            "description": "Summary or description of the conversation context",
            "is_class_variable": false,
            "name": "context_summary",
            "type": "str"
          },
          {
            "description": "Dictionary containing arbitrary additional information about the conversation",
            "is_class_variable": false,
            "name": "metadata",
            "type": "Dict[str, Any]"
          }
        ],
        "methods": [
          {
            "is_property": false,
            "is_static": false,
            "name": "__init__",
            "parameters": {
              "context_summary": "Summary of conversation context",
              "conversation_id": "Unique identifier for the conversation",
              "created_at": "ISO timestamp of conversation creation",
              "exchanges": "List of Exchange objects",
              "last_activity": "ISO timestamp of last activity",
              "metadata": "Additional metadata dictionary",
              "status": "Conversation status string",
              "total_exchanges": "Count of exchanges",
              "user_id": "Optional user identifier"
            },
            "purpose": "Automatically generated constructor by @dataclass decorator that initializes all instance attributes",
            "returns": "None (constructor)",
            "signature": "__init__(conversation_id: str, user_id: Optional[str], created_at: str, last_activity: str, total_exchanges: int, status: str, exchanges: List[Exchange], context_summary: str, metadata: Dict[str, Any]) -> None"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "__repr__",
            "parameters": {},
            "purpose": "Automatically generated string representation method by @dataclass decorator",
            "returns": "String representation of the ConversationState object showing all attributes",
            "signature": "__repr__() -> str"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "__eq__",
            "parameters": {
              "other": "Another object to compare with"
            },
            "purpose": "Automatically generated equality comparison method by @dataclass decorator",
            "returns": "True if all attributes are equal, False otherwise",
            "signature": "__eq__(other: object) -> bool"
          }
        ]
      },
      "complexity": "simple",
      "component_type": "class",
      "conditional_imports": [],
      "created_at": "2025-12-07 00:09:11",
      "decorators": [
        "dataclass"
      ],
      "dependencies": [
        "dataclasses",
        "typing"
      ],
      "description": "A dataclass that represents the complete state of a conversation, including its metadata, exchanges, and lifecycle information.",
      "docstring": "Represents the state of a conversation",
      "id": 2004,
      "imports": [
        "import uuid",
        "import json",
        "import sqlite3",
        "from datetime import datetime",
        "from pathlib import Path",
        "from typing import Dict",
        "from typing import List",
        "from typing import Optional",
        "from typing import Any",
        "from dataclasses import dataclass",
        "from dataclasses import asdict"
      ],
      "imports_required": [
        "from dataclasses import dataclass",
        "from typing import Dict, List, Optional, Any"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 40,
      "line_start": 30,
      "name": "ConversationState",
      "parameters": [
        {
          "annotation": "",
          "default": null,
          "name": "bases"
        }
      ],
      "parameters_explained": {
        "context_summary": "String containing a summary or description of the conversation context, useful for maintaining conversation continuity",
        "conversation_id": "Unique identifier for the conversation, typically a string UUID or similar unique value",
        "created_at": "ISO format timestamp string indicating when the conversation was initiated",
        "exchanges": "List of Exchange objects representing the conversation history, where each Exchange contains a user message and assistant response",
        "last_activity": "ISO format timestamp string indicating the most recent activity in the conversation",
        "metadata": "Dictionary containing arbitrary additional information about the conversation, allowing for flexible extension of conversation properties",
        "status": "String indicating the current state of the conversation. Valid values are 'active', 'completed', or 'archived'",
        "total_exchanges": "Integer count of the total number of exchanges (message pairs) in the conversation",
        "user_id": "Optional identifier for the user participating in the conversation. Can be None for anonymous conversations"
      },
      "parent_class": null,
      "purpose": "ConversationState serves as a data container for tracking and managing conversation sessions. It stores all relevant information about a conversation including its unique identifier, user association, timing information, exchange history, current status, and contextual metadata. This class is typically used in conversation management systems, chatbots, or dialogue systems to maintain state across multiple interactions.",
      "return_annotation": null,
      "return_explained": "As a dataclass, instantiation returns a ConversationState object with all specified attributes initialized. The dataclass decorator automatically generates __init__, __repr__, __eq__, and other methods. The object can be converted to a dictionary using asdict() from the dataclasses module.",
      "settings_required": [
        "Exchange class must be defined in the same module or imported, as it is used in the exchanges attribute type hint"
      ],
      "source_code": "class ConversationState:\n    \"\"\"Represents the state of a conversation\"\"\"\n    conversation_id: str\n    user_id: Optional[str]\n    created_at: str\n    last_activity: str\n    total_exchanges: int\n    status: str  # 'active', 'completed', 'archived'\n    exchanges: List[Exchange]\n    context_summary: str\n    metadata: Dict[str, Any]",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/session_manager.py",
      "tags": [
        "dataclass",
        "conversation",
        "state-management",
        "chatbot",
        "dialogue-system",
        "data-container",
        "conversation-tracking",
        "session-management"
      ],
      "updated_at": "2025-12-07T01:09:11.339262",
      "usage_example": "from dataclasses import dataclass, asdict\nfrom typing import Dict, List, Optional, Any\nfrom datetime import datetime\nimport uuid\n\n# Assuming Exchange class is defined\n@dataclass\nclass Exchange:\n    user_message: str\n    assistant_response: str\n    timestamp: str\n\n@dataclass\nclass ConversationState:\n    conversation_id: str\n    user_id: Optional[str]\n    created_at: str\n    last_activity: str\n    total_exchanges: int\n    status: str\n    exchanges: List[Exchange]\n    context_summary: str\n    metadata: Dict[str, Any]\n\n# Create a new conversation state\nconversation = ConversationState(\n    conversation_id=str(uuid.uuid4()),\n    user_id=\"user123\",\n    created_at=datetime.now().isoformat(),\n    last_activity=datetime.now().isoformat(),\n    total_exchanges=0,\n    status=\"active\",\n    exchanges=[],\n    context_summary=\"Initial conversation about Python\",\n    metadata={\"source\": \"web\", \"language\": \"en\"}\n)\n\n# Add an exchange\nexchange = Exchange(\n    user_message=\"Hello\",\n    assistant_response=\"Hi there!\",\n    timestamp=datetime.now().isoformat()\n)\nconversation.exchanges.append(exchange)\nconversation.total_exchanges += 1\nconversation.last_activity = datetime.now().isoformat()\n\n# Convert to dictionary for serialization\nconversation_dict = asdict(conversation)\n\n# Update status\nconversation.status = \"completed\""
    },
    {
      "best_practices": [
        "Always generate unique exchange_id values using uuid.uuid4() to avoid collisions",
        "Use ISO 8601 format for created_at timestamps (datetime.now().isoformat()) for consistent parsing",
        "Increment sequence_number sequentially within a conversation to maintain proper ordering",
        "Store only serializable data in the metadata dictionary (strings, numbers, lists, dicts) to enable JSON conversion",
        "Use asdict() from dataclasses module to convert Exchange instances to dictionaries for database storage or API responses",
        "Consider making Exchange frozen (frozen=True in @dataclass decorator) if immutability is desired",
        "Validate input_type against a predefined set of allowed types to maintain data consistency",
        "Store processing_time with sufficient precision (float) for accurate performance analysis",
        "Keep response_text as the primary content field; use metadata for supplementary information"
      ],
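      "best_practices_example": "# Sketch only: building an Exchange with a uuid4-based id and an ISO 8601 timestamp, then\n# converting it to a plain dict for storage, as the practices above recommend.\nimport uuid\nfrom datetime import datetime\nfrom dataclasses import asdict\n\nexchange = Exchange(\n    exchange_id=f'ex_{uuid.uuid4().hex[:8]}',\n    sequence_number=1,\n    input_file='document.pdf',\n    input_type='pdf',\n    response_text='Summary of the document...',\n    processing_time=2.4,\n    tokens_used=150,\n    created_at=datetime.now().isoformat(),\n    metadata={'model': 'gpt-4'}\n)\nrow = asdict(exchange)  # JSON-serializable because metadata holds only basic types",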
      "class_interface": {
        "attributes": [
          {
            "description": "Unique identifier for this exchange",
            "is_class_variable": false,
            "name": "exchange_id",
            "type": "str"
          },
          {
            "description": "Order position of this exchange in the conversation",
            "is_class_variable": false,
            "name": "sequence_number",
            "type": "int"
          },
          {
            "description": "Path or filename of the input file processed",
            "is_class_variable": false,
            "name": "input_file",
            "type": "str"
          },
          {
            "description": "Type/format of the input (e.g., 'text', 'image')",
            "is_class_variable": false,
            "name": "input_type",
            "type": "str"
          },
          {
            "description": "The actual response content generated",
            "is_class_variable": false,
            "name": "response_text",
            "type": "str"
          },
          {
            "description": "Time taken to process this exchange in seconds",
            "is_class_variable": false,
            "name": "processing_time",
            "type": "float"
          },
          {
            "description": "Number of tokens consumed in this exchange",
            "is_class_variable": false,
            "name": "tokens_used",
            "type": "int"
          },
          {
            "description": "ISO 8601 timestamp of when the exchange was created",
            "is_class_variable": false,
            "name": "created_at",
            "type": "str"
          },
          {
            "description": "Dictionary of additional metadata about the exchange",
            "is_class_variable": false,
            "name": "metadata",
            "type": "Dict[str, Any]"
          }
        ],
        "methods": [
          {
            "is_property": false,
            "is_static": false,
            "name": "__init__",
            "parameters": {
              "created_at": "ISO timestamp of creation",
              "exchange_id": "Unique identifier string for the exchange",
              "input_file": "Path to input file",
              "input_type": "Type of input content",
              "metadata": "Additional key-value metadata",
              "processing_time": "Time taken in seconds",
              "response_text": "Generated response content",
              "sequence_number": "Integer position in conversation sequence",
              "tokens_used": "Token count for the exchange"
            },
            "purpose": "Automatically generated constructor that initializes an Exchange instance with all required attributes",
            "returns": "None (initializes the instance)",
            "signature": "__init__(exchange_id: str, sequence_number: int, input_file: str, input_type: str, response_text: str, processing_time: float, tokens_used: int, created_at: str, metadata: Dict[str, Any]) -> None"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "__repr__",
            "parameters": {},
            "purpose": "Automatically generated method that returns a string representation of the Exchange instance showing all attributes",
            "returns": "String representation in format 'Exchange(exchange_id=..., sequence_number=..., ...)'",
            "signature": "__repr__() -> str"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "__eq__",
            "parameters": {
              "other": "Another object to compare with"
            },
            "purpose": "Automatically generated method that compares two Exchange instances for equality based on all attributes",
            "returns": "True if all attributes are equal, False otherwise",
            "signature": "__eq__(other: object) -> bool"
          }
        ]
      },
      "complexity": "simple",
      "component_type": "class",
      "conditional_imports": [],
      "created_at": "2025-12-07 00:08:41",
      "decorators": [
        "dataclass"
      ],
      "dependencies": [
        "dataclasses"
      ],
      "description": "A dataclass representing a single exchange (input-response pair) in a conversation, storing metadata about the interaction including timing, tokens, and file information.",
      "docstring": "Represents a single exchange in a conversation",
      "id": 2003,
      "imports": [
        "import uuid",
        "import json",
        "import sqlite3",
        "from datetime import datetime",
        "from pathlib import Path",
        "from typing import Dict",
        "from typing import List",
        "from typing import Optional",
        "from typing import Any",
        "from dataclasses import dataclass",
        "from dataclasses import asdict"
      ],
      "imports_required": [
        "from dataclasses import dataclass",
        "from typing import Dict, Any"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 27,
      "line_start": 17,
      "name": "Exchange",
      "parameters": [
        {
          "annotation": "",
          "default": null,
          "name": "bases"
        }
      ],
      "parameters_explained": {
        "created_at": "ISO 8601 formatted timestamp string indicating when this exchange was created (e.g., '2024-01-15T10:30:00Z').",
        "exchange_id": "Unique identifier for this exchange, typically a UUID string. Used to reference and retrieve specific exchanges from storage.",
        "input_file": "Path or filename of the input file that was processed in this exchange. Can be a relative or absolute path string.",
        "input_type": "String indicating the type/format of the input (e.g., 'text', 'image', 'audio', 'pdf'). Used to determine how the input should be processed or displayed.",
        "metadata": "Dictionary containing additional arbitrary key-value pairs for storing extra information about the exchange (e.g., model name, temperature, user info).",
        "processing_time": "Float value representing the time taken to process this exchange in seconds. Used for performance monitoring and optimization.",
        "response_text": "The actual text response generated or received during this exchange. Contains the main output content.",
        "sequence_number": "Integer representing the order of this exchange within a conversation. Starts from 0 or 1 and increments with each new exchange.",
        "tokens_used": "Integer count of tokens consumed during this exchange. Important for tracking API usage and costs in LLM applications."
      },
      "parent_class": null,
      "purpose": "The Exchange class serves as a structured data container for capturing all relevant information about a single conversational exchange. It tracks the input source, response content, performance metrics (processing time, token usage), and maintains conversation ordering through sequence numbers. This class is typically used in conversation management systems to persist and retrieve interaction history, enabling features like conversation replay, analytics, and audit trails.",
      "return_annotation": null,
      "return_explained": "As a dataclass, instantiation returns an Exchange object with all specified attributes. The class automatically generates __init__, __repr__, __eq__, and other methods. When used with asdict() from dataclasses module, it returns a dictionary representation of the exchange suitable for JSON serialization or database storage.",
      "settings_required": [],
      "source_code": "class Exchange:\n    \"\"\"Represents a single exchange in a conversation\"\"\"\n    exchange_id: str\n    sequence_number: int\n    input_file: str\n    input_type: str\n    response_text: str\n    processing_time: float\n    tokens_used: int\n    created_at: str\n    metadata: Dict[str, Any]",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/session_manager.py",
      "tags": [
        "dataclass",
        "conversation",
        "exchange",
        "data-structure",
        "conversation-history",
        "metadata",
        "tracking",
        "serialization",
        "immutable-data"
      ],
      "updated_at": "2025-12-07T01:08:41.023107",
      "usage_example": "from dataclasses import dataclass, asdict\nfrom typing import Dict, Any\nfrom datetime import datetime\nimport uuid\n\n@dataclass\nclass Exchange:\n    exchange_id: str\n    sequence_number: int\n    input_file: str\n    input_type: str\n    response_text: str\n    processing_time: float\n    tokens_used: int\n    created_at: str\n    metadata: Dict[str, Any]\n\n# Create a new exchange\nexchange = Exchange(\n    exchange_id=str(uuid.uuid4()),\n    sequence_number=1,\n    input_file='user_query.txt',\n    input_type='text',\n    response_text='This is the AI response',\n    processing_time=1.23,\n    tokens_used=150,\n    created_at=datetime.now().isoformat(),\n    metadata={'model': 'gpt-4', 'temperature': 0.7}\n)\n\n# Access attributes\nprint(exchange.exchange_id)\nprint(exchange.response_text)\n\n# Convert to dictionary for serialization\nexchange_dict = asdict(exchange)\nprint(exchange_dict)"
    },
    {
      "best_practices": [
        "Always check if the returned SessionInfo is None before accessing its attributes",
        "The class tries multiple detection methods in order of reliability (metadata > filename > footer > content), stopping early if high confidence (>=0.9) is achieved",
        "Confidence scores range from 0.6 (content detection) to 0.95 (metadata detection), use these to determine if you should trust the detection",
        "The class is stateless - each detection is independent and the detector can be reused for multiple PDFs",
        "Ensure PyPDF2 or pypdf is installed before using this class, otherwise all detections will return None",
        "The conversation ID format expected is 'conv_YYYYMMDD_HHMMSS_XXXXXXXX' where XXXXXXXX is an 8-character hex string",
        "Exchange numbers are expected to be integers, typically formatted as 'ex001', 'ex002', etc. in filenames",
        "For production use, configure logging appropriately to capture detection failures and debug information",
        "The detector reads PDF files from disk, ensure proper file permissions and handle file not found errors",
        "Detection methods fail gracefully - if one method throws an exception, others are still attempted"
      ],
      "class_interface": {
        "attributes": [
          {
            "description": "Logger instance for the class, used to log detection progress, warnings, and debug information",
            "is_class_variable": false,
            "name": "logger",
            "type": "logging.Logger"
          },
          {
            "description": "Compiled regex pattern for matching conversation IDs in format 'conv_YYYYMMDD_HHMMSS_XXXXXXXX'",
            "is_class_variable": false,
            "name": "conv_id_pattern",
            "type": "re.Pattern"
          },
          {
            "description": "Compiled regex pattern for matching exchange numbers in various formats (ex001, exchange 1, ex#1, etc.)",
            "is_class_variable": false,
            "name": "exchange_pattern",
            "type": "re.Pattern"
          },
          {
            "description": "Compiled regex pattern for matching session footer format 'Session: conv_id | Exchange #num'",
            "is_class_variable": false,
            "name": "session_footer_pattern",
            "type": "re.Pattern"
          }
        ],
        "methods": [
          {
            "is_property": false,
            "is_static": false,
            "name": "__init__",
            "parameters": {},
            "purpose": "Initialize the SessionDetector with logger and compile regex patterns for session detection",
            "returns": "None - initializes the instance",
            "signature": "__init__(self)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "detect_session_from_pdf",
            "parameters": {
              "pdf_path": "Path to the PDF file as a string. Can be relative or absolute path."
            },
            "purpose": "Main public method to detect session information from a PDF file using multiple detection methods in order of reliability",
            "returns": "SessionInfo object containing conversation_id, exchange_number, confidence (0.0-1.0), and source if detected; None if no session information found or PDF reading unavailable",
            "signature": "detect_session_from_pdf(self, pdf_path: str) -> Optional[SessionInfo]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_detect_from_metadata",
            "parameters": {
              "pdf_path": "Path object pointing to the PDF file"
            },
            "purpose": "Private method to detect session info from PDF metadata fields (subject, title, creator). Highest confidence method (0.95).",
            "returns": "SessionInfo with confidence 0.95 and source 'metadata' if found, None otherwise",
            "signature": "_detect_from_metadata(self, pdf_path: Path) -> Optional[SessionInfo]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_detect_from_filename",
            "parameters": {
              "pdf_path": "Path object pointing to the PDF file"
            },
            "purpose": "Private method to detect session info from the PDF filename. Looks for patterns like 'conv_YYYYMMDD_HHMMSS_XXXXXXXX_exNNN'. Confidence 0.7-0.9.",
            "returns": "SessionInfo with confidence 0.7-0.9 and source 'filename' if found, None otherwise",
            "signature": "_detect_from_filename(self, pdf_path: Path) -> Optional[SessionInfo]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_detect_from_footer",
            "parameters": {
              "pdf_path": "Path object pointing to the PDF file"
            },
            "purpose": "Private method to detect session info from PDF footer content. Checks last 3 pages for footer pattern 'Session: conv_id | Exchange #num'. Confidence 0.85.",
            "returns": "SessionInfo with confidence 0.85 and source 'footer' if found, None otherwise",
            "signature": "_detect_from_footer(self, pdf_path: Path) -> Optional[SessionInfo]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_detect_from_content",
            "parameters": {
              "pdf_path": "Path object pointing to the PDF file"
            },
            "purpose": "Private method to detect session info from PDF content (first page). Last resort method with lowest confidence (0.6).",
            "returns": "SessionInfo with confidence 0.6 and source 'content' if found, None otherwise",
            "signature": "_detect_from_content(self, pdf_path: Path) -> Optional[SessionInfo]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "get_next_exchange_number",
            "parameters": {
              "conversation_id": "The conversation ID string (currently not used in calculation but provided for future extensibility)",
              "detected_exchange": "The exchange number detected from the PDF"
            },
            "purpose": "Calculate the next exchange number for conversation continuation based on the detected exchange number",
            "returns": "Integer representing the next exchange number (detected_exchange + 1)",
            "signature": "get_next_exchange_number(self, conversation_id: str, detected_exchange: int) -> int"
          }
        ]
      },
      "complexity": "moderate",
      "component_type": "class",
      "conditional_imports": [
        {
          "condition": "Required for PDF reading functionality. If not available, the class will log warnings and return None from detect_session_from_pdf",
          "import": "from PyPDF2 import PdfReader",
          "optional": true
        },
        {
          "condition": "Alternative to PyPDF2 for PDF reading. Either PyPDF2 or pypdf must be installed for the class to function",
          "import": "from pypdf import PdfReader",
          "optional": true
        }
      ],
      "created_at": "2025-12-07 00:07:49",
      "decorators": [],
      "dependencies": [
        "re",
        "logging",
        "pathlib",
        "typing",
        "dataclasses",
        "PyPDF2",
        "pypdf"
      ],
      "description": "Detects session information (conversation ID and exchange number) from PDF files using multiple detection methods including metadata, filename, footer, and content analysis.",
      "docstring": "Detect session information from PDF files for automatic conversation continuation",
      "id": 2001,
      "imports": [
        "import re",
        "import logging",
        "from pathlib import Path",
        "from typing import Optional",
        "from typing import Tuple",
        "from typing import Dict",
        "from typing import Any",
        "from dataclasses import dataclass",
        "from PyPDF2 import PdfReader",
        "import sys",
        "import pypdf",
        "from pypdf import PdfReader"
      ],
      "imports_required": [
        "import re",
        "import logging",
        "from pathlib import Path",
        "from typing import Optional"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 243,
      "line_start": 33,
      "name": "SessionDetector",
      "parameters": [
        {
          "annotation": "",
          "default": null,
          "name": "bases"
        }
      ],
      "parameters_explained": {
        "No constructor parameters": "The __init__ method takes no parameters. It initializes a logger and compiles regex patterns for session detection internally."
      },
      "parent_class": null,
      "purpose": "This class provides automatic session detection capabilities for PDF files to enable conversation continuation. It attempts to extract conversation IDs and exchange numbers from PDFs using a prioritized set of detection methods, returning the most reliable match with confidence scores. The class is designed to work with PDFs that contain session information embedded in various locations (metadata, filenames, footers, or content) and supports the conversation ID format 'conv_YYYYMMDD_HHMMSS_XXXXXXXX' along with exchange numbers.",
      "return_annotation": null,
      "return_explained": "Instantiation returns a SessionDetector object. The main method detect_session_from_pdf returns Optional[SessionInfo] - either a SessionInfo object containing conversation_id, exchange_number, confidence score (0.0-1.0), and source of detection, or None if no session information could be detected. The get_next_exchange_number method returns an integer representing the next exchange number to use.",
      "settings_required": [
        "PDF_READER_AVAILABLE flag must be set (typically checked at module level to determine if PyPDF2 or pypdf is installed)",
        "SessionInfo dataclass must be defined with fields: conversation_id (str), exchange_number (int), confidence (float), source (str)",
        "Logging must be configured if you want to see detection progress and debug information"
      ],
      "source_code": "class SessionDetector:\n    \"\"\"Detect session information from PDF files for automatic conversation continuation\"\"\"\n    \n    def __init__(self):\n        self.logger = logging.getLogger(__name__)\n        \n        # Regex patterns for session detection\n        self.conv_id_pattern = re.compile(r'conv_(\\d{8}_\\d{6}_[a-f0-9]{8})')\n        self.exchange_pattern = re.compile(r'ex(?:change)?[#\\s]*(\\d+)', re.IGNORECASE)\n        self.session_footer_pattern = re.compile(\n            r'Session:\\s*(conv_\\d{8}_\\d{6}_[a-f0-9]{8})\\s*\\|\\s*Exchange\\s*#?(\\d+)', \n            re.IGNORECASE\n        )\n        \n    def detect_session_from_pdf(self, pdf_path: str) -> Optional[SessionInfo]:\n        \"\"\"\n        Detect session information from a PDF file using multiple methods\n        \n        Args:\n            pdf_path: Path to the PDF file\n            \n        Returns:\n            SessionInfo if detected, None otherwise\n        \"\"\"\n        if not PDF_READER_AVAILABLE:\n            self.logger.warning(\"PDF reading not available - install PyPDF2 or pypdf\")\n            return None\n            \n        pdf_path = Path(pdf_path)\n        if not pdf_path.exists():\n            self.logger.error(f\"PDF file not found: {pdf_path}\")\n            return None\n            \n        # Try multiple detection methods in order of reliability\n        methods = [\n            self._detect_from_metadata,\n            self._detect_from_filename,\n            self._detect_from_footer,\n            self._detect_from_content\n        ]\n        \n        best_match = None\n        \n        for method in methods:\n            try:\n                result = method(pdf_path)\n                if result and (not best_match or result.confidence > best_match.confidence):\n                    best_match = result\n                    # If we have high confidence, use it immediately\n                    if result.confidence >= 0.9:\n                        break\n            except Exception as e:\n                self.logger.debug(f\"Detection method {method.__name__} failed: {e}\")\n                continue\n        \n        if best_match:\n            self.logger.info(f\"Detected session: {best_match.conversation_id} \"\n                           f\"exchange #{best_match.exchange_number} \"\n                           f\"(confidence: {best_match.confidence:.2f}, source: {best_match.source})\")\n        else:\n            self.logger.debug(f\"No session information detected in {pdf_path.name}\")\n            \n        return best_match\n    \n    def _detect_from_metadata(self, pdf_path: Path) -> Optional[SessionInfo]:\n        \"\"\"Detect session info from PDF metadata\"\"\"\n        try:\n            with open(pdf_path, 'rb') as file:\n                pdf_reader = PdfReader(file)\n                metadata = pdf_reader.metadata\n                \n                if not metadata:\n                    return None\n                \n                # Check various metadata fields\n                fields_to_check = [\n                    getattr(metadata, 'subject', ''),\n                    getattr(metadata, 'title', ''),\n                    getattr(metadata, 'creator', ''),\n                    str(metadata.get('/Subject', '')),\n                    str(metadata.get('/Title', '')),\n                    str(metadata.get('/Creator', ''))\n                ]\n                \n                for field in fields_to_check:\n                    if field:\n                 
       conv_match = self.conv_id_pattern.search(str(field))\n                        if conv_match:\n                            conv_id = f\"conv_{conv_match.group(1)}\"\n                            \n                            # Look for exchange number in same field\n                            ex_match = self.exchange_pattern.search(str(field))\n                            if ex_match:\n                                return SessionInfo(\n                                    conversation_id=conv_id,\n                                    exchange_number=int(ex_match.group(1)),\n                                    confidence=0.95,\n                                    source='metadata'\n                                )\n                \n        except Exception as e:\n            self.logger.debug(f\"Metadata detection failed: {e}\")\n        \n        return None\n    \n    def _detect_from_filename(self, pdf_path: Path) -> Optional[SessionInfo]:\n        \"\"\"Detect session info from filename\"\"\"\n        filename = pdf_path.name\n        \n        # Look for session-aware filename pattern\n        # RESPONSE_conv_20250731_224420_6a63a783_ex001_filename.pdf\n        # ERROR_conv_20250731_224420_6a63a783_ex002_filename.pdf\n        \n        conv_match = self.conv_id_pattern.search(filename)\n        if conv_match:\n            conv_id = f\"conv_{conv_match.group(1)}\"\n            \n            # Look for exchange number after conversation ID\n            ex_match = re.search(rf'{re.escape(conv_id)}_ex(\\d+)', filename)\n            if ex_match:\n                return SessionInfo(\n                    conversation_id=conv_id,\n                    exchange_number=int(ex_match.group(1)),\n                    confidence=0.9,\n                    source='filename'\n                )\n                \n            # Fallback: any exchange number in filename\n            ex_match = self.exchange_pattern.search(filename)\n            if ex_match:\n                return SessionInfo(\n                    conversation_id=conv_id,\n                    exchange_number=int(ex_match.group(1)),\n                    confidence=0.7,\n                    source='filename'\n                )\n        \n        return None\n    \n    def _detect_from_footer(self, pdf_path: Path) -> Optional[SessionInfo]:\n        \"\"\"Detect session info from PDF footer content\"\"\"\n        try:\n            with open(pdf_path, 'rb') as file:\n                pdf_reader = PdfReader(file)\n                \n                # Check last few pages for footer information\n                pages_to_check = min(3, len(pdf_reader.pages))\n                \n                for i in range(pages_to_check):\n                    page = pdf_reader.pages[-(i+1)]  # Start from last page\n                    text = page.extract_text()\n                    \n                    # Look for footer pattern: \"Session: conv_id | Exchange #num\"\n                    footer_match = self.session_footer_pattern.search(text)\n                    if footer_match:\n                        return SessionInfo(\n                            conversation_id=footer_match.group(1),\n                            exchange_number=int(footer_match.group(2)),\n                            confidence=0.85,\n                            source='footer'\n                        )\n                        \n        except Exception as e:\n            self.logger.debug(f\"Footer detection failed: {e}\")\n        \n        return None\n    \n    def 
_detect_from_content(self, pdf_path: Path) -> Optional[SessionInfo]:\n        \"\"\"Detect session info from PDF content (last resort)\"\"\"\n        try:\n            with open(pdf_path, 'rb') as file:\n                pdf_reader = PdfReader(file)\n                \n                # Extract text from first page (most likely to contain session info)\n                if len(pdf_reader.pages) > 0:\n                    text = pdf_reader.pages[0].extract_text()\n                    \n                    # Look for conversation ID anywhere in content\n                    conv_match = self.conv_id_pattern.search(text)\n                    if conv_match:\n                        conv_id = f\"conv_{conv_match.group(1)}\"\n                        \n                        # Look for exchange number in same content\n                        ex_match = self.exchange_pattern.search(text)\n                        if ex_match:\n                            return SessionInfo(\n                                conversation_id=conv_id,\n                                exchange_number=int(ex_match.group(1)),\n                                confidence=0.6,\n                                source='content'\n                            )\n                        \n        except Exception as e:\n            self.logger.debug(f\"Content detection failed: {e}\")\n        \n        return None\n    \n    def get_next_exchange_number(self, conversation_id: str, detected_exchange: int) -> int:\n        \"\"\"\n        Calculate the next exchange number for continuation\n        \n        Args:\n            conversation_id: The conversation ID\n            detected_exchange: The exchange number from the PDF\n            \n        Returns:\n            Next exchange number to use\n        \"\"\"\n        # For response PDFs, the next exchange is detected + 1\n        # For error PDFs, we might want to retry the same exchange or increment\n        return detected_exchange + 1",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/session_detector.py",
      "tags": [
        "pdf-processing",
        "session-detection",
        "conversation-tracking",
        "metadata-extraction",
        "pattern-matching",
        "regex",
        "file-analysis",
        "confidence-scoring"
      ],
      "updated_at": "2025-12-07T01:07:49.158884",
      "usage_example": "# Instantiate the detector\ndetector = SessionDetector()\n\n# Detect session from a PDF file\npdf_path = 'RESPONSE_conv_20250731_224420_6a63a783_ex001_report.pdf'\nsession_info = detector.detect_session_from_pdf(pdf_path)\n\nif session_info:\n    print(f\"Conversation ID: {session_info.conversation_id}\")\n    print(f\"Exchange Number: {session_info.exchange_number}\")\n    print(f\"Confidence: {session_info.confidence}\")\n    print(f\"Source: {session_info.source}\")\n    \n    # Get next exchange number for continuation\n    next_exchange = detector.get_next_exchange_number(\n        session_info.conversation_id,\n        session_info.exchange_number\n    )\n    print(f\"Next exchange: {next_exchange}\")\nelse:\n    print(\"No session information detected\")"
    },
    {
      "best_practices": [
        "Always ensure confidence values are between 0.0 and 1.0 when creating instances",
        "Use one of the four valid source values: 'metadata', 'footer', 'filename', or 'content'",
        "Consider adding validation logic if extending this class to enforce confidence range and source value constraints",
        "This is a data container class - it should not contain business logic, only data storage",
        "The dataclass decorator automatically generates __init__, __repr__, and __eq__ methods, so no need to define them manually",
        "If immutability is desired, add frozen=True to the @dataclass decorator",
        "Exchange numbers should typically start at 1 or 0 and increment sequentially",
        "When comparing SessionInfo objects, all four attributes must match for equality"
      ],
      "class_interface": {
        "attributes": [
          {
            "description": "Unique identifier for the conversation session extracted from the PDF",
            "is_class_variable": false,
            "name": "conversation_id",
            "type": "str"
          },
          {
            "description": "Sequential number of the exchange within the conversation, used for ordering",
            "is_class_variable": false,
            "name": "exchange_number",
            "type": "int"
          },
          {
            "description": "Confidence level of the extraction ranging from 0.0 (no confidence) to 1.0 (complete confidence)",
            "is_class_variable": false,
            "name": "confidence",
            "type": "float"
          },
          {
            "description": "Source of the extraction, valid values are 'metadata', 'footer', 'filename', or 'content'",
            "is_class_variable": false,
            "name": "source",
            "type": "str"
          }
        ],
        "methods": [
          {
            "is_property": false,
            "is_static": false,
            "name": "__init__",
            "parameters": {
              "confidence": "Float between 0.0 and 1.0 indicating extraction confidence",
              "conversation_id": "String identifier for the conversation",
              "exchange_number": "Integer representing the exchange sequence number",
              "source": "String indicating extraction source ('metadata', 'footer', 'filename', or 'content')"
            },
            "purpose": "Initialize a SessionInfo instance with conversation metadata. Automatically generated by @dataclass decorator.",
            "returns": "None - initializes the instance",
            "signature": "__init__(conversation_id: str, exchange_number: int, confidence: float, source: str) -> None"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "__repr__",
            "parameters": {},
            "purpose": "Return a string representation of the SessionInfo instance. Automatically generated by @dataclass decorator.",
            "returns": "String representation in format: SessionInfo(conversation_id='...', exchange_number=..., confidence=..., source='...')",
            "signature": "__repr__() -> str"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "__eq__",
            "parameters": {
              "other": "Another object to compare with"
            },
            "purpose": "Compare two SessionInfo instances for equality. Automatically generated by @dataclass decorator.",
            "returns": "True if all attributes match, False otherwise",
            "signature": "__eq__(other: object) -> bool"
          }
        ]
      },
      "complexity": "simple",
      "component_type": "class",
      "conditional_imports": [],
      "created_at": "2025-12-07 00:07:13",
      "decorators": [
        "dataclass"
      ],
      "dependencies": [
        "dataclasses"
      ],
      "description": "A dataclass that stores session information extracted from PDF documents, including conversation ID, exchange number, confidence level, and source of extraction.",
      "docstring": "Session information extracted from PDF",
      "id": 2000,
      "imports": [
        "import re",
        "import logging",
        "from pathlib import Path",
        "from typing import Optional",
        "from typing import Tuple",
        "from typing import Dict",
        "from typing import Any",
        "from dataclasses import dataclass",
        "from PyPDF2 import PdfReader",
        "import sys",
        "import pypdf",
        "from pypdf import PdfReader"
      ],
      "imports_required": [
        "from dataclasses import dataclass"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 31,
      "line_start": 26,
      "name": "SessionInfo",
      "parameters": [
        {
          "annotation": "",
          "default": null,
          "name": "bases"
        }
      ],
      "parameters_explained": {
        "confidence": "A float value between 0.0 and 1.0 indicating the confidence level of the extraction. Higher values indicate more reliable extraction. 1.0 represents complete confidence, 0.0 represents no confidence.",
        "conversation_id": "A string identifier for the conversation session. This uniquely identifies a conversation thread or session within the PDF document.",
        "exchange_number": "An integer representing the sequential number of the exchange within the conversation. Used to order multiple exchanges in a single conversation.",
        "source": "A string indicating where the session information was extracted from. Valid values are 'metadata' (PDF metadata fields), 'footer' (page footer text), 'filename' (PDF filename parsing), or 'content' (document body content)."
      },
      "parent_class": null,
      "purpose": "SessionInfo serves as a structured data container for metadata about conversation sessions extracted from PDF files. It tracks the conversation identifier, the exchange number within that conversation, the confidence level of the extraction (0.0 to 1.0), and the source method used to extract this information (metadata, footer, filename, or content). This class is typically used in PDF parsing workflows to maintain structured session data with quality metrics.",
      "return_annotation": null,
      "return_explained": "Instantiation returns a SessionInfo object with all four attributes set. As a dataclass, it automatically provides __init__, __repr__, __eq__, and other standard methods. The object is immutable by default unless frozen=False is specified in the dataclass decorator.",
      "settings_required": [],
      "source_code": "class SessionInfo:\n    \"\"\"Session information extracted from PDF\"\"\"\n    conversation_id: str\n    exchange_number: int\n    confidence: float  # 0.0 to 1.0\n    source: str  # 'metadata', 'footer', 'filename', 'content'",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/session_detector.py",
      "tags": [
        "dataclass",
        "pdf-parsing",
        "session-management",
        "metadata",
        "data-container",
        "conversation-tracking",
        "extraction",
        "confidence-scoring"
      ],
      "updated_at": "2025-12-07T01:07:13.461580",
      "usage_example": "from dataclasses import dataclass\n\n@dataclass\nclass SessionInfo:\n    conversation_id: str\n    exchange_number: int\n    confidence: float\n    source: str\n\n# Create a SessionInfo instance\nsession = SessionInfo(\n    conversation_id=\"conv_12345\",\n    exchange_number=3,\n    confidence=0.95,\n    source=\"metadata\"\n)\n\n# Access attributes\nprint(session.conversation_id)  # Output: conv_12345\nprint(session.exchange_number)  # Output: 3\nprint(session.confidence)  # Output: 0.95\nprint(session.source)  # Output: metadata\n\n# Dataclass provides automatic __repr__\nprint(session)  # Output: SessionInfo(conversation_id='conv_12345', exchange_number=3, confidence=0.95, source='metadata')\n\n# Dataclass provides automatic equality comparison\nsession2 = SessionInfo(\"conv_12345\", 3, 0.95, \"metadata\")\nprint(session == session2)  # Output: True"
    },
    {
      "best_practices": [
        "Always use the async detect_annotations_in_pdf method as the main entry point rather than calling internal methods directly",
        "Ensure the AnnotationInfo and AnnotationResult dataclasses are properly defined before instantiating the detector",
        "The detector processes only the first 5 pages of PDFs by default to manage memory and performance - modify _pdf_to_images if you need more pages",
        "Color ranges are defined in HSV color space and can be customized by modifying the color_ranges dictionary after instantiation",
        "Minimum area thresholds in min_areas can be adjusted based on your document resolution and annotation size requirements",
        "The detector combines both visual (image-based) and native PDF annotation detection for comprehensive coverage",
        "Handle None return values from detect_annotations_in_pdf to gracefully manage detection failures",
        "The confidence threshold of 0.3 for visual annotations can be adjusted in _detect_annotations_in_image for stricter or looser detection",
        "Native PDF annotations receive a confidence of 0.9 as they are explicitly defined in the PDF structure",
        "The detector renders PDF pages at 2x zoom (DPI) for better detail - adjust the Matrix parameters in _pdf_to_images if needed for performance",
        "Geometric pattern detection (strikethrough/underline) uses morphological operations and may need tuning for different document types"
      ],
      "class_interface": {
        "attributes": [
          {
            "description": "Dictionary mapping color names to HSV color range definitions with 'lower' and 'upper' numpy arrays and 'type' classification. Includes ranges for red_markup, red_markup_2, yellow_highlight, green_highlight, and blue_markup",
            "is_class_variable": false,
            "name": "color_ranges",
            "type": "Dict[str, Dict[str, Any]]"
          },
          {
            "description": "Dictionary mapping annotation types to minimum pixel area thresholds for detection. Includes thresholds for highlight (100), markup (20), strikethrough (50), underline (30), and insertion (10)",
            "is_class_variable": false,
            "name": "min_areas",
            "type": "Dict[str, int]"
          }
        ],
        "methods": [
          {
            "is_property": false,
            "is_static": false,
            "name": "__init__",
            "parameters": {},
            "purpose": "Initializes the AnnotationDetector with predefined color ranges for different annotation types and minimum area thresholds",
            "returns": "None - initializes instance attributes color_ranges and min_areas",
            "signature": "__init__(self)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "detect_annotations_in_pdf",
            "parameters": {
              "pdf_path": "String path to the PDF file to analyze"
            },
            "purpose": "Main entry point that detects all annotations in a PDF document by combining image-based and native PDF annotation detection",
            "returns": "AnnotationResult object containing all detected annotations, counts, and summary, or None if detection fails",
            "signature": "async detect_annotations_in_pdf(self, pdf_path: str) -> Optional[AnnotationResult]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_pdf_to_images",
            "parameters": {
              "pdf_path": "String path to the PDF file to convert"
            },
            "purpose": "Converts PDF pages to OpenCV image arrays for visual processing, limited to first 5 pages at 2x resolution",
            "returns": "List of numpy arrays representing each page as an OpenCV image, empty list on failure",
            "signature": "_pdf_to_images(self, pdf_path: str) -> List[np.ndarray]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_detect_annotations_in_image",
            "parameters": {
              "image": "OpenCV image array (numpy ndarray) to analyze",
              "page_number": "Integer page number for annotation metadata"
            },
            "purpose": "Detects annotations in a single image using color-based detection in HSV color space and geometric pattern detection",
            "returns": "List of AnnotationInfo objects for all annotations detected in the image",
            "signature": "_detect_annotations_in_image(self, image: np.ndarray, page_number: int) -> List[AnnotationInfo]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_detect_geometric_patterns",
            "parameters": {
              "image": "OpenCV image array to analyze for geometric patterns",
              "page_number": "Integer page number for annotation metadata"
            },
            "purpose": "Detects strikethrough and underline patterns using morphological operations on horizontal lines",
            "returns": "List of AnnotationInfo objects for detected strikethrough and underline annotations",
            "signature": "_detect_geometric_patterns(self, image: np.ndarray, page_number: int) -> List[AnnotationInfo]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_detect_native_pdf_annotations",
            "parameters": {
              "pdf_path": "String path to the PDF file to extract annotations from"
            },
            "purpose": "Extracts native PDF annotations (comments, highlights, etc.) directly from the PDF structure using PyMuPDF",
            "returns": "List of AnnotationInfo objects for all native PDF annotations found",
            "signature": "_detect_native_pdf_annotations(self, pdf_path: str) -> List[AnnotationInfo]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_calculate_annotation_confidence",
            "parameters": {
              "annotation_type": "String type of annotation ('highlight', 'markup', etc.) to apply type-specific heuristics",
              "contour": "OpenCV contour array representing the annotation boundary"
            },
            "purpose": "Calculates a confidence score (0.0-1.0) for an annotation based on its shape, size, and type expectations",
            "returns": "Float confidence score between 0.0 and 1.0",
            "signature": "_calculate_annotation_confidence(self, contour: np.ndarray, annotation_type: str) -> float"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_generate_detection_summary",
            "parameters": {
              "annotations": "List of all detected AnnotationInfo objects",
              "pages_processed": "Integer number of pages that were processed"
            },
            "purpose": "Generates a human-readable summary string of the detection results including counts by type and average confidence",
            "returns": "String summary of detection results",
            "signature": "_generate_detection_summary(self, annotations: List[AnnotationInfo], pages_processed: int) -> str"
          }
        ]
      },
      "complexity": "complex",
      "component_type": "class",
      "conditional_imports": [],
      "created_at": "2025-12-07 00:06:48",
      "decorators": [],
      "dependencies": [
        "cv2",
        "numpy",
        "typing",
        "dataclasses",
        "pathlib",
        "PyPDF2",
        "fitz",
        "logging"
      ],
      "description": "A class that detects various types of annotations in PDF documents including red pen markups, highlights, strikethrough lines, underlines, and insertion marks using computer vision and native PDF annotation extraction.",
      "docstring": "Detects various types of annotations in PDF documents:\n- Red pen markups and corrections\n- Yellow/green highlights\n- Strikethrough lines\n- Underlines\n- Insertion marks",
      "id": 1999,
      "imports": [
        "import cv2",
        "import numpy as np",
        "from typing import List",
        "from typing import Dict",
        "from typing import Any",
        "from typing import Optional",
        "from typing import Tuple",
        "from dataclasses import dataclass",
        "from pathlib import Path",
        "import PyPDF2",
        "import fitz",
        "import logging"
      ],
      "imports_required": [
        "import cv2",
        "import numpy as np",
        "from typing import List, Dict, Any, Optional, Tuple",
        "from dataclasses import dataclass",
        "from pathlib import Path",
        "import PyPDF2",
        "import fitz",
        "import logging"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 348,
      "line_start": 36,
      "name": "AnnotationDetector",
      "parameters": [
        {
          "annotation": "",
          "default": null,
          "name": "bases"
        }
      ],
      "parameters_explained": {
        "No constructor parameters": "The __init__ method takes no parameters. All configuration is done through predefined instance attributes that are initialized with default values for color ranges and minimum area thresholds."
      },
      "parent_class": null,
      "purpose": "The AnnotationDetector class provides comprehensive annotation detection capabilities for PDF documents. It combines computer vision techniques (color-based detection using OpenCV) with native PDF annotation extraction (using PyMuPDF/fitz) to identify and classify different types of annotations. The class processes PDF pages as images to detect visual annotations like colored highlights and markups, while also extracting native PDF annotations. It's designed for document analysis workflows where identifying user annotations, edits, and markups is important, such as grading systems, document review processes, or collaborative editing analysis.",
      "return_annotation": null,
      "return_explained": "The class itself returns an AnnotationDetector instance when instantiated. The main method detect_annotations_in_pdf returns an Optional[AnnotationResult] containing all detected annotations, total count, pages processed, and a summary string. Returns None if detection fails. Individual helper methods return List[AnnotationInfo] for annotations found in specific contexts, or List[np.ndarray] for image conversions.",
      "settings_required": [
        "Requires AnnotationInfo dataclass to be defined with fields: annotation_type, confidence, area, color, bounds, page_number, and optional text_content",
        "Requires AnnotationResult dataclass to be defined with fields: annotations, total_annotations, pages_processed, detection_summary",
        "Requires a logger instance named 'logger' to be configured for logging output",
        "OpenCV (cv2) must be installed with image processing capabilities",
        "PyMuPDF (fitz) must be installed for PDF processing"
      ],
      "source_code": "class AnnotationDetector:\n    \"\"\"\n    Detects various types of annotations in PDF documents:\n    - Red pen markups and corrections\n    - Yellow/green highlights\n    - Strikethrough lines\n    - Underlines\n    - Insertion marks\n    \"\"\"\n    \n    def __init__(self):\n        # Color ranges for different annotation types (HSV format)\n        self.color_ranges = {\n            'red_markup': {\n                'lower': np.array([0, 120, 120]),\n                'upper': np.array([10, 255, 255]),\n                'type': 'markup'\n            },\n            'red_markup_2': {\n                'lower': np.array([170, 120, 120]),\n                'upper': np.array([180, 255, 255]),\n                'type': 'markup'\n            },\n            'yellow_highlight': {\n                'lower': np.array([20, 100, 100]),\n                'upper': np.array([30, 255, 255]),\n                'type': 'highlight'\n            },\n            'green_highlight': {\n                'lower': np.array([40, 100, 100]),\n                'upper': np.array([80, 255, 255]),\n                'type': 'highlight'\n            },\n            'blue_markup': {\n                'lower': np.array([100, 120, 120]),\n                'upper': np.array([130, 255, 255]),\n                'type': 'markup'\n            }\n        }\n        \n        # Minimum sizes for different annotation types\n        self.min_areas = {\n            'highlight': 100,    # Highlights should be reasonably large\n            'markup': 20,        # Markup can be small pen strokes\n            'strikethrough': 50, # Strikethrough lines\n            'underline': 30,     # Underlines\n            'insertion': 10      # Small insertion marks\n        }\n    \n    async def detect_annotations_in_pdf(self, pdf_path: str) -> Optional[AnnotationResult]:\n        \"\"\"\n        Detect annotations in a PDF document\n        \n        Args:\n            pdf_path: Path to the PDF file\n            \n        Returns:\n            AnnotationResult with detected annotations or None if failed\n        \"\"\"\n        try:\n            logger.info(f\"Starting annotation detection for {pdf_path}\")\n            \n            # Convert PDF pages to images for processing\n            images = self._pdf_to_images(pdf_path)\n            if not images:\n                logger.warning(\"Failed to convert PDF to images\")\n                return None\n            \n            all_annotations = []\n            pages_processed = 0\n            \n            # Process each page\n            for page_num, image in enumerate(images, 1):\n                page_annotations = self._detect_annotations_in_image(image, page_num)\n                all_annotations.extend(page_annotations)\n                pages_processed += 1\n                \n                if page_annotations:\n                    logger.info(f\"Page {page_num}: Found {len(page_annotations)} annotations\")\n            \n            # Also check for native PDF annotations\n            native_annotations = self._detect_native_pdf_annotations(pdf_path)\n            all_annotations.extend(native_annotations)\n            \n            # Generate summary\n            total_annotations = len(all_annotations)\n            summary = self._generate_detection_summary(all_annotations, pages_processed)\n            \n            logger.info(f\"Annotation detection complete: {total_annotations} annotations found\")\n            \n            return AnnotationResult(\n                
annotations=all_annotations,\n                total_annotations=total_annotations,\n                pages_processed=pages_processed,\n                detection_summary=summary\n            )\n            \n        except Exception as e:\n            logger.error(f\"Error in annotation detection: {e}\")\n            return None\n    \n    def _pdf_to_images(self, pdf_path: str) -> List[np.ndarray]:\n        \"\"\"Convert PDF pages to OpenCV images\"\"\"\n        images = []\n        try:\n            # Use PyMuPDF for better image quality\n            doc = fitz.open(pdf_path)\n            \n            for page_num in range(min(doc.page_count, 5)):  # Limit to first 5 pages\n                page = doc.load_page(page_num)\n                \n                # Render at high DPI for better annotation detection\n                mat = fitz.Matrix(2.0, 2.0)  # 2x zoom for better detail\n                pix = page.get_pixmap(matrix=mat, alpha=False)\n                \n                # Convert to OpenCV format\n                img_data = pix.tobytes(\"ppm\")\n                nparr = np.frombuffer(img_data, np.uint8)\n                img = cv2.imdecode(nparr, cv2.IMREAD_COLOR)\n                \n                if img is not None:\n                    images.append(img)\n            \n            doc.close()\n            return images\n            \n        except Exception as e:\n            logger.error(f\"Error converting PDF to images: {e}\")\n            return []\n    \n    def _detect_annotations_in_image(self, image: np.ndarray, page_number: int) -> List[AnnotationInfo]:\n        \"\"\"Detect annotations in a single image using color-based detection\"\"\"\n        annotations = []\n        \n        # Convert to HSV for better color detection\n        hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)\n        \n        # Detect each color range\n        for color_name, color_info in self.color_ranges.items():\n            mask = cv2.inRange(hsv, color_info['lower'], color_info['upper'])\n            \n            # Find contours\n            contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)\n            \n            for contour in contours:\n                area = cv2.contourArea(contour)\n                annotation_type = color_info['type']\n                \n                # Check minimum area threshold\n                if area < self.min_areas.get(annotation_type, 20):\n                    continue\n                \n                # Get bounding rectangle\n                x, y, w, h = cv2.boundingRect(contour)\n                \n                # Calculate confidence based on area and shape\n                confidence = self._calculate_annotation_confidence(contour, annotation_type)\n                \n                if confidence > 0.3:  # Minimum confidence threshold\n                    # Get average color in the region\n                    roi = image[y:y+h, x:x+w]\n                    avg_color = np.mean(roi.reshape(-1, 3), axis=0)\n                    \n                    annotations.append(AnnotationInfo(\n                        annotation_type=annotation_type,\n                        confidence=confidence,\n                        area=int(area),\n                        color=tuple(map(int, avg_color[::-1])),  # BGR to RGB\n                        bounds=(x, y, w, h),\n                        page_number=page_number\n                    ))\n        \n        # Detect geometric patterns (strikethrough, underline)\n        geometric_annotations = 
self._detect_geometric_patterns(image, page_number)\n        annotations.extend(geometric_annotations)\n        \n        return annotations\n    \n    def _detect_geometric_patterns(self, image: np.ndarray, page_number: int) -> List[AnnotationInfo]:\n        \"\"\"Detect strikethrough and underline patterns\"\"\"\n        annotations = []\n        \n        # Convert to grayscale\n        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)\n        \n        # Detect horizontal lines (potential strikethrough/underline)\n        kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (40, 1))\n        detected_lines = cv2.morphologyEx(gray, cv2.MORPH_OPEN, kernel)\n        \n        # Find contours of lines\n        contours, _ = cv2.findContours(detected_lines, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)\n        \n        for contour in contours:\n            area = cv2.contourArea(contour)\n            if area < 30:  # Too small to be a meaningful line\n                continue\n            \n            x, y, w, h = cv2.boundingRect(contour)\n            \n            # Classify as strikethrough or underline based on aspect ratio\n            aspect_ratio = w / h if h > 0 else 0\n            \n            if aspect_ratio > 5:  # Long, thin line\n                # Determine if it's strikethrough or underline based on context\n                # This is a simplified heuristic\n                annotation_type = 'strikethrough' if aspect_ratio > 10 else 'underline'\n                \n                annotations.append(AnnotationInfo(\n                    annotation_type=annotation_type,\n                    confidence=0.7,\n                    area=int(area),\n                    color=(0, 0, 0),  # Default to black\n                    bounds=(x, y, w, h),\n                    page_number=page_number\n                ))\n        \n        return annotations\n    \n    def _detect_native_pdf_annotations(self, pdf_path: str) -> List[AnnotationInfo]:\n        \"\"\"Detect native PDF annotations (comments, highlights, etc.)\"\"\"\n        annotations = []\n        \n        try:\n            doc = fitz.open(pdf_path)\n            \n            for page_num in range(doc.page_count):\n                page = doc.load_page(page_num)\n                \n                # Get annotations from the page\n                annot_list = page.annots()\n                \n                for annot in annot_list:\n                    annot_dict = annot.info\n                    annot_type = annot_dict.get('type_name', 'unknown')\n                    \n                    # Map PDF annotation types to our types\n                    type_mapping = {\n                        'Highlight': 'highlight',\n                        'StrikeOut': 'strikethrough',\n                        'Underline': 'underline',\n                        'FreeText': 'markup',\n                        'Text': 'markup',\n                        'Ink': 'markup'\n                    }\n                    \n                    mapped_type = type_mapping.get(annot_type, 'markup')\n                    \n                    # Get annotation bounds\n                    rect = annot.rect\n                    bounds = (int(rect.x0), int(rect.y0), int(rect.width), int(rect.height))\n                    \n                    annotations.append(AnnotationInfo(\n                        annotation_type=mapped_type,\n                        confidence=0.9,  # High confidence for native annotations\n                        area=int(rect.width * rect.height),\n      
                  color=(255, 255, 0),  # Default yellow\n                        bounds=bounds,\n                        page_number=page_num + 1,\n                        text_content=annot_dict.get('content', '')\n                    ))\n            \n            doc.close()\n            \n        except Exception as e:\n            logger.error(f\"Error detecting native PDF annotations: {e}\")\n        \n        return annotations\n    \n    def _calculate_annotation_confidence(self, contour: np.ndarray, annotation_type: str) -> float:\n        \"\"\"Calculate confidence score for an annotation based on shape and size\"\"\"\n        area = cv2.contourArea(contour)\n        \n        # Get contour properties\n        x, y, w, h = cv2.boundingRect(contour)\n        aspect_ratio = w / h if h > 0 else 0\n        \n        # Base confidence on area\n        area_confidence = min(area / 1000, 1.0)  # Normalize by expected area\n        \n        # Adjust based on annotation type expectations\n        shape_confidence = 0.5\n        if annotation_type == 'highlight':\n            # Highlights should be roughly rectangular\n            shape_confidence = 0.8 if 2 <= aspect_ratio <= 10 else 0.4\n        elif annotation_type == 'markup':\n            # Markup can be any shape\n            shape_confidence = 0.7\n        \n        # Final confidence\n        return min((area_confidence + shape_confidence) / 2, 1.0)\n    \n    def _generate_detection_summary(self, annotations: List[AnnotationInfo], pages_processed: int) -> str:\n        \"\"\"Generate a summary of the detection process\"\"\"\n        if not annotations:\n            return f\"No annotations detected across {pages_processed} pages\"\n        \n        # Count by type\n        type_counts = {}\n        for ann in annotations:\n            type_counts[ann.annotation_type] = type_counts.get(ann.annotation_type, 0) + 1\n        \n        # Generate summary\n        summary_parts = [f\"Detected {len(annotations)} annotations across {pages_processed} pages\"]\n        \n        if type_counts:\n            type_summary = \", \".join([f\"{count} {type}\" for type, count in type_counts.items()])\n            summary_parts.append(f\"Types: {type_summary}\")\n        \n        # Add confidence summary\n        avg_confidence = sum(ann.confidence for ann in annotations) / len(annotations)\n        summary_parts.append(f\"Average confidence: {avg_confidence:.2f}\")\n        \n        return \". \".join(summary_parts)",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/annotation_detector.py",
      "tags": [
        "pdf-processing",
        "annotation-detection",
        "computer-vision",
        "opencv",
        "document-analysis",
        "image-processing",
        "color-detection",
        "async",
        "pdf-annotations",
        "markup-detection",
        "highlight-detection"
      ],
      "updated_at": "2025-12-07T01:06:48.418751",
      "usage_example": "# Define required dataclasses\nfrom dataclasses import dataclass\nfrom typing import List, Tuple, Optional\n\n@dataclass\nclass AnnotationInfo:\n    annotation_type: str\n    confidence: float\n    area: int\n    color: Tuple[int, int, int]\n    bounds: Tuple[int, int, int, int]\n    page_number: int\n    text_content: str = ''\n\n@dataclass\nclass AnnotationResult:\n    annotations: List[AnnotationInfo]\n    total_annotations: int\n    pages_processed: int\n    detection_summary: str\n\n# Set up logging\nimport logging\nlogger = logging.getLogger(__name__)\nlogger.setLevel(logging.INFO)\n\n# Instantiate the detector\ndetector = AnnotationDetector()\n\n# Detect annotations in a PDF\nimport asyncio\n\nasync def analyze_pdf():\n    result = await detector.detect_annotations_in_pdf('document.pdf')\n    \n    if result:\n        print(f\"Found {result.total_annotations} annotations\")\n        print(f\"Summary: {result.detection_summary}\")\n        \n        for ann in result.annotations:\n            print(f\"Page {ann.page_number}: {ann.annotation_type} (confidence: {ann.confidence:.2f})\")\n    else:\n        print(\"Detection failed\")\n\n# Run the async function\nasyncio.run(analyze_pdf())"
    },
    {
      "best_practices": [
        "This is a dataclass, so it automatically generates __init__, __repr__, __eq__, and other methods. No need to manually implement these.",
        "The class is immutable by default unless frozen=True is explicitly set in the @dataclass decorator. Consider adding frozen=True if immutability is desired.",
        "Always ensure that the total_annotations count matches the length of the annotations list for consistency.",
        "The detection_summary should provide meaningful information about the detection process, including any warnings or errors encountered.",
        "When creating instances, ensure all four fields are provided as they are required (no default values are specified).",
        "This class is designed as a return type for annotation detection functions, not for direct manipulation of annotation data.",
        "The annotations list should contain only AnnotationInfo objects to maintain type consistency.",
        "Consider validating that pages_processed is a positive integer and total_annotations is non-negative when creating instances."
      ],
      "class_interface": {
        "attributes": [
          {
            "description": "List containing all AnnotationInfo objects detected during the annotation detection process",
            "is_class_variable": false,
            "name": "annotations",
            "type": "List[AnnotationInfo]"
          },
          {
            "description": "Integer count of the total number of annotations found across all processed pages",
            "is_class_variable": false,
            "name": "total_annotations",
            "type": "int"
          },
          {
            "description": "Integer indicating the number of pages that were analyzed during the detection operation",
            "is_class_variable": false,
            "name": "pages_processed",
            "type": "int"
          },
          {
            "description": "Human-readable string summary describing the results of the annotation detection process",
            "is_class_variable": false,
            "name": "detection_summary",
            "type": "str"
          }
        ],
        "methods": [
          {
            "is_property": false,
            "is_static": false,
            "name": "__init__",
            "parameters": {
              "annotations": "List of AnnotationInfo objects representing detected annotations",
              "detection_summary": "Human-readable summary of the detection process",
              "pages_processed": "Number of pages that were processed",
              "total_annotations": "Total count of annotations detected"
            },
            "purpose": "Initializes an AnnotationResult instance with detection results. Auto-generated by @dataclass decorator.",
            "returns": "None (constructor)",
            "signature": "__init__(annotations: List[AnnotationInfo], total_annotations: int, pages_processed: int, detection_summary: str) -> None"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "__repr__",
            "parameters": {},
            "purpose": "Returns a string representation of the AnnotationResult instance. Auto-generated by @dataclass decorator.",
            "returns": "String representation showing all field values",
            "signature": "__repr__() -> str"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "__eq__",
            "parameters": {
              "other": "Another object to compare with"
            },
            "purpose": "Compares two AnnotationResult instances for equality based on all fields. Auto-generated by @dataclass decorator.",
            "returns": "True if all fields are equal, False otherwise",
            "signature": "__eq__(other: object) -> bool"
          }
        ]
      },
      "complexity": "simple",
      "component_type": "class",
      "conditional_imports": [],
      "created_at": "2025-12-07 00:06:07",
      "decorators": [
        "dataclass"
      ],
      "dependencies": [
        "typing",
        "dataclasses"
      ],
      "description": "A dataclass that encapsulates the results of an annotation detection process on PDF documents, containing detected annotations, processing statistics, and a summary.",
      "docstring": "Result of annotation detection process",
      "id": 1998,
      "imports": [
        "import cv2",
        "import numpy as np",
        "from typing import List",
        "from typing import Dict",
        "from typing import Any",
        "from typing import Optional",
        "from typing import Tuple",
        "from dataclasses import dataclass",
        "from pathlib import Path",
        "import PyPDF2",
        "import fitz",
        "import logging"
      ],
      "imports_required": [
        "from typing import List",
        "from dataclasses import dataclass"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 34,
      "line_start": 29,
      "name": "AnnotationResult",
      "parameters": [
        {
          "annotation": "",
          "default": null,
          "name": "bases"
        }
      ],
      "parameters_explained": {
        "annotations": "A list of AnnotationInfo objects representing all annotations detected during the processing. Each AnnotationInfo contains details about a single annotation found in the document.",
        "detection_summary": "A string providing a human-readable summary of the detection results. Typically includes information about what was found, any issues encountered, or overall statistics in text form.",
        "pages_processed": "An integer indicating how many pages were analyzed during the annotation detection process. Useful for understanding the scope of the detection operation.",
        "total_annotations": "An integer count of the total number of annotations detected across all processed pages. This provides a quick summary metric without needing to count the annotations list."
      },
      "parent_class": null,
      "purpose": "This dataclass serves as a structured container for the output of annotation detection operations. It aggregates all detected annotations (as AnnotationInfo objects), tracks the number of annotations found and pages processed, and provides a human-readable summary of the detection results. It's designed to be returned by annotation detection functions to provide comprehensive information about the detection process outcome.",
      "return_annotation": null,
      "return_explained": "When instantiated, returns an AnnotationResult object containing all the annotation detection results. This object is immutable by default (as a dataclass) and provides attribute access to all four fields: annotations (list), total_annotations (int), pages_processed (int), and detection_summary (str).",
      "settings_required": [
        "Requires the AnnotationInfo class to be defined in the same module or imported, as it's used as the type for the annotations list"
      ],
      "source_code": "class AnnotationResult:\n    \"\"\"Result of annotation detection process\"\"\"\n    annotations: List[AnnotationInfo]\n    total_annotations: int\n    pages_processed: int\n    detection_summary: str",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/annotation_detector.py",
      "tags": [
        "dataclass",
        "result-container",
        "annotation-detection",
        "pdf-processing",
        "data-structure",
        "immutable",
        "return-type"
      ],
      "updated_at": "2025-12-07T01:06:07.404611",
      "usage_example": "from dataclasses import dataclass\nfrom typing import List\n\n# Assuming AnnotationInfo is defined elsewhere\n# Create an AnnotationResult instance\nresult = AnnotationResult(\n    annotations=[annotation1, annotation2, annotation3],\n    total_annotations=3,\n    pages_processed=10,\n    detection_summary=\"Successfully detected 3 annotations across 10 pages\"\n)\n\n# Access the results\nprint(f\"Found {result.total_annotations} annotations\")\nprint(f\"Processed {result.pages_processed} pages\")\nprint(result.detection_summary)\n\n# Iterate through annotations\nfor annotation in result.annotations:\n    # Process each annotation\n    pass"
    },
    {
      "best_practices": [
        "This is a data container class with no methods - use it to store and pass annotation information between functions",
        "Ensure confidence values are always between 0 and 1 when creating instances",
        "RGB color values should be integers between 0 and 255",
        "Page numbers should be consistent with your PDF processing library's indexing (0-based or 1-based)",
        "The bounds tuple follows (x, y, width, height) format - ensure consistency when creating instances",
        "text_content is optional and defaults to None - only populate it when text extraction is performed",
        "Consider validating input values in a factory function or wrapper if strict constraints are needed",
        "This dataclass is immutable by default - create new instances rather than modifying existing ones",
        "Use type hints when working with collections of AnnotationInfo objects (e.g., List[AnnotationInfo])",
        "The area field should match the calculated area from bounds (width * height) for consistency"
      ],
      "class_interface": {
        "attributes": [
          {
            "description": "Type of annotation detected: 'highlight', 'strikethrough', 'markup', 'underline', or 'insertion'",
            "is_class_variable": false,
            "name": "annotation_type",
            "type": "str"
          },
          {
            "description": "Confidence score of the detection, ranging from 0 to 1",
            "is_class_variable": false,
            "name": "confidence",
            "type": "float"
          },
          {
            "description": "Area of the annotation in pixels",
            "is_class_variable": false,
            "name": "area",
            "type": "int"
          },
          {
            "description": "RGB color values of the annotation as a tuple (R, G, B)",
            "is_class_variable": false,
            "name": "color",
            "type": "Tuple[int, int, int]"
          },
          {
            "description": "Bounding box coordinates and dimensions as (x, y, width, height)",
            "is_class_variable": false,
            "name": "bounds",
            "type": "Tuple[int, int, int, int]"
          },
          {
            "description": "Page number where the annotation was found",
            "is_class_variable": false,
            "name": "page_number",
            "type": "int"
          },
          {
            "description": "Associated text content extracted from the annotation region, or None if not available",
            "is_class_variable": false,
            "name": "text_content",
            "type": "Optional[str]"
          }
        ],
        "methods": [
          {
            "is_property": false,
            "is_static": false,
            "name": "__init__",
            "parameters": {
              "annotation_type": "Type of annotation ('highlight', 'strikethrough', 'markup', 'underline', 'insertion')",
              "area": "Annotation area in pixels",
              "bounds": "Bounding box as (x, y, width, height)",
              "color": "RGB color tuple (R, G, B)",
              "confidence": "Detection confidence score (0-1)",
              "page_number": "Page number where annotation appears",
              "text_content": "Optional associated text content"
            },
            "purpose": "Initialize an AnnotationInfo instance with all required annotation metadata. Auto-generated by @dataclass decorator.",
            "returns": "None - initializes the instance",
            "signature": "__init__(annotation_type: str, confidence: float, area: int, color: Tuple[int, int, int], bounds: Tuple[int, int, int, int], page_number: int, text_content: Optional[str] = None) -> None"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "__repr__",
            "parameters": {},
            "purpose": "Return a string representation of the AnnotationInfo instance. Auto-generated by @dataclass decorator.",
            "returns": "String representation showing all field values",
            "signature": "__repr__() -> str"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "__eq__",
            "parameters": {
              "other": "Another object to compare with"
            },
            "purpose": "Compare two AnnotationInfo instances for equality based on all fields. Auto-generated by @dataclass decorator.",
            "returns": "True if all fields are equal, False otherwise",
            "signature": "__eq__(other: object) -> bool"
          }
        ]
      },
      "complexity": "simple",
      "component_type": "class",
      "conditional_imports": [],
      "created_at": "2025-12-07 00:05:41",
      "decorators": [
        "dataclass"
      ],
      "dependencies": [
        "dataclasses",
        "typing"
      ],
      "description": "A dataclass that stores comprehensive information about a detected annotation in a PDF document, including its type, visual properties, location, and associated text content.",
      "docstring": "Information about a detected annotation",
      "id": 1997,
      "imports": [
        "import cv2",
        "import numpy as np",
        "from typing import List",
        "from typing import Dict",
        "from typing import Any",
        "from typing import Optional",
        "from typing import Tuple",
        "from dataclasses import dataclass",
        "from pathlib import Path",
        "import PyPDF2",
        "import fitz",
        "import logging"
      ],
      "imports_required": [
        "from dataclasses import dataclass",
        "from typing import Tuple, Optional"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 26,
      "line_start": 18,
      "name": "AnnotationInfo",
      "parameters": [
        {
          "annotation": "",
          "default": null,
          "name": "bases"
        }
      ],
      "parameters_explained": {
        "annotation_type": "String identifier for the type of annotation detected. Expected values are: 'highlight', 'strikethrough', 'markup', 'underline', or 'insertion'. This categorizes the visual annotation style.",
        "area": "Integer representing the area of the annotation in pixels. Calculated from the bounding box dimensions, useful for filtering or prioritizing annotations by size.",
        "bounds": "Tuple of four integers (x, y, width, height) defining the bounding box of the annotation. x and y are the top-left corner coordinates, width and height define the rectangle dimensions in pixels.",
        "color": "Tuple of three integers (R, G, B) representing the RGB color values of the annotation. Each value ranges from 0 to 255. Used to identify and categorize annotations by their visual appearance.",
        "confidence": "Float value between 0 and 1 representing the confidence score of the annotation detection. Higher values indicate greater certainty that the detected region is indeed an annotation of the specified type.",
        "page_number": "Integer indicating which page of the PDF document contains this annotation. Page numbering typically starts at 0 or 1 depending on the implementation context.",
        "text_content": "Optional string containing any text associated with or extracted from the annotation region. May be None if no text is available or if text extraction was not performed."
      },
      "parent_class": null,
      "purpose": "This dataclass serves as a structured container for metadata about annotations detected in PDF documents. It captures visual characteristics (color, area, bounds), classification information (annotation type, confidence score), location data (page number, bounding box), and optional text content. It is typically used as a return type or data transfer object in PDF annotation detection and analysis workflows.",
      "return_annotation": null,
      "return_explained": "Instantiation returns an AnnotationInfo object containing all the specified annotation metadata. As a dataclass, it automatically generates __init__, __repr__, __eq__, and other methods. The object is immutable by default unless frozen=False is specified in the dataclass decorator.",
      "settings_required": [],
      "source_code": "class AnnotationInfo:\n    \"\"\"Information about a detected annotation\"\"\"\n    annotation_type: str  # 'highlight', 'strikethrough', 'markup', 'underline', 'insertion'\n    confidence: float     # Confidence score 0-1\n    area: int            # Area in pixels\n    color: Tuple[int, int, int]  # RGB color\n    bounds: Tuple[int, int, int, int]  # x, y, width, height\n    page_number: int     # Page where annotation was found\n    text_content: Optional[str] = None  # Associated text if available",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/annotation_detector.py",
      "tags": [
        "dataclass",
        "annotation",
        "pdf",
        "metadata",
        "document-analysis",
        "data-structure",
        "detection",
        "bounding-box",
        "text-extraction"
      ],
      "updated_at": "2025-12-07T01:05:41.406511",
      "usage_example": "from dataclasses import dataclass\nfrom typing import Tuple, Optional\n\n@dataclass\nclass AnnotationInfo:\n    annotation_type: str\n    confidence: float\n    area: int\n    color: Tuple[int, int, int]\n    bounds: Tuple[int, int, int, int]\n    page_number: int\n    text_content: Optional[str] = None\n\n# Create an annotation info object for a yellow highlight\nannotation = AnnotationInfo(\n    annotation_type='highlight',\n    confidence=0.95,\n    area=15000,\n    color=(255, 255, 0),\n    bounds=(100, 200, 300, 50),\n    page_number=1,\n    text_content='Important passage to remember'\n)\n\n# Access attributes\nprint(f\"Type: {annotation.annotation_type}\")\nprint(f\"Confidence: {annotation.confidence}\")\nprint(f\"Location: Page {annotation.page_number}, bounds {annotation.bounds}\")\nprint(f\"Text: {annotation.text_content}\")\n\n# Create annotation without text content\nstrikethrough = AnnotationInfo(\n    annotation_type='strikethrough',\n    confidence=0.87,\n    area=8000,\n    color=(255, 0, 0),\n    bounds=(150, 300, 200, 20),\n    page_number=2\n)"
    },
    {
      "best_practices": [
        "Always provide an authenticated RemarkableRestClient instance before creating the watcher",
        "Ensure the callback function is async and handles exceptions internally to prevent the watch loop from crashing",
        "The watcher maintains state in processed_files set - creating a new instance will reset tracking and reprocess all files",
        "Files are downloaded to temporary directories that are automatically cleaned up after callback execution",
        "Use appropriate poll_interval values to balance responsiveness with API rate limits and resource usage",
        "The watcher runs indefinitely until KeyboardInterrupt or fatal error - plan for graceful shutdown in production",
        "Initial call to get_new_files() marks all existing files as processed to avoid reprocessing on startup",
        "Callback receives both document metadata (dict) and local file path (Path) for flexible processing",
        "The watcher continues running even if individual file processing fails, logging errors but not stopping",
        "Consider implementing retry logic in your callback for transient failures"
      ],
      "class_interface": {
        "attributes": [
          {
            "description": "The REST API client instance used to communicate with the reMarkable tablet",
            "is_class_variable": false,
            "name": "rest_client",
            "type": "RemarkableRestClient"
          },
          {
            "description": "The name of the folder being monitored on the reMarkable tablet",
            "is_class_variable": false,
            "name": "watch_folder_name",
            "type": "str"
          },
          {
            "description": "Number of seconds to wait between polling checks for new files",
            "is_class_variable": false,
            "name": "poll_interval",
            "type": "int"
          },
          {
            "description": "Logger instance for recording errors and warnings during operation",
            "is_class_variable": false,
            "name": "logger",
            "type": "logging.Logger"
          },
          {
            "description": "Set of document IDs that have already been processed, used to track state and avoid reprocessing files",
            "is_class_variable": false,
            "name": "processed_files",
            "type": "Set[str]"
          },
          {
            "description": "Timestamp of the last check for new files, initialized to current time at instantiation",
            "is_class_variable": false,
            "name": "last_check_time",
            "type": "datetime"
          }
        ],
        "methods": [
          {
            "is_property": false,
            "is_static": false,
            "name": "__init__",
            "parameters": {
              "poll_interval": "Seconds between polling checks (default: 60)",
              "rest_client": "Authenticated RemarkableRestClient instance for API communication",
              "watch_folder_name": "Name of the reMarkable folder to monitor"
            },
            "purpose": "Initialize the file watcher with REST client, target folder name, and polling interval",
            "returns": "None - constructor initializes the instance",
            "signature": "__init__(self, rest_client: RemarkableRestClient, watch_folder_name: str, poll_interval: int = 60)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "get_new_files",
            "parameters": {},
            "purpose": "Check for and return new files in the watched folder that haven't been processed yet",
            "returns": "List of document metadata dictionaries for newly detected files. Each dict contains 'ID', 'VissibleName', and other document properties. Returns empty list if no new files or on error.",
            "signature": "async get_new_files(self) -> List[Dict]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "start_watching",
            "parameters": {
              "callback": "Async function that takes two arguments: doc (Dict) containing document metadata and local_file (Path) pointing to the downloaded file. This function is called for each new file detected."
            },
            "purpose": "Start the continuous monitoring loop that checks for new files and executes the callback for each new file detected",
            "returns": "None - runs indefinitely until KeyboardInterrupt or fatal error. Does not return normally.",
            "signature": "async start_watching(self, callback)"
          }
        ]
      },
      "complexity": "moderate",
      "component_type": "class",
      "conditional_imports": [],
      "created_at": "2025-12-07 00:05:07",
      "decorators": [],
      "dependencies": [
        "asyncio",
        "logging",
        "tempfile",
        "datetime",
        "pathlib",
        "typing"
      ],
      "description": "A file watcher class that monitors a specific folder on a reMarkable tablet using the REST API, polling for new files at regular intervals and triggering callbacks when new files are detected.",
      "docstring": "File watcher using the REST API client",
      "id": 1996,
      "imports": [
        "import asyncio",
        "import json",
        "import logging",
        "import tempfile",
        "import time",
        "import uuid",
        "import zipfile",
        "from datetime import datetime",
        "from datetime import timedelta",
        "from pathlib import Path",
        "from typing import Dict",
        "from typing import List",
        "from typing import Optional",
        "from typing import Set",
        "from typing import Tuple",
        "import requests",
        "import base64",
        "import base64",
        "import base64"
      ],
      "imports_required": [
        "import asyncio",
        "import logging",
        "import tempfile",
        "from datetime import datetime",
        "from pathlib import Path",
        "from typing import Dict, List, Set"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 872,
      "line_start": 778,
      "name": "RemarkableRestFileWatcher",
      "parameters": [
        {
          "annotation": "",
          "default": null,
          "name": "bases"
        }
      ],
      "parameters_explained": {
        "poll_interval": "The number of seconds to wait between checks for new files. Defaults to 60 seconds. Lower values provide faster detection but increase API calls and resource usage.",
        "rest_client": "An instance of RemarkableRestClient that provides the REST API interface for communicating with the reMarkable tablet. This client must be properly authenticated and configured before being passed to the watcher.",
        "watch_folder_name": "The name of the folder on the reMarkable tablet to monitor for new files. This is a human-readable folder name (e.g., 'Inbox'), not a folder ID."
      },
      "parent_class": null,
      "purpose": "This class provides automated monitoring of a reMarkable tablet folder through REST API polling. It tracks which files have been processed, detects new files since the last check, downloads them to temporary storage, and executes user-defined callbacks for processing. The watcher runs continuously in an async loop, making it suitable for building automated workflows that respond to new documents added to a reMarkable tablet folder.",
      "return_annotation": null,
      "return_explained": "The constructor returns an instance of RemarkableRestFileWatcher. The get_new_files() method returns a List[Dict] containing document metadata dictionaries for newly detected files, with each dict containing keys like 'ID' and 'VissibleName'. The start_watching() method does not return a value but runs indefinitely until interrupted.",
      "settings_required": [
        "A properly configured and authenticated RemarkableRestClient instance must be created before instantiating this watcher",
        "The watch folder must exist on the reMarkable tablet before starting the watcher",
        "Sufficient disk space for temporary file storage during processing"
      ],
      "source_code": "class RemarkableRestFileWatcher:\n    \"\"\"File watcher using the REST API client\"\"\"\n    \n    def __init__(self, rest_client: RemarkableRestClient, \n                 watch_folder_name: str, poll_interval: int = 60):\n        self.rest_client = rest_client\n        self.watch_folder_name = watch_folder_name\n        self.poll_interval = poll_interval\n        self.logger = logging.getLogger(__name__)\n        \n        self.processed_files: Set[str] = set()\n        self.last_check_time = datetime.now()\n    \n    async def get_new_files(self) -> List[Dict]:\n        \"\"\"Get list of new files since last check\"\"\"\n        try:\n            # Find the watch folder\n            folder_id = self.rest_client.find_folder_by_name(self.watch_folder_name)\n            if folder_id is None:\n                self.logger.warning(f\"Watch folder '{self.watch_folder_name}' not found\")\n                return []\n            \n            # Get documents in the folder\n            all_files = self.rest_client.get_documents_in_folder(folder_id)\n            \n            # Filter for new files\n            new_files = []\n            for doc in all_files:\n                doc_id = doc.get(\"ID\")\n                if doc_id and doc_id not in self.processed_files:\n                    new_files.append(doc)\n                    self.processed_files.add(doc_id)\n            \n            return new_files\n            \n        except Exception as e:\n            self.logger.error(f\"Error checking for new files: {e}\")\n            return []\n    \n    async def start_watching(self, callback):\n        \"\"\"\n        Start watching for new files\n        \n        Args:\n            callback: Async function to call with new files\n        \"\"\"\n        print(f\"\ud83d\udc41\ufe0f  Started watching reMarkable folder: {self.watch_folder_name}\")\n        print(f\"\ud83d\udd04 Checking every {self.poll_interval} seconds...\")\n        \n        # Get initial file list to mark as processed\n        initial_files = await self.get_new_files()\n        print(f\"\ud83d\udcc1 Tracking {len(self.processed_files)} existing files\")\n        \n        try:\n            while True:\n                try:\n                    new_files = await self.get_new_files()\n                    \n                    if new_files:\n                        print(f\"\ud83d\udce5 Found {len(new_files)} new file(s)\")\n                        \n                        with tempfile.TemporaryDirectory() as temp_dir:\n                            temp_path = Path(temp_dir)\n                            \n                            for doc in new_files:\n                                doc_id = doc.get(\"ID\")\n                                doc_name = doc.get(\"VissibleName\", \"Unknown\")\n                                \n                                print(f\"\ud83d\udcc4 Processing: {doc_name}\")\n                                \n                                # Download file\n                                local_file = self.rest_client.download_document(\n                                    doc_id, doc_name, temp_path\n                                )\n                                \n                                if local_file:\n                                    try:\n                                        await callback(doc, local_file)\n                                    except Exception as e:\n                                        self.logger.error(f\"Error in callback for 
{doc_name}: {e}\")\n                                        print(f\"\u274c Error processing {doc_name}: {e}\")\n                    \n                    # Wait before next check\n                    await asyncio.sleep(self.poll_interval)\n                    \n                except Exception as e:\n                    self.logger.error(f\"Error in watch loop: {e}\")\n                    print(f\"\u274c Watch error: {e}\")\n                    await asyncio.sleep(self.poll_interval)\n        \n        except KeyboardInterrupt:\n            print(f\"\\n\ud83d\uded1 Stopping reMarkable file watcher...\")\n        except Exception as e:\n            self.logger.error(f\"Fatal error in file watcher: {e}\")\n            print(f\"\u274c Fatal watcher error: {e}\")",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/remarkable_rest_client.py",
      "tags": [
        "file-watcher",
        "polling",
        "async",
        "remarkable-tablet",
        "rest-api",
        "monitoring",
        "document-processing",
        "callback-pattern",
        "file-system"
      ],
      "updated_at": "2025-12-07T01:05:07.610671",
      "usage_example": "import asyncio\nfrom pathlib import Path\nfrom typing import Dict\n\n# Assume rest_client is already created and authenticated\nrest_client = RemarkableRestClient(device_token='your_token')\n\n# Create the watcher\nwatcher = RemarkableRestFileWatcher(\n    rest_client=rest_client,\n    watch_folder_name='Inbox',\n    poll_interval=30\n)\n\n# Define callback for processing new files\nasync def process_new_file(doc: Dict, local_file: Path):\n    doc_name = doc.get('VissibleName', 'Unknown')\n    print(f'Processing {doc_name} at {local_file}')\n    # Your processing logic here\n    with open(local_file, 'rb') as f:\n        content = f.read()\n        # Do something with the file content\n\n# Start watching (runs indefinitely)\nasync def main():\n    await watcher.start_watching(process_new_file)\n\nif __name__ == '__main__':\n    asyncio.run(main())"
    },
    {
      "best_practices": [],
      "class_interface": {
        "attributes": [],
        "methods": [
          {
            "is_property": false,
            "is_static": false,
            "name": "__init__",
            "parameters": {
              "config_dir": "Type: Optional[str]"
            },
            "purpose": "Internal method:   init  ",
            "returns": "None",
            "signature": "__init__(self, config_dir)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_load_device_token",
            "parameters": {},
            "purpose": "Load device token from file if it exists",
            "returns": "None",
            "signature": "_load_device_token(self)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_save_device_token",
            "parameters": {
              "token": "Type: str"
            },
            "purpose": "Save device token to file",
            "returns": "None",
            "signature": "_save_device_token(self, token)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "register_device",
            "parameters": {
              "one_time_code": "Type: str"
            },
            "purpose": "Register a new device with reMarkable Cloud\n\nArgs:\n    one_time_code: 8-character code from reMarkable account\n    \nReturns:\n    True if registration successful",
            "returns": "Returns bool",
            "signature": "register_device(self, one_time_code) -> bool"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "get_user_token",
            "parameters": {},
            "purpose": "Get a fresh user token using the device token\n\nReturns:\n    True if user token obtained successfully",
            "returns": "Returns bool",
            "signature": "get_user_token(self) -> bool"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "discover_storage_host",
            "parameters": {},
            "purpose": "Discover the document storage service host\n\nReturns:\n    True if host discovered successfully",
            "returns": "Returns bool",
            "signature": "discover_storage_host(self) -> bool"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "list_documents",
            "parameters": {},
            "purpose": "List all documents and folders in reMarkable Cloud using Chrome extension API\n\nReturns:\n    List of document/folder metadata dictionaries",
            "returns": "Returns List[Dict]",
            "signature": "list_documents(self) -> List[Dict]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "get_document_download_url",
            "parameters": {
              "doc_id": "Type: str"
            },
            "purpose": "Get download URL for a specific document\n\nArgs:\n    doc_id: Document UUID\n    \nReturns:\n    Download URL or None if failed",
            "returns": "Returns Optional[str]",
            "signature": "get_document_download_url(self, doc_id) -> Optional[str]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "download_document",
            "parameters": {
              "doc_id": "Type: str",
              "doc_name": "Type: str",
              "output_dir": "Type: Path"
            },
            "purpose": "Download a document from reMarkable Cloud\n\nArgs:\n    doc_id: Document UUID\n    doc_name: Document name for filename\n    output_dir: Directory to save the file\n    \nReturns:\n    Path to downloaded file or None if failed",
            "returns": "Returns Optional[Path]",
            "signature": "download_document(self, doc_id, doc_name, output_dir) -> Optional[Path]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "create_pdf_zip_package",
            "parameters": {
              "doc_name": "Type: str",
              "pdf_path": "Type: Path"
            },
            "purpose": "Create a ZIP package for PDF upload in reMarkable format\n\nArgs:\n    pdf_path: Path to PDF file\n    doc_name: Document name\n    \nReturns:\n    ZIP package as bytes",
            "returns": "Returns bytes",
            "signature": "create_pdf_zip_package(self, pdf_path, doc_name) -> bytes"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "upload_document",
            "parameters": {
              "document_name": "Type: Optional[str]",
              "file_path": "Type: Path",
              "folder_id": "Type: str"
            },
            "purpose": "Upload a PDF document to reMarkable Cloud\n\nArgs:\n    file_path: Path to PDF file\n    folder_id: Target folder ID (empty string for root)\n    document_name: Name for the document\n    \nReturns:\n    True if upload successful",
            "returns": "Returns bool",
            "signature": "upload_document(self, file_path, folder_id, document_name) -> bool"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "find_folder_by_name",
            "parameters": {
              "folder_name": "Type: str"
            },
            "purpose": "Find a folder by name and return its ID\n\nArgs:\n    folder_name: Name of the folder to find\n    \nReturns:\n    Folder ID or None if not found",
            "returns": "Returns Optional[str]",
            "signature": "find_folder_by_name(self, folder_name) -> Optional[str]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "get_documents_in_folder",
            "parameters": {
              "folder_id": "Type: str"
            },
            "purpose": "Get all documents in a specific folder\n\nArgs:\n    folder_id: Folder ID (empty string for root)\n    \nReturns:\n    List of document metadata dictionaries",
            "returns": "Returns List[Dict]",
            "signature": "get_documents_in_folder(self, folder_id) -> List[Dict]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_get_storage_host",
            "parameters": {},
            "purpose": "Extract storage host from JWT token (tectonic service)\nBased on Chrome extension logic",
            "returns": "Returns str",
            "signature": "_get_storage_host(self) -> str"
          }
        ]
      },
      "complexity": "moderate",
      "component_type": "class",
      "conditional_imports": [],
      "created_at": "2025-12-07 00:04:36",
      "decorators": [],
      "dependencies": [],
      "description": "Direct REST API client for reMarkable Cloud without external dependencies",
      "docstring": "Direct REST API client for reMarkable Cloud without external dependencies",
      "id": 1995,
      "imports": [
        "import asyncio",
        "import json",
        "import logging",
        "import tempfile",
        "import time",
        "import uuid",
        "import zipfile",
        "from datetime import datetime",
        "from datetime import timedelta",
        "from pathlib import Path",
        "from typing import Dict",
        "from typing import List",
        "from typing import Optional",
        "from typing import Set",
        "from typing import Tuple",
        "import requests",
        "import base64",
        "import base64",
        "import base64"
      ],
      "imports_required": [
        "import asyncio",
        "import json",
        "import logging",
        "import tempfile",
        "import time"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 775,
      "line_start": 22,
      "name": "RemarkableRestClient",
      "parameters": [
        {
          "annotation": "",
          "default": null,
          "name": "bases"
        }
      ],
      "parameters_explained": {
        "bases": "Parameter of type "
      },
      "parent_class": null,
      "purpose": "Direct REST API client for reMarkable Cloud without external dependencies",
      "return_annotation": null,
      "return_explained": "Returns unspecified type",
      "settings_required": [],
      "source_code": "class RemarkableRestClient:\n    \"\"\"Direct REST API client for reMarkable Cloud without external dependencies\"\"\"\n    \n    def __init__(self, config_dir: Optional[str] = None):\n        self.config_dir = Path(config_dir) if config_dir else Path.home() / '.eink-llm'\n        self.config_dir.mkdir(exist_ok=True)\n        \n        self.device_token_file = self.config_dir / 'remarkable_device_token'\n        self.config_file = self.config_dir / 'remarkable_config.json'\n        \n        self.logger = logging.getLogger(__name__)\n        self.device_token = None\n        self.user_token = None\n        self.storage_host = None\n        self.authenticated = False\n        \n        # API endpoints (UPDATED to working endpoints from ddvk/rmapi project)\n        self.base_url = \"https://webapp-prod.cloud.remarkable.engineering\"\n        self.device_endpoint = f\"{self.base_url}/token/json/2/device/new\"\n        self.user_endpoint = f\"{self.base_url}/token/json/2/user/new\"\n        self.service_manager_url = \"https://service-manager-production-dot-remarkable-production.appspot.com/service/json/1/document-storage?environment=production&apiVer=2\"\n        self.device_register_url = \"https://my.remarkable.com/connect/desktop\"\n        \n        # Load existing device token if available\n        self._load_device_token()\n    \n    def _load_device_token(self):\n        \"\"\"Load device token from file if it exists\"\"\"\n        if self.device_token_file.exists():\n            try:\n                with open(self.device_token_file, 'r') as f:\n                    self.device_token = f.read().strip()\n                print(\"\ud83d\udd11 Loaded existing device token\")\n            except Exception as e:\n                self.logger.error(f\"Error loading device token: {e}\")\n    \n    def _save_device_token(self, token: str):\n        \"\"\"Save device token to file\"\"\"\n        try:\n            with open(self.device_token_file, 'w') as f:\n                f.write(token)\n            self.device_token = token\n            print(\"\ud83d\udcbe Device token saved\")\n        except Exception as e:\n            self.logger.error(f\"Error saving device token: {e}\")\n    \n    def register_device(self, one_time_code: str) -> bool:\n        \"\"\"\n        Register a new device with reMarkable Cloud\n        \n        Args:\n            one_time_code: 8-character code from reMarkable account\n            \n        Returns:\n            True if registration successful\n        \"\"\"\n        device_id = str(uuid.uuid4())\n        payload = {\n            \"code\": one_time_code,\n            \"deviceDesc\": \"desktop-windows\",  # Using same as rmcl\n            \"deviceID\": device_id\n        }\n        \n        print(\"\ud83d\udd10 Registering new device with reMarkable Cloud...\")\n        \n        # Add proper headers (matching rmcl user agent)\n        headers = {\n            \"Content-Type\": \"application/json\",\n            \"User-Agent\": \"rmcl <https://github.com/rschroll/rmcl>\"\n        }\n        \n        try:\n            response = requests.post(\n                self.device_endpoint,\n                json=payload,\n                headers=headers,\n                timeout=30\n            )\n            \n            if response.status_code == 200:\n                device_token = response.text.strip()\n                if device_token and len(device_token) > 10:  # Basic validation\n                    self._save_device_token(device_token)\n 
                   print(\"\u2705 Device registered successfully!\")\n                    return True\n                else:\n                    print(f\"\u274c Invalid device token received: {device_token[:50]}...\")\n                    return False\n            else:\n                print(f\"\u274c Device registration failed: HTTP {response.status_code}\")\n                if response.text:\n                    print(f\"   Response: {response.text[:100]}\")\n                return False\n                    \n        except Exception as e:\n            print(f\"\u274c Device registration error: {e}\")\n            return False\n    \n    def get_user_token(self) -> bool:\n        \"\"\"\n        Get a fresh user token using the device token\n        \n        Returns:\n            True if user token obtained successfully\n        \"\"\"\n        if not self.device_token:\n            print(\"\u274c No device token available. Please register device first.\")\n            return False\n        \n        try:\n            print(\"\ud83d\udd04 Getting fresh user token...\")\n            \n            headers = {\n                \"Authorization\": f\"Bearer {self.device_token}\",\n                \"User-Agent\": \"remarkable-desktop-linux/2.15.1.382\"\n            }\n            \n            response = requests.post(\n                self.user_endpoint,\n                headers=headers,\n                timeout=30\n            )\n            \n            if response.status_code == 200:\n                self.user_token = response.text.strip()\n                print(\"\u2705 User token obtained\")\n                return True\n            else:\n                print(f\"\u274c User token request failed: {response.status_code}\")\n                print(f\"Response: {response.text[:200]}\")\n                \n                # If token is invalid, remove the device token\n                if response.status_code == 401:\n                    print(\"\ud83d\uddd1\ufe0f  Device token appears invalid, removing...\")\n                    if self.device_token_file.exists():\n                        self.device_token_file.unlink()\n                    self.device_token = None\n                    \n                return False\n                \n        except Exception as e:\n            self.logger.error(f\"User token error: {e}\")\n            print(f\"\u274c User token error: {e}\")\n            return False\n    \n    def discover_storage_host(self) -> bool:\n        \"\"\"\n        Discover the document storage service host\n        \n        Returns:\n            True if host discovered successfully\n        \"\"\"\n        try:\n            print(\"\ud83d\udd0d Discovering storage service host...\")\n            response = requests.get(self.service_manager_url, timeout=30)\n            \n            if response.status_code == 200:\n                service_info = response.json()\n                self.storage_host = service_info.get(\"Host\")\n                if self.storage_host:\n                    print(f\"\u2705 Storage host: {self.storage_host}\")\n                    return True\n                else:\n                    print(\"\u274c No storage host in response\")\n                    return False\n            else:\n                print(f\"\u274c Service discovery failed: {response.status_code}\")\n                return False\n                \n        except Exception as e:\n            self.logger.error(f\"Service discovery error: {e}\")\n            print(f\"\u274c Service 
discovery error: {e}\")\n            return False\n    \n    async def authenticate(self, one_time_code: Optional[str] = None) -> bool:\n        \"\"\"\n        Authenticate with reMarkable Cloud\n        \n        Args:\n            one_time_code: One-time code for device registration (if needed)\n            \n        Returns:\n            True if authentication successful\n        \"\"\"\n        try:\n            # If we have a device token, try to get user token\n            if self.device_token:\n                print(\"\ud83d\udd11 Using existing device token...\")\n                if self.get_user_token() and self.discover_storage_host():\n                    self.authenticated = True\n                    return True\n            \n            # If one-time code provided, register new device\n            if one_time_code:\n                if self.register_device(one_time_code):\n                    if self.get_user_token() and self.discover_storage_host():\n                        self.authenticated = True\n                        return True\n            \n            print(\"\u274c Authentication failed. Please provide a one-time code.\")\n            return False\n            \n        except Exception as e:\n            self.logger.error(f\"Authentication error: {e}\")\n            print(f\"\u274c Authentication error: {e}\")\n            return False\n    \n    def list_documents(self) -> List[Dict]:\n        \"\"\"\n        List all documents and folders in reMarkable Cloud using Chrome extension API\n        \n        Returns:\n            List of document/folder metadata dictionaries\n        \"\"\"\n        if not self.authenticated or not self.user_token:\n            print(\"\u274c Not authenticated\")\n            return []\n        \n        try:\n            # Use the Chrome extension API endpoint with storage host from token\n            storage_host = self._get_storage_host()\n            list_url = f\"{storage_host}/doc/v2/files\"\n            \n            headers = {\n                \"Authorization\": f\"Bearer {self.user_token}\",\n                \"rM-Source\": \"E-Ink-LLM-Assistant\"\n            }\n            \n            response = requests.get(list_url, headers=headers, timeout=30)\n            \n            if response.status_code == 200:\n                docs = response.json()\n                print(f\"\ud83d\udcc1 Retrieved {len(docs)} items from cloud\")\n                return docs\n            else:\n                print(f\"\u274c Failed to list documents: {response.status_code}\")\n                return []\n                \n        except Exception as e:\n            self.logger.error(f\"Error listing documents: {e}\")\n            print(f\"\u274c Error listing documents: {e}\")\n            return []\n    \n    def get_document_download_url(self, doc_id: str) -> Optional[str]:\n        \"\"\"\n        Get download URL for a specific document\n        \n        Args:\n            doc_id: Document UUID\n            \n        Returns:\n            Download URL or None if failed\n        \"\"\"\n        if not self.authenticated or not self.storage_host:\n            print(\"\u274c Not authenticated\")\n            return None\n        \n        try:\n            single_url = f\"https://{self.storage_host}/document-storage/json/2/docs?doc={doc_id}&withBlob=true\"\n            response = requests.get(\n                single_url,\n                headers={\"Authorization\": f\"Bearer {self.user_token}\"},\n                timeout=30\n            
)\n            \n            if response.status_code == 200:\n                doc_meta = response.json()\n                if doc_meta and len(doc_meta) > 0:\n                    blob_url = doc_meta[0].get(\"BlobURLGet\")\n                    return blob_url\n            \n            print(f\"\u274c Failed to get download URL for {doc_id}\")\n            return None\n            \n        except Exception as e:\n            self.logger.error(f\"Error getting download URL: {e}\")\n            return None\n    \n    def download_document(self, doc_id: str, doc_name: str, output_dir: Path) -> Optional[Path]:\n        \"\"\"\n        Download a document from reMarkable Cloud\n        \n        Args:\n            doc_id: Document UUID\n            doc_name: Document name for filename\n            output_dir: Directory to save the file\n            \n        Returns:\n            Path to downloaded file or None if failed\n        \"\"\"\n        try:\n            blob_url = self.get_document_download_url(doc_id)\n            if not blob_url:\n                return None\n            \n            # Download the content\n            response = requests.get(blob_url, timeout=60)\n            if response.status_code != 200:\n                print(f\"\u274c Failed to download content: {response.status_code}\")\n                return None\n            \n            # Create output directory\n            output_dir.mkdir(parents=True, exist_ok=True)\n            \n            # Generate safe filename\n            safe_name = \"\".join(c for c in doc_name if c.isalnum() or c in (' ', '-', '_')).rstrip()\n            if not safe_name:\n                safe_name = f\"document_{doc_id[:8]}\"\n            \n            # Save as ZIP first (reMarkable format)\n            zip_path = output_dir / f\"{safe_name}.zip\"\n            with open(zip_path, 'wb') as f:\n                f.write(response.content)\n            \n            # Try to extract PDF if available\n            try:\n                with zipfile.ZipFile(zip_path, 'r') as zip_ref:\n                    # Look for PDF file in the ZIP\n                    pdf_files = [name for name in zip_ref.namelist() if name.endswith('.pdf')]\n                    if pdf_files:\n                        # Extract the PDF\n                        pdf_path = output_dir / f\"{safe_name}.pdf\"\n                        with zip_ref.open(pdf_files[0]) as pdf_file:\n                            with open(pdf_path, 'wb') as out_file:\n                                out_file.write(pdf_file.read())\n                        \n                        # Clean up ZIP file\n                        zip_path.unlink()\n                        print(f\"\ud83d\udce5 Downloaded PDF: {doc_name} -> {pdf_path.name}\")\n                        return pdf_path\n            except Exception:\n                # If PDF extraction fails, keep the ZIP\n                pass\n            \n            print(f\"\ud83d\udce5 Downloaded ZIP: {doc_name} -> {zip_path.name}\")\n            return zip_path\n            \n        except Exception as e:\n            self.logger.error(f\"Error downloading {doc_name}: {e}\")\n            print(f\"\u274c Failed to download {doc_name}: {e}\")\n            return None\n    \n    def create_pdf_zip_package(self, pdf_path: Path, doc_name: str) -> bytes:\n        \"\"\"\n        Create a ZIP package for PDF upload in reMarkable format\n        \n        Args:\n            pdf_path: Path to PDF file\n            doc_name: Document name\n            \n        
Returns:\n            ZIP package as bytes\n        \"\"\"\n        try:\n            # Create temporary ZIP file\n            with tempfile.NamedTemporaryFile() as temp_zip:\n                with zipfile.ZipFile(temp_zip, 'w', zipfile.ZIP_DEFLATED) as zip_ref:\n                    # Add the PDF file\n                    zip_ref.write(pdf_path, f\"{doc_name}.pdf\")\n                    \n                    # Create .content metadata file\n                    content_metadata = {\n                        \"extraMetadata\": {},\n                        \"fileType\": \"pdf\",\n                        \"fontName\": \"\",\n                        \"lastOpenedPage\": 0,\n                        \"lineHeight\": -1,\n                        \"margins\": 100,\n                        \"pageCount\": 1,  # Will be updated by reMarkable\n                        \"textScale\": 1,\n                        \"transform\": {\n                            \"m11\": 1, \"m12\": 0, \"m13\": 0,\n                            \"m21\": 0, \"m22\": 1, \"m23\": 0,\n                            \"m31\": 0, \"m32\": 0, \"m33\": 1\n                        }\n                    }\n                    \n                    zip_ref.writestr(f\"{doc_name}.content\", json.dumps(content_metadata))\n                    \n                    # Create .pagedata file (empty for PDF)\n                    zip_ref.writestr(f\"{doc_name}.pagedata\", \"\")\n                \n                # Read the ZIP content\n                temp_zip.seek(0)\n                return temp_zip.read()\n                \n        except Exception as e:\n            self.logger.error(f\"Error creating ZIP package: {e}\")\n            raise\n    \n    def upload_document(self, file_path: Path, folder_id: str = \"\", \n                       document_name: Optional[str] = None) -> bool:\n        \"\"\"\n        Upload a PDF document to reMarkable Cloud\n        \n        Args:\n            file_path: Path to PDF file\n            folder_id: Target folder ID (empty string for root)\n            document_name: Name for the document\n            \n        Returns:\n            True if upload successful\n        \"\"\"\n        if not self.authenticated or not self.storage_host:\n            print(\"\u274c Not authenticated\")\n            return False\n        \n        try:\n            if not file_path.exists():\n                print(f\"\u274c File not found: {file_path}\")\n                return False\n            \n            if file_path.suffix.lower() != '.pdf':\n                print(f\"\u274c Only PDF files supported. 
Got: {file_path.suffix}\")\n                return False\n            \n            # Use filename if no document name provided\n            if not document_name:\n                document_name = file_path.stem\n            \n            print(f\"\ud83d\udce4 Uploading {file_path.name} as '{document_name}'...\")\n            \n            # Step 1: Create upload request\n            new_id = str(uuid.uuid4())\n            timestamp = datetime.utcnow().isoformat() + \"Z\"\n            \n            req_payload = [{\n                \"ID\": new_id,\n                \"Version\": 1,\n                \"ModifiedClient\": timestamp\n            }]\n            \n            req_url = f\"https://{self.storage_host}/document-storage/json/2/upload/request\"\n            response = requests.put(\n                req_url,\n                json=req_payload,\n                headers={\"Authorization\": f\"Bearer {self.user_token}\"},\n                timeout=30\n            )\n            \n            if response.status_code != 200:\n                print(f\"\u274c Upload request failed: {response.status_code}\")\n                return False\n            \n            upload_data = response.json()\n            blob_put_url = upload_data[0][\"BlobURLPut\"]\n            \n            # Step 2: Upload the content\n            zip_content = self.create_pdf_zip_package(file_path, document_name)\n            \n            upload_response = requests.put(blob_put_url, data=zip_content, timeout=60)\n            if upload_response.status_code != 200:\n                print(f\"\u274c Content upload failed: {upload_response.status_code}\")\n                return False\n            \n            # Step 3: Update metadata\n            meta_payload = [{\n                \"ID\": new_id,\n                \"Version\": 1,\n                \"ModifiedClient\": timestamp,\n                \"Type\": \"DocumentType\",\n                \"VissibleName\": document_name,\n                \"Parent\": folder_id,\n                \"Bookmarked\": False\n            }]\n            \n            update_url = f\"https://{self.storage_host}/document-storage/json/2/upload/update-status\"\n            meta_response = requests.put(\n                update_url,\n                json=meta_payload,\n                headers={\"Authorization\": f\"Bearer {self.user_token}\"},\n                timeout=30\n            )\n            \n            if meta_response.status_code == 200:\n                print(f\"\u2705 Upload successful: {document_name}\")\n                return True\n            else:\n                print(f\"\u274c Metadata update failed: {meta_response.status_code}\")\n                return False\n                \n        except Exception as e:\n            self.logger.error(f\"Error uploading {file_path}: {e}\")\n            print(f\"\u274c Upload failed: {e}\")\n            return False\n    \n    def find_folder_by_name(self, folder_name: str) -> Optional[str]:\n        \"\"\"\n        Find a folder by name and return its ID\n        \n        Args:\n            folder_name: Name of the folder to find\n            \n        Returns:\n            Folder ID or None if not found\n        \"\"\"\n        docs = self.list_documents()\n        for doc in docs:\n            if (doc.get(\"Type\") == \"CollectionType\" and \n                doc.get(\"VissibleName\") == folder_name):\n                return doc.get(\"ID\")\n        return None\n    \n    def get_documents_in_folder(self, folder_id: str = \"\") -> List[Dict]:\n      
  \"\"\"\n        Get all documents in a specific folder\n        \n        Args:\n            folder_id: Folder ID (empty string for root)\n            \n        Returns:\n            List of document metadata dictionaries\n        \"\"\"\n        docs = self.list_documents()\n        folder_docs = []\n        \n        for doc in docs:\n            if (doc.get(\"Type\") == \"DocumentType\" and \n                doc.get(\"Parent\") == folder_id):\n                folder_docs.append(doc)\n        \n        return folder_docs\n\n    async def create_folder(self, folder_name: str, parent_id: Optional[str] = None) -> bool:\n        \"\"\"\n        Create a folder in reMarkable Cloud\n        \n        Args:\n            folder_name: Name of the folder to create\n            parent_id: Parent folder ID (None for root)\n            \n        Returns:\n            True if folder created successfully, False otherwise\n        \"\"\"\n        try:\n            storage_host = self._get_storage_host()\n            if not storage_host:\n                print(\"\u274c Could not determine storage host\")\n                return False\n            \n            folder_id = await self._create_folder(folder_name, parent_id, storage_host)\n            return folder_id is not None\n            \n        except Exception as e:\n            print(f\"\u274c Error creating folder '{folder_name}': {e}\")\n            return False\n\n    async def upload_content(self, content: bytes, filename: str, folder_path: str = None, \n                           file_type: str = \"application/pdf\") -> Optional[str]:\n        \"\"\"\n        Upload content directly to reMarkable Cloud using new API endpoints\n        \n        Args:\n            content: File content as bytes\n            filename: Name of the file\n            folder_path: Target folder path (e.g., \"/My Folder\")\n            file_type: MIME type of the file\n        \n        Returns:\n            Document ID if successful, None otherwise\n        \"\"\"\n        if not self.authenticated:\n            print(\"\u274c Not authenticated with reMarkable Cloud\")\n            return None\n        \n        try:\n            # Get storage host from user token\n            storage_host = self._get_storage_host()\n            \n            # Find or create target folder\n            parent_id = None\n            if folder_path:\n                parent_id = await self._ensure_folder_exists(folder_path, storage_host)\n            \n            # Create metadata (based on Chrome extension pattern)\n            metadata = {\n                'file_name': filename,\n                'type': 'DocumentType'\n            }\n            if parent_id:\n                metadata['parent'] = parent_id\n            \n            # Encode metadata as base64\n            import base64\n            meta_encoded = base64.b64encode(json.dumps(metadata).encode()).decode()\n            \n            headers = {\n                'Authorization': f'Bearer {self.user_token}',\n                'rM-Source': 'E-Ink-LLM-Assistant',\n                'rM-Meta': meta_encoded,\n                'Content-Type': file_type,\n                'User-Agent': 'E-Ink-LLM-Assistant/1.0'\n            }\n            \n            url = f\"{storage_host}/doc/v2/files\"\n            \n            response = requests.post(url, headers=headers, data=content, timeout=60)\n            \n            if response.status_code in [200, 201]:\n                # Extract document ID from Location header\n                
location = response.headers.get('Location', '')\n                document_id = location.split('/')[-1] if location else None\n                print(f\"\u2705 Uploaded {filename} to reMarkable Cloud (ID: {document_id})\")\n                return document_id\n            else:\n                print(f\"\u274c Upload failed: {response.status_code} - {response.text}\")\n                return None\n                        \n        except Exception as e:\n            print(f\"\u274c Upload error: {e}\")\n            return None\n    \n    def _get_storage_host(self) -> str:\n        \"\"\"\n        Extract storage host from JWT token (tectonic service)\n        Based on Chrome extension logic\n        \"\"\"\n        try:\n            import base64\n            # Decode JWT payload\n            payload = self.user_token.split('.')[1]\n            # Add padding if needed\n            payload += '=' * (4 - len(payload) % 4)\n            decoded = json.loads(base64.b64decode(payload))\n            \n            tectonic = decoded.get('tectonic')\n            if tectonic and isinstance(tectonic, str) and tectonic:\n                return f\"https://{tectonic}.tectonic.remarkable.com\"\n            else:\n                # Fallback to internal cloud\n                return \"https://internal.cloud.remarkable.com\"\n        except Exception:\n            # Fallback if token parsing fails\n            return \"https://internal.cloud.remarkable.com\"\n    \n    async def _ensure_folder_exists(self, folder_path: str, storage_host: str) -> Optional[str]:\n        \"\"\"\n        Ensure folder exists, create if necessary\n        \n        Args:\n            folder_path: Path to folder (starting with /)\n            storage_host: Storage service host URL\n        \n        Returns:\n            Folder ID if successful\n        \"\"\"\n        try:\n            # Get current files to find existing folder\n            folder_id = await self._find_folder_by_path(folder_path, storage_host)\n            if folder_id:\n                return folder_id\n            \n            # Create folder(s) as needed\n            path_parts = [part for part in folder_path.split('/') if part]\n            current_parent = None\n            current_path = \"\"\n            \n            for part in path_parts:\n                current_path += f\"/{part}\"\n                existing_id = await self._find_folder_by_path(current_path, storage_host)\n                \n                if existing_id:\n                    current_parent = existing_id\n                else:\n                    # Create this folder\n                    current_parent = await self._create_folder(part, current_parent, storage_host)\n                    if not current_parent:\n                        raise Exception(f\"Failed to create folder '{part}'\")\n            \n            return current_parent\n            \n        except Exception as e:\n            print(f\"\u274c Failed to ensure folder exists: {e}\")\n            return None\n    \n    async def _find_folder_by_path(self, folder_path: str, storage_host: str) -> Optional[str]:\n        \"\"\"Find folder ID by path\"\"\"\n        try:\n            if folder_path == \"/\" or folder_path == \"\":\n                return None  # Root folder\n            \n            # Get all files/folders\n            url = f\"{storage_host}/doc/v2/files\"\n            headers = {\n                'Authorization': f'Bearer {self.user_token}',\n                'rM-Source': 'E-Ink-LLM-Assistant'\n            
}\n            \n            response = requests.get(url, headers=headers, timeout=30)\n            if response.status_code != 200:\n                return None\n            \n            files = response.json()\n            path_parts = [part for part in folder_path.split('/') if part]\n            current_parent = None\n            \n            for part in path_parts:\n                found = False\n                for item in files:\n                    if (item.get('visibleName') == part and \n                        item.get('type') == 'CollectionType' and\n                        item.get('parent') == current_parent):\n                        current_parent = item.get('id')\n                        found = True\n                        break\n                \n                if not found:\n                    return None\n            \n            return current_parent\n                    \n        except Exception:\n            return None\n    \n    async def _create_folder(self, folder_name: str, parent_id: Optional[str], storage_host: str) -> Optional[str]:\n        \"\"\"Create a single folder\"\"\"\n        try:\n            import base64\n            \n            # Create folder metadata\n            metadata = {\n                'file_name': folder_name,\n                'type': 'CollectionType'\n            }\n            if parent_id:\n                metadata['parent'] = parent_id\n            \n            # Encode metadata as base64\n            meta_encoded = base64.b64encode(json.dumps(metadata).encode()).decode()\n            \n            headers = {\n                'Authorization': f'Bearer {self.user_token}',\n                'rM-Source': 'E-Ink-LLM-Assistant',\n                'rM-Meta': meta_encoded,\n                'Content-Type': 'folder'  # Special content type for folders\n            }\n            \n            url = f\"{storage_host}/doc/v2/files\"\n            \n            response = requests.post(url, headers=headers, data='', timeout=30)\n            \n            if response.status_code in [200, 201]:\n                # Extract folder ID from Location header\n                location = response.headers.get('Location', '')\n                folder_id = location.split('/')[-1] if location else None\n                print(f\"\u2705 Created folder '{folder_name}' (ID: {folder_id})\")\n                return folder_id\n            else:\n                print(f\"\u274c Failed to create folder '{folder_name}': {response.status_code} - {response.text}\")\n                return None\n                        \n        except Exception as e:\n            print(f\"\u274c Error creating folder '{folder_name}': {e}\")\n            return None",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/remarkable_rest_client.py",
      "tags": [
        "class",
        "remarkablerestclient"
      ],
      "updated_at": "2025-12-07T01:04:36.145398",
      "usage_example": "# Example usage:\n# result = RemarkableRestClient(bases)"
    },
    {
      "best_practices": [
        "Always check if a file is supported using is_supported_file() before calling extract_image() to avoid exceptions",
        "Choose enable_multi_page=False for simple use cases where only the first page is needed to improve performance",
        "Set an appropriate max_pages limit based on your memory constraints and processing requirements",
        "The class automatically handles image resizing to 2048px maximum dimension to optimize for API limits",
        "Images with transparency (RGBA, LA, P modes) are automatically converted to RGB with white background",
        "PDF pages are rendered at 2x zoom (DPI) for better quality suitable for e-ink displays",
        "Handle exceptions from extract_image() as file processing can fail due to corrupted files or unsupported formats",
        "For multi-page PDFs, the returned metadata includes text_content per page which can be used for content analysis",
        "The class is stateless after initialization - you can reuse the same instance for multiple files",
        "Base64 strings returned are ready for direct use in API calls or data URIs"
      ],
      "class_interface": {
        "attributes": [
          {
            "description": "Flag indicating whether multi-page PDF processing is enabled",
            "is_class_variable": false,
            "name": "enable_multi_page",
            "type": "bool"
          },
          {
            "description": "Maximum number of pages to process from multi-page PDFs",
            "is_class_variable": false,
            "name": "max_pages",
            "type": "int"
          },
          {
            "description": "Instance of MultiPagePDFProcessor for handling multi-page PDFs, or None if multi-page processing is disabled",
            "is_class_variable": false,
            "name": "multi_page_processor",
            "type": "Optional[MultiPagePDFProcessor]"
          }
        ],
        "methods": [
          {
            "is_property": false,
            "is_static": false,
            "name": "__init__",
            "parameters": {
              "enable_multi_page": "Boolean to enable multi-page PDF processing (default: True)",
              "max_pages": "Maximum number of pages to process in multi-page mode (default: 50)"
            },
            "purpose": "Initialize the InputProcessor with configuration for PDF processing mode",
            "returns": "None - initializes the instance",
            "signature": "__init__(self, enable_multi_page: bool = True, max_pages: int = 50)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "extract_image",
            "parameters": {
              "file_path": "String path to the input file (PDF or image format)"
            },
            "purpose": "Main method to extract and encode images from PDF or image files to base64 format",
            "returns": "For single page/image: Tuple of (base64_encoded_image, metadata_dict). For multi-page PDF: Tuple of (list_of_base64_images, enhanced_metadata_dict)",
            "signature": "extract_image(self, file_path: str) -> Union[Tuple[str, dict], Tuple[List[str], dict]]"
          },
          {
            "is_property": false,
            "is_static": true,
            "name": "_extract_from_pdf",
            "parameters": {
              "pdf_path": "Path object pointing to the PDF file"
            },
            "purpose": "Extract the first page of a PDF as a base64-encoded image with metadata",
            "returns": "Tuple of (base64_encoded_image, metadata_dict) containing source info, page count, and dimensions",
            "signature": "_extract_from_pdf(pdf_path: Path) -> Tuple[str, dict]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_extract_multi_page_pdf",
            "parameters": {
              "pdf_path": "Path object pointing to the PDF file"
            },
            "purpose": "Extract all pages from a PDF using the multi-page processor with text extraction",
            "returns": "Tuple of (list_of_base64_images, enhanced_metadata_dict) with per-page text content and analysis",
            "signature": "_extract_multi_page_pdf(self, pdf_path: Path) -> Tuple[List[str], dict]"
          },
          {
            "is_property": false,
            "is_static": true,
            "name": "_encode_image",
            "parameters": {
              "image_path": "Path object pointing to the image file"
            },
            "purpose": "Encode a standard image file to base64 with automatic format conversion and resizing",
            "returns": "Tuple of (base64_encoded_image, metadata_dict) with source info, format, and dimensions",
            "signature": "_encode_image(image_path: Path) -> Tuple[str, dict]"
          },
          {
            "is_property": false,
            "is_static": true,
            "name": "is_supported_file",
            "parameters": {
              "file_path": "Path object to check for supported extension"
            },
            "purpose": "Check if a file type is supported by the processor based on file extension",
            "returns": "Boolean indicating whether the file type is supported (PDF or common image formats)",
            "signature": "is_supported_file(file_path: Path) -> bool"
          }
        ]
      },
      "complexity": "moderate",
      "component_type": "class",
      "conditional_imports": [
        {
          "condition": "required when enable_multi_page=True for multi-page PDF processing",
          "import": "from multi_page_processor import MultiPagePDFProcessor, PageAnalysis",
          "optional": false
        }
      ],
      "created_at": "2025-12-07 00:02:05",
      "decorators": [],
      "dependencies": [
        "base64",
        "asyncio",
        "json",
        "pathlib",
        "PIL",
        "fitz",
        "io",
        "typing",
        "os",
        "multi_page_processor"
      ],
      "description": "A class that handles extraction and encoding of images from various input formats including PDFs (single or multi-page) and standard image files, converting them to base64-encoded strings with metadata.",
      "docstring": "Handles extraction and encoding of images from various input formats",
      "id": 1985,
      "imports": [
        "import base64",
        "import asyncio",
        "import json",
        "from pathlib import Path",
        "from PIL import Image as PILImage",
        "import fitz",
        "import io",
        "from typing import Optional",
        "from typing import Tuple",
        "from typing import List",
        "from typing import Union",
        "import os",
        "from multi_page_processor import MultiPagePDFProcessor",
        "from multi_page_processor import PageAnalysis"
      ],
      "imports_required": [
        "import base64",
        "import asyncio",
        "import json",
        "from pathlib import Path",
        "from PIL import Image as PILImage",
        "import fitz",
        "import io",
        "from typing import Optional, Tuple, List, Union",
        "import os",
        "from multi_page_processor import MultiPagePDFProcessor, PageAnalysis"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 151,
      "line_start": 13,
      "name": "InputProcessor",
      "parameters": [
        {
          "annotation": "",
          "default": null,
          "name": "bases"
        }
      ],
      "parameters_explained": {
        "enable_multi_page": "Boolean flag that determines whether to process all pages of a PDF (True) or only the first page (False). When enabled, uses MultiPagePDFProcessor for comprehensive page extraction. Default is True.",
        "max_pages": "Integer specifying the maximum number of pages to process from a multi-page PDF. This limit prevents excessive processing time and memory usage for very large documents. Default is 50 pages."
      },
      "parent_class": null,
      "purpose": "InputProcessor provides a unified interface for processing different image input formats. It can extract images from PDF files (either first page only or all pages), encode standard image files to base64, and return comprehensive metadata about the processed content. The class supports multi-page PDF processing with text extraction and content analysis, making it suitable for document processing pipelines that need to convert visual content for API consumption or further processing.",
      "return_annotation": null,
      "return_explained": "Instantiation returns an InputProcessor object. The main extract_image method returns different types based on input: For single-page mode or image files, returns a tuple of (base64_string, metadata_dict). For multi-page PDFs, returns a tuple of (list_of_base64_strings, enhanced_metadata_dict). Metadata includes source_type, source_file, dimensions, and for multi-page PDFs, detailed per-page information including text content and content analysis.",
      "settings_required": [
        "PyMuPDF (fitz) library must be installed for PDF processing",
        "Pillow (PIL) library must be installed for image processing",
        "MultiPagePDFProcessor class must be available in the multi_page_processor module",
        "Sufficient memory for processing large images and PDFs (images are resized to max 2048px if larger)"
      ],
      "source_code": "class InputProcessor:\n    \"\"\"Handles extraction and encoding of images from various input formats\"\"\"\n    \n    def __init__(self, enable_multi_page: bool = True, max_pages: int = 50):\n        \"\"\"\n        Initialize input processor\n        \n        Args:\n            enable_multi_page: Enable multi-page PDF processing\n            max_pages: Maximum pages to process in multi-page mode\n        \"\"\"\n        self.enable_multi_page = enable_multi_page\n        self.max_pages = max_pages\n        self.multi_page_processor = MultiPagePDFProcessor(max_pages=max_pages) if enable_multi_page else None\n    \n    def extract_image(self, file_path: str) -> Union[Tuple[str, dict], Tuple[List[str], dict]]:\n        \"\"\"\n        Extract image(s) from PDF or encode image file to base64\n        \n        Args:\n            file_path: Path to the input file\n            \n        Returns:\n            For single page: Tuple of (base64_encoded_image, metadata_dict)\n            For multi-page: Tuple of (list_of_base64_images, metadata_dict)\n        \"\"\"\n        file_path = Path(file_path)\n        \n        if file_path.suffix.lower() == '.pdf':\n            if self.enable_multi_page:\n                return self._extract_multi_page_pdf(file_path)\n            else:\n                return self._extract_from_pdf(file_path)\n        else:\n            return self._encode_image(file_path)\n    \n    @staticmethod\n    def _extract_from_pdf(pdf_path: Path) -> Tuple[str, dict]:\n        \"\"\"Extract first page of PDF as image\"\"\"\n        try:\n            doc = fitz.open(pdf_path)\n            page = doc[0]  # Get first page\n            \n            # Render page as image (high DPI for e-ink clarity)\n            mat = fitz.Matrix(2.0, 2.0)  # 2x zoom for better quality\n            pix = page.get_pixmap(matrix=mat)\n            \n            # Convert to PIL Image\n            img_data = pix.tobytes(\"png\")\n            img = PILImage.open(io.BytesIO(img_data))\n            \n            # Convert to base64\n            buffer = io.BytesIO()\n            img.save(buffer, format='PNG')\n            img_b64 = base64.b64encode(buffer.getvalue()).decode()\n            \n            metadata = {\n                'source_type': 'pdf',\n                'source_file': str(pdf_path),\n                'page_count': len(doc),\n                'dimensions': (img.width, img.height)\n            }\n            \n            doc.close()\n            return img_b64, metadata\n            \n        except Exception as e:\n            raise Exception(f\"Error processing PDF {pdf_path}: {str(e)}\")\n    \n    def _extract_multi_page_pdf(self, pdf_path: Path) -> Tuple[List[str], dict]:\n        \"\"\"Extract all pages from PDF using multi-page processor\"\"\"\n        try:\n            pages, metadata = self.multi_page_processor.extract_all_pages(pdf_path)\n            \n            # Convert to expected format\n            page_images = [page.image_b64 for page in pages]\n            \n            # Enhanced metadata with multi-page info\n            enhanced_metadata = {\n                **metadata,\n                'pages': [\n                    {\n                        'page_number': page.page_number,\n                        'text_content': page.text_content,\n                        'dimensions': page.dimensions,\n                        'has_content': len(page.text_content.strip()) > 0\n                    }\n                    for page in pages\n                
],\n                'total_text_length': sum(len(page.text_content) for page in pages),\n                'content_pages': sum(1 for page in pages if len(page.text_content.strip()) > 0)\n            }\n            \n            return page_images, enhanced_metadata\n            \n        except Exception as e:\n            raise Exception(f\"Error processing multi-page PDF {pdf_path}: {str(e)}\")\n    \n    @staticmethod\n    def _encode_image(image_path: Path) -> Tuple[str, dict]:\n        \"\"\"Encode image file to base64\"\"\"\n        try:\n            # Open and process image\n            with PILImage.open(image_path) as img:\n                # Convert to RGB if necessary (for PNG with transparency)\n                if img.mode in ('RGBA', 'LA', 'P'):\n                    background = PILImage.new('RGB', img.size, (255, 255, 255))\n                    if img.mode == 'P':\n                        img = img.convert('RGBA')\n                    background.paste(img, mask=img.split()[-1] if img.mode in ('RGBA', 'LA') else None)\n                    img = background\n                \n                # Resize if too large (optimize for API limits)\n                max_size = 2048\n                if img.width > max_size or img.height > max_size:\n                    img.thumbnail((max_size, max_size), PILImage.Resampling.LANCZOS)\n                \n                # Convert to base64\n                buffer = io.BytesIO()\n                img.save(buffer, format='JPEG', quality=85)\n                img_b64 = base64.b64encode(buffer.getvalue()).decode()\n                \n                metadata = {\n                    'source_type': 'image',\n                    'source_file': str(image_path),\n                    'original_format': image_path.suffix.lower(),\n                    'dimensions': img.size\n                }\n                \n                return img_b64, metadata\n                \n        except Exception as e:\n            raise Exception(f\"Error processing image {image_path}: {str(e)}\")\n    \n    @staticmethod\n    def is_supported_file(file_path: Path) -> bool:\n        \"\"\"Check if file type is supported\"\"\"\n        supported_extensions = {'.pdf', '.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff', '.webp'}\n        return file_path.suffix.lower() in supported_extensions",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/input_processor.py",
      "tags": [
        "image-processing",
        "pdf-extraction",
        "base64-encoding",
        "document-processing",
        "multi-page",
        "file-conversion",
        "metadata-extraction",
        "image-encoding",
        "pdf-to-image"
      ],
      "updated_at": "2025-12-07T01:02:05.873025",
      "usage_example": "# Basic usage with single-page PDF processing\nfrom pathlib import Path\nfrom input_processor import InputProcessor\n\n# Initialize processor for single-page mode\nprocessor = InputProcessor(enable_multi_page=False)\n\n# Process a PDF (first page only)\nimage_b64, metadata = processor.extract_image('document.pdf')\nprint(f\"Processed {metadata['source_type']}: {metadata['dimensions']}\")\n\n# Process an image file\nimage_b64, metadata = processor.extract_image('photo.jpg')\nprint(f\"Image dimensions: {metadata['dimensions']}\")\n\n# Initialize for multi-page processing\nmulti_processor = InputProcessor(enable_multi_page=True, max_pages=10)\n\n# Process multi-page PDF\npage_images, metadata = multi_processor.extract_image('report.pdf')\nprint(f\"Processed {len(page_images)} pages\")\nfor page_info in metadata['pages']:\n    print(f\"Page {page_info['page_number']}: {page_info['has_content']}\")\n\n# Check if file is supported before processing\nfile_path = Path('document.pdf')\nif InputProcessor.is_supported_file(file_path):\n    result = processor.extract_image(str(file_path))"
    },
    {
      "best_practices": [],
      "class_interface": {
        "attributes": [],
        "methods": [
          {
            "is_property": false,
            "is_static": false,
            "name": "__init__",
            "parameters": {
              "api_key": "Type: Optional[str]",
              "auto_detect_session": "Type: bool",
              "compact_mode": "Type: bool",
              "conversation_id": "Type: Optional[str]",
              "enable_editing_workflow": "Type: bool",
              "enable_hybrid_mode": "Type: bool",
              "enable_multi_page": "Type: bool",
              "max_pages": "Type: int",
              "watch_folder": "Type: Optional[str]"
            },
            "purpose": "Internal method:   init  ",
            "returns": "None",
            "signature": "__init__(self, api_key, watch_folder, conversation_id, compact_mode, auto_detect_session, enable_multi_page, max_pages, enable_editing_workflow, enable_hybrid_mode)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "setup_logging",
            "parameters": {},
            "purpose": "Set up logging for the application",
            "returns": "None",
            "signature": "setup_logging(self)"
          }
        ]
      },
      "complexity": "moderate",
      "component_type": "class",
      "conditional_imports": [],
      "created_at": "2025-12-07 00:00:55",
      "decorators": [],
      "dependencies": [],
      "description": "Main processor class that handles the complete workflow",
      "docstring": "Main processor class that handles the complete workflow",
      "id": 1983,
      "imports": [
        "import os",
        "import asyncio",
        "import time",
        "from pathlib import Path",
        "from watchdog.observers import Observer",
        "from watchdog.events import FileSystemEventHandler",
        "from datetime import datetime",
        "from typing import Optional",
        "import logging",
        "from input_processor import InputProcessor",
        "from llm_handler import LLMHandler",
        "from pdf_generator import PDFGenerator",
        "from session_manager import SessionManager",
        "from compact_formatter import CompactResponseFormatter",
        "from session_detector import SessionDetector",
        "from session_detector import detect_session_from_file",
        "from multi_page_llm_handler import MultiPageLLMHandler",
        "from editing_workflow import EditingWorkflowHandler",
        "from conversation_context import ConversationContextManager",
        "from hybrid_response_handler import HybridResponseHandler"
      ],
      "imports_required": [
        "import os",
        "import asyncio",
        "import time",
        "from pathlib import Path",
        "from watchdog.observers import Observer"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 455,
      "line_start": 61,
      "name": "EInkLLMProcessor",
      "parameters": [
        {
          "annotation": "",
          "default": null,
          "name": "bases"
        }
      ],
      "parameters_explained": {
        "bases": "Parameter of type "
      },
      "parent_class": null,
      "purpose": "Main processor class that handles the complete workflow",
      "return_annotation": null,
      "return_explained": "Returns unspecified type",
      "settings_required": [],
      "source_code": "class EInkLLMProcessor:\n    \"\"\"Main processor class that handles the complete workflow\"\"\"\n    \n    def __init__(self, api_key: Optional[str] = None, watch_folder: Optional[str] = None, \n                 conversation_id: Optional[str] = None, compact_mode: bool = True,\n                 auto_detect_session: bool = True, enable_multi_page: bool = True,\n                 max_pages: int = 50, enable_editing_workflow: bool = True,\n                 enable_hybrid_mode: bool = True):\n        # Initialize components\n        self.input_processor = InputProcessor(enable_multi_page=enable_multi_page, max_pages=max_pages)\n        self.llm_handler = LLMHandler(api_key)\n        self.multi_page_handler = MultiPageLLMHandler(api_key) if enable_multi_page else None\n        self.pdf_generator = PDFGenerator()\n        self.session_manager = SessionManager()\n        self.compact_formatter = CompactResponseFormatter()\n        self.session_detector = SessionDetector()\n        self.conversation_context = ConversationContextManager(self.session_manager)\n        self.editing_workflow = EditingWorkflowHandler(self.llm_handler) if enable_editing_workflow else None\n        \n        # Initialize hybrid response handler if available and enabled\n        self.hybrid_handler = None\n        self.enable_hybrid_mode = enable_hybrid_mode and HYBRID_AVAILABLE\n        if self.enable_hybrid_mode:\n            self.hybrid_handler = HybridResponseHandler(api_key)\n            print(f\"\ud83c\udfa8 Hybrid mode enabled (text + graphics)\")\n        elif enable_hybrid_mode and not HYBRID_AVAILABLE:\n            print(f\"\u26a0\ufe0f Hybrid mode requested but dependencies not available\")\n        \n        # Configuration\n        self.compact_mode = compact_mode\n        self.auto_detect_session = auto_detect_session\n        self.enable_multi_page = enable_multi_page\n        self.max_pages = max_pages\n        self.enable_editing_workflow = enable_editing_workflow\n        \n        # Session management\n        if conversation_id:\n            self.conversation_id = conversation_id\n        else:\n            self.conversation_id = self.session_manager.create_conversation()\n        \n        # Set up watch folder\n        self.watch_folder = Path(watch_folder) if watch_folder else Path.cwd() / \"watch\"\n        self.watch_folder.mkdir(exist_ok=True)\n        \n        # Set up logging\n        self.setup_logging()\n        \n        print(f\"\ud83c\udfaf E-Ink LLM Processor initialized\")\n        print(f\"\ud83c\udd94 Conversation ID: {self.conversation_id}\")\n        print(f\"\ud83d\udcc1 Watch folder: {self.watch_folder.absolute()}\")\n        print(f\"\ud83e\udd16 Models: {self.llm_handler.small_model} (preprocessing), {self.llm_handler.main_model} (main)\")\n        print(f\"\ud83d\udcdd Compact mode: {'ON' if self.compact_mode else 'OFF'}\")\n        print(f\"\ud83c\udfa8 Hybrid mode: {'ON' if self.enable_hybrid_mode else 'OFF'} (text + graphics)\")\n        print(f\"\ud83d\udd0d Auto-detect sessions: {'ON' if self.auto_detect_session else 'OFF'}\")\n        print(f\"\ud83d\udcca Multi-page PDFs: {'ON' if self.enable_multi_page else 'OFF'} (max {self.max_pages} pages)\")\n        print(f\"\ud83d\udcbe Session tracking: {self.session_manager.db_path}\")\n    \n    def setup_logging(self):\n        \"\"\"Set up logging for the application\"\"\"\n        log_file = self.watch_folder / \"eink_llm.log\"\n        logging.basicConfig(\n            level=logging.INFO,\n            
format='%(asctime)s - %(levelname)s - %(message)s',\n            handlers=[\n                logging.FileHandler(log_file),\n                logging.StreamHandler()\n            ]\n        )\n        self.logger = logging.getLogger(__name__)\n    \n    async def process_file(self, file_path: Path) -> Optional[Path]:\n        \"\"\"\n        Process a single file and generate response PDF\n        \n        Args:\n            file_path: Path to input file\n            \n        Returns:\n            Path to generated response PDF or None if failed\n        \"\"\"\n        start_time = time.time()\n        \n        try:\n            # Auto-detect session information if enabled\n            detected_session = None\n            if self.auto_detect_session:\n                detected_session = detect_session_from_file(str(file_path))\n                if detected_session:\n                    if detected_session.confidence >= 0.7:  # High confidence threshold\n                        print(f\"\ud83d\udd0d Auto-detected session: {detected_session.conversation_id} \"\n                              f\"(exchange #{detected_session.exchange_number}, \"\n                              f\"confidence: {detected_session.confidence:.2f})\")\n                        \n                        # Update conversation ID to detected one\n                        self.conversation_id = detected_session.conversation_id\n                        print(f\"\ud83d\udd04 Switched to detected conversation: {self.conversation_id}\")\n                    else:\n                        print(f\"\ud83d\udd0d Detected session with low confidence ({detected_session.confidence:.2f}), \"\n                              f\"continuing with current conversation\")\n            \n            print(f\"\\n{'='*60}\")\n            print(f\"\ud83d\ude80 PROCESSING: {file_path.name}\")\n            print(f\"\ud83c\udd94 Conversation: {self.conversation_id}\")\n            if detected_session and detected_session.confidence >= 0.7:\n                print(f\"\ud83c\udfaf Auto-continuation from exchange #{detected_session.exchange_number}\")\n            print(f\"{'='*60}\")\n            \n            # Step 1: Extract image(s) from input\n            print(f\"\ud83d\udcf8 Step 1: Extracting image from {file_path.suffix} file...\")\n            extraction_result = self.input_processor.extract_image(str(file_path))\n            \n            # Handle both single-page and multi-page results\n            if isinstance(extraction_result[0], list):\n                # Multi-page PDF\n                page_images, metadata = extraction_result\n                is_multi_page = True\n                print(f\"\u2705 Multi-page extraction successful\")\n                print(f\"   \u2022 Total pages: {metadata.get('total_pages', len(page_images))}\")\n                print(f\"   \u2022 Processed pages: {len(page_images)}\")\n                print(f\"   \u2022 Content pages: {metadata.get('content_pages', 'Unknown')}\")\n                print(f\"   \u2022 Total text length: {metadata.get('total_text_length', 0):,} chars\")\n            else:\n                # Single page\n                image_b64, metadata = extraction_result\n                is_multi_page = False\n                print(f\"\u2705 Image extracted successfully\")\n                print(f\"   \u2022 Dimensions: {metadata.get('dimensions', 'Unknown')}\")\n                print(f\"   \u2022 Source type: {metadata.get('source_type', 'Unknown')}\")\n            \n            # Step 2: Get 
conversation context\n            conversation_context = self.session_manager.get_conversation_context(self.conversation_id)\n            if conversation_context:\n                print(f\"\ud83d\udcac Using conversation context ({len(conversation_context)} chars)\")\n                metadata['conversation_context'] = conversation_context\n                \n                # Add detected session context if available\n                if detected_session and detected_session.confidence >= 0.7:\n                    metadata['continuation_note'] = (\n                        f\"This appears to be a follow-up to exchange #{detected_session.exchange_number} \"\n                        f\"in conversation {detected_session.conversation_id}. \"\n                        f\"Please provide a response that builds upon the previous conversation.\"\n                    )\n            \n            # Step 3: Process with AI\n            if is_multi_page and self.multi_page_handler:\n                print(f\"\ud83e\udde0 Step 3: Processing multi-page document with AI...\")\n                \n                # Get pages from multi-page processor\n                pages, _ = self.input_processor.multi_page_processor.extract_all_pages(file_path)\n                \n                # Analyze with multi-page handler\n                multi_result = await self.multi_page_handler.analyze_multi_page_document(\n                    pages, metadata, conversation_context\n                )\n                \n                llm_response = multi_result.combined_response\n                print(f\"\u2705 Multi-page AI processing complete\")\n                print(f\"   \u2022 Pages analyzed: {multi_result.processing_stats['pages_processed']}\")\n                print(f\"   \u2022 Analysis method: {', '.join(multi_result.processing_stats['analysis_methods'])}\")\n                print(f\"   \u2022 Document type: {multi_result.document_summary.document_type}\")\n                print(f\"   \u2022 Response length: {len(llm_response):,} characters\")\n                \n                # Use first page image for PDF generation\n                image_b64 = page_images[0] if page_images else \"\"\n                \n            else:\n                print(f\"\ud83e\udde0 Step 3: Processing with AI...\")\n                \n                # Get enhanced context using conversation context manager\n                enhanced_prompt = self.conversation_context.enhance_prompt_with_context(\n                    base_prompt=\"\",  # Will be handled by LLM handler\n                    conversation_id=self.conversation_id,\n                    session_manager=self.session_manager\n                )\n                \n                # Add enhanced context to metadata\n                if enhanced_prompt:\n                    metadata['enhanced_context'] = enhanced_prompt\n                    print(f\"   \u2022 Enhanced with conversation context\")\n                \n                llm_response = await self.llm_handler.analyze_and_respond(image_b64, metadata)\n                print(f\"\u2705 AI processing complete ({len(llm_response):,} characters)\")\n            \n            # Step 3.5: Process editing workflow if enabled\n            editing_workflow_result = None\n            if self.editing_workflow:\n                print(f\"\u270f\ufe0f  Step 3.5: Processing editing workflow...\")\n                editing_workflow_result = await self.editing_workflow.process_document_for_editing(\n                    file_path, image_b64, 
llm_response\n                )\n                if editing_workflow_result:\n                    print(f\"   \u2022 Detected {editing_workflow_result.annotations_detected} annotations\")\n                    print(f\"   \u2022 Confidence: {editing_workflow_result.confidence_score:.2f}\")\n                    if editing_workflow_result.rewritten_content:\n                        print(f\"   \u2022 Generated rewritten content ({len(editing_workflow_result.rewritten_content):,} chars)\")\n                        # Add editing workflow results to metadata for PDF generation\n                        metadata['editing_workflow'] = {\n                            'annotations_detected': editing_workflow_result.annotations_detected,\n                            'confidence_score': editing_workflow_result.confidence_score,\n                            'recommendations': editing_workflow_result.recommendations,\n                            'rewritten_content': editing_workflow_result.rewritten_content\n                        }\n                else:\n                    print(f\"   \u2022 No annotations detected or workflow failed\")\n            \n            # Step 4: Apply compact formatting if enabled\n            final_response = llm_response\n            if self.compact_mode:\n                print(f\"\ud83c\udfaf Step 4: Applying compact formatting...\")\n                compact_response = self.compact_formatter.parse_llm_response_to_compact(llm_response)\n                if compact_response:\n                    final_response = compact_response\n                    print(f\"   \u2022 Compressed: {len(llm_response)} \u2192 {len(compact_response)} chars ({len(compact_response)/len(llm_response)*100:.0f}%)\")\n                else:\n                    print(f\"   \u2022 Compact formatting failed, using original\")\n            \n            # Step 5: Generate output PDF with session-aware filename\n            print(f\"\ud83d\udcc4 Step 5: Generating response PDF...\")\n            \n            # Get current exchange number\n            conversation = self.session_manager.get_conversation(self.conversation_id)\n            next_exchange_num = (conversation.total_exchanges + 1) if conversation else 1\n            \n            output_filename = self.session_manager.generate_session_filename(\n                self.conversation_id, next_exchange_num, file_path.name\n            )\n            output_path = file_path.parent / output_filename\n            \n            # Enable hybrid mode in metadata for this processing\n            if self.enable_hybrid_mode:\n                metadata['enable_hybrid_mode'] = True\n            \n            # Use hybrid response handler if available and response contains graphics\n            if (self.enable_hybrid_mode and self.hybrid_handler and \n                '[GRAPHIC:' in final_response):\n                \n                print(f\"   \ud83c\udfa8 Using hybrid mode (text + graphics)\")\n                generated_pdf = await self.hybrid_handler.process_hybrid_response(\n                    llm_response=final_response,\n                    metadata=metadata,\n                    output_path=str(output_path),\n                    conversation_id=self.conversation_id,\n                    exchange_number=next_exchange_num\n                )\n            else:\n                # Use standard PDF generation\n                if self.enable_hybrid_mode and '[GRAPHIC:' in final_response:\n                    print(f\"   \u26a0\ufe0f Graphics detected but hybrid 
handler not available, using standard mode\")\n                \n                self.pdf_generator.create_response_pdf(\n                    llm_response=final_response,\n                    original_image_b64=image_b64,\n                    metadata=metadata,\n                    output_path=str(output_path),\n                    conversation_id=self.conversation_id,\n                    exchange_number=next_exchange_num\n                )\n            \n            # Step 6: Record exchange in session\n            processing_time = time.time() - start_time\n            usage_stats = self.llm_handler.get_usage_summary()\n            \n            exchange_id = self.session_manager.add_exchange(\n                conversation_id=self.conversation_id,\n                input_file=str(file_path),\n                input_type=file_path.suffix,\n                response_text=final_response,\n                processing_time=processing_time,\n                tokens_used=usage_stats['total_tokens_used'],\n                metadata={\n                    'dimensions': metadata.get('dimensions'),\n                    'source_type': metadata.get('source_type'),\n                    'compact_mode': self.compact_mode,\n                    'original_response_length': len(llm_response),\n                    'final_response_length': len(final_response)\n                }\n            )\n            \n            # Log success\n            print(f\"\\n\ud83c\udf89 SUCCESS! Processing completed in {processing_time:.1f} seconds\")\n            print(f\"\ud83d\udcc4 Response saved: {output_path.name}\")\n            print(f\"\ud83c\udd94 Exchange ID: {exchange_id}\")\n            \n            # Log usage statistics\n            print(f\"\ud83d\udcca Usage: {usage_stats['total_tokens_used']} tokens, ~${usage_stats['total_cost_estimate']:.3f}\")\n            \n            self.logger.info(f\"Successfully processed {file_path.name} -> {output_path.name} \"\n                           f\"({processing_time:.1f}s, {usage_stats['total_tokens_used']} tokens, {exchange_id})\")\n            \n            return output_path\n            \n        except Exception as e:\n            error_msg = f\"Error processing {file_path.name}: {str(e)}\"\n            print(f\"\\n\u274c ERROR: {error_msg}\")\n            self.logger.error(error_msg)\n            \n            # Generate error PDF with session-aware filename\n            try:\n                conversation = self.session_manager.get_conversation(self.conversation_id)\n                next_exchange_num = (conversation.total_exchanges + 1) if conversation else 1\n                \n                error_filename = self.session_manager.generate_session_filename(\n                    self.conversation_id, next_exchange_num, file_path.name, is_error=True\n                )\n                error_output_path = file_path.parent / error_filename\n                \n                self.pdf_generator.generate_error_pdf(\n                    error_message=str(e),\n                    original_file=str(file_path),\n                    output_path=str(error_output_path),\n                    conversation_id=self.conversation_id,\n                    exchange_number=next_exchange_num\n                )\n                print(f\"\ud83d\udcc4 Error report saved: {error_output_path.name}\")\n                \n                # Record error exchange\n                processing_time = time.time() - start_time\n                self.session_manager.add_exchange(\n                    
conversation_id=self.conversation_id,\n                    input_file=str(file_path),\n                    input_type=file_path.suffix,\n                    response_text=f\"ERROR: {str(e)}\",\n                    processing_time=processing_time,\n                    tokens_used=0,\n                    metadata={'error': True, 'error_message': str(e)}\n                )\n                \n                return error_output_path\n            except Exception as pdf_error:\n                print(f\"\u274c Failed to generate error PDF: {pdf_error}\")\n                return None\n    \n    async def process_existing_files(self):\n        \"\"\"Process any existing files in the watch folder\"\"\"\n        print(f\"\ud83d\udd0d Checking for existing files in {self.watch_folder}...\")\n        \n        existing_files = [\n            f for f in self.watch_folder.iterdir() \n            if f.is_file() and InputProcessor.is_supported_file(f) and not f.name.startswith(('RESPONSE_', 'ERROR_'))\n        ]\n        \n        if existing_files:\n            print(f\"\ud83d\udcc1 Found {len(existing_files)} existing file(s) to process\")\n            for file_path in existing_files:\n                await self.process_file(file_path)\n        else:\n            print(f\"\ud83d\udcc1 No existing files found\")\n    \n    async def start_watching(self, process_existing: bool = True):\n        \"\"\"\n        Start watching the folder for new files\n        \n        Args:\n            process_existing: Whether to process existing files on startup\n        \"\"\"\n        print(f\"\\n\ud83c\udfaf Starting E-Ink LLM File Processor\")\n        print(f\"\ud83d\udcc1 Watching folder: {self.watch_folder.absolute()}\")\n        print(f\"\ud83d\udcdd Supported formats: PDF, JPG, JPEG, PNG, GIF, BMP, TIFF, WEBP\")\n        print(f\"\ud83d\udca1 Place files in the watch folder to process them automatically\")\n        print(f\"\ud83d\udcc4 Responses will be saved with conversation tracking\")\n        print(f\"\\n{'='*60}\")\n        \n        # Process existing files if requested\n        if process_existing:\n            await self.process_existing_files()\n        \n        # Set up file system watcher\n        event_handler = EInkFileHandler(self)\n        observer = Observer()\n        observer.schedule(event_handler, str(self.watch_folder), recursive=False)\n        \n        # Start watching\n        observer.start()\n        print(f\"\ud83d\udc41\ufe0f  File watcher started. Monitoring for new files...\")\n        print(f\"\ud83d\udcbe Logs are saved to: {self.watch_folder / 'eink_llm.log'}\")\n        print(f\"\ud83d\uded1 Press Ctrl+C to stop\")\n        \n        try:\n            while True:\n                await asyncio.sleep(1)\n        except KeyboardInterrupt:\n            print(f\"\\n\ud83d\uded1 Stopping file watcher...\")\n            observer.stop()\n            \n            # Print final usage summary\n            usage_stats = self.llm_handler.get_usage_summary()\n            print(f\"\\n\ud83d\udcca FINAL USAGE SUMMARY:\")\n            print(f\"   \u2022 Preprocessing calls: {usage_stats['preprocessing_calls']}\")\n            print(f\"   \u2022 Main processing calls: {usage_stats['main_processing_calls']}\")\n            print(f\"   \u2022 Total tokens used: {usage_stats['total_tokens_used']:,}\")\n            print(f\"   \u2022 Estimated cost: ${usage_stats['total_cost_estimate']:.3f}\")\n            \n        observer.join()\n        print(f\"\u2705 File watcher stopped\")",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/processor.py",
      "tags": [
        "class",
        "einkllmprocessor"
      ],
      "updated_at": "2025-12-07T01:00:55.843938",
      "usage_example": "# Example usage:\n# result = EInkLLMProcessor(bases)"
    },
    {
      "best_practices": [
        "Always ensure an async event loop is running before using this handler, as it creates async tasks",
        "The handler includes a 2-second delay before processing to ensure files are fully written - adjust if needed for larger files",
        "Files starting with 'RESPONSE_' are automatically excluded to prevent processing output files",
        "The processing_files set prevents duplicate processing - files are tracked during processing and removed when complete",
        "Must be used with watchdog.observers.Observer to actually monitor the file system",
        "The processor_instance must implement async process_file(file_path) method and static is_supported_file(file_path) method",
        "Handle exceptions in the processor's process_file method to prevent handler crashes",
        "The handler only processes files (not directories) and only supported file types",
        "Files are only processed if they exist at the time of delayed processing (handles quick deletions)"
      ],
      "class_interface": {
        "attributes": [
          {
            "description": "The processor instance used to handle actual file processing via its process_file() method",
            "is_class_variable": false,
            "name": "processor",
            "type": "InputProcessor (or compatible)"
          },
          {
            "description": "A set of Path objects currently being processed, used to prevent duplicate processing of the same file",
            "is_class_variable": false,
            "name": "processing_files",
            "type": "set"
          }
        ],
        "methods": [
          {
            "is_property": false,
            "is_static": false,
            "name": "__init__",
            "parameters": {
              "processor_instance": "An InputProcessor or compatible object that provides process_file() async method and is_supported_file() static method"
            },
            "purpose": "Initialize the file handler with a processor instance and set up tracking for files being processed",
            "returns": "None (constructor)",
            "signature": "__init__(self, processor_instance)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "on_created",
            "parameters": {
              "event": "FileSystemEvent object from watchdog containing src_path and is_directory attributes"
            },
            "purpose": "Callback method triggered by watchdog when a new file is created in the monitored directory. Validates the file and schedules async processing if appropriate.",
            "returns": "None",
            "signature": "on_created(self, event)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_delayed_process",
            "parameters": {
              "file_path": "Path object representing the file to be processed"
            },
            "purpose": "Asynchronously process a file after a 2-second delay to ensure it's fully written. Tracks processing state to prevent duplicates and ensures cleanup in finally block.",
            "returns": "None (async coroutine)",
            "signature": "async _delayed_process(self, file_path: Path)"
          }
        ]
      },
      "complexity": "moderate",
      "component_type": "class",
      "conditional_imports": [],
      "created_at": "2025-12-07 00:00:55",
      "decorators": [],
      "dependencies": [
        "asyncio",
        "pathlib",
        "watchdog",
        "input_processor"
      ],
      "description": "A file system event handler that monitors a directory for new files and automatically processes them asynchronously when detected.",
      "docstring": "File system event handler for processing new files",
      "id": 1982,
      "imports": [
        "import os",
        "import asyncio",
        "import time",
        "from pathlib import Path",
        "from watchdog.observers import Observer",
        "from watchdog.events import FileSystemEventHandler",
        "from datetime import datetime",
        "from typing import Optional",
        "import logging",
        "from input_processor import InputProcessor",
        "from llm_handler import LLMHandler",
        "from pdf_generator import PDFGenerator",
        "from session_manager import SessionManager",
        "from compact_formatter import CompactResponseFormatter",
        "from session_detector import SessionDetector",
        "from session_detector import detect_session_from_file",
        "from multi_page_llm_handler import MultiPageLLMHandler",
        "from editing_workflow import EditingWorkflowHandler",
        "from conversation_context import ConversationContextManager",
        "from hybrid_response_handler import HybridResponseHandler"
      ],
      "imports_required": [
        "import asyncio",
        "from pathlib import Path",
        "from watchdog.events import FileSystemEventHandler",
        "from input_processor import InputProcessor"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 59,
      "line_start": 29,
      "name": "EInkFileHandler",
      "parameters": [
        {
          "annotation": "FileSystemEventHandler",
          "default": null,
          "name": "bases"
        }
      ],
      "parameters_explained": {
        "processor_instance": "An instance of InputProcessor (or compatible processor) that provides the process_file() coroutine method and is_supported_file() static method. This processor handles the actual file processing logic once a file is detected and validated."
      },
      "parent_class": null,
      "purpose": "EInkFileHandler extends FileSystemEventHandler to watch for file creation events in a monitored directory. When a new supported file is created (excluding response files), it triggers asynchronous processing after a delay to ensure the file is fully written. It maintains state to prevent duplicate processing of the same file and integrates with an InputProcessor instance to handle the actual file processing logic.",
      "return_annotation": null,
      "return_explained": "The __init__ method returns an instance of EInkFileHandler. The on_created method returns None (implicit). The _delayed_process method is an async coroutine that returns None after processing completes or if the file is already being processed.",
      "settings_required": [
        "Requires an InputProcessor instance (or compatible processor) with process_file() async method and is_supported_file() static method",
        "Must be used with watchdog.observers.Observer to monitor file system events",
        "Requires an async event loop to be running for _delayed_process coroutine execution"
      ],
      "source_code": "class EInkFileHandler(FileSystemEventHandler):\n    \"\"\"File system event handler for processing new files\"\"\"\n    \n    def __init__(self, processor_instance):\n        self.processor = processor_instance\n        self.processing_files = set()  # Track files currently being processed\n        \n    def on_created(self, event):\n        \"\"\"Handle new file creation\"\"\"\n        if not event.is_directory:\n            file_path = Path(event.src_path)\n            \n            # Check if it's a supported file type and not already being processed\n            if (InputProcessor.is_supported_file(file_path) and \n                file_path not in self.processing_files and\n                not file_path.name.startswith('RESPONSE_')):  # Don't process our own output files\n                \n                print(f\"\ud83d\udcc1 New file detected: {file_path.name}\")\n                # Add a small delay to ensure file is fully written\n                asyncio.create_task(self._delayed_process(file_path))\n    \n    async def _delayed_process(self, file_path: Path):\n        \"\"\"Process file with a small delay to ensure it's fully written\"\"\"\n        await asyncio.sleep(2)  # Wait 2 seconds for file to be fully written\n        \n        if file_path.exists() and file_path not in self.processing_files:\n            self.processing_files.add(file_path)\n            try:\n                await self.processor.process_file(file_path)\n            finally:\n                self.processing_files.discard(file_path)",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/processor.py",
      "tags": [
        "file-system",
        "event-handler",
        "async",
        "file-monitoring",
        "watchdog",
        "file-processing",
        "automation",
        "debouncing"
      ],
      "updated_at": "2025-12-07T01:00:55.831167",
      "usage_example": "from pathlib import Path\nimport asyncio\nfrom watchdog.observers import Observer\nfrom input_processor import InputProcessor\nfrom eink_file_handler import EInkFileHandler\n\n# Create processor instance\nprocessor = InputProcessor()\n\n# Create file handler with processor\nfile_handler = EInkFileHandler(processor)\n\n# Set up observer to watch a directory\nobserver = Observer()\nwatch_path = Path('./watched_folder')\nobserver.schedule(file_handler, str(watch_path), recursive=False)\n\n# Start observing\nobserver.start()\n\ntry:\n    # Keep running (file handler will process files automatically)\n    while True:\n        asyncio.sleep(1)\nexcept KeyboardInterrupt:\n    observer.stop()\nobserver.join()"
    },
    {
      "best_practices": [
        "Use the static method directly without instantiating the class (HybridPromptEnhancer.enhance_prompt_for_hybrid_output())",
        "Provide accurate content_analysis dictionaries with 'content_type' and 'elements' keys to get context-specific graphics suggestions",
        "The content_analysis should include 'content_type' (string) and 'elements' (list) to trigger appropriate graphics instructions",
        "Use 'math' in elements list to get mathematical graphics suggestions",
        "Use 'diagram' as content_type or 'diagrams' in elements to get diagram-specific instructions",
        "Include 'question' in content_type to get question-answering graphics guidance",
        "The enhanced prompt is designed for e-ink displays, so it emphasizes high contrast and simple designs",
        "The method is stateless and thread-safe, making it suitable for concurrent use",
        "The returned prompt includes placeholder format [GRAPHIC:type:description:parameters] that downstream systems should parse",
        "Content analysis can be minimal (empty dict) and the method will still provide base graphics instructions"
      ],
      "class_interface": {
        "attributes": [],
        "methods": [
          {
            "is_property": false,
            "is_static": true,
            "name": "enhance_prompt_for_hybrid_output",
            "parameters": {
              "base_prompt": "The original prompt string that will be augmented with graphics capability instructions",
              "content_analysis": "Dictionary containing content analysis metadata with keys 'content_type' (str) and 'elements' (list) to customize graphics suggestions"
            },
            "purpose": "Enhances a base prompt by appending comprehensive instructions for generating hybrid text+graphics responses, with content-specific suggestions based on the analysis",
            "returns": "Enhanced prompt string containing the original prompt plus detailed graphics instructions, placeholder format examples, content-specific suggestions, and integration guidelines",
            "signature": "enhance_prompt_for_hybrid_output(base_prompt: str, content_analysis: Dict[str, Any]) -> str"
          }
        ]
      },
      "complexity": "simple",
      "component_type": "class",
      "conditional_imports": [],
      "created_at": "2025-12-06 23:58:53",
      "decorators": [],
      "dependencies": [
        "typing"
      ],
      "description": "A utility class that enhances LLM prompts by adding instructions and formatting guidelines to encourage hybrid text+graphics responses with embedded graphic placeholders.",
      "docstring": "Enhances LLM prompts to encourage hybrid text+graphics responses",
      "id": 1976,
      "imports": [
        "import json",
        "import re",
        "import asyncio",
        "import base64",
        "from typing import Dict",
        "from typing import Any",
        "from typing import List",
        "from typing import Optional",
        "from typing import Tuple",
        "from typing import Union",
        "from dataclasses import dataclass",
        "from pathlib import Path",
        "import hashlib",
        "from graphics_generator import GraphicsGenerator",
        "from graphics_generator import GraphicSpec",
        "from graphics_generator import GraphicType",
        "from hybrid_pdf_generator import HybridPDFGenerator",
        "from pdf_generator import PDFGenerator"
      ],
      "imports_required": [
        "from typing import Dict, Any"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 300,
      "line_start": 204,
      "name": "HybridPromptEnhancer",
      "parameters": [
        {
          "annotation": "",
          "default": null,
          "name": "bases"
        }
      ],
      "parameters_explained": {
        "bases": "This class has no constructor parameters. It is designed as a utility class with static methods only, requiring no instantiation or initialization."
      },
      "parent_class": null,
      "purpose": "This class provides functionality to augment base prompts with comprehensive instructions for generating hybrid responses that include both text and graphics. It analyzes content type and elements to provide context-specific guidance on when and how to include graphics (charts, diagrams, illustrations, sketches) using a placeholder format. The enhanced prompts guide LLMs to strategically incorporate visual elements that complement textual explanations, particularly useful for e-ink displays and educational content.",
      "return_annotation": null,
      "return_explained": "The class itself returns a HybridPromptEnhancer type when instantiated (though instantiation is unnecessary). The enhance_prompt_for_hybrid_output method returns a string containing the original prompt augmented with detailed graphics capability instructions, placeholder format examples, content-specific suggestions, and integration guidelines.",
      "settings_required": [],
      "source_code": "class HybridPromptEnhancer:\n    \"\"\"Enhances LLM prompts to encourage hybrid text+graphics responses\"\"\"\n    \n    @staticmethod\n    def enhance_prompt_for_hybrid_output(base_prompt: str, content_analysis: Dict[str, Any]) -> str:\n        \"\"\"\n        Enhance prompt to encourage hybrid text+graphics responses\n        \n        Args:\n            base_prompt: Original prompt\n            content_analysis: Content analysis from LLM handler\n            \n        Returns:\n            Enhanced prompt encouraging graphics generation\n        \"\"\"\n        \n        # Graphics capability instruction\n        graphics_instruction = \"\"\"\n\n**HYBRID RESPONSE CAPABILITY:**\nYou can now include graphics in your responses! Use the following placeholder format to request graphics:\n\n[GRAPHIC:type:description:parameters]\n\n**Available Graphic Types:**\n- chart: Data visualizations (bar, line, pie, scatter charts)\n- diagram: Process flows, organizational charts, concept maps\n- illustration: Educational diagrams, mathematical concepts, technical drawings\n- sketch: Simple drawings, annotations, visual explanations\n\n**Placeholder Format Examples:**\n- [GRAPHIC:chart:Sales Comparison:{\"type\":\"bar\",\"data\":[25,40,30,45],\"labels\":[\"Q1\",\"Q2\",\"Q3\",\"Q4\"],\"title\":\"Quarterly Sales\"}]\n- [GRAPHIC:diagram:Process Flow:{\"steps\":[\"Input\",\"Process\",\"Output\"],\"style\":\"flowchart\",\"direction\":\"horizontal\"}]\n- [GRAPHIC:illustration:Mathematical Concept:{\"concept\":\"quadratic_function\",\"style\":\"educational\",\"annotations\":true}]\n\n**When to Include Graphics:**\n- Data that would benefit from visualization\n- Complex processes that need step-by-step diagrams\n- Mathematical or scientific concepts\n- Comparisons that work better visually\n- Any content where a graphic would enhance understanding\n\n**Graphics Integration Guidelines:**\n1. Place graphic placeholders exactly where you want them in your text\n2. Ensure graphics complement and enhance your written explanation\n3. Provide clear, descriptive parameters for graphic generation\n4. Use graphics strategically - not every response needs them\n5. 
Consider the e-ink display limitations (high contrast, simple designs work best)\n\n**Response Structure with Graphics:**\n- Start with your text explanation\n- Insert graphic placeholders at relevant points\n- Continue your explanation referencing the graphics\n- Ensure the response flows naturally even without the graphics\n\"\"\"\n        \n        # Content-specific graphics suggestions\n        content_type = content_analysis.get(\"content_type\", \"mixed\")\n        elements = content_analysis.get(\"elements\", [])\n        \n        if \"math\" in elements:\n            graphics_instruction += \"\"\"\n\n**For Mathematical Content:**\n- Use illustration graphics for mathematical concepts\n- Include diagrams for geometric problems\n- Create charts for data analysis or statistics\n- Show step-by-step visual solutions where helpful\n\"\"\"\n        \n        if content_type == \"diagram\" or \"diagrams\" in elements:\n            graphics_instruction += \"\"\"\n\n**For Diagram Analysis:**\n- Create enhanced versions of hand-drawn diagrams\n- Add professional diagram representations\n- Include process flow improvements\n- Provide alternative visual perspectives\n\"\"\"\n        \n        if \"question\" in content_type.lower():\n            graphics_instruction += \"\"\"\n\n**For Questions Requiring Visual Answers:**\n- Include relevant charts or graphs for data questions\n- Create diagrams for process or concept questions\n- Use illustrations for educational explanations\n- Provide visual examples where they enhance understanding\n\"\"\"\n        \n        # Combine base prompt with graphics enhancement\n        enhanced_prompt = base_prompt + graphics_instruction + \"\"\"\n\n**Important:** Only include graphics when they genuinely enhance your response. A good text-only response is better than a response with unnecessary graphics. Focus on clarity and helpfulness above all else.\n\"\"\"\n        \n        return enhanced_prompt",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/hybrid_response_handler.py",
      "tags": [
        "prompt-engineering",
        "llm",
        "graphics",
        "hybrid-output",
        "text-enhancement",
        "visualization",
        "prompt-augmentation",
        "static-utility",
        "content-analysis",
        "e-ink",
        "educational"
      ],
      "updated_at": "2025-12-07T00:58:53.638896",
      "usage_example": "from typing import Dict, Any\n\n# No instantiation needed - use static method directly\nbase_prompt = \"Explain the water cycle to a student.\"\ncontent_analysis = {\n    \"content_type\": \"question\",\n    \"elements\": [\"science\", \"educational\"]\n}\n\n# Call static method directly on class\nenhanced_prompt = HybridPromptEnhancer.enhance_prompt_for_hybrid_output(\n    base_prompt=base_prompt,\n    content_analysis=content_analysis\n)\n\n# Use enhanced prompt with LLM\nprint(enhanced_prompt)\n\n# Example with mathematical content\nmath_analysis = {\n    \"content_type\": \"question\",\n    \"elements\": [\"math\", \"geometry\"]\n}\nmath_prompt = HybridPromptEnhancer.enhance_prompt_for_hybrid_output(\n    base_prompt=\"Solve this quadratic equation: x^2 + 5x + 6 = 0\",\n    content_analysis=math_analysis\n)"
    },
    {
      "best_practices": [
        "Always use async/await when calling process_hybrid_response as it performs asynchronous graphics generation",
        "Ensure the API key provided has sufficient permissions and quota for graphics generation",
        "The placeholder format must be strictly followed: [GRAPHIC:type:description:json_params] where json_params is valid JSON",
        "Handle the case where graphics generation may fail - the class will continue processing but skip failed graphics",
        "The output_path directory must exist and be writable before calling process_hybrid_response",
        "conversation_id and exchange_number are optional but recommended for tracking multi-turn conversations",
        "The class automatically falls back to standard PDF generation if no graphics placeholders are found",
        "Graphics are optimized for e-ink displays with high contrast and simple styles by default",
        "Each graphic placeholder generates a unique ID based on MD5 hash to prevent duplicates",
        "Error handling is built-in: failed graphics are logged but don't stop the entire process"
      ],
      "class_interface": {
        "attributes": [
          {
            "description": "Instance of GraphicsGenerator used to create visual content from specifications",
            "is_class_variable": false,
            "name": "graphics_generator",
            "type": "GraphicsGenerator"
          },
          {
            "description": "Instance of HybridPDFGenerator used to assemble text and graphics into final PDF",
            "is_class_variable": false,
            "name": "pdf_generator",
            "type": "HybridPDFGenerator"
          },
          {
            "description": "Compiled regex pattern for matching graphic placeholders in format [GRAPHIC:type:description:json_params], case-insensitive",
            "is_class_variable": false,
            "name": "placeholder_pattern",
            "type": "re.Pattern"
          }
        ],
        "methods": [
          {
            "is_property": false,
            "is_static": false,
            "name": "__init__",
            "parameters": {
              "api_key": "API key for the graphics generation service"
            },
            "purpose": "Initialize the handler with required dependencies and compile the regex pattern for placeholder parsing",
            "returns": "None (constructor)",
            "signature": "__init__(self, api_key: str)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "process_hybrid_response",
            "parameters": {
              "conversation_id": "Optional unique identifier for the conversation session",
              "exchange_number": "Optional sequential number of this exchange in the conversation",
              "llm_response": "Raw LLM response text containing graphic placeholders in the format [GRAPHIC:type:description:json_params]",
              "metadata": "Dictionary containing processing metadata like query, timestamp, model used",
              "output_path": "File system path where the final PDF should be saved"
            },
            "purpose": "Main entry point that orchestrates the complete workflow: parse placeholders, generate graphics, and assemble final PDF",
            "returns": "String containing the file path to the generated hybrid PDF document",
            "signature": "async process_hybrid_response(self, llm_response: str, metadata: Dict[str, Any], output_path: str, conversation_id: Optional[str] = None, exchange_number: Optional[int] = None) -> str"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_parse_hybrid_response",
            "parameters": {
              "llm_response": "Raw LLM response text to parse",
              "metadata": "Processing metadata to include in the HybridResponse object"
            },
            "purpose": "Parse the LLM response to extract text content and identify all graphic placeholders using regex pattern matching",
            "returns": "HybridResponse object containing the original text, empty graphics list (populated later), list of parsed placeholders, and metadata",
            "signature": "_parse_hybrid_response(self, llm_response: str, metadata: Dict[str, Any]) -> HybridResponse"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_generate_graphics",
            "parameters": {
              "metadata": "Processing metadata (currently unused but available for future enhancements)",
              "placeholders": "List of GraphicPlaceholder objects extracted from the LLM response"
            },
            "purpose": "Asynchronously generate all graphics from the parsed placeholders using the GraphicsGenerator",
            "returns": "List of GraphicSpec objects representing successfully generated graphics (failed generations are skipped)",
            "signature": "async _generate_graphics(self, placeholders: List[GraphicPlaceholder], metadata: Dict[str, Any]) -> List[GraphicSpec]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_assemble_hybrid_pdf",
            "parameters": {
              "conversation_id": "Optional conversation identifier",
              "exchange_number": "Optional exchange number",
              "generated_graphics": "List of successfully generated GraphicSpec objects",
              "hybrid_response": "HybridResponse object containing text content and placeholder information",
              "output_path": "File system path for the output PDF"
            },
            "purpose": "Assemble the final PDF by combining text content with generated graphics at their placeholder positions",
            "returns": "String containing the file path to the assembled hybrid PDF",
            "signature": "async _assemble_hybrid_pdf(self, hybrid_response: HybridResponse, generated_graphics: List[GraphicSpec], output_path: str, conversation_id: Optional[str] = None, exchange_number: Optional[int] = None) -> str"
          }
        ]
      },
      "complexity": "complex",
      "component_type": "class",
      "conditional_imports": [
        {
          "condition": "only when no graphics placeholders are detected in the LLM response (fallback to standard PDF generation)",
          "import": "from pdf_generator import PDFGenerator",
          "optional": false
        }
      ],
      "created_at": "2025-12-06 23:58:31",
      "decorators": [],
      "dependencies": [
        "json",
        "re",
        "asyncio",
        "base64",
        "typing",
        "dataclasses",
        "pathlib",
        "hashlib",
        "graphics_generator",
        "hybrid_pdf_generator",
        "pdf_generator"
      ],
      "description": "Orchestrates the complete workflow for generating hybrid PDF documents that combine LLM text responses with dynamically generated graphics (charts, diagrams, illustrations).",
      "docstring": "Handles the complete hybrid response generation workflow",
      "id": 1975,
      "imports": [
        "import json",
        "import re",
        "import asyncio",
        "import base64",
        "from typing import Dict",
        "from typing import Any",
        "from typing import List",
        "from typing import Optional",
        "from typing import Tuple",
        "from typing import Union",
        "from dataclasses import dataclass",
        "from pathlib import Path",
        "import hashlib",
        "from graphics_generator import GraphicsGenerator",
        "from graphics_generator import GraphicSpec",
        "from graphics_generator import GraphicType",
        "from hybrid_pdf_generator import HybridPDFGenerator",
        "from pdf_generator import PDFGenerator"
      ],
      "imports_required": [
        "import json",
        "import re",
        "import asyncio",
        "import base64",
        "from typing import Dict, Any, List, Optional, Tuple, Union",
        "from dataclasses import dataclass",
        "from pathlib import Path",
        "import hashlib",
        "from graphics_generator import GraphicsGenerator, GraphicSpec, GraphicType",
        "from hybrid_pdf_generator import HybridPDFGenerator"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 202,
      "line_start": 39,
      "name": "HybridResponseHandler",
      "parameters": [
        {
          "annotation": "",
          "default": null,
          "name": "bases"
        }
      ],
      "parameters_explained": {
        "api_key": "API key for the graphics generation service (passed to GraphicsGenerator). Required for authenticating requests to generate visual content like charts, diagrams, and illustrations."
      },
      "parent_class": null,
      "purpose": "HybridResponseHandler is responsible for processing LLM responses that contain special graphic placeholders, generating the requested graphics using an AI graphics generator, and assembling them into a final PDF document. It parses placeholder syntax, coordinates asynchronous graphic generation, handles fallback to standard text-only PDFs when no graphics are present, and manages the complete lifecycle from raw LLM response to final hybrid PDF output. This class is designed for applications that need to enhance text responses with visual elements optimized for e-ink displays.",
      "return_annotation": null,
      "return_explained": "The constructor returns a HybridResponseHandler instance. The main method process_hybrid_response returns a string containing the file path to the generated hybrid PDF. If no graphics are detected, it falls back to standard PDF generation and returns that path instead.",
      "settings_required": [
        "API key for graphics generation service (passed to constructor)",
        "GraphicsGenerator must be properly configured and available",
        "HybridPDFGenerator must be available for PDF assembly",
        "PDFGenerator must be available for fallback text-only PDF generation",
        "Write permissions for the output_path directory"
      ],
      "source_code": "class HybridResponseHandler:\n    \"\"\"Handles the complete hybrid response generation workflow\"\"\"\n    \n    def __init__(self, api_key: str):\n        self.graphics_generator = GraphicsGenerator(api_key)\n        self.pdf_generator = HybridPDFGenerator()\n        self.placeholder_pattern = re.compile(\n            r'\\[GRAPHIC:(\\w+):([^:]+):([^\\]]+)\\]',\n            re.IGNORECASE\n        )\n    \n    async def process_hybrid_response(self, \n                                    llm_response: str, \n                                    metadata: Dict[str, Any],\n                                    output_path: str,\n                                    conversation_id: Optional[str] = None,\n                                    exchange_number: Optional[int] = None) -> str:\n        \"\"\"\n        Process a hybrid LLM response and generate final PDF\n        \n        Args:\n            llm_response: LLM response containing text and graphic placeholders\n            metadata: Processing metadata\n            output_path: Path for final PDF\n            conversation_id: Session conversation ID\n            exchange_number: Exchange number in conversation\n            \n        Returns:\n            Path to generated hybrid PDF\n        \"\"\"\n        print(f\"\ud83c\udfa8 Processing hybrid response with graphics...\")\n        \n        # Step 1: Parse response for graphics placeholders\n        hybrid_response = self._parse_hybrid_response(llm_response, metadata)\n        \n        if not hybrid_response.placeholders:\n            print(f\"   \u2022 No graphics detected, using standard text generation\")\n            # Fall back to standard PDF generation\n            from pdf_generator import PDFGenerator\n            standard_generator = PDFGenerator()\n            return standard_generator.create_response_pdf(\n                llm_response, \"\", metadata, output_path, conversation_id, exchange_number\n            )\n        \n        print(f\"   \u2022 Found {len(hybrid_response.placeholders)} graphic placeholders\")\n        \n        # Step 2: Generate graphics\n        generated_graphics = await self._generate_graphics(hybrid_response.placeholders, metadata)\n        \n        # Step 3: Assemble final PDF\n        final_pdf_path = await self._assemble_hybrid_pdf(\n            hybrid_response, generated_graphics, output_path, \n            conversation_id, exchange_number\n        )\n        \n        print(f\"\u2705 Hybrid PDF generated: {Path(final_pdf_path).name}\")\n        return final_pdf_path\n    \n    def _parse_hybrid_response(self, llm_response: str, metadata: Dict[str, Any]) -> HybridResponse:\n        \"\"\"\n        Parse LLM response to extract text content and graphic placeholders\n        \n        Expected placeholder format:\n        [GRAPHIC:chart:Sales Data Comparison:{\"type\":\"bar\",\"data\":[10,20,30],\"labels\":[\"A\",\"B\",\"C\"]}]\n        [GRAPHIC:diagram:Process Flow:{\"steps\":[\"Step 1\",\"Step 2\",\"Step 3\"],\"style\":\"flowchart\"}]\n        [GRAPHIC:illustration:Mathematical Concept:{\"concept\":\"derivatives\",\"style\":\"educational\"}]\n        \"\"\"\n        placeholders = []\n        \n        # Find all graphic placeholders\n        matches = self.placeholder_pattern.findall(llm_response)\n        \n        for match in matches:\n            graphic_type, description, params_json = match\n            \n            try:\n                parameters = json.loads(params_json)\n            except 
json.JSONDecodeError:\n                print(f\"\u26a0\ufe0f Warning: Invalid JSON in graphic placeholder: {params_json}\")\n                parameters = {\"description\": description}\n            \n            # Generate unique ID for this graphic\n            placeholder_text = f\"[GRAPHIC:{graphic_type}:{description}:{params_json}]\"\n            graphic_id = hashlib.md5(placeholder_text.encode()).hexdigest()[:8]\n            \n            placeholder = GraphicPlaceholder(\n                id=graphic_id,\n                graphic_type=graphic_type,\n                description=description,\n                parameters=parameters,\n                position_marker=placeholder_text\n            )\n            \n            placeholders.append(placeholder)\n        \n        return HybridResponse(\n            text_content=llm_response,\n            graphics=[],  # Will be populated after generation\n            placeholders=placeholders,\n            metadata=metadata\n        )\n    \n    async def _generate_graphics(self, \n                               placeholders: List[GraphicPlaceholder], \n                               metadata: Dict[str, Any]) -> List[GraphicSpec]:\n        \"\"\"Generate all graphics from placeholders\"\"\"\n        generated_graphics = []\n        \n        for placeholder in placeholders:\n            print(f\"   \ud83c\udfa8 Generating {placeholder.graphic_type}: {placeholder.description}\")\n            \n            try:\n                # Convert placeholder to GraphicSpec\n                graphic_spec = GraphicSpec(\n                    id=placeholder.id,\n                    type=GraphicType(placeholder.graphic_type),\n                    description=placeholder.description,\n                    parameters=placeholder.parameters,\n                    style_preferences={\n                        \"eink_optimized\": True,\n                        \"high_contrast\": True,\n                        \"simple_style\": True\n                    }\n                )\n                \n                # Generate the graphic\n                generated_graphic = await self.graphics_generator.generate_graphic(graphic_spec)\n                \n                if generated_graphic:\n                    generated_graphics.append(generated_graphic)\n                    print(f\"     \u2705 Generated {placeholder.graphic_type}\")\n                else:\n                    print(f\"     \u274c Failed to generate {placeholder.graphic_type}\")\n                    \n            except Exception as e:\n                print(f\"     \u274c Error generating {placeholder.graphic_type}: {e}\")\n                continue\n        \n        return generated_graphics\n    \n    async def _assemble_hybrid_pdf(self, \n                                 hybrid_response: HybridResponse,\n                                 generated_graphics: List[GraphicSpec],\n                                 output_path: str,\n                                 conversation_id: Optional[str] = None,\n                                 exchange_number: Optional[int] = None) -> str:\n        \"\"\"Assemble final PDF with text and graphics\"\"\"\n        print(f\"   \ud83d\udcc4 Assembling hybrid PDF...\")\n        \n        # Create graphics lookup\n        graphics_lookup = {graphic.id: graphic for graphic in generated_graphics}\n        \n        # Generate final PDF\n        final_path = await self.pdf_generator.create_hybrid_pdf(\n            text_content=hybrid_response.text_content,\n            
placeholders=hybrid_response.placeholders,\n            graphics=graphics_lookup,\n            metadata=hybrid_response.metadata,\n            output_path=output_path,\n            conversation_id=conversation_id,\n            exchange_number=exchange_number\n        )\n        \n        return final_path",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/hybrid_response_handler.py",
      "tags": [
        "pdf-generation",
        "hybrid-content",
        "graphics-generation",
        "async",
        "document-assembly",
        "llm-response-processing",
        "placeholder-parsing",
        "visualization",
        "e-ink-optimization",
        "workflow-orchestration"
      ],
      "updated_at": "2025-12-07T00:58:31.206745",
      "usage_example": "import asyncio\nfrom hybrid_response_handler import HybridResponseHandler\n\n# Initialize handler with API key\nhandler = HybridResponseHandler(api_key=\"your-api-key-here\")\n\n# LLM response with graphic placeholders\nllm_response = '''\nHere is the sales analysis:\n\n[GRAPHIC:chart:Sales Data Comparison:{\"type\":\"bar\",\"data\":[10,20,30],\"labels\":[\"Q1\",\"Q2\",\"Q3\"]}]\n\nThe data shows steady growth across quarters.\n'''\n\n# Metadata for processing\nmetadata = {\n    \"query\": \"Show me sales data\",\n    \"timestamp\": \"2024-01-15T10:30:00\",\n    \"model\": \"gpt-4\"\n}\n\n# Process and generate hybrid PDF\nasync def main():\n    pdf_path = await handler.process_hybrid_response(\n        llm_response=llm_response,\n        metadata=metadata,\n        output_path=\"./output/response.pdf\",\n        conversation_id=\"conv_123\",\n        exchange_number=1\n    )\n    print(f\"Generated PDF: {pdf_path}\")\n\nasyncio.run(main())"
    },
    {
      "best_practices": [
        "This is a dataclass, so it automatically generates __init__, __repr__, and __eq__ methods. Use it as an immutable data container.",
        "Ensure that the number and IDs of placeholders correspond correctly to the graphics list to maintain proper mapping.",
        "The metadata dictionary should be used for non-structural information that doesn't fit into the other fields.",
        "When creating instances, ensure all required fields are provided as dataclasses require all fields without defaults to be specified.",
        "Consider using frozen=True in the dataclass decorator if immutability is desired: @dataclass(frozen=True).",
        "The placeholders should reference positions or identifiers in the text_content where graphics will be inserted during rendering.",
        "Validate that GraphicSpec objects in the graphics list are properly configured before creating a HybridResponse instance."
      ],
      "class_interface": {
        "attributes": [
          {
            "description": "The textual content of the hybrid response, potentially containing placeholder references for graphics",
            "is_class_variable": false,
            "name": "text_content",
            "type": "str"
          },
          {
            "description": "List of graphic specifications defining the visual elements to be included in the response",
            "is_class_variable": false,
            "name": "graphics",
            "type": "List[GraphicSpec]"
          },
          {
            "description": "List of placeholder objects that map graphics to their intended positions in the text content",
            "is_class_variable": false,
            "name": "placeholders",
            "type": "List[GraphicPlaceholder]"
          },
          {
            "description": "Dictionary containing additional metadata and contextual information about the response",
            "is_class_variable": false,
            "name": "metadata",
            "type": "Dict[str, Any]"
          }
        ],
        "methods": [
          {
            "is_property": false,
            "is_static": false,
            "name": "__init__",
            "parameters": {
              "graphics": "List of GraphicSpec objects defining the graphics to include",
              "metadata": "Dictionary of additional metadata about the response",
              "placeholders": "List of GraphicPlaceholder objects indicating where graphics should be placed",
              "text_content": "The textual content of the response"
            },
            "purpose": "Initializes a new HybridResponse instance with text content, graphics, placeholders, and metadata. Auto-generated by dataclass decorator.",
            "returns": "None - initializes the instance",
            "signature": "__init__(text_content: str, graphics: List[GraphicSpec], placeholders: List[GraphicPlaceholder], metadata: Dict[str, Any]) -> None"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "__repr__",
            "parameters": {},
            "purpose": "Returns a string representation of the HybridResponse instance. Auto-generated by dataclass decorator.",
            "returns": "String representation showing all field values",
            "signature": "__repr__() -> str"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "__eq__",
            "parameters": {
              "other": "Another object to compare with"
            },
            "purpose": "Compares two HybridResponse instances for equality based on all fields. Auto-generated by dataclass decorator.",
            "returns": "True if all fields are equal, False otherwise",
            "signature": "__eq__(other: object) -> bool"
          }
        ]
      },
      "complexity": "simple",
      "component_type": "class",
      "conditional_imports": [],
      "created_at": "2025-12-06 23:57:55",
      "decorators": [
        "dataclass"
      ],
      "dependencies": [
        "dataclasses",
        "typing"
      ],
      "description": "A dataclass that encapsulates a complete hybrid response containing both text content and graphical elements with their placeholders and metadata.",
      "docstring": "Complete hybrid response with text and graphics",
      "id": 1974,
      "imports": [
        "import json",
        "import re",
        "import asyncio",
        "import base64",
        "from typing import Dict",
        "from typing import Any",
        "from typing import List",
        "from typing import Optional",
        "from typing import Tuple",
        "from typing import Union",
        "from dataclasses import dataclass",
        "from pathlib import Path",
        "import hashlib",
        "from graphics_generator import GraphicsGenerator",
        "from graphics_generator import GraphicSpec",
        "from graphics_generator import GraphicType",
        "from hybrid_pdf_generator import HybridPDFGenerator",
        "from pdf_generator import PDFGenerator"
      ],
      "imports_required": [
        "from dataclasses import dataclass",
        "from typing import Dict, Any, List",
        "from graphics_generator import GraphicSpec, GraphicPlaceholder"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 37,
      "line_start": 32,
      "name": "HybridResponse",
      "parameters": [
        {
          "annotation": "",
          "default": null,
          "name": "bases"
        }
      ],
      "parameters_explained": {
        "graphics": "A list of GraphicSpec objects that define the specifications for each graphic element to be generated or included in the response. Each GraphicSpec contains details about the type, data, and rendering parameters for a graphic.",
        "metadata": "A dictionary containing arbitrary key-value pairs for additional information about the response, such as generation timestamps, source information, processing parameters, or any other contextual data.",
        "placeholders": "A list of GraphicPlaceholder objects that indicate where in the text_content each graphic should be positioned. These placeholders map graphics to their intended locations in the final rendered output.",
        "text_content": "A string containing the textual portion of the response. This is the main text that may contain references or placeholders for graphics."
      },
      "parent_class": null,
      "purpose": "HybridResponse serves as a data container for responses that combine textual information with graphics. It stores the text content, graphic specifications, placeholder information for where graphics should be inserted, and additional metadata. This class is typically used in systems that generate documents or reports with embedded visualizations, allowing for structured representation of mixed-content responses before rendering to formats like PDF.",
      "return_annotation": null,
      "return_explained": "As a dataclass, instantiation returns a HybridResponse object with the four specified attributes initialized. The object serves as an immutable-by-convention data structure that can be passed between components for processing, rendering, or serialization.",
      "settings_required": [
        "Requires GraphicSpec and GraphicPlaceholder classes to be available from the graphics_generator module"
      ],
      "source_code": "class HybridResponse:\n    \"\"\"Complete hybrid response with text and graphics\"\"\"\n    text_content: str\n    graphics: List[GraphicSpec]\n    placeholders: List[GraphicPlaceholder]\n    metadata: Dict[str, Any]",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/hybrid_response_handler.py",
      "tags": [
        "dataclass",
        "hybrid-content",
        "graphics",
        "text-and-graphics",
        "document-generation",
        "data-container",
        "response-model",
        "pdf-generation",
        "visualization"
      ],
      "updated_at": "2025-12-07T00:57:55.190520",
      "usage_example": "from dataclasses import dataclass\nfrom typing import Dict, Any, List\nfrom graphics_generator import GraphicSpec, GraphicPlaceholder, GraphicType\n\n# Create graphic specifications\ngraphic1 = GraphicSpec(type=GraphicType.BAR_CHART, data={'values': [1, 2, 3]}, title='Sample Chart')\n\n# Create placeholders\nplaceholder1 = GraphicPlaceholder(id='chart1', position=100)\n\n# Instantiate HybridResponse\nresponse = HybridResponse(\n    text_content='Here is the analysis: [GRAPHIC:chart1] shows the results.',\n    graphics=[graphic1],\n    placeholders=[placeholder1],\n    metadata={'generated_at': '2024-01-01', 'version': '1.0'}\n)\n\n# Access attributes\nprint(response.text_content)\nprint(f'Number of graphics: {len(response.graphics)}')\nprint(f'Metadata: {response.metadata}')"
    },
    {
      "best_practices": [
        "Use unique and descriptive IDs to avoid conflicts when multiple graphics are present in the same document",
        "Ensure the graphic_type matches the types supported by your graphics generation system (GraphicsGenerator in this codebase)",
        "The position_marker should be a unique string that won't accidentally match regular text content",
        "Store all necessary generation parameters in the parameters dictionary to enable complete graphic recreation",
        "Keep the description field meaningful for accessibility and documentation purposes",
        "Since this is a dataclass, all attributes are required at instantiation unless default values are provided",
        "The parameters dictionary should be JSON-serializable if you need to persist or transmit placeholders",
        "Consider validating the parameters dictionary structure based on graphic_type to catch errors early",
        "This class is immutable by default (dataclass without frozen=True), but attributes can be modified after creation if needed"
      ],
      "class_interface": {
        "attributes": [
          {
            "description": "Unique identifier for this graphic placeholder instance",
            "is_class_variable": false,
            "name": "id",
            "type": "str"
          },
          {
            "description": "Type of graphic to be generated (e.g., 'chart', 'diagram', 'graph')",
            "is_class_variable": false,
            "name": "graphic_type",
            "type": "str"
          },
          {
            "description": "Human-readable description of what the graphic represents",
            "is_class_variable": false,
            "name": "description",
            "type": "str"
          },
          {
            "description": "Dictionary containing all parameters needed to generate the graphic",
            "is_class_variable": false,
            "name": "parameters",
            "type": "Dict[str, Any]"
          },
          {
            "description": "Unique marker string indicating where in the text this graphic should be inserted",
            "is_class_variable": false,
            "name": "position_marker",
            "type": "str"
          }
        ],
        "methods": [
          {
            "is_property": false,
            "is_static": false,
            "name": "__init__",
            "parameters": {
              "description": "Human-readable description of the graphic",
              "graphic_type": "Type of graphic to generate",
              "id": "Unique identifier for the graphic",
              "parameters": "Dictionary of generation parameters",
              "position_marker": "Text marker indicating insertion position"
            },
            "purpose": "Initializes a new GraphicPlaceholder instance with all required attributes. Auto-generated by the dataclass decorator.",
            "returns": "None (constructor)",
            "signature": "__init__(self, id: str, graphic_type: str, description: str, parameters: Dict[str, Any], position_marker: str) -> None"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "__repr__",
            "parameters": {},
            "purpose": "Returns a string representation of the GraphicPlaceholder instance showing all attributes. Auto-generated by the dataclass decorator.",
            "returns": "String representation in the format 'GraphicPlaceholder(id=..., graphic_type=..., ...)'",
            "signature": "__repr__(self) -> str"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "__eq__",
            "parameters": {
              "other": "Another object to compare with"
            },
            "purpose": "Compares two GraphicPlaceholder instances for equality based on all attributes. Auto-generated by the dataclass decorator.",
            "returns": "True if all attributes are equal, False otherwise",
            "signature": "__eq__(self, other: object) -> bool"
          }
        ]
      },
      "complexity": "simple",
      "component_type": "class",
      "conditional_imports": [],
      "created_at": "2025-12-06 23:57:27",
      "decorators": [
        "dataclass"
      ],
      "dependencies": [
        "dataclasses"
      ],
      "description": "A dataclass that represents a placeholder for graphics (charts, diagrams, etc.) embedded within text responses, storing metadata about the graphic's type, description, parameters, and position.",
      "docstring": "Represents a graphic placeholder in the text response",
      "id": 1973,
      "imports": [
        "import json",
        "import re",
        "import asyncio",
        "import base64",
        "from typing import Dict",
        "from typing import Any",
        "from typing import List",
        "from typing import Optional",
        "from typing import Tuple",
        "from typing import Union",
        "from dataclasses import dataclass",
        "from pathlib import Path",
        "import hashlib",
        "from graphics_generator import GraphicsGenerator",
        "from graphics_generator import GraphicSpec",
        "from graphics_generator import GraphicType",
        "from hybrid_pdf_generator import HybridPDFGenerator",
        "from pdf_generator import PDFGenerator"
      ],
      "imports_required": [
        "from dataclasses import dataclass",
        "from typing import Dict, Any"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 29,
      "line_start": 23,
      "name": "GraphicPlaceholder",
      "parameters": [
        {
          "annotation": "",
          "default": null,
          "name": "bases"
        }
      ],
      "parameters_explained": {
        "description": "A human-readable string describing what the graphic represents or displays, useful for documentation and accessibility purposes",
        "graphic_type": "A string specifying the type of graphic to be generated (e.g., 'chart', 'diagram', 'graph'). This determines which generation method will be used",
        "id": "A unique string identifier for this graphic placeholder, used to reference and track the specific graphic instance throughout the document generation process",
        "parameters": "A dictionary containing key-value pairs of configuration parameters needed to generate the specific graphic. The structure depends on the graphic_type and may include data, styling options, dimensions, etc.",
        "position_marker": "A string marker that indicates where in the text this graphic should be inserted or replaced, typically a unique placeholder string like '[GRAPHIC_1]'"
      },
      "parent_class": null,
      "purpose": "GraphicPlaceholder serves as a data container for tracking where graphics should be inserted in text-based responses. It stores all necessary information to identify, describe, and generate a graphic at a specific location, including a unique identifier, the type of graphic, descriptive text, generation parameters, and a position marker for text replacement. This class is typically used in document generation workflows where text and graphics need to be coordinated.",
      "return_annotation": null,
      "return_explained": "Instantiating GraphicPlaceholder returns an instance of the class with all five attributes (id, graphic_type, description, parameters, position_marker) set to the provided values. As a dataclass, it automatically generates __init__, __repr__, and __eq__ methods.",
      "settings_required": [],
      "source_code": "class GraphicPlaceholder:\n    \"\"\"Represents a graphic placeholder in the text response\"\"\"\n    id: str\n    graphic_type: str\n    description: str\n    parameters: Dict[str, Any]\n    position_marker: str",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/hybrid_response_handler.py",
      "tags": [
        "dataclass",
        "placeholder",
        "graphics",
        "document-generation",
        "data-container",
        "metadata",
        "text-processing",
        "pdf-generation",
        "visualization"
      ],
      "updated_at": "2025-12-07T00:57:27.252335",
      "usage_example": "from dataclasses import dataclass\nfrom typing import Dict, Any\n\n@dataclass\nclass GraphicPlaceholder:\n    id: str\n    graphic_type: str\n    description: str\n    parameters: Dict[str, Any]\n    position_marker: str\n\n# Create a placeholder for a bar chart\nplaceholder = GraphicPlaceholder(\n    id=\"chart_001\",\n    graphic_type=\"bar_chart\",\n    description=\"Sales data by quarter\",\n    parameters={\n        \"data\": [100, 150, 200, 175],\n        \"labels\": [\"Q1\", \"Q2\", \"Q3\", \"Q4\"],\n        \"title\": \"Quarterly Sales\",\n        \"width\": 800,\n        \"height\": 600\n    },\n    position_marker=\"[GRAPHIC_chart_001]\"\n)\n\n# Access attributes\nprint(placeholder.id)  # \"chart_001\"\nprint(placeholder.graphic_type)  # \"bar_chart\"\nprint(placeholder.parameters[\"title\"])  # \"Quarterly Sales\"\n\n# Dataclass provides automatic equality checking\nplaceholder2 = GraphicPlaceholder(\n    id=\"chart_001\",\n    graphic_type=\"bar_chart\",\n    description=\"Sales data by quarter\",\n    parameters={\"data\": [100, 150, 200, 175], \"labels\": [\"Q1\", \"Q2\", \"Q3\", \"Q4\"], \"title\": \"Quarterly Sales\", \"width\": 800, \"height\": 600},\n    position_marker=\"[GRAPHIC_chart_001]\"\n)\nprint(placeholder == placeholder2)  # True"
    },
    {
      "best_practices": [
        "Always use async/await when calling process_document_for_editing as it performs asynchronous operations",
        "Ensure the LLMHandler is properly initialized with valid credentials before instantiating EditingWorkflowHandler",
        "The workflow only generates rewritten content if 2 or more annotations are detected (threshold for 'substantial markup')",
        "Handle None return values from process_document_for_editing to gracefully manage workflow failures",
        "The image_b64 parameter is currently passed but not used in the workflow - it may be for future enhancements",
        "Check the confidence_score in the result to assess the reliability of detected annotations (ranges 0.0-1.0)",
        "The workflow is stateless - each call to process_document_for_editing is independent",
        "Log messages are printed to console and logged via the logger - ensure logging is configured appropriately",
        "Annotation detection requires the PDF file to exist at the specified file_path",
        "The original_analysis parameter should contain the full text content of the document for best rewriting results"
      ],
      "class_interface": {
        "attributes": [
          {
            "description": "Instance of LLMHandler used for AI-powered text generation and rewriting operations",
            "is_class_variable": false,
            "name": "llm_handler",
            "type": "LLMHandler"
          },
          {
            "description": "Instance of AnnotationDetector responsible for detecting and analyzing annotations in PDF documents",
            "is_class_variable": false,
            "name": "annotation_detector",
            "type": "AnnotationDetector"
          },
          {
            "description": "Instance of TextRewriter that uses the LLM to generate improved text based on detected annotations",
            "is_class_variable": false,
            "name": "text_rewriter",
            "type": "TextRewriter"
          }
        ],
        "methods": [
          {
            "is_property": false,
            "is_static": false,
            "name": "__init__",
            "parameters": {
              "llm_handler": "An LLMHandler instance that provides AI capabilities for text generation and rewriting"
            },
            "purpose": "Initializes the EditingWorkflowHandler with required dependencies for annotation detection and text rewriting",
            "returns": "None - constructor initializes the instance",
            "signature": "__init__(self, llm_handler: LLMHandler)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "process_document_for_editing",
            "parameters": {
              "file_path": "Path object pointing to the PDF document to be processed",
              "image_b64": "Base64 encoded image representation of the document (currently unused but may be for future features)",
              "original_analysis": "The original text content/analysis of the document that will be used as the base for rewriting"
            },
            "purpose": "Main workflow method that processes a document through the complete editing pipeline: detects annotations, analyzes types, generates rewritten content, and provides recommendations",
            "returns": "EditingWorkflowResult containing annotations_detected, confidence_score, recommendations, rewritten_content, annotation_details, and workflow_summary. Returns None if the workflow encounters an error. Returns a result with 0 annotations if no edits are found.",
            "signature": "async process_document_for_editing(self, file_path: Path, image_b64: str, original_analysis: str) -> Optional[EditingWorkflowResult]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_generate_recommendations",
            "parameters": {
              "annotation_result": "An annotation result object containing detected annotations with their types and properties"
            },
            "purpose": "Generates human-readable editing recommendations based on the types and quantities of detected annotations",
            "returns": "List of string recommendations describing the editing work needed (e.g., 'Heavy editing detected - consider major revision')",
            "signature": "_generate_recommendations(self, annotation_result) -> List[str]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_calculate_confidence_score",
            "parameters": {
              "annotation_result": "An annotation result object containing detected annotations with confidence values"
            },
            "purpose": "Calculates a confidence score (0.0-1.0) for the editing workflow based on the number and quality of detected annotations",
            "returns": "Float between 0.0 and 1.0 representing confidence in the annotation detection and workflow results. Returns 0.0 if no annotations found, minimum 0.1 if any annotations detected.",
            "signature": "_calculate_confidence_score(self, annotation_result) -> float"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_generate_workflow_summary",
            "parameters": {
              "annotation_result": "An annotation result object containing all detected annotations",
              "rewritten_length": "Integer representing the character count of generated rewritten content (0 if none generated)"
            },
            "purpose": "Generates a comprehensive text summary of the workflow execution including annotation counts, types, and rewritten content length",
            "returns": "String summary describing the workflow results, e.g., 'Editing workflow processed 5 annotations. Generated 1,234 characters of improved content. Annotation breakdown: 2 strikethrough, 3 highlight.'",
            "signature": "_generate_workflow_summary(self, annotation_result, rewritten_length: int) -> str"
          }
        ]
      },
      "complexity": "complex",
      "component_type": "class",
      "conditional_imports": [],
      "created_at": "2025-12-06 23:56:55",
      "decorators": [],
      "dependencies": [
        "asyncio",
        "typing",
        "pathlib",
        "dataclasses",
        "logging",
        "annotation_detector",
        "text_rewriter",
        "llm_handler"
      ],
      "description": "Orchestrates a complete document editing workflow that detects annotations in PDFs, analyzes their types and intent, generates AI-powered text improvements, and provides editing recommendations.",
      "docstring": "Handles the complete editing workflow:\n1. Detect annotations in document\n2. Analyze annotation types and intent\n3. Generate AI-powered text improvements\n4. Provide editing recommendations",
      "id": 1972,
      "imports": [
        "import asyncio",
        "from typing import Optional",
        "from typing import Dict",
        "from typing import Any",
        "from typing import List",
        "from pathlib import Path",
        "from dataclasses import dataclass",
        "import logging",
        "from annotation_detector import AnnotationDetector",
        "from text_rewriter import TextRewriter",
        "from llm_handler import LLMHandler"
      ],
      "imports_required": [
        "import asyncio",
        "from typing import Optional, Dict, Any, List",
        "from pathlib import Path",
        "from dataclasses import dataclass",
        "import logging",
        "from annotation_detector import AnnotationDetector",
        "from text_rewriter import TextRewriter",
        "from llm_handler import LLMHandler"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 185,
      "line_start": 28,
      "name": "EditingWorkflowHandler",
      "parameters": [
        {
          "annotation": "",
          "default": null,
          "name": "bases"
        }
      ],
      "parameters_explained": {
        "llm_handler": "An instance of LLMHandler that provides AI/LLM capabilities for text rewriting and analysis. This handler is passed to the TextRewriter component and is essential for generating improved content based on detected annotations. Must be a properly initialized LLMHandler with valid API credentials."
      },
      "parent_class": null,
      "purpose": "This class serves as the main coordinator for processing documents with editing annotations. It integrates annotation detection, analysis, and AI-powered rewriting to help users understand and apply edits marked in PDF documents. The workflow includes: (1) detecting visual annotations like strikethroughs, highlights, and markups, (2) analyzing annotation types to understand editing intent, (3) generating rewritten content based on detected edits, and (4) providing actionable recommendations. It's designed for document processing pipelines where PDFs contain manual editing marks that need to be interpreted and applied programmatically.",
      "return_annotation": null,
      "return_explained": "The constructor returns an EditingWorkflowHandler instance. The main method 'process_document_for_editing' returns an Optional[EditingWorkflowResult] - either an EditingWorkflowResult dataclass containing annotations_detected (int), confidence_score (float), recommendations (List[str]), rewritten_content (Optional[str]), annotation_details (List[Dict]), and workflow_summary (str), or None if the workflow fails. Returns a result with 0 annotations if no edits are detected.",
      "settings_required": [
        "LLMHandler must be properly configured with API credentials (e.g., OpenAI API key)",
        "AnnotationDetector dependencies must be available (likely PDF processing libraries)",
        "Logging configuration should be set up to capture workflow progress",
        "EditingWorkflowResult dataclass must be defined in the module or imported"
      ],
      "source_code": "class EditingWorkflowHandler:\n    \"\"\"\n    Handles the complete editing workflow:\n    1. Detect annotations in document\n    2. Analyze annotation types and intent\n    3. Generate AI-powered text improvements\n    4. Provide editing recommendations\n    \"\"\"\n    \n    def __init__(self, llm_handler: LLMHandler):\n        self.llm_handler = llm_handler\n        self.annotation_detector = AnnotationDetector()\n        self.text_rewriter = TextRewriter(llm_handler)\n    \n    async def process_document_for_editing(\n        self, \n        file_path: Path, \n        image_b64: str, \n        original_analysis: str\n    ) -> Optional[EditingWorkflowResult]:\n        \"\"\"\n        Process a document through the complete editing workflow\n        \n        Args:\n            file_path: Path to the original document\n            image_b64: Base64 encoded image of the document\n            original_analysis: The original AI analysis of the document\n            \n        Returns:\n            EditingWorkflowResult with workflow results or None if failed\n        \"\"\"\n        try:\n            logger.info(f\"Starting editing workflow for {file_path.name}\")\n            \n            # Step 1: Detect annotations\n            print(f\"   \ud83d\udd0d Detecting annotations...\")\n            annotation_result = await self.annotation_detector.detect_annotations_in_pdf(str(file_path))\n            \n            if not annotation_result or annotation_result.total_annotations == 0:\n                logger.info(\"No annotations detected, skipping editing workflow\")\n                return EditingWorkflowResult(\n                    annotations_detected=0,\n                    confidence_score=0.0,\n                    recommendations=[\"No annotations detected - document appears to be final\"],\n                    workflow_summary=\"No editing annotations found in document\"\n                )\n            \n            print(f\"   \u2705 Found {annotation_result.total_annotations} annotations\")\n            \n            # Step 2: Analyze annotation types and generate recommendations\n            recommendations = self._generate_recommendations(annotation_result)\n            \n            # Step 3: Generate rewritten content if significant annotations found\n            rewritten_content = None\n            if annotation_result.total_annotations >= 2:  # Only rewrite if substantial markup\n                print(f\"   \u270f\ufe0f  Generating rewritten content...\")\n                rewritten_content = await self.text_rewriter.rewrite_document_from_annotations(\n                    original_analysis, annotation_result.annotations\n                )\n                \n                if rewritten_content:\n                    print(f\"   \u2705 Generated {len(rewritten_content):,} characters of rewritten content\")\n                else:\n                    print(f\"   \u26a0\ufe0f  Failed to generate rewritten content\")\n            \n            # Step 4: Calculate confidence score\n            confidence_score = self._calculate_confidence_score(annotation_result)\n            \n            # Step 5: Generate workflow summary\n            workflow_summary = self._generate_workflow_summary(\n                annotation_result, len(rewritten_content) if rewritten_content else 0\n            )\n            \n            return EditingWorkflowResult(\n                annotations_detected=annotation_result.total_annotations,\n                
confidence_score=confidence_score,\n                recommendations=recommendations,\n                rewritten_content=rewritten_content,\n                annotation_details=[{\n                    'type': ann.annotation_type,\n                    'confidence': ann.confidence,\n                    'area': ann.area,\n                    'text': ann.text_content or 'No text detected'\n                } for ann in annotation_result.annotations],\n                workflow_summary=workflow_summary\n            )\n            \n        except Exception as e:\n            logger.error(f\"Error in editing workflow: {e}\")\n            return None\n    \n    def _generate_recommendations(self, annotation_result) -> List[str]:\n        \"\"\"Generate editing recommendations based on detected annotations\"\"\"\n        recommendations = []\n        \n        # Count annotation types\n        annotation_types = {}\n        for ann in annotation_result.annotations:\n            annotation_types[ann.annotation_type] = annotation_types.get(ann.annotation_type, 0) + 1\n        \n        # Generate type-specific recommendations\n        if 'strikethrough' in annotation_types:\n            recommendations.append(f\"Document contains {annotation_types['strikethrough']} deletion(s) - content removal suggested\")\n        \n        if 'highlight' in annotation_types:\n            recommendations.append(f\"Document contains {annotation_types['highlight']} highlight(s) - important sections marked\")\n        \n        if 'markup' in annotation_types:\n            recommendations.append(f\"Document contains {annotation_types['markup']} markup(s) - corrections or additions suggested\")\n        \n        if 'underline' in annotation_types:\n            recommendations.append(f\"Document contains {annotation_types['underline']} underline(s) - emphasis or corrections indicated\")\n        \n        # Overall recommendations\n        if annotation_result.total_annotations >= 5:\n            recommendations.append(\"Heavy editing detected - consider major revision\")\n        elif annotation_result.total_annotations >= 2:\n            recommendations.append(\"Moderate editing detected - focused improvements needed\")\n        else:\n            recommendations.append(\"Light editing detected - minor adjustments suggested\")\n        \n        return recommendations\n    \n    def _calculate_confidence_score(self, annotation_result) -> float:\n        \"\"\"Calculate confidence score for the editing workflow\"\"\"\n        if not annotation_result or annotation_result.total_annotations == 0:\n            return 0.0\n        \n        # Base confidence on number and quality of annotations\n        base_confidence = min(annotation_result.total_annotations * 0.2, 0.8)\n        \n        # Boost confidence for high-confidence annotations\n        avg_annotation_confidence = sum(ann.confidence for ann in annotation_result.annotations) / len(annotation_result.annotations)\n        confidence_boost = (avg_annotation_confidence - 0.5) * 0.4\n        \n        # Final confidence capped at 1.0\n        final_confidence = min(base_confidence + confidence_boost, 1.0)\n        return max(final_confidence, 0.1)  # Minimum 0.1 if any annotations found\n    \n    def _generate_workflow_summary(self, annotation_result, rewritten_length: int) -> str:\n        \"\"\"Generate a summary of the workflow process\"\"\"\n        summary_parts = [\n            f\"Editing workflow processed {annotation_result.total_annotations} annotations\"\n  
      ]\n        \n        if rewritten_length > 0:\n            summary_parts.append(f\"Generated {rewritten_length:,} characters of improved content\")\n        \n        # Add annotation type breakdown\n        annotation_types = {}\n        for ann in annotation_result.annotations:\n            annotation_types[ann.annotation_type] = annotation_types.get(ann.annotation_type, 0) + 1\n        \n        if annotation_types:\n            type_summary = \", \".join([f\"{count} {type}\" for type, count in annotation_types.items()])\n            summary_parts.append(f\"Annotation breakdown: {type_summary}\")\n        \n        return \". \".join(summary_parts) + \".\"",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/editing_workflow.py",
      "tags": [
        "document-processing",
        "pdf-analysis",
        "annotation-detection",
        "ai-editing",
        "text-rewriting",
        "workflow-orchestration",
        "async",
        "llm-integration",
        "editing-recommendations",
        "document-markup"
      ],
      "updated_at": "2025-12-07T00:56:55.837486",
      "usage_example": "from pathlib import Path\nimport asyncio\nfrom llm_handler import LLMHandler\nfrom editing_workflow_handler import EditingWorkflowHandler\n\n# Initialize dependencies\nllm_handler = LLMHandler(api_key='your-api-key')\n\n# Create workflow handler\nworkflow = EditingWorkflowHandler(llm_handler)\n\n# Process a document\nasync def process_doc():\n    file_path = Path('document_with_edits.pdf')\n    image_b64 = 'base64_encoded_image_string'\n    original_analysis = 'Original document text content'\n    \n    result = await workflow.process_document_for_editing(\n        file_path=file_path,\n        image_b64=image_b64,\n        original_analysis=original_analysis\n    )\n    \n    if result:\n        print(f'Annotations found: {result.annotations_detected}')\n        print(f'Confidence: {result.confidence_score}')\n        print(f'Recommendations: {result.recommendations}')\n        if result.rewritten_content:\n            print(f'Rewritten content: {result.rewritten_content[:200]}...')\n    else:\n        print('Workflow failed')\n\n# Run the async workflow\nasyncio.run(process_doc())"
    },
    {
      "best_practices": [
        "Always provide values for the three required fields (annotations_detected, confidence_score, recommendations) when instantiating.",
        "Use confidence_score values between 0.0 and 1.0 for consistency with standard probability conventions.",
        "Populate annotation_details when detailed tracking of individual annotations is needed for debugging or reporting.",
        "Include rewritten_content only when the workflow actually performs content rewriting to avoid confusion.",
        "Use workflow_summary to provide human-readable context about the processing results.",
        "This is an immutable data container by default; avoid modifying attributes after instantiation unless the dataclass is explicitly made mutable.",
        "When passing this result between components, all consumers should handle the optional fields gracefully (check for None before accessing).",
        "Consider validating that annotations_detected matches the length of annotation_details if both are provided.",
        "The recommendations list should never be empty when instantiated; always provide at least one recommendation or use an empty list explicitly."
      ],
      "class_interface": {
        "attributes": [
          {
            "description": "The total number of annotations detected during the workflow process",
            "is_class_variable": false,
            "name": "annotations_detected",
            "type": "int"
          },
          {
            "description": "A confidence score indicating the reliability of the workflow results, typically between 0.0 and 1.0",
            "is_class_variable": false,
            "name": "confidence_score",
            "type": "float"
          },
          {
            "description": "A list of actionable recommendations generated from the editing workflow",
            "is_class_variable": false,
            "name": "recommendations",
            "type": "List[str]"
          },
          {
            "description": "The rewritten or edited content if content rewriting was performed, otherwise None",
            "is_class_variable": false,
            "name": "rewritten_content",
            "type": "Optional[str]"
          },
          {
            "description": "Detailed information about each annotation as a list of dictionaries, or None if not provided",
            "is_class_variable": false,
            "name": "annotation_details",
            "type": "Optional[List[Dict[str, Any]]]"
          },
          {
            "description": "A high-level summary of the workflow execution and results, or None if not provided",
            "is_class_variable": false,
            "name": "workflow_summary",
            "type": "Optional[str]"
          }
        ],
        "methods": [
          {
            "is_property": false,
            "is_static": false,
            "name": "__init__",
            "parameters": {
              "annotation_details": "Optional list of dictionaries with annotation metadata",
              "annotations_detected": "Integer count of detected annotations",
              "confidence_score": "Float confidence score (typically 0.0-1.0)",
              "recommendations": "List of recommendation strings",
              "rewritten_content": "Optional rewritten text content",
              "workflow_summary": "Optional summary string of the workflow"
            },
            "purpose": "Initializes a new EditingWorkflowResult instance with the provided workflow results. This method is automatically generated by the @dataclass decorator.",
            "returns": "None (constructor)",
            "signature": "__init__(annotations_detected: int, confidence_score: float, recommendations: List[str], rewritten_content: Optional[str] = None, annotation_details: Optional[List[Dict[str, Any]]] = None, workflow_summary: Optional[str] = None) -> None"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "__repr__",
            "parameters": {},
            "purpose": "Returns a string representation of the EditingWorkflowResult instance showing all field values. Automatically generated by @dataclass.",
            "returns": "String representation in the format 'EditingWorkflowResult(annotations_detected=..., confidence_score=..., ...)'",
            "signature": "__repr__() -> str"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "__eq__",
            "parameters": {
              "other": "Another object to compare with"
            },
            "purpose": "Compares two EditingWorkflowResult instances for equality based on all field values. Automatically generated by @dataclass.",
            "returns": "True if all fields are equal, False otherwise",
            "signature": "__eq__(other: object) -> bool"
          }
        ]
      },
      "complexity": "simple",
      "component_type": "class",
      "conditional_imports": [],
      "created_at": "2025-12-06 23:56:17",
      "decorators": [
        "dataclass"
      ],
      "dependencies": [
        "dataclasses",
        "typing"
      ],
      "description": "A dataclass that encapsulates the results from an editing workflow process, including detected annotations, confidence scores, recommendations, and optional rewritten content.",
      "docstring": "Result from the editing workflow process",
      "id": 1971,
      "imports": [
        "import asyncio",
        "from typing import Optional",
        "from typing import Dict",
        "from typing import Any",
        "from typing import List",
        "from pathlib import Path",
        "from dataclasses import dataclass",
        "import logging",
        "from annotation_detector import AnnotationDetector",
        "from text_rewriter import TextRewriter",
        "from llm_handler import LLMHandler"
      ],
      "imports_required": [
        "from dataclasses import dataclass",
        "from typing import Optional, List, Dict, Any"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 26,
      "line_start": 19,
      "name": "EditingWorkflowResult",
      "parameters": [
        {
          "annotation": "",
          "default": null,
          "name": "bases"
        }
      ],
      "parameters_explained": {
        "annotation_details": "Optional list of dictionaries, where each dictionary contains detailed information about individual annotations. Each dictionary can have arbitrary keys and values (Dict[str, Any]) to store metadata like annotation type, location, severity, etc.",
        "annotations_detected": "Integer count of the total number of annotations detected in the processed content. This is a required field that indicates how many editing marks, comments, or annotations were found.",
        "confidence_score": "Float value representing the confidence level of the detection and processing, typically ranging from 0.0 to 1.0. Higher values indicate greater confidence in the results. This is a required field.",
        "recommendations": "List of string recommendations generated during the workflow. These are actionable suggestions or insights derived from the editing process. This is a required field and should contain at least one recommendation.",
        "rewritten_content": "Optional string containing the rewritten or edited version of the original content. If the workflow includes content rewriting, this field will contain the modified text; otherwise, it remains None.",
        "workflow_summary": "Optional string providing a high-level summary of the entire workflow execution, including key findings, processing steps taken, or overall assessment."
      },
      "parent_class": null,
      "purpose": "EditingWorkflowResult serves as a structured container for the output of an editing workflow. It stores metrics about detected annotations, confidence scores, actionable recommendations, and optionally the rewritten content, detailed annotation information, and a workflow summary. This class is designed to be instantiated as a return value from editing workflow operations, providing a standardized way to communicate results between components.",
      "return_annotation": null,
      "return_explained": "As a dataclass, instantiation returns an EditingWorkflowResult object with all specified attributes initialized. The object is immutable by default (unless frozen=False is explicitly set) and provides automatic __init__, __repr__, and __eq__ methods. The instance serves as a data transfer object containing all workflow results.",
      "settings_required": [],
      "source_code": "class EditingWorkflowResult:\n    \"\"\"Result from the editing workflow process\"\"\"\n    annotations_detected: int\n    confidence_score: float\n    recommendations: List[str]\n    rewritten_content: Optional[str] = None\n    annotation_details: Optional[List[Dict[str, Any]]] = None\n    workflow_summary: Optional[str] = None",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/editing_workflow.py",
      "tags": [
        "dataclass",
        "result-container",
        "editing-workflow",
        "annotations",
        "data-transfer-object",
        "workflow-result",
        "text-processing",
        "confidence-scoring"
      ],
      "updated_at": "2025-12-07T00:56:17.951002",
      "usage_example": "from dataclasses import dataclass\nfrom typing import Optional, List, Dict, Any\n\n@dataclass\nclass EditingWorkflowResult:\n    annotations_detected: int\n    confidence_score: float\n    recommendations: List[str]\n    rewritten_content: Optional[str] = None\n    annotation_details: Optional[List[Dict[str, Any]]] = None\n    workflow_summary: Optional[str] = None\n\n# Basic instantiation with required fields only\nresult = EditingWorkflowResult(\n    annotations_detected=5,\n    confidence_score=0.92,\n    recommendations=[\"Fix grammar in paragraph 2\", \"Clarify argument in section 3\"]\n)\n\n# Full instantiation with all optional fields\ndetailed_result = EditingWorkflowResult(\n    annotations_detected=3,\n    confidence_score=0.87,\n    recommendations=[\"Review tone\", \"Add citations\"],\n    rewritten_content=\"This is the revised text...\",\n    annotation_details=[\n        {\"type\": \"grammar\", \"location\": \"line 5\", \"severity\": \"high\"},\n        {\"type\": \"style\", \"location\": \"line 12\", \"severity\": \"medium\"}\n    ],\n    workflow_summary=\"Processed 3 annotations with high confidence. Major issues addressed.\"\n)\n\n# Accessing attributes\nprint(f\"Detected {result.annotations_detected} annotations\")\nprint(f\"Confidence: {result.confidence_score}\")\nfor rec in result.recommendations:\n    print(f\"- {rec}\")"
    },
    {
      "best_practices": [
        "Always use async/await syntax when calling any method of this class",
        "Ensure the user_token is valid and not expired before making API calls",
        "Handle HTTP status codes appropriately - check response['status'] before accessing response['data']",
        "Use ensure_folder_exists() instead of create_folder() when you need to guarantee a folder path exists",
        "Close aiohttp sessions properly - the class creates new sessions for each request which are automatically closed",
        "For batch operations, use multi_update_metadata() and multi_delete_files() instead of individual calls for better performance",
        "Store and reuse ETags from list_files() responses to implement efficient polling with If-None-Match headers",
        "The client automatically handles JWT token parsing to extract the storage host, falling back to internal.cloud.remarkable.com if parsing fails",
        "When uploading documents, ensure content is in bytes format and file_type matches the actual content MIME type",
        "For real-time updates, call get_events_token() to obtain an SSE token for subscribing to document change events",
        "Path-based folder operations (find_folder_by_path, ensure_folder_exists) require listing all files, which may be slow for large libraries"
      ],
      "class_interface": {
        "attributes": [
          {
            "description": "JWT authentication token for API requests",
            "is_class_variable": false,
            "name": "user_token",
            "type": "str"
          },
          {
            "description": "Base URL for API requests, extracted from JWT token or fallback URL",
            "is_class_variable": false,
            "name": "base_url",
            "type": "str"
          },
          {
            "description": "Default HTTP headers for API requests including Authorization, rM-Source, and User-Agent",
            "is_class_variable": false,
            "name": "headers",
            "type": "Dict[str, str]"
          }
        ],
        "methods": [
          {
            "is_property": false,
            "is_static": false,
            "name": "__init__",
            "parameters": {
              "user_token": "JWT authentication token for reMarkable Cloud API"
            },
            "purpose": "Initialize the API client with authentication token and configure base URL and headers",
            "returns": "None - initializes instance attributes",
            "signature": "__init__(self, user_token: str)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_get_storage_host",
            "parameters": {},
            "purpose": "Extract storage host URL from JWT token's tectonic service field",
            "returns": "Storage host URL string (e.g., 'https://xyz.tectonic.remarkable.com' or fallback URL)",
            "signature": "_get_storage_host(self) -> str"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "list_files",
            "parameters": {
              "etag": "ETag value for conditional request (If-None-Match header) to check if content changed",
              "only_folders": "If True, only return folders (CollectionType items)"
            },
            "purpose": "Retrieve list of files and folders from reMarkable Cloud with optional filtering and conditional requests",
            "returns": "Dictionary with 'status' (int), 'headers' (dict), 'data' (list of file objects or None), 'etag' (str or None)",
            "signature": "async list_files(self, etag: Optional[str] = None, only_folders: bool = False) -> Dict[str, Any]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "upload_document",
            "parameters": {
              "content": "File content as bytes",
              "file_type": "MIME type of the file (e.g., 'application/pdf', 'application/epub+zip')",
              "filename": "Name of the file to display in reMarkable",
              "parent_id": "UUID of parent folder, or None for root"
            },
            "purpose": "Upload a document to reMarkable Cloud with metadata",
            "returns": "Dictionary with 'status', 'headers', 'data', and 'document_id' (extracted from Location header)",
            "signature": "async upload_document(self, filename: str, content: bytes, file_type: str = 'application/pdf', parent_id: Optional[str] = None) -> Dict[str, Any]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "create_folder",
            "parameters": {
              "folder_name": "Name of the folder to create",
              "parent_id": "UUID of parent folder, or None for root"
            },
            "purpose": "Create a new folder in reMarkable Cloud",
            "returns": "Dictionary with 'status', 'headers', 'data', and 'folder_id' (extracted from Location header)",
            "signature": "async create_folder(self, folder_name: str, parent_id: Optional[str] = None) -> Dict[str, Any]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "update_metadata",
            "parameters": {
              "document_id": "UUID of the document/folder to update",
              "metadata": "Dictionary of metadata fields to update (e.g., name, parent, pinned)"
            },
            "purpose": "Update metadata for a single document or folder",
            "returns": "Dictionary with 'status', 'headers', and 'data' (response body)",
            "signature": "async update_metadata(self, document_id: str, metadata: Dict[str, Any]) -> Dict[str, Any]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "multi_update_metadata",
            "parameters": {
              "hashes": "List of document UUIDs/hashes corresponding to updates",
              "updates": "List of metadata update dictionaries"
            },
            "purpose": "Update metadata for multiple documents in a single batch request",
            "returns": "Dictionary with 'status', 'headers', and 'data' (batch operation response)",
            "signature": "async multi_update_metadata(self, updates: List[Dict[str, Any]], hashes: List[str]) -> Dict[str, Any]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "multi_delete_files",
            "parameters": {
              "hashes": "List of document/folder UUIDs to delete"
            },
            "purpose": "Delete multiple files/folders in a single batch request",
            "returns": "Dictionary with 'status', 'headers', and 'data' (batch delete response)",
            "signature": "async multi_delete_files(self, hashes: List[str]) -> Dict[str, Any]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "export_file",
            "parameters": {
              "document_id": "UUID of the document to export",
              "export_type": "MIME type for export format (e.g., 'application/pdf', 'application/epub+zip')"
            },
            "purpose": "Download/export a file from reMarkable Cloud in specified format",
            "returns": "Tuple of (response_info_dict with 'status', 'headers', 'content_type', file_content_bytes)",
            "signature": "async export_file(self, document_id: str, export_type: str = 'application/pdf') -> Tuple[Dict[str, Any], bytes]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "get_events_token",
            "parameters": {},
            "purpose": "Obtain SSE (Server-Sent Events) token for subscribing to real-time document updates",
            "returns": "SSE token string for event subscription",
            "signature": "async get_events_token(self) -> str"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "find_folder_by_path",
            "parameters": {
              "folder_path": "Path to folder starting with '/' (e.g., '/Projects/2024')"
            },
            "purpose": "Find folder UUID by hierarchical path (e.g., '/My Folder/Subfolder')",
            "returns": "Folder UUID string if found, None if not found or for root path",
            "signature": "async find_folder_by_path(self, folder_path: str) -> Optional[str]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "ensure_folder_exists",
            "parameters": {
              "folder_path": "Path to folder starting with '/' (e.g., '/Projects/2024')"
            },
            "purpose": "Ensure folder path exists, creating intermediate folders as needed",
            "returns": "Folder UUID string of the final folder in the path",
            "signature": "async ensure_folder_exists(self, folder_path: str) -> str"
          }
        ]
      },
      "complexity": "complex",
      "component_type": "class",
      "conditional_imports": [],
      "created_at": "2025-12-06 23:55:28",
      "decorators": [],
      "dependencies": [
        "json",
        "base64",
        "aiohttp",
        "typing",
        "pathlib",
        "asyncio"
      ],
      "description": "Asynchronous API client for interacting with the reMarkable Cloud service, providing methods for file management, folder operations, and document synchronization.",
      "docstring": "reMarkable Cloud API client based on Chrome extension analysis",
      "id": 1969,
      "imports": [
        "import json",
        "import base64",
        "import aiohttp",
        "from typing import Dict",
        "from typing import Any",
        "from typing import Optional",
        "from typing import List",
        "from typing import Tuple",
        "from pathlib import Path",
        "import asyncio"
      ],
      "imports_required": [
        "import json",
        "import base64",
        "import aiohttp",
        "from typing import Dict, Any, Optional, List, Tuple",
        "from pathlib import Path",
        "import asyncio"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 352,
      "line_start": 15,
      "name": "RemarkableAPIClient",
      "parameters": [
        {
          "annotation": "",
          "default": null,
          "name": "bases"
        }
      ],
      "parameters_explained": {
        "user_token": "JWT authentication token for reMarkable Cloud API access. This token should be obtained through the reMarkable authentication flow and contains encoded information about the storage host (tectonic service). The token is used in the Authorization header for all API requests."
      },
      "parent_class": null,
      "purpose": "This class provides a comprehensive interface to the reMarkable Cloud API, enabling programmatic access to upload, download, list, update, and delete documents and folders. It handles authentication via JWT tokens, automatically extracts storage host information, and implements the API patterns observed from the reMarkable Chrome extension. The client supports both individual and batch operations, real-time event subscriptions, and hierarchical folder management with path-based operations.",
      "return_annotation": null,
      "return_explained": "Instantiation returns a RemarkableAPIClient object configured with the provided user token. Methods return dictionaries containing 'status' (HTTP status code), 'headers' (response headers as dict), and 'data' (parsed response body). Upload and folder creation methods also include 'document_id' or 'folder_id' extracted from the Location header. The export_file method returns a tuple of (response_info_dict, file_content_bytes). Path-based methods return folder IDs as strings or None.",
      "settings_required": [
        "Valid reMarkable Cloud user token (JWT) obtained through authentication",
        "Network access to reMarkable Cloud API endpoints (tectonic.remarkable.com or internal.cloud.remarkable.com)",
        "Async runtime environment (asyncio event loop) for executing async methods"
      ],
      "source_code": "class RemarkableAPIClient:\n    \"\"\"\n    reMarkable Cloud API client based on Chrome extension analysis\n    \"\"\"\n    \n    def __init__(self, user_token: str):\n        self.user_token = user_token\n        self.base_url = self._get_storage_host()\n        self.headers = {\n            'Authorization': f'Bearer {user_token}',\n            'rM-Source': 'RoR-Browser',  # From Chrome extension\n            'User-Agent': 'E-Ink-LLM-Assistant/1.0'\n        }\n    \n    def _get_storage_host(self) -> str:\n        \"\"\"\n        Extract storage host from JWT token (tectonic service)\n        Based on Chrome extension logic\n        \"\"\"\n        try:\n            # Decode JWT payload\n            payload = self.user_token.split('.')[1]\n            # Add padding if needed\n            payload += '=' * (4 - len(payload) % 4)\n            decoded = json.loads(base64.b64decode(payload))\n            \n            tectonic = decoded.get('tectonic')\n            if tectonic and isinstance(tectonic, str) and tectonic:\n                return f\"https://{tectonic}.tectonic.remarkable.com\"\n            else:\n                # Fallback to internal cloud\n                return \"https://internal.cloud.remarkable.com\"\n        except Exception:\n            # Fallback if token parsing fails\n            return \"https://internal.cloud.remarkable.com\"\n    \n    async def list_files(self, etag: Optional[str] = None, only_folders: bool = False) -> Dict[str, Any]:\n        \"\"\"\n        List files in reMarkable Cloud\n        \n        Args:\n            etag: ETag for conditional requests (If-None-Match)\n            only_folders: Only return folders\n        \n        Returns:\n            Response with files list and metadata\n        \"\"\"\n        url = f\"{self.base_url}/doc/v2/files\"\n        headers = self.headers.copy()\n        \n        params = {}\n        if only_folders:\n            params['onlyFolders'] = 'true'\n        \n        if etag:\n            headers['If-None-Match'] = etag\n        \n        async with aiohttp.ClientSession() as session:\n            async with session.get(url, headers=headers, params=params) as response:\n                return {\n                    'status': response.status,\n                    'headers': dict(response.headers),\n                    'data': await response.json() if response.status == 200 else None,\n                    'etag': response.headers.get('ETag')\n                }\n    \n    async def upload_document(self, filename: str, content: bytes, \n                            file_type: str = \"application/pdf\", \n                            parent_id: Optional[str] = None) -> Dict[str, Any]:\n        \"\"\"\n        Upload a document to reMarkable Cloud\n        \n        Args:\n            filename: Name of the file\n            content: File content as bytes\n            file_type: MIME type of file\n            parent_id: Parent folder ID (None for root)\n        \n        Returns:\n            Upload response with document ID\n        \"\"\"\n        url = f\"{self.base_url}/doc/v2/files\"\n        \n        # Create metadata (based on Chrome extension)\n        metadata = {\n            'file_name': filename,\n            'type': 'DocumentType'\n        }\n        if parent_id:\n            metadata['parent'] = parent_id\n        \n        # Encode metadata as base64 (Chrome extension pattern)\n        meta_encoded = base64.b64encode(json.dumps(metadata).encode()).decode()\n        \n        
headers = self.headers.copy()\n        headers['rM-Meta'] = meta_encoded\n        headers['Content-Type'] = file_type\n        \n        async with aiohttp.ClientSession() as session:\n            async with session.post(url, headers=headers, data=content) as response:\n                return {\n                    'status': response.status,\n                    'headers': dict(response.headers),\n                    'data': await response.json() if response.content_type == 'application/json' else await response.text(),\n                    'document_id': response.headers.get('Location', '').split('/')[-1] if response.headers.get('Location') else None\n                }\n    \n    async def create_folder(self, folder_name: str, parent_id: Optional[str] = None) -> Dict[str, Any]:\n        \"\"\"\n        Create a folder in reMarkable Cloud\n        \n        Args:\n            folder_name: Name of the folder\n            parent_id: Parent folder ID (None for root)\n        \n        Returns:\n            Folder creation response\n        \"\"\"\n        url = f\"{self.base_url}/doc/v2/files\"\n        \n        # Create folder metadata\n        metadata = {\n            'file_name': folder_name,\n            'type': 'CollectionType'\n        }\n        if parent_id:\n            metadata['parent'] = parent_id\n        \n        # Encode metadata as base64\n        meta_encoded = base64.b64encode(json.dumps(metadata).encode()).decode()\n        \n        headers = self.headers.copy()\n        headers['rM-Meta'] = meta_encoded\n        headers['Content-Type'] = 'folder'  # Special content type for folders\n        \n        async with aiohttp.ClientSession() as session:\n            async with session.post(url, headers=headers, data='') as response:\n                return {\n                    'status': response.status,\n                    'headers': dict(response.headers),\n                    'data': await response.json() if response.content_type == 'application/json' else await response.text(),\n                    'folder_id': response.headers.get('Location', '').split('/')[-1] if response.headers.get('Location') else None\n                }\n    \n    async def update_metadata(self, document_id: str, metadata: Dict[str, Any]) -> Dict[str, Any]:\n        \"\"\"\n        Update document metadata\n        \n        Args:\n            document_id: Document UUID\n            metadata: Metadata to update\n        \n        Returns:\n            Update response\n        \"\"\"\n        url = f\"{self.base_url}/doc/v2/files/{document_id}\"\n        \n        headers = self.headers.copy()\n        headers['Content-Type'] = 'application/json'\n        \n        async with aiohttp.ClientSession() as session:\n            async with session.patch(url, headers=headers, json=metadata) as response:\n                return {\n                    'status': response.status,\n                    'headers': dict(response.headers),\n                    'data': await response.json() if response.content_type == 'application/json' else await response.text()\n                }\n    \n    async def multi_update_metadata(self, updates: List[Dict[str, Any]], hashes: List[str]) -> Dict[str, Any]:\n        \"\"\"\n        Update multiple documents metadata in batch\n        \n        Args:\n            updates: List of metadata updates\n            hashes: List of document hashes/IDs\n        \n        Returns:\n            Batch update response\n        \"\"\"\n        url = f\"{self.base_url}/doc/v2/files\"\n    
    \n        payload = {\n            'updates': updates,\n            'hashes': hashes\n        }\n        \n        headers = self.headers.copy()\n        headers['Content-Type'] = 'application/json'\n        \n        async with aiohttp.ClientSession() as session:\n            async with session.patch(url, headers=headers, json=payload) as response:\n                return {\n                    'status': response.status,\n                    'headers': dict(response.headers),\n                    'data': await response.json() if response.content_type == 'application/json' else await response.text()\n                }\n    \n    async def multi_delete_files(self, hashes: List[str]) -> Dict[str, Any]:\n        \"\"\"\n        Delete multiple files in batch\n        \n        Args:\n            hashes: List of document hashes/IDs to delete\n        \n        Returns:\n            Batch delete response\n        \"\"\"\n        url = f\"{self.base_url}/doc/v2/files\"\n        \n        payload = {\n            'hashes': hashes\n        }\n        \n        headers = self.headers.copy()\n        headers['Content-Type'] = 'application/json'\n        \n        async with aiohttp.ClientSession() as session:\n            async with session.delete(url, headers=headers, json=payload) as response:\n                return {\n                    'status': response.status,\n                    'headers': dict(response.headers),\n                    'data': await response.json() if response.content_type == 'application/json' else await response.text()\n                }\n    \n    async def export_file(self, document_id: str, export_type: str = \"application/pdf\") -> Tuple[Dict[str, Any], bytes]:\n        \"\"\"\n        Export/download a file from reMarkable Cloud\n        \n        Args:\n            document_id: Document UUID\n            export_type: Export format (application/pdf, application/epub+zip, etc.)\n        \n        Returns:\n            Tuple of (response_info, file_content)\n        \"\"\"\n        url = f\"{self.base_url}/doc/v2/files/{document_id}/export\"\n        \n        headers = self.headers.copy()\n        headers['Accept'] = export_type\n        \n        async with aiohttp.ClientSession() as session:\n            async with session.get(url, headers=headers) as response:\n                content = await response.read()\n                return {\n                    'status': response.status,\n                    'headers': dict(response.headers),\n                    'content_type': response.content_type\n                }, content\n    \n    async def get_events_token(self) -> str:\n        \"\"\"\n        Get SSE (Server-Sent Events) token for real-time updates\n        \n        Returns:\n            SSE token string\n        \"\"\"\n        url = f\"{self.base_url}/doc/v2/events/get-token\"\n        \n        async with aiohttp.ClientSession() as session:\n            async with session.get(url, headers=self.headers) as response:\n                if response.status == 200:\n                    data = await response.json()\n                    return data.get('token', '')\n                else:\n                    raise Exception(f\"Failed to get events token: {response.status}\")\n    \n    async def find_folder_by_path(self, folder_path: str) -> Optional[str]:\n        \"\"\"\n        Find folder ID by path (e.g., \"/My Folder/Subfolder\")\n        \n        Args:\n            folder_path: Path to folder (starting with /)\n        \n        Returns:\n            
Folder ID if found, None otherwise\n        \"\"\"\n        if folder_path == \"/\" or folder_path == \"\":\n            return None  # Root folder\n        \n        # Get all files/folders\n        response = await self.list_files()\n        if response['status'] != 200 or not response['data']:\n            return None\n        \n        files = response['data']\n        path_parts = [part for part in folder_path.split('/') if part]\n        \n        current_parent = None\n        \n        for part in path_parts:\n            found = False\n            for item in files:\n                if (item.get('name') == part and \n                    item.get('type') == 'CollectionType' and\n                    item.get('parent') == current_parent):\n                    current_parent = item.get('id')\n                    found = True\n                    break\n            \n            if not found:\n                return None\n        \n        return current_parent\n    \n    async def ensure_folder_exists(self, folder_path: str) -> str:\n        \"\"\"\n        Ensure folder exists, create if necessary\n        \n        Args:\n            folder_path: Path to folder (starting with /)\n        \n        Returns:\n            Folder ID\n        \"\"\"\n        folder_id = await self.find_folder_by_path(folder_path)\n        if folder_id is not None:\n            return folder_id\n        \n        # Create folder(s) as needed\n        path_parts = [part for part in folder_path.split('/') if part]\n        current_parent = None\n        current_path = \"\"\n        \n        for part in path_parts:\n            current_path += f\"/{part}\"\n            existing_id = await self.find_folder_by_path(current_path)\n            \n            if existing_id:\n                current_parent = existing_id\n            else:\n                # Create this folder\n                response = await self.create_folder(part, current_parent)\n                if response['status'] in [200, 201] and response['folder_id']:\n                    current_parent = response['folder_id']\n                else:\n                    raise Exception(f\"Failed to create folder '{part}': {response}\")\n        \n        return current_parent",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/remarkable_api_endpoints.py",
      "tags": [
        "remarkable",
        "cloud-api",
        "async",
        "document-management",
        "file-upload",
        "folder-management",
        "jwt-authentication",
        "rest-api",
        "aiohttp",
        "batch-operations",
        "sse-events"
      ],
      "updated_at": "2025-12-07T00:55:28.251023",
      "usage_example": "import asyncio\nimport json\nimport base64\nimport aiohttp\nfrom typing import Dict, Any, Optional, List, Tuple\n\n# Instantiate the client\nuser_token = 'your_jwt_token_here'\nclient = RemarkableAPIClient(user_token)\n\nasync def main():\n    # List all files\n    files_response = await client.list_files()\n    if files_response['status'] == 200:\n        print(f\"Found {len(files_response['data'])} files\")\n    \n    # Create a folder\n    folder_response = await client.create_folder('My Notes')\n    folder_id = folder_response['folder_id']\n    \n    # Upload a PDF document\n    with open('document.pdf', 'rb') as f:\n        content = f.read()\n    upload_response = await client.upload_document(\n        filename='document.pdf',\n        content=content,\n        file_type='application/pdf',\n        parent_id=folder_id\n    )\n    doc_id = upload_response['document_id']\n    \n    # Export/download a file\n    response_info, file_content = await client.export_file(doc_id)\n    with open('downloaded.pdf', 'wb') as f:\n        f.write(file_content)\n    \n    # Ensure folder path exists\n    folder_id = await client.ensure_folder_exists('/Projects/2024')\n    \n    # Delete files\n    delete_response = await client.multi_delete_files([doc_id])\n\nasyncio.run(main())"
    },
    {
      "best_practices": [
        "Always call start_watching() within an async context (asyncio.run() or existing event loop)",
        "Ensure reMarkable Cloud authentication is successful before processing files by checking authenticate_remarkable() return value",
        "Use 'one_time_code' only for initial authentication; subsequent runs will use stored tokens",
        "Handle KeyboardInterrupt gracefully when running in 'both' mode to ensure clean shutdown of both watchers",
        "Process existing files on startup (process_existing=True) to avoid missing files added while processor was offline",
        "Set appropriate poll_interval based on expected file frequency (default 60 seconds balances responsiveness and API usage)",
        "Ensure output folders exist in reMarkable Cloud before processing to avoid upload failures",
        "Use temporary directories for downloaded files to avoid disk space issues with large document volumes",
        "Monitor logs for authentication failures, upload errors, and processing exceptions",
        "The class maintains state through cloud_manager and file_watcher attributes; do not modify these directly"
      ],
      "class_interface": {
        "attributes": [
          {
            "description": "Configuration dictionary for reMarkable Cloud integration settings",
            "is_class_variable": false,
            "name": "remarkable_config",
            "type": "Dict[str, Any]"
          },
          {
            "description": "Flag indicating whether reMarkable Cloud integration is enabled",
            "is_class_variable": false,
            "name": "remarkable_enabled",
            "type": "bool"
          },
          {
            "description": "Manager instance for reMarkable Cloud operations (authentication, upload, download)",
            "is_class_variable": false,
            "name": "cloud_manager",
            "type": "Optional[RemarkableCloudManager]"
          },
          {
            "description": "Watcher instance that monitors reMarkable Cloud folder for new files",
            "is_class_variable": false,
            "name": "file_watcher",
            "type": "Optional[RemarkableFileWatcher]"
          },
          {
            "description": "Path in reMarkable Cloud to monitor for input files (default: '/E-Ink LLM Input')",
            "is_class_variable": false,
            "name": "remarkable_watch_path",
            "type": "str"
          },
          {
            "description": "Path in reMarkable Cloud to upload processed responses (default: '/E-Ink LLM Output')",
            "is_class_variable": false,
            "name": "remarkable_output_path",
            "type": "str"
          },
          {
            "description": "Seconds between cloud folder checks (default: 60)",
            "is_class_variable": false,
            "name": "poll_interval",
            "type": "int"
          },
          {
            "description": "Logger instance inherited from parent class for error and info logging",
            "is_class_variable": false,
            "name": "logger",
            "type": "logging.Logger"
          }
        ],
        "methods": [
          {
            "is_property": false,
            "is_static": false,
            "name": "__init__",
            "parameters": {
              "api_key": "OpenAI API key for LLM processing",
              "remarkable_config": "Dictionary with reMarkable Cloud settings (enabled, paths, poll_interval, one_time_code)",
              "watch_folder": "Local folder path to watch for files"
            },
            "purpose": "Initialize the enhanced processor with optional local and cloud configurations",
            "returns": "None (constructor)",
            "signature": "__init__(self, api_key: Optional[str] = None, watch_folder: Optional[str] = None, remarkable_config: Optional[Dict[str, Any]] = None)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "authenticate_remarkable",
            "parameters": {},
            "purpose": "Authenticate with reMarkable Cloud service using configured credentials",
            "returns": "Boolean indicating whether authentication was successful",
            "signature": "async authenticate_remarkable(self) -> bool"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "setup_remarkable_folders",
            "parameters": {},
            "purpose": "Create required input and output folders in reMarkable Cloud if they don't exist",
            "returns": "Boolean indicating whether folder setup was successful",
            "signature": "async setup_remarkable_folders(self) -> bool"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "process_remarkable_file",
            "parameters": {
              "document": "reMarkable Document object containing metadata",
              "local_file_path": "Path to the locally downloaded file to process"
            },
            "purpose": "Process a file downloaded from reMarkable Cloud through the LLM pipeline and upload the response back",
            "returns": "None (side effect: uploads processed response to reMarkable Cloud)",
            "signature": "async process_remarkable_file(self, document, local_file_path: Path) -> None"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "start_remarkable_watching",
            "parameters": {
              "process_existing": "Whether to process files already present in the cloud folder before starting to watch"
            },
            "purpose": "Start monitoring reMarkable Cloud folder for new files, authenticate, setup folders, and begin processing",
            "returns": "None (runs indefinitely until interrupted)",
            "signature": "async start_remarkable_watching(self, process_existing: bool = False) -> None"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "start_watching",
            "parameters": {
              "mode": "Operating mode: 'local' for local file watching, 'remarkable' for cloud watching, 'both' for concurrent operation",
              "process_existing": "Whether to process existing files on startup"
            },
            "purpose": "Start watching for files in specified mode (local, remarkable, or both)",
            "returns": "None (runs indefinitely until interrupted)",
            "signature": "async start_watching(self, process_existing: bool = True, mode: str = 'local') -> None"
          }
        ]
      },
      "complexity": "complex",
      "component_type": "class",
      "conditional_imports": [],
      "created_at": "2025-12-06 23:53:35",
      "decorators": [],
      "dependencies": [
        "asyncio",
        "tempfile",
        "pathlib",
        "typing",
        "logging",
        "processor",
        "remarkable_cloud",
        "rmcl"
      ],
      "description": "Enhanced E-Ink LLM Processor that extends EInkLLMProcessor with reMarkable Cloud integration, enabling file processing from both local directories and reMarkable Cloud storage.",
      "docstring": "Enhanced E-Ink LLM Processor with reMarkable Cloud integration",
      "id": 1965,
      "imports": [
        "import asyncio",
        "import tempfile",
        "from pathlib import Path",
        "from typing import Optional",
        "from typing import Dict",
        "from typing import Any",
        "import logging",
        "from processor import EInkLLMProcessor",
        "from remarkable_cloud import RemarkableCloudManager",
        "from remarkable_cloud import RemarkableFileWatcher",
        "from rmcl import Item"
      ],
      "imports_required": [
        "import asyncio",
        "import tempfile",
        "from pathlib import Path",
        "from typing import Optional, Dict, Any",
        "import logging",
        "from processor import EInkLLMProcessor",
        "from remarkable_cloud import RemarkableCloudManager, RemarkableFileWatcher",
        "from rmcl import Item"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 218,
      "line_start": 17,
      "name": "RemarkableEInkProcessor",
      "parameters": [
        {
          "annotation": "EInkLLMProcessor",
          "default": null,
          "name": "bases"
        }
      ],
      "parameters_explained": {
        "api_key": "Optional OpenAI API key for LLM processing. If not provided, will attempt to use environment variable or configuration from parent class.",
        "remarkable_config": "Optional dictionary containing reMarkable Cloud integration settings. Keys include: 'enabled' (bool, whether to enable cloud integration), 'watch_folder_path' (str, cloud folder to monitor, default '/E-Ink LLM Input'), 'output_folder_path' (str, cloud folder for responses, default '/E-Ink LLM Output'), 'poll_interval' (int, seconds between cloud checks, default 60), 'one_time_code' (str, optional authentication code for initial setup).",
        "watch_folder": "Optional local folder path to watch for new files in local mode. Used when operating in local or both modes."
      },
      "parent_class": null,
      "purpose": "This class provides a comprehensive solution for processing files through an LLM with E-Ink display optimization, supporting both local file watching and reMarkable Cloud integration. It can monitor reMarkable Cloud folders for new documents, download them, process them through the LLM pipeline, and upload responses back to the cloud. The class manages authentication, folder setup, file watching, and bidirectional synchronization with reMarkable Cloud while maintaining backward compatibility with local file processing.",
      "return_annotation": null,
      "return_explained": "Instantiation returns a RemarkableEInkProcessor object configured for file processing. Methods return various types: authenticate_remarkable() returns bool indicating authentication success, setup_remarkable_folders() returns bool for folder setup success, process_remarkable_file() returns None (side effect: uploads processed file), start_remarkable_watching() returns None (runs indefinitely), start_watching() returns None (runs indefinitely based on mode).",
      "settings_required": [
        "OpenAI API key (via api_key parameter or environment variable)",
        "reMarkable Cloud authentication (one_time_code for initial setup, or existing token)",
        "Local watch folder path if using local or both modes",
        "reMarkable Cloud folder paths configured in remarkable_config if using cloud integration",
        "Network connectivity for reMarkable Cloud operations"
      ],
      "source_code": "class RemarkableEInkProcessor(EInkLLMProcessor):\n    \"\"\"Enhanced E-Ink LLM Processor with reMarkable Cloud integration\"\"\"\n    \n    def __init__(self, api_key: Optional[str] = None, watch_folder: Optional[str] = None,\n                 remarkable_config: Optional[Dict[str, Any]] = None):\n        \"\"\"\n        Initialize the enhanced processor\n        \n        Args:\n            api_key: OpenAI API key\n            watch_folder: Local folder to watch (for local mode)\n            remarkable_config: Configuration for reMarkable Cloud integration\n                {\n                    'enabled': bool,\n                    'watch_folder_path': str,  # Path in reMarkable Cloud to watch\n                    'output_folder_path': str,  # Path in reMarkable Cloud to upload responses\n                    'poll_interval': int,  # Seconds between checks (default: 60)\n                    'one_time_code': str,  # For initial authentication (optional)\n                }\n        \"\"\"\n        # Initialize base processor\n        super().__init__(api_key, watch_folder)\n        \n        self.remarkable_config = remarkable_config or {}\n        self.remarkable_enabled = self.remarkable_config.get('enabled', False)\n        \n        # Initialize reMarkable Cloud components if enabled\n        self.cloud_manager = None\n        self.file_watcher = None\n        \n        if self.remarkable_enabled:\n            self.cloud_manager = RemarkableCloudManager()\n            \n            # Extract configuration\n            self.remarkable_watch_path = self.remarkable_config.get('watch_folder_path', '/E-Ink LLM Input')\n            self.remarkable_output_path = self.remarkable_config.get('output_folder_path', '/E-Ink LLM Output')\n            self.poll_interval = self.remarkable_config.get('poll_interval', 60)\n            \n            print(f\"\ud83c\udf10 reMarkable Cloud integration enabled\")\n            print(f\"\ud83d\udcc1 Input folder: {self.remarkable_watch_path}\")\n            print(f\"\ud83d\udce4 Output folder: {self.remarkable_output_path}\")\n        else:\n            print(f\"\ud83d\udcc2 Local file watching mode\")\n    \n    async def authenticate_remarkable(self) -> bool:\n        \"\"\"Authenticate with reMarkable Cloud\"\"\"\n        if not self.remarkable_enabled or not self.cloud_manager:\n            return False\n        \n        one_time_code = self.remarkable_config.get('one_time_code')\n        return await self.cloud_manager.authenticate(one_time_code)\n    \n    async def setup_remarkable_folders(self) -> bool:\n        \"\"\"Ensure required folders exist in reMarkable Cloud\"\"\"\n        if not self.cloud_manager:\n            return False\n        \n        try:\n            # Create input folder if it doesn't exist\n            await self.cloud_manager.create_folder(self.remarkable_watch_path)\n            \n            # Create output folder if it doesn't exist\n            await self.cloud_manager.create_folder(self.remarkable_output_path)\n            \n            return True\n        except Exception as e:\n            self.logger.error(f\"Error setting up reMarkable folders: {e}\")\n            return False\n    \n    async def process_remarkable_file(self, document, local_file_path: Path) -> None:\n        \"\"\"\n        Process a file downloaded from reMarkable Cloud\n        \n        Args:\n            document: reMarkable Document object\n            local_file_path: Path to locally downloaded file\n        \"\"\"\n        
try:\n            print(f\"\\n{'='*60}\")\n            print(f\"\ud83c\udf10 PROCESSING REMARKABLE FILE: {document.name}\")\n            print(f\"{'='*60}\")\n            \n            # Process the file using the standard processor\n            result_path = await self.process_file(local_file_path)\n            \n            if result_path and result_path.exists():\n                # Upload the response back to reMarkable Cloud\n                response_name = f\"RESPONSE_{document.name}\"\n                \n                print(f\"\ud83d\udce4 Uploading response to reMarkable Cloud...\")\n                success = await self.cloud_manager.upload_document(\n                    result_path, \n                    self.remarkable_output_path,\n                    response_name\n                )\n                \n                if success:\n                    print(f\"\u2705 Response uploaded successfully: {response_name}\")\n                else:\n                    print(f\"\u274c Failed to upload response to reMarkable Cloud\")\n            else:\n                print(f\"\u274c No response file to upload\")\n                \n        except Exception as e:\n            self.logger.error(f\"Error processing reMarkable file {document.name}: {e}\")\n            print(f\"\u274c Error processing {document.name}: {e}\")\n    \n    async def start_remarkable_watching(self, process_existing: bool = False) -> None:\n        \"\"\"Start watching reMarkable Cloud folder for new files\"\"\"\n        if not self.remarkable_enabled:\n            raise ValueError(\"reMarkable Cloud integration is not enabled\")\n        \n        # Authenticate\n        print(f\"\ud83d\udd10 Authenticating with reMarkable Cloud...\")\n        auth_success = await self.authenticate_remarkable()\n        if not auth_success:\n            raise Exception(\"Failed to authenticate with reMarkable Cloud\")\n        \n        # Setup folders\n        print(f\"\ud83d\udcc1 Setting up reMarkable folders...\")\n        folder_success = await self.setup_remarkable_folders()\n        if not folder_success:\n            raise Exception(\"Failed to setup reMarkable folders\")\n        \n        # Process existing files if requested\n        if process_existing:\n            print(f\"\ud83d\udd0d Processing existing files in {self.remarkable_watch_path}...\")\n            existing_files = await self.cloud_manager.list_files_in_folder(\n                self.remarkable_watch_path, include_subfolders=True\n            )\n            \n            if existing_files:\n                print(f\"\ud83d\udcc1 Found {len(existing_files)} existing file(s) to process\")\n                \n                with tempfile.TemporaryDirectory() as temp_dir:\n                    temp_path = Path(temp_dir)\n                    \n                    for doc in existing_files:\n                        local_file = await self.cloud_manager.download_document(doc, temp_path)\n                        if local_file:\n                            await self.process_remarkable_file(doc, local_file)\n            else:\n                print(f\"\ud83d\udcc1 No existing files found\")\n        \n        # Start file watcher\n        self.file_watcher = RemarkableFileWatcher(\n            self.cloud_manager, \n            self.remarkable_watch_path, \n            self.poll_interval\n        )\n        \n        await self.file_watcher.start_watching(self.process_remarkable_file)\n    \n    async def start_watching(self, process_existing: bool = True, mode: str = 
\"local\") -> None:\n        \"\"\"\n        Start watching for files\n        \n        Args:\n            process_existing: Whether to process existing files on startup\n            mode: 'local' for local file watching, 'remarkable' for cloud watching, 'both' for both\n        \"\"\"\n        print(f\"\\n\ud83c\udfaf Starting Enhanced E-Ink LLM File Processor\")\n        print(f\"\ud83d\udd27 Mode: {mode}\")\n        \n        if mode == \"remarkable\":\n            if not self.remarkable_enabled:\n                raise ValueError(\"reMarkable mode requested but integration not enabled\")\n            await self.start_remarkable_watching(process_existing)\n            \n        elif mode == \"local\":\n            # Use the original local file watching\n            await super().start_watching(process_existing)\n            \n        elif mode == \"both\":\n            if not self.remarkable_enabled:\n                print(\"\u26a0\ufe0f  reMarkable integration not enabled, falling back to local mode\")\n                await super().start_watching(process_existing)\n                return\n            \n            # Start both watchers concurrently\n            print(f\"\ud83d\udd04 Starting both local and reMarkable file watchers...\")\n            \n            # Create tasks for both watchers\n            local_task = asyncio.create_task(super().start_watching(process_existing))\n            remarkable_task = asyncio.create_task(self.start_remarkable_watching(process_existing))\n            \n            try:\n                # Wait for either task to complete (which shouldn't happen unless there's an error)\n                await asyncio.gather(local_task, remarkable_task)\n            except KeyboardInterrupt:\n                print(f\"\\n\ud83d\uded1 Stopping all file watchers...\")\n                local_task.cancel()\n                remarkable_task.cancel()\n                \n                # Wait a bit for clean shutdown\n                try:\n                    await asyncio.wait_for(asyncio.gather(local_task, remarkable_task, return_exceptions=True), timeout=5.0)\n                except asyncio.TimeoutError:\n                    pass\n                    \n        else:\n            raise ValueError(f\"Invalid mode: {mode}. Must be 'local', 'remarkable', or 'both'\")",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/remarkable_processor.py",
      "tags": [
        "e-ink",
        "llm",
        "file-processing",
        "remarkable",
        "cloud-integration",
        "async",
        "file-watcher",
        "document-processing",
        "openai",
        "automation"
      ],
      "updated_at": "2025-12-07T00:53:35.221981",
      "usage_example": "# Example 1: Local mode only\nprocessor = RemarkableEInkProcessor(\n    api_key='your-openai-key',\n    watch_folder='/path/to/local/folder'\n)\nawait processor.start_watching(process_existing=True, mode='local')\n\n# Example 2: reMarkable Cloud mode\nremarkable_config = {\n    'enabled': True,\n    'watch_folder_path': '/E-Ink LLM Input',\n    'output_folder_path': '/E-Ink LLM Output',\n    'poll_interval': 60,\n    'one_time_code': 'abc123'  # For first-time auth\n}\nprocessor = RemarkableEInkProcessor(\n    api_key='your-openai-key',\n    remarkable_config=remarkable_config\n)\nawait processor.start_watching(process_existing=True, mode='remarkable')\n\n# Example 3: Both modes simultaneously\nprocessor = RemarkableEInkProcessor(\n    api_key='your-openai-key',\n    watch_folder='/path/to/local/folder',\n    remarkable_config=remarkable_config\n)\nawait processor.start_watching(process_existing=True, mode='both')"
    },
    {
      "best_practices": [
        "Always instantiate the class before calling any methods - the constructor sets up essential styling and color schemes",
        "Ensure the ConversationContext object is fully populated with all required data (conversation_turns, active_topics, key_insights, etc.) before generating PDFs",
        "Use await when calling generate_conversation_timeline() as it is an async method",
        "Check the boolean return value to verify successful PDF generation before assuming the file exists",
        "Provide absolute paths for output_path to avoid file location ambiguity",
        "The class maintains state through instance attributes (logger, styles, colors), so reuse the same instance for multiple PDF generations",
        "Handle exceptions in calling code as the methods catch and log errors but return False rather than raising exceptions",
        "For large conversations, be aware that comprehensive timeline generation may take significant time and memory",
        "The quick summary PDF is synchronous and faster, suitable for real-time previews",
        "Custom styles are set up during initialization and cannot be modified after instantiation without directly accessing self.styles"
      ],
      "class_interface": {
        "attributes": [
          {
            "description": "Logger instance for tracking PDF generation operations and errors",
            "is_class_variable": false,
            "name": "logger",
            "type": "logging.Logger"
          },
          {
            "description": "ReportLab stylesheet containing both default and custom paragraph styles for PDF formatting",
            "is_class_variable": false,
            "name": "styles",
            "type": "reportlab.lib.styles.StyleSheet1"
          },
          {
            "description": "Dictionary mapping element types (header, exchange, topic, reference, insight, light_gray, medium_gray) to HexColor objects for consistent PDF styling",
            "is_class_variable": false,
            "name": "colors",
            "type": "Dict[str, colors.HexColor]"
          }
        ],
        "methods": [
          {
            "is_property": false,
            "is_static": false,
            "name": "__init__",
            "parameters": {},
            "purpose": "Initialize the timeline generator with logger, PDF styles, and color scheme",
            "returns": "None - initializes instance attributes",
            "signature": "__init__(self) -> None"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "setup_custom_styles",
            "parameters": {},
            "purpose": "Configure custom paragraph styles for different PDF elements (titles, headers, content, topics, references)",
            "returns": "None - modifies self.styles by adding custom ParagraphStyle objects",
            "signature": "setup_custom_styles(self) -> None"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "generate_conversation_timeline",
            "parameters": {
              "context": "ConversationContext object containing full conversation data including turns, topics, insights, problem-solving chain, and reference map",
              "output_path": "String path where the output PDF file should be saved"
            },
            "purpose": "Generate a comprehensive multi-page PDF timeline report with title page, executive summary, timeline visualization, detailed exchanges, problem-solving analysis, and references",
            "returns": "Boolean - True if PDF generation succeeds, False if an error occurs",
            "signature": "async generate_conversation_timeline(self, context: ConversationContext, output_path: str) -> bool"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_build_title_page",
            "parameters": {
              "context": "ConversationContext object with conversation metadata"
            },
            "purpose": "Build ReportLab flowable elements for the title page including conversation details, active topics, and key insights",
            "returns": "List of ReportLab flowable elements (Paragraph, Spacer objects) for the title page",
            "signature": "_build_title_page(self, context: ConversationContext) -> List[Any]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_build_executive_summary",
            "parameters": {
              "context": "ConversationContext object with conversation statistics and problem-solving chain"
            },
            "purpose": "Build executive summary section with conversation overview, statistics table, and problem-solving progression",
            "returns": "List of ReportLab flowable elements for the executive summary section",
            "signature": "_build_executive_summary(self, context: ConversationContext) -> List[Any]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_build_timeline_section",
            "parameters": {
              "context": "ConversationContext object with conversation_turns data"
            },
            "purpose": "Build visual timeline section with a table showing exchange numbers, timestamps, inputs, topics, and processing times",
            "returns": "List of ReportLab flowable elements including a formatted timeline table",
            "signature": "_build_timeline_section(self, context: ConversationContext) -> List[Any]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_build_detailed_exchanges",
            "parameters": {
              "context": "ConversationContext object with conversation_turns containing detailed exchange information"
            },
            "purpose": "Build detailed documentation for each conversation exchange including input/response summaries, topics, key points, and processing statistics",
            "returns": "List of ReportLab flowable elements documenting each exchange in detail",
            "signature": "_build_detailed_exchanges(self, context: ConversationContext) -> List[Any]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_build_problem_solving_analysis",
            "parameters": {
              "context": "ConversationContext object with problem_solving_chain data"
            },
            "purpose": "Build problem-solving analysis section showing the progression of problem-solving steps throughout the conversation",
            "returns": "List of ReportLab flowable elements documenting problem-solving progression",
            "signature": "_build_problem_solving_analysis(self, context: ConversationContext) -> List[Any]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_build_references_section",
            "parameters": {
              "context": "ConversationContext object with reference_map data"
            },
            "purpose": "Build references and connections section showing how exchanges reference and build upon previous discussions",
            "returns": "List of ReportLab flowable elements documenting cross-references between exchanges",
            "signature": "_build_references_section(self, context: ConversationContext) -> List[Any]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "generate_quick_summary_pdf",
            "parameters": {
              "context": "ConversationContext object with conversation data",
              "output_path": "String path where the output PDF file should be saved"
            },
            "purpose": "Generate a quick one-page summary PDF with basic statistics and recent exchanges using ReportLab canvas",
            "returns": "Boolean - True if PDF generation succeeds, False if an error occurs",
            "signature": "generate_quick_summary_pdf(self, context: ConversationContext, output_path: str) -> bool"
          }
        ]
      },
      "complexity": "complex",
      "component_type": "class",
      "conditional_imports": [],
      "created_at": "2025-12-06 23:52:58",
      "decorators": [],
      "dependencies": [
        "asyncio",
        "logging",
        "pathlib",
        "typing",
        "datetime",
        "json",
        "reportlab"
      ],
      "description": "A class that generates comprehensive PDF reports documenting conversation timelines, including detailed exchanges, problem-solving analysis, references, and visual summaries.",
      "docstring": "Generate comprehensive conversation timeline PDFs",
      "id": 1964,
      "imports": [
        "import asyncio",
        "import logging",
        "from pathlib import Path",
        "from typing import List",
        "from typing import Dict",
        "from typing import Any",
        "from typing import Optional",
        "from datetime import datetime",
        "import json",
        "from reportlab.pdfgen import canvas",
        "from reportlab.lib.pagesizes import letter",
        "from reportlab.lib.pagesizes import A4",
        "from reportlab.lib import colors",
        "from reportlab.lib.units import inch",
        "from reportlab.platypus import SimpleDocTemplate",
        "from reportlab.platypus import Paragraph",
        "from reportlab.platypus import Spacer",
        "from reportlab.platypus import Table",
        "from reportlab.platypus import TableStyle",
        "from reportlab.platypus import PageBreak",
        "from reportlab.lib.styles import getSampleStyleSheet",
        "from reportlab.lib.styles import ParagraphStyle",
        "from reportlab.lib.enums import TA_LEFT",
        "from reportlab.lib.enums import TA_CENTER",
        "from reportlab.lib.enums import TA_JUSTIFY",
        "from conversation_context import ConversationContext",
        "from conversation_context import ConversationTurn",
        "from conversation_context import ConversationReference"
      ],
      "imports_required": [
        "import asyncio",
        "import logging",
        "from pathlib import Path",
        "from typing import List, Dict, Any, Optional",
        "from datetime import datetime",
        "import json",
        "from reportlab.pdfgen import canvas",
        "from reportlab.lib.pagesizes import letter, A4",
        "from reportlab.lib import colors",
        "from reportlab.lib.units import inch",
        "from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, PageBreak",
        "from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle",
        "from reportlab.lib.enums import TA_LEFT, TA_CENTER, TA_JUSTIFY",
        "from conversation_context import ConversationContext, ConversationTurn, ConversationReference"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 422,
      "line_start": 25,
      "name": "ConversationTimelineGenerator",
      "parameters": [
        {
          "annotation": "",
          "default": null,
          "name": "bases"
        }
      ],
      "parameters_explained": {
        "No constructor parameters": "The __init__ method takes no parameters. It initializes the logger, sets up PDF styling with custom paragraph styles, and defines a color scheme for different document elements."
      },
      "parent_class": null,
      "purpose": "ConversationTimelineGenerator creates professional PDF documentation of conversations using the ReportLab library. It produces multi-page timeline reports with title pages, executive summaries, detailed exchange histories, problem-solving analysis, and reference mappings. The class supports both comprehensive multi-page reports and quick one-page summaries, making it suitable for conversation analysis, documentation, and archival purposes.",
      "return_annotation": null,
      "return_explained": "Instantiation returns a ConversationTimelineGenerator object. The main methods (generate_conversation_timeline and generate_quick_summary_pdf) return boolean values: True if PDF generation succeeds, False if an error occurs. The private helper methods return List[Any] containing ReportLab flowable elements for PDF construction.",
      "settings_required": [
        "ReportLab library must be installed (pip install reportlab)",
        "ConversationContext, ConversationTurn, and ConversationReference classes must be available from conversation_context module",
        "Write permissions for output directory where PDFs will be saved",
        "Sufficient disk space for PDF generation"
      ],
      "source_code": "class ConversationTimelineGenerator:\n    \"\"\"Generate comprehensive conversation timeline PDFs\"\"\"\n    \n    def __init__(self):\n        \"\"\"Initialize timeline generator\"\"\"\n        self.logger = logging.getLogger(__name__)\n        \n        # PDF styling\n        self.styles = getSampleStyleSheet()\n        self.setup_custom_styles()\n        \n        # Colors for different elements\n        self.colors = {\n            'header': colors.HexColor('#2E86AB'),\n            'exchange': colors.HexColor('#A23B72'),\n            'topic': colors.HexColor('#F18F01'),\n            'reference': colors.HexColor('#C73E1D'),\n            'insight': colors.HexColor('#6A994E'),\n            'light_gray': colors.HexColor('#F5F5F5'),\n            'medium_gray': colors.HexColor('#E0E0E0')\n        }\n    \n    def setup_custom_styles(self):\n        \"\"\"Set up custom paragraph styles\"\"\"\n        self.styles.add(ParagraphStyle(\n            name='ConversationTitle',\n            parent=self.styles['Title'],\n            fontSize=18,\n            spaceAfter=20,\n            textColor=self.colors['header'] if hasattr(self, 'colors') else colors.blue,\n            alignment=TA_CENTER\n        ))\n        \n        self.styles.add(ParagraphStyle(\n            name='ExchangeHeader',\n            parent=self.styles['Heading2'],\n            fontSize=14,\n            spaceBefore=15,\n            spaceAfter=10,\n            textColor=self.colors['exchange'] if hasattr(self, 'colors') else colors.darkred,\n            leftIndent=20\n        ))\n        \n        self.styles.add(ParagraphStyle(\n            name='ExchangeContent',\n            parent=self.styles['Normal'],\n            fontSize=11,\n            spaceBefore=5,\n            spaceAfter=5,\n            leftIndent=30,\n            rightIndent=20,\n            alignment=TA_JUSTIFY\n        ))\n        \n        self.styles.add(ParagraphStyle(\n            name='TopicStyle',\n            parent=self.styles['Normal'],\n            fontSize=10,\n            textColor=self.colors['topic'] if hasattr(self, 'colors') else colors.orange,\n            leftIndent=30\n        ))\n        \n        self.styles.add(ParagraphStyle(\n            name='ReferenceStyle',\n            parent=self.styles['Normal'],\n            fontSize=10,\n            textColor=self.colors['reference'] if hasattr(self, 'colors') else colors.red,\n            leftIndent=30,\n            fontName='Helvetica-Oblique'\n        ))\n    \n    async def generate_conversation_timeline(self, \n                                           context: ConversationContext,\n                                           output_path: str) -> bool:\n        \"\"\"\n        Generate comprehensive conversation timeline PDF\n        \n        Args:\n            context: ConversationContext with full conversation data\n            output_path: Path for output PDF\n            \n        Returns:\n            True if successful, False otherwise\n        \"\"\"\n        self.logger.info(f\"Generating conversation timeline for {context.conversation_id}\")\n        \n        try:\n            # Create PDF document\n            doc = SimpleDocTemplate(\n                output_path,\n                pagesize=A4,\n                rightMargin=72,\n                leftMargin=72,\n                topMargin=72,\n                bottomMargin=72\n            )\n            \n            # Build story elements\n            story = []\n            \n            # Title page\n            
story.extend(self._build_title_page(context))\n            story.append(PageBreak())\n            \n            # Executive summary\n            story.extend(self._build_executive_summary(context))\n            story.append(PageBreak())\n            \n            # Timeline visualization\n            story.extend(self._build_timeline_section(context))\n            story.append(PageBreak())\n            \n            # Detailed exchanges\n            story.extend(self._build_detailed_exchanges(context))\n            \n            # Problem-solving analysis\n            if context.problem_solving_chain:\n                story.append(PageBreak())\n                story.extend(self._build_problem_solving_analysis(context))\n            \n            # References and connections\n            if context.reference_map:\n                story.append(PageBreak())\n                story.extend(self._build_references_section(context))\n            \n            # Build PDF\n            doc.build(story)\n            \n            self.logger.info(f\"Timeline PDF generated: {output_path}\")\n            return True\n            \n        except Exception as e:\n            self.logger.error(f\"Error generating timeline PDF: {e}\")\n            return False\n    \n    def _build_title_page(self, context: ConversationContext) -> List[Any]:\n        \"\"\"Build title page elements\"\"\"\n        elements = []\n        \n        # Title\n        title = f\"Conversation Timeline\"\n        elements.append(Paragraph(title, self.styles['ConversationTitle']))\n        elements.append(Spacer(1, 20))\n        \n        # Conversation details\n        details = [\n            f\"<b>Conversation ID:</b> {context.conversation_id}\",\n            f\"<b>Total Exchanges:</b> {context.total_exchanges}\",\n            f\"<b>Generated:</b> {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\",\n            \"\",\n            f\"<b>Summary:</b> {context.conversation_summary}\"\n        ]\n        \n        for detail in details:\n            elements.append(Paragraph(detail, self.styles['Normal']))\n            elements.append(Spacer(1, 6))\n        \n        elements.append(Spacer(1, 30))\n        \n        # Active topics\n        if context.active_topics:\n            elements.append(Paragraph(\"<b>Active Topics:</b>\", self.styles['Heading3']))\n            for topic in context.active_topics:\n                elements.append(Paragraph(f\"\u2022 {topic.replace('_', ' ').title()}\", self.styles['TopicStyle']))\n            elements.append(Spacer(1, 20))\n        \n        # Key insights\n        if context.key_insights:\n            elements.append(Paragraph(\"<b>Key Insights:</b>\", self.styles['Heading3']))\n            for insight in context.key_insights:\n                elements.append(Paragraph(f\"\u2022 {insight}\", self.styles['Normal']))\n            elements.append(Spacer(1, 10))\n        \n        return elements\n    \n    def _build_executive_summary(self, context: ConversationContext) -> List[Any]:\n        \"\"\"Build executive summary section\"\"\"\n        elements = []\n        \n        elements.append(Paragraph(\"Executive Summary\", self.styles['Heading1']))\n        elements.append(Spacer(1, 12))\n        \n        # Conversation overview\n        overview = f\"\"\"\n        This conversation timeline documents {context.total_exchanges} exchanges in conversation {context.conversation_id}.\n        The conversation covers {len(context.active_topics)} main topic areas and demonstrates a clear 
progression\n        through {len(context.problem_solving_chain)} problem-solving steps.\n        \"\"\"\n        elements.append(Paragraph(overview, self.styles['Normal']))\n        elements.append(Spacer(1, 15))\n        \n        # Statistics table\n        stats_data = [\n            ['Metric', 'Value'],\n            ['Total Exchanges', str(context.total_exchanges)],\n            ['Active Topics', str(len(context.active_topics))],\n            ['Key Insights', str(len(context.key_insights))],\n            ['Problem-Solving Steps', str(len(context.problem_solving_chain))],\n            ['Cross-References', str(sum(len(refs) for refs in context.reference_map.values()))]\n        ]\n        \n        stats_table = Table(stats_data, colWidths=[2*inch, 1*inch])\n        stats_table.setStyle(TableStyle([\n            ('BACKGROUND', (0, 0), (-1, 0), self.colors['header']),\n            ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),\n            ('ALIGN', (0, 0), (-1, -1), 'LEFT'),\n            ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),\n            ('FONTSIZE', (0, 0), (-1, 0), 11),\n            ('BOTTOMPADDING', (0, 0), (-1, 0), 12),\n            ('BACKGROUND', (0, 1), (-1, -1), self.colors['light_gray']),\n            ('GRID', (0, 0), (-1, -1), 1, colors.black)\n        ]))\n        \n        elements.append(stats_table)\n        elements.append(Spacer(1, 20))\n        \n        # Problem-solving progression\n        if context.problem_solving_chain:\n            elements.append(Paragraph(\"Problem-Solving Progression\", self.styles['Heading3']))\n            for i, step in enumerate(context.problem_solving_chain, 1):\n                step_text = f\"{i}. <b>{step['step_type'].title()}</b> (Exchange {step['exchange_number']}): {step['description']}\"\n                elements.append(Paragraph(step_text, self.styles['Normal']))\n            elements.append(Spacer(1, 15))\n        \n        return elements\n    \n    def _build_timeline_section(self, context: ConversationContext) -> List[Any]:\n        \"\"\"Build visual timeline section\"\"\"\n        elements = []\n        \n        elements.append(Paragraph(\"Conversation Timeline\", self.styles['Heading1']))\n        elements.append(Spacer(1, 12))\n        \n        # Timeline table\n        timeline_data = [['Exchange', 'Timestamp', 'Input', 'Key Topics', 'Processing Time']]\n        \n        for turn in context.conversation_turns:\n            timeline_data.append([\n                str(turn.exchange_number),\n                turn.timestamp.strftime('%H:%M:%S'),\n                turn.input_summary[:30] + \"...\" if len(turn.input_summary) > 30 else turn.input_summary,\n                \", \".join(turn.topics[:2]),\n                f\"{turn.processing_time:.1f}s\"\n            ])\n        \n        timeline_table = Table(timeline_data, colWidths=[0.8*inch, 1*inch, 2.5*inch, 1.5*inch, 1*inch])\n        timeline_table.setStyle(TableStyle([\n            ('BACKGROUND', (0, 0), (-1, 0), self.colors['header']),\n            ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),\n            ('ALIGN', (0, 0), (-1, -1), 'LEFT'),\n            ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),\n            ('FONTSIZE', (0, 0), (-1, 0), 10),\n            ('FONTSIZE', (0, 1), (-1, -1), 9),\n            ('BOTTOMPADDING', (0, 0), (-1, 0), 8),\n            ('ROWBACKGROUNDS', (0, 1), (-1, -1), [colors.white, self.colors['light_gray']]),\n            ('GRID', (0, 0), (-1, -1), 0.5, colors.gray),\n            ('VALIGN', (0, 0), (-1, -1), 'TOP')\n      
  ]))\n        \n        elements.append(timeline_table)\n        elements.append(Spacer(1, 20))\n        \n        return elements\n    \n    def _build_detailed_exchanges(self, context: ConversationContext) -> List[Any]:\n        \"\"\"Build detailed exchange documentation\"\"\"\n        elements = []\n        \n        elements.append(Paragraph(\"Detailed Exchange History\", self.styles['Heading1']))\n        elements.append(Spacer(1, 12))\n        \n        for turn in context.conversation_turns:\n            # Exchange header\n            header_text = f\"Exchange {turn.exchange_number} - {turn.timestamp.strftime('%Y-%m-%d %H:%M:%S')}\"\n            elements.append(Paragraph(header_text, self.styles['ExchangeHeader']))\n            \n            # Input summary\n            elements.append(Paragraph(f\"<b>Input:</b> {turn.input_summary}\", self.styles['ExchangeContent']))\n            \n            # Response summary\n            elements.append(Paragraph(f\"<b>Response:</b> {turn.response_summary}\", self.styles['ExchangeContent']))\n            \n            # Topics\n            if turn.topics:\n                topics_text = f\"<b>Topics:</b> {', '.join(turn.topics)}\"\n                elements.append(Paragraph(topics_text, self.styles['TopicStyle']))\n            \n            # Key points\n            if turn.key_points:\n                elements.append(Paragraph(\"<b>Key Points:</b>\", self.styles['ExchangeContent']))\n                for point in turn.key_points:\n                    elements.append(Paragraph(f\"\u2022 {point}\", self.styles['ExchangeContent']))\n            \n            # Processing stats\n            stats_text = f\"<b>Processing:</b> {turn.processing_time:.1f}s, {turn.tokens_used} tokens\"\n            elements.append(Paragraph(stats_text, self.styles['Normal']))\n            \n            elements.append(Spacer(1, 15))\n        \n        return elements\n    \n    def _build_problem_solving_analysis(self, context: ConversationContext) -> List[Any]:\n        \"\"\"Build problem-solving analysis section\"\"\"\n        elements = []\n        \n        elements.append(Paragraph(\"Problem-Solving Analysis\", self.styles['Heading1']))\n        elements.append(Spacer(1, 12))\n        \n        # Problem-solving flow\n        elements.append(Paragraph(\"The conversation demonstrates the following problem-solving progression:\", self.styles['Normal']))\n        elements.append(Spacer(1, 10))\n        \n        for i, step in enumerate(context.problem_solving_chain, 1):\n            step_header = f\"Step {i}: {step['step_type'].title()} (Exchange {step['exchange_number']})\"\n            elements.append(Paragraph(step_header, self.styles['ExchangeHeader']))\n            \n            elements.append(Paragraph(step['description'], self.styles['ExchangeContent']))\n            \n            if step['topics']:\n                topics_text = f\"Related topics: {', '.join(step['topics'])}\"\n                elements.append(Paragraph(topics_text, self.styles['TopicStyle']))\n            \n            elements.append(Spacer(1, 10))\n        \n        return elements\n    \n    def _build_references_section(self, context: ConversationContext) -> List[Any]:\n        \"\"\"Build references and connections section\"\"\"\n        elements = []\n        \n        elements.append(Paragraph(\"References and Connections\", self.styles['Heading1']))\n        elements.append(Spacer(1, 12))\n        \n        elements.append(Paragraph(\"This section shows how exchanges reference and 
build upon previous discussions:\", self.styles['Normal']))\n        elements.append(Spacer(1, 10))\n        \n        for exchange_num, references in context.reference_map.items():\n            if references:\n                header = f\"Exchange {exchange_num} References:\"\n                elements.append(Paragraph(header, self.styles['ExchangeHeader']))\n                \n                for ref in references:\n                    ref_text = f\"\u2192 References Exchange {ref.exchange_number} ({ref.reference_type}): {ref.referenced_content}\"\n                    elements.append(Paragraph(ref_text, self.styles['ReferenceStyle']))\n                    \n                    if ref.context_snippet:\n                        context_text = f\"   Context: \\\"{ref.context_snippet}\\\"\"\n                        elements.append(Paragraph(context_text, self.styles['Normal']))\n                \n                elements.append(Spacer(1, 10))\n        \n        return elements\n    \n    def generate_quick_summary_pdf(self, \n                                  context: ConversationContext,\n                                  output_path: str) -> bool:\n        \"\"\"Generate a quick 1-page summary PDF\"\"\"\n        try:\n            c = canvas.Canvas(output_path, pagesize=letter)\n            width, height = letter\n            \n            # Title\n            c.setFont(\"Helvetica-Bold\", 16)\n            c.drawString(50, height - 50, f\"Conversation Summary: {context.conversation_id}\")\n            \n            # Basic stats\n            y_pos = height - 100\n            c.setFont(\"Helvetica\", 12)\n            \n            stats = [\n                f\"Total Exchanges: {context.total_exchanges}\",\n                f\"Active Topics: {', '.join(context.active_topics[:5])}\",\n                f\"Key Insights: {len(context.key_insights)}\",\n                f\"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\"\n            ]\n            \n            for stat in stats:\n                c.drawString(50, y_pos, stat)\n                y_pos -= 20\n            \n            # Recent exchanges\n            y_pos -= 20\n            c.setFont(\"Helvetica-Bold\", 14)\n            c.drawString(50, y_pos, \"Recent Exchanges:\")\n            y_pos -= 20\n            \n            c.setFont(\"Helvetica\", 10)\n            for turn in context.conversation_turns[-3:]:  # Last 3 exchanges\n                exchange_text = f\"Ex {turn.exchange_number}: {turn.input_summary} \u2192 {turn.response_summary[:60]}...\"\n                c.drawString(50, y_pos, exchange_text)\n                y_pos -= 15\n            \n            c.save()\n            return True\n            \n        except Exception as e:\n            self.logger.error(f\"Error generating quick summary: {e}\")\n            return False",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/conversation_timeline.py",
      "tags": [
        "pdf-generation",
        "reporting",
        "conversation-analysis",
        "timeline",
        "documentation",
        "reportlab",
        "visualization",
        "async",
        "logging"
      ],
      "updated_at": "2025-12-07T00:52:58.349736",
      "usage_example": "import asyncio\nfrom conversation_timeline_generator import ConversationTimelineGenerator\nfrom conversation_context import ConversationContext\n\n# Create generator instance\ngenerator = ConversationTimelineGenerator()\n\n# Assume we have a ConversationContext object with conversation data\ncontext = ConversationContext(conversation_id=\"conv_123\")\n# ... populate context with conversation turns, topics, insights, etc.\n\n# Generate comprehensive timeline PDF\nasync def generate_report():\n    success = await generator.generate_conversation_timeline(\n        context=context,\n        output_path=\"/path/to/timeline_report.pdf\"\n    )\n    if success:\n        print(\"Timeline PDF generated successfully\")\n    else:\n        print(\"Failed to generate timeline PDF\")\n\n# Generate quick summary PDF\nsuccess = generator.generate_quick_summary_pdf(\n    context=context,\n    output_path=\"/path/to/quick_summary.pdf\"\n)\n\n# Run async generation\nasyncio.run(generate_report())"
    },
    {
      "best_practices": [
        "Always instantiate the class before calling any methods - the constructor sets up required styles and patterns",
        "Call setup_hybrid_styles() only once during initialization - it's automatically called by __init__",
        "Ensure graphic objects have valid base64-encoded image_data before passing to create_hybrid_pdf",
        "Placeholder position_marker strings must exactly match the text in text_content for proper replacement",
        "Use await when calling create_hybrid_pdf as it's an async method",
        "Provide complete metadata dictionary with at least 'source_file' key for proper document information",
        "Graphics dictionary keys must match placeholder.id values for successful embedding",
        "Output directory must exist before calling create_hybrid_pdf",
        "The class maintains state through self.styles - avoid modifying styles after initialization",
        "Image data should be in a format PIL can decode (PNG, JPEG, etc.) when base64 decoded",
        "Text content can use markdown formatting: # for headers, ## for subheaders, ** for bold, * for italic, ` for code",
        "Graphics are automatically sized to fit within 6x4 inch bounds while maintaining aspect ratio",
        "The placeholder_pattern regex expects format: [GRAPHIC:id:type:description]",
        "Method call order: instantiate -> create_hybrid_pdf (which internally calls all helper methods)",
        "Helper methods (_process_content_with_graphics, _convert_text_to_elements, etc.) are internal and should not be called directly"
      ],
      "class_interface": {
        "attributes": [
          {
            "description": "ReportLab stylesheet containing all paragraph styles including custom hybrid styles",
            "is_class_variable": false,
            "name": "styles",
            "type": "StyleSheet1"
          },
          {
            "description": "Compiled regex pattern for matching graphic placeholder markers in format [GRAPHIC:id:type:description]",
            "is_class_variable": false,
            "name": "placeholder_pattern",
            "type": "re.Pattern"
          }
        ],
        "methods": [
          {
            "is_property": false,
            "is_static": false,
            "name": "__init__",
            "parameters": {},
            "purpose": "Initialize the HybridPDFGenerator with default styles and placeholder pattern",
            "returns": "None - initializes instance attributes",
            "signature": "__init__(self)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "setup_hybrid_styles",
            "parameters": {},
            "purpose": "Configure custom paragraph styles optimized for hybrid PDFs and e-ink displays",
            "returns": "None - modifies self.styles by adding custom ParagraphStyle objects",
            "signature": "setup_hybrid_styles(self)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "create_hybrid_pdf",
            "parameters": {
              "conversation_id": "Optional session conversation identifier for document template",
              "exchange_number": "Optional exchange number for document template",
              "graphics": "Dictionary mapping graphic IDs to graphic objects with image_data, type, width, height",
              "metadata": "Dictionary containing document metadata like source_file and dimensions",
              "output_path": "File system path where the PDF should be saved",
              "placeholders": "List of placeholder objects with id, position_marker, and description attributes",
              "text_content": "String containing the main text content with graphic placeholder markers"
            },
            "purpose": "Generate a complete hybrid PDF document with embedded text and graphics",
            "returns": "String containing the path to the generated PDF file",
            "signature": "async create_hybrid_pdf(self, text_content: str, placeholders: List[Any], graphics: Dict[str, Any], metadata: Dict[str, Any], output_path: str, conversation_id: Optional[str] = None, exchange_number: Optional[int] = None) -> str"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_process_content_with_graphics",
            "parameters": {
              "graphics": "Dictionary of graphic objects to embed",
              "placeholders": "List of placeholder objects to locate in text",
              "text_content": "String containing text with placeholder markers"
            },
            "purpose": "Parse text content and insert graphics at placeholder positions, returning PDF elements",
            "returns": "List of ReportLab flowable elements (Paragraphs, Images, Spacers) ready for PDF building",
            "signature": "_process_content_with_graphics(self, text_content: str, placeholders: List[Any], graphics: Dict[str, Any]) -> List[Any]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_convert_text_to_elements",
            "parameters": {
              "text": "String containing text with markdown-style formatting"
            },
            "purpose": "Convert plain text with markdown formatting into styled PDF elements",
            "returns": "List of ReportLab Paragraph and Spacer elements with appropriate styles applied",
            "signature": "_convert_text_to_elements(self, text: str) -> List[Any]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_parse_markdown_sections",
            "parameters": {
              "text": "String containing markdown-formatted text"
            },
            "purpose": "Parse text into sections based on markdown headers and code blocks",
            "returns": "List of tuples where each tuple is (section_type, content) - section_type is 'header1', 'header2', 'body', or 'code'",
            "signature": "_parse_markdown_sections(self, text: str) -> List[tuple]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_process_markdown_formatting",
            "parameters": {
              "text": "String with markdown formatting like **bold**, *italic*, `code`"
            },
            "purpose": "Convert markdown formatting syntax to ReportLab HTML-like tags",
            "returns": "String with HTML-like tags (<b>, <i>, <font>) that ReportLab can render",
            "signature": "_process_markdown_formatting(self, text: str) -> str"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_create_graphic_elements",
            "parameters": {
              "graphic": "Graphic object with image_data, type, width, height attributes",
              "placeholder": "Placeholder object with description attribute for caption"
            },
            "purpose": "Create PDF elements for embedding a graphic with caption and proper sizing",
            "returns": "List of ReportLab elements including Image, Paragraph (caption), and Spacers, wrapped in KeepTogether",
            "signature": "_create_graphic_elements(self, graphic: Any, placeholder: Any) -> List[Any]"
          }
        ]
      },
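For reference, a standalone sketch of the markdown-to-ReportLab conversion that _process_markdown_formatting performs; the substitutions mirror the source_code shown later in this entry, while the sample sentence is made up.

```python
import re

def process_markdown_formatting(text: str) -> str:
    """Convert basic markdown to ReportLab's inline HTML-like tags."""
    text = re.sub(r'\*\*(.*?)\*\*', r'<b>\1</b>', text)                  # **bold**
    text = re.sub(r'\*(.*?)\*', r'<i>\1</i>', text)                      # *italic*
    text = re.sub(r'`(.*?)`', r'<font name="Courier">\1</font>', text)   # `code`
    return text

print(process_markdown_formatting("Use **bold** and `inline code` sparingly."))
# -> Use <b>bold</b> and <font name="Courier">inline code</font> sparingly.
```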
      "complexity": "complex",
      "component_type": "class",
      "conditional_imports": [],
      "created_at": "2025-12-06 23:49:56",
      "decorators": [],
      "dependencies": [
        "io",
        "base64",
        "re",
        "pathlib",
        "datetime",
        "typing",
        "dataclasses",
        "reportlab",
        "PIL"
      ],
      "description": "A class that generates hybrid PDF documents combining formatted text content with embedded graphics, optimized for e-ink displays.",
      "docstring": "Generates hybrid PDFs combining text and graphics",
      "id": 1955,
      "imports": [
        "import io",
        "import base64",
        "import re",
        "from pathlib import Path",
        "from datetime import datetime",
        "from typing import Dict",
        "from typing import Any",
        "from typing import List",
        "from typing import Optional",
        "from dataclasses import dataclass",
        "from reportlab.lib.pagesizes import letter",
        "from reportlab.lib.pagesizes import A4",
        "from reportlab.lib.styles import getSampleStyleSheet",
        "from reportlab.lib.styles import ParagraphStyle",
        "from reportlab.lib.units import inch",
        "from reportlab.lib.enums import TA_LEFT",
        "from reportlab.lib.enums import TA_JUSTIFY",
        "from reportlab.lib.enums import TA_CENTER",
        "from reportlab.platypus import SimpleDocTemplate",
        "from reportlab.platypus import Paragraph",
        "from reportlab.platypus import Spacer",
        "from reportlab.platypus import Image",
        "from reportlab.platypus import PageBreak",
        "from reportlab.platypus import KeepTogether",
        "from reportlab.platypus.doctemplate import PageTemplate",
        "from reportlab.platypus.doctemplate import BaseDocTemplate",
        "from reportlab.platypus.frames import Frame",
        "from reportlab.lib import colors",
        "from PIL import Image as PILImage"
      ],
      "imports_required": [
        "import io",
        "import base64",
        "import re",
        "from pathlib import Path",
        "from datetime import datetime",
        "from typing import Dict, Any, List, Optional",
        "from dataclasses import dataclass",
        "from reportlab.lib.pagesizes import letter, A4",
        "from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle",
        "from reportlab.lib.units import inch",
        "from reportlab.lib.enums import TA_LEFT, TA_JUSTIFY, TA_CENTER",
        "from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Image, PageBreak, KeepTogether",
        "from reportlab.platypus.doctemplate import PageTemplate, BaseDocTemplate",
        "from reportlab.platypus.frames import Frame",
        "from reportlab.lib import colors",
        "from PIL import Image as PILImage"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 461,
      "line_start": 86,
      "name": "HybridPDFGenerator",
      "parameters": [
        {
          "annotation": "",
          "default": null,
          "name": "bases"
        }
      ],
      "parameters_explained": {
        "No constructor parameters": "The __init__ method takes no parameters. It initializes the class with default styles and sets up a regex pattern for detecting graphic placeholders."
      },
      "parent_class": null,
      "purpose": "HybridPDFGenerator creates professional PDF documents that integrate text content with dynamically generated graphics. It processes text with markdown-style formatting, identifies graphic placeholders, embeds base64-encoded images at appropriate positions, and applies custom styling optimized for e-ink readers. The class handles the complete workflow from content parsing to PDF generation, including metadata sections, processing summaries, and proper layout management.",
      "return_annotation": null,
      "return_explained": "Instantiation returns a HybridPDFGenerator object. The main method create_hybrid_pdf returns a string containing the path to the generated PDF file. Helper methods return various types: _process_content_with_graphics returns List[Any] of PDF elements, _convert_text_to_elements returns List[Any] of formatted elements, _parse_markdown_sections returns List[tuple] of section types and content, _process_markdown_formatting returns str with HTML-like tags, and _create_graphic_elements returns List[Any] of graphic elements.",
      "settings_required": [
        "HybridSessionDocTemplate class must be available (referenced but not defined in this code)",
        "Placeholder objects with attributes: position_marker, id, description",
        "Graphic objects with attributes: image_data, type, width, height, id",
        "Write permissions to the output directory for PDF generation"
      ],
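A minimal sketch of the sizing rule _create_graphic_elements applies to graphic objects: fit within a 6x4 inch box while preserving aspect ratio. The 800x600 input dimensions are hypothetical.

```python
from reportlab.lib.units import inch

def fit_within_bounds(width, height, max_width=6 * inch, max_height=4 * inch):
    """Scale (width, height) into the bounding box while preserving aspect ratio."""
    aspect_ratio = width / height
    if aspect_ratio > max_width / max_height:
        # Width is the limiting factor
        return max_width, max_width / aspect_ratio
    # Height is the limiting factor
    return max_height * aspect_ratio, max_height

# An 800x600 graphic (hypothetical pixel dimensions) is height-limited here:
img_width, img_height = fit_within_bounds(800, 600)
print(img_width / inch, img_height / inch)   # roughly 5.33 x 4.0 inches
```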
      "source_code": "class HybridPDFGenerator:\n    \"\"\"Generates hybrid PDFs combining text and graphics\"\"\"\n    \n    def __init__(self):\n        self.styles = getSampleStyleSheet()\n        self.setup_hybrid_styles()\n        self.placeholder_pattern = re.compile(\n            r'\\[GRAPHIC:(\\w+):([^:]+):([^\\]]+)\\]',\n            re.IGNORECASE\n        )\n    \n    def setup_hybrid_styles(self):\n        \"\"\"Setup custom styles for hybrid PDFs\"\"\"\n        \n        # Main title style\n        self.styles.add(ParagraphStyle(\n            name='HybridTitle',\n            parent=self.styles['Title'],\n            fontSize=18,\n            leading=24,\n            alignment=TA_CENTER,\n            spaceAfter=20,\n            textColor=colors.black,\n            fontName='Helvetica-Bold'\n        ))\n        \n        # Section header style\n        self.styles.add(ParagraphStyle(\n            name='HybridHeader',\n            parent=self.styles['Heading1'],\n            fontSize=14,\n            leading=18,\n            spaceAfter=12,\n            spaceBefore=16,\n            textColor=colors.black,\n            fontName='Helvetica-Bold'\n        ))\n        \n        # Sub-header style\n        self.styles.add(ParagraphStyle(\n            name='HybridSubHeader',\n            parent=self.styles['Heading2'],\n            fontSize=12,\n            leading=16,\n            spaceAfter=8,\n            spaceBefore=12,\n            textColor=colors.black,\n            fontName='Helvetica-Bold'\n        ))\n        \n        # Body text optimized for e-ink\n        self.styles.add(ParagraphStyle(\n            name='HybridBody',\n            parent=self.styles['Normal'],\n            fontSize=11,\n            leading=15,\n            alignment=TA_JUSTIFY,\n            spaceAfter=8,\n            textColor=colors.black,\n            fontName='Helvetica'\n        ))\n        \n        # Graphics caption style\n        self.styles.add(ParagraphStyle(\n            name='GraphicCaption',\n            parent=self.styles['Normal'],\n            fontSize=9,\n            leading=12,\n            alignment=TA_CENTER,\n            spaceAfter=12,\n            spaceBefore=4,\n            textColor=colors.grey,\n            fontName='Helvetica-Oblique'\n        ))\n        \n        # Metadata style\n        self.styles.add(ParagraphStyle(\n            name='HybridMeta',\n            parent=self.styles['Normal'],\n            fontSize=9,\n            leading=12,\n            alignment=TA_LEFT,\n            spaceAfter=4,\n            textColor=colors.grey,\n            fontName='Helvetica-Oblique'\n        ))\n    \n    async def create_hybrid_pdf(self, \n                              text_content: str,\n                              placeholders: List[Any],\n                              graphics: Dict[str, Any],\n                              metadata: Dict[str, Any],\n                              output_path: str,\n                              conversation_id: Optional[str] = None,\n                              exchange_number: Optional[int] = None) -> str:\n        \"\"\"\n        Create a hybrid PDF with text and embedded graphics\n        \n        Args:\n            text_content: Text content with graphic placeholders\n            placeholders: List of graphic placeholders found in text\n            graphics: Generated graphics keyed by ID\n            metadata: Processing metadata\n            output_path: Path for output PDF\n            conversation_id: Session conversation ID\n           
 exchange_number: Exchange number\n            \n        Returns:\n            Path to generated hybrid PDF\n        \"\"\"\n        print(f\"\ud83d\udcc4 Creating hybrid PDF with {len(graphics)} graphics...\")\n        \n        # Use custom hybrid document template\n        doc = HybridSessionDocTemplate(\n            output_path,\n            conversation_id=conversation_id,\n            exchange_number=exchange_number,\n            pagesize=letter,\n            rightMargin=72,\n            leftMargin=72,\n            topMargin=72,\n            bottomMargin=72\n        )\n        \n        story = []\n        \n        # Add title\n        timestamp = datetime.now().strftime(\"%Y-%m-%d %H:%M:%S\")\n        title = f\"AI Hybrid Response - {timestamp}\"\n        story.append(Paragraph(title, self.styles['HybridTitle']))\n        story.append(Spacer(1, 20))\n        \n        # Add metadata section\n        story.append(Paragraph(\"Document Information\", self.styles['HybridHeader']))\n        \n        # Source information\n        source_info = f\"Source: {metadata.get('source_file', 'Unknown')}\"\n        story.append(Paragraph(source_info, self.styles['HybridMeta']))\n        \n        # Processing information\n        if metadata.get('dimensions'):\n            dims_info = f\"Original dimensions: {metadata['dimensions']}\"\n            story.append(Paragraph(dims_info, self.styles['HybridMeta']))\n        \n        processing_info = f\"Processing mode: Hybrid (Text + Graphics)\"\n        story.append(Paragraph(processing_info, self.styles['HybridMeta']))\n        \n        graphics_info = f\"Generated graphics: {len(graphics)}\"\n        story.append(Paragraph(graphics_info, self.styles['HybridMeta']))\n        \n        story.append(Spacer(1, 20))\n        \n        # Process content and insert graphics\n        content_elements = self._process_content_with_graphics(\n            text_content, placeholders, graphics\n        )\n        \n        story.extend(content_elements)\n        \n        # Add processing summary\n        story.append(PageBreak())\n        story.append(Paragraph(\"Processing Summary\", self.styles['HybridHeader']))\n        \n        summary_items = [\n            f\"\u2022 Generated graphics: {len(graphics)}\",\n            f\"\u2022 Successfully embedded: {len([g for g in graphics.values() if g.image_data])}\",\n            f\"\u2022 Response optimization: E-ink display\",\n            f\"\u2022 Timestamp: {timestamp}\"\n        ]\n        \n        for item in summary_items:\n            story.append(Paragraph(item, self.styles['HybridMeta']))\n        \n        # Build PDF\n        doc.build(story)\n        \n        print(f\"\u2705 Hybrid PDF created: {Path(output_path).name}\")\n        return output_path\n    \n    def _process_content_with_graphics(self, \n                                     text_content: str,\n                                     placeholders: List[Any],\n                                     graphics: Dict[str, Any]) -> List[Any]:\n        \"\"\"Process text content and insert graphics at placeholder positions\"\"\"\n        \n        elements = []\n        current_pos = 0\n        \n        # Sort placeholders by their position in the text\n        placeholder_positions = []\n        for placeholder in placeholders:\n            pos = text_content.find(placeholder.position_marker)\n            if pos >= 0:\n                placeholder_positions.append((pos, placeholder))\n        \n        placeholder_positions.sort(key=lambda x: 
x[0])\n        \n        for pos, placeholder in placeholder_positions:\n            # Add text before this placeholder\n            if pos > current_pos:\n                text_before = text_content[current_pos:pos]\n                text_elements = self._convert_text_to_elements(text_before)\n                elements.extend(text_elements)\n            \n            # Add the graphic if available\n            if placeholder.id in graphics:\n                graphic_elements = self._create_graphic_elements(\n                    graphics[placeholder.id], placeholder\n                )\n                elements.extend(graphic_elements)\n            else:\n                # Add placeholder text if graphic generation failed\n                placeholder_text = f\"[Graphic placeholder: {placeholder.description}]\"\n                elements.append(Paragraph(placeholder_text, self.styles['GraphicCaption']))\n            \n            # Update position to after this placeholder\n            current_pos = pos + len(placeholder.position_marker)\n        \n        # Add remaining text after last placeholder\n        if current_pos < len(text_content):\n            remaining_text = text_content[current_pos:]\n            text_elements = self._convert_text_to_elements(remaining_text)\n            elements.extend(text_elements)\n        \n        return elements\n    \n    def _convert_text_to_elements(self, text: str) -> List[Any]:\n        \"\"\"Convert text content to PDF elements with proper formatting\"\"\"\n        elements = []\n        \n        # Split text into sections based on markdown-style headers\n        sections = self._parse_markdown_sections(text)\n        \n        for section_type, content in sections:\n            if section_type == 'header1':\n                elements.append(Paragraph(content, self.styles['HybridHeader']))\n            elif section_type == 'header2':\n                elements.append(Paragraph(content, self.styles['HybridSubHeader']))\n            elif section_type == 'body':\n                # Split paragraphs and add each one\n                paragraphs = content.split('\\n\\n')\n                for paragraph in paragraphs:\n                    if paragraph.strip():\n                        # Process markdown formatting\n                        formatted_paragraph = self._process_markdown_formatting(paragraph)\n                        elements.append(Paragraph(formatted_paragraph, self.styles['HybridBody']))\n                        elements.append(Spacer(1, 6))\n            elif section_type == 'code':\n                elements.append(Paragraph(content, self.styles['Code']))\n        \n        return elements\n    \n    def _parse_markdown_sections(self, text: str) -> List[tuple]:\n        \"\"\"Parse text for markdown-style sections\"\"\"\n        sections = []\n        lines = text.split('\\n')\n        current_section = []\n        current_type = 'body'\n        \n        for line in lines:\n            line = line.strip()\n            \n            if line.startswith('# '):\n                # Save previous section\n                if current_section:\n                    sections.append((current_type, '\\n'.join(current_section)))\n                # Start new header section\n                sections.append(('header1', line[2:]))\n                current_section = []\n                current_type = 'body'\n                \n            elif line.startswith('## '):\n                # Save previous section\n                if current_section:\n                    
sections.append((current_type, '\\n'.join(current_section)))\n                # Start new subheader section\n                sections.append(('header2', line[3:]))\n                current_section = []\n                current_type = 'body'\n                \n            elif line.startswith('```'):\n                # Toggle code section\n                if current_type == 'code':\n                    sections.append((current_type, '\\n'.join(current_section)))\n                    current_section = []\n                    current_type = 'body'\n                else:\n                    if current_section:\n                        sections.append((current_type, '\\n'.join(current_section)))\n                    current_section = []\n                    current_type = 'code'\n                    \n            else:\n                current_section.append(line)\n        \n        # Add final section\n        if current_section:\n            sections.append((current_type, '\\n'.join(current_section)))\n        \n        return sections\n    \n    def _process_markdown_formatting(self, text: str) -> str:\n        \"\"\"Process basic markdown formatting for reportlab\"\"\"\n        # Convert **bold** to <b>bold</b>\n        text = re.sub(r'\\*\\*(.*?)\\*\\*', r'<b>\\1</b>', text)\n        \n        # Convert *italic* to <i>italic</i>\n        text = re.sub(r'\\*(.*?)\\*', r'<i>\\1</i>', text)\n        \n        # Convert `code` to monospace\n        text = re.sub(r'`(.*?)`', r'<font name=\"Courier\">\\1</font>', text)\n        \n        return text\n    \n    def _create_graphic_elements(self, \n                               graphic: Any, \n                               placeholder: Any) -> List[Any]:\n        \"\"\"Create PDF elements for a graphic\"\"\"\n        elements = []\n        \n        if not graphic.image_data:\n            # No image data available\n            error_text = f\"[Graphic generation failed: {placeholder.description}]\"\n            elements.append(Paragraph(error_text, self.styles['GraphicCaption']))\n            return elements\n        \n        try:\n            # Decode image data\n            image_data = base64.b64decode(graphic.image_data)\n            image_buffer = io.BytesIO(image_data)\n            \n            # Create ReportLab Image\n            # Calculate appropriate size for e-ink display\n            max_width = 6 * inch  # Maximum width for graphics\n            max_height = 4 * inch  # Maximum height for graphics\n            \n            # Use graphic's dimensions if available\n            if graphic.width and graphic.height:\n                aspect_ratio = graphic.width / graphic.height\n                if aspect_ratio > max_width / max_height:\n                    # Width is limiting factor\n                    img_width = max_width\n                    img_height = max_width / aspect_ratio\n                else:\n                    # Height is limiting factor\n                    img_height = max_height\n                    img_width = max_height * aspect_ratio\n            else:\n                # Default size\n                img_width = max_width\n                img_height = max_height\n            \n            # Create image element\n            img_element = Image(image_buffer, width=img_width, height=img_height)\n            \n            # Create caption\n            caption_text = placeholder.description\n            if graphic.type.value:\n                caption_text = f\"{graphic.type.value.title()}: {caption_text}\"\n        
    \n            # Add spacing, image, and caption as a group\n            graphic_group = KeepTogether([\n                Spacer(1, 12),\n                img_element,\n                Paragraph(caption_text, self.styles['GraphicCaption']),\n                Spacer(1, 12)\n            ])\n            \n            elements.append(graphic_group)\n            \n        except Exception as e:\n            print(f\"Error embedding graphic {graphic.id}: {e}\")\n            error_text = f\"[Error embedding graphic: {placeholder.description}]\"\n            elements.append(Paragraph(error_text, self.styles['GraphicCaption']))\n        \n        return elements",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/hybrid_pdf_generator.py",
      "tags": [
        "pdf-generation",
        "document-creation",
        "reportlab",
        "hybrid-content",
        "graphics-embedding",
        "markdown-processing",
        "e-ink-optimization",
        "text-formatting",
        "image-embedding",
        "async"
      ],
      "updated_at": "2025-12-07T00:49:56.972628",
      "usage_example": "import asyncio\nfrom hybrid_pdf_generator import HybridPDFGenerator\n\n# Instantiate the generator\ngenerator = HybridPDFGenerator()\n\n# Prepare content with placeholders\ntext_content = \"# Report Title\\n\\nThis is body text.\\n\\n[GRAPHIC:chart1:bar:Sales Data]\\n\\nMore text here.\"\n\n# Define placeholders (mock objects)\nclass Placeholder:\n    def __init__(self, id, position_marker, description):\n        self.id = id\n        self.position_marker = position_marker\n        self.description = description\n\nplaceholders = [\n    Placeholder('chart1', '[GRAPHIC:chart1:bar:Sales Data]', 'Sales Data')\n]\n\n# Define graphics (mock objects)\nclass Graphic:\n    def __init__(self, id, image_data, type, width, height):\n        self.id = id\n        self.image_data = image_data\n        self.type = type\n        self.width = width\n        self.height = height\n\nclass GraphicType:\n    def __init__(self, value):\n        self.value = value\n\ngraphics = {\n    'chart1': Graphic('chart1', 'base64_encoded_image_data', GraphicType('chart'), 800, 600)\n}\n\n# Metadata\nmetadata = {\n    'source_file': 'data.csv',\n    'dimensions': '1920x1080'\n}\n\n# Generate PDF\nasync def generate():\n    output_path = await generator.create_hybrid_pdf(\n        text_content=text_content,\n        placeholders=placeholders,\n        graphics=graphics,\n        metadata=metadata,\n        output_path='output/hybrid_report.pdf',\n        conversation_id='conv_123',\n        exchange_number=1\n    )\n    print(f'PDF generated: {output_path}')\n\nasyncio.run(generate())"
    },
    {
      "best_practices": [
        "Always call the build() method with a list of flowables to generate the PDF after instantiation",
        "Provide conversation_id and exchange_number for proper session tracking in multi-turn conversations",
        "The template reserves 0.5 inches at the bottom for the footer, so content will automatically flow within the remaining space",
        "Use with other ReportLab flowables (Paragraph, Image, Spacer, etc.) to create rich content",
        "The footer is automatically added to every page via the onPage callback mechanism",
        "Session information in the footer is conditionally displayed based on what parameters were provided during instantiation",
        "The template uses letter size by default but can be customized via kwargs (e.g., pagesize=A4)",
        "Canvas state is properly managed (saveState/restoreState) in the footer callback to avoid affecting main content rendering"
      ],
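A sketch of the frame geometry behind the 0.5 inch footer reservation noted above, lifted from the template's __init__; the 1 inch margins are ReportLab's usual BaseDocTemplate defaults and are assumed here for illustration.

```python
from reportlab.lib.pagesizes import letter
from reportlab.lib.units import inch
from reportlab.platypus.frames import Frame

page_width, page_height = letter             # 612 x 792 points
left_margin = bottom_margin = 1 * inch       # assumed BaseDocTemplate defaults
content_width = page_width - 2 * inch        # corresponds to doc.width
content_height = page_height - 2 * inch      # corresponds to doc.height

# Same geometry as HybridSessionDocTemplate.__init__: the frame starts 0.5 inch above
# the bottom margin and gives up 0.5 inch of height, leaving that strip for the footer.
main_frame = Frame(
    left_margin, bottom_margin + 0.5 * inch,
    content_width, content_height - 0.5 * inch,
    id='main'
)
```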
      "class_interface": {
        "attributes": [
          {
            "description": "Stores the conversation identifier for display in the footer",
            "is_class_variable": false,
            "name": "conversation_id",
            "type": "Optional[str]"
          },
          {
            "description": "Stores the exchange number within the conversation for display in the footer",
            "is_class_variable": false,
            "name": "exchange_number",
            "type": "Optional[int]"
          },
          {
            "description": "Left margin of the document (inherited from BaseDocTemplate)",
            "is_class_variable": false,
            "name": "leftMargin",
            "type": "float"
          },
          {
            "description": "Bottom margin of the document (inherited from BaseDocTemplate)",
            "is_class_variable": false,
            "name": "bottomMargin",
            "type": "float"
          },
          {
            "description": "Width of the content area (inherited from BaseDocTemplate)",
            "is_class_variable": false,
            "name": "width",
            "type": "float"
          },
          {
            "description": "Height of the content area (inherited from BaseDocTemplate)",
            "is_class_variable": false,
            "name": "height",
            "type": "float"
          }
        ],
        "methods": [
          {
            "is_property": false,
            "is_static": false,
            "name": "__init__",
            "parameters": {
              "**kwargs": "Additional arguments passed to BaseDocTemplate (pagesize, margins, etc.)",
              "conversation_id": "Optional conversation session identifier",
              "exchange_number": "Optional exchange number within the conversation",
              "filename": "Output PDF filename or file path"
            },
            "purpose": "Initializes the hybrid session document template with session tracking information and sets up the page layout with a main content frame and footer space",
            "returns": "None (constructor)",
            "signature": "__init__(self, filename, conversation_id=None, exchange_number=None, **kwargs)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "add_session_footer",
            "parameters": {
              "canvas": "ReportLab Canvas object for drawing on the page",
              "doc": "Document object containing page information and dimensions"
            },
            "purpose": "Callback method that adds session information footer to each page, including page number, hybrid indicator, and session details",
            "returns": "None (modifies canvas in place)",
            "signature": "add_session_footer(self, canvas, doc)"
          }
        ]
      },
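A standalone sketch of the footer layout add_session_footer draws: page number flush left, a centred hybrid indicator, and right-aligned session text positioned with stringWidth. Margin values and the session string are illustrative.

```python
from reportlab.lib import colors
from reportlab.lib.pagesizes import letter
from reportlab.lib.units import inch
from reportlab.pdfgen import canvas

page_width, _ = letter
left_margin = bottom_margin = 1 * inch
content_width = page_width - 2 * inch        # corresponds to doc.width

c = canvas.Canvas("footer_demo.pdf", pagesize=letter)
c.saveState()
c.setFont('Helvetica', 8)
c.setFillColor(colors.grey)

c.drawString(left_margin, bottom_margin, "Page 1")                     # left: page number

hybrid_text = "Hybrid Response (Text + Graphics)"
x_center = (content_width + 2 * left_margin) / 2 - c.stringWidth(hybrid_text, 'Helvetica', 8) / 2
c.drawString(x_center, bottom_margin, hybrid_text)                     # centre: indicator

session_text = "Session: conv_123 | Exchange #1"                       # hypothetical session info
x_right = content_width + left_margin - c.stringWidth(session_text, 'Helvetica', 8)
c.drawString(x_right, bottom_margin, session_text)                     # right-aligned

c.restoreState()
c.showPage()
c.save()
```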
      "complexity": "moderate",
      "component_type": "class",
      "conditional_imports": [],
      "created_at": "2025-12-06 23:49:11",
      "decorators": [],
      "dependencies": [
        "reportlab"
      ],
      "description": "A custom ReportLab document template class that extends BaseDocTemplate to create PDF documents with session information footers, specifically designed for hybrid text and graphics responses.",
      "docstring": "Custom document template for hybrid PDFs with session info",
      "id": 1954,
      "imports": [
        "import io",
        "import base64",
        "import re",
        "from pathlib import Path",
        "from datetime import datetime",
        "from typing import Dict",
        "from typing import Any",
        "from typing import List",
        "from typing import Optional",
        "from dataclasses import dataclass",
        "from reportlab.lib.pagesizes import letter",
        "from reportlab.lib.pagesizes import A4",
        "from reportlab.lib.styles import getSampleStyleSheet",
        "from reportlab.lib.styles import ParagraphStyle",
        "from reportlab.lib.units import inch",
        "from reportlab.lib.enums import TA_LEFT",
        "from reportlab.lib.enums import TA_JUSTIFY",
        "from reportlab.lib.enums import TA_CENTER",
        "from reportlab.platypus import SimpleDocTemplate",
        "from reportlab.platypus import Paragraph",
        "from reportlab.platypus import Spacer",
        "from reportlab.platypus import Image",
        "from reportlab.platypus import PageBreak",
        "from reportlab.platypus import KeepTogether",
        "from reportlab.platypus.doctemplate import PageTemplate",
        "from reportlab.platypus.doctemplate import BaseDocTemplate",
        "from reportlab.platypus.frames import Frame",
        "from reportlab.lib import colors",
        "from PIL import Image as PILImage"
      ],
      "imports_required": [
        "from reportlab.platypus.doctemplate import BaseDocTemplate",
        "from reportlab.platypus.doctemplate import PageTemplate",
        "from reportlab.platypus.frames import Frame",
        "from reportlab.lib import colors",
        "from reportlab.lib.units import inch"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 84,
      "line_start": 29,
      "name": "HybridSessionDocTemplate",
      "parameters": [
        {
          "annotation": "BaseDocTemplate",
          "default": null,
          "name": "bases"
        }
      ],
      "parameters_explained": {
        "**kwargs": "Additional keyword arguments passed to the parent BaseDocTemplate class, such as pagesize (default letter), leftMargin, rightMargin, topMargin, bottomMargin, title, author, etc.",
        "conversation_id": "Optional identifier for the conversation session. Used in the footer to track which conversation this document belongs to. Can be None if session tracking is not needed.",
        "exchange_number": "Optional integer representing the exchange number within a conversation. Used in the footer to indicate the specific turn or exchange. Can be None if exchange tracking is not needed.",
        "filename": "The output filename or file path where the PDF document will be saved. Can be a string path or file-like object."
      },
      "parent_class": null,
      "purpose": "This class provides a specialized PDF document template for generating hybrid response PDFs with automatic footer generation. It manages page layout with a main content frame and adds session metadata (conversation ID, exchange number) to each page footer. The footer includes page numbers, a hybrid response indicator, and session tracking information, making it ideal for conversational AI systems that need to track and document multi-turn interactions with mixed content types.",
      "return_annotation": null,
      "return_explained": "Instantiation returns a HybridSessionDocTemplate object that can be used to build PDF documents. The object inherits all methods from BaseDocTemplate, primarily the build() method which takes a list of flowables (Paragraph, Image, Spacer, etc.) and generates the final PDF with session footers on each page.",
      "settings_required": [
        "No specific environment variables or configuration files required",
        "ReportLab library must be installed (pip install reportlab)"
      ],
      "source_code": "class HybridSessionDocTemplate(BaseDocTemplate):\n    \"\"\"Custom document template for hybrid PDFs with session info\"\"\"\n    \n    def __init__(self, filename, conversation_id=None, exchange_number=None, **kwargs):\n        super().__init__(filename, **kwargs)\n        self.conversation_id = conversation_id\n        self.exchange_number = exchange_number\n        \n        # Create frame for main content (leaving space for footer)\n        main_frame = Frame(\n            self.leftMargin, self.bottomMargin + 0.5*inch,\n            self.width, self.height - 0.5*inch,\n            id='main'\n        )\n        \n        # Create page template\n        main_template = PageTemplate(\n            id='main',\n            frames=[main_frame],\n            onPage=self.add_session_footer\n        )\n        \n        self.addPageTemplates([main_template])\n    \n    def add_session_footer(self, canvas, doc):\n        \"\"\"Add session information to page footer\"\"\"\n        canvas.saveState()\n        \n        # Set footer style\n        canvas.setFont('Helvetica', 8)\n        canvas.setFillColor(colors.grey)\n        \n        # Left side: page number\n        page_text = f\"Page {doc.page}\"\n        canvas.drawString(doc.leftMargin, doc.bottomMargin, page_text)\n        \n        # Center: hybrid indicator\n        hybrid_text = \"Hybrid Response (Text + Graphics)\"\n        text_width = canvas.stringWidth(hybrid_text, 'Helvetica', 8)\n        x_center = (doc.width + 2*doc.leftMargin) / 2 - text_width / 2\n        canvas.drawString(x_center, doc.bottomMargin, hybrid_text)\n        \n        # Right side: session info\n        if self.conversation_id and self.exchange_number:\n            session_text = f\"Session: {self.conversation_id} | Exchange #{self.exchange_number}\"\n        elif self.conversation_id:\n            session_text = f\"Session: {self.conversation_id}\"\n        else:\n            session_text = \"E-Ink LLM Assistant\"\n            \n        # Calculate position for right-aligned text\n        text_width = canvas.stringWidth(session_text, 'Helvetica', 8)\n        x_position = doc.width + doc.leftMargin - text_width\n        canvas.drawString(x_position, doc.bottomMargin, session_text)\n        \n        canvas.restoreState()",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/hybrid_pdf_generator.py",
      "tags": [
        "pdf-generation",
        "reportlab",
        "document-template",
        "session-tracking",
        "footer",
        "hybrid-content",
        "conversational-ai",
        "page-layout",
        "document-formatting"
      ],
      "updated_at": "2025-12-07T00:49:11.885751",
      "usage_example": "from reportlab.platypus.doctemplate import BaseDocTemplate\nfrom reportlab.platypus.doctemplate import PageTemplate\nfrom reportlab.platypus.frames import Frame\nfrom reportlab.lib import colors\nfrom reportlab.lib.units import inch\nfrom reportlab.platypus import Paragraph, Spacer\nfrom reportlab.lib.styles import getSampleStyleSheet\n\n# Instantiate the template\ndoc = HybridSessionDocTemplate(\n    'output.pdf',\n    conversation_id='conv_12345',\n    exchange_number=3\n)\n\n# Create content\nstyles = getSampleStyleSheet()\nstory = [\n    Paragraph('Hybrid Response Example', styles['Heading1']),\n    Spacer(1, 0.2*inch),\n    Paragraph('This is a text response with graphics.', styles['Normal'])\n]\n\n# Build the PDF\ndoc.build(story)\n\n# Example without session info\ndoc_simple = HybridSessionDocTemplate('simple.pdf')\ndoc_simple.build(story)"
    },
    {
      "best_practices": [
        "Always provide an API key either through constructor parameter or OPENAI_API_KEY environment variable",
        "Use async/await pattern when calling analyze_and_respond() and other async methods",
        "Monitor usage statistics with get_usage_summary() to track API costs",
        "Ensure base64 encoded images are valid PNG format for best results",
        "Set enable_hybrid_mode in metadata to control graphics placeholder generation",
        "The class uses a two-stage processing approach: content analysis with small model, then comprehensive response with main model",
        "Handle exceptions from API calls as network errors or rate limits may occur",
        "The class maintains state through usage_stats - create new instances for independent tracking",
        "Graphics placeholders in responses follow format: [GRAPHIC:type:description:parameters]",
        "Cost estimates are approximate based on 2024 pricing and may need adjustment",
        "The class is designed for e-ink displays, so responses are optimized for high contrast and readability"
      ],
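A hedged sketch of the two-stage pattern listed above, using asyncio.to_thread to keep the synchronous OpenAI client off the event loop as the class does; the prompts here are placeholders, not the class's actual prompt text.

```python
import asyncio
import os

from openai import OpenAI

client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

async def two_stage_analysis(image_b64: str) -> str:
    """Stage 1: cheap classification with the small model; stage 2: full answer with the main model."""
    analysis = await asyncio.to_thread(
        client.chat.completions.create,
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": [
            {"type": "text", "text": "Classify this handwritten content."},   # illustrative prompt
            {"type": "image_url",
             "image_url": {"url": f"data:image/png;base64,{image_b64}", "detail": "low"}},
        ]}],
        max_tokens=500,
        temperature=0.1,
    )
    response = await asyncio.to_thread(
        client.chat.completions.create,
        model="gpt-4o",
        messages=[{"role": "user", "content": [
            {"type": "text",
             "text": "Respond comprehensively. Prior analysis: "
                     + analysis.choices[0].message.content},
            {"type": "image_url",
             "image_url": {"url": f"data:image/png;base64,{image_b64}", "detail": "high"}},
        ]}],
        max_tokens=2500,
        temperature=0.3,
    )
    return response.choices[0].message.content
```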
      "class_interface": {
        "attributes": [
          {
            "description": "OpenAI API key used for authentication",
            "is_class_variable": false,
            "name": "api_key",
            "type": "str"
          },
          {
            "description": "OpenAI client instance for making API calls",
            "is_class_variable": false,
            "name": "client",
            "type": "OpenAI"
          },
          {
            "description": "Model name for preprocessing tasks (default: 'gpt-4o-mini')",
            "is_class_variable": false,
            "name": "small_model",
            "type": "str"
          },
          {
            "description": "Model name for main processing tasks (default: 'gpt-4o')",
            "is_class_variable": false,
            "name": "main_model",
            "type": "str"
          },
          {
            "description": "Dictionary tracking preprocessing_calls, main_processing_calls, total_tokens_used, and total_cost_estimate",
            "is_class_variable": false,
            "name": "usage_stats",
            "type": "Dict[str, Any]"
          }
        ],
        "methods": [
          {
            "is_property": false,
            "is_static": false,
            "name": "__init__",
            "parameters": {
              "api_key": "Optional OpenAI API key. Falls back to OPENAI_API_KEY environment variable if not provided"
            },
            "purpose": "Initialize the LLMHandler with OpenAI client, model configurations, and usage tracking",
            "returns": "None (constructor)",
            "signature": "__init__(self, api_key: Optional[str] = None)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "analyze_and_respond",
            "parameters": {
              "image_b64": "Base64 encoded image string (PNG format recommended)",
              "metadata": "Dictionary containing image metadata including 'source_type' and 'enable_hybrid_mode' flags"
            },
            "purpose": "Main method to analyze handwritten/drawn content and provide comprehensive response with two-stage processing",
            "returns": "String containing comprehensive response with optional graphics placeholders in format [GRAPHIC:type:description:parameters]",
            "signature": "async analyze_and_respond(self, image_b64: str, metadata: Dict[str, Any]) -> str"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_analyze_content_type",
            "parameters": {
              "image_b64": "Base64 encoded image string"
            },
            "purpose": "Analyze content type and structure using small model (gpt-4o-mini) for efficient preprocessing",
            "returns": "Dictionary with keys: content_type, language, complexity, elements, response_approach, confidence",
            "signature": "async _analyze_content_type(self, image_b64: str) -> Dict[str, Any]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_generate_comprehensive_response",
            "parameters": {
              "content_analysis": "Dictionary from _analyze_content_type containing content characteristics",
              "image_b64": "Base64 encoded image string",
              "metadata": "Image metadata dictionary"
            },
            "purpose": "Generate comprehensive response using main model (gpt-4o) with optimized prompts based on content analysis",
            "returns": "String containing the comprehensive response text with optional graphics placeholders",
            "signature": "async _generate_comprehensive_response(self, image_b64: str, content_analysis: Dict[str, Any], metadata: Dict[str, Any]) -> str"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_build_dynamic_prompt",
            "parameters": {
              "content_analysis": "Dictionary containing content type, complexity, elements, and approach",
              "metadata": "Image metadata including source_type and hybrid mode settings"
            },
            "purpose": "Build optimized prompt dynamically based on content analysis results and metadata",
            "returns": "String containing the complete prompt tailored to the specific content type",
            "signature": "_build_dynamic_prompt(self, content_analysis: Dict[str, Any], metadata: Dict[str, Any]) -> str"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_should_use_hybrid_mode",
            "parameters": {
              "content_analysis": "Dictionary with content type, elements, and complexity",
              "metadata": "Metadata dictionary with enable_hybrid_mode flag"
            },
            "purpose": "Determine if hybrid mode with graphics should be enabled based on content characteristics",
            "returns": "Boolean indicating whether to include graphics instructions in the prompt",
            "signature": "_should_use_hybrid_mode(self, content_analysis: Dict[str, Any], metadata: Dict[str, Any]) -> bool"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_get_hybrid_graphics_instruction",
            "parameters": {
              "content_analysis": "Dictionary containing content analysis results"
            },
            "purpose": "Get detailed graphics instruction text for hybrid mode responses",
            "returns": "String containing comprehensive instructions for including graphics placeholders",
            "signature": "_get_hybrid_graphics_instruction(self, content_analysis: Dict[str, Any]) -> str"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_update_usage_stats",
            "parameters": {
              "usage_data": "OpenAI API usage object containing token counts (total_tokens, prompt_tokens, completion_tokens)"
            },
            "purpose": "Update internal usage statistics for cost tracking based on API response usage data",
            "returns": "None (updates internal state)",
            "signature": "_update_usage_stats(self, usage_data)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "get_usage_summary",
            "parameters": {},
            "purpose": "Get current usage statistics including call counts, token usage, and cost estimates",
            "returns": "Dictionary copy with keys: preprocessing_calls, main_processing_calls, total_tokens_used, total_cost_estimate",
            "signature": "get_usage_summary(self) -> Dict[str, Any]"
          }
        ]
      },
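The fenced-JSON parsing step that _analyze_content_type relies on, shown standalone; the fallback dictionary matches the defaults in the source below, and the sample reply string is made up.

````python
import json

def parse_analysis(content: str) -> dict:
    """Extract the JSON payload from a model reply that may wrap it in a code fence."""
    try:
        if '```json' in content:
            content = content.split('```json')[1].split('```')[0].strip()
        elif '```' in content:
            content = content.split('```')[1].split('```')[0].strip()
        return json.loads(content)
    except Exception:
        # Conservative default when the reply is not valid JSON.
        return {
            "content_type": "mixed",
            "language": "english",
            "complexity": "moderate",
            "elements": ["text", "drawings"],
            "response_approach": "analysis",
            "confidence": 0.5,
        }

print(parse_analysis('```json\n{"content_type": "question", "confidence": 0.9}\n```'))
````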
      "complexity": "complex",
      "component_type": "class",
      "conditional_imports": [],
      "created_at": "2025-12-06 23:47:38",
      "decorators": [],
      "dependencies": [
        "openai",
        "asyncio",
        "json",
        "os",
        "time",
        "typing"
      ],
      "description": "Handles OpenAI LLM interactions with optimized prompts for analyzing handwritten and drawn content, providing comprehensive responses with optional hybrid graphics integration.",
      "docstring": "Handles OpenAI LLM interactions with optimized prompts for handwritten/drawn content",
      "id": 1949,
      "imports": [
        "import os",
        "import asyncio",
        "import json",
        "from openai import OpenAI",
        "from typing import Dict",
        "from typing import Any",
        "from typing import Optional",
        "import time"
      ],
      "imports_required": [
        "import os",
        "import asyncio",
        "import json",
        "from openai import OpenAI",
        "from typing import Dict, Any, Optional",
        "import time"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 426,
      "line_start": 8,
      "name": "LLMHandler",
      "parameters": [
        {
          "annotation": "",
          "default": null,
          "name": "bases"
        }
      ],
      "parameters_explained": {
        "api_key": "Optional OpenAI API key string. If not provided, the class will attempt to read from the OPENAI_API_KEY environment variable. If neither is available, raises ValueError during initialization."
      },
      "parent_class": null,
      "purpose": "This class manages the complete lifecycle of analyzing handwritten/drawn images using OpenAI's GPT models. It employs a two-stage approach: first using a small model (gpt-4o-mini) to analyze content type and characteristics, then using a main model (gpt-4o) to generate comprehensive responses. The class supports dynamic prompt generation based on content analysis, hybrid mode with graphics placeholders, usage tracking, and cost estimation. It's designed for e-ink display applications and handles various content types including questions, instructions, diagrams, mathematical notation, and mixed content.",
      "return_annotation": null,
      "return_explained": "Instantiation returns an LLMHandler object configured with OpenAI client, model settings, and usage tracking. The main method analyze_and_respond() returns a string containing the comprehensive response with optional graphics placeholders. The get_usage_summary() method returns a dictionary with usage statistics including call counts, token usage, and cost estimates.",
      "settings_required": [
        "OPENAI_API_KEY environment variable (or pass api_key parameter to constructor)",
        "OpenAI API access with gpt-4o and gpt-4o-mini model permissions",
        "Network connectivity for API calls"
      ],
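A hedged usage sketch for these settings: the handler reads OPENAI_API_KEY from the environment, and get_usage_summary() returns the four counters described in the interface. The module name llm_handler, the image file, and the metadata values are assumptions for illustration.

```python
import asyncio
import base64

from llm_handler import LLMHandler   # assumed module name for this class

async def main() -> None:
    handler = LLMHandler()            # raises ValueError if OPENAI_API_KEY is unset

    with open("handwritten_note.png", "rb") as f:          # hypothetical input image
        image_b64 = base64.b64encode(f.read()).decode()

    response = await handler.analyze_and_respond(
        image_b64,
        metadata={"source_type": "scan", "enable_hybrid_mode": True},
    )
    print(response)

    # Track spend across calls: keys as listed in the interface description above.
    stats = handler.get_usage_summary()
    print(stats["preprocessing_calls"], stats["main_processing_calls"],
          stats["total_tokens_used"], stats["total_cost_estimate"])

asyncio.run(main())
```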
      "source_code": "class LLMHandler:\n    \"\"\"Handles OpenAI LLM interactions with optimized prompts for handwritten/drawn content\"\"\"\n    \n    def __init__(self, api_key: Optional[str] = None):\n        # Use API key from environment or parameter (matching OneCo_hybrid_RAG pattern)\n        self.api_key = api_key or os.getenv(\"OPENAI_API_KEY\")\n        if not self.api_key:\n            raise ValueError(\"OpenAI API key not provided. Set OPENAI_API_KEY environment variable or pass api_key parameter.\")\n        \n        self.client = OpenAI(api_key=self.api_key)\n        \n        # Model configurations (matching OneCo_hybrid_RAG style)\n        self.small_model = \"gpt-4o-mini\"  # For preprocessing tasks\n        self.main_model = \"gpt-4o\"        # For main processing\n        \n        # Usage tracking\n        self.usage_stats = {\n            \"preprocessing_calls\": 0,\n            \"main_processing_calls\": 0,\n            \"total_tokens_used\": 0,\n            \"total_cost_estimate\": 0.0\n        }\n    \n    async def analyze_and_respond(self, image_b64: str, metadata: Dict[str, Any]) -> str:\n        \"\"\"\n        Main method to analyze handwritten/drawn content and provide comprehensive response\n        \n        Args:\n            image_b64: Base64 encoded image\n            metadata: Image metadata from input processor\n            \n        Returns:\n            Comprehensive response string\n        \"\"\"\n        print(f\"\ud83e\udde0 Starting LLM analysis for {metadata.get('source_type', 'unknown')} content...\")\n        \n        # Step 1: Quick content analysis with small model\n        content_analysis = await self._analyze_content_type(image_b64)\n        print(f\"\ud83d\udccb Content analysis: {content_analysis['content_type']}\")\n        \n        # Step 2: Generate comprehensive response with main model\n        response = await self._generate_comprehensive_response(image_b64, content_analysis, metadata)\n        \n        print(f\"\u2705 LLM analysis complete\")\n        return response\n    \n    async def _analyze_content_type(self, image_b64: str) -> Dict[str, Any]:\n        \"\"\"Analyze content type and structure using small model\"\"\"\n        print(f\"\ud83d\udd0d Analyzing content type with {self.small_model}...\")\n        \n        prompt = \"\"\"\n        You are an expert content analyzer. Analyze this handwritten/drawn image and classify its content type and characteristics.\n\n        Determine:\n        1. Content type (question, instruction, diagram, notes, sketch, etc.)\n        2. Primary language (if text is present)\n        3. Complexity level (simple, moderate, complex)\n        4. Key elements present (text, drawings, diagrams, mathematical notation, etc.)\n        5. 
Suggested response approach\n\n        Output your analysis in strict JSON format:\n        ```json\n        {\n            \"content_type\": \"question|instruction|diagram|notes|sketch|mixed\",\n            \"language\": \"english|spanish|french|other\",\n            \"complexity\": \"simple|moderate|complex\",\n            \"elements\": [\"text\", \"drawings\", \"diagrams\", \"math\", \"tables\"],\n            \"response_approach\": \"direct_answer|explanation|step_by_step|analysis|interpretation\",\n            \"confidence\": 0.8\n        }\n        ```\n        \n        Focus on accuracy and be concise.\n        \"\"\"\n        \n        try:\n            response = await asyncio.to_thread(\n                self.client.chat.completions.create,\n                model=self.small_model,\n                messages=[\n                    {\n                        \"role\": \"user\",\n                        \"content\": [\n                            {\"type\": \"text\", \"text\": prompt},\n                            {\n                                \"type\": \"image_url\",\n                                \"image_url\": {\n                                    \"url\": f\"data:image/png;base64,{image_b64}\",\n                                    \"detail\": \"low\"  # Use low detail for analysis to save tokens\n                                }\n                            }\n                        ]\n                    }\n                ],\n                max_tokens=500,\n                temperature=0.1\n            )\n            \n            self.usage_stats[\"preprocessing_calls\"] += 1\n            self._update_usage_stats(response.usage)\n            \n            # Parse JSON response\n            content = response.choices[0].message.content\n            if '```json' in content:\n                content = content.split('```json')[1].split('```')[0].strip()\n            elif '```' in content:\n                content = content.split('```')[1].split('```')[0].strip()\n            \n            return json.loads(content)\n            \n        except Exception as e:\n            print(f\"\u26a0\ufe0f Error in content analysis: {e}\")\n            # Return default analysis if parsing fails\n            return {\n                \"content_type\": \"mixed\",\n                \"language\": \"english\",\n                \"complexity\": \"moderate\",\n                \"elements\": [\"text\", \"drawings\"],\n                \"response_approach\": \"analysis\",\n                \"confidence\": 0.5\n            }\n    \n    async def _generate_comprehensive_response(self, image_b64: str, content_analysis: Dict[str, Any], metadata: Dict[str, Any]) -> str:\n        \"\"\"Generate comprehensive response using main model with optimized prompts\"\"\"\n        print(f\"\ud83c\udfaf Generating comprehensive response with {self.main_model}...\")\n        \n        # Build dynamic prompt based on content analysis\n        prompt = self._build_dynamic_prompt(content_analysis, metadata)\n        \n        try:\n            response = await asyncio.to_thread(\n                self.client.chat.completions.create,\n                model=self.main_model,\n                messages=[\n                    {\n                        \"role\": \"user\",\n                        \"content\": [\n                            {\"type\": \"text\", \"text\": prompt},\n                            {\n                                \"type\": \"image_url\",\n                                \"image_url\": {\n         
                           \"url\": f\"data:image/png;base64,{image_b64}\",\n                                    \"detail\": \"high\"  # High detail for comprehensive analysis\n                                }\n                            }\n                        ]\n                    }\n                ],\n                max_tokens=2500,\n                temperature=0.3\n            )\n            \n            self.usage_stats[\"main_processing_calls\"] += 1\n            self._update_usage_stats(response.usage)\n            \n            return response.choices[0].message.content\n            \n        except Exception as e:\n            return f\"Error processing request: {str(e)}\\n\\nPlease ensure the image is clear and try again.\"\n    \n    def _build_dynamic_prompt(self, content_analysis: Dict[str, Any], metadata: Dict[str, Any]) -> str:\n        \"\"\"Build optimized prompt based on content analysis\"\"\"\n        \n        content_type = content_analysis.get(\"content_type\", \"mixed\")\n        complexity = content_analysis.get(\"complexity\", \"moderate\")\n        approach = content_analysis.get(\"response_approach\", \"analysis\")\n        elements = content_analysis.get(\"elements\", [])\n        \n        # Check if hybrid mode is available and appropriate\n        use_hybrid_mode = self._should_use_hybrid_mode(content_analysis, metadata)\n        \n        # Base prompt structure\n        base_prompt = f\"\"\"\nYou are an expert AI assistant specialized in analyzing and responding to handwritten and drawn content. \n\n**Content Analysis Results:**\n- Content Type: {content_type}\n- Complexity: {complexity}\n- Approach: {approach}\n- Elements Present: {', '.join(elements)}\n- Source: {metadata.get('source_type', 'unknown')}\n\n**Your Task:**\nCarefully analyze this {content_type} content and provide a comprehensive, well-structured response.\n\"\"\"\n        \n        # Add hybrid graphics instruction if appropriate\n        if use_hybrid_mode:\n            base_prompt += self._get_hybrid_graphics_instruction(content_analysis)\n        \n        # Add specific instructions based on content type\n        if content_type == \"question\":\n            specific_instructions = \"\"\"\n**Response Structure for Questions:**\n1. **Question Understanding**: Restate what you understand from the handwritten question\n2. **Direct Answer**: Provide a clear, direct answer\n3. **Detailed Explanation**: Elaborate with context, examples, and supporting information\n4. **Additional Context**: Include related information that might be helpful\n5. **Summary**: Conclude with key takeaways\n\n**Requirements:**\n- Address all parts of the question thoroughly\n- Use clear, accessible language\n- Provide examples where helpful\n- Include relevant context and background information\n\"\"\"\n            \n            if use_hybrid_mode:\n                specific_instructions += \"\"\"\n**Graphics Integration for Questions:**\n- Include charts for data-related questions\n- Add diagrams for process or concept questions  \n- Use illustrations for educational explanations\n- Provide visual examples where they enhance understanding\n\"\"\"\n                \n        elif content_type == \"instruction\":\n            specific_instructions = \"\"\"\n**Response Structure for Instructions:**\n1. **Instruction Analysis**: Clarify what the instruction is asking for\n2. **Step-by-Step Response**: Break down your response into clear steps\n3. 
**Detailed Guidance**: Provide comprehensive guidance for each step\n4. **Best Practices**: Include tips and best practices\n5. **Troubleshooting**: Address potential issues or alternatives\n\n**Requirements:**\n- Follow the instruction precisely\n- Provide actionable, specific guidance\n- Include warnings or precautions if relevant\n- Offer alternatives or variations where appropriate\n\"\"\"\n            \n            if use_hybrid_mode:\n                specific_instructions += \"\"\"\n**Graphics Integration for Instructions:**\n- Create flowcharts for step-by-step processes\n- Include diagrams for complex procedures\n- Add illustrations for technical concepts\n- Use charts for comparative information\n\"\"\"\n        \n        elif content_type == \"diagram\" or \"diagrams\" in elements:\n            specific_instructions = \"\"\"\n**Response Structure for Diagrams:**\n1. **Diagram Description**: Describe what you see in the diagram\n2. **Component Analysis**: Break down the key components and their relationships\n3. **Interpretation**: Explain what the diagram represents or demonstrates\n4. **Context and Applications**: Provide relevant context and real-world applications\n5. **Additional Information**: Include related concepts or principles\n\n**Requirements:**\n- Describe visual elements clearly\n- Explain relationships between components\n- Provide technical accuracy\n- Include practical applications or examples\n\"\"\"\n            \n            if use_hybrid_mode:\n                specific_instructions += \"\"\"\n**Graphics Integration for Diagrams:**\n- Create enhanced versions of hand-drawn diagrams\n- Add professional diagram representations\n- Include process flow improvements\n- Provide alternative visual perspectives\n\"\"\"\n        \n        elif \"math\" in elements:\n            specific_instructions = \"\"\"\n**Response Structure for Mathematical Content:**\n1. **Problem/Expression Recognition**: Identify the mathematical content\n2. **Solution Process**: Show step-by-step solution if it's a problem\n3. **Explanation**: Explain the mathematical concepts involved\n4. **Verification**: Check the solution or explain the concept thoroughly\n5. **Related Concepts**: Include related mathematical principles\n\n**Requirements:**\n- Show all mathematical steps clearly\n- Explain the reasoning behind each step\n- Use proper mathematical notation in text form\n- Provide conceptual understanding, not just calculations\n\"\"\"\n            \n            if use_hybrid_mode:\n                specific_instructions += \"\"\"\n**Graphics Integration for Mathematics:**\n- Use illustration graphics for mathematical concepts\n- Include charts for data analysis or statistics\n- Create diagrams for geometric problems\n- Show visual solutions where helpful\n\"\"\"\n        \n        else:  # mixed or other content types\n            specific_instructions = \"\"\"\n**Response Structure for Mixed Content:**\n1. **Content Overview**: Summarize what you observe in the handwritten/drawn content\n2. **Element-by-Element Analysis**: Address each distinct element (text, drawings, etc.)\n3. **Synthesis**: Connect the different elements and explain their relationship\n4. **Comprehensive Response**: Provide thorough information addressing all aspects\n5. 
**Conclusion**: Summarize key points and implications\n\n**Requirements:**\n- Address all visible elements in the content\n- Maintain logical flow between different content types\n- Provide depth and detail appropriate to the content\n- Ensure clarity and accessibility\n\"\"\"\n            \n            if use_hybrid_mode:\n                specific_instructions += \"\"\"\n**Graphics Integration for Mixed Content:**\n- Add graphics that complement and enhance written explanations\n- Use appropriate chart types for any data mentioned\n- Include diagrams for processes or workflows\n- Provide illustrations for complex concepts\n\"\"\"\n        \n        # Add formatting requirements\n        formatting_requirements = \"\"\"\n\n**Formatting Requirements:**\n- Use clear Markdown formatting with appropriate headers\n- Structure your response with logical sections\n- Use bullet points or numbered lists for clarity\n- Include **bold** text for emphasis on key points\n- Ensure the response is well-organized and easy to read on an e-ink display\n- Keep paragraphs concise but informative\n\n**Quality Standards:**\n- Provide accurate, helpful information\n- Be thorough but not unnecessarily verbose\n- Use examples and analogies where helpful\n- Maintain a helpful, educational tone\n- Double-check any factual claims or calculations\n\"\"\"\n        \n        return base_prompt + specific_instructions + formatting_requirements\n    \n    def _should_use_hybrid_mode(self, content_analysis: Dict[str, Any], metadata: Dict[str, Any]) -> bool:\n        \"\"\"Determine if hybrid mode with graphics should be used\"\"\"\n        # Check if hybrid mode is enabled in metadata\n        if not metadata.get('enable_hybrid_mode', True):\n            return False\n        \n        # Use hybrid mode for content that would benefit from graphics\n        content_type = content_analysis.get(\"content_type\", \"mixed\")\n        elements = content_analysis.get(\"elements\", [])\n        complexity = content_analysis.get(\"complexity\", \"moderate\")\n        \n        # Use graphics for these content types\n        graphic_friendly_types = [\"question\", \"instruction\", \"diagram\", \"mixed\"]\n        has_graphic_elements = any(elem in elements for elem in [\"math\", \"diagrams\", \"drawings\"])\n        is_complex = complexity in [\"moderate\", \"complex\"]\n        \n        return (content_type in graphic_friendly_types or \n                has_graphic_elements or \n                is_complex)\n    \n    def _get_hybrid_graphics_instruction(self, content_analysis: Dict[str, Any]) -> str:\n        \"\"\"Get graphics instruction based on content analysis\"\"\"\n        \n        graphics_instruction = \"\"\"\n\n**HYBRID RESPONSE CAPABILITY:**\nYou can now include graphics in your responses! 
Use the following placeholder format to request graphics:\n\n[GRAPHIC:type:description:parameters]\n\n**Available Graphic Types:**\n- chart: Data visualizations (bar, line, pie, scatter charts)\n- diagram: Process flows, organizational charts, concept maps\n- illustration: Educational diagrams, mathematical concepts, technical drawings\n- sketch: Simple drawings, annotations, visual explanations\n\n**Placeholder Format Examples:**\n- [GRAPHIC:chart:Sales Comparison:{\"type\":\"bar\",\"data\":[25,40,30,45],\"labels\":[\"Q1\",\"Q2\",\"Q3\",\"Q4\"],\"title\":\"Quarterly Sales\"}]\n- [GRAPHIC:diagram:Process Flow:{\"steps\":[\"Input\",\"Process\",\"Output\"],\"style\":\"flowchart\",\"direction\":\"horizontal\"}]\n- [GRAPHIC:illustration:Mathematical Concept:{\"concept\":\"quadratic_function\",\"style\":\"educational\",\"annotations\":true}]\n\n**When to Include Graphics:**\n- Data that would benefit from visualization\n- Complex processes that need step-by-step diagrams\n- Mathematical or scientific concepts\n- Comparisons that work better visually\n- Any content where a graphic would enhance understanding\n\n**Graphics Integration Guidelines:**\n1. Place graphic placeholders exactly where you want them in your text\n2. Ensure graphics complement and enhance your written explanation\n3. Provide clear, descriptive parameters for graphic generation\n4. Use graphics strategically - not every response needs them\n5. Consider the e-ink display limitations (high contrast, simple designs work best)\n\n**Response Structure with Graphics:**\n- Start with your text explanation\n- Insert graphic placeholders at relevant points\n- Continue your explanation referencing the graphics\n- Ensure the response flows naturally even without the graphics\n\n**Important:** Only include graphics when they genuinely enhance your response. A good text-only response is better than a response with unnecessary graphics. Focus on clarity and helpfulness above all else.\n\"\"\"\n        \n        return graphics_instruction\n    \n    def _update_usage_stats(self, usage_data):\n        \"\"\"Update usage statistics for cost tracking\"\"\"\n        if hasattr(usage_data, 'total_tokens'):\n            self.usage_stats[\"total_tokens_used\"] += usage_data.total_tokens\n            \n            # Rough cost estimation (as of 2024 pricing)\n            if self.main_model == \"gpt-4o\":\n                # Approximate costs: input ~$0.005/1K tokens, output ~$0.015/1K tokens\n                prompt_tokens = getattr(usage_data, 'prompt_tokens', 0)\n                completion_tokens = getattr(usage_data, 'completion_tokens', 0)\n                cost = (prompt_tokens * 0.005 + completion_tokens * 0.015) / 1000\n                self.usage_stats[\"total_cost_estimate\"] += cost\n    \n    def get_usage_summary(self) -> Dict[str, Any]:\n        \"\"\"Get current usage statistics\"\"\"\n        return self.usage_stats.copy()",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/llm_handler.py",
      "tags": [
        "openai",
        "llm",
        "gpt-4",
        "image-analysis",
        "handwriting-recognition",
        "async",
        "vision",
        "prompt-engineering",
        "content-analysis",
        "hybrid-graphics",
        "usage-tracking",
        "cost-estimation",
        "e-ink",
        "educational"
      ],
      "updated_at": "2025-12-07T00:47:38.051880",
      "usage_example": "import os\nimport asyncio\nfrom llm_handler import LLMHandler\n\n# Initialize handler\nhandler = LLMHandler(api_key=os.getenv('OPENAI_API_KEY'))\n\n# Prepare image and metadata\nimage_b64 = \"base64_encoded_image_string\"\nmetadata = {\n    'source_type': 'handwritten',\n    'enable_hybrid_mode': True\n}\n\n# Analyze and get response (async)\nasync def process_image():\n    response = await handler.analyze_and_respond(image_b64, metadata)\n    print(response)\n    \n    # Check usage statistics\n    stats = handler.get_usage_summary()\n    print(f\"Tokens used: {stats['total_tokens_used']}\")\n    print(f\"Estimated cost: ${stats['total_cost_estimate']:.4f}\")\n\n# Run async function\nasyncio.run(process_image())"
    },
    {
      "best_practices": [
        "Always use async/await when calling analyze_multi_page_document as it performs asynchronous LLM operations",
        "Ensure PageAnalysis objects have valid image_b64 and text_content before passing to the handler",
        "The class automatically selects the optimal analysis strategy: single page (1 page), contextual (2-5 pages), progressive summary (6-20 pages), or chunked (20+ pages)",
        "For large documents, the handler intelligently selects key pages for detailed analysis to manage token usage",
        "The conversation_context parameter allows maintaining context across multiple document analysis sessions",
        "Monitor processing_stats in the result to understand which analysis method was used and track performance",
        "The handler updates PageAnalysis objects in-place with analysis_result attribute",
        "For very large documents (100+ pages), expect longer processing times as the handler processes in chunks",
        "Ensure sufficient API rate limits and quotas for the LLM service when processing large documents",
        "The combined_response provides a formatted, human-readable summary suitable for direct presentation"
      ],
      "class_interface": {
        "attributes": [
          {
            "description": "Instance of LLMHandler used for performing actual LLM API calls and analysis",
            "is_class_variable": false,
            "name": "llm_handler",
            "type": "LLMHandler"
          },
          {
            "description": "Logger instance for tracking processing progress and debugging",
            "is_class_variable": false,
            "name": "logger",
            "type": "logging.Logger"
          }
        ],
        "methods": [
          {
            "is_property": false,
            "is_static": false,
            "name": "__init__",
            "parameters": {
              "api_key": "API key for the LLM service, passed to the underlying LLMHandler"
            },
            "purpose": "Initialize the multi-page LLM handler with API credentials and set up logging",
            "returns": "None (constructor)",
            "signature": "__init__(self, api_key: str)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "analyze_multi_page_document",
            "parameters": {
              "conversation_context": "Optional previous conversation context to maintain continuity across sessions",
              "metadata": "Dictionary with document metadata (source_file, total_pages, etc.)",
              "pages": "List of PageAnalysis objects containing page images and text content"
            },
            "purpose": "Main entry point for analyzing complete multi-page documents with automatic strategy selection based on document size",
            "returns": "MultiPageAnalysisResult containing page_analyses (list of strings), document_summary (DocumentSummary object), combined_response (formatted string), and processing_stats (dictionary with metrics)",
            "signature": "async analyze_multi_page_document(self, pages: List[PageAnalysis], metadata: Dict[str, Any], conversation_context: str = '') -> MultiPageAnalysisResult"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_analyze_single_page",
            "parameters": {
              "conversation_context": "Previous conversation context",
              "metadata": "Document metadata dictionary",
              "page": "PageAnalysis object for the single page"
            },
            "purpose": "Analyze a single page document using standard LLM processing",
            "returns": "String containing the analysis result for the page",
            "signature": "async _analyze_single_page(self, page: PageAnalysis, metadata: Dict[str, Any], conversation_context: str) -> str"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_analyze_pages_with_context",
            "parameters": {
              "conversation_context": "Previous conversation context",
              "metadata": "Document metadata dictionary",
              "pages": "List of PageAnalysis objects (2-5 pages)"
            },
            "purpose": "Analyze 2-5 page documents with full context awareness, building cumulative context from previous pages",
            "returns": "List of analysis strings, one per page, with each analysis considering previous pages",
            "signature": "async _analyze_pages_with_context(self, pages: List[PageAnalysis], metadata: Dict[str, Any], conversation_context: str) -> List[str]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_analyze_progressive_summary",
            "parameters": {
              "conversation_context": "Previous conversation context",
              "metadata": "Document metadata dictionary",
              "pages": "List of PageAnalysis objects (6-20 pages)"
            },
            "purpose": "Analyze 6-20 page documents using progressive summarization in chunks of 4 pages",
            "returns": "List of analysis strings with progressive summaries maintaining context across chunks",
            "signature": "async _analyze_progressive_summary(self, pages: List[PageAnalysis], metadata: Dict[str, Any], conversation_context: str) -> List[str]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_analyze_chunked_document",
            "parameters": {
              "conversation_context": "Previous conversation context",
              "metadata": "Document metadata dictionary",
              "pages": "List of PageAnalysis objects (20+ pages)"
            },
            "purpose": "Analyze large documents (20+ pages) by selecting key pages for detailed analysis and summarizing others",
            "returns": "List of analysis strings with detailed analyses for key pages and summaries for others",
            "signature": "async _analyze_chunked_document(self, pages: List[PageAnalysis], metadata: Dict[str, Any], conversation_context: str) -> List[str]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_select_key_pages",
            "parameters": {
              "pages": "List of PageAnalysis objects to select from"
            },
            "purpose": "Select key pages for detailed analysis in large documents based on content density and position",
            "returns": "Sorted list of page indices (0-based) representing key pages to analyze in detail",
            "signature": "_select_key_pages(self, pages: List[PageAnalysis]) -> List[int]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_generate_document_summary",
            "parameters": {
              "metadata": "Document metadata dictionary",
              "page_analyses": "List of analysis strings for each page",
              "pages": "List of PageAnalysis objects with updated analysis_result attributes"
            },
            "purpose": "Generate comprehensive document summary using the MultiPagePDFProcessor",
            "returns": "DocumentSummary object containing overall_summary, document_type, key_findings, main_topics, and confidence_score",
            "signature": "async _generate_document_summary(self, pages: List[PageAnalysis], page_analyses: List[str], metadata: Dict[str, Any]) -> DocumentSummary"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_create_combined_response",
            "parameters": {
              "document_summary": "DocumentSummary object with overall insights",
              "metadata": "Document metadata dictionary",
              "page_analyses": "List of analysis strings for each page"
            },
            "purpose": "Create a formatted, human-readable combined response from all analyses and summary",
            "returns": "Formatted markdown string containing document summary, key findings, main topics, and page-by-page analysis (condensed for large documents)",
            "signature": "_create_combined_response(self, page_analyses: List[str], document_summary: DocumentSummary, metadata: Dict[str, Any]) -> str"
          }
        ]
      },
      "complexity": "complex",
      "component_type": "class",
      "conditional_imports": [
        {
          "condition": "imported lazily inside _generate_document_summary method, but should be available at module level",
          "import": "from multi_page_processor import MultiPagePDFProcessor",
          "optional": false
        }
      ],
      "created_at": "2025-12-06 23:46:59",
      "decorators": [],
      "dependencies": [
        "asyncio",
        "logging",
        "pathlib",
        "typing",
        "dataclasses",
        "llm_handler",
        "multi_page_processor"
      ],
      "description": "Handles LLM processing for multi-page documents with context awareness, automatically selecting optimal analysis strategies based on document size.",
      "docstring": "Handles LLM processing for multi-page documents with context awareness",
      "id": 1948,
      "imports": [
        "import asyncio",
        "import logging",
        "from pathlib import Path",
        "from typing import List",
        "from typing import Dict",
        "from typing import Any",
        "from typing import Optional",
        "from typing import Tuple",
        "from dataclasses import dataclass",
        "from llm_handler import LLMHandler",
        "from multi_page_processor import PageAnalysis",
        "from multi_page_processor import DocumentSummary",
        "from multi_page_processor import MultiPagePDFProcessor"
      ],
      "imports_required": [
        "import asyncio",
        "import logging",
        "from pathlib import Path",
        "from typing import List, Dict, Any, Optional, Tuple",
        "from dataclasses import dataclass",
        "from llm_handler import LLMHandler",
        "from multi_page_processor import PageAnalysis, DocumentSummary, MultiPagePDFProcessor"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 356,
      "line_start": 25,
      "name": "MultiPageLLMHandler",
      "parameters": [
        {
          "annotation": "",
          "default": null,
          "name": "bases"
        }
      ],
      "parameters_explained": {
        "api_key": "API key for the LLM service (passed to the underlying LLMHandler). This is required for authentication with the LLM provider (e.g., OpenAI, Anthropic)."
      },
      "parent_class": null,
      "purpose": "This class orchestrates the analysis of multi-page documents by intelligently choosing between different processing strategies (single page, contextual pages, progressive summary, or chunked analysis) based on document length. It maintains context across pages, generates comprehensive summaries, and produces unified responses that synthesize information from all pages. The class is designed to handle documents ranging from single pages to large documents with hundreds of pages, optimizing token usage and processing efficiency.",
      "return_annotation": null,
      "return_explained": "The constructor returns an instance of MultiPageLLMHandler. The main method analyze_multi_page_document returns a MultiPageAnalysisResult object containing: page_analyses (list of analysis strings for each page), document_summary (DocumentSummary object with overall insights), combined_response (formatted string with complete analysis), and processing_stats (dictionary with metrics like total_pages, pages_processed, total_tokens, processing_time, and analysis_methods used).",
      "settings_required": [
        "API key for LLM service (OpenAI, Anthropic, etc.)",
        "LLMHandler must be properly configured and available",
        "MultiPagePDFProcessor and related classes (PageAnalysis, DocumentSummary, MultiPageAnalysisResult) must be available",
        "Logging configuration recommended for tracking processing progress"
      ],
      "source_code": "class MultiPageLLMHandler:\n    \"\"\"Handles LLM processing for multi-page documents with context awareness\"\"\"\n    \n    def __init__(self, api_key: str):\n        \"\"\"Initialize multi-page LLM handler\"\"\"\n        self.llm_handler = LLMHandler(api_key)\n        self.logger = logging.getLogger(__name__)\n        \n    async def analyze_multi_page_document(self, \n                                        pages: List[PageAnalysis],\n                                        metadata: Dict[str, Any],\n                                        conversation_context: str = \"\") -> MultiPageAnalysisResult:\n        \"\"\"\n        Analyze complete multi-page document with context awareness\n        \n        Args:\n            pages: List of page analyses\n            metadata: Document metadata\n            conversation_context: Previous conversation context\n            \n        Returns:\n            MultiPageAnalysisResult with comprehensive analysis\n        \"\"\"\n        total_pages = len(pages)\n        self.logger.info(f\"Starting multi-page analysis of {total_pages} pages\")\n        \n        # Statistics tracking\n        stats = {\n            'total_pages': total_pages,\n            'pages_processed': 0,\n            'total_tokens': 0,\n            'processing_time': 0,\n            'analysis_methods': []\n        }\n        \n        # Choose analysis strategy based on document size\n        if total_pages == 1:\n            # Single page - use standard processing\n            result = await self._analyze_single_page(pages[0], metadata, conversation_context)\n            page_analyses = [result]\n            stats['analysis_methods'].append('single_page')\n            \n        elif total_pages <= 5:\n            # Small document - analyze each page with context\n            page_analyses = await self._analyze_pages_with_context(pages, metadata, conversation_context)\n            stats['analysis_methods'].append('contextual_pages')\n            \n        elif total_pages <= 20:\n            # Medium document - progressive analysis with summaries\n            page_analyses = await self._analyze_progressive_summary(pages, metadata, conversation_context)\n            stats['analysis_methods'].append('progressive_summary')\n            \n        else:\n            # Large document - chunk-based analysis\n            page_analyses = await self._analyze_chunked_document(pages, metadata, conversation_context)\n            stats['analysis_methods'].append('chunked_analysis')\n        \n        stats['pages_processed'] = len(page_analyses)\n        \n        # Generate document summary\n        document_summary = await self._generate_document_summary(pages, page_analyses, metadata)\n        \n        # Create combined response\n        combined_response = self._create_combined_response(page_analyses, document_summary, metadata)\n        \n        return MultiPageAnalysisResult(\n            page_analyses=page_analyses,\n            document_summary=document_summary,\n            combined_response=combined_response,\n            processing_stats=stats\n        )\n    \n    async def _analyze_single_page(self, page: PageAnalysis, metadata: Dict[str, Any], \n                                 conversation_context: str) -> str:\n        \"\"\"Analyze single page using standard processing\"\"\"\n        # Use existing LLM handler for single page\n        enhanced_metadata = {**metadata, 'conversation_context': conversation_context}\n        return await 
self.llm_handler.analyze_and_respond(page.image_b64, enhanced_metadata)\n    \n    async def _analyze_pages_with_context(self, pages: List[PageAnalysis], \n                                        metadata: Dict[str, Any],\n                                        conversation_context: str) -> List[str]:\n        \"\"\"Analyze each page with full document context\"\"\"\n        page_analyses = []\n        cumulative_context = conversation_context\n        \n        for i, page in enumerate(pages):\n            self.logger.info(f\"Analyzing page {i+1}/{len(pages)} with context\")\n            \n            # Build context from previous pages\n            if i > 0:\n                prev_summary = f\"\\nPrevious pages summary:\\n\"\n                for j in range(i):\n                    prev_summary += f\"Page {j+1}: {page_analyses[j][:200]}...\\n\"\n                cumulative_context += prev_summary\n            \n            # Create context-aware prompt\n            prompt = f\"\"\"Analyzing page {i+1} of {len(pages)} from a multi-page document.\n\nDocument Context:\n- Total pages: {len(pages)}\n- Current page: {i+1}\n- Processing mode: Contextual analysis\n\n{cumulative_context}\n\nPage {i+1} Text Content:\n{page.text_content[:1000]}{'...' if len(page.text_content) > 1000 else ''}\n\nPlease analyze this page considering:\n1. The content on this specific page\n2. How it relates to previous pages in the document\n3. The overall document flow and structure\n4. Key information that builds upon previous content\n5. Any questions or insights for this page\n\nProvide a comprehensive analysis that considers the document context.\"\"\"\n\n            # Analyze with enhanced metadata\n            enhanced_metadata = {\n                **metadata,\n                'page_number': page.page_number,\n                'total_pages': len(pages),\n                'custom_prompt': prompt,\n                'analysis_mode': 'contextual'\n            }\n            \n            analysis = await self.llm_handler.analyze_and_respond(page.image_b64, enhanced_metadata)\n            page_analyses.append(analysis)\n            \n            # Update page analysis result\n            page.analysis_result = analysis\n        \n        return page_analyses\n    \n    async def _analyze_progressive_summary(self, pages: List[PageAnalysis],\n                                         metadata: Dict[str, Any],\n                                         conversation_context: str) -> List[str]:\n        \"\"\"Analyze with progressive summarization for medium documents\"\"\"\n        page_analyses = []\n        running_summary = conversation_context\n        \n        # Process in chunks of 3-5 pages with summaries\n        chunk_size = 4\n        \n        for chunk_start in range(0, len(pages), chunk_size):\n            chunk_end = min(chunk_start + chunk_size, len(pages))\n            chunk_pages = pages[chunk_start:chunk_end]\n            \n            self.logger.info(f\"Processing chunk {chunk_start+1}-{chunk_end} of {len(pages)}\")\n            \n            # Analyze chunk pages\n            chunk_analyses = []\n            for i, page in enumerate(chunk_pages):\n                global_page_num = chunk_start + i + 1\n                \n                prompt = f\"\"\"Analyzing page {global_page_num} of {len(pages)} (chunk page {i+1}/{len(chunk_pages)}).\n\nDocument Progress Summary:\n{running_summary}\n\nPage {global_page_num} Content:\n{page.text_content[:800]}{'...' 
if len(page.text_content) > 800 else ''}\n\nAnalyze this page focusing on:\n1. Key content and insights\n2. How it builds on previous pages\n3. Important details for document understanding\n4. Progression of ideas or information\"\"\"\n\n                enhanced_metadata = {\n                    **metadata,\n                    'page_number': global_page_num,\n                    'total_pages': len(pages),\n                    'custom_prompt': prompt,\n                    'analysis_mode': 'progressive'\n                }\n                \n                analysis = await self.llm_handler.analyze_and_respond(page.image_b64, enhanced_metadata)\n                chunk_analyses.append(analysis)\n                page_analyses.append(analysis)\n                page.analysis_result = analysis\n            \n            # Create chunk summary for next iteration\n            if chunk_end < len(pages):  # Not the last chunk\n                chunk_summary = f\"\\nPages {chunk_start+1}-{chunk_end} Summary:\\n\"\n                for i, analysis in enumerate(chunk_analyses):\n                    chunk_summary += f\"Page {chunk_start + i + 1}: {analysis[:150]}...\\n\"\n                running_summary += chunk_summary\n        \n        return page_analyses\n    \n    async def _analyze_chunked_document(self, pages: List[PageAnalysis],\n                                      metadata: Dict[str, Any],\n                                      conversation_context: str) -> List[str]:\n        \"\"\"Analyze large documents using chunked approach\"\"\"\n        page_analyses = []\n        \n        # For large documents, analyze representative pages and create summaries\n        self.logger.info(f\"Using chunked analysis for {len(pages)} pages\")\n        \n        # Select key pages for detailed analysis\n        key_pages_indices = self._select_key_pages(pages)\n        \n        # Analyze key pages in detail\n        for page_idx in key_pages_indices:\n            page = pages[page_idx]\n            \n            prompt = f\"\"\"Analyzing key page {page_idx + 1} of {len(pages)} from a large document.\n\nThis is a representative page selected for detailed analysis.\n\nPage Content:\n{page.text_content[:1000]}{'...' if len(page.text_content) > 1000 else ''}\n\nProvide a comprehensive analysis focusing on:\n1. Main themes and topics on this page\n2. Key information and insights\n3. Document structure and organization\n4. 
Important details that represent this section\"\"\"\n\n            enhanced_metadata = {\n                **metadata,\n                'page_number': page.page_number,\n                'total_pages': len(pages),\n                'custom_prompt': prompt,\n                'analysis_mode': 'key_page'\n            }\n            \n            analysis = await self.llm_handler.analyze_and_respond(page.image_b64, enhanced_metadata)\n            page.analysis_result = analysis\n        \n        # Create analyses for all pages (detailed for key pages, summary for others)\n        for i, page in enumerate(pages):\n            if i in key_pages_indices:\n                page_analyses.append(page.analysis_result)\n            else:\n                # Create summary analysis for non-key pages\n                summary = f\"Page {i+1}: Contains {len(page.text_content)} characters of content.\"\n                if page.text_content.strip():\n                    # Extract first few sentences as summary\n                    sentences = page.text_content.split('.')[:3]\n                    summary += f\" Key content: {'. '.join(sentences)[:200]}...\"\n                page_analyses.append(summary)\n        \n        return page_analyses\n    \n    def _select_key_pages(self, pages: List[PageAnalysis]) -> List[int]:\n        \"\"\"Select key pages for detailed analysis in large documents\"\"\"\n        total_pages = len(pages)\n        \n        # Always include first and last pages\n        key_indices = [0]\n        if total_pages > 1:\n            key_indices.append(total_pages - 1)\n        \n        # Add middle pages based on content density\n        content_scores = []\n        for i, page in enumerate(pages):\n            score = len(page.text_content.strip())\n            content_scores.append((score, i))\n        \n        # Sort by content score and select top pages\n        content_scores.sort(reverse=True)\n        \n        # Select additional key pages (up to 10 total for very large docs)\n        max_key_pages = min(10, max(3, total_pages // 10))\n        \n        for score, idx in content_scores[:max_key_pages]:\n            if idx not in key_indices:\n                key_indices.append(idx)\n        \n        return sorted(key_indices)\n    \n    async def _generate_document_summary(self, pages: List[PageAnalysis],\n                                       page_analyses: List[str],\n                                       metadata: Dict[str, Any]) -> DocumentSummary:\n        \"\"\"Generate comprehensive document summary\"\"\"\n        # Use multi-page processor for basic summary\n        from multi_page_processor import MultiPagePDFProcessor\n        processor = MultiPagePDFProcessor()\n        \n        # Update pages with analysis results\n        for i, analysis in enumerate(page_analyses):\n            if i < len(pages):\n                pages[i].analysis_result = analysis\n        \n        return processor.generate_document_summary(pages, metadata)\n    \n    def _create_combined_response(self, page_analyses: List[str],\n                                document_summary: DocumentSummary,\n                                metadata: Dict[str, Any]) -> str:\n        \"\"\"Create combined response from all analyses\"\"\"\n        total_pages = len(page_analyses)\n        \n        response = f\"# Multi-Page Document Analysis\\n\\n\"\n        response += f\"**Document:** {Path(metadata.get('source_file', 'Unknown')).name}\\n\"\n        response += f\"**Pages:** {total_pages} pages 
processed\\n\"\n        response += f\"**Type:** {document_summary.document_type.replace('_', ' ').title()}\\n\"\n        response += f\"**Confidence:** {document_summary.confidence_score:.0%}\\n\\n\"\n        \n        # Overall summary\n        response += f\"## Document Summary\\n\\n{document_summary.overall_summary}\\n\\n\"\n        \n        # Key findings\n        if document_summary.key_findings:\n            response += f\"## Key Findings\\n\\n\"\n            for finding in document_summary.key_findings:\n                response += f\"\u2022 {finding}\\n\"\n            response += \"\\n\"\n        \n        # Main topics\n        if document_summary.main_topics:\n            response += f\"## Main Topics\\n\\n\"\n            for topic in document_summary.main_topics[:10]:  # Limit to top 10\n                response += f\"\u2022 {topic}\\n\"\n            response += \"\\n\"\n        \n        # Page-by-page analysis (condensed for large documents)\n        if total_pages <= 10:\n            response += f\"## Page-by-Page Analysis\\n\\n\"\n            for i, analysis in enumerate(page_analyses):\n                response += f\"### Page {i+1}\\n\\n{analysis}\\n\\n\"\n        else:\n            response += f\"## Key Pages Analysis\\n\\n\"\n            # Show only key analyses for large documents\n            key_pages = [0, total_pages//2, total_pages-1]  # First, middle, last\n            for page_idx in key_pages:\n                if page_idx < len(page_analyses):\n                    response += f\"### Page {page_idx + 1}\\n\\n{page_analyses[page_idx]}\\n\\n\"\n        \n        return response",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/multi_page_llm_handler.py",
      "tags": [
        "llm",
        "multi-page",
        "document-analysis",
        "context-aware",
        "async",
        "pdf-processing",
        "summarization",
        "chunking",
        "progressive-analysis",
        "ai",
        "nlp"
      ],
      "updated_at": "2025-12-07T00:46:59.159876",
      "usage_example": "import asyncio\nfrom multi_page_llm_handler import MultiPageLLMHandler\nfrom multi_page_processor import PageAnalysis\n\n# Initialize handler\napi_key = \"your-api-key-here\"\nhandler = MultiPageLLMHandler(api_key)\n\n# Prepare page analyses (from PDF processor)\npages = [\n    PageAnalysis(page_number=1, image_b64=\"base64_image_data\", text_content=\"Page 1 text...\"),\n    PageAnalysis(page_number=2, image_b64=\"base64_image_data\", text_content=\"Page 2 text...\")\n]\n\nmetadata = {\n    'source_file': 'document.pdf',\n    'total_pages': 2,\n    'file_size': 1024000\n}\n\n# Analyze document\nresult = await handler.analyze_multi_page_document(\n    pages=pages,\n    metadata=metadata,\n    conversation_context=\"Previous conversation context if any\"\n)\n\n# Access results\nprint(result.combined_response)\nprint(f\"Processed {result.processing_stats['pages_processed']} pages\")\nprint(f\"Analysis method: {result.processing_stats['analysis_methods']}\")\nfor i, analysis in enumerate(result.page_analyses):\n    print(f\"Page {i+1}: {analysis[:200]}...\")"
    },
    {
      "best_practices": [
        "This is an immutable dataclass by default - consider using frozen=True in the decorator if immutability is desired",
        "Always ensure page_analyses list order matches the actual page order in the document",
        "Use consistent keys in processing_stats dictionary across different analysis runs for easier comparison",
        "The combined_response should synthesize information from all pages, not just concatenate page_analyses",
        "Validate that the length of page_analyses matches the total_pages in document_summary if that field exists",
        "Consider adding validation in __post_init__ if you need to enforce constraints on the data",
        "This class is designed to be instantiated once per document analysis and should not be modified after creation",
        "Use type hints when working with this class to leverage IDE autocomplete and type checking"
      ],
      "class_interface": {
        "attributes": [
          {
            "description": "List of analysis results for each page in the document, ordered by page number",
            "is_class_variable": false,
            "name": "page_analyses",
            "type": "List[str]"
          },
          {
            "description": "High-level summary object containing aggregated information about the entire document",
            "is_class_variable": false,
            "name": "document_summary",
            "type": "DocumentSummary"
          },
          {
            "description": "Synthesized response that combines insights from all page analyses into a cohesive narrative",
            "is_class_variable": false,
            "name": "combined_response",
            "type": "str"
          },
          {
            "description": "Dictionary containing processing metadata such as timing, token counts, and other relevant statistics",
            "is_class_variable": false,
            "name": "processing_stats",
            "type": "Dict[str, Any]"
          }
        ],
        "methods": [
          {
            "is_property": false,
            "is_static": false,
            "name": "__init__",
            "parameters": {
              "combined_response": "Unified string response synthesizing all page analyses",
              "document_summary": "DocumentSummary object with high-level document information",
              "page_analyses": "List of strings containing individual page analysis results",
              "processing_stats": "Dictionary of processing metadata and statistics"
            },
            "purpose": "Initializes a new MultiPageAnalysisResult instance with the provided analysis data. Automatically generated by the dataclass decorator.",
            "returns": "None - initializes the instance",
            "signature": "__init__(page_analyses: List[str], document_summary: DocumentSummary, combined_response: str, processing_stats: Dict[str, Any]) -> None"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "__repr__",
            "parameters": {},
            "purpose": "Returns a string representation of the MultiPageAnalysisResult instance. Automatically generated by the dataclass decorator.",
            "returns": "String representation showing all field names and values",
            "signature": "__repr__() -> str"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "__eq__",
            "parameters": {
              "other": "Another object to compare with"
            },
            "purpose": "Compares two MultiPageAnalysisResult instances for equality based on all fields. Automatically generated by the dataclass decorator.",
            "returns": "True if all fields are equal, False otherwise",
            "signature": "__eq__(other: object) -> bool"
          }
        ]
      },
      "complexity": "simple",
      "component_type": "class",
      "conditional_imports": [],
      "created_at": "2025-12-06 23:46:15",
      "decorators": [
        "dataclass"
      ],
      "dependencies": [
        "dataclasses",
        "typing"
      ],
      "description": "A dataclass that encapsulates the complete results of analyzing a multi-page document, including individual page analyses, document summary, combined response, and processing statistics.",
      "docstring": "Result of multi-page document analysis",
      "id": 1947,
      "imports": [
        "import asyncio",
        "import logging",
        "from pathlib import Path",
        "from typing import List",
        "from typing import Dict",
        "from typing import Any",
        "from typing import Optional",
        "from typing import Tuple",
        "from dataclasses import dataclass",
        "from llm_handler import LLMHandler",
        "from multi_page_processor import PageAnalysis",
        "from multi_page_processor import DocumentSummary",
        "from multi_page_processor import MultiPagePDFProcessor"
      ],
      "imports_required": [
        "from dataclasses import dataclass",
        "from typing import List, Dict, Any",
        "from multi_page_processor import DocumentSummary"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 23,
      "line_start": 18,
      "name": "MultiPageAnalysisResult",
      "parameters": [
        {
          "annotation": "",
          "default": null,
          "name": "bases"
        }
      ],
      "parameters_explained": {
        "combined_response": "A single string that represents the aggregated or synthesized response combining insights from all page analyses into a cohesive narrative.",
        "document_summary": "A DocumentSummary object that provides a high-level overview and summary of the entire document across all pages.",
        "page_analyses": "A list of strings where each string contains the analysis result for an individual page of the document. The order corresponds to the page order in the original document.",
        "processing_stats": "A dictionary containing metadata and statistics about the processing operation, such as timing information, token counts, page counts, or other relevant metrics. Keys are string identifiers and values can be of any type."
      },
      "parent_class": null,
      "purpose": "This dataclass serves as a structured container for the output of multi-page document analysis operations. It aggregates page-level analysis results, provides a high-level document summary, combines responses into a unified format, and tracks processing metrics. It's designed to be used as the return type for multi-page document processing workflows, making it easy to access different aspects of the analysis results in a type-safe manner.",
      "return_annotation": null,
      "return_explained": "When instantiated, returns a MultiPageAnalysisResult object containing all the analysis results. This is a dataclass, so it automatically provides __init__, __repr__, __eq__, and other standard methods. The object provides direct attribute access to all four fields.",
      "settings_required": [
        "Requires DocumentSummary class to be available from multi_page_processor module"
      ],
      "source_code": "class MultiPageAnalysisResult:\n    \"\"\"Result of multi-page document analysis\"\"\"\n    page_analyses: List[str]\n    document_summary: DocumentSummary\n    combined_response: str\n    processing_stats: Dict[str, Any]",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/multi_page_llm_handler.py",
      "tags": [
        "dataclass",
        "document-analysis",
        "multi-page",
        "result-container",
        "pdf-processing",
        "data-structure",
        "analysis-results",
        "document-processing"
      ],
      "updated_at": "2025-12-07T00:46:15.685813",
      "usage_example": "from dataclasses import dataclass\nfrom typing import List, Dict, Any\nfrom multi_page_processor import DocumentSummary, MultiPageAnalysisResult\n\n# Create a document summary\nsummary = DocumentSummary(\n    total_pages=3,\n    main_topics=['AI', 'Machine Learning'],\n    key_findings='Document discusses AI applications'\n)\n\n# Create analysis result\nresult = MultiPageAnalysisResult(\n    page_analyses=[\n        'Page 1: Introduction to AI concepts',\n        'Page 2: Machine learning algorithms',\n        'Page 3: Practical applications'\n    ],\n    document_summary=summary,\n    combined_response='This document provides a comprehensive overview of AI and ML with practical examples.',\n    processing_stats={\n        'total_tokens': 1500,\n        'processing_time_seconds': 12.5,\n        'pages_processed': 3,\n        'model_used': 'gpt-4'\n    }\n)\n\n# Access results\nprint(result.page_analyses[0])\nprint(result.document_summary.total_pages)\nprint(result.combined_response)\nprint(result.processing_stats['total_tokens'])"
    },
    {
      "best_practices": [
        "Always instantiate PDFGenerator once and reuse it for multiple PDF generations to avoid redundant style setup",
        "Ensure output_path directory exists before calling create_response_pdf or generate_error_pdf",
        "Provide base64-encoded images as strings; the class handles decoding and conversion automatically",
        "Include conversation_id and exchange_number for session tracking in multi-turn conversations",
        "The class automatically converts images to grayscale and enhances contrast for e-ink displays",
        "Metadata dictionary should include 'source_file', 'source_type', and optionally 'dimensions' and 'compact_mode'",
        "LLM responses support markdown-like syntax: ## for headers,  for code blocks, - for bullets, **bold**, *italic*, `code`",
        "Long code lines are automatically wrapped at 80 characters for e-ink readability",
        "Error handling is built-in for image conversion failures; check console output for warnings",
        "The SessionDocTemplate dependency must be available in the environment for proper PDF generation with footers"
      ],
      "class_interface": {
        "attributes": [
          {
            "description": "ReportLab stylesheet containing both default and custom e-ink optimized paragraph styles",
            "is_class_variable": false,
            "name": "styles",
            "type": "reportlab.lib.styles.StyleSheet1"
          }
        ],
        "methods": [
          {
            "is_property": false,
            "is_static": false,
            "name": "__init__",
            "parameters": {},
            "purpose": "Initializes the PDFGenerator with ReportLab styles and sets up custom e-ink optimized paragraph styles",
            "returns": "None",
            "signature": "__init__(self)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "setup_eink_styles",
            "parameters": {},
            "purpose": "Creates and registers custom paragraph styles optimized for e-ink displays (EInkTitle, EInkHeader, EInkSubHeader, EInkBody, EInkCode, EInkMeta)",
            "returns": "None (modifies self.styles in place)",
            "signature": "setup_eink_styles(self)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "create_response_pdf",
            "parameters": {
              "conversation_id": "Optional unique identifier for the conversation session",
              "exchange_number": "Optional sequential number for this exchange in the conversation",
              "llm_response": "The AI-generated text response to format and include in the PDF",
              "metadata": "Dictionary containing source_file, source_type, dimensions, and optional compact_mode",
              "original_image_b64": "Base64 encoded string of the original input image",
              "output_path": "File system path where the PDF should be saved"
            },
            "purpose": "Generates a complete PDF document with the original input image, LLM response with formatted text, and metadata information",
            "returns": "String containing the path to the generated PDF file",
            "signature": "create_response_pdf(self, llm_response: str, original_image_b64: str, metadata: Dict[str, Any], output_path: str, conversation_id: Optional[str] = None, exchange_number: Optional[int] = None) -> str"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_format_response_text",
            "parameters": {
              "response_text": "Raw text response from LLM containing markdown-like formatting"
            },
            "purpose": "Parses LLM response text and converts markdown-like syntax into ReportLab flowable objects with appropriate styling",
            "returns": "List of ReportLab flowable objects (Paragraph, Spacer) ready for PDF rendering",
            "signature": "_format_response_text(self, response_text: str) -> list"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_process_inline_formatting",
            "parameters": {
              "text": "Text string containing markdown inline formatting"
            },
            "purpose": "Converts markdown inline formatting (**bold**, *italic*, `code`) to ReportLab XML tags",
            "returns": "String with markdown syntax replaced by ReportLab XML tags (<b>, <i>, <font>)",
            "signature": "_process_inline_formatting(self, text: str) -> str"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_b64_to_image",
            "parameters": {
              "image_b64": "Base64 encoded image string",
              "max_width": "Maximum width in points for the image (default 450)"
            },
            "purpose": "Converts base64 encoded image to ReportLab Image object, optimized for e-ink with grayscale conversion, resizing, and contrast enhancement",
            "returns": "ReportLab Image object ready for PDF inclusion, or None if conversion fails",
            "signature": "_b64_to_image(self, image_b64: str, max_width: int = 450) -> Optional[Image]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "generate_error_pdf",
            "parameters": {
              "conversation_id": "Optional unique identifier for the conversation session",
              "error_message": "Description of the error that occurred",
              "exchange_number": "Optional sequential number for this exchange",
              "original_file": "Name or path of the file that caused the error",
              "output_path": "File system path where the error PDF should be saved"
            },
            "purpose": "Creates a formatted PDF document for error cases with error details and session tracking information",
            "returns": "String containing the path to the generated error PDF file",
            "signature": "generate_error_pdf(self, error_message: str, original_file: str, output_path: str, conversation_id: str = None, exchange_number: int = None) -> str"
          }
        ]
      },
      "complexity": "complex",
      "component_type": "class",
      "conditional_imports": [
        {
          "condition": "only when processing images for contrast enhancement in _b64_to_image method",
          "import": "from PIL import ImageEnhance",
          "optional": false
        }
      ],
      "created_at": "2025-12-06 23:45:45",
      "decorators": [],
      "dependencies": [
        "reportlab",
        "PIL",
        "Pillow",
        "io",
        "base64",
        "pathlib",
        "datetime",
        "typing",
        "textwrap",
        "re"
      ],
      "description": "A class that generates PDF documents optimized for e-ink displays, converting LLM responses and images into formatted, high-contrast PDFs with custom styling.",
      "docstring": "Generates PDF responses optimized for e-ink displays",
      "id": 1946,
      "imports": [
        "import io",
        "import base64",
        "from pathlib import Path",
        "from datetime import datetime",
        "from typing import Dict",
        "from typing import Any",
        "from typing import Optional",
        "from reportlab.lib.pagesizes import letter",
        "from reportlab.lib.pagesizes import A4",
        "from reportlab.lib.styles import getSampleStyleSheet",
        "from reportlab.lib.styles import ParagraphStyle",
        "from reportlab.lib.units import inch",
        "from reportlab.lib.enums import TA_LEFT",
        "from reportlab.lib.enums import TA_JUSTIFY",
        "from reportlab.lib.enums import TA_CENTER",
        "from reportlab.platypus import SimpleDocTemplate",
        "from reportlab.platypus import Paragraph",
        "from reportlab.platypus import Spacer",
        "from reportlab.platypus import Image",
        "from reportlab.platypus import PageBreak",
        "from reportlab.platypus.tableofcontents import TableOfContents",
        "from reportlab.platypus.doctemplate import PageTemplate",
        "from reportlab.platypus.doctemplate import BaseDocTemplate",
        "from reportlab.platypus.frames import Frame",
        "from reportlab.lib import colors",
        "from PIL import Image as PILImage",
        "import textwrap",
        "import re",
        "from PIL import ImageEnhance"
      ],
      "imports_required": [
        "import io",
        "import base64",
        "from pathlib import Path",
        "from datetime import datetime",
        "from typing import Dict, Any, Optional",
        "from reportlab.lib.pagesizes import letter",
        "from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle",
        "from reportlab.lib.enums import TA_LEFT, TA_JUSTIFY, TA_CENTER",
        "from reportlab.platypus import Paragraph, Spacer, Image",
        "from reportlab.lib import colors",
        "from PIL import Image as PILImage",
        "import textwrap",
        "import re"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 430,
      "line_start": 69,
      "name": "PDFGenerator",
      "parameters": [
        {
          "annotation": "",
          "default": null,
          "name": "bases"
        }
      ],
      "parameters_explained": {
        "__init__": "No parameters required. Initializes the PDF generator with ReportLab's sample stylesheet and sets up custom e-ink optimized styles automatically."
      },
      "parent_class": null,
      "purpose": "PDFGenerator creates PDF documents specifically optimized for e-ink display devices by using high-contrast black text, grayscale images, and custom typography. It formats LLM responses with markdown-like syntax support (headers, code blocks, lists, inline formatting), embeds original input images, and includes metadata tracking for conversation sessions. The class handles both successful responses and error cases, producing readable documents suitable for e-ink readers.",
      "return_annotation": null,
      "return_explained": "Instantiation returns a PDFGenerator object ready to generate PDFs. The main methods (create_response_pdf, generate_error_pdf) return strings containing the file path to the generated PDF document.",
      "settings_required": [
        "SessionDocTemplate class must be available in the same module or imported (custom document template with session footer)",
        "Write permissions to the output directory where PDFs will be saved",
        "Sufficient memory to handle image processing and PDF generation"
      ],
      "source_code": "class PDFGenerator:\n    \"\"\"Generates PDF responses optimized for e-ink displays\"\"\"\n    \n    def __init__(self):\n        self.styles = getSampleStyleSheet()\n        self.setup_eink_styles()\n    \n    def setup_eink_styles(self):\n        \"\"\"Setup custom styles optimized for e-ink displays\"\"\"\n        \n        # Main title style\n        self.styles.add(ParagraphStyle(\n            name='EInkTitle',\n            parent=self.styles['Title'],\n            fontSize=18,\n            leading=24,\n            alignment=TA_CENTER,\n            spaceAfter=20,\n            textColor=colors.black,\n            fontName='Helvetica-Bold'\n        ))\n        \n        # Section header style\n        self.styles.add(ParagraphStyle(\n            name='EInkHeader',\n            parent=self.styles['Heading1'],\n            fontSize=14,\n            leading=18,\n            spaceAfter=12,\n            spaceBefore=16,\n            textColor=colors.black,\n            fontName='Helvetica-Bold'\n        ))\n        \n        # Sub-header style\n        self.styles.add(ParagraphStyle(\n            name='EInkSubHeader',\n            parent=self.styles['Heading2'],\n            fontSize=12,\n            leading=16,\n            spaceAfter=8,\n            spaceBefore=12,\n            textColor=colors.black,\n            fontName='Helvetica-Bold'\n        ))\n        \n        # Body text optimized for e-ink\n        self.styles.add(ParagraphStyle(\n            name='EInkBody',\n            parent=self.styles['Normal'],\n            fontSize=11,\n            leading=15,\n            alignment=TA_JUSTIFY,\n            spaceAfter=8,\n            textColor=colors.black,\n            fontName='Helvetica'\n        ))\n        \n        # Code or technical text\n        self.styles.add(ParagraphStyle(\n            name='EInkCode',\n            parent=self.styles['Code'],\n            fontSize=10,\n            leading=13,\n            spaceAfter=8,\n            spaceBefore=4,\n            textColor=colors.black,\n            fontName='Courier',\n            backColor=colors.lightgrey,\n            borderWidth=1,\n            borderColor=colors.black,\n            leftIndent=20,\n            rightIndent=20\n        ))\n        \n        # Metadata style\n        self.styles.add(ParagraphStyle(\n            name='EInkMeta',\n            parent=self.styles['Normal'],\n            fontSize=9,\n            leading=12,\n            alignment=TA_LEFT,\n            spaceAfter=4,\n            textColor=colors.grey,\n            fontName='Helvetica-Oblique'\n        ))\n    \n    def create_response_pdf(self, \n                          llm_response: str, \n                          original_image_b64: str, \n                          metadata: Dict[str, Any],\n                          output_path: str,\n                          conversation_id: Optional[str] = None,\n                          exchange_number: Optional[int] = None) -> str:\n        \"\"\"\n        Generate PDF with original prompt and LLM response\n        \n        Args:\n            llm_response: The AI-generated response\n            original_image_b64: Base64 encoded original image\n            metadata: Image metadata\n            output_path: Path for output PDF\n            \n        Returns:\n            Path to generated PDF\n        \"\"\"\n        print(f\"\ud83d\udcc4 Generating PDF response: {output_path}\")\n        \n        # Use custom document template with session footer\n        doc = 
SessionDocTemplate(\n            output_path,\n            conversation_id=conversation_id,\n            exchange_number=exchange_number,\n            pagesize=letter,\n            rightMargin=72,\n            leftMargin=72,\n            topMargin=72,\n            bottomMargin=72\n        )\n        \n        story = []\n        \n        # Add title\n        timestamp = datetime.now().strftime(\"%Y-%m-%d %H:%M:%S\")\n        title = f\"AI Response - {timestamp}\"\n        story.append(Paragraph(title, self.styles['EInkTitle']))\n        story.append(Spacer(1, 20))\n        \n        # Add metadata section\n        story.append(Paragraph(\"Document Information\", self.styles['EInkHeader']))\n        \n        source_info = f\"Source: {metadata.get('source_file', 'Unknown')}\"\n        story.append(Paragraph(source_info, self.styles['EInkMeta']))\n        \n        source_type = f\"Type: {metadata.get('source_type', 'Unknown').upper()}\"\n        story.append(Paragraph(source_type, self.styles['EInkMeta']))\n        \n        if metadata.get('dimensions'):\n            dims = f\"Dimensions: {metadata['dimensions'][0]} x {metadata['dimensions'][1]} pixels\"\n            story.append(Paragraph(dims, self.styles['EInkMeta']))\n        \n        # Add session information if available\n        if conversation_id:\n            session_info = f\"Conversation: {conversation_id}\"\n            story.append(Paragraph(session_info, self.styles['EInkMeta']))\n            \n        if exchange_number:\n            exchange_info = f\"Exchange: #{exchange_number}\"\n            story.append(Paragraph(exchange_info, self.styles['EInkMeta']))\n        \n        # Add compact mode indicator if present\n        if metadata.get('compact_mode'):\n            format_info = \"Format: Compact (E-ink optimized)\"\n            story.append(Paragraph(format_info, self.styles['EInkMeta']))\n        \n        story.append(Spacer(1, 16))\n        \n        # Add original prompt image\n        story.append(Paragraph(\"Original Input\", self.styles['EInkHeader']))\n        original_img = self._b64_to_image(original_image_b64, max_width=450)\n        if original_img:\n            story.append(original_img)\n        else:\n            story.append(Paragraph(\"*Original image could not be displayed*\", self.styles['EInkMeta']))\n        \n        story.append(Spacer(1, 20))\n        \n        # Add AI response section\n        story.append(Paragraph(\"AI Analysis and Response\", self.styles['EInkHeader']))\n        story.append(Spacer(1, 12))\n        \n        # Process the response text with markdown-like formatting\n        formatted_response = self._format_response_text(llm_response)\n        story.extend(formatted_response)\n        \n        # Add footer with generation info\n        story.append(Spacer(1, 30))\n        story.append(Paragraph(\"---\", self.styles['EInkMeta']))\n        \n        # Footer line 1: Basic generation info\n        footer_text = f\"Generated by E-Ink LLM Assistant on {timestamp}\"\n        story.append(Paragraph(footer_text, self.styles['EInkMeta']))\n        \n        # Footer line 2: Session tracking info\n        if conversation_id and exchange_number:\n            session_footer = f\"Session: {conversation_id} | Exchange: #{exchange_number}\"\n            story.append(Paragraph(session_footer, self.styles['EInkMeta']))\n        elif conversation_id:\n            session_footer = f\"Session: {conversation_id}\"\n            story.append(Paragraph(session_footer, 
self.styles['EInkMeta']))\n        \n        # Build the PDF\n        doc.build(story)\n        print(f\"\u2705 PDF generated successfully: {output_path}\")\n        return output_path\n    \n    def _format_response_text(self, response_text: str) -> list:\n        \"\"\"\n        Format response text with basic markdown-like styling for PDF\n        \n        Args:\n            response_text: Raw response text from LLM\n            \n        Returns:\n            List of ReportLab flowables\n        \"\"\"\n        story = []\n        lines = response_text.split('\\n')\n        \n        i = 0\n        while i < len(lines):\n            line = lines[i].strip()\n            \n            if not line:\n                # Empty line - add small spacer\n                story.append(Spacer(1, 6))\n                i += 1\n                continue\n            \n            # Handle headers (## or ###)\n            if line.startswith('###'):\n                header_text = line[3:].strip()\n                story.append(Paragraph(header_text, self.styles['EInkSubHeader']))\n            elif line.startswith('##'):\n                header_text = line[2:].strip()\n                story.append(Paragraph(header_text, self.styles['EInkHeader']))\n            elif line.startswith('#'):\n                header_text = line[1:].strip()\n                story.append(Paragraph(header_text, self.styles['EInkTitle']))\n            \n            # Handle code blocks (```)\n            elif line.startswith('```'):\n                i += 1\n                code_lines = []\n                while i < len(lines) and not lines[i].strip().startswith('```'):\n                    code_lines.append(lines[i])\n                    i += 1\n                \n                if code_lines:\n                    code_text = '\\n'.join(code_lines)\n                    # Split long code lines for e-ink display\n                    wrapped_code = []\n                    for code_line in code_lines:\n                        if len(code_line) > 80:\n                            wrapped_code.extend(textwrap.wrap(code_line, width=80))\n                        else:\n                            wrapped_code.append(code_line)\n                    \n                    code_text = '\\n'.join(wrapped_code)\n                    story.append(Paragraph(code_text, self.styles['EInkCode']))\n            \n            # Handle bullet points\n            elif line.startswith(('- ', '* ', '+ ')):\n                bullet_text = line[2:].strip()\n                # Process bold text within bullets\n                bullet_text = self._process_inline_formatting(bullet_text)\n                story.append(Paragraph(f\"\u2022 {bullet_text}\", self.styles['EInkBody']))\n            \n            # Handle numbered lists\n            elif line and line[0].isdigit() and '. 
' in line:\n                story.append(Paragraph(line, self.styles['EInkBody']))\n            \n            # Regular paragraph\n            else:\n                if line:\n                    # Process inline formatting (bold, italic)\n                    formatted_line = self._process_inline_formatting(line)\n                    story.append(Paragraph(formatted_line, self.styles['EInkBody']))\n            \n            i += 1\n        \n        return story\n    \n    def _process_inline_formatting(self, text: str) -> str:\n        \"\"\"Process basic inline formatting for ReportLab\"\"\"\n        # Handle bold (**text**)\n        import re\n        \n        # Bold formatting\n        text = re.sub(r'\\*\\*(.*?)\\*\\*', r'<b>\\1</b>', text)\n        \n        # Italic formatting (*text*)\n        text = re.sub(r'\\*(.*?)\\*', r'<i>\\1</i>', text)\n        \n        # Code formatting (`text`)\n        text = re.sub(r'`(.*?)`', r'<font name=\"Courier\">\\1</font>', text)\n        \n        return text\n    \n    def _b64_to_image(self, image_b64: str, max_width: int = 450) -> Optional[Image]:\n        \"\"\"\n        Convert base64 to ReportLab Image optimized for e-ink\n        \n        Args:\n            image_b64: Base64 encoded image\n            max_width: Maximum width in points\n            \n        Returns:\n            ReportLab Image object or None if conversion fails\n        \"\"\"\n        try:\n            img_data = base64.b64decode(image_b64)\n            img = PILImage.open(io.BytesIO(img_data))\n            \n            # Convert to grayscale for better e-ink display\n            if img.mode != 'L':\n                img = img.convert('L')\n            \n            # Resize for e-ink display constraints\n            ratio = min(max_width / img.width, max_width / img.height)\n            if ratio < 1:\n                new_size = (int(img.width * ratio), int(img.height * ratio))\n                img = img.resize(new_size, PILImage.Resampling.LANCZOS)\n            \n            # Enhance contrast for e-ink\n            from PIL import ImageEnhance\n            enhancer = ImageEnhance.Contrast(img)\n            img = enhancer.enhance(1.2)  # Slightly increase contrast\n            \n            # Save to BytesIO\n            img_buffer = io.BytesIO()\n            img.save(img_buffer, format='PNG')\n            img_buffer.seek(0)\n            \n            return Image(img_buffer, width=img.width, height=img.height)\n            \n        except Exception as e:\n            print(f\"\u26a0\ufe0f Error converting image: {e}\")\n            return None\n    \n    def generate_error_pdf(self, error_message: str, original_file: str, output_path: str, \n                          conversation_id: str = None, exchange_number: int = None) -> str:\n        \"\"\"Generate a PDF for error cases\"\"\"\n        doc = SessionDocTemplate(output_path, pagesize=letter, \n                               rightMargin=72, leftMargin=72, topMargin=72, bottomMargin=72,\n                               conversation_id=conversation_id, exchange_number=exchange_number)\n        \n        story = []\n        timestamp = datetime.now().strftime(\"%Y-%m-%d %H:%M:%S\")\n        \n        # Title\n        story.append(Paragraph(\"Processing Error\", self.styles['EInkTitle']))\n        story.append(Spacer(1, 20))\n        \n        # Error details\n        story.append(Paragraph(\"Error Information\", self.styles['EInkHeader']))\n        story.append(Paragraph(f\"File: {original_file}\", 
self.styles['EInkMeta']))\n        story.append(Paragraph(f\"Time: {timestamp}\", self.styles['EInkMeta']))\n        if conversation_id:\n            story.append(Paragraph(f\"Session: {conversation_id}\", self.styles['EInkMeta']))\n        if exchange_number:\n            story.append(Paragraph(f\"Exchange: #{exchange_number}\", self.styles['EInkMeta']))\n        story.append(Spacer(1, 16))\n        \n        story.append(Paragraph(\"Error Message:\", self.styles['EInkSubHeader']))\n        story.append(Paragraph(error_message, self.styles['EInkBody']))\n        \n        story.append(Spacer(1, 20))\n        story.append(Paragraph(\"Please check the input file and try again.\", self.styles['EInkBody']))\n        \n        doc.build(story)\n        return output_path",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/pdf_generator.py",
      "tags": [
        "pdf-generation",
        "e-ink",
        "document-formatting",
        "reportlab",
        "image-processing",
        "markdown-rendering",
        "llm-output",
        "grayscale-conversion",
        "text-formatting",
        "session-tracking"
      ],
      "updated_at": "2025-12-07T00:45:45.944855",
      "usage_example": "from pdf_generator import PDFGenerator\nimport base64\n\n# Instantiate the generator\npdf_gen = PDFGenerator()\n\n# Prepare data\nllm_response = \"## Analysis\\n\\nThis is a **bold** response with `code`.\\n\\n- Bullet point 1\\n- Bullet point 2\"\nwith open('image.png', 'rb') as f:\n    image_b64 = base64.b64encode(f.read()).decode('utf-8')\n\nmetadata = {\n    'source_file': 'input.png',\n    'source_type': 'image',\n    'dimensions': (800, 600),\n    'compact_mode': True\n}\n\n# Generate PDF\noutput_path = pdf_gen.create_response_pdf(\n    llm_response=llm_response,\n    original_image_b64=image_b64,\n    metadata=metadata,\n    output_path='output.pdf',\n    conversation_id='conv_123',\n    exchange_number=1\n)\n\nprint(f'PDF created at: {output_path}')\n\n# Generate error PDF if needed\nerror_path = pdf_gen.generate_error_pdf(\n    error_message='File format not supported',\n    original_file='bad_input.txt',\n    output_path='error.pdf',\n    conversation_id='conv_123',\n    exchange_number=2\n)"
    },
    {
      "best_practices": [],
      "class_interface": {
        "attributes": [],
        "methods": [
          {
            "is_property": false,
            "is_static": false,
            "name": "__init__",
            "parameters": {
              "company_name": "Type: str",
              "font_dir": "Type: Optional[str]",
              "logo_path": "Type: Optional[str]"
            },
            "purpose": "Initialize the PDF generator\n\nParameters\n----------\ncompany_name : str\n    Name of the company to include in generated documents\nlogo_path : str, optional\n    Path to the company logo image\nfont_dir : str, optional\n    Directory containing custom fonts",
            "returns": "None",
            "signature": "__init__(self, company_name, logo_path, font_dir)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_register_fonts",
            "parameters": {
              "font_dir": "Type: str"
            },
            "purpose": "Register custom fonts for use in PDF documents\n\nParameters\n----------\nfont_dir : str\n    Directory containing font files",
            "returns": "None",
            "signature": "_register_fonts(self, font_dir)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_initialize_custom_styles",
            "parameters": {},
            "purpose": "Initialize custom paragraph styles for consistent document formatting",
            "returns": "None",
            "signature": "_initialize_custom_styles(self)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "generate_document_cover",
            "parameters": {
              "author": "Type: str",
              "confidentiality": "Type: str",
              "date": "Type: str",
              "department": "Type: str",
              "doc_number": "Type: str",
              "doc_type": "Type: str",
              "output_path": "Type: str",
              "revision": "Type: str",
              "title": "Type: str"
            },
            "purpose": "Generate a cover page for a controlled document\n\nParameters\n----------\noutput_path : str\n    Path where the PDF will be saved\ndoc_number : str\n    Document number/identifier\ntitle : str\n    Document title\nrevision : str\n    Revision/version number\ndate : str\n    Document date\nauthor : str\n    Document author\ndepartment : str\n    Responsible department\ndoc_type : str\n    Document type\nconfidentiality : str, optional\n    Confidentiality level\n    \nReturns\n-------\nstr\n    Path to the generated PDF",
            "returns": "Returns str",
            "signature": "generate_document_cover(self, output_path, doc_number, title, revision, date, author, department, doc_type, confidentiality) -> str"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "generate_certificate_page",
            "parameters": {
              "approved_date": "Type: str",
              "approvers": "Type: List[Dict[str, str]]",
              "doc_number": "Type: str",
              "output_path": "Type: str",
              "revision": "Type: str",
              "signature_dir": "Type: Optional[str]",
              "title": "Type: str"
            },
            "purpose": "Generate an approval certificate for a controlled document\n\nParameters\n----------\noutput_path : str\n    Path where the PDF will be saved\ndoc_number : str\n    Document number/identifier\ntitle : str\n    Document title\nrevision : str\n    Revision/version number\napprovers : List[Dict[str, str]]\n    List of approvers containing 'name', 'role' and 'date' keys\napproved_date : str\n    Final approval date\nsignature_dir : str, optional\n    Directory containing signature images\n    \nReturns\n-------\nstr\n    Path to the generated PDF",
            "returns": "Returns str",
            "signature": "generate_certificate_page(self, output_path, doc_number, title, revision, approvers, approved_date, signature_dir) -> str"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_generate_verification_code",
            "parameters": {
              "date": "Type: str",
              "doc_number": "Type: str",
              "revision": "Type: str"
            },
            "purpose": "Generate a verification code for document validation\n\nParameters\n----------\ndoc_number : str\n    Document number/identifier\nrevision : str\n    Revision/version number\ndate : str\n    Approval date\n    \nReturns\n-------\nstr\n    Verification code",
            "returns": "Returns str",
            "signature": "_generate_verification_code(self, doc_number, revision, date) -> str"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "generate_audit_report",
            "parameters": {
              "audit_data": "Type: List[Dict[str, Any]]",
              "audit_date": "Type: str",
              "auditor": "Type: str",
              "doc_number": "Type: str",
              "output_path": "Type: str",
              "revision": "Type: str",
              "title": "Type: str"
            },
            "purpose": "Generate an audit report for a document\n\nParameters\n----------\noutput_path : str\n    Path where the PDF will be saved\ndoc_number : str\n    Document number/identifier\ntitle : str\n    Document title\nrevision : str\n    Revision/version number\naudit_data : List[Dict[str, Any]]\n    List of audit entries\naudit_date : str\n    Date of the audit\nauditor : str\n    Name of the auditor\n    \nReturns\n-------\nstr\n    Path to the generated PDF",
            "returns": "Returns str",
            "signature": "generate_audit_report(self, output_path, doc_number, title, revision, audit_data, audit_date, auditor) -> str"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_generate_report_id",
            "parameters": {},
            "purpose": "Generate a unique report ID\n\nReturns\n-------\nstr\n    Unique report ID",
            "returns": "Returns str",
            "signature": "_generate_report_id(self) -> str"
          }
        ]
      },
      "complexity": "moderate",
      "component_type": "class",
      "conditional_imports": [],
      "created_at": "2025-12-06 15:14:35",
      "decorators": [],
      "dependencies": [],
      "description": "PDF document generation for reports and controlled documents\n\nThis class provides methods to generate PDF documents from scratch,\nincluding audit reports, document covers, and certificate pages.",
      "docstring": "PDF document generation for reports and controlled documents\n\nThis class provides methods to generate PDF documents from scratch,\nincluding audit reports, document covers, and certificate pages.",
      "id": 1077,
      "imports": [
        "import os",
        "import io",
        "import logging",
        "import tempfile",
        "import shutil",
        "import subprocess",
        "import json",
        "import hashlib",
        "from datetime import datetime",
        "from typing import Dict",
        "from typing import List",
        "from typing import Any",
        "from typing import Optional",
        "from typing import Tuple",
        "from typing import Union",
        "from pathlib import Path",
        "from reportlab.lib import colors",
        "from reportlab.lib.pagesizes import letter",
        "from reportlab.lib.pagesizes import A4",
        "from reportlab.lib.styles import getSampleStyleSheet",
        "from reportlab.lib.styles import ParagraphStyle",
        "from reportlab.lib.units import inch",
        "from reportlab.lib.units import cm",
        "from reportlab.platypus import SimpleDocTemplate",
        "from reportlab.platypus import Paragraph",
        "from reportlab.platypus import Spacer",
        "from reportlab.platypus import Table",
        "from reportlab.platypus import TableStyle",
        "from reportlab.platypus import Image",
        "from reportlab.platypus import PageBreak",
        "from reportlab.platypus import Flowable",
        "from reportlab.pdfbase import pdfmetrics",
        "from reportlab.pdfbase.ttfonts import TTFont",
        "import fitz",
        "import pikepdf",
        "from CDocs.config import settings",
        "from PIL import Image as PILImage",
        "from docx2pdf import convert",
        "import pandas as pd"
      ],
      "imports_required": [
        "import os",
        "import io",
        "import logging",
        "import tempfile",
        "import shutil"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 1156,
      "line_start": 613,
      "name": "PDFGenerator_v1",
      "parameters": [
        {
          "annotation": "",
          "default": null,
          "name": "bases"
        }
      ],
      "parameters_explained": {
        "bases": "Parameter of type "
      },
      "parent_class": null,
      "purpose": "PDF document generation for reports and controlled documents\n\nThis class provides methods to generate PDF documents from scratch,\nincluding audit reports, document covers, and certificate pages.",
      "return_annotation": null,
      "return_explained": "Returns unspecified type",
      "settings_required": [],
      "source_code": "class PDFGenerator:\n    \"\"\"\n    PDF document generation for reports and controlled documents\n    \n    This class provides methods to generate PDF documents from scratch,\n    including audit reports, document covers, and certificate pages.\n    \"\"\"\n    \n    def __init__(self, \n                 company_name: str = \"Company\", \n                 logo_path: Optional[str] = None,\n                 font_dir: Optional[str] = None):\n        \"\"\"\n        Initialize the PDF generator\n        \n        Parameters\n        ----------\n        company_name : str\n            Name of the company to include in generated documents\n        logo_path : str, optional\n            Path to the company logo image\n        font_dir : str, optional\n            Directory containing custom fonts\n        \"\"\"\n        self.company_name = company_name\n        self.logo_path = logo_path\n        \n        # Register custom fonts if provided\n        if font_dir and os.path.exists(font_dir):\n            self._register_fonts(font_dir)\n        \n        # Initialize default styles\n        self.styles = getSampleStyleSheet()\n        self._initialize_custom_styles()\n    \n    def _register_fonts(self, font_dir: str):\n        \"\"\"\n        Register custom fonts for use in PDF documents\n        \n        Parameters\n        ----------\n        font_dir : str\n            Directory containing font files\n        \"\"\"\n        try:\n            # Common font families to look for\n            font_families = {\n                'arial': ['arial.ttf', 'arialbd.ttf', 'ariali.ttf', 'arialbi.ttf'],\n                'times': ['times.ttf', 'timesbd.ttf', 'timesi.ttf', 'timesbi.ttf'],\n                'calibri': ['calibri.ttf', 'calibrib.ttf', 'calibrii.ttf', 'calibriz.ttf'],\n            }\n            \n            for family, fonts in font_families.items():\n                for font_file in fonts:\n                    font_path = os.path.join(font_dir, font_file)\n                    if os.path.exists(font_path):\n                        font_name = os.path.splitext(font_file)[0]\n                        pdfmetrics.registerFont(TTFont(font_name, font_path))\n                        logger.info(f\"Registered font: {font_name} from {font_path}\")\n        except Exception as e:\n            logger.error(f\"Error registering fonts: {str(e)}\")\n    \n    def _initialize_custom_styles(self):\n        \"\"\"Initialize custom paragraph styles for consistent document formatting\"\"\"\n        # Add custom styles\n        self.styles.add(ParagraphStyle(\n            name='Title',\n            parent=self.styles['Heading1'],\n            fontSize=18,\n            leading=22,\n            alignment=1,  # Center\n            spaceAfter=12\n        ))\n        \n        self.styles.add(ParagraphStyle(\n            name='Subtitle',\n            parent=self.styles['Heading2'],\n            fontSize=14,\n            leading=18,\n            alignment=1,  # Center\n            spaceAfter=10\n        ))\n        \n        self.styles.add(ParagraphStyle(\n            name='Normal-Bold',\n            parent=self.styles['Normal'],\n            fontName='Helvetica-Bold'\n        ))\n        \n        self.styles.add(ParagraphStyle(\n            name='Normal-Italic',\n            parent=self.styles['Normal'],\n            fontName='Helvetica-Oblique'\n        ))\n        \n        self.styles.add(ParagraphStyle(\n            name='Caption',\n            parent=self.styles['Normal'],\n            
fontSize=8,\n            leading=10,\n            alignment=1  # Center\n        ))\n        \n        self.styles.add(ParagraphStyle(\n            name='Header',\n            parent=self.styles['Normal'],\n            fontSize=9,\n            leading=11,\n            alignment=0  # Left\n        ))\n        \n        self.styles.add(ParagraphStyle(\n            name='Footer',\n            parent=self.styles['Normal'],\n            fontSize=9,\n            leading=11,\n            alignment=1  # Center\n        ))\n    \n    def generate_document_cover(self, \n                               output_path: str,\n                               doc_number: str,\n                               title: str,\n                               revision: str,\n                               date: str,\n                               author: str,\n                               department: str,\n                               doc_type: str,\n                               confidentiality: str = \"Internal Use\") -> str:\n        \"\"\"\n        Generate a cover page for a controlled document\n        \n        Parameters\n        ----------\n        output_path : str\n            Path where the PDF will be saved\n        doc_number : str\n            Document number/identifier\n        title : str\n            Document title\n        revision : str\n            Revision/version number\n        date : str\n            Document date\n        author : str\n            Document author\n        department : str\n            Responsible department\n        doc_type : str\n            Document type\n        confidentiality : str, optional\n            Confidentiality level\n            \n        Returns\n        -------\n        str\n            Path to the generated PDF\n        \"\"\"\n        # Create PDF document\n        doc = SimpleDocTemplate(\n            output_path,\n            pagesize=A4,\n            leftMargin=inch,\n            rightMargin=inch,\n            topMargin=inch,\n            bottomMargin=inch\n        )\n        \n        # Create content elements\n        elements = []\n        \n        # Add logo if available\n        if self.logo_path and os.path.exists(self.logo_path):\n            try:\n                img = Image(self.logo_path, width=2*inch, height=1*inch)\n                elements.append(img)\n                elements.append(Spacer(1, 0.5*inch))\n            except Exception as e:\n                logger.warning(f\"Could not load logo: {str(e)}\")\n        \n        # Add company name\n        elements.append(Paragraph(self.company_name, self.styles['Title']))\n        elements.append(Spacer(1, 0.25*inch))\n        \n        # Add document type and confidentiality\n        elements.append(Paragraph(f\"{doc_type} - {confidentiality}\", self.styles['Subtitle']))\n        elements.append(Spacer(1, 0.5*inch))\n        \n        # Add document title\n        elements.append(Paragraph(title, self.styles['Title']))\n        elements.append(Spacer(1, 0.5*inch))\n        \n        # Add document information table\n        data = [\n            [\"Document Number:\", doc_number],\n            [\"Revision:\", revision],\n            [\"Date:\", date],\n            [\"Author:\", author],\n            [\"Department:\", department]\n        ]\n        \n        # Create table with appropriate styling\n        table = Table(data, colWidths=[1.5*inch, 3*inch])\n        table.setStyle(TableStyle([\n            ('FONTNAME', (0, 0), (0, -1), 'Helvetica-Bold'),\n            ('FONTNAME', (1, 0), 
(1, -1), 'Helvetica'),\n            ('ALIGN', (0, 0), (0, -1), 'RIGHT'),\n            ('ALIGN', (1, 0), (1, -1), 'LEFT'),\n            ('VALIGN', (0, 0), (-1, -1), 'MIDDLE'),\n            ('GRID', (0, 0), (-1, -1), 0.5, colors.grey),\n            ('BOX', (0, 0), (-1, -1), 1, colors.black),\n            ('BACKGROUND', (0, 0), (0, -1), colors.lightgrey)\n        ]))\n        \n        elements.append(table)\n        elements.append(Spacer(1, 1*inch))\n        \n        # Add approval notice\n        elements.append(Paragraph(\n            \"This document is subject to controlled distribution and requires \"\n            \"formal approval before use.\",\n            self.styles['Normal-Bold']\n        ))\n        \n        # Build the document\n        doc.build(elements)\n        \n        logger.info(f\"Generated document cover page: {output_path}\")\n        return output_path\n    \n    def generate_certificate_page(self,\n                                 output_path: str,\n                                 doc_number: str,\n                                 title: str,\n                                 revision: str,\n                                 approvers: List[Dict[str, str]],\n                                 approved_date: str,\n                                 signature_dir: Optional[str] = None) -> str:\n        \"\"\"\n        Generate an approval certificate for a controlled document\n        \n        Parameters\n        ----------\n        output_path : str\n            Path where the PDF will be saved\n        doc_number : str\n            Document number/identifier\n        title : str\n            Document title\n        revision : str\n            Revision/version number\n        approvers : List[Dict[str, str]]\n            List of approvers containing 'name', 'role' and 'date' keys\n        approved_date : str\n            Final approval date\n        signature_dir : str, optional\n            Directory containing signature images\n            \n        Returns\n        -------\n        str\n            Path to the generated PDF\n        \"\"\"\n        # Create PDF document\n        doc = SimpleDocTemplate(\n            output_path,\n            pagesize=A4,\n            leftMargin=inch,\n            rightMargin=inch,\n            topMargin=inch,\n            bottomMargin=inch\n        )\n        \n        # Create content elements\n        elements = []\n        \n        # Add certificate title\n        elements.append(Paragraph(\"Document Approval Certificate\", self.styles['Title']))\n        elements.append(Spacer(1, 0.25*inch))\n        \n        # Add document information\n        elements.append(Paragraph(f\"Document: {title}\", self.styles['Normal']))\n        elements.append(Paragraph(f\"Document Number: {doc_number}\", self.styles['Normal']))\n        elements.append(Paragraph(f\"Revision: {revision}\", self.styles['Normal']))\n        elements.append(Spacer(1, 0.5*inch))\n        \n        # Add certificate text\n        elements.append(Paragraph(\n            \"This document has been reviewed and approved according to the Document \"\n            f\"Control Procedure. 
It was approved on {approved_date} and is subject \"\n            \"to periodic review.\",\n            self.styles['Normal']\n        ))\n        elements.append(Spacer(1, 0.5*inch))\n        \n        # Add approvers table with signatures\n        elements.append(Paragraph(\"Approvals:\", self.styles['Normal-Bold']))\n        elements.append(Spacer(1, 0.1*inch))\n        \n        # Create approvers table data\n        approver_data = [[\"Name\", \"Role\", \"Date\", \"Signature\"]]\n        \n        for approver in approvers:\n            name = approver.get('name', '')\n            role = approver.get('role', '')\n            date = approver.get('date', '')\n            \n            # Check for signature image\n            signature_img = None\n            if signature_dir:\n                # Look for signature file based on name \n                # (typically using a sanitized version of the name)\n                safe_name = \"\".join(c for c in name if c.isalnum()).lower()\n                sig_path = os.path.join(signature_dir, f\"{safe_name}.png\")\n                alt_sig_path = os.path.join(signature_dir, f\"{safe_name}.jpg\")\n                \n                if os.path.exists(sig_path):\n                    signature_img = SignatureImage(sig_path, width=1.5*inch, height=0.6*inch)\n                elif os.path.exists(alt_sig_path):\n                    signature_img = SignatureImage(alt_sig_path, width=1.5*inch, height=0.6*inch)\n                else:\n                    # Use a signature placeholder\n                    signature_img = SignatureImage(\"nonexistent.png\", width=1.5*inch, height=0.6*inch)\n            else:\n                # Use a signature placeholder\n                signature_img = SignatureImage(\"nonexistent.png\", width=1.5*inch, height=0.6*inch)\n            \n            # Add row to table\n            approver_data.append([name, role, date, signature_img])\n        \n        # Create table with appropriate styling\n        table = Table(approver_data, colWidths=[1.3*inch, 1.3*inch, 1*inch, 1.7*inch])\n        table.setStyle(TableStyle([\n            ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),\n            ('ALIGN', (0, 0), (-1, 0), 'CENTER'),\n            ('BACKGROUND', (0, 0), (-1, 0), colors.lightgrey),\n            ('GRID', (0, 0), (-1, -1), 0.5, colors.grey),\n            ('BOX', (0, 0), (-1, -1), 1, colors.black),\n            ('VALIGN', (0, 0), (-1, -1), 'MIDDLE'),\n            ('ALIGN', (0, 1), (-1, -1), 'CENTER'),\n        ]))\n        \n        elements.append(table)\n        elements.append(Spacer(1, 0.5*inch))\n        \n        # Add validity statement\n        elements.append(Paragraph(\n            \"This certificate confirms that the document has been approved \"\n            \"by all required stakeholders and is valid for use.\",\n            self.styles['Normal-Italic']\n        ))\n        \n        # Add verification information\n        verification_code = self._generate_verification_code(doc_number, revision, approved_date)\n        elements.append(Spacer(1, 0.5*inch))\n        elements.append(Paragraph(f\"Verification Code: {verification_code}\", self.styles['Caption']))\n        elements.append(Paragraph(f\"Generated on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\", self.styles['Caption']))\n        \n        # Build the document\n        doc.build(elements)\n        \n        logger.info(f\"Generated approval certificate: {output_path}\")\n        return output_path\n    \n    def _generate_verification_code(self, 
doc_number: str, revision: str, date: str) -> str:\n        \"\"\"\n        Generate a verification code for document validation\n        \n        Parameters\n        ----------\n        doc_number : str\n            Document number/identifier\n        revision : str\n            Revision/version number\n        date : str\n            Approval date\n            \n        Returns\n        -------\n        str\n            Verification code\n        \"\"\"\n        # Create a hash of the document information\n        verification_string = f\"{doc_number}-{revision}-{date}\"\n        hash_object = hashlib.sha256(verification_string.encode())\n        # Return a shortened version of the hash\n        return hash_object.hexdigest()[:12].upper()\n    \n    def generate_audit_report(self,\n                             output_path: str,\n                             doc_number: str,\n                             title: str,\n                             revision: str,\n                             audit_data: List[Dict[str, Any]],\n                             audit_date: str,\n                             auditor: str) -> str:\n        \"\"\"\n        Generate an audit report for a document\n        \n        Parameters\n        ----------\n        output_path : str\n            Path where the PDF will be saved\n        doc_number : str\n            Document number/identifier\n        title : str\n            Document title\n        revision : str\n            Revision/version number\n        audit_data : List[Dict[str, Any]]\n            List of audit entries\n        audit_date : str\n            Date of the audit\n        auditor : str\n            Name of the auditor\n            \n        Returns\n        -------\n        str\n            Path to the generated PDF\n        \"\"\"\n        # Create PDF document\n        doc = SimpleDocTemplate(\n            output_path,\n            pagesize=A4,\n            leftMargin=inch,\n            rightMargin=inch,\n            topMargin=inch,\n            bottomMargin=inch\n        )\n        \n        # Create content elements\n        elements = []\n        \n        # Add report title\n        elements.append(Paragraph(\"Document Audit Report\", self.styles['Title']))\n        elements.append(Spacer(1, 0.25*inch))\n        \n        # Add document information\n        elements.append(Paragraph(f\"Document: {title}\", self.styles['Normal']))\n        elements.append(Paragraph(f\"Document Number: {doc_number}\", self.styles['Normal']))\n        elements.append(Paragraph(f\"Revision: {revision}\", self.styles['Normal']))\n        elements.append(Paragraph(f\"Audit Date: {audit_date}\", self.styles['Normal']))\n        elements.append(Paragraph(f\"Auditor: {auditor}\", self.styles['Normal']))\n        elements.append(Spacer(1, 0.5*inch))\n        \n        # Add audit summary\n        elements.append(Paragraph(\"Audit Summary\", self.styles['Heading2']))\n        \n        # Count audit events by type\n        event_counts = {}\n        for entry in audit_data:\n            event_type = entry.get('event_type', 'Unknown')\n            event_counts[event_type] = event_counts.get(event_type, 0) + 1\n        \n        # Add summary table\n        summary_data = [[\"Event Type\", \"Count\"]]\n        for event_type, count in event_counts.items():\n            summary_data.append([event_type, str(count)])\n        \n        summary_table = Table(summary_data, colWidths=[3*inch, 1*inch])\n        summary_table.setStyle(TableStyle([\n            ('FONTNAME', 
(0, 0), (-1, 0), 'Helvetica-Bold'),\n            ('ALIGN', (0, 0), (-1, 0), 'CENTER'),\n            ('BACKGROUND', (0, 0), (-1, 0), colors.lightgrey),\n            ('GRID', (0, 0), (-1, -1), 0.5, colors.grey),\n            ('BOX', (0, 0), (-1, -1), 1, colors.black),\n            ('VALIGN', (0, 0), (-1, -1), 'MIDDLE'),\n            ('ALIGN', (1, 0), (1, -1), 'CENTER'),\n        ]))\n        \n        elements.append(summary_table)\n        elements.append(Spacer(1, 0.5*inch))\n        \n        # Add detailed audit log\n        elements.append(Paragraph(\"Audit Log Detail\", self.styles['Heading2']))\n        \n        # Create audit log table\n        log_data = [[\"Timestamp\", \"User\", \"Event Type\", \"Details\"]]\n        \n        for entry in audit_data:\n            timestamp = entry.get('timestamp', '')\n            user = entry.get('user', '')\n            event_type = entry.get('event_type', '')\n            details = entry.get('details', '')\n            \n            # Truncate long details\n            if len(details) > 100:\n                details = details[:97] + \"...\"\n            \n            log_data.append([timestamp, user, event_type, details])\n        \n        # Create table with appropriate styling\n        log_table = Table(log_data, colWidths=[1.2*inch, 1*inch, 1.2*inch, 2.1*inch])\n        log_table.setStyle(TableStyle([\n            ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),\n            ('ALIGN', (0, 0), (-1, 0), 'CENTER'),\n            ('BACKGROUND', (0, 0), (-1, 0), colors.lightgrey),\n            ('GRID', (0, 0), (-1, -1), 0.5, colors.grey),\n            ('BOX', (0, 0), (-1, -1), 1, colors.black),\n            ('VALIGN', (0, 0), (-1, -1), 'MIDDLE'),\n            ('ALIGN', (0, 1), (2, -1), 'CENTER'),\n            ('ALIGN', (3, 1), (3, -1), 'LEFT'),\n        ]))\n        \n        # Add zebra striping\n        for i in range(1, len(log_data), 2):\n            log_table.setStyle(TableStyle([('BACKGROUND', (0, i), (-1, i), colors.whitesmoke)]))\n        \n        elements.append(log_table)\n        elements.append(Spacer(1, 0.5*inch))\n        \n        # Add certification\n        elements.append(Paragraph(\n            \"This report has been generated automatically by the Controlled \"\n            \"Document Management System. It provides a complete audit trail \"\n            \"of all recorded activities for this document.\",\n            self.styles['Normal-Italic']\n        ))\n        \n        # Add report footer\n        elements.append(Spacer(1, 0.5*inch))\n        elements.append(Paragraph(f\"Report generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\", self.styles['Caption']))\n        elements.append(Paragraph(f\"Report ID: {self._generate_report_id()}\", self.styles['Caption']))\n        \n        # Build the document\n        doc.build(elements)\n        \n        logger.info(f\"Generated audit report: {output_path}\")\n        return output_path\n    \n    def _generate_report_id(self) -> str:\n        \"\"\"\n        Generate a unique report ID\n        \n        Returns\n        -------\n        str\n            Unique report ID\n        \"\"\"\n        # Generate a timestamp-based report ID\n        timestamp = datetime.now().strftime('%Y%m%d%H%M%S')\n        random_suffix = hashlib.md5(os.urandom(8)).hexdigest()[:6]\n        return f\"RPT-{timestamp}-{random_suffix}\"",
      "source_file": "/tf/active/vicechatdev/CDocs/utils/pdf_utils.py",
      "tags": [
        "class",
        "pdfgenerator"
      ],
      "updated_at": "2025-12-07T00:45:45.942184",
      "usage_example": "# Example usage:\n# result = PDFGenerator(bases)"
    },
    {
      "best_practices": [
        "Always call the build() method with a list of flowables to generate the actual PDF file",
        "The conversation_id and exchange_number are optional but recommended for tracking multi-exchange conversations",
        "The template reserves 0.5 inches at the bottom for the footer, ensure your content fits within the remaining space",
        "Use standard ReportLab flowables (Paragraph, Spacer, Image, etc.) to populate the document",
        "The footer is automatically added to every page via the onPage callback mechanism",
        "If neither conversation_id nor exchange_number is provided, the footer displays 'E-Ink LLM Assistant' as default text",
        "The class modifies the document's frame to leave space for the footer, so manual frame adjustments are not needed",
        "Session information is rendered in grey color at 8pt Helvetica font for subtle appearance"
      ],
      "class_interface": {
        "attributes": [
          {
            "description": "Stores the conversation identifier to be displayed in the footer",
            "is_class_variable": false,
            "name": "conversation_id",
            "type": "Optional[str]"
          },
          {
            "description": "Stores the exchange number to be displayed in the footer",
            "is_class_variable": false,
            "name": "exchange_number",
            "type": "Optional[int]"
          },
          {
            "description": "Inherited from BaseDocTemplate. Left margin of the page in points",
            "is_class_variable": false,
            "name": "leftMargin",
            "type": "float"
          },
          {
            "description": "Inherited from BaseDocTemplate. Right margin of the page in points",
            "is_class_variable": false,
            "name": "rightMargin",
            "type": "float"
          },
          {
            "description": "Inherited from BaseDocTemplate. Top margin of the page in points",
            "is_class_variable": false,
            "name": "topMargin",
            "type": "float"
          },
          {
            "description": "Inherited from BaseDocTemplate. Bottom margin of the page in points",
            "is_class_variable": false,
            "name": "bottomMargin",
            "type": "float"
          },
          {
            "description": "Inherited from BaseDocTemplate. Width of the content area in points",
            "is_class_variable": false,
            "name": "width",
            "type": "float"
          },
          {
            "description": "Inherited from BaseDocTemplate. Height of the content area in points",
            "is_class_variable": false,
            "name": "height",
            "type": "float"
          },
          {
            "description": "Inherited from BaseDocTemplate. Current page number during document generation",
            "is_class_variable": false,
            "name": "page",
            "type": "int"
          }
        ],
        "methods": [
          {
            "is_property": false,
            "is_static": false,
            "name": "__init__",
            "parameters": {
              "**kwargs": "Additional arguments passed to BaseDocTemplate (pagesize, margins, etc.)",
              "conversation_id": "Optional conversation identifier for footer display",
              "exchange_number": "Optional exchange number for footer display",
              "filename": "Path where the PDF will be saved"
            },
            "purpose": "Initializes the document template with session tracking information and sets up the page layout with footer space",
            "returns": "None (constructor)",
            "signature": "__init__(self, filename, conversation_id=None, exchange_number=None, **kwargs)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "add_session_footer",
            "parameters": {
              "canvas": "ReportLab canvas object for drawing on the page",
              "doc": "Document object containing page information and dimensions"
            },
            "purpose": "Callback method that adds session information and page numbers to the footer of each page during PDF generation",
            "returns": "None (modifies canvas in-place)",
            "signature": "add_session_footer(self, canvas, doc)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "build",
            "parameters": {
              "**kwargs": "Additional build options",
              "flowables": "List of ReportLab flowable objects to render in the document"
            },
            "purpose": "Inherited from BaseDocTemplate. Builds the PDF document from a list of flowables (Paragraph, Spacer, Image, etc.)",
            "returns": "None (writes PDF to file)",
            "signature": "build(self, flowables, **kwargs)"
          }
        ]
      },
      "complexity": "moderate",
      "component_type": "class",
      "conditional_imports": [],
      "created_at": "2025-12-06 23:45:07",
      "decorators": [],
      "dependencies": [
        "reportlab"
      ],
      "description": "A custom ReportLab document template that extends BaseDocTemplate to add session information (conversation ID and exchange number) in the footer of each page.",
      "docstring": "Custom document template with session info in footer",
      "id": 1945,
      "imports": [
        "import io",
        "import base64",
        "from pathlib import Path",
        "from datetime import datetime",
        "from typing import Dict",
        "from typing import Any",
        "from typing import Optional",
        "from reportlab.lib.pagesizes import letter",
        "from reportlab.lib.pagesizes import A4",
        "from reportlab.lib.styles import getSampleStyleSheet",
        "from reportlab.lib.styles import ParagraphStyle",
        "from reportlab.lib.units import inch",
        "from reportlab.lib.enums import TA_LEFT",
        "from reportlab.lib.enums import TA_JUSTIFY",
        "from reportlab.lib.enums import TA_CENTER",
        "from reportlab.platypus import SimpleDocTemplate",
        "from reportlab.platypus import Paragraph",
        "from reportlab.platypus import Spacer",
        "from reportlab.platypus import Image",
        "from reportlab.platypus import PageBreak",
        "from reportlab.platypus.tableofcontents import TableOfContents",
        "from reportlab.platypus.doctemplate import PageTemplate",
        "from reportlab.platypus.doctemplate import BaseDocTemplate",
        "from reportlab.platypus.frames import Frame",
        "from reportlab.lib import colors",
        "from PIL import Image as PILImage",
        "import textwrap",
        "import re",
        "from PIL import ImageEnhance"
      ],
      "imports_required": [
        "from reportlab.platypus.doctemplate import BaseDocTemplate",
        "from reportlab.platypus.doctemplate import PageTemplate",
        "from reportlab.platypus.frames import Frame",
        "from reportlab.lib import colors",
        "from reportlab.lib.units import inch"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 67,
      "line_start": 18,
      "name": "SessionDocTemplate",
      "parameters": [
        {
          "annotation": "BaseDocTemplate",
          "default": null,
          "name": "bases"
        }
      ],
      "parameters_explained": {
        "**kwargs": "Additional keyword arguments passed to the parent BaseDocTemplate class, such as pagesize, leftMargin, rightMargin, topMargin, bottomMargin, etc.",
        "conversation_id": "Optional identifier for the conversation session. Used to track which conversation this document belongs to. Displayed in the footer as 'Session: {conversation_id}'.",
        "exchange_number": "Optional number indicating which exchange in the conversation this document represents. Displayed in footer as 'Exchange #{exchange_number}'.",
        "filename": "The path/filename where the PDF document will be saved. Can be a string path or file-like object."
      },
      "parent_class": null,
      "purpose": "This class creates PDF documents with customized footers containing session tracking information. It's designed for generating PDF reports from conversational AI interactions, displaying page numbers on the left and session metadata on the right of each page footer. The template automatically manages page layout with appropriate margins to accommodate the footer.",
      "return_annotation": null,
      "return_explained": "Instantiation returns a SessionDocTemplate object that can be used to build PDF documents. The object inherits all methods from BaseDocTemplate, primarily the build() method which takes a list of flowables (Paragraph, Spacer, Image, etc.) and generates the PDF file.",
      "settings_required": [
        "No environment variables or configuration files required",
        "ReportLab library must be installed: pip install reportlab"
      ],
      "source_code": "class SessionDocTemplate(BaseDocTemplate):\n    \"\"\"Custom document template with session info in footer\"\"\"\n    \n    def __init__(self, filename, conversation_id=None, exchange_number=None, **kwargs):\n        super().__init__(filename, **kwargs)\n        self.conversation_id = conversation_id\n        self.exchange_number = exchange_number\n        \n        # Create frame for main content (leaving space for footer)\n        main_frame = Frame(\n            self.leftMargin, self.bottomMargin + 0.5*inch,\n            self.width, self.height - 0.5*inch,\n            id='main'\n        )\n        \n        # Create page template\n        main_template = PageTemplate(\n            id='main',\n            frames=[main_frame],\n            onPage=self.add_session_footer\n        )\n        \n        self.addPageTemplates([main_template])\n    \n    def add_session_footer(self, canvas, doc):\n        \"\"\"Add session information to page footer\"\"\"\n        canvas.saveState()\n        \n        # Set footer style\n        canvas.setFont('Helvetica', 8)\n        canvas.setFillColor(colors.grey)\n        \n        # Left side: page number\n        page_text = f\"Page {doc.page}\"\n        canvas.drawString(doc.leftMargin, doc.bottomMargin, page_text)\n        \n        # Right side: session info\n        if self.conversation_id and self.exchange_number:\n            session_text = f\"Session: {self.conversation_id} | Exchange #{self.exchange_number}\"\n        elif self.conversation_id:\n            session_text = f\"Session: {self.conversation_id}\"\n        else:\n            session_text = \"E-Ink LLM Assistant\"\n            \n        # Calculate position for right-aligned text\n        text_width = canvas.stringWidth(session_text, 'Helvetica', 8)\n        x_position = doc.width + doc.leftMargin - text_width\n        canvas.drawString(x_position, doc.bottomMargin, session_text)\n        \n        canvas.restoreState()",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/pdf_generator.py",
      "tags": [
        "pdf-generation",
        "reportlab",
        "document-template",
        "footer",
        "session-tracking",
        "page-layout",
        "conversational-ai",
        "report-generation"
      ],
      "updated_at": "2025-12-07T00:45:07.100584",
      "usage_example": "from reportlab.platypus.doctemplate import BaseDocTemplate\nfrom reportlab.platypus.doctemplate import PageTemplate\nfrom reportlab.platypus.frames import Frame\nfrom reportlab.lib import colors\nfrom reportlab.lib.units import inch\nfrom reportlab.platypus import Paragraph\nfrom reportlab.lib.styles import getSampleStyleSheet\n\n# Instantiate the template\ndoc = SessionDocTemplate(\n    'output.pdf',\n    conversation_id='conv_12345',\n    exchange_number=3\n)\n\n# Create content\nstyles = getSampleStyleSheet()\nstory = [\n    Paragraph('This is a test document', styles['Title']),\n    Paragraph('Content goes here', styles['Normal'])\n]\n\n# Build the PDF\ndoc.build(story)\n\n# Example without session info\ndoc2 = SessionDocTemplate('simple.pdf')\ndoc2.build(story)"
    },
    {
      "best_practices": [
        "Always use async/await when calling methods as this is an async class",
        "Provide a proper async callback function to start_watching() with signature: async def callback(document, local_file_path)",
        "The watcher maintains state (processed_files set) to avoid reprocessing files, so use a single instance per folder",
        "Files are downloaded to temporary directories that are automatically cleaned up after callback execution",
        "Handle KeyboardInterrupt to gracefully stop the start_watching() loop",
        "The watcher automatically detects client type from cloud_manager, no manual configuration needed",
        "For rmcl client, initial files in the folder are marked as processed to avoid processing existing files on startup",
        "Error handling is built-in but callback functions should implement their own error handling for robustness",
        "The poll_interval should be balanced between responsiveness and API rate limits",
        "The watcher will continue running even if individual file processing fails"
      ],
      "class_interface": {
        "attributes": [
          {
            "description": "The cloud manager instance used to interact with reMarkable Cloud API",
            "is_class_variable": false,
            "name": "cloud_manager",
            "type": "RemarkableCloudManager"
          },
          {
            "description": "Path to the folder being monitored for new files",
            "is_class_variable": false,
            "name": "watch_folder_path",
            "type": "str"
          },
          {
            "description": "Number of seconds to wait between polling checks",
            "is_class_variable": false,
            "name": "poll_interval",
            "type": "int"
          },
          {
            "description": "Logger instance for recording errors and debug information",
            "is_class_variable": false,
            "name": "logger",
            "type": "logging.Logger"
          },
          {
            "description": "Type of watcher being used: 'rest' or 'rmcl'",
            "is_class_variable": false,
            "name": "watcher_type",
            "type": "str"
          },
          {
            "description": "REST API watcher instance (only present when watcher_type is 'rest')",
            "is_class_variable": false,
            "name": "rest_watcher",
            "type": "RemarkableRestFileWatcher"
          },
          {
            "description": "Set of document IDs that have already been processed (only for rmcl watcher type)",
            "is_class_variable": false,
            "name": "processed_files",
            "type": "Set[str]"
          },
          {
            "description": "Timestamp of the last check for new files (only for rmcl watcher type)",
            "is_class_variable": false,
            "name": "last_check_time",
            "type": "datetime"
          }
        ],
        "methods": [
          {
            "is_property": false,
            "is_static": false,
            "name": "__init__",
            "parameters": {
              "cloud_manager": "RemarkableCloudManager instance providing cloud access",
              "poll_interval": "Seconds between polling checks (default 60)",
              "watch_folder_path": "Path to the folder to monitor for new files"
            },
            "purpose": "Initialize the file watcher with cloud manager, folder path, and polling interval. Automatically detects and configures the appropriate watcher type (REST or rmcl).",
            "returns": "None (constructor)",
            "signature": "__init__(self, cloud_manager: RemarkableCloudManager, watch_folder_path: str, poll_interval: int = 60)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "get_new_files",
            "parameters": {},
            "purpose": "Retrieve a list of new files that have appeared in the watched folder since the last check. Tracks processed files to avoid duplicates.",
            "returns": "List of Document objects (rmcl) or dictionaries (REST API) representing new files. Returns empty list on error or if no new files found.",
            "signature": "async get_new_files(self) -> List[Union[Dict, Document]]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "start_watching",
            "parameters": {
              "callback": "Async function with signature: async def callback(document, local_file_path). Called for each new file with the document object and path to downloaded file."
            },
            "purpose": "Start continuous monitoring of the folder, calling the provided callback function for each new file detected. Runs indefinitely until interrupted.",
            "returns": "None. Runs until KeyboardInterrupt or fatal error.",
            "signature": "async start_watching(self, callback)"
          }
        ]
      },
      "complexity": "moderate",
      "component_type": "class",
      "conditional_imports": [
        {
          "condition": "only when cloud_manager.client_type == 'rest'",
          "import": "from remarkable_rest_client import RemarkableRestFileWatcher",
          "optional": false
        },
        {
          "condition": "only when cloud_manager.client_type == 'rmcl'",
          "import": "from rmcl import Document",
          "optional": false
        }
      ],
      "created_at": "2025-12-06 23:44:37",
      "decorators": [],
      "dependencies": [
        "asyncio",
        "os",
        "tempfile",
        "time",
        "uuid",
        "datetime",
        "pathlib",
        "typing",
        "logging",
        "json",
        "remarkable_rest_client",
        "rmcl"
      ],
      "description": "A unified file watcher class that monitors a reMarkable Cloud folder for new files, supporting both REST API and rmcl client implementations with automatic client type detection.",
      "docstring": "Unified file watcher that works with both REST API and rmcl clients",
      "id": 1944,
      "imports": [
        "import asyncio",
        "import os",
        "import tempfile",
        "import time",
        "import uuid",
        "from datetime import datetime",
        "from datetime import timedelta",
        "from pathlib import Path",
        "from typing import Dict",
        "from typing import List",
        "from typing import Optional",
        "from typing import Set",
        "from typing import Tuple",
        "from typing import Union",
        "import logging",
        "import json",
        "from remarkable_rest_client import RemarkableRestClient",
        "from remarkable_rest_client import RemarkableRestFileWatcher",
        "import sys",
        "from pathlib import Path",
        "from rmcl import Item",
        "from rmcl import Document",
        "from rmcl import Folder",
        "from rmcl.exceptions import ApiError",
        "from rmcl.exceptions import AuthError",
        "from rmcl.exceptions import DocumentNotFound",
        "from rmcl.exceptions import FolderNotFound",
        "from rmcl import Item",
        "from rmcl import Document",
        "from rmcl import Folder",
        "from rmcl.exceptions import ApiError",
        "from rmcl.exceptions import AuthError",
        "from rmcl.exceptions import DocumentNotFound",
        "from rmcl.exceptions import FolderNotFound"
      ],
      "imports_required": [
        "import asyncio",
        "import tempfile",
        "import logging",
        "from datetime import datetime",
        "from pathlib import Path",
        "from typing import Dict, List, Optional, Set, Union",
        "from remarkable_rest_client import RemarkableRestClient, RemarkableRestFileWatcher",
        "from rmcl import Document"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 592,
      "line_start": 461,
      "name": "RemarkableFileWatcher",
      "parameters": [
        {
          "annotation": "",
          "default": null,
          "name": "bases"
        }
      ],
      "parameters_explained": {
        "cloud_manager": "An instance of RemarkableCloudManager that provides the connection to reMarkable Cloud. Must have a client_type attribute ('rest' or 'rmcl') and methods for listing and downloading files.",
        "poll_interval": "Integer number of seconds to wait between checks for new files. Default is 60 seconds. Lower values increase responsiveness but may increase API usage.",
        "watch_folder_path": "String path to the folder to monitor. For REST API, the folder name is extracted from the path. For rmcl, the full path is used. Can be empty string or '/' for root folder."
      },
      "parent_class": null,
      "purpose": "RemarkableFileWatcher provides a unified interface for monitoring reMarkable Cloud folders for new files. It automatically detects whether to use REST API or rmcl client based on the cloud_manager's client type, polls the folder at specified intervals, tracks processed files to avoid reprocessing, and executes callbacks when new files are detected. The class handles file downloading, temporary storage, and error recovery during the watch loop.",
      "return_annotation": null,
      "return_explained": "The constructor returns an instance of RemarkableFileWatcher. The get_new_files() method returns a List[Union[Dict, Document]] containing new files found since the last check (empty list if none found or on error). The start_watching() method does not return a value but runs indefinitely until interrupted.",
      "settings_required": [
        "A configured RemarkableCloudManager instance with valid authentication",
        "Network access to reMarkable Cloud API",
        "Read permissions for the specified watch folder",
        "Sufficient disk space for temporary file downloads"
      ],
      "source_code": "class RemarkableFileWatcher:\n    \"\"\"Unified file watcher that works with both REST API and rmcl clients\"\"\"\n    \n    def __init__(self, cloud_manager: RemarkableCloudManager, \n                 watch_folder_path: str, poll_interval: int = 60):\n        self.cloud_manager = cloud_manager\n        self.watch_folder_path = watch_folder_path\n        self.poll_interval = poll_interval\n        self.logger = logging.getLogger(__name__)\n        \n        # Initialize appropriate watcher\n        if cloud_manager.client_type == \"rest\":\n            # Extract folder name from path for REST API\n            folder_name = watch_folder_path.strip('/').split('/')[-1] if watch_folder_path.strip('/') else \"Root\"\n            self.rest_watcher = RemarkableRestFileWatcher(\n                cloud_manager.rest_client, folder_name, poll_interval\n            )\n            self.watcher_type = \"rest\"\n        else:\n            # Use original implementation for rmcl\n            self.watcher_type = \"rmcl\"\n            # Track processed files to avoid reprocessing\n            self.processed_files: Set[str] = set()\n            self.last_check_time = datetime.now()\n    \n    async def get_new_files(self) -> List[Union[Dict, Document]]:\n        \"\"\"\n        Get list of new files since last check\n        \n        Returns:\n            List of new Document objects/dicts\n        \"\"\"\n        try:\n            if self.watcher_type == \"rest\":\n                return await self.rest_watcher.get_new_files()\n            \n            elif self.watcher_type == \"rmcl\":\n                # Original rmcl implementation\n                all_files = await self.cloud_manager.list_files_in_folder(\n                    self.watch_folder_path, include_subfolders=True\n                )\n                \n                new_files = []\n                current_time = datetime.now()\n                \n                for doc in all_files:\n                    # Skip if already processed\n                    doc_id = doc.id if hasattr(doc, 'id') else str(doc)\n                    if doc_id in self.processed_files:\n                        continue\n                    \n                    # Check if file is new (modified after last check)\n                    # Note: rmcl may not provide exact modification times,\n                    # so we'll track by ID for now\n                    new_files.append(doc)\n                    self.processed_files.add(doc_id)\n                \n                self.last_check_time = current_time\n                return new_files\n            \n            return []\n            \n        except Exception as e:\n            self.logger.error(f\"Error checking for new files: {e}\")\n            return []\n    \n    async def start_watching(self, callback):\n        \"\"\"\n        Start watching for new files\n        \n        Args:\n            callback: Async function to call with new files (signature: async def callback(document, local_file_path))\n        \"\"\"\n        if self.watcher_type == \"rest\":\n            # Delegate to REST watcher\n            await self.rest_watcher.start_watching(callback)\n            return\n        \n        # Original rmcl implementation\n        print(f\"\ud83d\udc41\ufe0f  Started watching reMarkable folder: {self.watch_folder_path}\")\n        print(f\"\ud83d\udd04 Checking every {self.poll_interval} seconds...\")\n        \n        # Get initial file list to mark as already processed\n        
initial_files = await self.cloud_manager.list_files_in_folder(\n            self.watch_folder_path, include_subfolders=True\n        )\n        for doc in initial_files:\n            doc_id = doc.id if hasattr(doc, 'id') else str(doc)\n            self.processed_files.add(doc_id)\n        \n        print(f\"\ud83d\udcc1 Tracking {len(initial_files)} existing files\")\n        \n        try:\n            while True:\n                try:\n                    new_files = await self.get_new_files()\n                    \n                    if new_files:\n                        print(f\"\ud83d\udce5 Found {len(new_files)} new file(s)\")\n                        \n                        # Create temporary directory for downloads\n                        with tempfile.TemporaryDirectory() as temp_dir:\n                            temp_path = Path(temp_dir)\n                            \n                            for doc in new_files:\n                                doc_name = doc.name if hasattr(doc, 'name') else \"Unknown\"\n                                print(f\"\ud83d\udcc4 Processing: {doc_name}\")\n                                \n                                # Download file\n                                local_file = await self.cloud_manager.download_document(doc, temp_path)\n                                \n                                if local_file:\n                                    # Call the callback function\n                                    try:\n                                        await callback(doc, local_file)\n                                    except Exception as e:\n                                        self.logger.error(f\"Error in callback for {doc_name}: {e}\")\n                                        print(f\"\u274c Error processing {doc_name}: {e}\")\n                    \n                    # Wait before next check\n                    await asyncio.sleep(self.poll_interval)\n                    \n                except Exception as e:\n                    self.logger.error(f\"Error in watch loop: {e}\")\n                    print(f\"\u274c Watch error: {e}\")\n                    await asyncio.sleep(self.poll_interval)\n                    \n        except KeyboardInterrupt:\n            print(f\"\\n\ud83d\uded1 Stopping reMarkable file watcher...\")\n        except Exception as e:\n            self.logger.error(f\"Fatal error in file watcher: {e}\")\n            print(f\"\u274c Fatal watcher error: {e}\")",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/remarkable_cloud.py",
      "tags": [
        "file-watcher",
        "remarkable",
        "cloud-sync",
        "polling",
        "async",
        "file-monitoring",
        "rest-api",
        "rmcl",
        "document-management",
        "callback-pattern"
      ],
      "updated_at": "2025-12-07T00:44:37.583844",
      "usage_example": "import asyncio\nfrom pathlib import Path\nfrom remarkable_cloud_manager import RemarkableCloudManager\nfrom remarkable_file_watcher import RemarkableFileWatcher\n\nasync def process_new_file(document, local_file_path):\n    \"\"\"Callback function to process new files\"\"\"\n    print(f\"New file: {document.name}\")\n    print(f\"Downloaded to: {local_file_path}\")\n    # Process the file here\n\nasync def main():\n    # Initialize cloud manager\n    cloud_manager = RemarkableCloudManager(client_type='rest', token='your_token')\n    \n    # Create file watcher for a specific folder\n    watcher = RemarkableFileWatcher(\n        cloud_manager=cloud_manager,\n        watch_folder_path='/My Notes/Work',\n        poll_interval=30\n    )\n    \n    # Check for new files once\n    new_files = await watcher.get_new_files()\n    print(f\"Found {len(new_files)} new files\")\n    \n    # Or start continuous watching\n    await watcher.start_watching(process_new_file)\n\nif __name__ == '__main__':\n    asyncio.run(main())"
    },
    {
      "best_practices": [
        "Always call authenticate() before performing any operations - check the return value to ensure authentication succeeded",
        "Use async/await pattern for all method calls as most operations are asynchronous",
        "Handle None returns from methods gracefully - many methods return None on failure",
        "For first-time setup, obtain a one-time code from https://my.remarkable.com/connect/desktop",
        "The manager automatically handles fallback between REST API and rmcl - monitor console output for which client is being used",
        "Cache is invalidated automatically on folder creation - no manual cache management needed",
        "Document downloads may return .rm files for notebooks without original PDFs - handle both .pdf and .rm extensions",
        "Folder paths should start with '/' and use forward slashes - root folder is represented as '/' or empty string",
        "The manager maintains state (authenticated, client_type, caches) - reuse the same instance for multiple operations",
        "Check self.authenticated property before operations to verify authentication status",
        "Use include_subfolders=True in list_files_in_folder() for recursive scanning",
        "Output directories for downloads are created automatically if they don't exist",
        "Configuration and tokens are persisted to disk - subsequent runs may not need one-time code"
      ],
      "class_interface": {
        "attributes": [
          {
            "description": "Path object pointing to the configuration directory where tokens and settings are stored",
            "is_class_variable": false,
            "name": "config_dir",
            "type": "Path"
          },
          {
            "description": "Path to the file storing the reMarkable device authentication token",
            "is_class_variable": false,
            "name": "device_token_file",
            "type": "Path"
          },
          {
            "description": "Path to the JSON configuration file",
            "is_class_variable": false,
            "name": "config_file",
            "type": "Path"
          },
          {
            "description": "Logger instance for recording errors and debug information",
            "is_class_variable": false,
            "name": "logger",
            "type": "logging.Logger"
          },
          {
            "description": "Flag indicating whether the manager is currently authenticated with reMarkable Cloud",
            "is_class_variable": false,
            "name": "authenticated",
            "type": "bool"
          },
          {
            "description": "Timestamp of the last synchronization operation, None if never synced",
            "is_class_variable": false,
            "name": "last_sync_time",
            "type": "Optional[datetime]"
          },
          {
            "description": "Configuration flag indicating preference for REST API over rmcl library",
            "is_class_variable": false,
            "name": "prefer_rest_api",
            "type": "bool"
          },
          {
            "description": "Instance of the REST API client for reMarkable Cloud operations",
            "is_class_variable": false,
            "name": "rest_client",
            "type": "RemarkableRestClient"
          },
          {
            "description": "Instance of the rmcl library client, None if not initialized or unavailable",
            "is_class_variable": false,
            "name": "rmcl_client",
            "type": "Optional[Any]"
          },
          {
            "description": "String indicating which client is currently active: 'rest' or 'rmcl'",
            "is_class_variable": false,
            "name": "client_type",
            "type": "str"
          },
          {
            "description": "Cache dictionary storing folder structure and metadata for REST API operations",
            "is_class_variable": false,
            "name": "folder_cache",
            "type": "Dict[str, Dict]"
          },
          {
            "description": "Cache dictionary storing file metadata for REST API operations",
            "is_class_variable": false,
            "name": "file_cache",
            "type": "Dict[str, Dict]"
          },
          {
            "description": "Timestamp when the cache was last populated, None if cache is invalid or empty",
            "is_class_variable": false,
            "name": "cache_timestamp",
            "type": "Optional[datetime]"
          }
        ],
        "methods": [
          {
            "is_property": false,
            "is_static": false,
            "name": "__init__",
            "parameters": {
              "config_dir": "Optional path to configuration directory, defaults to ~/.eink-llm",
              "prefer_rest_api": "Whether to prefer REST API over rmcl library, defaults to True"
            },
            "purpose": "Initialize the RemarkableCloudManager with configuration directory and client preference",
            "returns": "None - initializes instance with configured clients and settings",
            "signature": "__init__(self, config_dir: Optional[str] = None, prefer_rest_api: bool = True)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "authenticate",
            "parameters": {
              "one_time_code": "One-time code from reMarkable account, required for first setup or token refresh"
            },
            "purpose": "Authenticate with reMarkable Cloud using the selected client, with automatic fallback",
            "returns": "True if authentication successful, False otherwise",
            "signature": "async authenticate(self, one_time_code: Optional[str] = None) -> bool"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "get_folder_by_path",
            "parameters": {
              "folder_path": "Path to the folder starting with '/' for root (e.g., '/My Folder/Subfolder')"
            },
            "purpose": "Retrieve a folder object by its path in the reMarkable Cloud hierarchy",
            "returns": "Folder object (Dict for REST API, Folder for rmcl) if found, None otherwise",
            "signature": "async get_folder_by_path(self, folder_path: str) -> Optional[Union[Dict, Folder]]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "list_files_in_folder",
            "parameters": {
              "folder_path": "Path to the folder to scan",
              "include_subfolders": "If True, scan subfolders recursively, defaults to True"
            },
            "purpose": "List all PDF files in a folder and optionally its subfolders recursively",
            "returns": "List of Document objects/dicts representing PDF files found in the folder",
            "signature": "async list_files_in_folder(self, folder_path: str, include_subfolders: bool = True) -> List[Union[Dict, Document]]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "download_document",
            "parameters": {
              "document": "Document object/dict to download (from list_files_in_folder)",
              "output_dir": "Directory path where the downloaded file should be saved"
            },
            "purpose": "Download a document from reMarkable Cloud to local filesystem",
            "returns": "Path object pointing to the downloaded file, or None if download failed",
            "signature": "async download_document(self, document: Union[Dict, Document], output_dir: Path) -> Optional[Path]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "upload_file",
            "parameters": {
              "file_path": "Path to the local file to upload",
              "folder_path": "Optional destination folder path in reMarkable Cloud, defaults to root"
            },
            "purpose": "Upload a file from local filesystem to reMarkable Cloud",
            "returns": "Document ID string if upload successful, None otherwise",
            "signature": "async upload_file(self, file_path: str, folder_path: str = None) -> Optional[str]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "upload_content",
            "parameters": {
              "content": "Bytes content to upload",
              "file_type": "MIME type of the content, defaults to 'application/pdf'",
              "filename": "Name for the file in reMarkable Cloud",
              "folder_path": "Optional destination folder path, defaults to root"
            },
            "purpose": "Upload content directly from memory to reMarkable Cloud without saving to disk first",
            "returns": "Document ID string if upload successful, None otherwise",
            "signature": "async upload_content(self, content: bytes, filename: str, folder_path: str = None, file_type: str = 'application/pdf') -> Optional[str]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "create_folder",
            "parameters": {
              "folder_path": "Full path to the folder to create (e.g., '/My Folder/New Subfolder')"
            },
            "purpose": "Create a new folder in reMarkable Cloud at the specified path",
            "returns": "True if folder created successfully or already exists, False otherwise",
            "signature": "async create_folder(self, folder_path: str) -> bool"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "get_config",
            "parameters": {},
            "purpose": "Load configuration from the config file on disk",
            "returns": "Dictionary containing configuration settings, empty dict if file doesn't exist or error occurs",
            "signature": "get_config(self) -> Dict"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "save_config",
            "parameters": {
              "config": "Dictionary of configuration settings to persist"
            },
            "purpose": "Save configuration dictionary to the config file on disk",
            "returns": "None - saves to disk or logs error if save fails",
            "signature": "save_config(self, config: Dict) -> None"
          }
        ]
      },
      "complexity": "complex",
      "component_type": "class",
      "conditional_imports": [
        {
          "condition": "only if rmcl library is installed and prefer_rest_api=False or as fallback when REST API fails",
          "import": "from rmcl import Item, Document, Folder",
          "optional": true
        },
        {
          "condition": "only if rmcl library is installed for exception handling",
          "import": "from rmcl.exceptions import ApiError, AuthError, DocumentNotFound, FolderNotFound",
          "optional": true
        }
      ],
      "created_at": "2025-12-06 23:44:03",
      "decorators": [],
      "dependencies": [
        "asyncio",
        "os",
        "tempfile",
        "time",
        "uuid",
        "datetime",
        "pathlib",
        "typing",
        "logging",
        "json",
        "remarkable_rest_client",
        "rmcl"
      ],
      "description": "Unified manager for reMarkable Cloud operations that uses REST API as primary method with rmcl library as fallback, handling authentication, file operations, and folder management.",
      "docstring": "Unified reMarkable Cloud manager that uses REST API as primary method\nwith rmcl library as fallback for compatibility",
      "id": 1943,
      "imports": [
        "import asyncio",
        "import os",
        "import tempfile",
        "import time",
        "import uuid",
        "from datetime import datetime",
        "from datetime import timedelta",
        "from pathlib import Path",
        "from typing import Dict",
        "from typing import List",
        "from typing import Optional",
        "from typing import Set",
        "from typing import Tuple",
        "from typing import Union",
        "import logging",
        "import json",
        "from remarkable_rest_client import RemarkableRestClient",
        "from remarkable_rest_client import RemarkableRestFileWatcher",
        "import sys",
        "from pathlib import Path",
        "from rmcl import Item",
        "from rmcl import Document",
        "from rmcl import Folder",
        "from rmcl.exceptions import ApiError",
        "from rmcl.exceptions import AuthError",
        "from rmcl.exceptions import DocumentNotFound",
        "from rmcl.exceptions import FolderNotFound",
        "from rmcl import Item",
        "from rmcl import Document",
        "from rmcl import Folder",
        "from rmcl.exceptions import ApiError",
        "from rmcl.exceptions import AuthError",
        "from rmcl.exceptions import DocumentNotFound",
        "from rmcl.exceptions import FolderNotFound"
      ],
      "imports_required": [
        "import asyncio",
        "import os",
        "import tempfile",
        "import time",
        "import uuid",
        "from datetime import datetime, timedelta",
        "from pathlib import Path",
        "from typing import Dict, List, Optional, Set, Tuple, Union",
        "import logging",
        "import json",
        "from remarkable_rest_client import RemarkableRestClient, RemarkableRestFileWatcher"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 458,
      "line_start": 69,
      "name": "RemarkableCloudManager",
      "parameters": [
        {
          "annotation": "",
          "default": null,
          "name": "bases"
        }
      ],
      "parameters_explained": {
        "config_dir": "Optional path to configuration directory where authentication tokens and settings are stored. Defaults to '~/.eink-llm' if not provided. The directory will be created if it doesn't exist.",
        "prefer_rest_api": "Boolean flag indicating whether to prefer REST API over rmcl library. Defaults to True. When True, REST API is attempted first with rmcl as fallback. When False and rmcl is available, rmcl is used as primary method."
      },
      "parent_class": null,
      "purpose": "This class provides a comprehensive interface for interacting with reMarkable Cloud storage. It abstracts away the complexity of using multiple client libraries (REST API and rmcl) by automatically selecting the best available method and falling back when needed. The class handles authentication, document upload/download, folder creation and navigation, and maintains caching for improved performance. It's designed to be resilient, attempting multiple authentication methods and providing detailed logging and user feedback throughout operations.",
      "return_annotation": null,
      "return_explained": "Instantiation returns a RemarkableCloudManager object configured with the specified settings. Key method returns: authenticate() returns bool indicating success/failure; get_folder_by_path() returns Dict or Folder object or None; list_files_in_folder() returns List of Document objects/dicts; download_document() returns Path to downloaded file or None; upload_file() and upload_content() return Optional[str] document ID; create_folder() returns bool indicating success.",
      "settings_required": [
        "reMarkable Cloud account with one-time code from https://my.remarkable.com/connect/desktop for initial authentication",
        "Configuration directory (defaults to ~/.eink-llm) with write permissions",
        "Device token file will be stored at {config_dir}/remarkable_device_token",
        "Configuration file will be stored at {config_dir}/remarkable_config.json",
        "Optional: rmcl library installed for fallback functionality (pip install rmcl)"
      ],
      "source_code": "class RemarkableCloudManager:\n    \"\"\"\n    Unified reMarkable Cloud manager that uses REST API as primary method\n    with rmcl library as fallback for compatibility\n    \"\"\"\n    \n    def __init__(self, config_dir: Optional[str] = None, prefer_rest_api: bool = True):\n        self.config_dir = Path(config_dir) if config_dir else Path.home() / '.eink-llm'\n        self.config_dir.mkdir(exist_ok=True)\n        \n        self.device_token_file = self.config_dir / 'remarkable_device_token'\n        self.config_file = self.config_dir / 'remarkable_config.json'\n        \n        self.logger = logging.getLogger(__name__)\n        self.authenticated = False\n        self.last_sync_time = None\n        self.prefer_rest_api = prefer_rest_api\n        \n        # Initialize the appropriate client\n        self.rest_client = RemarkableRestClient(config_dir)\n        self.rmcl_client = None\n        \n        # Try to initialize rmcl client if available and requested\n        if RMCL_AVAILABLE and not prefer_rest_api:\n            try:\n                # rmcl initialization would go here\n                self.client_type = \"rmcl\"\n                print(\"\ud83d\udd27 Using rmcl library\")\n            except Exception as e:\n                self.logger.warning(f\"rmcl initialization failed: {e}\")\n                self.client_type = \"rest\"\n                print(\"\ud83d\udd27 Falling back to REST API\")\n        else:\n            self.client_type = \"rest\"\n            print(\"\ud83d\udd27 Using REST API client\")\n        \n        # Cache for folder structure and file metadata (REST API only)\n        self.folder_cache: Dict[str, Dict] = {}\n        self.file_cache: Dict[str, Dict] = {}\n        self.cache_timestamp = None\n        \n    async def authenticate(self, one_time_code: Optional[str] = None) -> bool:\n        \"\"\"\n        Authenticate with reMarkable Cloud using the selected client\n        \n        Args:\n            one_time_code: One-time code from reMarkable account (required for first setup)\n            \n        Returns:\n            True if authentication successful, False otherwise\n        \"\"\"\n        try:\n            # First try REST API (primary method)\n            if self.client_type == \"rest\" or self.prefer_rest_api:\n                print(\"\ud83d\udd27 Attempting authentication with REST API...\")\n                success = await self.rest_client.authenticate(one_time_code)\n                \n                if success:\n                    self.authenticated = success\n                    self.client_type = \"rest\"\n                    return success\n                else:\n                    print(\"\u26a0\ufe0f  REST API authentication failed\")\n                    \n                    # If REST API failed and rmcl is available, try fallback\n                    if RMCL_AVAILABLE:\n                        print(\"\ud83d\udd04 Falling back to rmcl library...\")\n                        self.client_type = \"rmcl\"\n                    else:\n                        print(\"\u274c No fallback available - rmcl library not installed\")\n                        print(\"\ud83d\udca1 To install rmcl fallback: pip install rmcl\")\n                        return False\n            \n            # Try rmcl client (either as primary choice or fallback)\n            if self.client_type == \"rmcl\" and RMCL_AVAILABLE:\n                print(\"\ud83d\udd27 Using rmcl library for authentication...\")\n                \n              
  # Check if we have a device token already\n                if self.device_token_file.exists() and not one_time_code:\n                    print(\"\ud83d\udd11 Using existing device token with rmcl...\")\n                    try:\n                        # Test existing authentication\n                        root = Item.get_by_id_s('')  # Get root folder\n                        self.authenticated = True\n                        print(\"\u2705 rmcl authentication successful!\")\n                        return True\n                    except (AuthError, ApiError) as e:\n                        print(\"\u26a0\ufe0f  Existing token invalid, need fresh one-time code\")\n                \n                # First time setup or token refresh - need one-time code\n                if one_time_code:\n                    print(\"\ud83d\udd10 Registering new device with rmcl...\")\n                    try:\n                        Item.register_device(one_time_code)\n                        # Test the registration worked\n                        root = Item.get_by_id_s('')  # Get root folder\n                        print(\"\u2705 rmcl device registered and authenticated!\")\n                        self.authenticated = True\n                        return True\n                    except Exception as e:\n                        print(f\"\u274c rmcl registration failed: {e}\")\n                        return False\n                \n                print(\"\u274c rmcl requires a one-time code for authentication\")\n                return False\n                \n            print(\"\u274c No available authentication method\")\n            print(\"\ud83d\udcdd Options:\")\n            print(\"   1. Generate a one-time code from https://my.remarkable.com/connect/desktop\")\n            print(\"   2. 
Install rmcl library for fallback: pip install rmcl\")\n            return False\n                \n        except Exception as e:\n            self.logger.error(f\"Authentication error: {e}\")\n            print(f\"\u274c Authentication error: {e}\")\n            \n            # If we haven't tried rmcl yet and it's available, try it as last resort\n            if self.client_type == \"rest\" and RMCL_AVAILABLE and one_time_code:\n                print(\"\ud83d\udd04 Trying rmcl as last resort...\")\n                self.client_type = \"rmcl\"\n                return await self.authenticate(one_time_code)\n                \n            return False\n    \n    async def get_folder_by_path(self, folder_path: str) -> Optional[Union[Dict, Folder]]:\n        \"\"\"\n        Get a folder by its path (e.g., '/My Folder/Subfolder')\n        \n        Args:\n            folder_path: Path to the folder, starting with '/' for root\n            \n        Returns:\n            Folder object/dict if found, None otherwise\n        \"\"\"\n        try:\n            if self.client_type == \"rest\":\n                # For REST API, we'll work with folder names rather than paths\n                # This is a simplified implementation\n                if folder_path.strip() in ['/', '']:\n                    return {\"ID\": \"\", \"Type\": \"CollectionType\", \"VissibleName\": \"Root\"}\n                \n                # Find folder by name (simplified - could be enhanced for full path support)\n                folder_name = folder_path.strip('/').split('/')[-1]\n                folder_id = self.rest_client.find_folder_by_name(folder_name)\n                if folder_id:\n                    return {\"ID\": folder_id, \"Type\": \"CollectionType\", \"VissibleName\": folder_name}\n                return None\n            \n            elif self.client_type == \"rmcl\" and RMCL_AVAILABLE:\n                # Original rmcl implementation\n                current_folder = Item.get_by_id_s('')  # Root folder has empty ID\n                \n                if folder_path.strip() in ['/', '']:\n                    return current_folder\n                \n                # Split path and navigate through folders\n                path_parts = [part for part in folder_path.strip('/').split('/') if part]\n                \n                for part in path_parts:\n                    found = False\n                    for child in current_folder.children:\n                        if isinstance(child, Folder) and child.name == part:\n                            current_folder = child\n                            found = True\n                            break\n                    \n                    if not found:\n                        print(f\"\u274c Folder not found: {part} in path {folder_path}\")\n                        return None\n                \n                return current_folder\n            \n            return None\n            \n        except Exception as e:\n            self.logger.error(f\"Error finding folder {folder_path}: {e}\")\n            return None\n    \n    async def list_files_in_folder(self, folder_path: str, include_subfolders: bool = True) -> List[Union[Dict, Document]]:\n        \"\"\"\n        List all PDF files in a folder and optionally its subfolders\n        \n        Args:\n            folder_path: Path to the folder to scan\n            include_subfolders: If True, scan subfolders recursively\n            \n        Returns:\n            List of Document objects/dicts 
representing PDF files\n        \"\"\"\n        try:\n            if self.client_type == \"rest\":\n                # For REST API implementation\n                if folder_path.strip() in ['/', '']:\n                    folder_id = \"\"\n                else:\n                    folder_name = folder_path.strip('/').split('/')[-1]\n                    folder_id = self.rest_client.find_folder_by_name(folder_name)\n                    if not folder_id:\n                        print(f\"\u274c Folder not found: {folder_path}\")\n                        return []\n                \n                documents = self.rest_client.get_documents_in_folder(folder_id)\n                \n                # If include_subfolders is True, we'd need to implement recursive search\n                # This is simplified for now\n                return documents\n            \n            elif self.client_type == \"rmcl\" and RMCL_AVAILABLE:\n                # Original rmcl implementation\n                folder = await self.get_folder_by_path(folder_path)\n                if not folder:\n                    return []\n                \n                documents = []\n                \n                def collect_documents(current_folder: Folder):\n                    for child in current_folder.children:\n                        if isinstance(child, Document):\n                            # Check if it's a PDF or has content we can process\n                            documents.append(child)\n                        elif isinstance(child, Folder) and include_subfolders:\n                            collect_documents(child)\n                \n                collect_documents(folder)\n                return documents\n            \n            return []\n            \n        except Exception as e:\n            self.logger.error(f\"Error listing files in {folder_path}: {e}\")\n            return []\n    \n    async def download_document(self, document: Union[Dict, Document], output_dir: Path) -> Optional[Path]:\n        \"\"\"\n        Download a document from reMarkable Cloud\n        \n        Args:\n            document: Document object/dict to download\n            output_dir: Directory to save the downloaded file\n            \n        Returns:\n            Path to downloaded file, or None if failed\n        \"\"\"\n        try:\n            if self.client_type == \"rest\":\n                # REST API implementation\n                doc_id = document.get(\"ID\") if isinstance(document, dict) else getattr(document, 'id', None)\n                doc_name = document.get(\"VissibleName\") if isinstance(document, dict) else getattr(document, 'name', 'Unknown')\n                \n                if not doc_id:\n                    print(\"\u274c No document ID available\")\n                    return None\n                \n                return self.rest_client.download_document(doc_id, doc_name, output_dir)\n            \n            elif self.client_type == \"rmcl\" and RMCL_AVAILABLE:\n                # Original rmcl implementation\n                output_dir.mkdir(parents=True, exist_ok=True)\n                \n                # Generate safe filename\n                safe_name = \"\".join(c for c in document.name if c.isalnum() or c in (' ', '-', '_')).rstrip()\n                if not safe_name:\n                    safe_name = f\"document_{document.id[:8]}\"\n                \n                output_path = output_dir / f\"{safe_name}.pdf\"\n                \n                # Download the document 
content\n                # First try to get the original PDF if it exists\n                try:\n                    content = document.contents_s()  # Get original PDF/EPUB\n                    with open(output_path, 'wb') as f:\n                        f.write(content.read())\n                    print(f\"\ud83d\udce5 Downloaded: {document.name} -> {output_path.name}\")\n                    return output_path\n                    \n                except Exception:\n                    # If no original content, try to get the raw file\n                    raw_content = document.raw_s()\n                    \n                    # Save as a temporary zip and try to extract PDF\n                    with tempfile.NamedTemporaryFile(suffix='.zip', delete=False) as temp_file:\n                        temp_file.write(raw_content.read())\n                        temp_zip_path = Path(temp_file.name)\n                    \n                    # For now, just save the raw content - in a real implementation,\n                    # you might want to extract and convert notebook files to PDF\n                    output_path = output_dir / f\"{safe_name}.rm\"\n                    temp_zip_path.rename(output_path)\n                    \n                    print(f\"\ud83d\udce5 Downloaded raw: {document.name} -> {output_path.name}\")\n                    return output_path\n            \n            return None\n                \n        except Exception as e:\n            doc_name = document.get(\"VissibleName\", \"Unknown\") if isinstance(document, dict) else getattr(document, 'name', 'Unknown')\n            self.logger.error(f\"Error downloading {doc_name}: {e}\")\n            print(f\"\u274c Failed to download {doc_name}: {e}\")\n            return None\n    \n    async def upload_file(self, file_path: str, folder_path: str = None) -> Optional[str]:\n        \"\"\"Upload a file to reMarkable Cloud\"\"\"\n        try:\n            if self.client_type == 'rest':\n                return await self.rest_client.upload_file(file_path, folder_path)\n            else:\n                return await self.rmcl_client.upload_file(file_path, folder_path)\n        except Exception as e:\n            logger.error(f\"Failed to upload file {file_path}: {e}\")\n            return None\n    \n    async def upload_content(self, content: bytes, filename: str, folder_path: str = None, \n                           file_type: str = \"application/pdf\") -> Optional[str]:\n        \"\"\"Upload content directly to reMarkable Cloud\"\"\"\n        try:\n            if self.client_type == 'rest':\n                return await self.rest_client.upload_content(content, filename, folder_path, file_type)\n            else:\n                return await self.rmcl_client.upload_content(content, filename, folder_path, file_type)\n        except Exception as e:\n            logger.error(f\"Failed to upload content {filename}: {e}\")\n            return None\n    \n    async def create_folder(self, folder_path: str) -> bool:\n        \"\"\"\n        Create a folder in reMarkable Cloud\n        \n        Args:\n            folder_path: Full path to the folder to create\n            \n        Returns:\n            True if folder created or already exists, False otherwise\n        \"\"\"\n        try:\n            # Check if folder already exists\n            existing = await self.get_folder_by_path(folder_path)\n            if existing:\n                print(f\"\ud83d\udcc1 Folder already exists: {folder_path}\")\n                return 
True\n            \n            # Split path to get parent and folder name\n            path_parts = [part for part in folder_path.strip('/').split('/') if part]\n            if not path_parts:\n                print(\"\u274c Invalid folder path\")\n                return False\n            \n            folder_name = path_parts[-1]\n            parent_path = '/' + '/'.join(path_parts[:-1]) if len(path_parts) > 1 else '/'\n            \n            # Get parent folder\n            parent_folder = await self.get_folder_by_path(parent_path)\n            if not parent_folder:\n                print(f\"\u274c Parent folder not found: {parent_path}\")\n                return False\n            \n            # Create folder using REST API\n            success = await self.rest_client.create_folder(folder_name, parent_folder.id if parent_folder.id != 'root' else None)\n            \n            if success:\n                print(f\"\u2705 Folder created: {folder_path}\")\n                # Invalidate cache\n                self.cache_timestamp = None\n                return True\n            else:\n                print(f\"\u274c Failed to create folder via REST API\")\n                return False\n            \n        except Exception as e:\n            self.logger.error(f\"Error creating folder {folder_path}: {e}\")\n            print(f\"\u274c Failed to create folder: {e}\")\n            return False\n    \n    def get_config(self) -> Dict:\n        \"\"\"Load configuration from file\"\"\"\n        if self.config_file.exists():\n            try:\n                with open(self.config_file, 'r') as f:\n                    return json.load(f)\n            except Exception as e:\n                self.logger.error(f\"Error loading config: {e}\")\n                return {}\n        return {}\n    \n    def save_config(self, config: Dict) -> None:\n        \"\"\"Save configuration to file\"\"\"\n        try:\n            with open(self.config_file, 'w') as f:\n                json.dump(config, f, indent=2)\n        except Exception as e:\n            self.logger.error(f\"Error saving config: {e}\")",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/remarkable_cloud.py",
      "tags": [
        "remarkable",
        "cloud-storage",
        "e-ink",
        "document-management",
        "async",
        "authentication",
        "file-upload",
        "file-download",
        "folder-management",
        "rest-api",
        "rmcl",
        "fallback-pattern",
        "caching"
      ],
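      "download_handling_example": "Minimal sketch (assumes an authenticated manager and a documents list from list_files_in_folder; not taken from the source file). download_document() may return a .pdf with the original content, a raw .rm notebook export, or None on failure, so branch on the suffix and the None case.\n\nfrom pathlib import Path\n\nasync def fetch_first(manager, documents):\n    if not documents:\n        return None\n    local_path = await manager.download_document(documents[0], Path('./downloads'))\n    if local_path is None:\n        print('Download failed')\n        return None\n    if local_path.suffix == '.pdf':\n        print(f'Got original PDF: {local_path}')\n    elif local_path.suffix == '.rm':\n        print(f'Got raw notebook export, conversion still needed: {local_path}')\n    return local_path",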
      "updated_at": "2025-12-07T00:44:03.962609",
      "usage_example": "import asyncio\nfrom pathlib import Path\nfrom remarkable_cloud_manager import RemarkableCloudManager\n\nasync def main():\n    # Initialize manager with REST API preference\n    manager = RemarkableCloudManager(\n        config_dir='~/.eink-llm',\n        prefer_rest_api=True\n    )\n    \n    # Authenticate (first time requires one-time code)\n    one_time_code = 'your-code-from-remarkable'\n    success = await manager.authenticate(one_time_code)\n    if not success:\n        print('Authentication failed')\n        return\n    \n    # Create a folder\n    await manager.create_folder('/My Documents/LLM Output')\n    \n    # List files in a folder\n    documents = await manager.list_files_in_folder('/My Documents', include_subfolders=True)\n    print(f'Found {len(documents)} documents')\n    \n    # Download a document\n    if documents:\n        output_dir = Path('./downloads')\n        file_path = await manager.download_document(documents[0], output_dir)\n        if file_path:\n            print(f'Downloaded to {file_path}')\n    \n    # Upload a file\n    doc_id = await manager.upload_file('document.pdf', '/My Documents')\n    if doc_id:\n        print(f'Uploaded with ID: {doc_id}')\n    \n    # Upload content directly\n    with open('content.pdf', 'rb') as f:\n        content = f.read()\n    doc_id = await manager.upload_content(\n        content,\n        'my_document.pdf',\n        '/My Documents',\n        'application/pdf'\n    )\n\nasyncio.run(main())"
    },
    {
      "best_practices": [
        "Always instantiate with a properly configured LLMHandler that has valid API credentials",
        "All rewriting methods are async and must be awaited in an async context",
        "The class limits content to 3000 characters when sending to LLM to avoid token limits - consider chunking for longer documents",
        "Methods return None or empty lists on failure - always check return values before using",
        "Annotation dictionaries should contain 'annotation_type', 'confidence', and 'area' keys for best results",
        "The class uses temperature=0.3 for consistent rewriting - this is intentionally low to reduce variability",
        "Multiple annotation types automatically trigger the 'mixed' prompt template for comprehensive revision",
        "Error handling is built-in but logs errors - ensure logging is configured to capture issues",
        "The rewriting_prompts dictionary can be modified after instantiation to customize prompt templates",
        "For production use, implement retry logic around the async methods as LLM calls may fail intermittently"
      ],
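      "best_practices_retry_sketch": "Editor's sketch, not in the original source: a minimal retry wrapper around rewrite_document_from_annotations, following the retry-logic best practice above; the function name rewrite_with_retry and the backoff values are hypothetical.\n\nimport asyncio\n\nasync def rewrite_with_retry(rewriter, content, annotations, attempts=3, base_delay=2.0):\n    # Retry the async rewrite a few times; the method itself returns None on failure\n    for attempt in range(1, attempts + 1):\n        try:\n            result = await rewriter.rewrite_document_from_annotations(content, annotations)\n            if result is not None:\n                return result\n        except Exception:\n            pass  # treat transient LLM/API errors as retryable\n        if attempt < attempts:\n            await asyncio.sleep(base_delay * attempt)\n    return None",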
      "class_interface": {
        "attributes": [
          {
            "description": "Handler instance for interacting with the language model to generate rewritten text",
            "is_class_variable": false,
            "name": "llm_handler",
            "type": "LLMHandler"
          },
          {
            "description": "Dictionary mapping annotation types ('strikethrough', 'highlight', 'markup', 'mixed') to their corresponding prompt templates with placeholders for content and annotation_info",
            "is_class_variable": false,
            "name": "rewriting_prompts",
            "type": "Dict[str, str]"
          }
        ],
        "methods": [
          {
            "is_property": false,
            "is_static": false,
            "name": "__init__",
            "parameters": {
              "llm_handler": "LLMHandler instance that provides text generation capabilities"
            },
            "purpose": "Initialize the TextRewriter with an LLM handler and set up structured prompts for different annotation types",
            "returns": "None (constructor)",
            "signature": "__init__(self, llm_handler: LLMHandler)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "rewrite_document_from_annotations",
            "parameters": {
              "annotations": "List of annotation dictionaries containing 'annotation_type', 'confidence', and 'area' keys",
              "original_content": "The original document text to be rewritten (limited to first 3000 characters)"
            },
            "purpose": "Rewrite document content based on detected annotations by selecting appropriate prompt template and generating improved version via LLM",
            "returns": "Rewritten content as string if successful, None if no annotations provided or generation fails",
            "signature": "async rewrite_document_from_annotations(self, original_content: str, annotations: List[Dict[str, Any]]) -> Optional[str]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_select_prompt_key",
            "parameters": {
              "annotation_types": "Set of annotation type strings found in the document"
            },
            "purpose": "Select the most appropriate prompt template key based on the types of annotations present",
            "returns": "String key for prompt template ('strikethrough', 'highlight', 'markup', or 'mixed')",
            "signature": "_select_prompt_key(self, annotation_types: set) -> str"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "generate_improvement_suggestions",
            "parameters": {
              "annotations": "List of annotation dictionaries to analyze",
              "original_content": "The original document content (used for context)"
            },
            "purpose": "Generate specific, actionable improvement suggestions based on annotation types, counts, and confidence levels",
            "returns": "List of string suggestions describing recommended improvements, or error message list on failure",
            "signature": "async generate_improvement_suggestions(self, original_content: str, annotations: List[Dict[str, Any]]) -> List[str]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "create_editing_summary",
            "parameters": {
              "annotations": "List of annotations that guided the rewriting process",
              "original_content": "Original document text before rewriting",
              "rewritten_content": "Rewritten document text after processing"
            },
            "purpose": "Create a formatted summary of the editing process including statistics, annotation breakdown, and change assessment",
            "returns": "Multi-line formatted string containing editing statistics and summary, or error message on failure",
            "signature": "async create_editing_summary(self, original_content: str, rewritten_content: str, annotations: List[Dict[str, Any]]) -> str"
          }
        ]
      },
      "complexity": "moderate",
      "component_type": "class",
      "conditional_imports": [],
      "created_at": "2025-12-06 23:43:10",
      "decorators": [],
      "dependencies": [
        "typing",
        "logging",
        "llm_handler"
      ],
      "description": "AI-powered text rewriting engine that processes documents based on detected annotations (strikethrough, highlight, markup) and generates improved versions using structured LLM prompts.",
      "docstring": "AI-powered text rewriting engine that processes documents based on detected annotations\nGenerates improved versions of text content using structured prompts",
      "id": 1942,
      "imports": [
        "from typing import List",
        "from typing import Dict",
        "from typing import Any",
        "from typing import Optional",
        "import logging",
        "from llm_handler import LLMHandler"
      ],
      "imports_required": [
        "from typing import List, Dict, Any, Optional",
        "import logging",
        "from llm_handler import LLMHandler"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 283,
      "line_start": 12,
      "name": "TextRewriter",
      "parameters": [
        {
          "annotation": "",
          "default": null,
          "name": "bases"
        }
      ],
      "parameters_explained": {
        "llm_handler": "An instance of LLMHandler that provides the interface to the language model for generating rewritten text. This handler must implement a generate_text method that accepts prompts and returns generated content asynchronously."
      },
      "parent_class": null,
      "purpose": "TextRewriter is responsible for analyzing annotated documents and generating rewritten content based on detected annotations. It uses different prompt templates for various annotation types (strikethrough, highlight, markup, mixed) to guide an LLM in producing improved document versions. The class also provides functionality to generate improvement suggestions and create editing summaries that describe the changes made during the rewriting process.",
      "return_annotation": null,
      "return_explained": "Instantiation returns a TextRewriter object. Key method returns: rewrite_document_from_annotations returns Optional[str] (rewritten content or None on failure), generate_improvement_suggestions returns List[str] (list of actionable suggestions), and create_editing_summary returns str (formatted summary of changes made).",
      "settings_required": [
        "A configured LLMHandler instance with valid API credentials",
        "Logger configuration (uses module-level 'logger' variable)",
        "LLM API access with sufficient token limits (methods use up to 2000 max_tokens)"
      ],
      "source_code": "class TextRewriter:\n    \"\"\"\n    AI-powered text rewriting engine that processes documents based on detected annotations\n    Generates improved versions of text content using structured prompts\n    \"\"\"\n    \n    def __init__(self, llm_handler: LLMHandler):\n        self.llm_handler = llm_handler\n        \n        # Structured prompts for different annotation types\n        self.rewriting_prompts = {\n            'strikethrough': \"\"\"\nYou are analyzing a document where text has been marked for deletion with strikethrough annotations.\nYour task is to rewrite the content by removing the marked sections and ensuring smooth flow.\n\nGuidelines:\n- Remove content that appears to be struck through\n- Ensure transitions remain natural\n- Maintain the document's tone and style\n- Fix any grammatical issues caused by deletions\n\nOriginal content: {content}\nAnnotation context: {annotation_info}\n\nPlease provide a clean, revised version:\n\"\"\",\n            \n            'highlight': \"\"\"\nYou are analyzing a document where text has been highlighted for emphasis or attention.\nYour task is to enhance the highlighted sections while maintaining the overall message.\n\nGuidelines:\n- Pay special attention to highlighted sections\n- Enhance clarity and impact of highlighted content\n- Ensure highlighted points are well-integrated\n- Maintain document structure and flow\n\nOriginal content: {content}\nHighlighted sections: {annotation_info}\n\nPlease provide an enhanced version that better emphasizes the key points:\n\"\"\",\n            \n            'markup': \"\"\"\nYou are analyzing a document with handwritten markup and corrections.\nYour task is to incorporate the suggested changes and improvements.\n\nGuidelines:\n- Interpret markup as correction suggestions\n- Improve clarity and readability\n- Fix any errors indicated by markup\n- Enhance overall quality while preserving meaning\n\nOriginal content: {content}\nMarkup annotations: {annotation_info}\n\nPlease provide a corrected and improved version:\n\"\"\",\n            \n            'mixed': \"\"\"\nYou are analyzing a document with multiple types of annotations including strikethroughs, highlights, and markup.\nYour task is to create an improved version that incorporates all the suggested changes.\n\nGuidelines:\n- Remove struck-through content\n- Enhance highlighted sections\n- Incorporate markup corrections\n- Ensure coherent flow and readability\n- Maintain the document's original purpose and tone\n\nOriginal content: {content}\nAnnotation summary: {annotation_info}\n\nPlease provide a comprehensive revision:\n\"\"\"\n        }\n    \n    async def rewrite_document_from_annotations(\n        self, \n        original_content: str, \n        annotations: List[Dict[str, Any]]\n    ) -> Optional[str]:\n        \"\"\"\n        Rewrite document content based on detected annotations\n        \n        Args:\n            original_content: The original document content/analysis\n            annotations: List of annotation information\n            \n        Returns:\n            Rewritten content or None if failed\n        \"\"\"\n        try:\n            if not annotations:\n                logger.info(\"No annotations provided for rewriting\")\n                return None\n            \n            # Analyze annotation types\n            annotation_types = set()\n            annotation_summary = []\n            \n            for ann in annotations:\n                ann_type = ann.get('annotation_type', 
'unknown')\n                annotation_types.add(ann_type)\n                \n                # Create annotation summary\n                confidence = ann.get('confidence', 0)\n                area = ann.get('area', 0)\n                annotation_summary.append(f\"{ann_type} (confidence: {confidence:.2f}, area: {area}px)\")\n            \n            # Choose appropriate prompt based on annotation types\n            prompt_key = self._select_prompt_key(annotation_types)\n            prompt_template = self.rewriting_prompts.get(prompt_key, self.rewriting_prompts['mixed'])\n            \n            # Format annotation information\n            annotation_info = \"; \".join(annotation_summary)\n            \n            # Create the prompt\n            prompt = prompt_template.format(\n                content=original_content[:3000],  # Limit content length for API\n                annotation_info=annotation_info\n            )\n            \n            logger.info(f\"Rewriting content using prompt type: {prompt_key}\")\n            \n            # Generate rewritten content\n            rewritten_content = await self.llm_handler.generate_text(\n                prompt, \n                max_tokens=2000,\n                temperature=0.3  # Lower temperature for more consistent rewriting\n            )\n            \n            if rewritten_content:\n                logger.info(f\"Successfully generated {len(rewritten_content)} characters of rewritten content\")\n                return rewritten_content.strip()\n            else:\n                logger.warning(\"Failed to generate rewritten content\")\n                return None\n                \n        except Exception as e:\n            logger.error(f\"Error in document rewriting: {e}\")\n            return None\n    \n    def _select_prompt_key(self, annotation_types: set) -> str:\n        \"\"\"Select the most appropriate prompt based on annotation types\"\"\"\n        if len(annotation_types) == 1:\n            # Single annotation type\n            single_type = next(iter(annotation_types))\n            if single_type in self.rewriting_prompts:\n                return single_type\n        \n        # Multiple types or unsupported single type\n        return 'mixed'\n    \n    async def generate_improvement_suggestions(\n        self, \n        original_content: str, \n        annotations: List[Dict[str, Any]]\n    ) -> List[str]:\n        \"\"\"\n        Generate specific improvement suggestions based on annotations\n        \n        Args:\n            original_content: The original document content\n            annotations: List of annotation information\n            \n        Returns:\n            List of specific improvement suggestions\n        \"\"\"\n        try:\n            if not annotations:\n                return [\"No annotations detected - document appears to be in final form\"]\n            \n            # Analyze annotations to generate targeted suggestions\n            suggestions = []\n            \n            # Count annotation types\n            type_counts = {}\n            for ann in annotations:\n                ann_type = ann.get('annotation_type', 'unknown')\n                type_counts[ann_type] = type_counts.get(ann_type, 0) + 1\n            \n            # Generate type-specific suggestions\n            if 'strikethrough' in type_counts:\n                count = type_counts['strikethrough']\n                suggestions.append(f\"Consider removing {count} section(s) marked for deletion\")\n            \n 
           if 'highlight' in type_counts:\n                count = type_counts['highlight']\n                suggestions.append(f\"Enhance {count} highlighted section(s) for greater impact\")\n            \n            if 'markup' in type_counts:\n                count = type_counts['markup']\n                suggestions.append(f\"Incorporate {count} handwritten correction(s) or addition(s)\")\n            \n            if 'underline' in type_counts:\n                count = type_counts['underline']\n                suggestions.append(f\"Review {count} underlined section(s) for emphasis or correction\")\n            \n            # Add general suggestions based on annotation density\n            total_annotations = len(annotations)\n            if total_annotations >= 5:\n                suggestions.append(\"Heavy editing detected - consider comprehensive revision\")\n            elif total_annotations >= 3:\n                suggestions.append(\"Moderate editing detected - focus on marked areas\")\n            else:\n                suggestions.append(\"Light editing detected - minor refinements needed\")\n            \n            # Calculate average confidence\n            avg_confidence = sum(ann.get('confidence', 0) for ann in annotations) / len(annotations)\n            if avg_confidence < 0.5:\n                suggestions.append(\"Some annotations have low confidence - manual review recommended\")\n            \n            return suggestions\n            \n        except Exception as e:\n            logger.error(f\"Error generating improvement suggestions: {e}\")\n            return [\"Error generating suggestions - manual review recommended\"]\n    \n    async def create_editing_summary(\n        self, \n        original_content: str, \n        rewritten_content: str, \n        annotations: List[Dict[str, Any]]\n    ) -> str:\n        \"\"\"\n        Create a summary of the editing process and changes made\n        \n        Args:\n            original_content: Original document content\n            rewritten_content: Rewritten document content  \n            annotations: List of annotations that guided the rewriting\n            \n        Returns:\n            Summary of editing process and changes\n        \"\"\"\n        try:\n            # Create basic statistics\n            original_length = len(original_content)\n            rewritten_length = len(rewritten_content)\n            length_change = rewritten_length - original_length\n            \n            # Count annotation types\n            type_counts = {}\n            for ann in annotations:\n                ann_type = ann.get('annotation_type', 'unknown')\n                type_counts[ann_type] = type_counts.get(ann_type, 0) + 1\n            \n            # Generate summary\n            summary_parts = [\n                f\"Editing Summary:\",\n                f\"\u2022 Processed {len(annotations)} annotations across {len(type_counts)} types\",\n                f\"\u2022 Content length: {original_length:,} \u2192 {rewritten_length:,} characters ({length_change:+,})\",\n            ]\n            \n            # Add annotation breakdown\n            if type_counts:\n                type_breakdown = \", \".join([f\"{count} {type}\" for type, count in type_counts.items()])\n                summary_parts.append(f\"\u2022 Annotation types: {type_breakdown}\")\n            \n            # Add change assessment\n            if abs(length_change) > original_length * 0.1:  # More than 10% change\n                change_type = 
\"substantial revision\" if length_change > 0 else \"significant condensation\"\n                summary_parts.append(f\"\u2022 Change assessment: {change_type}\")\n            else:\n                summary_parts.append(\"\u2022 Change assessment: focused improvements\")\n            \n            return \"\\n\".join(summary_parts)\n            \n        except Exception as e:\n            logger.error(f\"Error creating editing summary: {e}\")\n            return \"Error creating editing summary\"",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/text_rewriter.py",
      "tags": [
        "text-rewriting",
        "document-processing",
        "annotation-analysis",
        "llm",
        "ai-editing",
        "async",
        "natural-language-processing",
        "content-generation",
        "document-revision"
      ],
      "updated_at": "2025-12-07T00:43:10.102640",
      "usage_example": "import asyncio\nfrom typing import List, Dict, Any\nfrom llm_handler import LLMHandler\nfrom text_rewriter import TextRewriter\n\n# Initialize the LLM handler\nllm_handler = LLMHandler(api_key='your-api-key')\n\n# Create TextRewriter instance\nrewriter = TextRewriter(llm_handler)\n\n# Define annotations detected from document\nannotations = [\n    {'annotation_type': 'strikethrough', 'confidence': 0.85, 'area': 1200},\n    {'annotation_type': 'highlight', 'confidence': 0.92, 'area': 800}\n]\n\noriginal_text = \"This is the original document content with some sections marked for deletion and others highlighted for emphasis.\"\n\n# Rewrite document based on annotations\nasync def main():\n    rewritten = await rewriter.rewrite_document_from_annotations(original_text, annotations)\n    if rewritten:\n        print(f\"Rewritten: {rewritten}\")\n    \n    # Get improvement suggestions\n    suggestions = await rewriter.generate_improvement_suggestions(original_text, annotations)\n    print(f\"Suggestions: {suggestions}\")\n    \n    # Create editing summary\n    if rewritten:\n        summary = await rewriter.create_editing_summary(original_text, rewritten, annotations)\n        print(f\"Summary: {summary}\")\n\nasyncio.run(main())"
    },
    {
      "best_practices": [
        "The class is stateless after initialization - safe to reuse the same instance for multiple formatting operations",
        "Use format_compact_response() when you have structured data from your LLM (recommended for better control)",
        "Use parse_llm_response_to_compact() for quick conversion of raw LLM text, but expect less precise extraction",
        "The CompactSection dataclass must be defined before using this class (it's referenced but not included in the source)",
        "Compact output is optimized for readability on e-ink displays - test rendering on target device",
        "Symbol mappings can be customized by modifying self.symbols dictionary after instantiation",
        "The formatter limits extracted content (e.g., max 5 main points, 3 equations) to prevent overflow on small displays",
        "Text extraction uses regex patterns that may not capture all variations - consider pre-structuring LLM output for critical applications",
        "The _apply_compact_notation() method is aggressive in removing redundant words - review output to ensure meaning is preserved"
      ],
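      "best_practices_customization_sketch": "Editor's sketch, not part of the original source (assumes CompactResponseFormatter and CompactSection are importable from compact_formatter): overriding entries in the symbols dict after instantiation, per the customization best practice above; the ASCII replacement markers are arbitrary examples.\n\nfrom compact_formatter import CompactResponseFormatter\n\nformatter = CompactResponseFormatter()\n# symbols is a plain dict, so existing icons can be overridden after instantiation,\n# e.g. swapping emoji for ASCII markers on displays without emoji fonts\nformatter.symbols['key_insight'] = '>>'\nformatter.symbols['action'] = '[do]'\n\ncompact = formatter.format_compact_response({\n    'key_insight': 'x = 5 is the only solution',\n    'actions': ['Verify by substitution']\n})\nprint(compact)",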
      "class_interface": {
        "attributes": [
          {
            "description": "Dictionary mapping content type names to emoji/Unicode symbols for visual categorization (e.g., 'math': '\ud83d\udcd0', 'action': '\u2705')",
            "is_class_variable": false,
            "name": "symbols",
            "type": "Dict[str, str]"
          },
          {
            "description": "Dictionary mapping mathematical/logical terms to their compact symbol representations (e.g., 'equals': ' = ', 'therefore': ' \u2234 ')",
            "is_class_variable": false,
            "name": "notation_patterns",
            "type": "Dict[str, str]"
          }
        ],
        "methods": [
          {
            "is_property": false,
            "is_static": false,
            "name": "__init__",
            "parameters": {},
            "purpose": "Initialize the formatter with symbol mappings and notation patterns",
            "returns": "None - initializes instance with symbols and notation_patterns dictionaries",
            "signature": "__init__(self)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "format_compact_response",
            "parameters": {
              "analysis_result": "Dictionary containing structured analysis with keys like 'key_insight', 'main_points', 'equations', 'calculations', 'solutions', 'actions', 'notes', 'verification'"
            },
            "purpose": "Convert a structured analysis dictionary into compact formatted text with symbols",
            "returns": "Compact formatted string with emoji icons and abbreviated notation, organized by priority",
            "signature": "format_compact_response(self, analysis_result: Dict[str, Any]) -> str"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "parse_llm_response_to_compact",
            "parameters": {
              "llm_response": "Raw text response from an LLM that needs to be condensed"
            },
            "purpose": "Parse raw verbose LLM text and convert it to compact format by extracting key components",
            "returns": "Compact formatted string after extracting and structuring content from the raw text",
            "signature": "parse_llm_response_to_compact(self, llm_response: str) -> str"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_format_bullet_points",
            "parameters": {
              "points": "List of text strings to format as bullet points"
            },
            "purpose": "Format a list of points into compact bullet-point style with notation applied",
            "returns": "Newline-separated string of bullet points with compact notation applied",
            "signature": "_format_bullet_points(self, points: List[str]) -> str"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_format_mathematics",
            "parameters": {
              "analysis": "Dictionary potentially containing 'equations', 'calculations', and 'solutions' keys"
            },
            "purpose": "Extract and format mathematical content (equations, calculations, solutions) in compact notation",
            "returns": "Pipe-separated string of mathematical expressions with compact notation",
            "signature": "_format_mathematics(self, analysis: Dict[str, Any]) -> str"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_apply_compact_notation",
            "parameters": {
              "text": "Text string to be compacted"
            },
            "purpose": "Replace verbose phrases with mathematical/logical symbols and remove redundant words",
            "returns": "Compacted text with symbols replacing words (e.g., 'equals' \u2192 '=', 'therefore' \u2192 '\u2234')",
            "signature": "_apply_compact_notation(self, text: str) -> str"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_build_compact_output",
            "parameters": {
              "sections": "List of CompactSection objects containing icon, title, content, and priority"
            },
            "purpose": "Assemble final compact output from sorted sections with icons",
            "returns": "Newline-separated string of formatted sections sorted by priority",
            "signature": "_build_compact_output(self, sections: List[CompactSection]) -> str"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_extract_key_insight",
            "parameters": {
              "text": "Raw text to extract insight from"
            },
            "purpose": "Extract the main insight or conclusion from raw text using pattern matching",
            "returns": "Extracted insight string (max 100 chars) or None if not found",
            "signature": "_extract_key_insight(self, text: str) -> Optional[str]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_extract_main_points",
            "parameters": {
              "text": "Raw text to extract points from"
            },
            "purpose": "Extract bullet points, numbered lists, or main ideas from text",
            "returns": "List of up to 5 extracted and compacted main points",
            "signature": "_extract_main_points(self, text: str) -> List[str]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_extract_equations",
            "parameters": {
              "text": "Raw text containing mathematical content"
            },
            "purpose": "Extract mathematical equations and expressions from text",
            "returns": "List of up to 3 extracted equation strings",
            "signature": "_extract_equations(self, text: str) -> List[str]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_extract_solutions",
            "parameters": {
              "text": "Raw text containing solutions"
            },
            "purpose": "Extract solutions, answers, or results from text",
            "returns": "List of up to 3 extracted solution strings",
            "signature": "_extract_solutions(self, text: str) -> List[str]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_extract_actions",
            "parameters": {
              "text": "Raw text containing action items"
            },
            "purpose": "Extract action items, recommendations, or next steps from text",
            "returns": "List of up to 3 extracted action strings",
            "signature": "_extract_actions(self, text: str) -> List[str]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_extract_verification",
            "parameters": {
              "text": "Raw text containing verification steps"
            },
            "purpose": "Extract verification or checking information from text",
            "returns": "Extracted verification string (max 80 chars) or None if not found",
            "signature": "_extract_verification(self, text: str) -> Optional[str]"
          }
        ]
      },
      "complexity": "moderate",
      "component_type": "class",
      "conditional_imports": [],
      "created_at": "2025-12-06 23:42:37",
      "decorators": [],
      "dependencies": [
        "re",
        "json",
        "typing",
        "dataclasses"
      ],
      "description": "A formatter class that converts verbose LLM responses into compact, symbol-rich text optimized for e-ink displays by using Unicode symbols, mathematical notation, and abbreviated formatting.",
      "docstring": "Generates compact, symbol-rich responses for e-ink displays",
      "id": 1941,
      "imports": [
        "import re",
        "import json",
        "from typing import Dict",
        "from typing import List",
        "from typing import Any",
        "from typing import Optional",
        "from dataclasses import dataclass"
      ],
      "imports_required": [
        "import re",
        "import json",
        "from typing import Dict, List, Any, Optional",
        "from dataclasses import dataclass"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 437,
      "line_start": 22,
      "name": "CompactResponseFormatter",
      "parameters": [
        {
          "annotation": "",
          "default": null,
          "name": "bases"
        }
      ],
      "parameters_explained": {
        "__init__": "No parameters required. The constructor initializes two dictionaries: 'symbols' containing emoji/icon mappings for different content types (math, science, actions, etc.) and 'notation_patterns' containing mathematical and logical symbols for compact notation."
      },
      "parent_class": null,
      "purpose": "This class is designed to transform standard, verbose language model outputs into highly condensed formats suitable for e-ink displays with limited screen space. It replaces common phrases with symbols (e.g., '\u2192' for 'implies'), uses mathematical notation (e.g., '\u2234' for 'therefore'), and structures content with emoji icons for quick visual scanning. The formatter can either work with pre-structured analysis dictionaries or parse raw LLM text to extract key components like insights, equations, action items, and verification steps.",
      "return_annotation": null,
      "return_explained": "Instantiation returns a CompactResponseFormatter object. The main methods return strings: format_compact_response() and parse_llm_response_to_compact() both return compact formatted strings with symbols and abbreviated text. Helper methods return extracted components (lists of strings, optional strings, or formatted strings).",
      "settings_required": [
        "Requires CompactSection dataclass to be defined (referenced but not included in source code)",
        "No environment variables or external configuration needed"
      ],
      "source_code": "class CompactResponseFormatter:\n    \"\"\"Generates compact, symbol-rich responses for e-ink displays\"\"\"\n    \n    def __init__(self):\n        # Symbol mapping for different content types\n        self.symbols = {\n            # Core symbols\n            'key_insight': '\ud83c\udfaf',\n            'summary': '\ud83d\udccb', \n            'action': '\u2705',\n            'warning': '\u26a0\ufe0f',\n            'error': '\u274c',\n            'question': '\u2753',\n            'info': '\u2139\ufe0f',\n            \n            # Subject-specific symbols  \n            'math': '\ud83d\udcd0',\n            'equation': '\ud83e\uddee',\n            'formula': '\u2211',\n            'calculation': '\ud83d\udd22',\n            'geometry': '\ud83d\udccf',\n            'graph': '\ud83d\udcca',\n            \n            'text': '\ud83d\udcdd',\n            'language': '\ud83d\udde3\ufe0f',\n            'translation': '\ud83c\udf10',\n            'grammar': '\ud83d\udcd6',\n            'writing': '\u270d\ufe0f',\n            \n            'science': '\ud83d\udd2c',\n            'chemistry': '\u2697\ufe0f',\n            'physics': '\u269b\ufe0f',\n            'biology': '\ud83e\uddec',\n            'medicine': '\ud83d\udc8a',\n            \n            'code': '\ud83d\udcbb',\n            'algorithm': '\ud83d\udd04',\n            'data': '\ud83d\udcca',\n            'logic': '\ud83e\udde0',\n            \n            'design': '\ud83c\udfa8',\n            'diagram': '\ud83d\udccb',\n            'flowchart': '\ud83d\udd00',\n            'sketch': '\u270f\ufe0f',\n            \n            # Process symbols\n            'step': '\u27a4',\n            'next': '\u2192',\n            'result': '=',\n            'conclusion': '\u2234',\n            'therefore': '\u2234',\n            'because': '\u2235',\n            'check': '\u2713',\n            'cross': '\u2717',\n            'reference': '\ud83d\udccc',\n            'note': '\ud83d\udca1'\n        }\n        \n        # Compact notation patterns\n        self.notation_patterns = {\n            'equals': ' = ',\n            'approximately': ' \u2248 ',\n            'not_equal': ' \u2260 ',\n            'less_than': ' < ',\n            'greater_than': ' > ',\n            'less_equal': ' \u2264 ',\n            'greater_equal': ' \u2265 ',\n            'plus_minus': ' \u00b1 ',\n            'infinity': '\u221e',\n            'therefore': ' \u2234 ',\n            'because': ' \u2235 ',\n            'implies': ' \u2192 ',\n            'if_and_only_if': ' \u2194 ',\n            'for_all': '\u2200',\n            'exists': '\u2203',\n            'element_of': ' \u2208 ',\n            'subset': ' \u2286 ',\n            'union': ' \u222a ',\n            'intersection': ' \u2229 ',\n            'degree': '\u00b0',\n            'squared': '\u00b2',\n            'cubed': '\u00b3',\n            'sqrt': '\u221a',\n            'pi': '\u03c0',\n            'alpha': '\u03b1',\n            'beta': '\u03b2',\n            'gamma': '\u03b3',\n            'delta': '\u0394',\n            'theta': '\u03b8',\n            'lambda': '\u03bb',\n            'mu': '\u03bc',\n            'sigma': '\u03a3'\n        }\n    \n    def format_compact_response(self, analysis_result: Dict[str, Any]) -> str:\n        \"\"\"\n        Convert standard LLM response to compact format\n        \n        Args:\n            analysis_result: Dictionary containing analysis from LLM\n            \n        Returns:\n            Compact formatted response string\n        
\"\"\"\n        sections = []\n        \n        # 1. Key insight (always first)\n        if 'key_insight' in analysis_result:\n            sections.append(CompactSection(\n                icon=self.symbols['key_insight'],\n                title='',\n                content=analysis_result['key_insight'],\n                priority=1\n            ))\n        \n        # 2. Main content (bullets/steps)\n        if 'main_points' in analysis_result:\n            content = self._format_bullet_points(analysis_result['main_points'])\n            sections.append(CompactSection(\n                icon=self.symbols['summary'],\n                title='',\n                content=content,\n                priority=1\n            ))\n        \n        # 3. Mathematics (if present)\n        if 'equations' in analysis_result or 'calculations' in analysis_result:\n            math_content = self._format_mathematics(analysis_result)\n            if math_content:\n                sections.append(CompactSection(\n                    icon=self.symbols['math'],\n                    title='',\n                    content=math_content,\n                    priority=1\n                ))\n        \n        # 4. Action items\n        if 'actions' in analysis_result:\n            actions = ' | '.join(analysis_result['actions'])\n            sections.append(CompactSection(\n                icon=self.symbols['action'],\n                title='',\n                content=actions,\n                priority=2\n            ))\n        \n        # 5. Additional notes\n        if 'notes' in analysis_result:\n            sections.append(CompactSection(\n                icon=self.symbols['note'],\n                title='',\n                content=analysis_result['notes'],\n                priority=3\n            ))\n        \n        # 6. 
References/verification\n        if 'verification' in analysis_result:\n            sections.append(CompactSection(\n                icon=self.symbols['check'],\n                title='',\n                content=analysis_result['verification'],\n                priority=3\n            ))\n        \n        # Build final compact response\n        return self._build_compact_output(sections)\n    \n    def _format_bullet_points(self, points: List[str]) -> str:\n        \"\"\"Format bullet points in compact style\"\"\"\n        if not points:\n            return \"\"\n        \n        # Use compact bullets and abbreviations\n        formatted_points = []\n        for point in points:\n            # Apply compact notation\n            compact_point = self._apply_compact_notation(point)\n            formatted_points.append(f\"\u2022 {compact_point}\")\n        \n        return '\\n'.join(formatted_points)\n    \n    def _format_mathematics(self, analysis: Dict[str, Any]) -> str:\n        \"\"\"Format mathematical content in compact notation\"\"\"\n        math_parts = []\n        \n        # Handle equations\n        if 'equations' in analysis:\n            equations = analysis['equations']\n            if isinstance(equations, list):\n                # Chain equations with arrows\n                math_parts.append(' \u2192 '.join(equations))\n            else:\n                math_parts.append(str(equations))\n        \n        # Handle calculations\n        if 'calculations' in analysis:\n            calc = analysis['calculations']\n            if isinstance(calc, dict):\n                for step, result in calc.items():\n                    math_parts.append(f\"{step} = {result}\")\n            else:\n                math_parts.append(str(calc))\n        \n        # Handle solutions\n        if 'solutions' in analysis:\n            solutions = analysis['solutions']\n            if isinstance(solutions, list):\n                math_parts.append('Solutions: ' + ', '.join(map(str, solutions)))\n            else:\n                math_parts.append(f\"Solution: {solutions}\")\n        \n        result = ' | '.join(math_parts)\n        return self._apply_compact_notation(result)\n    \n    def _apply_compact_notation(self, text: str) -> str:\n        \"\"\"Apply mathematical and logical compact notation\"\"\"\n        # Replace common phrases with symbols\n        replacements = {\n            'equals': '=',\n            'approximately': '\u2248',\n            'less than': '<',\n            'greater than': '>',\n            'less than or equal': '\u2264',\n            'greater than or equal': '\u2265',\n            'plus or minus': '\u00b1',\n            'therefore': '\u2234',\n            'because': '\u2235',\n            'implies': '\u2192',\n            'degrees': '\u00b0',\n            'squared': '\u00b2',\n            'cubed': '\u00b3',\n            'square root': '\u221a',\n            'pi': '\u03c0',\n            'infinity': '\u221e',\n            'delta': '\u0394',\n            'theta': '\u03b8',\n            'alpha': '\u03b1',\n            'beta': '\u03b2',\n            'gamma': '\u03b3',\n            'sigma': '\u03a3'\n        }\n        \n        result = text\n        for phrase, symbol in replacements.items():\n            result = re.sub(rf'\\b{re.escape(phrase)}\\b', symbol, result, flags=re.IGNORECASE)\n        \n        # Remove redundant words\n        redundant_patterns = [\n            r'\\bwe can see that\\b',\n            r'\\bit is clear that\\b', \n            
r'\\bobviously\\b',\n            r'\\blet me\\b',\n            r'\\blet us\\b',\n            r'\\bI can\\b',\n            r'\\bI will\\b',\n            r'\\bfirst,?\\s*',\n            r'\\bsecond,?\\s*',\n            r'\\bthird,?\\s*',\n            r'\\bnext,?\\s*',\n            r'\\bfinally,?\\s*'\n        ]\n        \n        for pattern in redundant_patterns:\n            result = re.sub(pattern, '', result, flags=re.IGNORECASE)\n        \n        # Clean up extra spaces\n        result = re.sub(r'\\s+', ' ', result).strip()\n        \n        return result\n    \n    def _build_compact_output(self, sections: List[CompactSection]) -> str:\n        \"\"\"Build final compact output from sections\"\"\"\n        # Sort by priority\n        sections.sort(key=lambda x: x.priority)\n        \n        output_lines = []\n        \n        for section in sections:\n            if section.content.strip():\n                if section.title:\n                    line = f\"{section.icon} {section.title}: {section.content}\"\n                else:\n                    line = f\"{section.icon} {section.content}\"\n                output_lines.append(line)\n        \n        return '\\n'.join(output_lines)\n    \n    def parse_llm_response_to_compact(self, llm_response: str) -> str:\n        \"\"\"\n        Parse a standard verbose LLM response and convert to compact format\n        \n        Args:\n            llm_response: Raw response from LLM\n            \n        Returns:\n            Compact formatted version\n        \"\"\"\n        # Simple parsing strategy - look for key patterns\n        analysis = {\n            'key_insight': self._extract_key_insight(llm_response),\n            'main_points': self._extract_main_points(llm_response),\n            'equations': self._extract_equations(llm_response),\n            'solutions': self._extract_solutions(llm_response),\n            'actions': self._extract_actions(llm_response),\n            'verification': self._extract_verification(llm_response)\n        }\n        \n        # Remove empty values\n        analysis = {k: v for k, v in analysis.items() if v}\n        \n        return self.format_compact_response(analysis)\n    \n    def _extract_key_insight(self, text: str) -> Optional[str]:\n        \"\"\"Extract the main insight/conclusion from text\"\"\"\n        patterns = [\n            r'(?:the main|key|primary|central)\\s+(?:insight|finding|point|conclusion|result)(?:\\s+is)?:?\\s*(.+?)(?:\\.|$)',\n            r'(?:in summary|to summarize|conclusion)[:,]?\\s*(.+?)(?:\\.|$)',\n            r'(?:this shows|this indicates|this means)(?:\\s+that)?\\s*(.+?)(?:\\.|$)'\n        ]\n        \n        for pattern in patterns:\n            match = re.search(pattern, text, re.IGNORECASE | re.DOTALL)\n            if match:\n                return match.group(1).strip()[:100]  # Limit length\n        \n        # Fallback: first sentence\n        sentences = re.split(r'[.!?]', text)\n        if sentences:\n            return sentences[0].strip()[:100]\n        \n        return None\n    \n    def _extract_main_points(self, text: str) -> List[str]:\n        \"\"\"Extract bullet points or main ideas\"\"\"\n        points = []\n        \n        # Look for numbered lists\n        numbered_pattern = r'^\\d+[\\.\\)]\\s*(.+?)(?=\\n\\d+[\\.\\)]|\\n\\n|\\Z)'\n        numbered_matches = re.findall(numbered_pattern, text, re.MULTILINE | re.DOTALL)\n        if numbered_matches:\n            points.extend([p.strip() for p in numbered_matches])\n        \n        # Look 
for bullet points\n        bullet_pattern = r'^[-*\u2022]\\s*(.+?)(?=\\n[-*\u2022]|\\n\\n|\\Z)'\n        bullet_matches = re.findall(bullet_pattern, text, re.MULTILINE | re.DOTALL)\n        if bullet_matches:\n            points.extend([p.strip() for p in bullet_matches])\n        \n        # If no lists found, split by sentences and take first few\n        if not points:\n            sentences = re.split(r'[.!?]', text)\n            points = [s.strip() for s in sentences[:3] if s.strip()]\n        \n        # Clean up and limit\n        return [self._apply_compact_notation(p) for p in points[:5]]\n    \n    def _extract_equations(self, text: str) -> List[str]:\n        \"\"\"Extract mathematical equations\"\"\"\n        equations = []\n        \n        # Look for mathematical expressions\n        math_patterns = [\n            r'([a-zA-Z]\\w*\\s*[=\u2248<>\u2264\u2265]\\s*[^.]+?)(?:\\.|$)',\n            r'(\\([^)]+\\)\\s*[=\u2248]\\s*[^.]+?)(?:\\.|$)',\n            r'([xyz\u00b2\u00b3]\\s*[+\\-*/=]\\s*[^.]+?)(?:\\.|$)'\n        ]\n        \n        for pattern in math_patterns:\n            matches = re.findall(pattern, text)\n            equations.extend([m.strip() for m in matches])\n        \n        return equations[:3]  # Limit to first 3\n    \n    def _extract_solutions(self, text: str) -> List[str]:\n        \"\"\"Extract solutions or answers\"\"\"\n        solution_patterns = [\n            r'(?:solution|answer|result)(?:s)?\\s*(?:is|are)?:?\\s*([^.]+)',\n            r'([xyz]\\s*=\\s*[^.]+)',\n            r'(?:therefore|thus|hence),?\\s*([^.]+)'\n        ]\n        \n        solutions = []\n        for pattern in solution_patterns:\n            matches = re.findall(pattern, text, re.IGNORECASE)\n            solutions.extend([m.strip() for m in matches])\n        \n        return solutions[:3]\n    \n    def _extract_actions(self, text: str) -> List[str]:\n        \"\"\"Extract action items or next steps\"\"\"\n        action_patterns = [\n            r'(?:next step|action|recommendation|suggestion)(?:s)?:?\\s*(.+?)(?:\\.|$)',\n            r'(?:you should|recommended to|suggested to)\\s*(.+?)(?:\\.|$)',\n            r'(?:consider|try|attempt)\\s*(.+?)(?:\\.|$)'\n        ]\n        \n        actions = []\n        for pattern in action_patterns:\n            matches = re.findall(pattern, text, re.IGNORECASE)\n            actions.extend([m.strip() for m in matches])\n        \n        return actions[:3]\n    \n    def _extract_verification(self, text: str) -> Optional[str]:\n        \"\"\"Extract verification or check information\"\"\"\n        verify_patterns = [\n            r'(?:verify|check|confirm)(?:ing)?:?\\s*(.+?)(?:\\.|$)',\n            r'(?:proof|verification):\\s*(.+?)(?:\\.|$)'\n        ]\n        \n        for pattern in verify_patterns:\n            match = re.search(pattern, text, re.IGNORECASE)\n            if match:\n                return match.group(1).strip()[:80]\n        \n        return None",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/compact_formatter.py",
      "tags": [
        "formatting",
        "text-processing",
        "e-ink",
        "display-optimization",
        "compact-notation",
        "mathematical-symbols",
        "unicode",
        "response-formatting",
        "llm-output",
        "text-compression",
        "symbol-mapping"
      ],
      "updated_at": "2025-12-07T00:42:37.358587",
      "usage_example": "# Instantiate the formatter\nformatter = CompactResponseFormatter()\n\n# Method 1: Format pre-structured analysis\nanalysis = {\n    'key_insight': 'The equation has two real solutions',\n    'main_points': ['Factor the quadratic', 'Apply zero product property', 'Solve for x'],\n    'equations': ['x\u00b2 - 5x + 6 = 0', '(x-2)(x-3) = 0'],\n    'solutions': ['x = 2', 'x = 3'],\n    'actions': ['Verify by substitution']\n}\ncompact_output = formatter.format_compact_response(analysis)\nprint(compact_output)\n# Output: \ud83c\udfaf The equation has two real solutions\n#         \ud83d\udccb \u2022 Factor the quadratic\n#             \u2022 Apply zero product property\n#             \u2022 Solve for x\n#         \ud83d\udcd0 x\u00b2 - 5x + 6 = 0 \u2192 (x-2)(x-3) = 0 | Solutions: x = 2, x = 3\n\n# Method 2: Parse raw LLM response\nllm_text = \"The main conclusion is that x equals 5. First, we solve the equation x + 3 = 8. Therefore x = 5.\"\ncompact = formatter.parse_llm_response_to_compact(llm_text)\nprint(compact)"
    },
    {
      "best_practices": [
        "Use consistent icon formats across all CompactSection instances (e.g., all emojis or all icon identifiers)",
        "Keep priority values within the documented range (1-3) for consistency, though the class doesn't enforce this constraint",
        "Consider the priority value when displaying or processing sections - lower numbers indicate higher priority",
        "The content field should contain pre-formatted text if specific formatting is required, as the class doesn't handle formatting logic",
        "Since this is a dataclass, instances are mutable by default - be cautious when sharing instances across different parts of your application",
        "Use frozen=True in the @dataclass decorator if immutability is desired: @dataclass(frozen=True)",
        "The class provides automatic __eq__ comparison based on all fields, making it easy to compare sections for equality",
        "Consider creating factory methods or builder patterns if you need to construct CompactSection instances with complex validation or transformation logic"
      ],
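      "best_practices_frozen_sketch": "Editor's sketch, not in the original source: what the frozen=True variant suggested in the best practices would look like; FrozenCompactSection is a hypothetical name used here to avoid shadowing the real class.\n\nfrom dataclasses import dataclass\n\n@dataclass(frozen=True)\nclass FrozenCompactSection:\n    icon: str\n    title: str\n    content: str\n    priority: int = 1\n\nsection = FrozenCompactSection(icon='*', title='Note', content='Read-only section')\n# section.priority = 2  # would raise dataclasses.FrozenInstanceError",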
      "class_interface": {
        "attributes": [
          {
            "description": "The icon representing the section, typically an emoji or icon identifier",
            "is_class_variable": false,
            "name": "icon",
            "type": "str"
          },
          {
            "description": "The title or heading of the section",
            "is_class_variable": false,
            "name": "title",
            "type": "str"
          },
          {
            "description": "The main content or body text of the section",
            "is_class_variable": false,
            "name": "content",
            "type": "str"
          },
          {
            "description": "The priority level of the section (1=high, 2=medium, 3=low), defaults to 1",
            "is_class_variable": false,
            "name": "priority",
            "type": "int"
          }
        ],
        "methods": [
          {
            "is_property": false,
            "is_static": false,
            "name": "__init__",
            "parameters": {
              "content": "String containing the section's content",
              "icon": "String representing the section's icon",
              "priority": "Integer priority level (1=high, 2=medium, 3=low), defaults to 1",
              "title": "String containing the section's title"
            },
            "purpose": "Initializes a new CompactSection instance with the provided attributes. Auto-generated by the dataclass decorator.",
            "returns": "None - initializes the instance",
            "signature": "__init__(icon: str, title: str, content: str, priority: int = 1) -> None"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "__repr__",
            "parameters": {},
            "purpose": "Returns a string representation of the CompactSection instance. Auto-generated by the dataclass decorator.",
            "returns": "String representation in the format: CompactSection(icon='...', title='...', content='...', priority=...)",
            "signature": "__repr__() -> str"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "__eq__",
            "parameters": {
              "other": "Another object to compare with"
            },
            "purpose": "Compares two CompactSection instances for equality based on all attributes. Auto-generated by the dataclass decorator.",
            "returns": "True if all attributes are equal, False otherwise",
            "signature": "__eq__(other: object) -> bool"
          }
        ]
      },
      "complexity": "simple",
      "component_type": "class",
      "conditional_imports": [],
      "created_at": "2025-12-06 23:41:41",
      "decorators": [
        "dataclass"
      ],
      "dependencies": [
        "dataclasses"
      ],
      "description": "A dataclass representing a section in compact format with an icon, title, content, and priority level.",
      "docstring": "Represents a section in compact format",
      "id": 1940,
      "imports": [
        "import re",
        "import json",
        "from typing import Dict",
        "from typing import List",
        "from typing import Any",
        "from typing import Optional",
        "from dataclasses import dataclass"
      ],
      "imports_required": [
        "from dataclasses import dataclass"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 20,
      "line_start": 15,
      "name": "CompactSection",
      "parameters": [
        {
          "annotation": "",
          "default": null,
          "name": "bases"
        }
      ],
      "parameters_explained": {
        "content": "A string containing the main body text or data of the section, representing the detailed information to be displayed",
        "icon": "A string representing the icon for the section, typically an emoji, Unicode character, or icon identifier that visually represents the section's content",
        "priority": "An integer indicating the importance level of the section (1=high priority, 2=medium priority, 3=low priority). Defaults to 1 (high priority). Used for sorting, filtering, or styling sections based on importance",
        "title": "A string containing the heading or name of the section, used as the primary identifier for the section's content"
      },
      "parent_class": null,
      "purpose": "CompactSection is a data container class used to structure and organize information sections in a compact display format. It stores metadata about a section including its visual representation (icon), heading (title), body text (content), and importance level (priority). This class is typically used in UI rendering, report generation, or data presentation scenarios where content needs to be organized hierarchically with visual indicators.",
      "return_annotation": null,
      "return_explained": "Instantiation returns a CompactSection object with the specified icon, title, content, and priority attributes. As a dataclass, it automatically generates __init__, __repr__, __eq__, and other special methods. The object can be used to access its attributes directly or passed to functions that process section data.",
      "settings_required": [],
      "source_code": "class CompactSection:\n    \"\"\"Represents a section in compact format\"\"\"\n    icon: str\n    title: str\n    content: str\n    priority: int = 1  # 1=high, 2=medium, 3=low",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/compact_formatter.py",
      "tags": [
        "dataclass",
        "data-structure",
        "section",
        "compact-format",
        "ui-component",
        "content-organization",
        "priority-based",
        "display-formatting"
      ],
      "updated_at": "2025-12-07T00:41:41.068079",
      "usage_example": "from dataclasses import dataclass\n\n@dataclass\nclass CompactSection:\n    icon: str\n    title: str\n    content: str\n    priority: int = 1\n\n# Create a high-priority section\nsection1 = CompactSection(\n    icon='\ud83d\udcca',\n    title='Performance Metrics',\n    content='CPU usage: 45%, Memory: 2.3GB',\n    priority=1\n)\n\n# Create a medium-priority section with default priority\nsection2 = CompactSection(\n    icon='\u2699\ufe0f',\n    title='Configuration',\n    content='Settings loaded successfully'\n)\n\n# Access attributes\nprint(section1.icon)  # '\ud83d\udcca'\nprint(section1.title)  # 'Performance Metrics'\nprint(section1.priority)  # 1\n\n# Sort sections by priority\nsections = [section1, section2]\nsorted_sections = sorted(sections, key=lambda s: s.priority)"
    },
    {
      "best_practices": [
        "Always ensure OneDrive integration is available before instantiation (check ONEDRIVE_AVAILABLE flag)",
        "Provide a properly authenticated reMarkable session before creating the processor",
        "The start_watching() method runs indefinitely - use asyncio.run() or await it in an async context",
        "Configure appropriate poll intervals to balance responsiveness and API rate limits (default 60 seconds)",
        "Ensure target folders exist in both OneDrive and reMarkable Cloud before starting",
        "The processor creates temporary files during processing - ensure sufficient disk space",
        "Handle exceptions at the caller level as start_watching() uses return_exceptions=True",
        "The processor will continue running even if one watcher fails to initialize",
        "Logging is configured automatically but can be adjusted via the logger attribute",
        "Files are processed sequentially within each watcher to avoid race conditions",
        "Temporary files are automatically cleaned up after processing",
        "The processor uses the OneDriveProcessor for consistent file handling across sources"
      ],
      "class_interface": {
        "attributes": [
          {
            "description": "Configuration dictionary for OneDrive integration including authentication and folder settings",
            "is_class_variable": false,
            "name": "onedrive_config",
            "type": "Dict"
          },
          {
            "description": "Authenticated reMarkable Cloud session for API access",
            "is_class_variable": false,
            "name": "remarkable_session",
            "type": "object"
          },
          {
            "description": "API key for file processing service",
            "is_class_variable": false,
            "name": "api_key",
            "type": "str"
          },
          {
            "description": "Logger instance for MixedCloudProcessor with INFO level logging",
            "is_class_variable": false,
            "name": "logger",
            "type": "logging.Logger"
          },
          {
            "description": "OneDriveProcessor instance for handling OneDrive file operations and processing",
            "is_class_variable": false,
            "name": "onedrive_processor",
            "type": "OneDriveProcessor"
          },
          {
            "description": "Watcher instance for monitoring the reMarkable input folder",
            "is_class_variable": false,
            "name": "remarkable_input_watcher",
            "type": "RemarkableCloudWatcher"
          },
          {
            "description": "Watcher instance for monitoring the reMarkable 'gpt_out' folder",
            "is_class_variable": false,
            "name": "remarkable_gptout_watcher",
            "type": "RemarkableCloudWatcher"
          },
          {
            "description": "Polling interval in seconds for OneDrive checks (default: 60)",
            "is_class_variable": false,
            "name": "poll_interval",
            "type": "int"
          },
          {
            "description": "Polling interval in seconds for reMarkable Cloud checks (default: 60)",
            "is_class_variable": false,
            "name": "remarkable_poll_interval",
            "type": "int"
          }
        ],
        "methods": [
          {
            "is_property": false,
            "is_static": false,
            "name": "__init__",
            "parameters": {
              "api_key": "API key for file processing service",
              "onedrive_config": "Dictionary with OneDrive settings and poll intervals",
              "remarkable_session": "Authenticated reMarkable Cloud session object"
            },
            "purpose": "Initialize the MixedCloudProcessor with configuration for OneDrive and reMarkable Cloud integration",
            "returns": "None (constructor)",
            "signature": "__init__(self, onedrive_config: Dict, remarkable_session, api_key: str)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "start_watching",
            "parameters": {},
            "purpose": "Start monitoring all configured cloud sources (OneDrive and reMarkable folders) for new files and process them concurrently",
            "returns": "None - runs indefinitely as an async coroutine, gathering results from multiple watcher tasks",
            "signature": "async start_watching(self)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_initialize_remarkable_input_watcher",
            "parameters": {},
            "purpose": "Initialize the watcher for the reMarkable input folder by discovering and locating the target folder UUID",
            "returns": "Boolean indicating whether initialization was successful (True if folder found, False otherwise)",
            "signature": "async _initialize_remarkable_input_watcher(self) -> bool"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_initialize_remarkable_gptout_watcher",
            "parameters": {},
            "purpose": "Initialize the watcher for the reMarkable 'gpt_out' folder",
            "returns": "Boolean indicating whether initialization was successful",
            "signature": "async _initialize_remarkable_gptout_watcher(self) -> bool"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_onedrive_watcher",
            "parameters": {},
            "purpose": "Run the OneDrive file watcher loop, delegating to OneDriveProcessor's start_watching method",
            "returns": "None - runs indefinitely until error or cancellation",
            "signature": "async _onedrive_watcher(self)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_remarkable_input_watcher",
            "parameters": {},
            "purpose": "Run the reMarkable input folder watcher loop, checking for new files at regular intervals and processing them",
            "returns": "None - runs indefinitely in a polling loop with remarkable_poll_interval delays",
            "signature": "async _remarkable_input_watcher(self)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_remarkable_gptout_watcher",
            "parameters": {},
            "purpose": "Run the reMarkable 'gpt_out' folder watcher loop, checking for new files at regular intervals and processing them",
            "returns": "None - runs indefinitely in a polling loop with remarkable_poll_interval delays",
            "signature": "async _remarkable_gptout_watcher(self)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_process_remarkable_file",
            "parameters": {
              "pdf_file": "Path object pointing to the PDF file to process",
              "source": "String identifier for the source folder ('input_folder', 'gpt_out', or 'unknown')"
            },
            "purpose": "Process a PDF file from reMarkable Cloud by creating a temporary copy and delegating to OneDriveProcessor for consistent handling",
            "returns": "None - logs success/failure and cleans up temporary files",
            "signature": "async _process_remarkable_file(self, pdf_file: Path, source: str = 'unknown')"
          }
        ]
      },
      "complexity": "complex",
      "component_type": "class",
      "conditional_imports": [
        {
          "condition": "OneDrive integration must be available (ONEDRIVE_AVAILABLE flag must be True)",
          "import": "from onedrive_client import OneDriveClient, OneDriveProcessor",
          "optional": false
        },
        {
          "condition": "Required for OneDrive authentication, checked via ONEDRIVE_AVAILABLE flag",
          "import": "import msal",
          "optional": false
        },
        {
          "condition": "Required for HTTP requests to cloud services",
          "import": "import requests",
          "optional": false
        }
      ],
      "created_at": "2025-12-06 23:40:37",
      "decorators": [],
      "dependencies": [
        "asyncio",
        "json",
        "re",
        "subprocess",
        "tempfile",
        "shutil",
        "pathlib",
        "typing",
        "datetime",
        "logging",
        "onedrive_client",
        "requests",
        "cloudtest.auth",
        "PyPDF2",
        "PyPDF4",
        "msal"
      ],
      "description": "A cloud integration processor that monitors both OneDrive and reMarkable Cloud for input PDF files, processes them through an API, and manages file synchronization between cloud services.",
      "docstring": "Mixed cloud processor that watches both OneDrive and reMarkable Cloud for input files",
      "id": 1937,
      "imports": [
        "import asyncio",
        "import json",
        "import re",
        "import subprocess",
        "import tempfile",
        "import shutil",
        "from pathlib import Path",
        "from typing import Dict",
        "from typing import List",
        "from typing import Optional",
        "from typing import Any",
        "from typing import Set",
        "from datetime import datetime",
        "import logging",
        "from onedrive_client import OneDriveClient",
        "from onedrive_client import OneDriveProcessor",
        "import requests",
        "from cloudtest.auth import RemarkableAuth",
        "from PyPDF2 import PdfWriter",
        "from PyPDF2 import PdfReader",
        "from PyPDF4 import PdfFileWriter",
        "from PyPDF4 import PdfFileReader"
      ],
      "imports_required": [
        "import asyncio",
        "import shutil",
        "import logging",
        "from pathlib import Path",
        "from typing import Dict",
        "from onedrive_client import OneDriveProcessor",
        "from cloudtest.auth import RemarkableAuth"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 654,
      "line_start": 476,
      "name": "MixedCloudProcessor",
      "parameters": [
        {
          "annotation": "",
          "default": null,
          "name": "bases"
        }
      ],
      "parameters_explained": {
        "api_key": "API key string used for processing files through the backend service. Passed to OneDriveProcessor for file processing operations.",
        "onedrive_config": "Dictionary containing OneDrive configuration settings including authentication details, folder paths, poll intervals, and reMarkable-specific settings. Expected keys: 'poll_interval' (seconds between OneDrive checks), 'remarkable_poll_interval' (seconds between reMarkable checks), 'remarkable_input_folder' (path to reMarkable input folder, defaults to '/E-Ink LLM Input'). Must contain all necessary OneDrive authentication parameters required by OneDriveProcessor.",
        "remarkable_session": "An authenticated reMarkable Cloud session object (typically from RemarkableAuth) that provides access to the reMarkable Cloud API. Must be pre-authenticated and ready for API calls."
      },
      "parent_class": null,
      "purpose": "MixedCloudProcessor orchestrates file watching and processing across multiple cloud platforms. It monitors OneDrive folders and two reMarkable Cloud folders (a regular input folder and a 'gpt_out' folder) for new PDF files, processes them through an API using the OneDriveProcessor, and handles the complete lifecycle of file ingestion, processing, and output management. This enables seamless integration between e-ink devices (reMarkable) and cloud storage (OneDrive) for document processing workflows.",
      "return_annotation": null,
      "return_explained": "The constructor returns a MixedCloudProcessor instance. The start_watching() method returns None but runs indefinitely as an async coroutine, gathering results from multiple concurrent watcher tasks. Internal processing methods return boolean success indicators or None.",
      "settings_required": [
        "ONEDRIVE_AVAILABLE flag must be True (indicates msal and requests are installed)",
        "OneDrive configuration dictionary with authentication credentials (client_id, client_secret, tenant_id, etc.)",
        "Authenticated reMarkable Cloud session (via RemarkableAuth)",
        "API key for file processing service",
        "RemarkableCloudWatcher class must be available in the same module",
        "OneDrive folders must be configured and accessible",
        "reMarkable Cloud folders must exist: input folder (configurable) and 'gpt_out' folder"
      ],
      "source_code": "class MixedCloudProcessor:\n    \"\"\"Mixed cloud processor that watches both OneDrive and reMarkable Cloud for input files\"\"\"\n    \n    def __init__(self, onedrive_config: Dict, remarkable_session, api_key: str):\n        self.onedrive_config = onedrive_config\n        self.remarkable_session = remarkable_session\n        self.api_key = api_key\n        \n        # Setup logging\n        self.logger = logging.getLogger('MixedCloudProcessor')\n        self.logger.setLevel(logging.INFO)\n        \n        if not ONEDRIVE_AVAILABLE:\n            raise ImportError(\"OneDrive integration not available. Install with: pip install msal requests\")\n        \n        # Initialize OneDrive processor\n        self.onedrive_processor = OneDriveProcessor(onedrive_config, api_key)\n        \n        # Initialize reMarkable watchers - both input folder and gpt_out\n        self.remarkable_input_watcher = RemarkableCloudWatcher(remarkable_session, self.logger)\n        self.remarkable_gptout_watcher = RemarkableCloudWatcher(remarkable_session, self.logger)\n        \n        # Configuration\n        self.poll_interval = onedrive_config.get('poll_interval', 60)\n        self.remarkable_poll_interval = onedrive_config.get('remarkable_poll_interval', 60)\n    \n    async def start_watching(self):\n        \"\"\"Start watching both OneDrive and reMarkable Cloud for input files\"\"\"\n        self.logger.info(\"\ud83d\ude80 Starting Mixed Cloud Processor\")\n        self.logger.info(\"   \ud83d\udcc1 OneDrive: Input and Output\")\n        self.logger.info(\"   \ud83c\udf10 reMarkable Cloud: Input (regular folder + gpt_out folder)\")\n        \n        # Initialize reMarkable watchers\n        input_init_success = await self._initialize_remarkable_input_watcher()\n        gptout_init_success = await self._initialize_remarkable_gptout_watcher()\n        \n        if not input_init_success and not gptout_init_success:\n            self.logger.warning(\"\u26a0\ufe0f Both reMarkable watchers failed to initialize, continuing with OneDrive only\")\n        \n        # Start all watchers concurrently\n        tasks = [\n            self._onedrive_watcher(),\n        ]\n        \n        if input_init_success:\n            tasks.append(self._remarkable_input_watcher())\n        \n        if gptout_init_success:\n            tasks.append(self._remarkable_gptout_watcher())\n        \n        await asyncio.gather(*tasks, return_exceptions=True)\n    \n    async def _initialize_remarkable_input_watcher(self):\n        \"\"\"Initialize the reMarkable input folder watcher\"\"\"\n        try:\n            # Set target folder for input watcher (regular input folder)\n            target_folder = self.onedrive_config.get('remarkable_input_folder', '/E-Ink LLM Input')\n            self.logger.info(f\"\ud83d\udd0d Initializing reMarkable input watcher for folder: {target_folder}\")\n            \n            # Find the target folder\n            all_nodes = await self.remarkable_input_watcher._discover_all_nodes()\n            \n            for uuid, node in all_nodes.items():\n                if (node.get('node_type') == 'folder' and \n                    node.get('name', '').strip() == target_folder.strip('/').split('/')[-1]):\n                    self.remarkable_input_watcher.gpt_out_folder_uuid = uuid\n                    self.logger.info(f\"\u2705 Found reMarkable input folder: {target_folder} ({uuid})\")\n                    return True\n            \n            self.logger.warning(f\"\u26a0\ufe0f 
reMarkable input folder '{target_folder}' not found\")\n            return False\n            \n        except Exception as e:\n            self.logger.error(f\"\u274c Failed to initialize reMarkable input watcher: {e}\")\n            return False\n    \n    async def _initialize_remarkable_gptout_watcher(self):\n        \"\"\"Initialize the reMarkable gpt_out folder watcher\"\"\"\n        try:\n            success = await self.remarkable_gptout_watcher.initialize()\n            if success:\n                self.logger.info(\"\u2705 reMarkable gpt_out watcher initialized\")\n            else:\n                self.logger.warning(\"\u26a0\ufe0f reMarkable gpt_out folder not found\")\n            return success\n        except Exception as e:\n            self.logger.error(f\"\u274c Failed to initialize reMarkable gpt_out watcher: {e}\")\n            return False\n    \n    async def _onedrive_watcher(self):\n        \"\"\"Run OneDrive watcher\"\"\"\n        try:\n            self.logger.info(\"\ud83d\udcc1 Starting OneDrive watcher...\")\n            await self.onedrive_processor.start_watching()\n        except Exception as e:\n            self.logger.error(f\"\u274c OneDrive watcher error: {e}\")\n    \n    async def _remarkable_input_watcher(self):\n        \"\"\"Run reMarkable Cloud input folder watcher\"\"\"\n        try:\n            self.logger.info(\"\ud83c\udf10 Starting reMarkable input folder watcher...\")\n            \n            while True:\n                try:\n                    # Check for new files in reMarkable input folder\n                    new_files = await self.remarkable_input_watcher.check_for_new_files()\n                    \n                    # Process each new file through the OneDrive processor\n                    for pdf_file in new_files:\n                        self.logger.info(f\"\ud83d\udcc4 Processing reMarkable input file: {pdf_file.name}\")\n                        \n                        try:\n                            # Process the file using OneDrive processor's file processing logic\n                            await self._process_remarkable_file(pdf_file, source=\"input_folder\")\n                        except Exception as e:\n                            self.logger.error(f\"\u274c Error processing {pdf_file.name}: {e}\")\n                \n                except Exception as e:\n                    self.logger.error(f\"\u274c reMarkable input watcher loop error: {e}\")\n                \n                # Wait before next check\n                await asyncio.sleep(self.remarkable_poll_interval)\n                \n        except Exception as e:\n            self.logger.error(f\"\u274c reMarkable input watcher error: {e}\")\n    \n    async def _remarkable_gptout_watcher(self):\n        \"\"\"Run reMarkable Cloud gpt_out folder watcher\"\"\"\n        try:\n            self.logger.info(\"\ud83c\udf10 Starting reMarkable gpt_out folder watcher...\")\n            \n            while True:\n                try:\n                    # Check for new files in reMarkable gpt_out folder\n                    new_files = await self.remarkable_gptout_watcher.check_for_new_files()\n                    \n                    # Process each new file through the OneDrive processor\n                    for pdf_file in new_files:\n                        self.logger.info(f\"\ud83d\udcc4 Processing reMarkable gpt_out file: {pdf_file.name}\")\n                        \n                        try:\n                            # Process the file using 
OneDrive processor's file processing logic\n                            await self._process_remarkable_file(pdf_file, source=\"gpt_out\")\n                        except Exception as e:\n                            self.logger.error(f\"\u274c Error processing {pdf_file.name}: {e}\")\n                \n                except Exception as e:\n                    self.logger.error(f\"\u274c reMarkable gpt_out watcher loop error: {e}\")\n                \n                # Wait before next check\n                await asyncio.sleep(self.remarkable_poll_interval)\n                \n        except Exception as e:\n            self.logger.error(f\"\u274c reMarkable gpt_out watcher error: {e}\")\n    \n    async def _process_remarkable_file(self, pdf_file: Path, source: str = \"unknown\"):\n        \"\"\"Process a PDF file from reMarkable Cloud using the OneDrive processor\"\"\"\n        try:\n            # Use OneDrive processor to handle the file processing and upload\n            # This ensures consistent processing between OneDrive and reMarkable sources\n            \n            # Create a temporary copy in OneDrive processor's expected location\n            temp_input_file = pdf_file.parent / f\"remarkable_{source}_{pdf_file.name}\"\n            shutil.copy2(pdf_file, temp_input_file)\n            \n            # Process through OneDrive processor\n            success = await self.onedrive_processor._process_file_from_path(str(temp_input_file))\n            \n            if success:\n                self.logger.info(f\"\u2705 Successfully processed reMarkable file from {source}: {pdf_file.name}\")\n            else:\n                self.logger.error(f\"\u274c Failed to process reMarkable file from {source}: {pdf_file.name}\")\n            \n            # Clean up temporary file\n            if temp_input_file.exists():\n                temp_input_file.unlink()\n        \n        except Exception as e:\n            self.logger.error(f\"\u274c Error processing reMarkable file {pdf_file.name} from {source}: {e}\")",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/mixed_cloud_processor.py",
      "tags": [
        "cloud-integration",
        "file-watcher",
        "async",
        "onedrive",
        "remarkable",
        "pdf-processing",
        "multi-cloud",
        "file-synchronization",
        "document-processing",
        "e-ink",
        "polling"
      ],
      "updated_at": "2025-12-07T00:40:37.713240",
      "usage_example": "import asyncio\nfrom cloudtest.auth import RemarkableAuth\nfrom mixed_cloud_processor import MixedCloudProcessor\n\n# Configure OneDrive settings\nonedrive_config = {\n    'client_id': 'your-client-id',\n    'client_secret': 'your-secret',\n    'tenant_id': 'your-tenant',\n    'poll_interval': 60,\n    'remarkable_poll_interval': 60,\n    'remarkable_input_folder': '/E-Ink LLM Input'\n}\n\n# Authenticate with reMarkable Cloud\nrm_auth = RemarkableAuth()\nrm_session = rm_auth.get_session()\n\n# Create processor instance\nprocessor = MixedCloudProcessor(\n    onedrive_config=onedrive_config,\n    remarkable_session=rm_session,\n    api_key='your-api-key'\n)\n\n# Start watching (runs indefinitely)\nasync def main():\n    await processor.start_watching()\n\nasyncio.run(main())"
    },
    {
      "best_practices": [
        "Always call initialize() before check_for_new_files() to ensure the gpt_out folder is located",
        "The class automatically cleans up temporary files in __del__, but for long-running processes, consider periodic cleanup",
        "The processed_files set grows indefinitely - for long-running watchers, implement periodic clearing or size limits",
        "Ensure the 'rmc' tool is installed for .rm file conversion (remarkable-cli package)",
        "Install PyPDF2 for reliable PDF concatenation: pip install PyPDF2",
        "The class uses async methods - must be called with await in an async context",
        "Handle the case where initialize() returns False (gpt_out folder not found)",
        "The session object must remain valid throughout the watcher's lifetime",
        "Temporary files are stored in system temp directory - ensure sufficient disk space",
        "For production use, implement retry logic for network failures in API calls"
      ],
      "class_interface": {
        "attributes": [
          {
            "description": "Authenticated session for making API requests to reMarkable Cloud",
            "is_class_variable": false,
            "name": "session",
            "type": "requests.Session"
          },
          {
            "description": "Logger instance for outputting messages",
            "is_class_variable": false,
            "name": "logger",
            "type": "logging.Logger"
          },
          {
            "description": "Base URL for reMarkable Cloud API (https://eu.tectonic.remarkable.com)",
            "is_class_variable": false,
            "name": "base_url",
            "type": "str"
          },
          {
            "description": "Set of document hashes that have already been processed to avoid reprocessing",
            "is_class_variable": false,
            "name": "processed_files",
            "type": "Set[str]"
          },
          {
            "description": "UUID of the gpt_out folder in reMarkable Cloud, set during initialization",
            "is_class_variable": false,
            "name": "gpt_out_folder_uuid",
            "type": "Optional[str]"
          },
          {
            "description": "Temporary directory for storing downloaded and converted files, automatically cleaned up",
            "is_class_variable": false,
            "name": "temp_dir",
            "type": "Path"
          }
        ],
        "methods": [
          {
            "is_property": false,
            "is_static": false,
            "name": "__init__",
            "parameters": {
              "logger": "Logger instance for status and error messages",
              "remarkable_session": "Authenticated requests.Session for reMarkable Cloud API"
            },
            "purpose": "Initialize the watcher with authentication session and logger, create temporary directory for file processing",
            "returns": "None (constructor)",
            "signature": "__init__(self, remarkable_session, logger)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "__del__",
            "parameters": {},
            "purpose": "Cleanup temporary directory when object is destroyed",
            "returns": "None",
            "signature": "__del__(self)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "initialize",
            "parameters": {},
            "purpose": "Find and store the UUID of the gpt_out folder in reMarkable Cloud by discovering all nodes",
            "returns": "Boolean indicating whether gpt_out folder was successfully found",
            "signature": "async initialize(self) -> bool"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_discover_all_nodes",
            "parameters": {},
            "purpose": "Recursively discover all nodes (folders and documents) in reMarkable Cloud storage",
            "returns": "Dictionary mapping UUIDs to node information (name, type, parent, metadata, hash)",
            "signature": "async _discover_all_nodes(self) -> Dict[str, Dict]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_fetch_hash_content",
            "parameters": {
              "hash_ref": "64-character hash identifying the content to fetch"
            },
            "purpose": "Download content from reMarkable Cloud using a content hash reference",
            "returns": "Dictionary with 'hash', 'content' (bytes), and 'size' keys, or None on failure",
            "signature": "async _fetch_hash_content(self, hash_ref: str) -> Optional[Dict[str, Any]]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_parse_directory_listing",
            "parameters": {
              "content": "Raw bytes of directory listing content"
            },
            "purpose": "Parse reMarkable's directory listing format to extract child objects and data components",
            "returns": "Dictionary with 'child_objects' (folders/documents) and 'data_components' (files like .pdf, .rm, .metadata)",
            "signature": "_parse_directory_listing(self, content: bytes) -> Dict[str, Any]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_extract_metadata",
            "parameters": {
              "metadata_hash": "Hash reference to the metadata file"
            },
            "purpose": "Fetch and parse JSON metadata for a document or folder",
            "returns": "Parsed JSON metadata dictionary containing visibleName, type, parent, etc., or None on failure",
            "signature": "async _extract_metadata(self, metadata_hash: str) -> Optional[Dict[str, Any]]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "check_for_new_files",
            "parameters": {},
            "purpose": "Main method to check gpt_out folder for new documents and extract them as PDFs",
            "returns": "List of Path objects pointing to newly extracted PDF files in temporary directory",
            "signature": "async check_for_new_files(self) -> List[Path]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_extract_document",
            "parameters": {
              "all_nodes": "Complete node tree for reference",
              "doc_node": "Dictionary containing document metadata and parsed data"
            },
            "purpose": "Extract a single document from reMarkable Cloud, handling both PDF and .rm formats",
            "returns": "Path to extracted PDF file, or None on failure",
            "signature": "async _extract_document(self, doc_node: Dict, all_nodes: Dict) -> Optional[Path]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_convert_rm_to_pdf",
            "parameters": {
              "doc_name": "Name of the document for output filename",
              "output_dir": "Directory to store output PDF and temporary files",
              "rm_hashes": "List of hash references to .rm page files"
            },
            "purpose": "Convert reMarkable .rm files to PDF using the rmc tool, handling single and multi-page documents",
            "returns": "Path to final PDF file, or None on failure",
            "signature": "async _convert_rm_to_pdf(self, doc_name: str, rm_hashes: List[str], output_dir: Path) -> Optional[Path]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_concatenate_pdfs",
            "parameters": {
              "output_path": "Path where merged PDF should be written",
              "pdf_files": "List of Path objects to PDF files to concatenate"
            },
            "purpose": "Merge multiple PDF files into a single PDF, trying PyPDF2, PyPDF4, pdftk, and ghostscript in order",
            "returns": "Boolean indicating success or failure of concatenation",
            "signature": "async _concatenate_pdfs(self, pdf_files: List[Path], output_path: Path) -> bool"
          }
        ]
      },
      "complexity": "complex",
      "component_type": "class",
      "conditional_imports": [
        {
          "condition": "Required for PDF concatenation when converting multi-page .rm notebooks. Falls back to PyPDF4 if not available.",
          "import": "from PyPDF2 import PdfWriter, PdfReader",
          "optional": true
        },
        {
          "condition": "Fallback for PDF concatenation if PyPDF2 is not available. Can also fall back to system commands (pdftk, ghostscript).",
          "import": "from PyPDF4 import PdfFileWriter, PdfFileReader",
          "optional": true
        }
      ],
      "created_at": "2025-12-06 23:39:54",
      "decorators": [],
      "dependencies": [
        "asyncio",
        "json",
        "re",
        "subprocess",
        "tempfile",
        "shutil",
        "pathlib",
        "typing",
        "datetime",
        "logging",
        "requests",
        "PyPDF2",
        "PyPDF4"
      ],
      "description": "Monitors the reMarkable Cloud 'gpt_out' folder for new documents, automatically downloads them, and converts .rm (reMarkable native) files to PDF format.",
      "docstring": "Watches reMarkable Cloud gpt_out folder for new files",
      "id": 1936,
      "imports": [
        "import asyncio",
        "import json",
        "import re",
        "import subprocess",
        "import tempfile",
        "import shutil",
        "from pathlib import Path",
        "from typing import Dict",
        "from typing import List",
        "from typing import Optional",
        "from typing import Any",
        "from typing import Set",
        "from datetime import datetime",
        "import logging",
        "from onedrive_client import OneDriveClient",
        "from onedrive_client import OneDriveProcessor",
        "import requests",
        "from cloudtest.auth import RemarkableAuth",
        "from PyPDF2 import PdfWriter",
        "from PyPDF2 import PdfReader",
        "from PyPDF4 import PdfFileWriter",
        "from PyPDF4 import PdfFileReader"
      ],
      "imports_required": [
        "import asyncio",
        "import json",
        "import re",
        "import subprocess",
        "import tempfile",
        "import shutil",
        "from pathlib import Path",
        "from typing import Dict, List, Optional, Any",
        "import logging",
        "import requests"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 473,
      "line_start": 36,
      "name": "RemarkableCloudWatcher",
      "parameters": [
        {
          "annotation": "",
          "default": null,
          "name": "bases"
        }
      ],
      "parameters_explained": {
        "logger": "A logging.Logger instance for outputting status messages, errors, and debug information throughout the watching and extraction process.",
        "remarkable_session": "An authenticated requests.Session object configured with reMarkable Cloud authentication headers and tokens. This session is used for all API calls to the reMarkable Cloud service."
      },
      "parent_class": null,
      "purpose": "This class provides a complete solution for watching a specific folder in reMarkable Cloud storage, detecting new documents, and extracting them as PDFs. It handles both native PDF files and reMarkable's proprietary .rm format, converting the latter using the 'rmc' command-line tool. The class maintains state to avoid reprocessing files, manages temporary storage, and supports multi-page notebook conversion with PDF concatenation.",
      "return_annotation": null,
      "return_explained": "Instantiation returns a RemarkableCloudWatcher object. The main method check_for_new_files() returns a List[Path] containing paths to newly extracted PDF files. The initialize() method returns a boolean indicating success/failure of finding the gpt_out folder.",
      "settings_required": [
        "Authenticated reMarkable Cloud session with valid tokens",
        "rmc command-line tool installed and available in PATH (for .rm to PDF conversion)",
        "A 'gpt_out' folder must exist in the reMarkable Cloud account",
        "Optional: PyPDF2 or PyPDF4 for PDF concatenation (pip install PyPDF2)",
        "Optional: pdftk or ghostscript system tools as fallback for PDF concatenation"
      ],
      "source_code": "class RemarkableCloudWatcher:\n    \"\"\"Watches reMarkable Cloud gpt_out folder for new files\"\"\"\n    \n    def __init__(self, remarkable_session, logger):\n        self.session = remarkable_session\n        self.logger = logger\n        self.base_url = \"https://eu.tectonic.remarkable.com\"\n        self.processed_files = set()  # Track processed file hashes\n        self.gpt_out_folder_uuid = None\n        self.temp_dir = Path(tempfile.mkdtemp(prefix=\"remarkable_watch_\"))\n        \n    def __del__(self):\n        \"\"\"Cleanup temporary directory\"\"\"\n        if hasattr(self, 'temp_dir') and self.temp_dir.exists():\n            shutil.rmtree(self.temp_dir, ignore_errors=True)\n    \n    async def initialize(self):\n        \"\"\"Initialize the watcher by finding the gpt_out folder\"\"\"\n        self.logger.info(\"\ud83d\udd0d Initializing reMarkable Cloud watcher...\")\n        \n        try:\n            # Discover all folders to find gpt_out\n            all_nodes = await self._discover_all_nodes()\n            \n            # Find gpt_out folder\n            for uuid, node in all_nodes.items():\n                if (node.get('node_type') == 'folder' and \n                    node.get('name', '').lower() in ['gpt_out', 'gpt out', 'gptout']):\n                    self.gpt_out_folder_uuid = uuid\n                    self.logger.info(f\"\u2705 Found gpt_out folder: {uuid}\")\n                    break\n            \n            if not self.gpt_out_folder_uuid:\n                self.logger.warning(\"\u26a0\ufe0f gpt_out folder not found in reMarkable Cloud\")\n                return False\n                \n            return True\n            \n        except Exception as e:\n            self.logger.error(f\"\u274c Failed to initialize reMarkable watcher: {e}\")\n            return False\n    \n    async def _discover_all_nodes(self) -> Dict[str, Dict]:\n        \"\"\"Discover all nodes in reMarkable Cloud using local_replica_v2.py method\"\"\"\n        all_nodes = {}\n        \n        try:\n            # Get root hash\n            root_response = self.session.get(f\"{self.base_url}/sync/v4/root\")\n            root_response.raise_for_status()\n            root_data = root_response.json()\n            root_hash = root_data.get('hash')\n            \n            if not root_hash:\n                return all_nodes\n            \n            # Discover nodes recursively\n            discovered_hashes = set()\n            hashes_to_process = [root_hash]\n            \n            while hashes_to_process:\n                current_hash = hashes_to_process.pop(0)\n                \n                if current_hash in discovered_hashes:\n                    continue\n                    \n                discovered_hashes.add(current_hash)\n                \n                # Fetch and parse content\n                content_info = await self._fetch_hash_content(current_hash)\n                if not content_info:\n                    continue\n                \n                parsed = self._parse_directory_listing(content_info['content'])\n                \n                # Extract metadata if available\n                metadata = {}\n                node_name = f\"unknown_{current_hash[:8]}\"\n                node_type = \"folder\"\n                parent_uuid = None\n                \n                for component in parsed['data_components']:\n                    if component['component_type'] == 'metadata':\n                        extracted_metadata = await 
self._extract_metadata(component['hash'])\n                        if extracted_metadata:\n                            metadata = extracted_metadata\n                            node_name = metadata.get('visibleName', node_name)\n                            if metadata.get('type') == 'DocumentType':\n                                node_type = \"document\"\n                            elif metadata.get('type') == 'CollectionType':\n                                node_type = \"folder\"\n                            parent_uuid = metadata.get('parent', '') or None\n                        break\n                \n                # Determine node UUID\n                node_uuid = None\n                for component in parsed['child_objects']:\n                    node_uuid = component['uuid_component']\n                    break\n                if not node_uuid and parsed['data_components']:\n                    component_name = parsed['data_components'][0]['uuid_component']\n                    if '.' in component_name:\n                        node_uuid = component_name.split('.')[0]\n                if not node_uuid:\n                    node_uuid = current_hash[:32]\n                \n                # Store node\n                all_nodes[node_uuid] = {\n                    'uuid': node_uuid,\n                    'hash': current_hash,\n                    'name': node_name,\n                    'node_type': node_type,\n                    'parent_uuid': parent_uuid,\n                    'metadata': metadata,\n                    'parsed_data': parsed\n                }\n                \n                # Add child hashes to process\n                for child_obj in parsed['child_objects']:\n                    if child_obj['hash'] not in discovered_hashes:\n                        hashes_to_process.append(child_obj['hash'])\n            \n            return all_nodes\n            \n        except Exception as e:\n            self.logger.error(f\"\u274c Failed to discover nodes: {e}\")\n            return all_nodes\n    \n    async def _fetch_hash_content(self, hash_ref: str) -> Optional[Dict[str, Any]]:\n        \"\"\"Fetch content from reMarkable cloud by hash\"\"\"\n        try:\n            url = f\"{self.base_url}/sync/v3/files/{hash_ref}\"\n            response = self.session.get(url)\n            response.raise_for_status()\n            \n            return {\n                'hash': hash_ref,\n                'content': response.content,\n                'size': len(response.content)\n            }\n            \n        except Exception as e:\n            self.logger.debug(f\"Failed to fetch {hash_ref[:16]}...: {e}\")\n            return None\n    \n    def _parse_directory_listing(self, content: bytes) -> Dict[str, Any]:\n        \"\"\"Parse directory listing using local_replica_v2.py method\"\"\"\n        try:\n            text_content = content.decode('utf-8')\n        except UnicodeDecodeError:\n            return {'child_objects': [], 'data_components': []}\n        \n        result = {\n            'child_objects': [],\n            'data_components': []\n        }\n        \n        lines = text_content.split('\\n')\n        if lines and lines[0].strip().isdigit():\n            lines = lines[1:]  # Skip count line\n        \n        entry_pattern = r'^([a-f0-9]{64}):([0-9a-fA-F]+):([a-f0-9-/]+(?:\\.[^:]+)?):(\\d+):(\\d+)$'\n        \n        for line in lines:\n            line = line.strip()\n            if not line:\n                continue\n                \n          
  match = re.match(entry_pattern, line, re.IGNORECASE)\n            if match:\n                hash_val, flags, uuid_component, type_val, size_val = match.groups()\n                \n                entry_info = {\n                    'hash': hash_val,\n                    'flags': flags,\n                    'uuid_component': uuid_component,\n                    'type': type_val,\n                    'size': int(size_val)\n                }\n                \n                if '.' in uuid_component:\n                    # Data component (.content, .metadata, .pdf, .rm, etc.)\n                    component_type = uuid_component.split('.')[-1]\n                    if '/' in component_type:  # Handle .rm files like \"uuid/filename.rm\"\n                        component_type = component_type.split('/')[-1]\n                    entry_info['component_type'] = component_type\n                    result['data_components'].append(entry_info)\n                else:\n                    # Child object (pure UUID)\n                    result['child_objects'].append(entry_info)\n        \n        return result\n    \n    async def _extract_metadata(self, metadata_hash: str) -> Optional[Dict[str, Any]]:\n        \"\"\"Extract metadata from hash\"\"\"\n        content_info = await self._fetch_hash_content(metadata_hash)\n        if not content_info:\n            return None\n        \n        try:\n            text_content = content_info['content'].decode('utf-8')\n            return json.loads(text_content)\n        except (UnicodeDecodeError, json.JSONDecodeError) as e:\n            self.logger.debug(f\"Failed to parse metadata {metadata_hash[:16]}...: {e}\")\n            return None\n    \n    async def check_for_new_files(self) -> List[Path]:\n        \"\"\"Check gpt_out folder for new files and convert them to PDFs\"\"\"\n        if not self.gpt_out_folder_uuid:\n            return []\n        \n        new_pdf_files = []\n        \n        try:\n            # Re-discover nodes to get current state\n            all_nodes = await self._discover_all_nodes()\n            \n            # Find documents in gpt_out folder\n            gpt_out_documents = []\n            for uuid, node in all_nodes.items():\n                if (node.get('node_type') == 'document' and \n                    node.get('parent_uuid') == self.gpt_out_folder_uuid):\n                    gpt_out_documents.append(node)\n            \n            self.logger.debug(f\"Found {len(gpt_out_documents)} documents in gpt_out folder\")\n            \n            for doc_node in gpt_out_documents:\n                doc_hash = doc_node['hash']\n                \n                # Skip if already processed\n                if doc_hash in self.processed_files:\n                    continue\n                \n                self.logger.info(f\"\ud83d\udcc4 Processing new document: {doc_node['name']}\")\n                \n                # Extract the document\n                pdf_file = await self._extract_document(doc_node, all_nodes)\n                if pdf_file:\n                    new_pdf_files.append(pdf_file)\n                    self.processed_files.add(doc_hash)\n        \n        except Exception as e:\n            self.logger.error(f\"\u274c Error checking for new files: {e}\")\n        \n        return new_pdf_files\n    \n    async def _extract_document(self, doc_node: Dict, all_nodes: Dict) -> Optional[Path]:\n        \"\"\"Extract a document from reMarkable Cloud, converting .rm files to PDF if needed\"\"\"\n        try:\n       
     parsed_data = doc_node.get('parsed_data', {})\n            doc_name = doc_node.get('name', 'unknown')\n            \n            # Create document-specific temp directory\n            doc_temp_dir = self.temp_dir / f\"doc_{doc_node['uuid'][:8]}\"\n            doc_temp_dir.mkdir(exist_ok=True)\n            \n            # Check for PDF content first\n            pdf_hash = None\n            rm_hashes = []\n            \n            for component in parsed_data.get('data_components', []):\n                if component['component_type'] == 'pdf':\n                    pdf_hash = component['hash']\n                elif component['component_type'] == 'rm':\n                    rm_hashes.append(component['hash'])\n            \n            # If PDF exists, extract it directly\n            if pdf_hash:\n                self.logger.info(f\"\ud83d\udcc4 Extracting PDF: {doc_name}\")\n                pdf_content = await self._fetch_hash_content(pdf_hash)\n                if pdf_content:\n                    pdf_path = doc_temp_dir / f\"{doc_name}.pdf\"\n                    with open(pdf_path, 'wb') as f:\n                        f.write(pdf_content['content'])\n                    return pdf_path\n            \n            # If .rm files exist, convert to PDF\n            elif rm_hashes:\n                self.logger.info(f\"\ud83d\udd8a\ufe0f Converting .rm files to PDF: {doc_name}\")\n                return await self._convert_rm_to_pdf(doc_name, rm_hashes, doc_temp_dir)\n            \n            else:\n                self.logger.warning(f\"\u26a0\ufe0f No PDF or .rm content found for: {doc_name}\")\n                return None\n        \n        except Exception as e:\n            self.logger.error(f\"\u274c Error extracting document {doc_name}: {e}\")\n            return None\n    \n    async def _convert_rm_to_pdf(self, doc_name: str, rm_hashes: List[str], output_dir: Path) -> Optional[Path]:\n        \"\"\"Convert .rm files to PDF using rmc tool (from local_replica_v2.py)\"\"\"\n        try:\n            # Create notebook directory for .rm files\n            notebook_dir = output_dir / \"notebook\"\n            notebook_dir.mkdir(exist_ok=True)\n            \n            # Download .rm files\n            rm_files = []\n            for i, rm_hash in enumerate(rm_hashes):\n                rm_content = await self._fetch_hash_content(rm_hash)\n                if rm_content:\n                    rm_path = notebook_dir / f\"page_{i+1}.rm\"\n                    with open(rm_path, 'wb') as f:\n                        f.write(rm_content['content'])\n                    rm_files.append(rm_path)\n            \n            if not rm_files:\n                self.logger.warning(f\"\u26a0\ufe0f No .rm files downloaded for {doc_name}\")\n                return None\n            \n            # Sort files by page number\n            rm_files.sort(key=lambda x: int(x.stem.split('_')[1]))\n            \n            # Final PDF path\n            final_pdf_path = output_dir / f\"{doc_name}.pdf\"\n            \n            if len(rm_files) == 1:\n                # Single page - convert directly\n                result = subprocess.run([\n                    \"rmc\", str(rm_files[0]), \"-o\", str(final_pdf_path)\n                ], capture_output=True, text=True, timeout=60)\n                \n                if result.returncode == 0 and final_pdf_path.exists() and final_pdf_path.stat().st_size > 0:\n                    self.logger.info(f\"\u2705 Converted single page to PDF: {final_pdf_path}\")\n              
      return final_pdf_path\n                else:\n                    self.logger.error(f\"\u274c rmc conversion failed: {result.stderr}\")\n                    return None\n            \n            else:\n                # Multiple pages - convert each to temporary PDF and concatenate\n                temp_pdfs = []\n                \n                for i, rm_file in enumerate(rm_files):\n                    temp_pdf = notebook_dir / f\"temp_page_{i+1}.pdf\"\n                    \n                    result = subprocess.run([\n                        \"rmc\", str(rm_file), \"-o\", str(temp_pdf)\n                    ], capture_output=True, text=True, timeout=60)\n                    \n                    if result.returncode == 0 and temp_pdf.exists() and temp_pdf.stat().st_size > 0:\n                        temp_pdfs.append(temp_pdf)\n                    else:\n                        self.logger.error(f\"\u274c rmc conversion failed for page {i+1}: {result.stderr}\")\n                        return None\n                \n                if temp_pdfs:\n                    # Concatenate PDFs using PyPDF2 or similar\n                    success = await self._concatenate_pdfs(temp_pdfs, final_pdf_path)\n                    if success:\n                        self.logger.info(f\"\u2705 Converted multi-page notebook to PDF: {final_pdf_path}\")\n                        return final_pdf_path\n                \n                return None\n        \n        except Exception as e:\n            self.logger.error(f\"\u274c Error converting .rm files: {e}\")\n            return None\n    \n    async def _concatenate_pdfs(self, pdf_files: List[Path], output_path: Path) -> bool:\n        \"\"\"Concatenate multiple PDF files into one\"\"\"\n        if len(pdf_files) <= 1:\n            # If only one file, just copy it\n            if pdf_files:\n                shutil.copy2(pdf_files[0], output_path)\n                return True\n            return False\n        \n        try:\n            # Try PyPDF2 first (newest and most stable)\n            from PyPDF2 import PdfWriter, PdfReader\n            \n            writer = PdfWriter()\n            \n            for pdf_file in pdf_files:\n                reader = PdfReader(str(pdf_file))\n                for page in reader.pages:\n                    writer.add_page(page)\n            \n            with open(output_path, 'wb') as output_file:\n                writer.write(output_file)\n            \n            self.logger.info(f\"\u2705 PDF concatenation successful using PyPDF2\")\n            return True\n            \n        except ImportError:\n            try:\n                # Try PyPDF4 as fallback\n                from PyPDF4 import PdfFileWriter, PdfFileReader\n                \n                writer = PdfFileWriter()\n                \n                for pdf_file in pdf_files:\n                    reader = PdfFileReader(str(pdf_file))\n                    for page_num in range(reader.getNumPages()):\n                        page = reader.getPage(page_num)\n                        writer.addPage(page)\n                \n                with open(output_path, 'wb') as output_file:\n                    writer.write(output_file)\n                \n                self.logger.info(f\"\u2705 PDF concatenation successful using PyPDF4\")\n                return True\n                \n            except ImportError:\n                # Fallback to using system commands if no PDF library available\n                
self.logger.warning(\"\u26a0\ufe0f No PDF library available (PyPDF2/PyPDF4), trying system commands\")\n                self.logger.warning(\"\ud83d\udca1 Install PyPDF2 for better performance: pip install PyPDF2\")\n                \n                try:\n                    # Try using pdftk if available\n                    cmd = [\"pdftk\"] + [str(f) for f in pdf_files] + [\"cat\", \"output\", str(output_path)]\n                    result = subprocess.run(cmd, capture_output=True, text=True, timeout=120)\n                    \n                    if result.returncode == 0:\n                        self.logger.info(f\"\u2705 PDF concatenation successful using pdftk\")\n                        return True\n                    \n                    # Try using gs (ghostscript) as fallback\n                    cmd = [\"gs\", \"-dNOPAUSE\", \"-dBATCH\", \"-sDEVICE=pdfwrite\", f\"-sOutputFile={output_path}\"] + [str(f) for f in pdf_files]\n                    result = subprocess.run(cmd, capture_output=True, text=True, timeout=120)\n                    \n                    if result.returncode == 0:\n                        self.logger.info(f\"\u2705 PDF concatenation successful using ghostscript\")\n                        return True\n                    else:\n                        self.logger.error(\"\u274c All PDF concatenation methods failed\")\n                        self.logger.error(\"\ud83d\udca1 Install dependencies: pip install PyPDF2 OR sudo apt-get install pdftk ghostscript\")\n                        return False\n                    \n                except Exception as e:\n                    self.logger.error(f\"\u274c PDF concatenation failed: {e}\")\n                    self.logger.error(\"\ud83d\udca1 Install dependencies: pip install PyPDF2 OR sudo apt-get install pdftk ghostscript\")\n                    return False\n        \n        except Exception as e:\n            self.logger.error(f\"\u274c PDF concatenation error: {e}\")\n            return False",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/mixed_cloud_processor.py",
      "tags": [
        "remarkable",
        "cloud-storage",
        "file-watcher",
        "pdf-conversion",
        "document-extraction",
        "async",
        "monitoring",
        "rm-format",
        "cloud-sync"
      ],
      "updated_at": "2025-12-07T00:39:54.583312",
      "usage_example": "import asyncio\nimport logging\nimport requests\nfrom remarkable_cloud_watcher import RemarkableCloudWatcher\n\n# Setup logger\nlogger = logging.getLogger(__name__)\nlogger.setLevel(logging.INFO)\n\n# Create authenticated session (example - actual auth is more complex)\nsession = requests.Session()\nsession.headers.update({\n    'Authorization': 'Bearer YOUR_TOKEN',\n    'User-Agent': 'remarkable-cloud-watcher'\n})\n\n# Create watcher instance\nwatcher = RemarkableCloudWatcher(session, logger)\n\n# Initialize and watch for files\nasync def main():\n    # Initialize - finds gpt_out folder\n    if await watcher.initialize():\n        # Check for new files (returns list of PDF paths)\n        new_pdfs = await watcher.check_for_new_files()\n        \n        for pdf_path in new_pdfs:\n            print(f'New PDF: {pdf_path}')\n            # Process the PDF file...\n    else:\n        print('Failed to initialize watcher')\n\n# Run the async function\nasyncio.run(main())"
    },
    {
      "best_practices": [
        "Use enum members for type-safe comparisons instead of string literals (e.g., use 'FileType.pdf' instead of 'pdf')",
        "Access enum members using dot notation (FileType.pdf) or bracket notation (FileType['pdf'])",
        "Use the 'value' attribute to get the string representation of the enum member",
        "The custom __str__ method returns the name, making it easy to convert to string for logging or display",
        "Enum members are immutable and singleton - comparing with 'is' or '==' both work",
        "Use FileType.unknown as a fallback for unrecognized file types",
        "When serializing to JSON or databases, use the .value attribute to get the string representation",
        "Enum members can be iterated over using 'for member in FileType'",
        "Do not instantiate FileType directly; use the predefined class attributes"
      ],
      "class_interface": {
        "attributes": [
          {
            "description": "Enum member representing PDF file type with value 'pdf'",
            "is_class_variable": true,
            "name": "pdf",
            "type": "FileType"
          },
          {
            "description": "Enum member representing EPUB file type with value 'epub'",
            "is_class_variable": true,
            "name": "epub",
            "type": "FileType"
          },
          {
            "description": "Enum member representing notes file type with value 'notes'",
            "is_class_variable": true,
            "name": "notes",
            "type": "FileType"
          },
          {
            "description": "Enum member representing unknown or unrecognized file type with value 'unknown'",
            "is_class_variable": true,
            "name": "unknown",
            "type": "FileType"
          },
          {
            "description": "Inherited from Enum - the name of the enum member (e.g., 'pdf', 'epub')",
            "is_class_variable": false,
            "name": "name",
            "type": "str"
          },
          {
            "description": "Inherited from Enum - the value associated with the enum member (e.g., 'pdf', 'epub')",
            "is_class_variable": false,
            "name": "value",
            "type": "str"
          }
        ],
        "methods": [
          {
            "is_property": false,
            "is_static": false,
            "name": "__str__",
            "parameters": {},
            "purpose": "Returns the string representation of the enum member, specifically its name",
            "returns": "The name of the enum member as a string (e.g., 'pdf', 'epub', 'notes', 'unknown')",
            "signature": "__str__(self) -> str"
          }
        ]
      },
      "complexity": "simple",
      "component_type": "class",
      "conditional_imports": [],
      "created_at": "2025-12-06 23:37:24",
      "decorators": [],
      "dependencies": [
        "enum"
      ],
      "description": "An enumeration class that defines supported file types for document processing, including PDF, EPUB, notes, and unknown file types.",
      "docstring": null,
      "id": 1929,
      "imports": [
        "import datetime",
        "import enum"
      ],
      "imports_required": [
        "import enum"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 27,
      "line_start": 20,
      "name": "FileType",
      "parameters": [
        {
          "annotation": "enum.Enum",
          "default": null,
          "name": "bases"
        }
      ],
      "parameters_explained": {
        "bases": "Inherits from enum.Enum, which provides the enumeration functionality. No constructor parameters are needed as this is an enum class with predefined members."
      },
      "parent_class": null,
      "purpose": "FileType is an enumeration that provides a type-safe way to represent and work with different file formats in a document processing system. It extends Python's enum.Enum to define four distinct file types (pdf, epub, notes, unknown) and provides a custom string representation. This enum is typically used for file type identification, validation, and routing logic in file processing workflows.",
      "return_annotation": null,
      "return_explained": "Instantiation returns a FileType enum member (e.g., FileType.pdf, FileType.epub). The __str__ method returns the name of the enum member as a string (e.g., 'pdf', 'epub'). Each enum member has a 'value' attribute that matches its name.",
      "settings_required": [],
      "source_code": "class FileType(enum.Enum):\n    pdf = 'pdf'\n    epub = 'epub'\n    notes = 'notes'\n    unknown = 'unknown'\n\n    def __str__(self):\n        return self.name",
      "source_file": "/tf/active/vicechatdev/rmcl/const.py",
      "tags": [
        "enum",
        "enumeration",
        "file-type",
        "file-format",
        "type-safety",
        "constants",
        "document-processing",
        "pdf",
        "epub"
      ],
      "updated_at": "2025-12-07T00:37:24.604550",
      "usage_example": "# Import the enum module\nimport enum\n\n# Define the FileType class\nclass FileType(enum.Enum):\n    pdf = 'pdf'\n    epub = 'epub'\n    notes = 'notes'\n    unknown = 'unknown'\n\n    def __str__(self):\n        return self.name\n\n# Access enum members\nfile_type = FileType.pdf\nprint(file_type)  # Output: pdf\nprint(file_type.value)  # Output: pdf\nprint(file_type.name)  # Output: pdf\n\n# Compare enum members\nif file_type == FileType.pdf:\n    print(\"This is a PDF file\")\n\n# Iterate over all file types\nfor ft in FileType:\n    print(f\"Type: {ft}, Value: {ft.value}\")\n\n# Get enum member by value\nfile_type_from_value = FileType('epub')\nprint(file_type_from_value)  # Output: epub\n\n# Get enum member by name\nfile_type_from_name = FileType['notes']\nprint(file_type_from_name)  # Output: notes"
    },
    {
      "best_practices": [
        "Raise this exception when detecting that an item is virtual and the requested operation is not supported on virtual items",
        "Always include a descriptive error message when raising the exception to help with debugging",
        "Catch this specific exception type when you want to handle virtual item errors differently from other exceptions",
        "Use this exception in a consistent manner across your codebase to maintain clear error semantics",
        "Consider documenting in your code which operations raise VirtualItemError so users of your API know what to expect",
        "This exception should be raised early in validation logic before attempting operations that would fail on virtual items"
      ],
      "class_interface": {
        "attributes": [
          {
            "description": "Tuple of arguments passed to the exception constructor, inherited from Exception base class. Typically contains the error message as the first element.",
            "is_class_variable": false,
            "name": "args",
            "type": "tuple"
          }
        ],
        "methods": []
      },
      "complexity": "simple",
      "component_type": "class",
      "conditional_imports": [],
      "created_at": "2025-12-06 23:36:42",
      "decorators": [],
      "dependencies": [],
      "description": "A custom exception class that signals when an operation cannot be performed on a virtual item.",
      "docstring": "An operation can not be done on a virtual item.",
      "id": 1927,
      "imports": [],
      "imports_required": [],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 37,
      "line_start": 35,
      "name": "VirtualItemError",
      "parameters": [
        {
          "annotation": "Exception",
          "default": null,
          "name": "bases"
        }
      ],
      "parameters_explained": {
        "**kwargs": "Variable keyword arguments passed to the Exception base class constructor. Rarely used but available for advanced exception handling scenarios.",
        "*args": "Variable positional arguments passed to the Exception base class constructor. Typically includes an error message string describing what operation failed and why."
      },
      "parent_class": null,
      "purpose": "VirtualItemError is a specialized exception class that inherits from Python's built-in Exception class. It is designed to be raised when code attempts to perform an operation that is not permitted on virtual items. This provides a specific, semantic way to handle errors related to virtual item operations, allowing calling code to catch and handle this specific error type separately from other exceptions. Virtual items are likely placeholder or abstract representations that don't support certain concrete operations.",
      "return_annotation": null,
      "return_explained": "Instantiation returns a VirtualItemError exception object that can be raised using the 'raise' statement. When caught, the exception object contains the message and traceback information inherited from the Exception base class. The exception object itself has standard exception attributes like 'args' (tuple of arguments passed during instantiation) and can be converted to a string to display the error message.",
      "settings_required": [],
      "source_code": "class VirtualItemError(Exception):\n    \"\"\"An operation can not be done on a virtual item.\"\"\"\n    pass",
      "source_file": "/tf/active/vicechatdev/rmcl/exceptions.py",
      "tags": [
        "exception",
        "error-handling",
        "virtual-item",
        "custom-exception",
        "validation"
      ],
      "updated_at": "2025-12-07T00:36:42.278404",
      "usage_example": "# Raising the exception\ndef process_item(item):\n    if item.is_virtual:\n        raise VirtualItemError(f\"Cannot process virtual item: {item.name}\")\n    # Process the item normally\n    return item.process()\n\n# Catching the exception\ntry:\n    result = process_item(my_item)\nexcept VirtualItemError as e:\n    print(f\"Virtual item error occurred: {e}\")\n    # Handle the virtual item case specifically\n    result = None\nexcept Exception as e:\n    print(f\"Other error occurred: {e}\")\n    raise"
    },
    {
      "best_practices": [
        "Always provide a descriptive error message when raising ApiError to help with debugging and error tracking.",
        "Include the response object when available to preserve full context of the API failure for debugging purposes.",
        "Catch ApiError specifically when you need to handle API-related errors differently from other exceptions.",
        "Consider logging both the message and response details when catching this exception for better observability.",
        "The docstring mentions 'Could not found a requested document' but the class can be used for any API error - consider the docstring as a primary use case rather than a limitation.",
        "Since ApiError inherits from Exception, it can be caught by generic exception handlers, so place specific ApiError handlers before generic Exception handlers.",
        "The response attribute is optional and may be None, so always check for its existence before accessing its properties.",
        "This exception is typically raised by API client code and caught by calling code that needs to handle API failures gracefully."
      ],
      "class_interface": {
        "attributes": [
          {
            "description": "Stores the API response object associated with the error, allowing access to status codes, headers, and response body for debugging. May be None if no response was provided.",
            "is_class_variable": false,
            "name": "response",
            "type": "Any (typically requests.Response or similar, can be None)"
          }
        ],
        "methods": [
          {
            "is_property": false,
            "is_static": false,
            "name": "__init__",
            "parameters": {
              "msg": "String message describing the error",
              "response": "Optional API response object (defaults to None)"
            },
            "purpose": "Initializes the ApiError exception with an error message and optional response object",
            "returns": "None (constructor)",
            "signature": "__init__(self, msg, response=None)"
          }
        ]
      },
      "complexity": "simple",
      "component_type": "class",
      "conditional_imports": [],
      "created_at": "2025-12-06 23:36:26",
      "decorators": [],
      "dependencies": [],
      "description": "A custom exception class for API-related errors, specifically designed to handle cases where a requested document cannot be found.",
      "docstring": "Could not found a requested document",
      "id": 1926,
      "imports": [],
      "imports_required": [],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 32,
      "line_start": 28,
      "name": "ApiError",
      "parameters": [
        {
          "annotation": "Exception",
          "default": null,
          "name": "bases"
        }
      ],
      "parameters_explained": {
        "msg": "A string message describing the error that occurred. This message will be passed to the parent Exception class and can be accessed via str(exception) or exception.args[0]. It should provide clear information about what went wrong.",
        "response": "An optional parameter that stores the API response object associated with the error. This can be any type (typically a requests.Response object or similar), allowing access to status codes, headers, and response body for debugging purposes. Defaults to None if not provided."
      },
      "parent_class": null,
      "purpose": "ApiError is a custom exception class that extends Python's built-in Exception class. It is designed to be raised when API operations fail, particularly when a requested document cannot be found. The class stores both an error message and an optional response object, allowing developers to access the full API response context when handling the exception. This is useful for debugging and providing detailed error information to users or logging systems.",
      "return_annotation": null,
      "return_explained": "Instantiating ApiError returns an exception object that can be raised. The object contains the error message (accessible via standard exception mechanisms) and a 'response' attribute that stores the optional response object. When raised and caught, the exception can be examined to retrieve both the message and the response details.",
      "settings_required": [],
      "source_code": "class ApiError(Exception):\n    \"\"\"Could not found a requested document\"\"\"\n    def __init__(self, msg, response=None):\n        self.response = response\n        super(ApiError, self).__init__(msg)",
      "source_file": "/tf/active/vicechatdev/rmcl/exceptions.py",
      "tags": [
        "exception",
        "error-handling",
        "api",
        "custom-exception",
        "document-not-found",
        "http-error",
        "error-response"
      ],
      "updated_at": "2025-12-07T00:36:26.279052",
      "usage_example": "# Basic usage - raising the exception\ntry:\n    # Simulating an API call that fails\n    raise ApiError(\"Document with ID 12345 not found\")\nexcept ApiError as e:\n    print(f\"Error occurred: {e}\")\n    print(f\"Response object: {e.response}\")\n\n# Usage with response object\nimport requests\n\ntry:\n    response = requests.get(\"https://api.example.com/document/12345\")\n    if response.status_code == 404:\n        raise ApiError(\"Document not found\", response=response)\nexcept ApiError as e:\n    print(f\"Error: {e}\")\n    if e.response:\n        print(f\"Status code: {e.response.status_code}\")\n        print(f\"Response body: {e.response.text}\")\n\n# Catching in exception hierarchy\ntry:\n    raise ApiError(\"Something went wrong\")\nexcept Exception as e:\n    # ApiError is caught as it inherits from Exception\n    print(f\"Caught exception: {type(e).__name__}\")"
    },
    {
      "best_practices": [
        "Always provide a descriptive message when raising this exception, including the folder path or name that was not found",
        "Use this exception specifically for folder/directory lookup failures rather than generic file operations",
        "Catch this exception specifically when you want to handle folder-not-found cases differently from other exceptions",
        "Consider including the full path in the error message to aid debugging",
        "This exception should be raised before attempting operations that require the folder to exist",
        "Can be used in conjunction with os.path.exists() or pathlib.Path.exists() checks"
      ],
      "class_interface": {
        "attributes": [
          {
            "description": "Inherited from Exception base class. Contains the error message as a tuple element, accessible via args[0]",
            "is_class_variable": false,
            "name": "args",
            "type": "tuple"
          }
        ],
        "methods": [
          {
            "is_property": false,
            "is_static": false,
            "name": "__init__",
            "parameters": {
              "msg": "A string message describing which folder was not found and providing context for the error"
            },
            "purpose": "Initializes the FolderNotFound exception with a custom error message",
            "returns": "None (constructor)",
            "signature": "__init__(self, msg: str) -> None"
          }
        ]
      },
      "complexity": "simple",
      "component_type": "class",
      "conditional_imports": [],
      "created_at": "2025-12-06 23:36:04",
      "decorators": [],
      "dependencies": [],
      "description": "A custom exception class that is raised when a requested folder cannot be found in the system.",
      "docstring": "Could not found a requested folder",
      "id": 1925,
      "imports": [],
      "imports_required": [],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 25,
      "line_start": 22,
      "name": "FolderNotFound",
      "parameters": [
        {
          "annotation": "Exception",
          "default": null,
          "name": "bases"
        }
      ],
      "parameters_explained": {
        "msg": "A string message describing the folder that was not found. This message is passed to the parent Exception class and will be displayed when the exception is raised or converted to a string. Should typically include the folder path or name that was being searched for."
      },
      "parent_class": null,
      "purpose": "This exception class provides a specific error type for folder-not-found scenarios, allowing calling code to distinguish folder lookup failures from other types of exceptions. It extends Python's built-in Exception class and accepts a custom error message to provide context about which folder was not found.",
      "return_annotation": null,
      "return_explained": "Instantiation returns a FolderNotFound exception object that can be raised using the 'raise' keyword. When caught, the exception object contains the error message accessible via str(exception) or exception.args[0].",
      "settings_required": [],
      "source_code": "class FolderNotFound(Exception):\n    \"\"\"Could not found a requested folder\"\"\"\n    def __init__(self, msg):\n        super(FolderNotFound, self).__init__(msg)",
      "source_file": "/tf/active/vicechatdev/rmcl/exceptions.py",
      "tags": [
        "exception",
        "error-handling",
        "filesystem",
        "folder",
        "directory",
        "custom-exception",
        "file-operations"
      ],
      "updated_at": "2025-12-07T00:36:04.459091",
      "usage_example": "# Example 1: Raising the exception\nimport os\n\ndef get_folder_contents(folder_path):\n    if not os.path.exists(folder_path):\n        raise FolderNotFound(f\"Folder '{folder_path}' does not exist\")\n    return os.listdir(folder_path)\n\n# Example 2: Catching the exception\ntry:\n    contents = get_folder_contents('/nonexistent/path')\nexcept FolderNotFound as e:\n    print(f\"Error: {e}\")\n    # Handle the missing folder case\n\n# Example 3: Direct instantiation and raising\nerror = FolderNotFound(\"The data folder could not be located\")\nraise error"
    },
    {
      "best_practices": [
        "Always provide a descriptive error message when raising this exception, including both the expected type and the actual type received",
        "Use this exception specifically for type-related errors rather than general validation errors to maintain clear error semantics",
        "Catch this exception specifically when you need to handle type errors differently from other exceptions",
        "Consider including the variable name or context in the error message to make debugging easier",
        "This exception should be raised as early as possible when a type mismatch is detected to fail fast",
        "Document in your function/method docstrings when this exception might be raised",
        "The exception inherits from Exception, so it will be caught by generic 'except Exception' handlers - be mindful of exception handling hierarchy"
      ],
      "class_interface": {
        "attributes": [
          {
            "description": "Inherited from Exception base class. Contains the error message as a tuple, accessible as args[0]. This is automatically set by the parent Exception class.",
            "is_class_variable": false,
            "name": "args",
            "type": "tuple"
          }
        ],
        "methods": [
          {
            "is_property": false,
            "is_static": false,
            "name": "__init__",
            "parameters": {
              "msg": "A string describing the type error, typically explaining what type was expected versus what was received"
            },
            "purpose": "Initializes the UnsupportedTypeError exception with a custom error message",
            "returns": "None - this is a constructor that initializes the exception instance",
            "signature": "__init__(self, msg: str) -> None"
          }
        ]
      },
      "complexity": "simple",
      "component_type": "class",
      "conditional_imports": [],
      "created_at": "2025-12-06 23:35:49",
      "decorators": [],
      "dependencies": [],
      "description": "A custom exception class that is raised when an unsupported or unexpected type is encountered during type validation or type checking operations.",
      "docstring": "Not the expected type",
      "id": 1924,
      "imports": [],
      "imports_required": [],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 19,
      "line_start": 16,
      "name": "UnsupportedTypeError",
      "parameters": [
        {
          "annotation": "Exception",
          "default": null,
          "name": "bases"
        }
      ],
      "parameters_explained": {
        "msg": "A string message that describes the type error. This message should explain what type was expected versus what type was actually received, providing context to help developers debug the issue. The message is passed to the parent Exception class and will be displayed when the exception is raised or printed."
      },
      "parent_class": null,
      "purpose": "This exception class extends Python's built-in Exception class to provide a specific error type for situations where a value or object does not match the expected type. It allows developers to catch and handle type-related errors separately from other exceptions, making error handling more precise and code more maintainable. This is particularly useful in type validation scenarios, data processing pipelines, or when enforcing strict type contracts in APIs.",
      "return_annotation": null,
      "return_explained": "Instantiation returns an UnsupportedTypeError exception object that can be raised using the 'raise' keyword. When raised, it will propagate up the call stack until caught by an appropriate exception handler. The exception object contains the error message provided during instantiation, accessible via str(exception) or exception.args[0].",
      "settings_required": [],
      "source_code": "class UnsupportedTypeError(Exception):\n    \"\"\"Not the expected type\"\"\"\n    def __init__(self, msg):\n        super(UnsupportedTypeError, self).__init__(msg)",
      "source_file": "/tf/active/vicechatdev/rmcl/exceptions.py",
      "tags": [
        "exception",
        "error-handling",
        "type-checking",
        "validation",
        "custom-exception",
        "type-error",
        "error-class"
      ],
      "updated_at": "2025-12-07T00:35:49.337506",
      "usage_example": "# Example 1: Raising the exception\ndef process_data(value):\n    if not isinstance(value, (int, float)):\n        raise UnsupportedTypeError(f\"Expected int or float, got {type(value).__name__}\")\n    return value * 2\n\n# Example 2: Catching the exception\ntry:\n    result = process_data(\"string\")\nexcept UnsupportedTypeError as e:\n    print(f\"Type error occurred: {e}\")\n\n# Example 3: Using in type validation\nclass DataValidator:\n    def validate(self, data, expected_type):\n        if not isinstance(data, expected_type):\n            raise UnsupportedTypeError(\n                f\"Data validation failed: expected {expected_type.__name__}, \"\n                f\"but received {type(data).__name__}\"\n            )\n        return True\n\nvalidator = DataValidator()\ntry:\n    validator.validate([1, 2, 3], dict)\nexcept UnsupportedTypeError as e:\n    print(f\"Validation error: {e}\")"
    },
    {
      "best_practices": [
        "Always provide a descriptive error message when raising this exception to help with debugging and error logging",
        "Use this exception specifically for document-not-found scenarios rather than generic file or data access errors",
        "Catch this exception specifically in try-except blocks when you need to handle missing documents differently from other errors",
        "Consider including the document identifier (ID, name, path) in the error message for better traceability",
        "This exception should be raised at the point where the document lookup fails, not propagated from lower-level exceptions",
        "Document in your API/module documentation that this exception can be raised by document retrieval methods",
        "Consider logging the exception before re-raising it if you need audit trails of failed document access attempts"
      ],
      "class_interface": {
        "attributes": [
          {
            "description": "Inherited from Exception base class. Contains the error message as a tuple element. Automatically set by the parent Exception class.",
            "is_class_variable": false,
            "name": "args",
            "type": "tuple"
          }
        ],
        "methods": [
          {
            "is_property": false,
            "is_static": false,
            "name": "__init__",
            "parameters": {
              "msg": "A string describing the document not found error, including context about which document was requested"
            },
            "purpose": "Initializes the DocumentNotFound exception with a custom error message",
            "returns": "None - this is a constructor that initializes the exception instance",
            "signature": "__init__(self, msg: str) -> None"
          }
        ]
      },
      "complexity": "simple",
      "component_type": "class",
      "conditional_imports": [],
      "created_at": "2025-12-06 23:35:29",
      "decorators": [],
      "dependencies": [],
      "description": "A custom exception class that is raised when a requested document cannot be found in the system.",
      "docstring": "Could not found a requested document",
      "id": 1923,
      "imports": [],
      "imports_required": [],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 13,
      "line_start": 10,
      "name": "DocumentNotFound",
      "parameters": [
        {
          "annotation": "Exception",
          "default": null,
          "name": "bases"
        }
      ],
      "parameters_explained": {
        "msg": "A string message that describes the specific document not found error. This message should provide context about which document was requested and why it couldn't be found. It will be passed to the parent Exception class and displayed when the exception is raised or printed."
      },
      "parent_class": null,
      "purpose": "This exception class is designed to provide a specific error type for document retrieval failures. It extends Python's built-in Exception class to create a domain-specific exception that can be caught and handled separately from generic exceptions. This allows for more precise error handling in document management systems, databases, or file storage operations where distinguishing between 'document not found' and other errors is important.",
      "return_annotation": null,
      "return_explained": "Instantiation returns a DocumentNotFound exception object that can be raised using the 'raise' keyword. When raised, it behaves like any Python exception and can be caught in try-except blocks. The exception object contains the error message passed during initialization.",
      "settings_required": [],
      "source_code": "class DocumentNotFound(Exception):\n    \"\"\"Could not found a requested document\"\"\"\n    def __init__(self, msg):\n        super(DocumentNotFound, self).__init__(msg)",
      "source_file": "/tf/active/vicechatdev/rmcl/exceptions.py",
      "tags": [
        "exception",
        "error-handling",
        "custom-exception",
        "document-management",
        "not-found",
        "error",
        "exception-class"
      ],
      "updated_at": "2025-12-07T00:35:29.692639",
      "usage_example": "# Example 1: Raising the exception\ndef get_document(doc_id):\n    documents = {'doc1': 'content1', 'doc2': 'content2'}\n    if doc_id not in documents:\n        raise DocumentNotFound(f\"Document with ID '{doc_id}' was not found in the database\")\n    return documents[doc_id]\n\n# Example 2: Catching the exception\ntry:\n    document = get_document('doc3')\nexcept DocumentNotFound as e:\n    print(f\"Error: {e}\")\n    # Handle the missing document case\n    document = None\n\n# Example 3: Using in a class method\nclass DocumentRepository:\n    def __init__(self):\n        self.documents = {}\n    \n    def retrieve(self, doc_id):\n        if doc_id not in self.documents:\n            raise DocumentNotFound(f\"Document '{doc_id}' does not exist in repository\")\n        return self.documents[doc_id]"
    },
    {
      "best_practices": [
        "Always provide descriptive error messages when raising AuthError to help with debugging and user feedback",
        "Catch AuthError specifically before catching general Exception to handle authentication failures differently from other errors",
        "Use AuthError consistently throughout your codebase for all authentication-related failures to maintain clear error handling patterns",
        "Consider logging AuthError occurrences for security monitoring and audit trails",
        "Don't expose sensitive information (like valid usernames or password hints) in the error message",
        "Raise AuthError as early as possible when authentication fails to prevent unauthorized code execution",
        "Document in your API/module which functions may raise AuthError so callers know to handle it"
      ],
      "class_interface": {
        "attributes": [
          {
            "description": "Inherited from Exception base class. Contains the error message as a tuple element, accessible via exception.args[0]",
            "is_class_variable": false,
            "name": "args",
            "type": "tuple"
          }
        ],
        "methods": [
          {
            "is_property": false,
            "is_static": false,
            "name": "__init__",
            "parameters": {
              "msg": "String message describing the authentication error"
            },
            "purpose": "Initializes the AuthError exception with a custom error message",
            "returns": "None (constructor)",
            "signature": "__init__(self, msg) -> None"
          }
        ]
      },
      "complexity": "simple",
      "component_type": "class",
      "conditional_imports": [],
      "created_at": "2025-12-06 23:35:11",
      "decorators": [],
      "dependencies": [],
      "description": "A custom exception class for handling authentication-related errors in an application.",
      "docstring": "Authentication error",
      "id": 1922,
      "imports": [],
      "imports_required": [],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 7,
      "line_start": 4,
      "name": "AuthError",
      "parameters": [
        {
          "annotation": "Exception",
          "default": null,
          "name": "bases"
        }
      ],
      "parameters_explained": {
        "msg": "A string message describing the authentication error that occurred. This message will be displayed when the exception is raised and can provide context about why authentication failed (e.g., 'Invalid credentials', 'Token expired', 'Unauthorized access')."
      },
      "parent_class": null,
      "purpose": "AuthError is a specialized exception class that extends Python's built-in Exception class. It is designed to be raised when authentication failures occur, such as invalid credentials, expired tokens, unauthorized access attempts, or other authentication-related issues. By using a custom exception, code can distinguish authentication errors from other types of exceptions and handle them specifically.",
      "return_annotation": null,
      "return_explained": "Instantiation returns an AuthError exception object that inherits from Exception. This object can be raised using the 'raise' keyword and caught using try-except blocks. The exception carries the error message provided during instantiation.",
      "settings_required": [],
      "source_code": "class AuthError(Exception):\n    \"\"\"Authentication error\"\"\"\n    def __init__(self, msg):\n        super(AuthError, self).__init__(msg)",
      "source_file": "/tf/active/vicechatdev/rmcl/exceptions.py",
      "tags": [
        "exception",
        "authentication",
        "error-handling",
        "security",
        "custom-exception",
        "auth",
        "access-control"
      ],
      "updated_at": "2025-12-07T00:35:11.056808",
      "usage_example": "# Raising the exception\ntry:\n    user_authenticated = False\n    if not user_authenticated:\n        raise AuthError('User authentication failed: Invalid credentials')\nexcept AuthError as e:\n    print(f'Authentication error occurred: {e}')\n\n# Using in a function\ndef authenticate_user(username, password):\n    if not username or not password:\n        raise AuthError('Username and password are required')\n    if password != 'correct_password':\n        raise AuthError('Invalid password provided')\n    return True\n\n# Catching specific authentication errors\ntry:\n    authenticate_user('john', 'wrong_pass')\nexcept AuthError as auth_err:\n    print(f'Auth failed: {auth_err}')\nexcept Exception as e:\n    print(f'Other error: {e}')"
    },
    {
      "best_practices": [
        "VirtualFolder instances are immutable in terms of name, id, and parent - these are set at construction and exposed as read-only properties",
        "The children attribute is mutable and should be managed by the code using this class to maintain the folder hierarchy",
        "Always check the 'virtual' property to distinguish virtual folders from physical folders when processing folder hierarchies",
        "The mtime property always returns the current time, so it's not suitable for tracking actual modification history",
        "When creating folder hierarchies, ensure parent_id references are valid and don't create circular references",
        "The id_ parameter should be unique across all folders (virtual and non-virtual) in the system",
        "This class inherits from Folder, so it may have additional methods and behaviors defined in the parent class"
      ],
      "class_interface": {
        "attributes": [
          {
            "description": "Private attribute storing the folder's name, exposed via the name property",
            "is_class_variable": false,
            "name": "_name",
            "type": "str"
          },
          {
            "description": "Private attribute storing the folder's unique identifier, exposed via the id property",
            "is_class_variable": false,
            "name": "_id",
            "type": "str or UUID"
          },
          {
            "description": "Private attribute storing the parent folder's identifier, exposed via the parent property",
            "is_class_variable": false,
            "name": "_parent",
            "type": "str or UUID or None"
          },
          {
            "description": "Public mutable list that stores child items (files or folders) contained within this virtual folder",
            "is_class_variable": false,
            "name": "children",
            "type": "list"
          }
        ],
        "methods": [
          {
            "is_property": false,
            "is_static": false,
            "name": "__init__",
            "parameters": {
              "id_": "Unique identifier for this folder",
              "name": "Display name for the virtual folder",
              "parent_id": "Optional identifier of the parent folder, defaults to None"
            },
            "purpose": "Constructor that initializes a virtual folder with name, unique identifier, and optional parent reference",
            "returns": "None (constructor)",
            "signature": "__init__(self, name, id_, parent_id=None)"
          },
          {
            "is_property": true,
            "is_static": false,
            "name": "name",
            "parameters": {},
            "purpose": "Read-only property that returns the folder's name",
            "returns": "String containing the folder name",
            "signature": "@property name(self) -> str"
          },
          {
            "is_property": true,
            "is_static": false,
            "name": "id",
            "parameters": {},
            "purpose": "Read-only property that returns the folder's unique identifier",
            "returns": "The unique identifier (typically string or UUID) for this folder",
            "signature": "@property id(self)"
          },
          {
            "is_property": true,
            "is_static": false,
            "name": "parent",
            "parameters": {},
            "purpose": "Read-only property that returns the parent folder's identifier",
            "returns": "The parent folder's identifier, or None if this folder has no parent",
            "signature": "@property parent(self)"
          },
          {
            "is_property": true,
            "is_static": false,
            "name": "mtime",
            "parameters": {},
            "purpose": "Read-only property that returns the modification time, always returning the current time for virtual folders",
            "returns": "Current timestamp from the now() utility function",
            "signature": "@property mtime(self)"
          },
          {
            "is_property": true,
            "is_static": false,
            "name": "virtual",
            "parameters": {},
            "purpose": "Read-only property that identifies this folder as virtual",
            "returns": "Always returns True to indicate this is a virtual folder",
            "signature": "@property virtual(self) -> bool"
          }
        ]
      },
      "complexity": "simple",
      "component_type": "class",
      "conditional_imports": [],
      "created_at": "2025-12-06 23:34:52",
      "decorators": [],
      "dependencies": [
        "trio",
        "functools",
        "io",
        "json",
        "logging",
        "uuid",
        "zipfile"
      ],
      "description": "VirtualFolder is a class representing a virtual folder in a file system hierarchy that doesn't correspond to a physical folder but exists for organizational purposes.",
      "docstring": null,
      "id": 1921,
      "imports": [
        "import functools",
        "import io",
        "import json",
        "import logging",
        "import uuid",
        "import zipfile",
        "import trio",
        "from  import api",
        "from const import ROOT_ID",
        "from const import TRASH_ID",
        "from const import FileType",
        "from  import datacache",
        "from exceptions import DocumentNotFound",
        "from exceptions import VirtualItemError",
        "from sync import add_sync",
        "from utils import now",
        "from utils import parse_datetime",
        "from rmrl import render",
        "from rmrl import sources"
      ],
      "imports_required": [
        "from const import ROOT_ID",
        "from const import TRASH_ID",
        "from const import FileType",
        "from utils import now",
        "from utils import parse_datetime",
        "from exceptions import DocumentNotFound",
        "from exceptions import VirtualItemError",
        "from sync import add_sync",
        "from rmrl import render",
        "from rmrl import sources"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 326,
      "line_start": 300,
      "name": "VirtualFolder",
      "parameters": [
        {
          "annotation": "Folder",
          "default": null,
          "name": "bases"
        }
      ],
      "parameters_explained": {
        "id_": "Unique identifier for the virtual folder, typically a string or UUID",
        "name": "String representing the display name of the virtual folder",
        "parent_id": "Optional identifier of the parent folder. If None, this folder has no parent (could be a root-level virtual folder). Defaults to None."
      },
      "parent_class": null,
      "purpose": "This class extends the Folder base class to create virtual folders that exist only in memory or as logical constructs. Virtual folders are used to organize content without having physical storage backing. They maintain parent-child relationships, have unique identifiers, and always report the current time as their modification time. The 'virtual' property distinguishes them from regular folders.",
      "return_annotation": null,
      "return_explained": "Instantiation returns a VirtualFolder object with the specified name, id, and parent relationship. The object has read-only properties for name, id, parent, mtime (modification time), and virtual status. The children attribute is a mutable list that can store child items.",
      "settings_required": [
        "Requires a Folder base class to be defined in the codebase",
        "Requires the 'now()' utility function from utils module to return current timestamp",
        "May require ROOT_ID and TRASH_ID constants to be defined for folder hierarchy management"
      ],
      "source_code": "class VirtualFolder(Folder):\n\n    def __init__(self, name, id_, parent_id=None):\n        self._name = name\n        self._id = id_\n        self._parent = parent_id\n        self.children = []\n\n    @property\n    def name(self):\n        return self._name\n\n    @property\n    def id(self):\n        return self._id\n\n    @property\n    def parent(self):\n        return self._parent\n\n    @property\n    def mtime(self):\n        return now()\n\n    @property\n    def virtual(self):\n        return True",
      "source_file": "/tf/active/vicechatdev/rmcl/items.py",
      "tags": [
        "virtual-folder",
        "file-system",
        "folder-hierarchy",
        "organization",
        "data-structure",
        "inheritance",
        "property-decorator",
        "read-only-properties"
      ],
      "updated_at": "2025-12-07T00:34:52.207784",
      "usage_example": "# Create a virtual folder\nvirtual_folder = VirtualFolder(name='My Virtual Folder', id_='vf-12345', parent_id='parent-67890')\n\n# Access properties\nprint(virtual_folder.name)  # 'My Virtual Folder'\nprint(virtual_folder.id)  # 'vf-12345'\nprint(virtual_folder.parent)  # 'parent-67890'\nprint(virtual_folder.virtual)  # True\nprint(virtual_folder.mtime)  # Current timestamp\n\n# Add children\nvirtual_folder.children.append(child_item)\n\n# Create a root-level virtual folder\nroot_virtual = VirtualFolder(name='Root Virtual', id_='vf-root')"
    },
    {
      "best_practices": [
        "Always initialize the Folder with proper metadata that includes at minimum an 'id' field, as this is used in the upload() method",
        "The children list should be populated with Item or Item-subclass instances to maintain the folder hierarchy",
        "The upload() method is async and should be awaited when called in async contexts, or called directly if the @add_sync decorator provides a synchronous wrapper",
        "The upload() method creates an in-memory ZIP file, so be mindful of memory usage for folders with many children or large metadata",
        "Ensure the parent Item class's upload_raw() method is properly implemented before calling upload()",
        "The ZIP file created contains an empty content file named '{id}.content', suggesting this is a placeholder for folder representation in the storage system",
        "Do not modify the children list during iteration without proper safeguards to avoid concurrent modification issues",
        "The Folder instance maintains state through the children list, so be careful with shared references"
      ],
      "class_interface": {
        "attributes": [
          {
            "description": "A list that stores child items (files or subfolders) contained within this folder. Initialized as an empty list and should be populated with Item instances or subclasses.",
            "is_class_variable": false,
            "name": "children",
            "type": "list"
          },
          {
            "description": "Inherited from Item base class. Stores metadata information about the folder such as ID, name, parent reference, timestamps, etc.",
            "is_class_variable": false,
            "name": "metadata",
            "type": "dict or object"
          },
          {
            "description": "Inherited from Item base class. Unique identifier for the folder, used in the upload method to name the content file in the ZIP archive.",
            "is_class_variable": false,
            "name": "id",
            "type": "str"
          }
        ],
        "methods": [
          {
            "is_property": false,
            "is_static": false,
            "name": "__init__",
            "parameters": {
              "metadata": "Dictionary or object containing folder metadata to be passed to the parent Item class"
            },
            "purpose": "Initializes a new Folder instance with the provided metadata and an empty children list",
            "returns": "None (constructor)",
            "signature": "__init__(self, metadata)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "upload",
            "parameters": {},
            "purpose": "Creates a ZIP archive containing an empty content file for the folder and uploads it using the parent class's upload_raw method",
            "returns": "The result from the upload_raw() method call, likely a response object or status indicator from the upload operation",
            "signature": "async upload(self) -> Any"
          }
        ]
      },
      "complexity": "moderate",
      "component_type": "class",
      "conditional_imports": [],
      "created_at": "2025-12-06 23:34:24",
      "decorators": [],
      "dependencies": [
        "io",
        "zipfile",
        "trio",
        "functools"
      ],
      "description": "Represents a folder item in a file system hierarchy, extending the Item base class with the ability to contain children and be uploaded as a ZIP archive.",
      "docstring": null,
      "id": 1920,
      "imports": [
        "import functools",
        "import io",
        "import json",
        "import logging",
        "import uuid",
        "import zipfile",
        "import trio",
        "from  import api",
        "from const import ROOT_ID",
        "from const import TRASH_ID",
        "from const import FileType",
        "from  import datacache",
        "from exceptions import DocumentNotFound",
        "from exceptions import VirtualItemError",
        "from sync import add_sync",
        "from utils import now",
        "from utils import parse_datetime",
        "from rmrl import render",
        "from rmrl import sources"
      ],
      "imports_required": [
        "import io",
        "import zipfile",
        "from sync import add_sync"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 297,
      "line_start": 284,
      "name": "Folder",
      "parameters": [
        {
          "annotation": "Item",
          "default": null,
          "name": "bases"
        }
      ],
      "parameters_explained": {
        "metadata": "A dictionary or object containing metadata information about the folder. This is passed to the parent Item class constructor and likely includes properties such as folder ID, name, parent folder reference, creation/modification timestamps, and other folder-specific attributes required by the Item base class."
      },
      "parent_class": null,
      "purpose": "The Folder class models a directory/folder in what appears to be a reMarkable tablet file system. It maintains a collection of child items and provides functionality to upload the folder as a ZIP file containing metadata. The upload method creates an empty content file within the ZIP archive, suggesting this is part of a document management system where folders need to be synchronized or uploaded to a remote service.",
      "return_annotation": null,
      "return_explained": "The Folder constructor returns a new Folder instance. The upload() method returns the result of upload_raw(f), which is likely a response object or status from uploading the ZIP file to a remote service. Since upload() is an async method decorated with @add_sync, it can be called both synchronously and asynchronously.",
      "settings_required": [
        "Must have access to the Item base class from the same module or package",
        "The parent Item class must implement the upload_raw() method",
        "The parent Item class must provide an 'id' attribute accessible to instances",
        "Requires the @add_sync decorator to be properly configured for async/sync method wrapping"
      ],
      "source_code": "class Folder(Item):\n\n    def __init__(self, metadata):\n        super().__init__(metadata)\n        self.children = []\n\n    @add_sync\n    async def upload(self):\n        f = io.BytesIO()\n        with zipfile.ZipFile(f, 'w', zipfile.ZIP_DEFLATED) as zf:\n            zf.writestr(f'{self.id}.content', '')\n        f.seek(0)\n\n        return await self.upload_raw(f)",
      "source_file": "/tf/active/vicechatdev/rmcl/items.py",
      "tags": [
        "folder",
        "directory",
        "file-system",
        "upload",
        "zip",
        "async",
        "remarkable",
        "document-management",
        "hierarchy",
        "collection"
      ],
      "updated_at": "2025-12-07T00:34:24.764808",
      "usage_example": "# Assuming Item base class and necessary imports are available\nmetadata = {\n    'id': 'folder-123',\n    'name': 'My Documents',\n    'parent': 'root',\n    'type': 'CollectionType'\n}\n\n# Create a folder instance\nfolder = Folder(metadata)\n\n# Add children to the folder\nfolder.children.append(child_item1)\nfolder.children.append(child_item2)\n\n# Upload the folder (async)\nimport trio\nasync def main():\n    result = await folder.upload()\n    print(f'Upload result: {result}')\n\ntrio.run(main)\n\n# Or use synchronously (if @add_sync decorator provides sync version)\nresult = folder.upload()\nprint(f'Upload result: {result}')"
    }
  ],
  "count": 100
}
