{
  "components": [
    {
      "best_practices": [
        "Nodes and relationships are stored as frozensets internally for immutability, but exposed as mutable sets through properties",
        "When creating a Subgraph with relationships, you don't need to explicitly provide the connected nodes - they are automatically extracted",
        "The unbind() method creates a deep copy with unbound nodes and relationships, useful for detaching from a parent graph",
        "Set operations (|, &, -, ^) create new Subgraph instances rather than modifying existing ones",
        "The graph property can be set after instantiation to bind the subgraph to a parent graph",
        "Boolean evaluation returns True if the subgraph contains any nodes, False otherwise",
        "Nodes must have a 'UID' attribute for the unbind() method to work correctly",
        "The unbind() method assumes relationships have start_node, end_node, labels, and relationship attributes"
      ],
      "class_interface": {
        "attributes": [
          {
            "description": "Private attribute storing reference to the parent graph object",
            "is_class_variable": false,
            "name": "_graph",
            "type": "object or None"
          },
          {
            "description": "Private immutable set storing all node objects in the subgraph",
            "is_class_variable": false,
            "name": "_nodes",
            "type": "frozenset"
          },
          {
            "description": "Private immutable set storing all relationship objects in the subgraph",
            "is_class_variable": false,
            "name": "_relationships",
            "type": "frozenset"
          }
        ],
        "methods": [
          {
            "is_property": false,
            "is_static": false,
            "name": "__init__",
            "parameters": {
              "graph": "Optional parent graph reference",
              "nodes": "Optional iterable of node objects",
              "relationships": "Optional iterable of relationship objects"
            },
            "purpose": "Initialize a Subgraph with optional nodes, relationships, and parent graph reference",
            "returns": "None (constructor)",
            "signature": "__init__(nodes=None, relationships=None, graph=None)"
          },
          {
            "is_property": true,
            "is_static": false,
            "name": "nodes",
            "parameters": {},
            "purpose": "Get a mutable set of all nodes in the subgraph",
            "returns": "Set containing all node objects in the subgraph",
            "signature": "@property nodes(self) -> set"
          },
          {
            "is_property": true,
            "is_static": false,
            "name": "relationships",
            "parameters": {},
            "purpose": "Get a mutable set of all relationships in the subgraph",
            "returns": "Set containing all relationship objects in the subgraph",
            "signature": "@property relationships(self) -> set"
          },
          {
            "is_property": true,
            "is_static": false,
            "name": "graph",
            "parameters": {},
            "purpose": "Get the parent graph reference",
            "returns": "The parent graph object or None if unbound",
            "signature": "@property graph(self)"
          },
          {
            "is_property": true,
            "is_static": false,
            "name": "graph",
            "parameters": {
              "graph": "The parent graph object to bind to"
            },
            "purpose": "Set the parent graph reference",
            "returns": "None",
            "signature": "@graph.setter graph(self, graph)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "unbind",
            "parameters": {},
            "purpose": "Create an unbound copy of the subgraph with unbound nodes and relationships",
            "returns": "A new Subgraph instance with unbound copies of all nodes and relationships",
            "signature": "unbind(self) -> Subgraph"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "__str__",
            "parameters": {},
            "purpose": "Return string representation of the subgraph",
            "returns": "String showing nodes and relationships",
            "signature": "__str__(self) -> str"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "__repr__",
            "parameters": {},
            "purpose": "Return detailed string representation of the subgraph",
            "returns": "String showing nodes and relationships",
            "signature": "__repr__(self) -> str"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "__bool__",
            "parameters": {},
            "purpose": "Check if subgraph contains any nodes",
            "returns": "True if subgraph has nodes, False otherwise",
            "signature": "__bool__(self) -> bool"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "__or__",
            "parameters": {
              "other": "Another Subgraph instance to union with"
            },
            "purpose": "Perform union operation with another subgraph",
            "returns": "New Subgraph containing union of nodes and relationships",
            "signature": "__or__(self, other: Subgraph) -> Subgraph"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "__and__",
            "parameters": {
              "other": "Another Subgraph instance to intersect with"
            },
            "purpose": "Perform intersection operation with another subgraph",
            "returns": "New Subgraph containing intersection of nodes and relationships",
            "signature": "__and__(self, other: Subgraph) -> Subgraph"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "__sub__",
            "parameters": {
              "other": "Another Subgraph instance to subtract"
            },
            "purpose": "Perform difference operation with another subgraph",
            "returns": "New Subgraph containing elements in self but not in other, preserving nodes connected by remaining relationships",
            "signature": "__sub__(self, other: Subgraph) -> Subgraph"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "__xor__",
            "parameters": {
              "other": "Another Subgraph instance for symmetric difference"
            },
            "purpose": "Perform symmetric difference operation with another subgraph",
            "returns": "New Subgraph containing elements in either subgraph but not both, preserving nodes connected by remaining relationships",
            "signature": "__xor__(self, other: Subgraph) -> Subgraph"
          }
        ]
      },
      "complexity": "moderate",
      "component_type": "class",
      "conditional_imports": [],
      "created_at": "2025-12-07 01:04:54",
      "decorators": [],
      "dependencies": [
        "uuid",
        "itertools"
      ],
      "description": "A class representing a graph subgraph containing nodes and relationships, with support for set operations and graph binding/unbinding.",
      "docstring": null,
      "id": 2143,
      "imports": [
        "from uuid import uuid4",
        "from itertools import chain",
        "import warnings"
      ],
      "imports_required": [
        "from uuid import uuid4",
        "from itertools import chain"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 459,
      "line_start": 393,
      "name": "Subgraph",
      "parameters": [
        {
          "annotation": "",
          "default": null,
          "name": "bases"
        }
      ],
      "parameters_explained": {
        "graph": "Optional reference to a parent graph object that this subgraph belongs to. Used for maintaining context and binding state.",
        "nodes": "Optional iterable of node objects to include in the subgraph. Can be None or empty. These nodes will be stored as a frozenset internally.",
        "relationships": "Optional iterable of relationship objects connecting nodes. Can be None or empty. The subgraph will automatically include all nodes referenced by these relationships, even if not explicitly provided in the nodes parameter."
      },
      "parent_class": null,
      "purpose": "The Subgraph class models a subset of a graph structure, managing collections of nodes and relationships. It provides graph algebra operations (union, intersection, difference, symmetric difference) and supports binding/unbinding from a parent graph. This is typically used in graph database operations to work with portions of larger graphs, allowing manipulation and combination of graph fragments.",
      "return_annotation": null,
      "return_explained": "Instantiation returns a Subgraph object containing the specified nodes and relationships. The nodes property returns a mutable set of nodes, relationships property returns a mutable set of relationships, and graph property returns the parent graph reference. The unbind() method returns a new unbound Subgraph copy. Set operations (__or__, __and__, __sub__, __xor__) return new Subgraph instances.",
      "settings_required": [
        "Requires Node and Relationship classes to be defined in the same module or imported",
        "Node objects must support 'UID' key access and unbind() method",
        "Relationship objects must have start_node, end_node, labels, relationship, and nodes attributes"
      ],
      "source_code": "class Subgraph():\n    \n    def __init__(self, nodes=None, relationships=None, graph=None):\n        self._graph = graph\n        self._nodes = frozenset(nodes or [])\n        self._relationships = frozenset(relationships or [])\n        self._nodes |= frozenset(chain.from_iterable(r.nodes for r in self._relationships))\n        \n    @property\n    def nodes(self):\n        return set(self._nodes)\n    \n    @property\n    def relationships(self):\n        return set(self._relationships)\n    \n    @property\n    def graph(self):\n        return self._graph\n    \n    @graph.setter\n    def graph(self, graph):\n        self._graph = graph\n    \n    def unbind(self):\n        \"\"\"Returns an unbound copy of itself\"\"\"\n        out = Subgraph()\n        created_nodes = []\n        for r in self.relationships:\n            if r.start_node['UID'] in created_nodes:\n                start = [i for i in out.nodes if i['UID'] == r.start_node['UID']][0]\n            else:\n                start = r.start_node.unbind()\n                created_nodes.append(start['UID'])\n            if r.end_node['UID'] in created_nodes:\n                end = [i for i in out.nodes if i['UID'] == r.end_node['UID']][0]\n            else:\n                end = r.end_node.unbind()\n                created_nodes.append(end['UID'])\n            out = out | Relationship(start, _Relationship(*r.labels, **r.relationship), end)\n        return out\n    \n    def __str__(self):\n        return \"Nodes(%s), \\nRelationships(%s)\" % (self.nodes,\n                                                   self.relationships)\n    \n    def __repr__(self):\n        return \"Nodes(%s), \\nRelationships(%s)\" % (self.nodes,\n                                                   self.relationships)\n    def __bool__(self):\n        return len(self.nodes) > 0\n    \n    def __or__(self, other):\n        return Subgraph(set(self.nodes) | set(other.nodes), set(self.relationships) | set(other.relationships))\n    \n    def __and__(self, other):\n        return Subgraph(set(self.nodes) & set(other.nodes), set(self.relationships) & set(other.relationships))\n\n    def __sub__(self, other):\n        r = set(self.relationships) - set(other.relationships)\n        n = (set(self.nodes) - set(other.nodes)) | set().union(*(set(rel.nodes) for rel in r))\n        return Subgraph(n, r)\n\n    def __xor__(self, other):\n        r = set(self.relationships) ^ set(other.relationships)\n        n = (set(self.nodes) ^ set(other.nodes)) | set().union(*(set(rel.nodes) for rel in r))\n        return Subgraph(n, r)",
      "source_file": "/tf/active/vicechatdev/neo4j_driver/neo4j_objects.py",
      "tags": [
        "graph",
        "subgraph",
        "nodes",
        "relationships",
        "set-operations",
        "graph-database",
        "data-structure",
        "graph-algebra"
      ],
      "updated_at": "2025-12-07T02:04:54.924392",
      "usage_example": "# Assuming Node and Relationship classes are available\nfrom itertools import chain\n\n# Create nodes\nnode1 = Node('Person', name='Alice', UID='uid1')\nnode2 = Node('Person', name='Bob', UID='uid2')\n\n# Create a relationship\nrel = Relationship(node1, 'KNOWS', node2)\n\n# Create subgraph with nodes and relationships\nsubgraph1 = Subgraph(nodes=[node1, node2], relationships=[rel])\n\n# Create another subgraph\nnode3 = Node('Person', name='Charlie', UID='uid3')\nsubgraph2 = Subgraph(nodes=[node3])\n\n# Combine subgraphs using union\ncombined = subgraph1 | subgraph2\n\n# Get intersection\ncommon = subgraph1 & subgraph2\n\n# Access nodes and relationships\nall_nodes = combined.nodes\nall_rels = combined.relationships\n\n# Unbind from graph\nunbound_copy = subgraph1.unbind()\n\n# Check if subgraph has nodes\nif subgraph1:\n    print('Subgraph has nodes')"
    },
    {
      "best_practices": [
        "Always ensure start_node and end_node are valid Node instances before instantiation",
        "When using a Node object as relationship parameter, ensure it has exactly one label",
        "Use tuple format (label, properties_dict) for relationships with properties",
        "Be aware that Node objects used as relationships carry over UID properties which _Relationship does not enforce",
        "Use unbind() method to create a copy detached from the graph database",
        "The graph parameter is automatically inferred from nodes if not provided",
        "The relationship is directional: start_node -> end_node",
        "Use _from_neo4j_node() classmethod when reconstructing relationships from database queries",
        "The _nodes attribute is a frozenset for immutability, but nodes property returns a mutable set"
      ],
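      "reconstruction_example": "# Hedged sketch (not from the source file): rebuilding a Relationship from a neo4j driver result.\n# 'session' and 'my_graph' are hypothetical objects standing in for a driver session and a Graph wrapper.\n# record = session.run('MATCH (a)-[r]->(b) RETURN r LIMIT 1').single()\n# rel = Relationship._from_neo4j_node(record['r'], graph=my_graph)\n# print(rel.element_id, rel.labels)",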
      "class_interface": {
        "attributes": [
          {
            "description": "Private attribute storing the graph instance this relationship is bound to",
            "is_class_variable": false,
            "name": "_graph",
            "type": "object or None"
          },
          {
            "description": "The source/origin node of the relationship",
            "is_class_variable": false,
            "name": "start_node",
            "type": "Node"
          },
          {
            "description": "The target/destination node of the relationship",
            "is_class_variable": false,
            "name": "end_node",
            "type": "Node"
          },
          {
            "description": "Immutable set containing both start_node and end_node",
            "is_class_variable": false,
            "name": "_nodes",
            "type": "frozenset"
          },
          {
            "description": "List containing this relationship instance (for interface consistency)",
            "is_class_variable": false,
            "name": "_relationships",
            "type": "list"
          },
          {
            "description": "The underlying _Relationship object containing the label and properties",
            "is_class_variable": false,
            "name": "relationship",
            "type": "_Relationship"
          }
        ],
        "methods": [
          {
            "is_property": false,
            "is_static": false,
            "name": "__init__",
            "parameters": {
              "end_node": "Node object representing the target node",
              "graph": "Optional graph instance to bind to",
              "relationship": "Relationship definition (string, tuple, Node, or _Relationship)",
              "start_node": "Node object representing the source node"
            },
            "purpose": "Initialize a Relationship object with start node, relationship definition, and end node",
            "returns": "None (constructor)",
            "signature": "__init__(self, start_node, relationship, end_node, graph=None)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_from_neo4j_node",
            "parameters": {
              "kwargs": "Additional keyword arguments, particularly 'graph' for binding",
              "original": "Neo4j relationship object with start_node, end_node, type, and element_id attributes"
            },
            "purpose": "Class method to reconstruct a Relationship object from a Neo4j relationship object",
            "returns": "A new Relationship instance reconstructed from the Neo4j object",
            "signature": "_from_neo4j_node(cls, original, **kwargs) -> Relationship"
          },
          {
            "is_property": true,
            "is_static": false,
            "name": "graph",
            "parameters": {},
            "purpose": "Get the graph instance this relationship is bound to",
            "returns": "The graph instance or None if unbound",
            "signature": "@property graph(self)"
          },
          {
            "is_property": true,
            "is_static": false,
            "name": "graph",
            "parameters": {
              "graph": "Graph instance to bind to"
            },
            "purpose": "Set the graph instance this relationship is bound to",
            "returns": "None",
            "signature": "@graph.setter graph(self, graph)"
          },
          {
            "is_property": true,
            "is_static": false,
            "name": "nodes",
            "parameters": {},
            "purpose": "Get a set containing both start and end nodes",
            "returns": "A set containing the start_node and end_node",
            "signature": "@property nodes(self) -> set"
          },
          {
            "is_property": true,
            "is_static": false,
            "name": "relationships",
            "parameters": {},
            "purpose": "Get a list of relationships (contains self)",
            "returns": "A list containing this relationship instance",
            "signature": "@property relationships(self) -> list"
          },
          {
            "is_property": true,
            "is_static": false,
            "name": "element_id",
            "parameters": {},
            "purpose": "Get the database element ID of the relationship",
            "returns": "The element_id from the underlying _Relationship object",
            "signature": "@property element_id(self)"
          },
          {
            "is_property": true,
            "is_static": false,
            "name": "element_id",
            "parameters": {
              "element_id": "The element ID to assign"
            },
            "purpose": "Set the database element ID of the relationship",
            "returns": "None",
            "signature": "@element_id.setter element_id(self, element_id)"
          },
          {
            "is_property": true,
            "is_static": false,
            "name": "labels",
            "parameters": {},
            "purpose": "Get the labels of the relationship",
            "returns": "The labels from the underlying _Relationship object",
            "signature": "@property labels(self)"
          },
          {
            "is_property": true,
            "is_static": false,
            "name": "labels",
            "parameters": {
              "labels": "The labels to assign to the relationship"
            },
            "purpose": "Set the labels of the relationship",
            "returns": "None",
            "signature": "@labels.setter labels(self, labels)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "unbind",
            "parameters": {},
            "purpose": "Create and return an unbound copy of this relationship",
            "returns": "A new Relationship instance with unbound nodes and relationship, detached from any graph",
            "signature": "unbind(self) -> Relationship"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "__str__",
            "parameters": {},
            "purpose": "Return a human-readable string representation of the relationship",
            "returns": "String in format 'Relationship(start_node [relationship] end_node)'",
            "signature": "__str__(self) -> str"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "__repr__",
            "parameters": {},
            "purpose": "Return a developer-friendly string representation of the relationship",
            "returns": "String in format 'Relationship(start_node relationship end_node)'",
            "signature": "__repr__(self) -> str"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "clear",
            "parameters": {},
            "purpose": "Clear all properties from the underlying relationship object",
            "returns": "None",
            "signature": "clear(self)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "update",
            "parameters": {
              "kwargs": "Key-value pairs of properties to update"
            },
            "purpose": "Update properties of the underlying relationship object",
            "returns": "None",
            "signature": "update(self, **kwargs)"
          }
        ]
      },
      "complexity": "moderate",
      "component_type": "class",
      "conditional_imports": [],
      "created_at": "2025-12-07 01:04:21",
      "decorators": [],
      "dependencies": [
        "warnings",
        "uuid"
      ],
      "description": "A class representing a graph relationship between two nodes, wrapping a _Relationship object with start and end Node objects.",
      "docstring": "A relationship is represented as a collection of two nodes and a base _Relationship, which is similar to a Node except it may only have 1 label and does not enforce the UID property",
      "id": 2142,
      "imports": [
        "from uuid import uuid4",
        "from itertools import chain",
        "import warnings"
      ],
      "imports_required": [
        "from uuid import uuid4",
        "from itertools import chain",
        "import warnings"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 390,
      "line_start": 291,
      "name": "Relationship",
      "parameters": [
        {
          "annotation": "",
          "default": null,
          "name": "bases"
        }
      ],
      "parameters_explained": {
        "end_node": "A Node object representing the target/destination node of the relationship. Must be an instance of the Node class.",
        "graph": "Optional graph instance to bind this relationship to. If None, will attempt to use the graph from start_node or end_node if either has one. Defaults to None.",
        "relationship": "The relationship definition, which can be: (1) a _Relationship object, (2) a Node object with exactly 1 label (will be converted to _Relationship), (3) a string representing the relationship label, or (4) a tuple in format (label, properties_dict) where properties_dict is optional.",
        "start_node": "A Node object representing the source/origin node of the relationship. Must be an instance of the Node class."
      },
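      "relationship_formats_example": "# Hedged sketch (not from the source file): common forms of the 'relationship' argument;\n# assumes Node, Relationship and _Relationship are importable as in this record\nstart = Node('Person', name='Alice')\nend = Node('Person', name='Bob')\n\nrel_from_str = Relationship(start, 'KNOWS', end)                       # plain label string\nrel_from_tuple = Relationship(start, ('KNOWS', {'since': 2020}), end)  # (label, properties) tuple\nrel_from_base = Relationship(start, _Relationship('KNOWS', since=2020), end)  # prebuilt _Relationship\n\n# A tuple without a properties dict emits a warning and anything after the label is ignored\nrel_bare_tuple = Relationship(start, ('KNOWS',), end)",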
      "parent_class": null,
      "purpose": "The Relationship class models a directed edge in a graph database (Neo4j-style), connecting two Node objects through a labeled relationship. It encapsulates the start node, end node, and relationship properties, providing a unified interface for managing graph relationships. The class supports multiple initialization formats (string, tuple, Node, _Relationship) and can be bound to a graph instance for database operations.",
      "return_annotation": null,
      "return_explained": "Instantiation returns a Relationship object that encapsulates the start node, end node, and relationship properties. The object provides access to graph database operations and relationship metadata through properties and methods.",
      "settings_required": [
        "Requires Node class to be defined in the same module or imported",
        "Requires _Relationship class to be defined in the same module or imported",
        "May require a graph database connection object if performing database operations"
      ],
      "source_code": "class Relationship():\n    \"\"\"\n    A relationship is represented as a collection of two nodes and a base _Relationship, which is similar to a Node except it may only have 1 label and does not enforce the UID property\n    \"\"\"\n    \n    def __init__(self, start_node, relationship, end_node, graph=None):\n        assert isinstance(start_node, Node), \"Please supply a Node as start_node\"\n        assert isinstance(end_node, Node), \"Please supply a Node as end_node\"\n        if graph is None and any([start_node.graph, end_node.graph]):\n            graph = start_node.graph or end_node.graph\n        self._graph=graph\n        self.start_node=start_node\n        self.end_node=end_node\n        self._nodes = frozenset([self.start_node, self.end_node])\n        self._relationships=[self]\n        if isinstance(relationship, _Relationship):\n            self.relationship = relationship\n        elif isinstance(relationship, Node):\n            assert len(relationship.labels) == 1, \"When passing a Node object as relationship, make sure it has exactly 1 label\"\n            warnings.warn(\"Please be aware Node objects are forced to have a UID properties, and this property is carried over to the Relationship, which does not enforce UIDs\")\n            self.relationship = _Relationship(relationship.labels, **relationship)\n        elif isinstance(relationship, str):\n            self.relationship = _Relationship(relationship)\n        elif isinstance(relationship, tuple):\n            assert len(relationship) < 3, \"When passing a tuple, please ensure only one label is passed and all properties are formatted as a dict, e.g. (RELATIONSHIP, {PROPERTIES})\"\n            if isinstance(relationship[-1], dict):\n                self.relationship = _Relationship(relationship[0], **relationship[-1])\n            else:\n                warnings.warn(\"When passing a tuple, properties must be formatted as a single dictionary or they will be ignored\")\n                self.relationship = _Relationship(relationship[0])\n        else:\n            raise TypeError(\"Please supply a Node, string or tuple as relationship\")\n            \n    @classmethod\n    def _from_neo4j_node(cls, original, **kwargs):\n        start_node = Node._from_neo4j_node(original.start_node, graph=kwargs.get('graph',None))\n        start_node.pull()\n        end_node = Node._from_neo4j_node(original.end_node, graph=kwargs.get('graph',None))\n        end_node.pull()\n        try:\n            element_id = int(original.element_id)\n        except:\n            if ':' in original.element_id:\n                element_id = int(original.element_id.split(':')[-1])\n            else:\n                raise Exception(f\"Could not obtain element ID. 
Found ID: {original.element_id}\")\n        return cls(start_node, \n                   _Relationship(original.type, _element_id=element_id, **dict(original)), \n                   end_node,\n                   graph=kwargs.get('graph',None))\n            \n    @property\n    def graph(self):\n        return self._graph\n    \n    @graph.setter\n    def graph(self, graph):\n        self._graph = graph\n    \n    @property\n    def nodes(self):\n        return set(self._nodes)\n    \n    @property\n    def relationships(self):\n        return self._relationships\n    \n    @property\n    def element_id(self):\n        return self.relationship.element_id\n    \n    @element_id.setter\n    def element_id(self, element_id):\n        self.relationship.element_id = element_id\n        \n    @property\n    def labels(self):\n        return self.relationship.labels\n    \n    @labels.setter\n    def labels(self, labels):\n        self.relationship.labels = labels\n        \n    def unbind(self):\n        \"\"\"Returns an unbound copy of itself\"\"\"\n        return Relationship(self.start_node.unbind(), _Relationship(*self.relationship.labels, **self.relationship), self.end_node.unbind())\n    \n    def __str__(self):\n        return \"Relationship(%s [%s] %s)\" % (self.start_node, self.relationship, self.end_node)\n    \n    def __repr__(self):\n        return \"Relationship(%s %s %s)\" % (self.start_node, self.relationship, self.end_node)\n    \n    def clear(self):\n        \"Redirect PropertyDict funcs to the underlying _relationship class\"\n        self._relationship.clear()\n    \n    def update(self, **kwargs):\n        \"Redirect PropertyDict funcs to the underlying _relationship class\"\n        self._relationship.update(**kwargs)",
      "source_file": "/tf/active/vicechatdev/neo4j_driver/neo4j_objects.py",
      "tags": [
        "graph",
        "relationship",
        "edge",
        "neo4j",
        "database",
        "node",
        "graph-database",
        "data-structure"
      ],
      "updated_at": "2025-12-07T02:04:21.029143",
      "usage_example": "# Basic instantiation with string relationship\nstart = Node('Person', name='Alice')\nend = Node('Person', name='Bob')\nrel = Relationship(start, 'KNOWS', end)\n\n# With properties using tuple format\nrel = Relationship(start, ('KNOWS', {'since': 2020, 'strength': 0.8}), end)\n\n# With graph binding\nrel = Relationship(start, 'KNOWS', end, graph=my_graph)\n\n# Access properties\nprint(rel.labels)  # Relationship labels\nprint(rel.nodes)  # Set of start and end nodes\nprint(rel.element_id)  # Database element ID if bound\n\n# Create unbound copy\nunbound_rel = rel.unbind()\n\n# Update relationship properties\nrel.update(weight=5, active=True)"
    },
    {
      "best_practices": [
        "Always pass _element_id as a keyword argument (with underscore prefix) to avoid confusion with labels",
        "The element_id should be set by the database system after persistence; use None for new relationships",
        "Labels are stored as a set internally but the setter converts lists and strings appropriately",
        "When setting labels via the property setter, pass either a list of strings or a single string",
        "Properties can be accessed and modified using dictionary-style syntax (inherited from PropertyDict)",
        "The class maintains immutability of the original labels set by returning a copy via the labels property getter",
        "Element_id must be convertible to an integer; attempting to set non-numeric values will raise ValueError",
        "Use __str__ for human-readable output and __repr__ for debugging/logging purposes"
      ],
      "class_interface": {
        "attributes": [
          {
            "description": "Internal storage for relationship labels, initially a set but can become a list via setter",
            "is_class_variable": false,
            "name": "_labels",
            "type": "set | list"
          },
          {
            "description": "Internal storage for the database element identifier, validated to be integer or None",
            "is_class_variable": false,
            "name": "_element_id",
            "type": "int | None"
          }
        ],
        "methods": [
          {
            "is_property": false,
            "is_static": false,
            "name": "__init__",
            "parameters": {
              "**properties": "Arbitrary keyword arguments for relationship properties",
              "*labels": "Variable number of label strings for the relationship type",
              "_element_id": "Optional integer identifier for the relationship in the database (default: None)"
            },
            "purpose": "Initialize a relationship with labels, optional element ID, and properties",
            "returns": "None (constructor)",
            "signature": "__init__(self, *labels, _element_id=None, **properties)"
          },
          {
            "is_property": true,
            "is_static": false,
            "name": "labels",
            "parameters": {},
            "purpose": "Get a copy of the relationship's labels as a set",
            "returns": "A set containing all label strings for this relationship",
            "signature": "@property labels(self) -> set"
          },
          {
            "is_property": true,
            "is_static": false,
            "name": "labels",
            "parameters": {
              "labels": "Either a list of label strings or a single label string"
            },
            "purpose": "Set the relationship's labels from a list or string",
            "returns": "None (setter)",
            "signature": "@labels.setter labels(self, labels)"
          },
          {
            "is_property": true,
            "is_static": false,
            "name": "element_id",
            "parameters": {},
            "purpose": "Get the relationship's database element identifier",
            "returns": "Integer element ID or None if not set",
            "signature": "@property element_id(self) -> int | None"
          },
          {
            "is_property": true,
            "is_static": false,
            "name": "element_id",
            "parameters": {
              "x": "Value that can be coerced to an integer"
            },
            "purpose": "Set the relationship's element ID with validation",
            "returns": "None (setter)",
            "signature": "@element_id.setter element_id(self, x)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "__str__",
            "parameters": {},
            "purpose": "Return a human-readable string representation of the relationship",
            "returns": "Formatted string like '[label1, label2 { prop1: value1, prop2: value2 }]'",
            "signature": "__str__(self) -> str"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "__repr__",
            "parameters": {},
            "purpose": "Return a detailed string representation suitable for debugging",
            "returns": "Formatted string like '[label1, label2, prop1=value1, prop2=value2]'",
            "signature": "__repr__(self) -> str"
          }
        ]
      },
      "complexity": "moderate",
      "component_type": "class",
      "conditional_imports": [],
      "created_at": "2025-12-07 01:03:43",
      "decorators": [],
      "dependencies": [],
      "description": "A class representing a graph relationship (edge) with labels, properties, and an optional element ID, inheriting from PropertyDict to manage key-value properties.",
      "docstring": null,
      "id": 2141,
      "imports": [
        "from uuid import uuid4",
        "from itertools import chain",
        "import warnings"
      ],
      "imports_required": [
        "from uuid import uuid4",
        "from itertools import chain",
        "import warnings"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 289,
      "line_start": 240,
      "name": "_Relationship",
      "parameters": [
        {
          "annotation": "PropertyDict",
          "default": null,
          "name": "bases"
        }
      ],
      "parameters_explained": {
        "**properties": "Arbitrary keyword arguments representing property key-value pairs for the relationship. These are passed to the parent PropertyDict class and can be accessed like dictionary items.",
        "*labels": "Variable number of string arguments representing the relationship type labels. These are stored as a set internally to ensure uniqueness. Can be zero or more label strings.",
        "_element_id": "Optional keyword-only parameter representing the database element identifier. Must be convertible to an integer or None. Used to track the relationship's identity in a graph database. Defaults to None for new relationships not yet persisted."
      },
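      "setter_validation_example": "# Hedged sketch (not from the source file): label and element_id validation,\n# assuming _Relationship (and its PropertyDict base) is importable\nrel = _Relationship('KNOWS', since=2020)\n\nrel.labels = 'WORKS_WITH'        # a single string is wrapped in a one-element list\nrel.labels = ['KNOWS', 'LIKES']  # a list is stored as-is\ntry:\n    rel.labels = {'KNOWS'}       # any other type raises ValueError\nexcept ValueError as err:\n    print(err)\n\nrel.element_id = '42'            # accepted: the value can be coerced to int\ntry:\n    rel.element_id = 'not-a-number'\nexcept ValueError as err:\n    print(err)",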
      "parent_class": null,
      "purpose": "This class models a relationship in a graph database structure, storing labels (relationship types), properties (key-value pairs), and an optional element_id for database identification. It provides property-based access to labels and element_id with validation, and custom string representations for debugging and display. The class is designed to work with graph database systems where relationships connect nodes and have typed labels and arbitrary properties.",
      "return_annotation": null,
      "return_explained": "Instantiation returns a _Relationship object that behaves like a dictionary for properties while maintaining labels and element_id as separate managed attributes. The __str__ method returns a formatted string like '[label1, label2 { prop1: value1, prop2: value2 }]'. The __repr__ method returns a similar representation suitable for debugging.",
      "settings_required": [
        "Requires PropertyDict class to be available in the same module or imported, as _Relationship inherits from it"
      ],
      "source_code": "class _Relationship(PropertyDict):\n    \n    def __init__(self, *labels, _element_id=None, **properties):\n        self._labels = set(labels)\n        if _element_id is None:\n            self._element_id = None\n        else:\n            try:\n                self._element_id = int(_element_id)\n            except ValueError:\n                raise ValueError(f\"element_id must be an integer or None, got {_element_id}\")\n        PropertyDict.__init__(self, properties)\n        \n        \n    @property\n    def labels(self):\n        return set(self._labels)\n    \n    @labels.setter\n    def labels(self, labels):\n        if isinstance(labels, list):\n            self._labels=labels\n        elif isinstance(labels, str):\n            self._labels=[labels]\n        else:\n            raise ValueError(\"Please pass a list or string as label\")\n    \n    @property\n    def element_id(self):\n        return self._element_id\n    \n    @element_id.setter\n    def element_id(self, x):\n        try:\n            int(x)\n        except:\n            raise ValueError(\"Invalid input for element_id, value cannot be coerced to int\")\n        self._element_id = x\n\n    def __str__(self):\n        kwargs = dict(self)\n        labels_str = \", \".join(self.labels)\n        props_str = \", \".join(\"{}: {!r}\".format(k, v) for k, v in kwargs.items())\n        return \"[{} {{ {} }}]\".format(labels_str, props_str)\n\n    def __repr__(self):\n        kwargs = dict(self)\n        labels_str = \", \".join(self.labels)\n        props_str = \", \".join(\"{}={!r}\".format(k, v) for k, v in kwargs.items())\n        return \"[{}, {}]\".format(labels_str, props_str)",
      "source_file": "/tf/active/vicechatdev/neo4j_driver/neo4j_objects.py",
      "tags": [
        "graph-database",
        "relationship",
        "edge",
        "property-graph",
        "data-structure",
        "neo4j-style",
        "labeled-graph",
        "dictionary-like"
      ],
      "updated_at": "2025-12-07T02:03:43.991915",
      "usage_example": "# Create a relationship with labels and properties\nrel = _Relationship('KNOWS', 'FRIEND_OF', _element_id=123, since=2020, strength=0.8)\n\n# Access labels\nprint(rel.labels)  # {'KNOWS', 'FRIEND_OF'}\n\n# Modify labels\nrel.labels = ['WORKS_WITH', 'COLLEAGUE']\n\n# Access properties (inherited from PropertyDict)\nprint(rel['since'])  # 2020\nrel['verified'] = True\n\n# Access element_id\nprint(rel.element_id)  # 123\n\n# Update element_id\nrel.element_id = 456\n\n# String representation\nprint(str(rel))  # [WORKS_WITH, COLLEAGUE { since: 2020, strength: 0.8, verified: True }]\nprint(repr(rel))  # [WORKS_WITH, COLLEAGUE, since=2020, strength=0.8, verified=True]"
    },
    {
      "best_practices": [
        "Always check if a node is bound to a graph (has element_id and graph) before performing database operations",
        "Use unbind() to create independent copies of nodes that won't sync with the database",
        "The _lock class variable prevents automatic database pulls when True - use carefully to avoid unintended synchronization",
        "Labels are stored in a set and are case-sensitive; ensure consistent label naming conventions",
        "The UID property is automatically generated and should not be manually modified",
        "When working with Neo4j nodes, use the _from_neo4j_node() class method for proper conversion",
        "The __ensure_labels() private method automatically pulls from database before label operations if the node is bound",
        "Use clear_element_id() to unbind a node from the database without creating a new instance",
        "Properties are managed through PropertyDict inheritance, so use dictionary-style access for properties",
        "The node maintains immutable label sets through the labels property; use add_label/remove_label methods to modify"
      ],
      "class_interface": {
        "attributes": [
          {
            "description": "Class variable that prevents automatic database pulls when True. Default is False",
            "is_class_variable": true,
            "name": "_lock",
            "type": "bool"
          },
          {
            "description": "The Neo4j database element ID for this node. None if unbound",
            "is_class_variable": false,
            "name": "_element_id",
            "type": "int | None"
          },
          {
            "description": "Reference to the Graph object this node belongs to. None if unbound",
            "is_class_variable": false,
            "name": "_graph",
            "type": "Graph | None"
          },
          {
            "description": "Internal set storing the node's labels",
            "is_class_variable": false,
            "name": "_labels",
            "type": "set"
          },
          {
            "description": "List containing this node instance (for graph structure compatibility)",
            "is_class_variable": false,
            "name": "_nodes",
            "type": "list"
          },
          {
            "description": "Unique identifier (UUID) for this node instance, stored as 'UID' property",
            "is_class_variable": false,
            "name": "_uid",
            "type": "str"
          }
        ],
        "methods": [
          {
            "is_property": false,
            "is_static": false,
            "name": "__init__",
            "parameters": {
              "**properties": "Key-value pairs for node properties",
              "*labels": "Variable number of label strings to assign to the node",
              "_element_id": "Optional integer ID from Neo4j database, or None for unbound nodes",
              "_graph": "Optional Graph object reference for database operations"
            },
            "purpose": "Initialize a new Node instance with labels, optional database binding, and properties",
            "returns": "None (constructor)",
            "signature": "__init__(self, *labels, _element_id=None, _graph=None, **properties)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_from_neo4j_node",
            "parameters": {
              "**kwargs": "Optional keyword arguments, particularly 'graph' for binding to a Graph instance",
              "original": "Neo4j node object with element_id, labels, and properties"
            },
            "purpose": "Class method to create a Node instance from a Neo4j native node object",
            "returns": "New Node instance populated with data from the Neo4j node",
            "signature": "_from_neo4j_node(cls, original, **kwargs) -> Node"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "keys",
            "parameters": {},
            "purpose": "Return the keys of the node's properties (inherited from PropertyDict)",
            "returns": "View of property keys",
            "signature": "keys(self) -> KeysView"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "__hash__",
            "parameters": {},
            "purpose": "Return hash value based on the node's UID for use in sets and dictionaries",
            "returns": "Integer hash value",
            "signature": "__hash__(self) -> int"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "__str__",
            "parameters": {},
            "purpose": "Return human-readable string representation of the node",
            "returns": "String in format 'Node(Label1, Label2 { prop1: value1, prop2: value2 })'",
            "signature": "__str__(self) -> str"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "__repr__",
            "parameters": {},
            "purpose": "Return developer-friendly string representation suitable for debugging",
            "returns": "String in format 'Node(Label1, Label2, prop1=value1, prop2=value2)'",
            "signature": "__repr__(self) -> str"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "pull",
            "parameters": {},
            "purpose": "Synchronize node data from the database if bound to a graph",
            "returns": "None, but updates the node's labels and properties from the database",
            "signature": "pull(self) -> None"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "unbind",
            "parameters": {},
            "purpose": "Create an unbound copy of the node without graph or element_id references",
            "returns": "New Node instance with same labels and properties but no database binding",
            "signature": "unbind(self) -> Node"
          },
          {
            "is_property": true,
            "is_static": false,
            "name": "nodes",
            "parameters": {},
            "purpose": "Property that returns a list containing this node (for compatibility with graph structures)",
            "returns": "List containing only this node instance",
            "signature": "nodes(self) -> list"
          },
          {
            "is_property": true,
            "is_static": false,
            "name": "relationships",
            "parameters": {},
            "purpose": "Property that returns relationships connected to this node",
            "returns": "Empty set (base implementation, may be overridden in subclasses)",
            "signature": "relationships(self) -> set"
          },
          {
            "is_property": true,
            "is_static": false,
            "name": "element_id",
            "parameters": {},
            "purpose": "Property to get or set the Neo4j database element ID",
            "returns": "Integer element ID if bound to database, None otherwise",
            "signature": "element_id(self) -> int | None"
          },
          {
            "is_property": true,
            "is_static": false,
            "name": "graph",
            "parameters": {},
            "purpose": "Property to get or set the Graph instance this node is bound to",
            "returns": "Graph object if bound, None otherwise",
            "signature": "graph(self) -> Graph | None"
          },
          {
            "is_property": true,
            "is_static": false,
            "name": "labels",
            "parameters": {},
            "purpose": "Property that returns an immutable set of all labels on this node",
            "returns": "Set of label strings (immutable copy)",
            "signature": "labels(self) -> set"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "clear_element_id",
            "parameters": {},
            "purpose": "Remove the element_id, effectively unbinding the node from the database",
            "returns": "None, sets element_id to None",
            "signature": "clear_element_id(self) -> None"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "has_label",
            "parameters": {
              "label": "String label or tuple of label strings to check for"
            },
            "purpose": "Check if the node has a specific label or tuple of labels",
            "returns": "True if node has the label(s), False otherwise. For tuples, returns True only if all labels are present",
            "signature": "has_label(self, label) -> bool"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "add_label",
            "parameters": {
              "label": "String label or tuple of label strings to add"
            },
            "purpose": "Add a label or multiple labels (if tuple) to the node",
            "returns": "None, modifies the node's label set",
            "signature": "add_label(self, label) -> None"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "remove_label",
            "parameters": {
              "label": "String label or tuple of label strings to remove"
            },
            "purpose": "Remove a label or multiple labels (if tuple) from the node",
            "returns": "None, modifies the node's label set. Does nothing if label doesn't exist",
            "signature": "remove_label(self, label) -> None"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "clear_labels",
            "parameters": {},
            "purpose": "Remove all labels from the node",
            "returns": "None, empties the node's label set",
            "signature": "clear_labels(self) -> None"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "update_labels",
            "parameters": {
              "labels": "Iterable of label strings to add"
            },
            "purpose": "Add multiple labels from an iterable to the node",
            "returns": "None, adds all labels from the iterable to the node",
            "signature": "update_labels(self, labels) -> None"
          }
        ]
      },
      "complexity": "moderate",
      "component_type": "class",
      "conditional_imports": [],
      "created_at": "2025-12-07 01:03:13",
      "decorators": [],
      "dependencies": [
        "uuid"
      ],
      "description": "A Node class representing a graph node with labels and properties, designed to work with Neo4j graph databases. It extends PropertyDict to manage node properties and provides methods for label management and graph synchronization.",
      "docstring": "    ",
      "id": 2140,
      "imports": [
        "from uuid import uuid4",
        "from itertools import chain",
        "import warnings"
      ],
      "imports_required": [
        "from uuid import uuid4"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 238,
      "line_start": 97,
      "name": "Node",
      "parameters": [
        {
          "annotation": "PropertyDict",
          "default": null,
          "name": "bases"
        }
      ],
      "parameters_explained": {
        "**properties": "Arbitrary keyword arguments representing the properties (key-value pairs) to store on this node. These are passed to the PropertyDict parent class for management.",
        "*labels": "Variable number of string arguments representing the labels to assign to this node. Labels are used to categorize nodes in the graph database (e.g., 'Person', 'Company').",
        "_element_id": "Optional integer or None. The unique identifier assigned by Neo4j when the node is persisted to the database. None indicates an unbound node that hasn't been saved yet. Must be coercible to an integer.",
        "_graph": "Optional reference to a Graph object that this node belongs to. Used for database operations like pulling updated data from the database. None indicates the node is not associated with a graph instance."
      },
      "parent_class": null,
      "purpose": "This class represents a node in a graph database (specifically Neo4j). It manages node labels, properties, and maintains a connection to a graph instance. The Node can be bound to a database (with an element_id) or exist as an unbound entity. It provides functionality for label manipulation, property management through inheritance from PropertyDict, and synchronization with the database through pull operations. The class supports creating nodes from Neo4j native node objects and maintains a unique identifier (UID) for each instance.",
      "return_annotation": null,
      "return_explained": "Instantiation returns a Node object. The object contains labels, properties, and optional graph binding. Key method returns: pull() returns None but updates the node's state; unbind() returns a new unbound Node instance; has_label() returns boolean; labels property returns an immutable set of label strings; element_id property returns integer or None; graph property returns Graph object or None.",
      "settings_required": [
        "Requires PropertyDict parent class to be available in the same module or imported",
        "For database operations, requires a Graph object instance with a pull() method that accepts a Node"
      ],
      "source_code": "class Node(PropertyDict):\n    \"\"\" \n    \"\"\"\n    \n    _lock = False\n    \n    def __init__(self, *labels, _element_id=None, _graph=None, **properties):\n        if _element_id is None:\n            self._element_id = None\n        else:\n            try:\n                self._element_id = int(_element_id)\n            except ValueError:\n                raise ValueError(f\"element_id must be an integer or None, got {_element_id}\")\n        self._graph = _graph\n        self._labels = set(labels)\n        self._nodes = [self]\n        PropertyDict.__init__(self, properties)\n        self._uid = self.setdefault('UID', str(uuid4()))\n        \n    @classmethod\n    def _from_neo4j_node(cls, original, **kwargs):\n        try:\n            element_id = int(original.element_id)\n        except:\n            if ':' in original.element_id:\n                element_id = int(original.element_id.split(':')[-1])\n            else:\n                raise Exception(f\"Could not obtain element ID. Found ID: {original.element_id}\")\n        return cls(*list(original.labels), _element_id=element_id, _graph=kwargs.get('graph', None), **dict(original))\n\n    def __ensure_labels(self):\n        if self._graph and self.element_id and not self._lock:\n            self.graph.pull(self)\n\n    def keys(self):\n        return PropertyDict.keys(self)\n    \n    def __hash__(self):\n        return hash(self._uid)\n    \n    def __str__(self):\n        kwargs = dict(self)\n        labels_str = \", \".join(self.labels)\n        props_str = \", \".join(\"{}: {!r}\".format(k, v) for k, v in kwargs.items())\n        return \"Node({} {{ {} }})\".format(labels_str, props_str)\n\n    def __repr__(self):\n        kwargs = dict(self)\n        labels_str = \", \".join(self.labels)\n        props_str = \", \".join(\"{}={!r}\".format(k, v) for k, v in kwargs.items())\n        return \"Node({}, {})\".format(labels_str, props_str)\n    \n    def pull(self):\n        if self._graph and self.element_id and not self._lock:\n            self.graph.pull(self)\n        \n    def unbind(self):\n        \"\"\"Returns an unbound copy of itself\"\"\"\n        return Node(*self.labels, **dict(self))\n    \n    @property\n    def nodes(self):\n        return self._nodes\n    \n    @property\n    def relationships(self):\n        return set([])\n    \n    @property\n    def element_id(self):\n        return self._element_id\n    \n    @element_id.setter\n    def element_id(self, x):\n        try:\n            int(x)\n        except:\n            raise TypeError(\"Invalid input for element_id, value cannot be coerced to int\")\n        self._element_id = x\n    \n    @property\n    def graph(self):\n        return self._graph\n    \n    @graph.setter\n    def graph(self, graph):\n        self._graph = graph\n    \n    @property\n    def labels(self):\n        \"\"\" The full set of labels associated with with this *node*.\n        This set is immutable and cannot be used to add or remove\n        labels. 
Use methods such as :meth:`.add_label` and\n        :meth:`.remove_label` for that instead.\n        \"\"\"\n        return set(self._labels)\n    \n    def clear_element_id(self):\n        self._element_id = None\n\n    def has_label(self, label):\n        \"\"\" Return :const:`True` if this node has the label `label`,\n        :const:`False` otherwise.\n        \"\"\"\n        self.__ensure_labels()\n        if isinstance(label, tuple):\n            return all(lab in self._labels for lab in label)\n        else:\n            return label in self._labels\n\n    def add_label(self, label):\n        \"\"\" Add the label `label` to this node.\n        \"\"\"\n        self.__ensure_labels()\n        if isinstance(label, tuple):\n            self._labels.update(label)\n        else:\n            self._labels.add(label)\n\n    def remove_label(self, label):\n        \"\"\" Remove the label `label` from this node, if it exists.\n        \"\"\"\n        self.__ensure_labels()\n        if isinstance(label, tuple):\n            for lab in label:\n                self._labels.discard(lab)\n        else:\n            self._labels.discard(label)\n\n    def clear_labels(self):\n        \"\"\" Remove all labels from this node.\n        \"\"\"\n        self._labels.clear()\n\n    def update_labels(self, labels):\n        \"\"\" Add multiple labels to this node from the iterable\n        `labels`.\n        \"\"\"\n        self.__ensure_labels()\n        for label in labels:\n            self.add_label(label)",
      "source_file": "/tf/active/vicechatdev/neo4j_driver/neo4j_objects.py",
      "tags": [
        "graph-database",
        "neo4j",
        "node",
        "graph-node",
        "labels",
        "properties",
        "database-orm",
        "data-model",
        "graph-structure"
      ],
      "updated_at": "2025-12-07T02:03:13.739345",
      "usage_example": "# Create an unbound node\nnode = Node('Person', 'Employee', name='John Doe', age=30)\n\n# Access labels\nprint(node.labels)  # {'Person', 'Employee'}\n\n# Check for label\nif node.has_label('Person'):\n    print('Is a Person')\n\n# Add/remove labels\nnode.add_label('Manager')\nnode.remove_label('Employee')\n\n# Access properties (inherited from PropertyDict)\nprint(node['name'])  # 'John Doe'\nnode['department'] = 'Engineering'\n\n# Create from Neo4j node object\n# neo4j_node = session.run('MATCH (n) RETURN n').single()['n']\n# node = Node._from_neo4j_node(neo4j_node, graph=my_graph)\n\n# Bind to graph and pull updates\n# node.graph = my_graph\n# node.element_id = 123\n# node.pull()  # Syncs with database\n\n# Create unbound copy\nunbound_copy = node.unbind()\n\n# Access unique identifier\nprint(node._uid)  # UUID string"
    },
    {
      "best_practices": [
        "Always call close() when done with the Graph instance to properly release database connections",
        "Use context managers or try-finally blocks to ensure connections are closed even if errors occur",
        "The class automatically handles ServiceUnavailable exceptions and attempts to reconnect, but persistent connection issues should be investigated",
        "Nodes and relationships must have a UID property for proper MERGE operations in create()",
        "Before calling push(), ensure the node has been created in the database (has an element_id)",
        "The pull() method locks nodes during update to prevent concurrent modifications",
        "Label parameters can be passed as a single string or list of strings; they are automatically converted to lists internally",
        "Element IDs must be integers; the class attempts to coerce string IDs to integers but will raise ValueError if coercion fails",
        "When creating entities, the graph property is automatically set on nodes and relationships",
        "The delete() method performs DETACH DELETE, which removes all relationships before deleting nodes"
      ],
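      "best_practices_example": "# Editor's sketch (hypothetical field, not extracted from source): connection lifecycle and UID-based creation.\n# Assumes the Graph and Node classes documented in this entry; URI and credentials are placeholders.\ngraph = Graph(URI='bolt://localhost:7687', auth=('neo4j', 'password'), database='neo4j')\ntry:\n    person = Node('Person', name='Ada')    # Node.__init__ assigns a UID automatically\n    graph.create(person)                   # MERGE on UID; sets element_id and binds person.graph\n    person['title'] = 'Engineer'\n    graph.push(person)                     # push requires an existing element_id\nfinally:\n    graph.close()                          # always release the driver connection",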
      "class_interface": {
        "attributes": [
          {
            "description": "Neo4j driver instance used for all database connections and operations",
            "is_class_variable": false,
            "name": "driver",
            "type": "neo4j.GraphDatabase.driver"
          },
          {
            "description": "Name of the specific Neo4j database to connect to, or None for default database",
            "is_class_variable": false,
            "name": "database",
            "type": "str or None"
          }
        ],
        "methods": [
          {
            "is_property": false,
            "is_static": false,
            "name": "__init__",
            "parameters": {
              "URI": "Connection URI for Neo4j database",
              "auth": "Authentication credentials (username, password tuple or auth object)",
              "database": "Optional database name to connect to",
              "name": "Alternative parameter for database name (deprecated)"
            },
            "purpose": "Initialize a Graph instance with connection parameters to a Neo4j database",
            "returns": "None (constructor)",
            "signature": "__init__(self, URI, auth, database=None, name=None)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "close",
            "parameters": {},
            "purpose": "Close the Neo4j driver connection",
            "returns": "None",
            "signature": "close(self)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "open",
            "parameters": {},
            "purpose": "Open/reopen the Neo4j driver connection",
            "returns": "None",
            "signature": "open(self)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "__repr__",
            "parameters": {},
            "purpose": "Return a string representation of the Graph instance showing host and database",
            "returns": "String describing the graph interface connection",
            "signature": "__repr__(self) -> str"
          },
          {
            "is_property": false,
            "is_static": true,
            "name": "_get_label_strings",
            "parameters": {
              "labels": "List of label strings or None"
            },
            "purpose": "Convert a list of labels into a Cypher-compatible label string (e.g., ':Person:Employee')",
            "returns": "Formatted label string for Cypher queries, empty string if None in labels",
            "signature": "_get_label_strings(labels) -> str"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "catch_service_unavailable",
            "parameters": {
              "func": "Function to wrap with exception handling"
            },
            "purpose": "Decorator that catches ServiceUnavailable exceptions and attempts to reconnect before retrying",
            "returns": "Wrapped function with automatic reconnection on service unavailability",
            "signature": "catch_service_unavailable(func)"
          },
          {
            "is_property": false,
            "is_static": true,
            "name": "_run",
            "parameters": {
              "kwargs": "Parameters to pass to the query",
              "query": "Cypher query string to execute",
              "tx": "Neo4j transaction object"
            },
            "purpose": "Execute a Cypher query within a transaction and return all records",
            "returns": "List of result records from the query",
            "signature": "_run(tx, query, **kwargs) -> list"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "run",
            "parameters": {
              "kwargs": "Parameters to pass to the query",
              "query": "Cypher query string to execute"
            },
            "purpose": "Execute a Cypher query and return results wrapped in a ResultWrapper object",
            "returns": "ResultWrapper object containing query results",
            "signature": "run(self, query, **kwargs) -> ResultWrapper"
          },
          {
            "is_property": false,
            "is_static": true,
            "name": "_match_by_id",
            "parameters": {
              "label": "List of labels to filter by",
              "tx": "Neo4j transaction object",
              "x": "Integer ID of the node"
            },
            "purpose": "Internal method to match a node by its Neo4j internal ID within a transaction",
            "returns": "Neo4j node object",
            "signature": "_match_by_id(tx, x, label)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "match_by_id",
            "parameters": {
              "label": "Optional label or list of labels to filter by",
              "x": "Integer or string ID of the node (will be coerced to int)"
            },
            "purpose": "Match and return a node by its Neo4j internal ID",
            "returns": "Node object representing the matched node",
            "signature": "match_by_id(self, x, label=None) -> Node"
          },
          {
            "is_property": false,
            "is_static": true,
            "name": "_match_by_uid",
            "parameters": {
              "label": "List of labels to filter by",
              "tx": "Neo4j transaction object",
              "uid": "UID property value to match"
            },
            "purpose": "Internal method to match a node by its UID property within a transaction",
            "returns": "Neo4j node object",
            "signature": "_match_by_uid(tx, uid, label)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "match_by_uid",
            "parameters": {
              "label": "Optional label or list of labels to filter by",
              "uid": "UID property value to match"
            },
            "purpose": "Match and return a node by its UID property",
            "returns": "Node object representing the matched node",
            "signature": "match_by_uid(self, uid, label=None) -> Node"
          },
          {
            "is_property": false,
            "is_static": true,
            "name": "_match_by_name",
            "parameters": {
              "label": "List of labels to filter by",
              "name": "Name property value to match",
              "tx": "Neo4j transaction object"
            },
            "purpose": "Internal method to match a node by its N (name) property within a transaction",
            "returns": "Neo4j node object",
            "signature": "_match_by_name(tx, name, label)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "match_by_name",
            "parameters": {
              "label": "Optional label or list of labels to filter by",
              "name": "Name property value to match"
            },
            "purpose": "Match and return a node by its N (name) property",
            "returns": "Node object representing the matched node",
            "signature": "match_by_name(self, name, label=None) -> Node"
          },
          {
            "is_property": false,
            "is_static": true,
            "name": "_match_relationship_by_id",
            "parameters": {
              "tx": "Neo4j transaction object",
              "x": "Integer ID of the relationship"
            },
            "purpose": "Internal method to match a relationship by its Neo4j internal ID within a transaction",
            "returns": "Neo4j relationship object",
            "signature": "_match_relationship_by_id(tx, x)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "match_relationship_by_id",
            "parameters": {
              "x": "Integer or string ID of the relationship (will be coerced to int)"
            },
            "purpose": "Match and return a relationship by its Neo4j internal ID",
            "returns": "Relationship object representing the matched relationship",
            "signature": "match_relationship_by_id(self, x) -> Relationship"
          },
          {
            "is_property": false,
            "is_static": true,
            "name": "_push",
            "parameters": {
              "element_id": "Integer ID of the node to update",
              "properties": "Dictionary of properties to set on the node",
              "tx": "Neo4j transaction object"
            },
            "purpose": "Internal method to update a node's properties in the database within a transaction",
            "returns": "List of result records",
            "signature": "_push(tx, element_id, properties)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "push",
            "parameters": {
              "node": "Node object to push to the database (must have element_id and be bound to this graph)"
            },
            "purpose": "Update an existing node's properties in the database",
            "returns": "None",
            "signature": "push(self, node) -> None"
          },
          {
            "is_property": false,
            "is_static": true,
            "name": "_node_pull",
            "parameters": {
              "ids": "List of node IDs to retrieve",
              "tx": "Neo4j transaction object"
            },
            "purpose": "Internal method to retrieve node data by IDs within a transaction",
            "returns": "List of tuples containing (id, labels, properties) for each node",
            "signature": "_node_pull(tx, ids) -> list"
          },
          {
            "is_property": false,
            "is_static": true,
            "name": "_relationship_pull",
            "parameters": {
              "ids": "List of relationship IDs to retrieve",
              "tx": "Neo4j transaction object"
            },
            "purpose": "Internal method to retrieve relationship data by IDs within a transaction",
            "returns": "List of tuples containing (id, properties) for each relationship",
            "signature": "_relationship_pull(tx, ids) -> list"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "pull",
            "parameters": {
              "entity": "Entity object (Node, Relationship, or Subgraph) to refresh from database"
            },
            "purpose": "Refresh an entity (node or subgraph) with the latest data from the database",
            "returns": "None (updates entity in place)",
            "signature": "pull(self, entity) -> None"
          },
          {
            "is_property": false,
            "is_static": true,
            "name": "_create",
            "parameters": {
              "data": "List of data dictionaries to create",
              "query": "Cypher query string for creating entities",
              "tx": "Neo4j transaction object"
            },
            "purpose": "Internal method to execute a create/merge query within a transaction",
            "returns": "List of result records",
            "signature": "_create(tx, query, data) -> list"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "create",
            "parameters": {
              "entity": "Entity object (Node, Relationship, or Subgraph) to create in database"
            },
            "purpose": "Create nodes and relationships in the database, using MERGE to avoid duplicates based on UID",
            "returns": "None (updates entity element_ids in place)",
            "signature": "create(self, entity) -> None"
          },
          {
            "is_property": false,
            "is_static": true,
            "name": "_delete",
            "parameters": {
              "identities": "List of element IDs to delete",
              "tx": "Neo4j transaction object"
            },
            "purpose": "Internal method to delete nodes and relationships by ID within a transaction",
            "returns": "List of result records",
            "signature": "_delete(tx, identities) -> list"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "delete",
            "parameters": {
              "entity": "Entity object (Node, Relationship, or Subgraph) to delete from database"
            },
            "purpose": "Delete nodes and relationships from the database (performs DETACH DELETE)",
            "returns": "None",
            "signature": "delete(self, entity) -> None"
          }
        ]
      },
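      "class_interface_example": "# Editor's sketch (hypothetical field, not extracted from source): label arguments accept a single string or a list.\n# Assumes a bound Graph instance named graph; ids, labels and UIDs are placeholders.\nnode = graph.match_by_id(42, label='Person')                  # single label string is wrapped in a list internally\nnode = graph.match_by_id('42', label=['Person', 'Employee'])  # string ids are coerced to int\nnode = graph.match_by_uid('unique-id-123')                    # no label filter (label defaults to None)\nrel = graph.match_relationship_by_id(7)",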
      "complexity": "complex",
      "component_type": "class",
      "conditional_imports": [
        {
          "condition": "Required for Node, Relationship, and ResultWrapper classes that are used throughout the Graph class methods",
          "import": "from neo4j_objects import *",
          "optional": false
        },
        {
          "condition": "Imported in source file but not directly used in this class; may be used by related classes or for data processing",
          "import": "import pandas as pd",
          "optional": true
        }
      ],
      "created_at": "2025-12-07 01:01:51",
      "decorators": [],
      "dependencies": [
        "neo4j",
        "functools",
        "warnings",
        "pandas"
      ],
      "description": "A Graph class that provides an interface for interacting with a Neo4j graph database, supporting CRUD operations on nodes and relationships through Cypher queries.",
      "docstring": "A Graph class for interacting with a Neo4j graph database. The Graph class has methods for running Cypher queries, matching nodes by ID, UID, and name, and matching relationships by ID.",
      "id": 2139,
      "imports": [
        "import neo4j",
        "from functools import wraps",
        "from neo4j import GraphDatabase",
        "from neo4j.exceptions import ServiceUnavailable",
        "from neo4j_objects import *",
        "import warnings",
        "import pandas as pd"
      ],
      "imports_required": [
        "from neo4j import GraphDatabase",
        "from neo4j.exceptions import ServiceUnavailable",
        "from functools import wraps",
        "import warnings"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 328,
      "line_start": 92,
      "name": "Graph",
      "parameters": [
        {
          "annotation": "",
          "default": null,
          "name": "bases"
        }
      ],
      "parameters_explained": {
        "URI": "The connection URI for the Neo4j database (e.g., 'bolt://localhost:7687' or 'neo4j://localhost:7687'). This specifies the protocol, host, and port for the database connection.",
        "auth": "Authentication credentials for the Neo4j database, typically a tuple of (username, password) or an auth object created by neo4j.basic_auth(). Required for database access.",
        "database": "Optional name of the specific database to connect to within the Neo4j instance. If not provided, falls back to the 'name' parameter or uses the default database.",
        "name": "Alternative parameter name for specifying the database name. Deprecated in favor of 'database' parameter but maintained for backward compatibility."
      },
      "parent_class": null,
      "purpose": "This class serves as a wrapper around the Neo4j Python driver, providing convenient methods for connecting to a Neo4j database and performing common operations like matching nodes by ID/UID/name, creating/updating/deleting nodes and relationships, and executing custom Cypher queries. It handles connection management, automatic reconnection on service unavailability, and provides a clean interface for working with graph entities.",
      "return_annotation": null,
      "return_explained": "Instantiation returns a Graph object that maintains a connection to the Neo4j database. Methods return various types: run() returns a ResultWrapper object, match methods return Node or Relationship objects, create/push/pull/delete methods return None or modify entities in place.",
      "settings_required": [
        "Neo4j database must be running and accessible at the specified URI",
        "Valid authentication credentials (username and password) for the Neo4j database",
        "Network connectivity to the Neo4j server",
        "neo4j_objects module must be available with Node, Relationship, and ResultWrapper classes defined"
      ],
      "source_code": "class Graph():\n    \"\"\"\n    A Graph class for interacting with a Neo4j graph database. The Graph class has methods for running Cypher queries, matching nodes by ID, UID, and name, and matching relationships by ID.\n    \"\"\"\n    def __init__(self, URI, auth, database=None, name=None):\n        self.driver = GraphDatabase.driver(URI, auth=auth)\n        self.database = database or name\n        \n    def close(self):\n        self.driver.close()\n        \n    def open(self):\n        self.driver.open()\n        \n    def __repr__(self):\n        return \"Graph interface bound to host %s to database '%s'\" % (self.driver.initial_addresses[0], self.database)\n    \n    __str__ = __repr__\n    \n    @staticmethod\n    def _get_label_strings(labels):\n        if None in labels:\n            return ''\n        return ''.join(\":\" + i for i in labels)\n    \n    def catch_service_unavailable(func):\n        @wraps(func)\n        def wrapper(self,*args, **kwargs):\n            try:\n                return func(self,*args, **kwargs)\n            except ServiceUnavailable:\n                self.open()\n                return func(self,*args, **kwargs)\n        return wrapper\n            \n    @staticmethod\n    def _run(tx, query, **kwargs):\n        result = tx.run(query, **kwargs)\n        records = list(result)\n        summary = result.consume()\n        return records\n    \n    @catch_service_unavailable\n    def run(self, query, **kwargs):\n        with self.driver.session(database=self.database) as session:\n            result = self._run(session, query, **kwargs)\n        return ResultWrapper(list(result), graph=self)\n    \n    @staticmethod\n    def _match_by_id(tx, x, label):\n        result = tx.run(\"MATCH (o%s) WHERE id(o) = $x RETURN o\" % Graph._get_label_strings(label), x=x)\n        return result.single()[0]\n    \n    @catch_service_unavailable\n    def match_by_id(self, x, label=None):\n        if not isinstance(x, int):\n            try:\n                x=int(x)\n            except:\n                raise ValueError(\"Failed to coerce id to type int. 
Element id must be of type int, passed '%s' of type %s\" % (x, type(x)))\n        if not isinstance(label, list):\n            label=[label]\n        with self.driver.session(database=self.database) as session:\n            result = session.execute_read(self._match_by_id, x, label)\n        return Node._from_neo4j_node(result, graph = self)\n    \n    @staticmethod\n    def _match_by_uid(tx, uid, label):\n        result = tx.run(\"MATCH (o%s {UID:$uid}) RETURN o\" % Graph._get_label_strings(label), uid=uid)\n        record = result.single()[0]\n        summary = result.consume()\n        return record\n    \n    @catch_service_unavailable\n    def match_by_uid(self, uid, label=None):\n        if not isinstance(label, list):\n            label=[label]\n        with self.driver.session(database=self.database) as session:\n            result = session.execute_read(self._match_by_uid, uid, label)\n        return Node._from_neo4j_node(result, graph=self)\n    \n    @staticmethod\n    def _match_by_name(tx, name, label):\n        result = tx.run(\"MATCH (o%s {N:$name}) RETURN n\" % Graph._get_label_strings(label), name=name)\n        return result.single()[0]\n    \n    @catch_service_unavailable\n    def match_by_name(self, name, label=None):\n        if not isinstance(label, list):\n            label=[label]\n        with self.driver.session(database=self.database) as session:\n            result = session.execute_read(self._match_by_name, name, label)\n        return Node._from_neo4j_node(result, graph=self)\n    \n    @staticmethod\n    def _match_relationship_by_id(tx, x):\n        result = tx.run(\"MATCH ()-[_]->() WHERE id(_) = $x RETURN _\", x=x)\n        return result.single()[0]\n    \n    @catch_service_unavailable\n    def match_relationship_by_id(self, x):\n        if not isinstance(x, int):\n            try:\n                x=int(x)\n            except:\n                raise ValueError(\"Failed to coerce id to type int. Element id must be of type int, passed '%s' of type %s\" % (x, type(x)))\n        with self.driver.session(database=self.database) as session:\n            result = session.execute_read(self._match_relationship_by_id, x)\n        return Relationship._from_neo4j_node(result, graph=self)\n    \n    @staticmethod\n    def _push(tx, element_id, properties):\n        result = tx.run(\"MATCH (o) WHERE id(o) = $x SET o = $properties\", x=int(element_id), properties=properties)\n        records = list(result)\n        summary = result.consume()\n        return records\n    \n    @catch_service_unavailable\n    def push(self, node):\n        assert node.graph, \"Node is not associated with any database. 
Please use graph.create for new nodes, or retrieve the node from the database first.\"\n        assert node.graph == self, \"Entity bound to different database.\"\n        assert node.element_id, \"Please run graph.create when creating a node for the first time.\"\n        items = dict(node)\n        with self.driver.session(database=self.database) as session:\n            session.execute_write(self._push, node.element_id, items)\n        return\n        \n    @staticmethod\n    def _node_pull(tx, ids):\n        query = tx.run(\"MATCH (_) WHERE id(_) in $x \"\n                       \"RETURN id(_), labels(_), properties(_)\", x=ids)\n        return list(query)\n    \n    @staticmethod\n    def _relationship_pull(tx, ids):\n        result = tx.run(\"MATCH ()-[_]->() WHERE id(_) in $x \"\n                       \"RETURN id(_), properties(_)\", x=ids)\n        return list(result)\n        \n    @catch_service_unavailable\n    def pull(self, entity):\n        nodes = {}\n        for node in entity.nodes:\n            if node.graph == self:\n                if not isinstance(node.element_id, int):\n                    try:\n                        node.element_id = int(node.element_id)\n                    except:\n                        warnings.warn(\"Could not coerce element id to int, skipped node %s\" % node.element_id, stacklevel=5)\n                        continue\n                nodes[node.element_id] = node\n                node._lock = True\n        with self.driver.session(database=self.database) as session:\n            query = session.execute_read(self._node_pull, list(nodes.keys()))\n        for element_id, new_labels, new_properties in query:\n            node = nodes[element_id]\n            node.clear_labels()\n            node.update_labels(new_labels)\n            node.clear()\n            node.update(new_properties)\n            node._lock = False\n        relationships = {}\n        for relationship in entity.relationships:\n            if relationship.graph == self:\n                relationships[relationship.element_id] = relationship\n        with self.driver.session(database=self.database) as session:\n            query = session.execute_read(self._relationship_pull, list(relationships.keys()))\n        for element_id, new_properties in query:\n            relationship = relationships[element_id]\n            relationship.clear()\n            relationship.update(new_properties)\n            \n    \n    @staticmethod\n    def _create(tx, query, data):\n        result = tx.run(query, data=data)\n        return list(result)\n            \n    @catch_service_unavailable\n    def create(self, entity):\n        entity.graph=self #mostly to bind subgraphs\n        node_dict={}\n        for node in entity.nodes:\n            if node:\n                if not node.element_id:\n                    key = frozenset(node.labels)\n                    node_dict.setdefault(key, []).append(node)\n        rel_dict = {}\n        for relationship in entity.relationships:\n            key = frozenset(relationship.labels)\n            rel_dict.setdefault(key, []).append(relationship)\n        for labels, nodes in node_dict.items():\n            query = \"\"\"\n            UNWIND $data AS d\n            MERGE (_%s {UID:d.UID})\n            ON CREATE\n                SET _ += d\n            RETURN id(_)\n            \"\"\" % self._get_label_strings(labels)\n            with self.driver.session(database=self.database) as session:\n                result = session.execute_write(self._create, 
query, list(map(dict, nodes)))\n                for i, return_id in enumerate(result):\n                    node = nodes[i]\n                    node.graph = self\n                    node.element_id = return_id.value()\n        for labels, relationships in rel_dict.items():\n            data = map(lambda r: [r.start_node.element_id, dict(r.relationship), r.end_node.element_id],\n                               relationships)\n            # print(list(data)) #calling prematurely exhausts the generator\n            query = \"\"\"\n            UNWIND $data as d\n            MATCH (a) WHERE id(a) = d[0]\n            MATCH (b) WHERE id(b) = d[2]\n            MERGE (a)-[_%s]->(b) SET _ = d[1]\n            RETURN id(_)\n            \"\"\" % self._get_label_strings(labels)\n            with self.driver.session(database=self.database) as session:\n                result = session.execute_write(self._create, query, list(data))\n                for i, return_id in enumerate(result):\n                    rel = relationships[i]\n                    rel.graph=self\n                    rel.element_id = return_id.value()\n    @staticmethod\n    def _delete(tx, identities):\n        result = tx.run(\"MATCH (_) WHERE id(_) IN $x DETACH DELETE _\", x=identities)\n        return list(result)\n               \n    @catch_service_unavailable\n    def delete(self, entity):\n        identities = []\n        for rel in entity.relationships:\n            identities.append(rel.element_id)\n        for node in entity.nodes:\n            if node.element_id:\n                identities.append(node.element_id)\n        with self.driver.session(database=self.database) as session:\n            session.execute_write(self._delete, identities)",
      "source_file": "/tf/active/vicechatdev/neo4j_driver/neo4j_driver.py",
      "tags": [
        "neo4j",
        "graph-database",
        "database-interface",
        "cypher",
        "nodes",
        "relationships",
        "CRUD",
        "connection-management",
        "graph-operations"
      ],
      "updated_at": "2025-12-07T02:01:51.650352",
      "usage_example": "from neo4j import GraphDatabase\nfrom neo4j.auth import basic_auth\n\n# Create a Graph instance\ngraph = Graph(\n    URI='bolt://localhost:7687',\n    auth=basic_auth('neo4j', 'password'),\n    database='neo4j'\n)\n\n# Run a Cypher query\nresult = graph.run('MATCH (n:Person) RETURN n LIMIT 10')\n\n# Match a node by ID\nnode = graph.match_by_id(123, label=['Person'])\n\n# Match a node by UID property\nnode = graph.match_by_uid('unique-id-123', label=['Person'])\n\n# Create a new node (assuming Node class is available)\nfrom neo4j_objects import Node\nnew_node = Node(labels=['Person'], properties={'name': 'John', 'UID': 'john-123'})\ngraph.create(new_node)\n\n# Update an existing node\nnode['age'] = 30\ngraph.push(node)\n\n# Pull latest data from database\ngraph.pull(node)\n\n# Delete a node\ngraph.delete(node)\n\n# Close the connection when done\ngraph.close()"
    },
    {
      "best_practices": [
        "Always pass the graph parameter when instantiating to maintain proper graph context for Node and Relationship objects",
        "Use evaluate() for single-result queries; it warns if multiple nodes are matched but only processes the first",
        "Check if ResultWrapper is empty before calling methods to avoid errors",
        "Use to_subgraph() only with Neo4j path, node, or relationship objects; other types will raise TypeError",
        "Be aware that to_ndarray() is deprecated and only returns lists, not actual ndarrays",
        "The class is immutable (tuple subclass), so results cannot be modified after creation",
        "When working with collections of nodes/relationships, use COLLECT() in Cypher queries for proper handling with evaluate()",
        "The values property returns None if the ResultWrapper is empty, so check for None before iterating"
      ],
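      "best_practices_example": "# Editor's sketch (hypothetical field, not extracted from source): COLLECT() with evaluate(), and checking values for None.\n# Assumes a bound Graph instance named graph whose run() returns a ResultWrapper.\nresult = graph.run('MATCH (n:Person) RETURN COLLECT(n)')\npeople = result.evaluate()            # list of Node objects, no multiple-match warning\n\nresult = graph.run('MATCH (n:Widget) RETURN n')\nif result.values is not None:         # values is None when the wrapper is empty\n    for node in result.values:\n        print(dict(node))",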
      "class_interface": {
        "attributes": [
          {
            "description": "Private attribute storing reference to the parent Graph object, used for maintaining context when creating Node and Relationship objects",
            "is_class_variable": false,
            "name": "_graph",
            "type": "Graph | None"
          }
        ],
        "methods": [
          {
            "is_property": false,
            "is_static": false,
            "name": "__init__",
            "parameters": {
              "*args": "Variable positional arguments containing Neo4j query result records",
              "graph": "Optional reference to the parent Graph object for maintaining context"
            },
            "purpose": "Initialize the ResultWrapper with query results and optional graph reference",
            "returns": "None (constructor)",
            "signature": "__init__(self, *args, graph=None)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "evaluate",
            "parameters": {},
            "purpose": "Evaluate the first result and convert it to appropriate Python objects (Node, Relationship, or primitive types)",
            "returns": "Returns None if empty; Node object for single node results; Relationship object for single relationship results; list of Node objects for node collections; list of Relationship objects for relationship collections; or the raw value for other types. Warns if multiple nodes matched but only processes first.",
            "signature": "evaluate(self) -> Node | Relationship | list | Any | None"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "to_data_frame",
            "parameters": {},
            "purpose": "Convert the query results to a Pandas DataFrame",
            "returns": "Pandas DataFrame containing the results. If values are Node objects, converts them to dictionaries. Otherwise uses keys and values. Returns empty DataFrame on error.",
            "signature": "to_data_frame(self) -> pd.DataFrame"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "data",
            "parameters": {},
            "purpose": "Convert query results to a list of dictionaries",
            "returns": "List of dictionaries where each dictionary represents a result record. Node objects are converted to dicts, other values are zipped with keys. Returns empty list if values is None.",
            "signature": "data(self) -> list[dict]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "to_ndarray",
            "parameters": {},
            "purpose": "Deprecated method that returns values as a list (not an actual ndarray)",
            "returns": "Returns the values property (a list). Emits deprecation warning recommending use of collections and evaluate() instead.",
            "signature": "to_ndarray(self) -> list"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "to_subgraph",
            "parameters": {},
            "purpose": "Convert query results to a Subgraph object containing nodes and/or relationships",
            "returns": "Subgraph object constructed from the results. Handles Path, Node, and Relationship types. Returns empty Subgraph if ResultWrapper is empty. Raises TypeError for unsupported types.",
            "signature": "to_subgraph(self) -> Subgraph"
          },
          {
            "is_property": true,
            "is_static": false,
            "name": "values",
            "parameters": {},
            "purpose": "Property that returns the values from all result records, converting Neo4j objects to Python objects",
            "returns": "None if empty; list of Node objects if results contain nodes; list of Relationship objects if results contain relationships; otherwise list of raw values from each record.",
            "signature": "values(self) -> list | None"
          },
          {
            "is_property": true,
            "is_static": false,
            "name": "keys",
            "parameters": {},
            "purpose": "Property that returns all unique keys from the result records",
            "returns": "List of unique key names (strings) from all records in the results, preserving order of first appearance.",
            "signature": "keys(self) -> list[str]"
          }
        ]
      },
      "complexity": "moderate",
      "component_type": "class",
      "conditional_imports": [
        {
          "condition": "only when calling to_data_frame() method",
          "import": "import pandas as pd",
          "optional": false
        }
      ],
      "created_at": "2025-12-07 01:00:55",
      "decorators": [],
      "dependencies": [
        "neo4j",
        "pandas",
        "warnings",
        "neo4j_objects"
      ],
      "description": "ResultWrapper is a tuple subclass that wraps Cypher query results from Neo4j, providing methods to convert results into various formats including Node/Relationship objects, DataFrames, dictionaries, and Subgraphs.",
      "docstring": "The ResultWrapper class is a custom class that extends the built-in tuple class. It is used to wrap the results of Cypher queries and provide additional methods for working with the results. \nThe methods include evaluating the results to return Node and Relationship objects, converting the results to a Pandas DataFrame, returning the data as a list of dictionaries, \nand returning the results as a Subgraph object.",
      "id": 2138,
      "imports": [
        "import neo4j",
        "from functools import wraps",
        "from neo4j import GraphDatabase",
        "from neo4j.exceptions import ServiceUnavailable",
        "from neo4j_objects import *",
        "import warnings",
        "import pandas as pd"
      ],
      "imports_required": [
        "import neo4j",
        "import warnings",
        "from neo4j_objects import Node, Relationship, Subgraph"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 90,
      "line_start": 8,
      "name": "ResultWrapper",
      "parameters": [
        {
          "annotation": "tuple",
          "default": null,
          "name": "bases"
        }
      ],
      "parameters_explained": {
        "*args": "Variable positional arguments passed to the tuple constructor, typically containing Neo4j query result records",
        "graph": "Optional reference to the parent Graph object that executed the query. Used to maintain graph context when creating Node and Relationship objects from Neo4j results. Defaults to None."
      },
      "parent_class": null,
      "purpose": "This class serves as a wrapper around Neo4j Cypher query results, extending the built-in tuple class to provide convenient methods for transforming query results into different data structures. It handles conversion of raw Neo4j graph objects (nodes, relationships, paths) into higher-level Python objects and data structures like Pandas DataFrames, lists of dictionaries, and Subgraph objects. The class maintains a reference to the parent graph for proper object instantiation.",
      "return_annotation": null,
      "return_explained": "Instantiation returns a ResultWrapper object that behaves like a tuple but with additional methods. Key method returns: evaluate() returns Node, Relationship objects, or lists thereof, or primitive values; to_data_frame() returns a Pandas DataFrame; data() returns a list of dictionaries; to_subgraph() returns a Subgraph object; values property returns a list of converted values; keys property returns a list of unique keys from all records.",
      "settings_required": [
        "Neo4j database connection must be established",
        "neo4j_objects module must be available with Node, Relationship, and Subgraph classes defined"
      ],
      "source_code": "class ResultWrapper(tuple):\n    \"\"\"\n    The ResultWrapper class is a custom class that extends the built-in tuple class. It is used to wrap the results of Cypher queries and provide additional methods for working with the results. \n    The methods include evaluating the results to return Node and Relationship objects, converting the results to a Pandas DataFrame, returning the data as a list of dictionaries, \n    and returning the results as a Subgraph object.\n    \"\"\"\n    def __init__(self, *args, graph=None):\n        tuple.__init__(self)\n        self._graph=graph\n        \n    def evaluate(self):\n        if not self:\n            return None\n        if isinstance(self[0].value(), neo4j.graph.Node):\n            if len(self) > 1:\n                warnings.warn(\"Query matched multiple nodes, but .evaluate only processes a single node. Either use .subgraph or call `RETURN COLLECT(nodes)`\", stacklevel=2)\n            return Node._from_neo4j_node(self[0].value(), graph=self._graph)\n        elif isinstance(self[0].value(), list) and len(self[0].value()) > 0 and isinstance(self[0].value()[0], neo4j.graph.Node):\n            return [Node._from_neo4j_node(i, graph=self._graph) for i in self[0].value()]\n        elif isinstance(self[0].value(), list) and len(self[0].value()) > 0 and isinstance(self[0].value()[0], neo4j.graph.Relationship):\n            return [Relationship._from_neo4j_node(i, graph=self._graph) for i in self[0].value()]\n        elif isinstance(self[0].value(), neo4j.graph.Relationship):\n            return Relationship._from_neo4j_node(self[0].value(), graph=self._graph)\n        return self[0].value()\n    \n    def to_data_frame(self):\n        import pandas as pd\n        try:\n            if isinstance(self.values[0], Node):\n                return pd.DataFrame([dict(i) for i in self.values])\n            return pd.DataFrame(self.values, columns=self.keys)\n        except:\n            return pd.DataFrame()\n    \n    def data(self):\n        if self.values is None:\n            return []\n        data=[]\n        for values in self.values:\n            if isinstance(values, Node):\n                d=dict(values)\n            else:\n                d= dict(zip(self.keys, values))\n            data.append(d)\n        return data\n    \n    def to_ndarray(self):\n        warnings.warn(\"This function was only implemented to return lists and does not actually return an ndarray. 
Please return collections and call .evaluate instead\", stacklevel=5)\n        return self.values\n    \n    def to_subgraph(self):\n        if not self:\n            return Subgraph(graph=self._graph)\n        if isinstance(self[0].values(), neo4j.graph.Path) or isinstance(self[0].values()[0], neo4j.graph.Path):\n            relationships=[]\n            for path in self:\n                for relationship in path.value():\n                    relationships.append(relationship)\n            subgraph = Subgraph(relationships=[Relationship._from_neo4j_node(i) for i in relationships], graph=self)\n        elif isinstance(self[0].values()[0], neo4j.graph.Node):\n            subgraph = Subgraph(nodes=self.values, graph=self._graph)\n        elif isinstance(self[0].values()[0], neo4j.graph.Relationship):\n            subgraph = Subgraph(relationships=self.values, graph=self._graph)\n        else:\n            raise TypeError(\".to_subgraph only works on neo4j path, node and relationship objects.\")\n        return subgraph\n            \n        \n    @property\n    def values(self):\n        if len(self) == 0:\n            return None\n        if isinstance(self[0].value(), neo4j.graph.Node):\n            return [Node._from_neo4j_node(i.value(), graph=self._graph) for i in self]\n        elif isinstance(self[0].value(), neo4j.graph.Relationship):\n            return [Relationship._from_neo4j_node(i.value(), graph=self._graph) for i in self]\n        return [i.values() for i in self]\n    \n    @property\n    def keys(self):\n        keys = []\n        keys.extend(j for i in self for j in i.keys() if not j in keys)\n        return keys",
      "source_file": "/tf/active/vicechatdev/neo4j_driver/neo4j_driver.py",
      "tags": [
        "neo4j",
        "graph-database",
        "cypher",
        "query-results",
        "data-conversion",
        "wrapper",
        "tuple-subclass",
        "dataframe",
        "graph-objects"
      ],
      "updated_at": "2025-12-07T02:00:55.680734",
      "usage_example": "# Assuming you have a Neo4j graph connection and query results\nfrom neo4j import GraphDatabase\nfrom neo4j_objects import Node, Relationship, Subgraph\n\n# Execute a query that returns results\ndriver = GraphDatabase.driver('bolt://localhost:7687', auth=('neo4j', 'password'))\nwith driver.session() as session:\n    raw_results = session.run('MATCH (n:Person) RETURN n LIMIT 5')\n    \n    # Wrap results\n    results = ResultWrapper(*raw_results, graph=my_graph)\n    \n    # Get single node/relationship\n    node = results.evaluate()\n    \n    # Convert to DataFrame\n    df = results.to_data_frame()\n    \n    # Get as list of dictionaries\n    data_list = results.data()\n    \n    # Convert to Subgraph\n    subgraph = results.to_subgraph()\n    \n    # Access values and keys\n    values = results.values\n    keys = results.keys"
    },
    {
      "best_practices": [],
      "class_interface": {
        "attributes": [],
        "methods": [
          {
            "is_property": false,
            "is_static": false,
            "name": "__init__",
            "parameters": {
              "replica_database_path": "Type: str",
              "session": "Type: requests.Session"
            },
            "purpose": "Internal method:   init  ",
            "returns": "None",
            "signature": "__init__(self, session, replica_database_path)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_clear_document_context",
            "parameters": {},
            "purpose": "Clear the current document UUID context for new uploads",
            "returns": "None",
            "signature": "_clear_document_context(self)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_load_database",
            "parameters": {},
            "purpose": "Load the replica database",
            "returns": "Returns Dict[str, Any]",
            "signature": "_load_database(self) -> Dict[str, Any]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_save_database",
            "parameters": {},
            "purpose": "Save the updated database",
            "returns": "None",
            "signature": "_save_database(self)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_compute_hash",
            "parameters": {
              "content": "Type: bytes"
            },
            "purpose": "Compute SHA256 hash of content",
            "returns": "Returns str",
            "signature": "_compute_hash(self, content) -> str"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_compute_crc32c_header",
            "parameters": {
              "content": "Type: bytes"
            },
            "purpose": "Compute CRC32C checksum and return as x-goog-hash header value",
            "returns": "Returns str",
            "signature": "_compute_crc32c_header(self, content) -> str"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_generate_timestamp",
            "parameters": {},
            "purpose": "Generate reMarkable timestamp",
            "returns": "Returns str",
            "signature": "_generate_timestamp(self) -> str"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_generate_generation",
            "parameters": {},
            "purpose": "Generate reMarkable generation number",
            "returns": "Returns int",
            "signature": "_generate_generation(self) -> int"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "upload_raw_content",
            "parameters": {
              "content": "Type: bytes",
              "content_hash": "Type: str",
              "content_type": "Type: str",
              "filename": "Type: str",
              "system_filename": "Type: str"
            },
            "purpose": "Upload raw content and return its hash",
            "returns": "Returns Optional[str]",
            "signature": "upload_raw_content(self, content, content_hash, filename, content_type, system_filename) -> Optional[str]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "upload_system_file",
            "parameters": {
              "content": "Type: bytes",
              "content_type": "Type: str",
              "system_filename": "Type: str"
            },
            "purpose": "Upload system files like roothash, root.docSchema with fixed filenames",
            "returns": "Returns Optional[str]",
            "signature": "upload_system_file(self, content, system_filename, content_type) -> Optional[str]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "upload_document_file",
            "parameters": {
              "content": "Type: bytes",
              "content_type": "Type: str",
              "filename": "Type: str"
            },
            "purpose": "Upload document files with UUID.extension pattern",
            "returns": "Returns Optional[str]",
            "signature": "upload_document_file(self, content, filename, content_type) -> Optional[str]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "create_metadata_json",
            "parameters": {
              "document_type": "Type: str",
              "name": "Type: str",
              "parent_uuid": "Type: str"
            },
            "purpose": "Create metadata JSON for a document",
            "returns": "Returns Tuple[bytes, str]",
            "signature": "create_metadata_json(self, name, parent_uuid, document_type) -> Tuple[bytes, str]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "create_content_json",
            "parameters": {
              "pages": "Type: List[str]",
              "template": "Type: str"
            },
            "purpose": "Create content JSON for a notebook with pages",
            "returns": "Returns Tuple[bytes, str]",
            "signature": "create_content_json(self, pages, template) -> Tuple[bytes, str]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "create_directory_listing",
            "parameters": {
              "child_objects": "Type: List[Dict]",
              "data_components": "Type: List[Dict]"
            },
            "purpose": "Create directory listing content",
            "returns": "Returns Tuple[bytes, str]",
            "signature": "create_directory_listing(self, child_objects, data_components) -> Tuple[bytes, str]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "update_root_hash",
            "parameters": {
              "new_root_hash": "Type: str"
            },
            "purpose": "Update the root hash in the cloud",
            "returns": "Returns bool",
            "signature": "update_root_hash(self, new_root_hash) -> bool"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "edit_document_metadata",
            "parameters": {
              "document_uuid": "Type: str",
              "new_name": "Type: str",
              "new_parent": "Type: str"
            },
            "purpose": "Edit an existing document's metadata",
            "returns": "Returns bool",
            "signature": "edit_document_metadata(self, document_uuid, new_name, new_parent) -> bool"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "upload_pdf_document",
            "parameters": {
              "name": "Type: str",
              "parent_uuid": "Type: str",
              "pdf_path": "Type: str"
            },
            "purpose": "Upload a new PDF document to reMarkable following the correct sequence from app logs",
            "returns": "Returns bool",
            "signature": "upload_pdf_document(self, pdf_path, name, parent_uuid) -> bool"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "create_notebook",
            "parameters": {
              "name": "Type: str",
              "parent_uuid": "Type: str",
              "template": "Type: str"
            },
            "purpose": "Create a new empty notebook",
            "returns": "Returns bool",
            "signature": "create_notebook(self, name, parent_uuid, template) -> bool"
          }
        ]
      },
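      "class_interface_example": "# Editor's sketch (hypothetical field, not extracted from source): typical upload flow.\n# Assumes an authenticated requests.Session (normally obtained via RemarkableAuth) and a local replica database file;\n# file paths, names and the empty parent_uuid are placeholders.\nimport requests\nsession = requests.Session()\nmanager = RemarkableUploadManager(session, 'replica_database.json')\n\nok = manager.upload_pdf_document('notes.pdf', name='Notes', parent_uuid='')\n\nok = manager.create_notebook('Scratchpad', parent_uuid='', template='Blank')",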
      "complexity": "moderate",
      "component_type": "class",
      "conditional_imports": [],
      "created_at": "2025-12-07 01:00:05",
      "decorators": [],
      "dependencies": [],
      "description": "Manages uploads to reMarkable cloud",
      "docstring": "Manages uploads to reMarkable cloud",
      "id": 2137,
      "imports": [
        "import os",
        "import json",
        "import hashlib",
        "import requests",
        "import uuid",
        "import base64",
        "import binascii",
        "import zlib",
        "from pathlib import Path",
        "from typing import Dict",
        "from typing import List",
        "from typing import Optional",
        "from typing import Any",
        "from typing import Tuple",
        "from datetime import datetime",
        "import time",
        "import crc32c",
        "import sys",
        "from auth import RemarkableAuth"
      ],
      "imports_required": [
        "import os",
        "import json",
        "import hashlib",
        "import requests",
        "import uuid"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 665,
      "line_start": 32,
      "name": "RemarkableUploadManager_v1",
      "parameters": [
        {
          "annotation": "",
          "default": null,
          "name": "bases"
        }
      ],
      "parameters_explained": {
        "bases": "Parameter of type "
      },
      "parent_class": null,
      "purpose": "Manages uploads to reMarkable cloud",
      "return_annotation": null,
      "return_explained": "Returns unspecified type",
      "settings_required": [],
      "source_code": "class RemarkableUploadManager:\n    \"\"\"Manages uploads to reMarkable cloud\"\"\"\n    \n    def __init__(self, session: requests.Session, replica_database_path: str):\n        self.session = session\n        self.base_url = \"https://eu.tectonic.remarkable.com\"\n        \n        # Load replica database\n        self.database_path = Path(replica_database_path)\n        self.database = self._load_database()\n        \n        # Track uploads\n        self.upload_queue: List[Dict[str, Any]] = []\n        self.uploaded_hashes: Dict[str, str] = {}  # hash -> upload_status\n        self._current_document_uuid: Optional[str] = None  # UUID for consistent rm-filename headers\n        \n    def _clear_document_context(self):\n        \"\"\"Clear the current document UUID context for new uploads\"\"\"\n        self._current_document_uuid = None\n        \n    def _load_database(self) -> Dict[str, Any]:\n        \"\"\"Load the replica database\"\"\"\n        if not self.database_path.exists():\n            raise FileNotFoundError(f\"Database not found: {self.database_path}\")\n            \n        with open(self.database_path, 'r', encoding='utf-8') as f:\n            return json.load(f)\n    \n    def _save_database(self):\n        \"\"\"Save the updated database\"\"\"\n        with open(self.database_path, 'w', encoding='utf-8') as f:\n            json.dump(self.database, f, indent=2, ensure_ascii=False)\n    \n    def _compute_hash(self, content: bytes) -> str:\n        \"\"\"Compute SHA256 hash of content\"\"\"\n        return hashlib.sha256(content).hexdigest()\n    \n    def _compute_crc32c_header(self, content: bytes) -> str:\n        \"\"\"Compute CRC32C checksum and return as x-goog-hash header value\"\"\"\n        try:\n            # Use proper crc32c library if available\n            if HAS_CRC32C:\n                checksum = crc32c.crc32c(content)\n            else:\n                # Fallback to standard CRC32 (not ideal but better than nothing)\n                checksum = zlib.crc32(content) & 0xffffffff\n            \n            # Convert to bytes and base64 encode\n            checksum_bytes = checksum.to_bytes(4, byteorder='big')\n            checksum_b64 = base64.b64encode(checksum_bytes).decode('ascii')\n            \n            return f\"crc32c={checksum_b64}\"\n        except Exception as e:\n            print(f\"\u26a0\ufe0f Warning: Failed to compute CRC32C checksum: {e}\")\n            # Return empty string to skip the header if computation fails\n            return \"\"\n    \n    def _generate_timestamp(self) -> str:\n        \"\"\"Generate reMarkable timestamp\"\"\"\n        return str(int(time.time() * 1000))\n    \n    def _generate_generation(self) -> int:\n        \"\"\"Generate reMarkable generation number\"\"\"\n        return int(time.time() * 1000000)\n    \n    def upload_raw_content(self, content: bytes, content_hash: str = None, filename: str = None, \n                          content_type: str = \"application/octet-stream\", system_filename: str = None) -> Optional[str]:\n        \"\"\"Upload raw content and return its hash\"\"\"\n        if content_hash is None:\n            content_hash = self._compute_hash(content)\n        \n        # Check if already uploaded\n        if content_hash in self.uploaded_hashes:\n            print(f\"\u2705 Content already uploaded: {content_hash[:16]}...\")\n            return content_hash\n        \n        try:\n            url = f\"{self.base_url}/sync/v3/files/{content_hash}\"\n            \n 
           # Prepare headers like the reMarkable app\n            headers = {\n                'Content-Type': content_type,\n                'rm-batch-number': '1',\n                'rm-sync-id': str(uuid.uuid4()),\n                'User-Agent': 'desktop/3.20.0.922 (macos 15.4)',\n                'Accept-Encoding': 'gzip, deflate',\n                'Accept-Language': 'en-BE,*',\n                'Connection': 'Keep-Alive'\n            }\n            \n            # Add rm-filename header - REQUIRED for all PUT requests\n            # Handle different patterns: UUID-based files vs system files\n            if system_filename:\n                # System files like \"roothash\", \"root.docSchema\" (no UUID)\n                rm_filename = system_filename\n                print(f\"\ud83c\udff7\ufe0f rm-filename (system): {rm_filename}\")\n            elif filename:\n                # Document files with UUID pattern\n                if hasattr(self, '_current_document_uuid') and self._current_document_uuid:\n                    doc_uuid = self._current_document_uuid\n                else:\n                    # Generate and store new UUID for this document\n                    doc_uuid = str(uuid.uuid4())\n                    self._current_document_uuid = doc_uuid\n                    print(f\"\ud83d\udcca Generated new document UUID: {doc_uuid}\")\n                \n                # Use the filename as provided or construct UUID.extension format\n                if '.' in filename and len(filename.split('.')[0]) == 36:  # Already UUID.extension\n                    rm_filename = filename\n                else:\n                    # Determine extension and construct UUID.extension\n                    if content_type == 'application/pdf' or filename.lower().endswith('.pdf'):\n                        rm_filename = f\"{doc_uuid}.pdf\"\n                    elif 'metadata' in filename.lower():\n                        rm_filename = f\"{doc_uuid}.metadata\"\n                    elif filename.lower().endswith('.content'):\n                        rm_filename = f\"{doc_uuid}.content\"\n                    elif filename.lower().endswith('.rm'):\n                        # Page data keeps original filename for .rm files\n                        rm_filename = filename\n                    elif filename.lower().endswith('.docschema') or 'docschema' in filename.lower():\n                        rm_filename = f\"{doc_uuid}.docSchema\"\n                    elif filename.lower().endswith('.pagedata'):\n                        rm_filename = f\"{doc_uuid}.pagedata\"\n                    else:\n                        # Default construction\n                        rm_filename = f\"{doc_uuid}.{filename}\"\n                \n                print(f\"\ud83c\udff7\ufe0f rm-filename (document): {rm_filename}\")\n            else:\n                # Fallback - generate basic filename\n                if hasattr(self, '_current_document_uuid') and self._current_document_uuid:\n                    doc_uuid = self._current_document_uuid\n                else:\n                    doc_uuid = str(uuid.uuid4())\n                    self._current_document_uuid = doc_uuid\n                \n                if content_type == 'application/pdf':\n                    rm_filename = f\"{doc_uuid}.pdf\"\n                elif content_type == 'application/octet-stream':\n                    rm_filename = f\"{doc_uuid}.metadata\"\n                else:\n                    rm_filename = f\"{doc_uuid}.content\"\n                \n     
           print(f\"\ud83c\udff7\ufe0f rm-filename (fallback): {rm_filename}\")\n            \n            headers['rm-filename'] = rm_filename\n            \n            # Add CRC32C checksum (this is the missing piece!)\n            crc32c_header = self._compute_crc32c_header(content)\n            if crc32c_header:\n                headers['x-goog-hash'] = crc32c_header\n            \n            print(f\"\ud83d\udd0d Debug: Upload headers for {content_hash[:16]}...\")\n            for key, value in headers.items():\n                print(f\"    {key}: {value}\")\n            \n            # Make the PUT request\n            response = self.session.put(url, data=content, headers=headers)\n            \n            print(f\"\ud83d\udd0d Debug: Response status: {response.status_code}\")\n            print(f\"\ud83d\udd0d Debug: Response text: {response.text}\")\n            \n            response.raise_for_status()\n            \n            self.uploaded_hashes[content_hash] = \"uploaded\"\n            print(f\"\u2705 Uploaded content: {content_hash[:16]}... ({len(content)} bytes)\")\n            return content_hash\n            \n        except Exception as e:\n            print(f\"\u274c Failed to upload content {content_hash[:16]}...: {e}\")\n            if hasattr(e, 'response') and e.response is not None:\n                print(f\"    Response: {e.response.text}\")\n            return None\n    \n    def upload_system_file(self, content: bytes, system_filename: str, content_type: str = \"application/octet-stream\") -> Optional[str]:\n        \"\"\"Upload system files like roothash, root.docSchema with fixed filenames\"\"\"\n        print(f\"\ud83d\udcc1 Uploading system file: {system_filename}\")\n        return self.upload_raw_content(content, system_filename=system_filename, content_type=content_type)\n    \n    def upload_document_file(self, content: bytes, filename: str, content_type: str = \"application/octet-stream\") -> Optional[str]:\n        \"\"\"Upload document files with UUID.extension pattern\"\"\"\n        print(f\"\ud83d\udcc4 Uploading document file: {filename}\")\n        return self.upload_raw_content(content, filename=filename, content_type=content_type)\n\n    def create_metadata_json(self, name: str, parent_uuid: str = \"\", document_type: str = \"DocumentType\") -> Tuple[bytes, str]:\n        \"\"\"Create metadata JSON for a document\"\"\"\n        timestamp = self._generate_timestamp()\n        \n        metadata = {\n            \"createdTime\": timestamp,\n            \"lastModified\": timestamp,\n            \"lastOpened\": timestamp,\n            \"lastOpenedPage\": 0,\n            \"new\": False,\n            \"parent\": parent_uuid,\n            \"pinned\": False,\n            \"source\": \"\",\n            \"type\": document_type,\n            \"visibleName\": name\n        }\n        \n        content = json.dumps(metadata, indent=4).encode('utf-8')\n        content_hash = self._compute_hash(content)\n        \n        return content, content_hash\n    \n    def create_content_json(self, pages: List[str], template: str = \"Blank\") -> Tuple[bytes, str]:\n        \"\"\"Create content JSON for a notebook with pages\"\"\"\n        timestamp_base = f\"2:{len(pages)}\"\n        \n        # Create pages structure\n        pages_list = []\n        for i, page_id in enumerate(pages):\n            pages_list.append({\n                \"id\": page_id,\n                \"idx\": {\n                    \"timestamp\": f\"2:{i+2}\",\n                    \"value\": 
chr(ord('a') + i) if i < 26 else f\"page_{i}\"\n                },\n                \"template\": {\n                    \"timestamp\": \"2:1\",\n                    \"value\": template\n                }\n            })\n        \n        content_data = {\n            \"cPages\": {\n                \"lastOpened\": {\n                    \"timestamp\": \"2:1\",\n                    \"value\": pages[0] if pages else \"\"\n                },\n                \"original\": {\n                    \"timestamp\": \"0:0\",\n                    \"value\": -1\n                },\n                \"pages\": pages_list\n            },\n            \"extraMetadata\": {},\n            \"fileType\": \"notebook\",\n            \"fontName\": \"\",\n            \"lineHeight\": -1,\n            \"margins\": 180,\n            \"pageCount\": len(pages),\n            \"textScale\": 1,\n            \"transform\": {}\n        }\n        \n        content = json.dumps(content_data, indent=4).encode('utf-8')\n        content_hash = self._compute_hash(content)\n        \n        return content, content_hash\n    \n    def create_directory_listing(self, child_objects: List[Dict], data_components: List[Dict]) -> Tuple[bytes, str]:\n        \"\"\"Create directory listing content\"\"\"\n        lines = [str(len(child_objects) + len(data_components))]\n        \n        # Add child objects (folders/documents)\n        for obj in child_objects:\n            line = f\"{obj['hash']}:80000000:{obj['uuid']}:{obj['type']}:{obj['size']}\"\n            lines.append(line)\n        \n        # Add data components (.content, .metadata, .rm files, etc.)\n        for comp in data_components:\n            line = f\"{comp['hash']}:0:{comp['component']}:0:{comp['size']}\"\n            lines.append(line)\n        \n        content = '\\n'.join(lines).encode('utf-8')\n        content_hash = self._compute_hash(content)\n        \n        return content, content_hash\n    \n    def update_root_hash(self, new_root_hash: str) -> bool:\n        \"\"\"Update the root hash in the cloud\"\"\"\n        try:\n            generation = self._generate_generation()\n            \n            root_data = {\n                \"broadcast\": True,\n                \"generation\": generation,\n                \"hash\": new_root_hash\n            }\n            \n            url = f\"{self.base_url}/sync/v3/root\"\n            response = self.session.put(url, json=root_data)\n            response.raise_for_status()\n            \n            print(f\"\u2705 Updated root hash: {new_root_hash}\")\n            return True\n            \n        except Exception as e:\n            print(f\"\u274c Failed to update root hash: {e}\")\n            return False\n    \n    def edit_document_metadata(self, document_uuid: str, new_name: str = None, new_parent: str = None) -> bool:\n        \"\"\"Edit an existing document's metadata\"\"\"\n        try:\n            # Find the document in database\n            if document_uuid not in self.database['nodes']:\n                raise ValueError(f\"Document {document_uuid} not found in database\")\n            \n            node = self.database['nodes'][document_uuid]\n            print(f\"\ud83d\udcdd Editing document: {node['name']}\")\n            \n            # Get current metadata\n            current_metadata = node['metadata'].copy()\n            \n            # Update metadata\n            if new_name:\n                current_metadata['visibleName'] = new_name\n            if new_parent is not None:\n                
current_metadata['parent'] = new_parent\n            \n            current_metadata['lastModified'] = self._generate_timestamp()\n            \n            # Create new metadata content\n            metadata_content = json.dumps(current_metadata, indent=4).encode('utf-8')\n            metadata_hash = self._compute_hash(metadata_content)\n            \n            # Upload metadata\n            self.upload_raw_content(metadata_content, metadata_hash)\n            \n            # Update component hashes\n            old_metadata_hash = node['component_hashes']['metadata']\n            node['component_hashes']['metadata'] = metadata_hash\n            \n            # Get parent node to update its directory listing\n            parent_uuid = current_metadata.get('parent', '')\n            if parent_uuid and parent_uuid in self.database['nodes']:\n                parent_node = self.database['nodes'][parent_uuid]\n                \n                # Rebuild parent's directory listing\n                child_objects = []\n                data_components = []\n                \n                # Find all children of this parent\n                for uuid, child_node in self.database['nodes'].items():\n                    if child_node.get('parent_uuid') == parent_uuid:\n                        if child_node['node_type'] == 'folder':\n                            type_val = '1'\n                        else:\n                            type_val = '3'\n                        \n                        child_objects.append({\n                            'hash': child_node['hash'],\n                            'uuid': uuid,\n                            'type': type_val,\n                            'size': len(str(child_node).encode('utf-8'))  # Approximate\n                        })\n                \n                # Add metadata components for this updated document\n                comp_hashes = node['component_hashes']\n                for comp_type, comp_hash in comp_hashes.items():\n                    if comp_hash:\n                        if comp_type == 'rm_files':\n                            for i, rm_hash in enumerate(comp_hash):\n                                data_components.append({\n                                    'hash': rm_hash,\n                                    'component': f\"{document_uuid}/{uuid.uuid4()}.rm\",\n                                    'size': 14661  # Typical RM file size\n                                })\n                        else:\n                            data_components.append({\n                                'hash': comp_hash,\n                                'component': f\"{document_uuid}.{comp_type}\",\n                                'size': len(metadata_content) if comp_type == 'metadata' else 2209\n                            })\n                \n                # Create and upload new directory listing\n                dir_content, dir_hash = self.create_directory_listing(child_objects, data_components)\n                self.upload_raw_content(dir_content, dir_hash)\n                \n                # Update parent node hash\n                parent_node['hash'] = dir_hash\n                self.database['hash_registry'][dir_hash] = {\n                    'uuid': parent_uuid,\n                    'type': 'node',\n                    'last_seen': datetime.now().isoformat()\n                }\n                \n                # Update root if parent is root\n                if not parent_node.get('parent_uuid'):\n                    
self.update_root_hash(dir_hash)\n            \n            # Update database\n            node['metadata'] = current_metadata\n            node['last_modified'] = current_metadata['lastModified']\n            node['sync_status'] = 'updated'\n            node['last_synced'] = datetime.now().isoformat()\n            \n            # Update hash registry\n            self.database['hash_registry'][metadata_hash] = {\n                'uuid': document_uuid,\n                'type': 'metadata',\n                'last_seen': datetime.now().isoformat()\n            }\n            \n            self._save_database()\n            print(f\"\u2705 Successfully updated document metadata\")\n            return True\n            \n        except Exception as e:\n            print(f\"\u274c Failed to edit document metadata: {e}\")\n            return False\n    \n    def upload_pdf_document(self, pdf_path: str, name: str, parent_uuid: str = \"\") -> bool:\n        \"\"\"Upload a new PDF document to reMarkable following the correct sequence from app logs\"\"\"\n        try:\n            # Clear any previous document context\n            self._clear_document_context()\n            \n            pdf_file = Path(pdf_path)\n            if not pdf_file.exists():\n                raise FileNotFoundError(f\"PDF file not found: {pdf_path}\")\n            \n            print(f\"\ud83d\udcc4 Uploading PDF: {name}\")\n            \n            # Generate UUID for new document and set it for consistent rm-filename headers\n            document_uuid = str(uuid.uuid4())\n            self._current_document_uuid = document_uuid\n            print(f\"\ud83d\udcca Document UUID: {document_uuid}\")\n            \n            # Read PDF content\n            with open(pdf_file, 'rb') as f:\n                pdf_content = f.read()\n            \n            # FOLLOW APP LOGS UPLOAD ORDER:\n            # 1. Content (if any) - for PDFs this might be empty or minimal\n            # 2. Page data (.rm files) - not needed for PDF\n            # 3. Metadata\n            # 4. 
PDF content\n            \n            print(\"\ud83d\udcdd Step 1: Creating and uploading content...\")\n            # Create minimal content for PDF (empty content structure)\n            content_data, content_hash = self.create_content_json([], \"PDF\")\n            self.upload_raw_content(\n                content=content_data,\n                content_type='application/octet-stream',\n                filename=f\"{document_uuid}.content\"\n            )\n            \n            print(\"\ud83d\udcdd Step 2: Creating and uploading metadata...\")\n            # Create metadata\n            metadata_content, metadata_hash = self.create_metadata_json(name, parent_uuid)\n            self.upload_raw_content(\n                content=metadata_content,\n                content_type='application/octet-stream',\n                filename=f\"{document_uuid}.metadata\"\n            )\n            \n            print(\"\ud83d\udcdd Step 3: Uploading PDF content...\")\n            # Upload PDF content LAST (as per app logs)\n            pdf_hash = self.upload_raw_content(\n                content=pdf_content,\n                content_type='application/pdf',\n                filename=f\"{document_uuid}.pdf\"\n            )\n            \n            # Create document directory listing\n            data_components = [\n                {\n                    'hash': metadata_hash,\n                    'component': f\"{document_uuid}.metadata\",\n                    'size': len(metadata_content)\n                },\n                {\n                    'hash': pdf_hash,\n                    'component': f\"{document_uuid}.pdf\",\n                    'size': len(pdf_content)\n                }\n            ]\n            \n            doc_dir_content, doc_dir_hash = self.create_directory_listing([], data_components)\n            self.upload_raw_content(doc_dir_content, doc_dir_hash)\n            \n            # Add to database\n            new_node = {\n                'uuid': document_uuid,\n                'hash': doc_dir_hash,\n                'name': name,\n                'node_type': 'document',\n                'parent_uuid': parent_uuid,\n                'local_path': f\"content/{name}\",\n                'extracted_files': [str(pdf_file)],\n                'component_hashes': {\n                    'content': None,\n                    'metadata': metadata_hash,\n                    'pdf': pdf_hash,\n                    'pagedata': None,\n                    'rm_files': []\n                },\n                'metadata': json.loads(metadata_content.decode('utf-8')),\n                'last_modified': self._generate_timestamp(),\n                'version': 1,\n                'sync_status': 'uploaded',\n                'last_synced': datetime.now().isoformat()\n            }\n            \n            self.database['nodes'][document_uuid] = new_node\n            \n            # Update hash registry\n            for hash_val, info in [\n                (doc_dir_hash, {'uuid': document_uuid, 'type': 'node'}),\n                (metadata_hash, {'uuid': document_uuid, 'type': 'metadata'}),\n                (pdf_hash, {'uuid': document_uuid, 'type': 'pdf'})\n            ]:\n                self.database['hash_registry'][hash_val] = {\n                    **info,\n                    'last_seen': datetime.now().isoformat()\n                }\n            \n            # Update parent directory and root if needed\n            if parent_uuid and parent_uuid in self.database['nodes']:\n                # 
TODO: Update parent directory listing\n                pass\n            else:\n                # Document added to root - update root hash\n                self.update_root_hash(doc_dir_hash)\n            \n            self._save_database()\n            print(f\"\u2705 Successfully uploaded PDF document: {name}\")\n            return True\n            \n        except Exception as e:\n            print(f\"\u274c Failed to upload PDF document: {e}\")\n            return False\n    \n    def create_notebook(self, name: str, parent_uuid: str = \"\", template: str = \"Blank\") -> bool:\n        \"\"\"Create a new empty notebook\"\"\"\n        try:\n            # Clear any previous document context\n            self._clear_document_context()\n            \n            print(f\"\ud83d\udcd3 Creating notebook: {name}\")\n            \n            # Generate UUIDs and set current document UUID for consistent rm-filename headers\n            document_uuid = str(uuid.uuid4())\n            self._current_document_uuid = document_uuid\n            page_uuid = str(uuid.uuid4())\n            print(f\"\ud83d\udcca Document UUID: {document_uuid}\")\n            \n            # Create empty .rm content for first page\n            rm_content = b'\\x00' * 1000  # Minimal empty page content\n            rm_hash = self.upload_raw_content(\n                content=rm_content,\n                content_type='application/octet-stream',\n                filename=f\"{page_uuid}.rm\"\n            )\n            \n            # Create content.json\n            content_data, content_hash = self.create_content_json([page_uuid], template)\n            self.upload_raw_content(\n                content=content_data,\n                content_type='application/octet-stream',\n                filename=f\"{document_uuid}.content\"\n            )\n            \n            # Create metadata\n            metadata_content, metadata_hash = self.create_metadata_json(name, parent_uuid)\n            self.upload_raw_content(\n                content=metadata_content,\n                content_type='application/octet-stream',\n                filename=f\"{document_uuid}.metadata\"\n            )\n            \n            # Create document directory listing\n            data_components = [\n                {\n                    'hash': content_hash,\n                    'component': f\"{document_uuid}.content\",\n                    'size': len(content_data)\n                },\n                {\n                    'hash': metadata_hash,\n                    'component': f\"{document_uuid}.metadata\",\n                    'size': len(metadata_content)\n                },\n                {\n                    'hash': rm_hash,\n                    'component': f\"{document_uuid}/{page_uuid}.rm\",\n                    'size': len(rm_content)\n                }\n            ]\n            \n            doc_dir_content, doc_dir_hash = self.create_directory_listing([], data_components)\n            self.upload_raw_content(doc_dir_content, doc_dir_hash)\n            \n            # Add to database\n            new_node = {\n                'uuid': document_uuid,\n                'hash': doc_dir_hash,\n                'name': name,\n                'node_type': 'document',\n                'parent_uuid': parent_uuid,\n                'local_path': f\"content/{name}\",\n                'extracted_files': [],\n                'component_hashes': {\n                    'content': content_hash,\n                    'metadata': metadata_hash,\n 
                   'pdf': None,\n                    'pagedata': None,\n                    'rm_files': [rm_hash]\n                },\n                'metadata': json.loads(metadata_content.decode('utf-8')),\n                'last_modified': self._generate_timestamp(),\n                'version': 1,\n                'sync_status': 'created',\n                'last_synced': datetime.now().isoformat()\n            }\n            \n            self.database['nodes'][document_uuid] = new_node\n            \n            # Update hash registry\n            for hash_val, info in [\n                (doc_dir_hash, {'uuid': document_uuid, 'type': 'node'}),\n                (content_hash, {'uuid': document_uuid, 'type': 'content'}),\n                (metadata_hash, {'uuid': document_uuid, 'type': 'metadata'}),\n                (rm_hash, {'uuid': document_uuid, 'type': 'rm_0'})\n            ]:\n                self.database['hash_registry'][hash_val] = {\n                    **info,\n                    'last_seen': datetime.now().isoformat()\n                }\n            \n            # Update root hash (simplified for demo)\n            self.update_root_hash(doc_dir_hash)\n            \n            self._save_database()\n            print(f\"\u2705 Successfully created notebook: {name}\")\n            return True\n            \n        except Exception as e:\n            print(f\"\u274c Failed to create notebook: {e}\")\n            return False",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/cloudtest/upload_manager_old.py",
      "tags": [
        "class",
        "remarkableuploadmanager"
      ],
      "updated_at": "2025-12-07T02:00:05.203641",
      "usage_example": "# Example usage:\n# result = RemarkableUploadManager(bases)"
    },
    {
      "best_practices": [
        "Ensure the remarkable_device_token.txt file exists and contains valid credentials before running this test",
        "Create the test_uploads directory and place a valid PDF file named test_document.pdf in it",
        "This function is intended for testing purposes only and should not be used in production code",
        "The function prints status messages to stdout, making it suitable for manual testing but not for automated test suites without output capture",
        "Consider wrapping this in a proper unit test framework (pytest, unittest) for better integration with CI/CD pipelines",
        "The function uses relative paths based on __file__, so it must be run as a script or from the correct working directory",
        "Handle the boolean return value appropriately in calling code to determine test success or failure"
      ],
      "class_interface": {},
      "complexity": "moderate",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-07 01:00:04",
      "decorators": [],
      "dependencies": [
        "pathlib",
        "upload_manager"
      ],
      "description": "A test function that performs a quick upload of a PDF document to a reMarkable tablet without performing a full synchronization.",
      "docstring": "Quick test without full sync",
      "id": 2136,
      "imports": [
        "import os",
        "import sys",
        "import json",
        "from pathlib import Path",
        "from upload_manager import RemarkableUploadManager"
      ],
      "imports_required": [
        "from pathlib import Path",
        "from upload_manager import RemarkableUploadManager"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 49,
      "line_start": 9,
      "name": "test_quick_upload_v1",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as a lightweight integration test for the reMarkable upload functionality. It authenticates with the reMarkable cloud service, locates a test PDF file, and uploads it to the root folder of the device. It's designed for rapid testing during development without the overhead of a complete sync operation.",
      "return_annotation": null,
      "return_explained": "Returns a boolean value: True if the PDF upload was successful, False if authentication failed, the test PDF file was not found, or the upload operation failed. Note that the function may also return None implicitly if an exception occurs during upload (though the exception is caught and False is printed).",
      "settings_required": [
        "A file named 'remarkable_device_token.txt' must exist in the same directory as the test script, containing valid reMarkable device authentication token",
        "A test PDF file must exist at './test_uploads/test_document.pdf' relative to the script location",
        "The upload_manager module must be available and contain the RemarkableUploadManager class",
        "Valid reMarkable cloud service credentials configured in the device token file"
      ],
      "source_code": "def test_quick_upload():\n    \"\"\"Quick test without full sync\"\"\"\n    print(\"\ud83d\ude80 Quick PDF Upload Test\")\n    print(\"=\" * 50)\n    \n    # Initialize upload manager\n    device_token_path = Path(__file__).parent / \"remarkable_device_token.txt\"\n    upload_manager = RemarkableUploadManager(device_token_path)\n    \n    # Authenticate\n    print(\"\ud83d\udd11 Authenticating...\")\n    if not upload_manager.authenticate():\n        print(\"\u274c Authentication failed\")\n        return False\n    print(\"\u2705 Authentication successful\")\n    \n    # Create test document\n    test_pdf = Path(__file__).parent / \"test_uploads\" / \"test_document.pdf\"\n    if not test_pdf.exists():\n        print(f\"\u274c Test PDF not found: {test_pdf}\")\n        return False\n    \n    # Upload the PDF\n    print(f\"\ud83d\udcc4 Uploading: {test_pdf.name}\")\n    try:\n        success = upload_manager.upload_pdf_document(\n            pdf_file=test_pdf,\n            name=\"QuickUploadTest\",\n            parent_uuid=\"\"  # Root folder\n        )\n        \n        if success:\n            print(\"\u2705 Upload successful!\")\n            return True\n        else:\n            print(\"\u274c Upload failed\")\n            return False\n            \n    except Exception as e:\n        print(f\"\u274c Upload error: {e}\")\n        return False",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/cloudtest/quick_upload_test.py",
      "tags": [
        "testing",
        "pdf-upload",
        "remarkable",
        "integration-test",
        "file-upload",
        "cloud-sync",
        "authentication",
        "quick-test"
      ],
      "updated_at": "2025-12-07T02:00:04.933045",
      "usage_example": "# Ensure prerequisites are met:\n# 1. Create remarkable_device_token.txt with your device token\n# 2. Create test_uploads/test_document.pdf\n\nfrom pathlib import Path\nfrom upload_manager import RemarkableUploadManager\n\n# Run the test\nresult = test_quick_upload()\n\nif result:\n    print(\"Test passed successfully\")\nelse:\n    print(\"Test failed\")"
    },
    {
      "best_practices": [
        "This function modifies sys.path at runtime which can affect module resolution - use with caution in production code",
        "The function has a lazy import of RemarkableAuth which may hide import errors until runtime",
        "Ensure the database path 'remarkable_replica_v2/replica_database.json' exists or handle creation appropriately",
        "The function returns True/False but doesn't handle the actual operations shown in the menu - it's only a demo scaffold",
        "Consider adding error handling for database path access and RemarkableUploadManager initialization",
        "The function assumes RemarkableUploadManager is defined elsewhere in the module - ensure it's available before calling main()",
        "For production use, consider making the database path configurable rather than hardcoded"
      ],
      "class_interface": {},
      "complexity": "moderate",
      "component_type": "function",
      "conditional_imports": [
        {
          "condition": "imported inside the function at runtime, required for authentication",
          "import": "from auth import RemarkableAuth",
          "optional": false
        }
      ],
      "created_at": "2025-12-07 00:59:48",
      "decorators": [],
      "dependencies": [
        "sys",
        "pathlib",
        "auth",
        "requests",
        "json",
        "hashlib",
        "uuid",
        "base64",
        "binascii",
        "zlib",
        "datetime",
        "time",
        "crc32c",
        "re",
        "local_replica_v2"
      ],
      "description": "Demo function that showcases the reMarkable upload functionality by authenticating a user session and initializing an upload manager with available operations menu.",
      "docstring": "Demo of upload functionality",
      "id": 2135,
      "imports": [
        "import os",
        "import json",
        "import hashlib",
        "import requests",
        "import uuid",
        "import base64",
        "import binascii",
        "import zlib",
        "from pathlib import Path",
        "from typing import Dict",
        "from typing import List",
        "from typing import Optional",
        "from typing import Any",
        "from typing import Tuple",
        "from datetime import datetime",
        "import time",
        "import crc32c",
        "import sys",
        "from auth import RemarkableAuth",
        "import re",
        "from local_replica_v2 import RemarkableReplicaBuilder",
        "from local_replica_v2 import RemarkableReplicaBuilder"
      ],
      "imports_required": [
        "from pathlib import Path",
        "import sys"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 1153,
      "line_start": 1128,
      "name": "main_v67",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as a demonstration and entry point for the reMarkable Upload Manager. It handles authentication through RemarkableAuth, creates a session, initializes the RemarkableUploadManager with a database path, and displays a menu of available operations (edit metadata, upload PDF, create notebook). It's designed to be run as a standalone demo to test the upload functionality.",
      "return_annotation": null,
      "return_explained": "Returns a boolean value: True if authentication succeeds and the upload manager is initialized successfully, False if authentication fails. The function primarily serves as a demo and doesn't perform actual operations beyond setup and menu display.",
      "settings_required": [
        "RemarkableAuth configuration (likely requires reMarkable API credentials or authentication tokens)",
        "Database file at path 'remarkable_replica_v2/replica_database.json' must exist or be creatable",
        "RemarkableUploadManager class must be defined in the same module or imported",
        "Valid reMarkable account credentials for authentication"
      ],
      "source_code": "def main():\n    \"\"\"Demo of upload functionality\"\"\"\n    import sys\n    sys.path.insert(0, str(Path(__file__).parent))\n    \n    from auth import RemarkableAuth\n    \n    # Authenticate\n    auth = RemarkableAuth()\n    session = auth.get_authenticated_session()\n    \n    if not session:\n        print(\"\u274c Authentication failed\")\n        return False\n    \n    # Initialize upload manager\n    database_path = \"remarkable_replica_v2/replica_database.json\"\n    uploader = RemarkableUploadManager(session, database_path)\n    \n    print(\"\ud83d\ude80 reMarkable Upload Manager Demo\")\n    print(\"Available operations:\")\n    print(\"1. Edit document metadata\")\n    print(\"2. Upload PDF document\")\n    print(\"3. Create new notebook\")\n    \n    return True",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/cloudtest/upload_manager.py",
      "tags": [
        "demo",
        "authentication",
        "remarkable",
        "upload-manager",
        "initialization",
        "session-management",
        "entry-point",
        "cli-menu",
        "document-management"
      ],
      "updated_at": "2025-12-07T01:59:48.529715",
      "usage_example": "# Ensure RemarkableUploadManager is defined in the same module\n# Ensure auth.py module exists with RemarkableAuth class\n# Ensure database directory exists\nimport os\nos.makedirs('remarkable_replica_v2', exist_ok=True)\n\n# Run the demo\nif __name__ == '__main__':\n    success = main()\n    if success:\n        print('Demo initialized successfully')\n    else:\n        print('Demo failed to initialize')"
    },
    {
      "best_practices": [
        "Ensure the print_client function is properly defined and handles connection errors gracefully",
        "Validate server address and queue name before calling this function to avoid connection failures",
        "Consider wrapping this function call in try-except blocks to handle potential network or connection errors",
        "The function assumes print_client returns an object with a send_job() method - ensure this contract is maintained",
        "Consider adding timeout parameters in kwargs to prevent indefinite blocking on network operations",
        "Ensure obj_uid is unique to prevent job conflicts or overwrites in the queue system"
      ],
      "class_interface": {},
      "complexity": "simple",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-06 23:16:12",
      "decorators": [],
      "dependencies": [
        "asyncio",
        "socket"
      ],
      "description": "Initializes a TCP messenger client and sends a job to a server queue for processing graph-related tasks with specified label types and language settings.",
      "docstring": null,
      "id": 1906,
      "imports": [
        "import asyncio",
        "import socket"
      ],
      "imports_required": [
        "import asyncio",
        "import socket"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 144,
      "line_start": 142,
      "name": "main_v117",
      "parameters": [
        {
          "annotation": null,
          "default": null,
          "kind": "positional_or_keyword",
          "name": "graph"
        },
        {
          "annotation": null,
          "default": null,
          "kind": "positional_or_keyword",
          "name": "server"
        },
        {
          "annotation": null,
          "default": null,
          "kind": "positional_or_keyword",
          "name": "queue"
        },
        {
          "annotation": null,
          "default": null,
          "kind": "positional_or_keyword",
          "name": "labeltype"
        },
        {
          "annotation": null,
          "default": null,
          "kind": "positional_or_keyword",
          "name": "language"
        },
        {
          "annotation": null,
          "default": null,
          "kind": "positional_or_keyword",
          "name": "obj_uid"
        },
        {
          "annotation": null,
          "default": null,
          "kind": "var_keyword",
          "name": "**kwargs"
        }
      ],
      "parameters_explained": {
        "**kwargs": "Additional keyword arguments passed through to the print_client function. Allows for extensibility and additional configuration options without modifying the function signature.",
        "graph": "Graph data structure or identifier representing the data/document to be processed. Expected to be compatible with the print_client function's requirements.",
        "labeltype": "Type or format of labels to be used in the printing/processing operation. Specifies how labels should be rendered or formatted.",
        "language": "Language code or identifier for localization purposes. Determines the language used for labels, text rendering, or document generation.",
        "obj_uid": "Unique identifier for the object being processed. Used to track and identify the specific job or document in the system.",
        "queue": "Queue identifier or name where the job should be submitted. Used to route the job to the appropriate processing queue on the server.",
        "server": "Server address or hostname where the print client should connect. Typically a string containing IP address or domain name."
      },
      "parent_class": null,
      "purpose": "This function serves as an entry point for creating a print client connection to a server, configuring it with graph data, queue information, label types, and language settings, then triggering the job submission. It appears to be part of a distributed printing or document processing system where jobs are queued and processed remotely via TCP connections.",
      "return_annotation": null,
      "return_explained": "This function does not explicitly return a value (implicitly returns None). The function's purpose is to perform side effects by creating a TCP messenger and sending a job, rather than returning data.",
      "settings_required": [
        "The print_client function must be defined and accessible in the same module or imported",
        "Server must be reachable and accepting TCP connections",
        "Appropriate network permissions and firewall rules to connect to the specified server",
        "Server-side queue configuration matching the provided queue parameter"
      ],
      "source_code": "def main(graph, server, queue, labeltype, language, obj_uid, **kwargs):\n    tcp_messenger = print_client(graph, server, queue, labeltype, language, obj_uid, **kwargs)\n    tcp_messenger.send_job()",
      "source_file": "/tf/active/vicechatdev/resources/printclient.py",
      "tags": [
        "tcp-client",
        "messaging",
        "job-submission",
        "print-client",
        "queue",
        "networking",
        "distributed-system",
        "graph-processing",
        "label-printing",
        "localization"
      ],
      "updated_at": "2025-12-07T01:59:48.529067",
      "usage_example": "# Assuming print_client is defined elsewhere in the module\n# Example usage:\ngraph_data = {'nodes': [1, 2, 3], 'edges': [(1, 2), (2, 3)]}\nserver_address = '192.168.1.100'\nqueue_name = 'print_queue_1'\nlabel_type = 'barcode'\nlanguage_code = 'en-US'\nobject_uid = 'doc-12345'\n\n# Call the main function\nmain(\n    graph=graph_data,\n    server=server_address,\n    queue=queue_name,\n    labeltype=label_type,\n    language=language_code,\n    obj_uid=object_uid,\n    timeout=30,\n    retry_count=3\n)"
    },
    {
      "best_practices": [
        "This function should be called from an if __name__ == '__main__': block to prevent execution when imported as a module",
        "The function depends on send_test_email being defined elsewhere in the codebase - ensure this dependency is available",
        "Use sys.exit(main()) to properly propagate the exit code to the operating system",
        "The --to argument is required; all other arguments have sensible defaults for local testing",
        "Default SMTP port 2525 is commonly used for testing/development; production systems typically use port 25, 465, or 587",
        "Consider adding error handling for invalid email addresses or network connectivity issues in production use"
      ],
      "class_interface": {},
      "complexity": "simple",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-06 17:37:41",
      "decorators": [],
      "dependencies": [
        "argparse",
        "smtplib",
        "sys",
        "email.mime.text",
        "email.mime.multipart",
        "email.mime.application"
      ],
      "description": "Command-line interface function that parses arguments and sends a test email through an SMTP forwarder service, displaying connection details and returning an exit code based on success.",
      "docstring": null,
      "id": 1480,
      "imports": [
        "import smtplib",
        "import sys",
        "import argparse",
        "from email.mime.text import MIMEText",
        "from email.mime.multipart import MIMEMultipart",
        "from email.mime.application import MIMEApplication"
      ],
      "imports_required": [
        "import argparse",
        "import smtplib",
        "import sys",
        "from email.mime.text import MIMEText",
        "from email.mime.multipart import MIMEMultipart",
        "from email.mime.application import MIMEApplication"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 68,
      "line_start": 40,
      "name": "main_v116",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as the entry point for a test client application that validates email forwarding functionality. It configures an argument parser to accept SMTP connection parameters, sender/recipient information, and email content, then invokes the send_test_email function with these parameters. The function is designed for testing and debugging email forwarder services by providing a simple CLI tool to send test emails with customizable parameters.",
      "return_annotation": null,
      "return_explained": "Returns an integer exit code: 0 if the email was sent successfully (when send_test_email returns True), or 1 if the email sending failed (when send_test_email returns False). This follows standard Unix convention for command-line tool exit codes.",
      "settings_required": [
        "Requires a send_test_email function to be defined in the same module or imported",
        "SMTP server must be accessible at the specified host and port (default: localhost:2525)",
        "Recipient email address must be provided via --to argument"
      ],
      "source_code": "def main():\n    parser = argparse.ArgumentParser(description='Send test email to forwarder service')\n    parser.add_argument('--host', default='localhost', help='SMTP host (default: localhost)')\n    parser.add_argument('--port', default=2525, type=int, help='SMTP port (default: 2525)')\n    parser.add_argument('--from', dest='sender', default='test@example.com', help='Sender email')\n    parser.add_argument('--to', dest='recipient', required=True, help='Recipient email')\n    parser.add_argument('--subject', default='Test Email from Forwarder', help='Email subject')\n    parser.add_argument('--message', default='This is a test email from the email forwarder service.', help='Email message')\n    \n    args = parser.parse_args()\n    \n    print(\"Email Forwarder Test Client\")\n    print(\"=\" * 30)\n    print(f\"SMTP Server: {args.host}:{args.port}\")\n    print(f\"From: {args.sender}\")\n    print(f\"To: {args.recipient}\")\n    print(f\"Subject: {args.subject}\")\n    print()\n    \n    success = send_test_email(\n        smtp_host=args.host,\n        smtp_port=args.port,\n        sender=args.sender,\n        recipient=args.recipient,\n        subject=args.subject,\n        message=args.message\n    )\n    \n    return 0 if success else 1",
      "source_file": "/tf/active/vicechatdev/email-forwarder/send_test_email.py",
      "tags": [
        "cli",
        "email",
        "smtp",
        "testing",
        "command-line",
        "argparse",
        "email-forwarder",
        "test-client",
        "entry-point",
        "main-function"
      ],
      "updated_at": "2025-12-07T01:59:48.528429",
      "usage_example": "# Basic usage with required recipient\nif __name__ == '__main__':\n    exit_code = main()\n    sys.exit(exit_code)\n\n# Command line examples:\n# python script.py --to recipient@example.com\n# python script.py --host smtp.example.com --port 587 --from sender@test.com --to recipient@example.com --subject \"Custom Subject\" --message \"Custom message body\""
    },
    {
      "best_practices": [
        "This function is hardcoded to debug a specific session ID ('3fea9b6e-92ea-462a-ba67-996f251e39db'). Modify the session_id variable to debug different sessions.",
        "Ensure the database connection is properly configured before running this function to avoid connection errors.",
        "This function accesses a private method (_get_execution_tracking) which may change in future versions of the service.",
        "Use this function in development/debugging environments only, not in production code.",
        "The function assumes the session exists in the database; add error handling if using with dynamic session IDs.",
        "Consider parameterizing the session_id if this function will be reused for multiple sessions."
      ],
      "class_interface": {},
      "complexity": "simple",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-06 16:09:53",
      "decorators": [],
      "dependencies": [
        "services.StatisticalAnalysisService",
        "config.Config"
      ],
      "description": "A debugging utility function that analyzes and displays execution tracking information for a specific session in a statistical analysis service.",
      "docstring": null,
      "id": 1224,
      "imports": [
        "import sys",
        "from services import StatisticalAnalysisService",
        "from config import Config"
      ],
      "imports_required": [
        "from services import StatisticalAnalysisService",
        "from config import Config"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 35,
      "line_start": 9,
      "name": "main_v115",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as a diagnostic tool to inspect the execution steps and metadata of a statistical analysis session. It retrieves session steps from the database, displays detailed information about each step (including type, ID, success status, and metadata), and tests the internal execution tracking method. This is primarily used for debugging and verifying that session execution data is being properly stored and retrieved.",
      "return_annotation": null,
      "return_explained": "This function does not return any value (implicitly returns None). It produces console output displaying session step information and execution tracking results.",
      "settings_required": [
        "Config class must be properly configured with database connection settings",
        "Database must be accessible and contain the session with ID '3fea9b6e-92ea-462a-ba67-996f251e39db'",
        "StatisticalAnalysisService must have a database_manager attribute with get_session_steps method",
        "StatisticalAnalysisService must have a _get_execution_tracking method"
      ],
      "source_code": "def main():\n    config = Config()\n    service = StatisticalAnalysisService(config)\n    \n    session_id = '3fea9b6e-92ea-462a-ba67-996f251e39db'\n    print(f\"Debugging execution tracking for session: {session_id}\")\n    \n    # Get steps directly\n    steps = service.database_manager.get_session_steps(session_id)\n    print(f\"\\nFound {len(steps)} steps:\")\n    \n    for i, step in enumerate(steps):\n        print(f\"  Step {i+1}: {step.step_type}\")\n        print(f\"    ID: {step.step_id}\")\n        print(f\"    Success: {step.execution_success}\")\n        print(f\"    Has metadata: {bool(step.metadata)}\")\n        if step.metadata:\n            print(f\"    Metadata keys: {list(step.metadata.keys())}\")\n            if 'execution_log' in step.metadata:\n                print(f\"    Execution log entries: {len(step.metadata['execution_log'])}\")\n        print()\n    \n    # Test the execution tracking method\n    print(\"Testing _get_execution_tracking method:\")\n    result = service._get_execution_tracking(session_id)\n    print(f\"Result keys: {list(result.keys())}\")\n    print(f\"Result: {result}\")",
      "source_file": "/tf/active/vicechatdev/full_smartstat/debug_execution_tracking.py",
      "tags": [
        "debugging",
        "diagnostics",
        "session-tracking",
        "execution-analysis",
        "database-inspection",
        "logging",
        "metadata-inspection",
        "statistical-analysis",
        "development-tool"
      ],
      "updated_at": "2025-12-07T01:59:48.527730",
      "usage_example": "# Ensure config.py and services module are available\n# from config import Config\n# from services import StatisticalAnalysisService\n\ndef main():\n    config = Config()\n    service = StatisticalAnalysisService(config)\n    \n    session_id = '3fea9b6e-92ea-462a-ba67-996f251e39db'\n    print(f\"Debugging execution tracking for session: {session_id}\")\n    \n    steps = service.database_manager.get_session_steps(session_id)\n    print(f\"\\nFound {len(steps)} steps:\")\n    \n    for i, step in enumerate(steps):\n        print(f\"  Step {i+1}: {step.step_type}\")\n        print(f\"    ID: {step.step_id}\")\n        print(f\"    Success: {step.execution_success}\")\n        print(f\"    Has metadata: {bool(step.metadata)}\")\n        if step.metadata:\n            print(f\"    Metadata keys: {list(step.metadata.keys())}\")\n            if 'execution_log' in step.metadata:\n                print(f\"    Execution log entries: {len(step.metadata['execution_log'])}\")\n        print()\n    \n    print(\"Testing _get_execution_tracking method:\")\n    result = service._get_execution_tracking(session_id)\n    print(f\"Result keys: {list(result.keys())}\")\n    print(f\"Result: {result}\")\n\nif __name__ == '__main__':\n    main()"
    },
    {
      "best_practices": [
        "This function should be called as the main entry point of the test script",
        "The return value should be used as the process exit code to indicate test success/failure to CI/CD systems",
        "All test functions called by main() must be defined before calling main()",
        "Test functions should raise AssertionError for test failures to be properly caught and reported",
        "The function provides clear visual feedback with separator lines and status symbols (\u2713 and \u2717)",
        "Error handling includes full traceback printing for debugging unexpected exceptions"
      ],
      "class_interface": {},
      "complexity": "simple",
      "component_type": "function",
      "conditional_imports": [
        {
          "condition": "only when an exception occurs during test execution",
          "import": "import traceback",
          "optional": false
        }
      ],
      "created_at": "2025-12-06 10:46:30",
      "decorators": [],
      "dependencies": [
        "pandas",
        "pathlib",
        "traceback"
      ],
      "description": "Test runner function that executes a suite of regional format handling tests for CSV parsing, including European and US number formats with various delimiters.",
      "docstring": null,
      "id": 477,
      "imports": [
        "import os",
        "import sys",
        "import pandas as pd",
        "from pathlib import Path",
        "from smartstat_service import smart_read_csv",
        "from smartstat_service import convert_european_decimals",
        "import traceback"
      ],
      "imports_required": [
        "import os",
        "import sys",
        "import pandas as pd",
        "from pathlib import Path",
        "from smartstat_service import smart_read_csv",
        "from smartstat_service import convert_european_decimals"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 210,
      "line_start": 186,
      "name": "main_v114",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as the main entry point for a test suite that validates CSV parsing functionality across different regional formats. It sequentially runs tests for European CSV (comma as decimal separator), US CSV (period as decimal separator), formats with thousands separators, and tab-delimited files. The function provides formatted console output showing test progress and results, returning 0 for success or 1 for failure.",
      "return_annotation": null,
      "return_explained": "Returns an integer exit code: 0 if all tests pass successfully, 1 if any test fails (either through AssertionError or any other Exception). This follows standard Unix convention for process exit codes.",
      "settings_required": [
        "The test functions (test_european_csv, test_us_csv, test_european_with_thousands, test_us_with_thousands, test_tab_delimited_european) must be defined in the same module or imported",
        "The smartstat_service module must be available with smart_read_csv and convert_european_decimals functions",
        "Test CSV files must be present in expected locations for the test functions to work"
      ],
      "source_code": "def main():\n    print(\"\\n\" + \"=\"*60)\n    print(\"Regional Format Handling Tests\")\n    print(\"=\"*60)\n    \n    try:\n        test_european_csv()\n        test_us_csv()\n        test_european_with_thousands()\n        test_us_with_thousands()\n        test_tab_delimited_european()\n        \n        print(\"\\n\" + \"=\"*60)\n        print(\"\u2713 ALL TESTS PASSED!\")\n        print(\"=\"*60 + \"\\n\")\n        return 0\n        \n    except AssertionError as e:\n        print(f\"\\n\u2717 TEST FAILED: {e}\\n\")\n        return 1\n    except Exception as e:\n        print(f\"\\n\u2717 ERROR: {e}\\n\")\n        import traceback\n        traceback.print_exc()\n        return 1",
      "source_file": "/tf/active/vicechatdev/vice_ai/test_regional_formats.py",
      "tags": [
        "testing",
        "csv-parsing",
        "regional-formats",
        "test-runner",
        "data-validation",
        "european-format",
        "us-format",
        "number-formatting",
        "integration-tests"
      ],
      "updated_at": "2025-12-07T01:59:48.527098",
      "usage_example": "if __name__ == '__main__':\n    exit_code = main()\n    sys.exit(exit_code)"
    },
    {
      "best_practices": [
        "This function is hardcoded with specific document hashes and metadata - it's designed for a specific debugging scenario and should be adapted for general use",
        "The function makes multiple HTTP requests sequentially; consider adding rate limiting or error handling for production use",
        "Authentication credentials should be properly secured in the RemarkableAuth implementation",
        "The function prints directly to stdout; consider using logging module for better control in production environments",
        "Error handling uses broad exception catching which may hide specific issues; consider more granular exception handling for production code",
        "The document dictionary is hardcoded and should be externalized to a configuration file for reusability",
        "Consider adding retry logic for network requests to handle transient failures"
      ],
      "class_interface": {},
      "complexity": "moderate",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-07 00:36:37",
      "decorators": [],
      "dependencies": [
        "auth",
        "json"
      ],
      "description": "Analyzes and compares .content files for PDF documents stored in reMarkable cloud storage, identifying differences between working and non-working documents.",
      "docstring": null,
      "id": 2073,
      "imports": [
        "from auth import RemarkableAuth",
        "import json"
      ],
      "imports_required": [
        "from auth import RemarkableAuth",
        "import json"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 118,
      "line_start": 6,
      "name": "main_v113",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This diagnostic function authenticates with the reMarkable cloud service, downloads .content metadata files for four specific PDF documents (two working, two broken), parses their JSON structure, and performs a detailed comparison to identify what makes some documents visible/working while others are not. It's designed for debugging document upload/visibility issues in the reMarkable ecosystem.",
      "return_annotation": null,
      "return_explained": "This function returns None (implicitly). It performs side effects by printing diagnostic information to stdout, including authentication status, document metadata, JSON content structure, and comparative analysis between working and broken documents.",
      "settings_required": [
        "RemarkableAuth class must be available and properly configured with authentication credentials",
        "Network access to eu.tectonic.remarkable.com API endpoint",
        "Valid reMarkable cloud account credentials (handled by RemarkableAuth)",
        "The auth module must implement get_authenticated_session() method that returns a requests-compatible session object"
      ],
      "source_code": "def main():\n    auth = RemarkableAuth()\n    session = auth.get_authenticated_session()\n    \n    if not session:\n        print(\"\u274c Authentication failed\")\n        return\n    \n    print(\"\ud83d\udcc4 COMPARING .CONTENT FILES FOR ALL 4 PDF DOCUMENTS\")\n    print(\"=\" * 70)\n    \n    # Document info from the log file\n    documents = {\n        'invoice_poulpharm': {\n            'name': 'invoice poulpharm june 2025',\n            'content_hash': '4843f8d18f154198752eef85dbefb3c8d2d9984fe84e70d13857f5a7d61dcff3',\n            'working': True,\n            'size': 720\n        },\n        'pylontech': {\n            'name': 'Pylontech force H3 datasheet',\n            'content_hash': 'feb1654a645e7d42eea63bb8f87a1888026fd3ac197aa725fa3d77ae8b3e1e8c',\n            'working': True,\n            'size': 831\n        },\n        'upload_test_1': {\n            'name': 'UploadTest_1753969395',\n            'content_hash': '1ea64a7fb8fdd227cff533ea190a74d5111656f57699db714d33f69aba4404d5',\n            'working': False,\n            'size': 741\n        },\n        'upload_test_2': {\n            'name': 'UploadTest_1753968602',\n            'content_hash': 'ddc9459da5fc01058d854c85e3879b05c145e82189f4dd409bdc5d88014ad5e5',\n            'working': False,\n            'size': 741\n        }\n    }\n    \n    content_data = {}\n    \n    for doc_key, doc_info in documents.items():\n        print(f\"\\n\ud83d\udd0d {doc_info['name']} ({'\u2705 WORKING' if doc_info['working'] else '\u274c NOT VISIBLE'})\")\n        print(f\"   Content hash: {doc_info['content_hash']}\")\n        print(f\"   Expected size: {doc_info['size']} bytes\")\n        \n        try:\n            # Download the .content file\n            content_response = session.get(f\"https://eu.tectonic.remarkable.com/sync/v3/files/{doc_info['content_hash']}\")\n            content_response.raise_for_status()\n            content_text = content_response.text\n            \n            print(f\"   Actual size: {len(content_text)} bytes\")\n            print(f\"   Size match: {'\u2705' if len(content_text) == doc_info['size'] else '\u274c'}\")\n            \n            # Parse JSON\n            try:\n                content_json = json.loads(content_text)\n                content_data[doc_key] = content_json\n                \n                print(f\"   \ud83d\udcca JSON Content:\")\n                print(f\"      fileType: {content_json.get('fileType', 'MISSING')}\")\n                print(f\"      pageCount: {content_json.get('pageCount', 'MISSING')}\")\n                print(f\"      originalPageCount: {content_json.get('originalPageCount', 'MISSING')}\")\n                print(f\"      sizeInBytes: {content_json.get('sizeInBytes', 'MISSING')}\")\n                print(f\"      formatVersion: {content_json.get('formatVersion', 'MISSING')}\")\n                print(f\"      orientation: {content_json.get('orientation', 'MISSING')}\")\n                print(f\"      pages array: {len(content_json.get('pages', []))} items\")\n                if content_json.get('pages'):\n                    print(f\"         First page UUID: {content_json['pages'][0]}\")\n                print(f\"      redirectionPageMap: {content_json.get('redirectionPageMap', 'MISSING')}\")\n                \n            except json.JSONDecodeError as e:\n                print(f\"   \u274c Invalid JSON: {e}\")\n                print(f\"   Raw content: {repr(content_text[:200])}\")\n                \n        except Exception as e:\n     
       print(f\"   \u274c Failed to download: {e}\")\n        \n        print(\"-\" * 50)\n    \n    # Compare working vs non-working\n    print(\"\\n\ud83d\udd0d DETAILED COMPARISON: WORKING vs NON-WORKING\")\n    print(\"=\" * 70)\n    \n    working_docs = [k for k, v in documents.items() if v['working']]\n    broken_docs = [k for k, v in documents.items() if not v['working']]\n    \n    print(f\"Working documents: {[documents[k]['name'] for k in working_docs]}\")\n    print(f\"Broken documents: {[documents[k]['name'] for k in broken_docs]}\")\n    \n    if working_docs and broken_docs:\n        print(\"\\n\ud83d\udd0d Key Differences Analysis:\")\n        \n        # Compare first working vs first broken\n        working_content = content_data.get(working_docs[0], {})\n        broken_content = content_data.get(broken_docs[0], {})\n        \n        print(f\"\\nComparing {documents[working_docs[0]]['name']} (working) vs {documents[broken_docs[0]]['name']} (broken):\")\n        \n        all_keys = set(working_content.keys()) | set(broken_content.keys())\n        for key in sorted(all_keys):\n            working_val = working_content.get(key, \"MISSING\")\n            broken_val = broken_content.get(key, \"MISSING\")\n            \n            if working_val != broken_val:\n                print(f\"   \ud83d\udd25 DIFFERENCE - {key}:\")\n                print(f\"      Working: {working_val}\")\n                print(f\"      Broken:  {broken_val}\")\n            else:\n                print(f\"   \u2705 SAME - {key}: {working_val}\")\n    \n    print(f\"\\n\ud83d\udcbe Content files analysis complete!\")",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/cloudtest/analyze_content_files.py",
      "tags": [
        "remarkable",
        "cloud-storage",
        "pdf-analysis",
        "debugging",
        "api-client",
        "document-metadata",
        "json-parsing",
        "comparison",
        "diagnostic",
        "file-download"
      ],
      "updated_at": "2025-12-07T01:59:48.526407",
      "usage_example": "# Ensure auth.py is available with RemarkableAuth class\n# from auth import RemarkableAuth\n# import json\n\nif __name__ == '__main__':\n    main()\n\n# Output will be printed to console showing:\n# - Authentication status\n# - Document metadata for 4 PDFs\n# - JSON content structure for each document\n# - Comparative analysis between working and broken documents"
    },
    {
      "best_practices": [
        "Always use --dry-run flag first to preview changes before modifying file timestamps",
        "Ensure the calling script defines fix_file_dates() and process_directory() functions before calling main()",
        "Use pattern matching (--pattern) to limit processing to specific file types",
        "The function expects to be called with no arguments as it uses argparse to parse sys.argv",
        "Error handling exits with code 1 for file not found errors",
        "Either --file or directory argument must be provided, but not both",
        "Default behavior is recursive directory traversal unless --no-recursive is specified"
      ],
      "class_interface": {},
      "complexity": "moderate",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-06 22:11:53",
      "decorators": [],
      "dependencies": [
        "argparse",
        "os",
        "sys",
        "pathlib",
        "datetime",
        "subprocess"
      ],
      "description": "Entry point function that parses command-line arguments to fix file timestamps by setting them to the oldest date found, either for a single file or recursively through a directory.",
      "docstring": null,
      "id": 1830,
      "imports": [
        "import os",
        "import sys",
        "from pathlib import Path",
        "from datetime import datetime",
        "import subprocess",
        "import argparse"
      ],
      "imports_required": [
        "import argparse",
        "import os",
        "import sys",
        "from pathlib import Path",
        "from datetime import datetime",
        "import subprocess"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 238,
      "line_start": 165,
      "name": "main_v112",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This main function serves as the CLI interface for a file date fixing utility. It configures an argument parser to handle various options including directory/file processing, pattern matching, dry-run mode, and recursive/non-recursive directory traversal. The function validates inputs and delegates to either fix_file_dates() for single files or process_directory() for directory operations.",
      "return_annotation": null,
      "return_explained": "This function does not return a value (implicitly returns None). It either exits with sys.exit(1) on error or completes execution after calling the appropriate processing functions.",
      "settings_required": [
        "Requires fix_file_dates() function to be defined in the same module for processing single files",
        "Requires process_directory() function to be defined in the same module for processing directories",
        "File system read/write permissions for the target files and directories"
      ],
      "source_code": "def main():\n    parser = argparse.ArgumentParser(\n        description=\"Fix file dates by setting all timestamps to the oldest date\",\n        formatter_class=argparse.RawDescriptionHelpFormatter,\n        epilog=\"\"\"\nExamples:\n  # Dry run on all files in a directory (recursive by default)\n  python3 fix_file_dates.py /path/to/folder --dry-run\n\n  # Actually fix the dates for all files in directory\n  python3 fix_file_dates.py /path/to/folder\n\n  # Process with custom pattern\n  python3 fix_file_dates.py /path/to/folder --pattern \"*.pdf\"\n\n  # Process only specific pattern\n  python3 fix_file_dates.py /path/to/folder --pattern \"*_fully_signed.pdf\"\n\n  # Process single file\n  python3 fix_file_dates.py --file /path/to/file.pdf\n\n  # Process without recursing subdirectories\n  python3 fix_file_dates.py /path/to/folder --no-recursive\n        \"\"\"\n    )\n    \n    parser.add_argument(\n        'directory',\n        nargs='?',\n        help='Directory to process (required unless using --file)'\n    )\n    \n    parser.add_argument(\n        '--pattern',\n        default='*',\n        help='File pattern to match (default: * = all files)'\n    )\n    \n    parser.add_argument(\n        '--file',\n        help='Process a single file instead of a directory'\n    )\n    \n    parser.add_argument(\n        '--dry-run',\n        action='store_true',\n        help='Show what would be done without making changes'\n    )\n    \n    parser.add_argument(\n        '--no-recursive',\n        action='store_true',\n        help='Do not search subdirectories'\n    )\n    \n    args = parser.parse_args()\n    \n    if args.file:\n        # Process single file\n        if not os.path.exists(args.file):\n            print(f\"Error: File {args.file} does not exist\")\n            sys.exit(1)\n        fix_file_dates(args.file, args.dry_run)\n    else:\n        # Process directory\n        if not args.directory:\n            parser.error(\"directory is required unless using --file\")\n        \n        process_directory(\n            args.directory,\n            pattern=args.pattern,\n            dry_run=args.dry_run,\n            recursive=not args.no_recursive\n        )",
      "source_file": "/tf/active/vicechatdev/mailsearch/fix_file_dates.py",
      "tags": [
        "cli",
        "command-line",
        "argparse",
        "file-processing",
        "timestamp-management",
        "entry-point",
        "file-dates",
        "directory-processing",
        "dry-run",
        "pattern-matching"
      ],
      "updated_at": "2025-12-07T01:59:48.525714",
      "usage_example": "# This function is typically called as the script entry point:\n# if __name__ == '__main__':\n#     main()\n\n# Command-line usage examples:\n# python3 fix_file_dates.py /path/to/folder --dry-run\n# python3 fix_file_dates.py /path/to/folder --pattern '*.pdf'\n# python3 fix_file_dates.py --file /path/to/file.pdf\n# python3 fix_file_dates.py /path/to/folder --no-recursive"
    },
    {
      "best_practices": [
        "This function should be run with appropriate permissions to delete directories within the configured scripts folder",
        "The function is platform-dependent and requires Unix-like systems (Linux/macOS) due to the 'du' command usage",
        "Consider backing up important data before running this cleanup operation",
        "The function performs cleanup on ALL sessions without confirmation - ensure this is the intended behavior before execution",
        "Monitor the output carefully to verify that the correct number of venvs are being cleaned and projects are preserved",
        "The function uses subprocess to call system commands which may have security implications if paths are not properly validated",
        "Exit codes should be properly handled by the calling process to determine if cleanup was successful"
      ],
      "class_interface": {},
      "complexity": "moderate",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-06 12:42:22",
      "decorators": [],
      "dependencies": [
        "pathlib",
        "subprocess",
        "smartstat_config",
        "agent_executor"
      ],
      "description": "A cleanup utility function that removes virtual environment directories from all SmartStat sessions while preserving project files, reporting disk space freed and cleanup statistics.",
      "docstring": null,
      "id": 772,
      "imports": [
        "import os",
        "import sys",
        "from pathlib import Path",
        "from smartstat_config import Config",
        "from agent_executor import AgentExecutor",
        "import subprocess"
      ],
      "imports_required": [
        "from pathlib import Path",
        "from smartstat_config import Config",
        "from agent_executor import AgentExecutor",
        "import subprocess"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 55,
      "line_start": 12,
      "name": "main_v111",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as the main entry point for a comprehensive cleanup operation across all SmartStat sessions. It initializes the necessary configuration and agent executor, measures disk usage before and after cleanup, executes the cleanup of virtual environment directories across all sessions, and provides detailed reporting of the cleanup results including space freed, number of venvs cleaned, and projects preserved. This is useful for maintenance tasks to reclaim disk space by removing temporary virtual environments while keeping important project files intact.",
      "return_annotation": null,
      "return_explained": "Returns an integer exit code: 0 for successful completion, 1 if cleanup failed. This follows standard Unix convention for command-line tools where 0 indicates success and non-zero indicates failure.",
      "settings_required": [
        "Config class must be properly configured with GENERATED_SCRIPTS_FOLDER attribute pointing to the directory containing SmartStat session folders",
        "AgentExecutor class must be available and properly implement the cleanup_venv_directories() method",
        "The 'du' command must be available on the system (Unix/Linux/macOS) for disk usage calculation",
        "Appropriate file system permissions to read directory sizes and delete virtual environment directories"
      ],
      "source_code": "def main():\n    # Initialize config and agent executor\n    config = Config()\n    agent_executor = AgentExecutor(config)\n    \n    print(f\"\\n{'='*60}\")\n    print(f\"Running FULL cleanup on all SmartStat sessions\")\n    print(f\"{'='*60}\\n\")\n    \n    # Get total size before\n    import subprocess\n    scripts_dir = Path(config.GENERATED_SCRIPTS_FOLDER)\n    result = subprocess.run(['du', '-sh', str(scripts_dir)], capture_output=True, text=True)\n    size_before = result.stdout.split()[0]\n    print(f\"Total size BEFORE cleanup: {size_before}\")\n    \n    # Count sessions and projects\n    session_count = len([d for d in scripts_dir.iterdir() if d.is_dir()])\n    print(f\"Total sessions: {session_count}\\n\")\n    \n    # Run cleanup on all sessions\n    print(\"Running cleanup...\")\n    cleanup_result = agent_executor.cleanup_venv_directories()  # No session_id = clean all\n    \n    if cleanup_result['success']:\n        print(f\"\\n\u2713 Cleanup completed successfully!\")\n        print(f\"  - Total venvs cleaned: {cleanup_result['cleaned_count']}\")\n        print(f\"  - Total projects preserved: {cleanup_result['preserved_count']}\")\n        print(f\"  - Total space freed: {cleanup_result['space_freed_mb'] / 1024:.2f} GB\")\n    else:\n        print(f\"\\n\u2717 Cleanup failed: {cleanup_result.get('error', 'Unknown error')}\")\n        return 1\n    \n    # Get size after\n    result = subprocess.run(['du', '-sh', str(scripts_dir)], capture_output=True, text=True)\n    size_after = result.stdout.split()[0]\n    print(f\"\\nTotal size AFTER cleanup: {size_after}\")\n    \n    print(f\"\\n{'='*60}\")\n    print(f\"Full cleanup completed successfully!\")\n    print(f\"Before: {size_before} \u2192 After: {size_after}\")\n    print(f\"{'='*60}\\n\")\n    \n    return 0",
      "source_file": "/tf/active/vicechatdev/vice_ai/run_full_cleanup.py",
      "tags": [
        "cleanup",
        "maintenance",
        "disk-space",
        "virtual-environment",
        "venv",
        "session-management",
        "file-system",
        "utility",
        "smartstat",
        "batch-processing"
      ],
      "updated_at": "2025-12-07T01:59:48.525035",
      "usage_example": "if __name__ == '__main__':\n    import sys\n    from pathlib import Path\n    from smartstat_config import Config\n    from agent_executor import AgentExecutor\n    import subprocess\n    \n    # Run the cleanup\n    exit_code = main()\n    sys.exit(exit_code)"
    },
    {
      "best_practices": [
        "This function is hardcoded to test a specific session ID and should be modified or parameterized for production use",
        "Requires Unix-like system with 'du' command; will not work on Windows without modification",
        "Should be run with appropriate file system permissions to access and clean session directories",
        "The function performs destructive operations (deleting venv directories) and should only be run on test data",
        "Consider adding error handling for subprocess calls that might fail on different systems",
        "The test session ID should exist before running this function, or the function will return early with error code 1",
        "Output is verbose and designed for manual inspection; consider adding structured logging for automated testing"
      ],
      "class_interface": {},
      "complexity": "moderate",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-06 11:14:52",
      "decorators": [],
      "dependencies": [
        "pathlib",
        "subprocess",
        "smartstat_config",
        "agent_executor"
      ],
      "description": "A test function that validates the cleanup functionality of virtual environments in project directories by testing on a specific session, measuring disk space before/after cleanup, and verifying that important files are preserved.",
      "docstring": null,
      "id": 563,
      "imports": [
        "import os",
        "import sys",
        "from pathlib import Path",
        "from smartstat_config import Config",
        "from agent_executor import AgentExecutor",
        "import subprocess"
      ],
      "imports_required": [
        "from pathlib import Path",
        "from smartstat_config import Config",
        "from agent_executor import AgentExecutor",
        "import subprocess"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 86,
      "line_start": 12,
      "name": "main_v110",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as a comprehensive integration test for the venv cleanup feature. It tests the AgentExecutor's cleanup_venv_directories method on a hardcoded test session ID, measuring disk space usage before and after cleanup, verifying that virtual environments are removed while preserving critical project files (analysis_results.json, analysis_script.py). The function provides detailed console output showing the cleanup process and results.",
      "return_annotation": null,
      "return_explained": "Returns an integer exit code: 0 for successful cleanup test completion, 1 if the session directory is not found or if cleanup fails. This follows standard Unix exit code conventions where 0 indicates success.",
      "settings_required": [
        "Config object must be properly configured with GENERATED_SCRIPTS_FOLDER attribute pointing to the directory containing session folders",
        "Session directory with ID '558edb65-de39-403f-85c3-06ebfe8fa252' must exist in the GENERATED_SCRIPTS_FOLDER",
        "Unix-like system with 'du' command available for disk usage measurement",
        "Proper file system permissions to read session directories and execute cleanup operations"
      ],
      "source_code": "def main():\n    # Initialize config and agent executor\n    config = Config()\n    agent_executor = AgentExecutor(config)\n    \n    # Test cleanup on oldest session\n    test_session_id = \"558edb65-de39-403f-85c3-06ebfe8fa252\"\n    \n    print(f\"\\n{'='*60}\")\n    print(f\"Testing cleanup on session: {test_session_id}\")\n    print(f\"{'='*60}\\n\")\n    \n    # Check size before cleanup\n    session_dir = Path(config.GENERATED_SCRIPTS_FOLDER) / test_session_id  # Use GENERATED_SCRIPTS_FOLDER\n    if not session_dir.exists():\n        print(f\"ERROR: Session directory not found: {session_dir}\")\n        return 1\n    \n    # Get size before\n    import subprocess\n    result = subprocess.run(['du', '-sh', str(session_dir)], capture_output=True, text=True)\n    size_before = result.stdout.split()[0]\n    print(f\"Session size BEFORE cleanup: {size_before}\")\n    \n    # List projects\n    projects = [d for d in session_dir.iterdir() if d.is_dir() and d.name.startswith('project_')]\n    print(f\"Found {len(projects)} project(s) in session:\\n\")\n    \n    for proj in projects:\n        proj_result = subprocess.run(['du', '-sh', str(proj)], capture_output=True, text=True)\n        proj_size = proj_result.stdout.split()[0]\n        venv_path = proj / 'venv'\n        has_venv = '\u2713' if venv_path.exists() else '\u2717'\n        print(f\"  {proj.name}: {proj_size} [venv: {has_venv}]\")\n    \n    # Run cleanup\n    print(f\"\\nRunning cleanup...\")\n    cleanup_result = agent_executor.cleanup_venv_directories(test_session_id)\n    \n    if cleanup_result['success']:\n        print(f\"\\n\u2713 Cleanup completed successfully!\")\n        print(f\"  - Cleaned venvs: {cleanup_result['cleaned_count']}\")\n        print(f\"  - Space freed: {cleanup_result['space_freed_mb']:.2f} MB\")\n        print(f\"  - Preserved projects: {cleanup_result['preserved_count']}\")\n    else:\n        print(f\"\\n\u2717 Cleanup failed: {cleanup_result.get('error', 'Unknown error')}\")\n        return 1\n    \n    # Get size after\n    result = subprocess.run(['du', '-sh', str(session_dir)], capture_output=True, text=True)\n    size_after = result.stdout.split()[0]\n    print(f\"\\nSession size AFTER cleanup: {size_after}\")\n    \n    # Verify venvs are gone but projects remain\n    print(f\"\\nVerifying cleanup:\\n\")\n    for proj in projects:\n        proj_result = subprocess.run(['du', '-sh', str(proj)], capture_output=True, text=True)\n        proj_size = proj_result.stdout.split()[0]\n        venv_path = proj / 'venv'\n        has_venv = '\u2713' if venv_path.exists() else '\u2717'\n        print(f\"  {proj.name}: {proj_size} [venv: {has_venv}]\")\n        \n        # Check that important files are preserved\n        analysis_file = proj / 'analysis_results.json'\n        script_file = proj / 'analysis_script.py'\n        if analysis_file.exists():\n            print(f\"    \u2713 analysis_results.json preserved\")\n        if script_file.exists():\n            print(f\"    \u2713 analysis_script.py preserved\")\n    \n    print(f\"\\n{'='*60}\")\n    print(f\"Cleanup test completed successfully!\")\n    print(f\"{'='*60}\\n\")\n    \n    return 0",
      "source_file": "/tf/active/vicechatdev/vice_ai/test_cleanup.py",
      "tags": [
        "testing",
        "cleanup",
        "virtual-environment",
        "venv",
        "disk-space",
        "integration-test",
        "file-system",
        "session-management",
        "project-cleanup",
        "validation"
      ],
      "updated_at": "2025-12-07T01:59:48.524308",
      "usage_example": "if __name__ == '__main__':\n    import sys\n    from pathlib import Path\n    from smartstat_config import Config\n    from agent_executor import AgentExecutor\n    import subprocess\n    \n    exit_code = main()\n    sys.exit(exit_code)"
    },
    {
      "best_practices": [
        "Ensure the 'test_files' list is populated with valid file paths before calling this function",
        "Implement the required helper functions (test_pptx_file, test_docx_file, test_libreoffice_conversion) before using this function",
        "Install LibreOffice on the system to enable the conversion fallback feature",
        "Consider adding error handling for the case where helper functions are not defined",
        "The function modifies no state and only produces console output, making it safe for repeated execution",
        "Use this function as part of a test suite or diagnostic tool rather than in production code",
        "Consider capturing the results dictionary for programmatic access instead of relying solely on console output"
      ],
      "class_interface": {},
      "complexity": "moderate",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-06 09:59:06",
      "decorators": [],
      "dependencies": [
        "pathlib",
        "traceback",
        "python-pptx",
        "python-docx",
        "subprocess",
        "tempfile",
        "sys"
      ],
      "description": "A test harness function that validates the ability to open and process PowerPoint and Word document files, with fallback to LibreOffice conversion for problematic files.",
      "docstring": null,
      "id": 350,
      "imports": [
        "import sys",
        "from pathlib import Path",
        "import traceback",
        "import pptx",
        "from docx import Document as DocxDocument",
        "import subprocess",
        "import tempfile"
      ],
      "imports_required": [
        "import sys",
        "from pathlib import Path",
        "import traceback",
        "import pptx",
        "from docx import Document as DocxDocument",
        "import subprocess",
        "import tempfile"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 211,
      "line_start": 161,
      "name": "main_v109",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as a comprehensive testing utility for document file processing. It iterates through a predefined list of test files (PPTX, PPT, DOCX, DOC, DOCM formats), attempts to open them using native Python libraries (python-pptx and python-docx), and falls back to LibreOffice conversion if direct opening fails. It provides detailed console output with status indicators and generates a summary report of all test results.",
      "return_annotation": null,
      "return_explained": "This function does not return any value (implicitly returns None). Instead, it prints test results to the console and displays a summary table showing the status of each tested file with visual indicators (\u2713 for pass, \u274c for fail, \u26a0\ufe0f for pass with conversion, \u2753 for not found).",
      "settings_required": [
        "A global variable 'test_files' must be defined containing a list of file paths to test",
        "Functions 'test_pptx_file(file_path)' and 'test_docx_file(file_path)' must be defined in the same module",
        "Function 'test_libreoffice_conversion(file_path)' must be defined in the same module",
        "LibreOffice must be installed on the system for the conversion fallback to work",
        "Test files must exist at the paths specified in the 'test_files' list"
      ],
      "source_code": "def main():\n    print(\"=\"*80)\n    print(\"TESTING PROBLEMATIC FILES\")\n    print(\"=\"*80)\n    \n    results = {}\n    \n    for file_path in test_files:\n        file_path_obj = Path(file_path)\n        \n        if not file_path_obj.exists():\n            print(f\"\\n\u274c Skipping non-existent file: {file_path_obj.name}\")\n            results[file_path_obj.name] = \"NOT_FOUND\"\n            continue\n            \n        ext = file_path_obj.suffix.lower()\n        \n        if ext in ['.pptx', '.ppt']:\n            success = test_pptx_file(file_path)\n            results[file_path_obj.name] = \"PASS\" if success else \"FAIL\"\n            \n            # If direct opening failed, try LibreOffice conversion\n            if not success:\n                print(f\"\\nTrying LibreOffice conversion as fallback...\")\n                conv_success = test_libreoffice_conversion(file_path)\n                if conv_success:\n                    results[file_path_obj.name] = \"PASS_WITH_CONVERSION\"\n                    \n        elif ext in ['.docx', '.doc', '.docm']:\n            success = test_docx_file(file_path)\n            results[file_path_obj.name] = \"PASS\" if success else \"FAIL\"\n            \n            # If direct opening failed, try LibreOffice conversion\n            if not success:\n                print(f\"\\nTrying LibreOffice conversion as fallback...\")\n                conv_success = test_libreoffice_conversion(file_path)\n                if conv_success:\n                    results[file_path_obj.name] = \"PASS_WITH_CONVERSION\"\n    \n    # Print summary\n    print(\"\\n\" + \"=\"*80)\n    print(\"SUMMARY\")\n    print(\"=\"*80)\n    for filename, status in results.items():\n        status_icon = {\n            \"PASS\": \"\u2713\",\n            \"FAIL\": \"\u274c\",\n            \"PASS_WITH_CONVERSION\": \"\u26a0\ufe0f\",\n            \"NOT_FOUND\": \"\u2753\"\n        }.get(status, \"?\")\n        print(f\"{status_icon} {filename}: {status}\")",
      "source_file": "/tf/active/vicechatdev/docchat/test_problematic_files.py",
      "tags": [
        "testing",
        "document-processing",
        "file-validation",
        "powerpoint",
        "word",
        "pptx",
        "docx",
        "libreoffice",
        "conversion",
        "test-harness",
        "file-handling",
        "error-handling"
      ],
      "updated_at": "2025-12-07T01:59:48.523619",
      "usage_example": "# Define required dependencies first\ntest_files = [\n    '/path/to/presentation.pptx',\n    '/path/to/document.docx',\n    '/path/to/legacy.ppt'\n]\n\ndef test_pptx_file(file_path):\n    try:\n        prs = pptx.Presentation(file_path)\n        return True\n    except:\n        return False\n\ndef test_docx_file(file_path):\n    try:\n        doc = DocxDocument(file_path)\n        return True\n    except:\n        return False\n\ndef test_libreoffice_conversion(file_path):\n    try:\n        result = subprocess.run(['libreoffice', '--headless', '--convert-to', 'pdf', file_path], capture_output=True)\n        return result.returncode == 0\n    except:\n        return False\n\n# Run the test suite\nif __name__ == '__main__':\n    main()"
    },
    {
      "best_practices": [
        "Ensure all required environment variables (especially OPENAI_API_KEY) are set before calling this function",
        "Verify that the ImprovedProjectVictoriaGenerator class is properly defined with a run_complete_pipeline method",
        "Check that all required data sources and PDF files are accessible before execution",
        "Handle the returned output_path appropriately, checking if the file was successfully created",
        "Consider wrapping the call in try-except blocks to handle potential errors from the pipeline execution",
        "Ensure sufficient disk space for ChromaDB storage and generated output files",
        "Monitor API usage and costs when using OpenAI services through this function"
      ],
      "class_interface": {},
      "complexity": "moderate",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-06 08:32:26",
      "decorators": [],
      "dependencies": [
        "os",
        "re",
        "json",
        "tiktoken",
        "typing",
        "datetime",
        "chromadb",
        "langchain_openai",
        "sentence_transformers",
        "fitz",
        "OneCo_hybrid_RAG"
      ],
      "description": "Entry point function that instantiates an ImprovedProjectVictoriaGenerator and executes its complete pipeline to generate disclosure documents.",
      "docstring": "Main function to run the improved disclosure generator.",
      "id": 99,
      "imports": [
        "import os",
        "import re",
        "import json",
        "import tiktoken",
        "from typing import List",
        "from typing import Dict",
        "from typing import Any",
        "from typing import Tuple",
        "from datetime import datetime",
        "import chromadb",
        "from langchain_openai import ChatOpenAI",
        "from sentence_transformers import CrossEncoder",
        "import fitz",
        "from OneCo_hybrid_RAG import MyEmbeddingFunction"
      ],
      "imports_required": [
        "import os",
        "import re",
        "import json",
        "import tiktoken",
        "from typing import List, Dict, Any, Tuple",
        "from datetime import datetime",
        "import chromadb",
        "from langchain_openai import ChatOpenAI",
        "from sentence_transformers import CrossEncoder",
        "import fitz",
        "from OneCo_hybrid_RAG import MyEmbeddingFunction"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 793,
      "line_start": 789,
      "name": "main_v108",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as the main entry point for running the improved disclosure generator system. It creates an instance of ImprovedProjectVictoriaGenerator, executes the full pipeline for generating disclosure documents (likely involving RAG-based document generation using embeddings and LLMs), and returns the path to the generated output file. This is typically used as the primary execution function when running the disclosure generation system.",
      "return_annotation": null,
      "return_explained": "Returns a string representing the file path (output_path) where the generated disclosure document has been saved. The exact format and location depend on the ImprovedProjectVictoriaGenerator's configuration and run_complete_pipeline method implementation.",
      "settings_required": [
        "OPENAI_API_KEY environment variable (required for langchain_openai.ChatOpenAI)",
        "ImprovedProjectVictoriaGenerator class must be defined and importable in the same module or imported",
        "OneCo_hybrid_RAG module must be available with MyEmbeddingFunction class",
        "ChromaDB database configuration and initialization",
        "PDF files or data sources required by the ImprovedProjectVictoriaGenerator pipeline",
        "Sentence transformer model files for CrossEncoder (downloaded automatically on first use)",
        "Tiktoken encoding files (downloaded automatically on first use)"
      ],
      "source_code": "def main():\n    \"\"\"Main function to run the improved disclosure generator.\"\"\"\n    generator = ImprovedProjectVictoriaGenerator()\n    output_path = generator.run_complete_pipeline()\n    return output_path",
      "source_file": "/tf/active/vicechatdev/improved_project_victoria_generator.py",
      "tags": [
        "entry-point",
        "main-function",
        "disclosure-generation",
        "RAG",
        "document-generation",
        "pipeline",
        "LLM",
        "embeddings",
        "chromadb",
        "langchain",
        "project-victoria"
      ],
      "updated_at": "2025-12-07T01:59:48.523001",
      "usage_example": "# Ensure all environment variables are set\nimport os\nos.environ['OPENAI_API_KEY'] = 'your-api-key-here'\n\n# Import the function (assuming it's in disclosure_generator.py)\nfrom disclosure_generator import main\n\n# Run the disclosure generator\noutput_file_path = main()\nprint(f'Disclosure document generated at: {output_file_path}')"
    },
    {
      "best_practices": [
        "Ensure the Poulpharm_labosoft.json schema file exists and is valid before running this function",
        "Set up required API keys (likely OpenAI) as environment variables before execution",
        "This function is intended for demonstration and testing purposes, not production use",
        "The function uses exception handling to gracefully handle schema loading errors and query generation failures",
        "The max_rows parameter is set to 100 to limit result sets during demonstration",
        "Review the generated SQL queries before executing them against a production database",
        "The function prints extensive output with emojis for readability - consider redirecting output if logging to files",
        "Each example query demonstrates different aspects of the database schema (requests, statistics, results, contacts, analysis groups)"
      ],
      "class_interface": {},
      "complexity": "moderate",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-06 18:05:58",
      "decorators": [],
      "dependencies": [
        "sql_query_generator"
      ],
      "description": "Demonstrates the SmartStat SQL Workflow by loading a database schema, initializing a SQL query generator, and generating SQL queries from natural language requests for various laboratory data analysis scenarios.",
      "docstring": null,
      "id": 1518,
      "imports": [
        "import sys",
        "import os",
        "from sql_query_generator import SQLQueryGenerator",
        "from sql_query_generator import DatabaseSchema",
        "from sql_query_generator import ConnectionConfig"
      ],
      "imports_required": [
        "from sql_query_generator import SQLQueryGenerator",
        "from sql_query_generator import DatabaseSchema",
        "from sql_query_generator import ConnectionConfig"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 74,
      "line_start": 13,
      "name": "main_v107",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as a demonstration and testing tool for the SmartStat SQL workflow system. It showcases the complete pipeline from loading a database schema (Poulpharm_labosoft.json) to generating SQL queries from natural language requests. The function demonstrates five example use cases including laboratory requests, customer statistics, bacteriology results, veterinarian information, and analysis groups. It's designed to validate the integration between natural language processing and SQL query generation before deployment in the SmartStat Flask application.",
      "return_annotation": null,
      "return_explained": "This function does not return any value (implicitly returns None). It performs side effects by printing demonstration output to the console, including schema information, generated SQL queries, explanations, and metadata for each example query. The function may return early (None) if the schema fails to load.",
      "settings_required": [
        "Poulpharm_labosoft.json file must exist in the current working directory containing the database schema definition",
        "The SQLQueryGenerator class must be properly configured with any required API keys or credentials (likely OpenAI API key for natural language processing)",
        "Database connection configuration may be required depending on SQLQueryGenerator implementation"
      ],
      "source_code": "def main():\n    print(\"\ud83d\ude80 SmartStat SQL Workflow Demonstration\\n\")\n    \n    # Load the database schema\n    print(\"\ud83d\udccb Loading database schema...\")\n    try:\n        schema = DatabaseSchema.from_json(\"Poulpharm_labosoft.json\")\n        print(f\"\u2705 Loaded schema for: {schema.database_name}\")\n        print(f\"   Description: {schema.description}\")\n        print(f\"   Tables: {len(schema.complete_table_list)}\")\n        print()\n    except Exception as e:\n        print(f\"\u274c Error loading schema: {e}\")\n        return\n    \n    # Initialize the SQL query generator\n    print(\"\ud83d\udd27 Initializing SQL query generator...\")\n    query_generator = SQLQueryGenerator(schema)\n    print(\"\u2705 Query generator ready\\n\")\n    \n    # Example queries to demonstrate the workflow\n    example_queries = [\n        \"Show me recent laboratory requests with sample information from the last month\",\n        \"Get customer statistics including number of requests and most common tests\",\n        \"Find bacteriology results with antibiotic sensitivity data\",\n        \"List veterinarians and their associated practices with contact information\",\n        \"Show analysis groups and their associated individual analyses\"\n    ]\n    \n    print(\"\ud83e\uddea Generating SQL queries for example analysis requests:\\n\")\n    \n    for i, user_query in enumerate(example_queries, 1):\n        print(f\"\ud83d\udcdd Example {i}: {user_query}\")\n        print(\"-\" * 80)\n        \n        try:\n            # Generate SQL query\n            sql_query, metadata = query_generator.generate_sql_query(user_query, max_rows=100)\n            \n            print(f\"\ud83d\udca1 Explanation: {metadata['explanation']}\")\n            print(\"\\n\ud83d\udd0d Generated SQL Query:\")\n            print(\"```sql\")\n            print(sql_query)\n            print(\"```\")\n            \n            print(f\"\\n\ud83d\udcca Metadata:\")\n            print(f\"   Database: {metadata['database_name']}\")\n            print(f\"   Max rows: {metadata['max_rows']}\")\n            print(f\"   Generated at: {metadata['generated_at']}\")\n            \n        except Exception as e:\n            print(f\"\u274c Error generating query: {e}\")\n        \n        print(\"\\n\" + \"=\"*80 + \"\\n\")\n    \n    print(\"\ud83c\udfaf Workflow Summary:\")\n    print(\"1. User provides natural language analysis request\")\n    print(\"2. AI analyzes request against database schema\")\n    print(\"3. Appropriate SQL query is generated\")\n    print(\"4. Query is executed to retrieve relevant data\")\n    print(\"5. Data continues through normal analysis pipeline\")\n    print(\"\\n\u2728 Ready to integrate with SmartStat Flask application!\")",
      "source_file": "/tf/active/vicechatdev/smartstat/demo_sql_workflow.py",
      "tags": [
        "demonstration",
        "sql-generation",
        "natural-language-processing",
        "database-schema",
        "workflow",
        "laboratory-data",
        "query-generator",
        "testing",
        "example",
        "smartstat",
        "veterinary",
        "bacteriology"
      ],
      "updated_at": "2025-12-07T01:59:48.522274",
      "usage_example": "# Ensure Poulpharm_labosoft.json exists in current directory\n# Set any required environment variables (e.g., OPENAI_API_KEY)\n\nfrom sql_query_generator import SQLQueryGenerator, DatabaseSchema, ConnectionConfig\n\ndef main():\n    print(\"\ud83d\ude80 SmartStat SQL Workflow Demonstration\\n\")\n    \n    print(\"\ud83d\udccb Loading database schema...\")\n    try:\n        schema = DatabaseSchema.from_json(\"Poulpharm_labosoft.json\")\n        print(f\"\u2705 Loaded schema for: {schema.database_name}\")\n        print(f\"   Description: {schema.description}\")\n        print(f\"   Tables: {len(schema.complete_table_list)}\")\n        print()\n    except Exception as e:\n        print(f\"\u274c Error loading schema: {e}\")\n        return\n    \n    print(\"\ud83d\udd27 Initializing SQL query generator...\")\n    query_generator = SQLQueryGenerator(schema)\n    print(\"\u2705 Query generator ready\\n\")\n    \n    example_queries = [\n        \"Show me recent laboratory requests with sample information from the last month\",\n        \"Get customer statistics including number of requests and most common tests\"\n    ]\n    \n    print(\"\ud83e\uddea Generating SQL queries for example analysis requests:\\n\")\n    \n    for i, user_query in enumerate(example_queries, 1):\n        print(f\"\ud83d\udcdd Example {i}: {user_query}\")\n        print(\"-\" * 80)\n        \n        try:\n            sql_query, metadata = query_generator.generate_sql_query(user_query, max_rows=100)\n            \n            print(f\"\ud83d\udca1 Explanation: {metadata['explanation']}\")\n            print(\"\\n\ud83d\udd0d Generated SQL Query:\")\n            print(\"sql\")\n            print(sql_query)\n            print(\"\")\n            \n            print(f\"\\n\ud83d\udcca Metadata:\")\n            print(f\"   Database: {metadata['database_name']}\")\n            print(f\"   Max rows: {metadata['max_rows']}\")\n            print(f\"   Generated at: {metadata['generated_at']}\")\n            \n        except Exception as e:\n            print(f\"\u274c Error generating query: {e}\")\n        \n        print(\"\\n\" + \"=\"*80 + \"\\n\")\n\nif __name__ == \"__main__\":\n    main()"
    },
    {
      "best_practices": [
        "Ensure the database schema JSON file exists and is properly formatted before running this function",
        "The function expects a specific schema file name ('database_schema_20251003_120434.json') - modify this if using a different schema file",
        "This is a demonstration function intended for testing and documentation purposes, not for production use",
        "Error handling is implemented for schema loading and query generation, but errors are only printed to console",
        "The function uses a hardcoded list of example queries - customize these based on your specific use case",
        "The max_rows parameter is set to 100 for all queries - adjust this based on expected data volume",
        "Console output uses emoji characters for visual clarity - ensure your terminal supports UTF-8 encoding",
        "The function demonstrates the complete workflow but does not actually execute the generated SQL queries against a database"
      ],
      "class_interface": {},
      "complexity": "moderate",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-06 16:22:16",
      "decorators": [],
      "dependencies": [
        "sql_query_generator"
      ],
      "description": "Demonstrates a SmartStat SQL workflow by loading a database schema, initializing a SQL query generator, and generating SQL queries from natural language requests with detailed output and metadata.",
      "docstring": null,
      "id": 1248,
      "imports": [
        "import sys",
        "import os",
        "from sql_query_generator import SQLQueryGenerator",
        "from sql_query_generator import DatabaseSchema",
        "from sql_query_generator import ConnectionConfig"
      ],
      "imports_required": [
        "from sql_query_generator import SQLQueryGenerator",
        "from sql_query_generator import DatabaseSchema",
        "from sql_query_generator import ConnectionConfig"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 74,
      "line_start": 13,
      "name": "main_v106",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This is a demonstration/testing function that showcases the complete workflow of the SmartStat SQL query generation system. It loads a database schema from a JSON file, creates a SQLQueryGenerator instance, and processes multiple example natural language queries to demonstrate how user requests are converted into SQL queries. The function provides detailed console output with emojis for visual clarity, showing the schema loading process, query generation results, explanations, and metadata for each example query. It serves as both a testing tool and documentation of the intended workflow for integration with the SmartStat Flask application.",
      "return_annotation": null,
      "return_explained": "This function does not return any value (implicitly returns None). It performs side effects by printing demonstration output to the console. If the schema file cannot be loaded, the function returns early without processing queries.",
      "settings_required": [
        "A database schema JSON file named 'database_schema_20251003_120434.json' must exist in the current working directory",
        "The sql_query_generator module must be available in the Python path",
        "The schema JSON file must contain valid database schema information including database_name, description, and complete_table_list"
      ],
      "source_code": "def main():\n    print(\"\ud83d\ude80 SmartStat SQL Workflow Demonstration\\n\")\n    \n    # Load the database schema\n    print(\"\ud83d\udccb Loading database schema...\")\n    try:\n        schema = DatabaseSchema.from_json(\"database_schema_20251003_120434.json\")\n        print(f\"\u2705 Loaded schema for: {schema.database_name}\")\n        print(f\"   Description: {schema.description}\")\n        print(f\"   Tables: {len(schema.complete_table_list)}\")\n        print()\n    except Exception as e:\n        print(f\"\u274c Error loading schema: {e}\")\n        return\n    \n    # Initialize the SQL query generator\n    print(\"\ud83d\udd27 Initializing SQL query generator...\")\n    query_generator = SQLQueryGenerator(schema)\n    print(\"\u2705 Query generator ready\\n\")\n    \n    # Example queries to demonstrate the workflow\n    example_queries = [\n        \"Show me recent laboratory requests with sample information from the last month\",\n        \"Get customer statistics including number of requests and most common tests\",\n        \"Find bacteriology results with antibiotic sensitivity data\",\n        \"List veterinarians and their associated practices with contact information\",\n        \"Show analysis groups and their associated individual analyses\"\n    ]\n    \n    print(\"\ud83e\uddea Generating SQL queries for example analysis requests:\\n\")\n    \n    for i, user_query in enumerate(example_queries, 1):\n        print(f\"\ud83d\udcdd Example {i}: {user_query}\")\n        print(\"-\" * 80)\n        \n        try:\n            # Generate SQL query\n            sql_query, metadata = query_generator.generate_sql_query(user_query, max_rows=100)\n            \n            print(f\"\ud83d\udca1 Explanation: {metadata['explanation']}\")\n            print(\"\\n\ud83d\udd0d Generated SQL Query:\")\n            print(\"```sql\")\n            print(sql_query)\n            print(\"```\")\n            \n            print(f\"\\n\ud83d\udcca Metadata:\")\n            print(f\"   Database: {metadata['database_name']}\")\n            print(f\"   Max rows: {metadata['max_rows']}\")\n            print(f\"   Generated at: {metadata['generated_at']}\")\n            \n        except Exception as e:\n            print(f\"\u274c Error generating query: {e}\")\n        \n        print(\"\\n\" + \"=\"*80 + \"\\n\")\n    \n    print(\"\ud83c\udfaf Workflow Summary:\")\n    print(\"1. User provides natural language analysis request\")\n    print(\"2. AI analyzes request against database schema\")\n    print(\"3. Appropriate SQL query is generated\")\n    print(\"4. Query is executed to retrieve relevant data\")\n    print(\"5. Data continues through normal analysis pipeline\")\n    print(\"\\n\u2728 Ready to integrate with SmartStat Flask application!\")",
      "source_file": "/tf/active/vicechatdev/full_smartstat/demo_sql_workflow.py",
      "tags": [
        "demonstration",
        "workflow",
        "sql-generation",
        "natural-language-processing",
        "database-schema",
        "query-generator",
        "testing",
        "console-output",
        "smartstat",
        "example-queries",
        "metadata",
        "laboratory-data"
      ],
      "updated_at": "2025-12-07T01:59:48.521518",
      "usage_example": "# Ensure the schema file exists in the current directory\n# database_schema_20251003_120434.json\n\nfrom sql_query_generator import SQLQueryGenerator, DatabaseSchema, ConnectionConfig\n\ndef main():\n    print(\"\ud83d\ude80 SmartStat SQL Workflow Demonstration\\n\")\n    \n    print(\"\ud83d\udccb Loading database schema...\")\n    try:\n        schema = DatabaseSchema.from_json(\"database_schema_20251003_120434.json\")\n        print(f\"\u2705 Loaded schema for: {schema.database_name}\")\n        print(f\"   Description: {schema.description}\")\n        print(f\"   Tables: {len(schema.complete_table_list)}\")\n        print()\n    except Exception as e:\n        print(f\"\u274c Error loading schema: {e}\")\n        return\n    \n    print(\"\ud83d\udd27 Initializing SQL query generator...\")\n    query_generator = SQLQueryGenerator(schema)\n    print(\"\u2705 Query generator ready\\n\")\n    \n    example_queries = [\n        \"Show me recent laboratory requests with sample information from the last month\"\n    ]\n    \n    print(\"\ud83e\uddea Generating SQL queries for example analysis requests:\\n\")\n    \n    for i, user_query in enumerate(example_queries, 1):\n        print(f\"\ud83d\udcdd Example {i}: {user_query}\")\n        print(\"-\" * 80)\n        \n        try:\n            sql_query, metadata = query_generator.generate_sql_query(user_query, max_rows=100)\n            print(f\"\ud83d\udca1 Explanation: {metadata['explanation']}\")\n            print(\"\\n\ud83d\udd0d Generated SQL Query:\")\n            print(\"sql\")\n            print(sql_query)\n            print(\"\")\n        except Exception as e:\n            print(f\"\u274c Error generating query: {e}\")\n\nif __name__ == \"__main__\":\n    main()"
    },
    {
      "best_practices": [
        "Ensure ChromaDB server is running before executing this function",
        "Use --skip-collections to avoid reprocessing already cleaned collections",
        "Adjust --similarity-threshold based on your data characteristics (higher values are more strict)",
        "The function includes a 1-second sleep between collections to avoid overwhelming the server",
        "Errors in individual collections are caught and logged but don't stop the entire process",
        "Monitor disk space as cleaned collections are created as new collections rather than modifying existing ones",
        "Consider using --skip-summarization for faster processing if summarization is not needed",
        "The function expects a clean_collection function to be defined elsewhere in the module"
      ],
      "class_interface": {},
      "complexity": "moderate",
      "component_type": "function",
      "conditional_imports": [
        {
          "condition": "Required by the clean_collection function that this main function calls",
          "import": "from src.cleaners.hash_cleaner import HashCleaner",
          "optional": false
        },
        {
          "condition": "Required by the clean_collection function that this main function calls",
          "import": "from src.cleaners.similarity_cleaner import SimilarityCleaner",
          "optional": false
        },
        {
          "condition": "Required by the clean_collection function that this main function calls",
          "import": "from src.cleaners.combined_cleaner import CombinedCleaner",
          "optional": false
        },
        {
          "condition": "Required by the cleaning utilities",
          "import": "from src.utils.hash_utils import hash_text",
          "optional": false
        },
        {
          "condition": "Required by the cleaning utilities",
          "import": "from src.utils.similarity_utils import calculate_similarity",
          "optional": false
        },
        {
          "condition": "Required by the cleaning utilities",
          "import": "from src.clustering.text_clusterer import TextClusterer",
          "optional": false
        },
        {
          "condition": "Required for configuration settings",
          "import": "from src.config import Config",
          "optional": false
        }
      ],
      "created_at": "2025-12-06 10:32:50",
      "decorators": [],
      "dependencies": [
        "argparse",
        "chromadb",
        "time",
        "tqdm",
        "src.cleaners.hash_cleaner",
        "src.cleaners.similarity_cleaner",
        "src.cleaners.combined_cleaner",
        "src.utils.hash_utils",
        "src.utils.similarity_utils",
        "src.clustering.text_clusterer",
        "src.config"
      ],
      "description": "Command-line interface function that orchestrates the cleaning of ChromaDB collections by removing duplicates and similar documents, with options to skip collections and customize the cleaning process.",
      "docstring": null,
      "id": 434,
      "imports": [
        "import argparse",
        "import chromadb",
        "from chromadb.config import Settings",
        "from typing import List",
        "from typing import Dict",
        "from typing import Any",
        "import os",
        "import time",
        "from tqdm import tqdm",
        "from src.cleaners.hash_cleaner import HashCleaner",
        "from src.cleaners.similarity_cleaner import SimilarityCleaner",
        "from src.cleaners.combined_cleaner import CombinedCleaner",
        "from src.utils.hash_utils import hash_text",
        "from src.utils.similarity_utils import calculate_similarity",
        "from src.clustering.text_clusterer import TextClusterer",
        "from src.config import Config"
      ],
      "imports_required": [
        "import argparse",
        "import chromadb",
        "from chromadb.config import Settings",
        "import time",
        "from tqdm import tqdm"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 68,
      "line_start": 20,
      "name": "main_v105",
      "parameters": [],
      "parameters_explained": {
        "none": "This function takes no direct parameters. It uses argparse to parse command-line arguments including: --host (ChromaDB server host, default 'vice_chroma'), --port (ChromaDB server port, default 8000), --similarity-threshold (float threshold for detecting similar documents, default 0.95), --skip-collections (list of collection names to skip), --suffix (suffix for cleaned collection names, default '_clean'), and --skip-summarization (flag to skip summarization step)"
      },
      "parent_class": null,
      "purpose": "This is the main entry point for a ChromaDB collection cleaning utility. It connects to a ChromaDB instance, retrieves all collections, filters out collections to skip (including already cleaned ones), and processes each collection through a cleaning pipeline that removes duplicates and optionally summarizes similar documents. The cleaned data is stored in new collections with a configurable suffix.",
      "return_annotation": null,
      "return_explained": "Returns None. This function performs side effects by creating new cleaned collections in ChromaDB and printing progress information to stdout. Errors during collection cleaning are caught and printed but do not stop the overall process.",
      "settings_required": [
        "ChromaDB server must be running and accessible at the specified host and port",
        "The clean_collection function must be defined in the same module or imported",
        "Custom cleaner modules (HashCleaner, SimilarityCleaner, CombinedCleaner) must be available in src.cleaners",
        "Utility modules (hash_utils, similarity_utils) must be available in src.utils",
        "TextClusterer must be available in src.clustering",
        "Config module must be available in src.config",
        "Write permissions to create new collections in ChromaDB"
      ],
      "source_code": "def main():\n    # Parse command line arguments\n    parser = argparse.ArgumentParser(description='Clean up all ChromaDB collections')\n    parser.add_argument('--host', type=str, default='vice_chroma', help='ChromaDB host')\n    parser.add_argument('--port', type=int, default=8000, help='ChromaDB port')\n    parser.add_argument('--similarity-threshold', type=float, default=0.95, \n                        help='Similarity threshold for detecting similar documents')\n    parser.add_argument('--skip-collections', type=str, nargs='+', default=[], \n                        help='Collections to skip (e.g., already cleaned ones)')\n    parser.add_argument('--suffix', type=str, default='_clean', \n                        help='Suffix to add to cleaned collection names')\n    parser.add_argument('--skip-summarization', action='store_true', \n                        help='Skip the summarization step')\n    \n    args = parser.parse_args()\n    \n    # Connect to ChromaDB\n    client = chromadb.HttpClient(\n        host=args.host,\n        port=args.port,\n        settings=Settings(anonymized_telemetry=False)\n    )\n    \n    # Get all available collections\n    collection_names = client.list_collections()\n    \n    # Filter out collections to skip (e.g., already cleaned ones)\n    skip_suffix = args.suffix\n    to_process = [name for name in collection_names \n                 if not name.endswith(skip_suffix) and name not in args.skip_collections]\n    \n    print(f\"Found {len(collection_names)} total collections\")\n    print(f\"Will clean {len(to_process)} collections (skipping {len(collection_names) - len(to_process)})\")\n    \n    # Process each collection\n    for collection_name in tqdm(to_process, desc=\"Cleaning collections\"):\n        try:\n            clean_collection(\n                collection_name=collection_name,\n                output_collection=f\"{collection_name}{args.suffix}\",\n                host=args.host,\n                port=args.port,\n                similarity_threshold=args.similarity_threshold,\n                skip_summarization=args.skip_summarization\n            )\n            # Sleep briefly to avoid overwhelming the server\n            time.sleep(1)\n        except Exception as e:\n            print(f\"Error cleaning collection {collection_name}: {e}\")",
      "source_file": "/tf/active/vicechatdev/chromadb-cleanup/main.py",
      "tags": [
        "cli",
        "command-line",
        "chromadb",
        "database-cleaning",
        "deduplication",
        "similarity-detection",
        "batch-processing",
        "data-cleaning",
        "vector-database",
        "collection-management"
      ],
      "updated_at": "2025-12-07T01:59:48.520720",
      "usage_example": "# Run from command line:\n# python script.py --host localhost --port 8000 --similarity-threshold 0.95 --skip-collections collection1 collection2 --suffix _cleaned --skip-summarization\n\n# Or call directly in Python:\nif __name__ == '__main__':\n    main()\n\n# Example with custom arguments:\n# python cleanup_script.py --host vice_chroma --port 8000 --similarity-threshold 0.90 --skip-collections already_clean_collection --suffix _v2"
    },
    {
      "best_practices": [
        "This function is hardcoded with specific UUIDs and hashes - it should be modified or parameterized for production use",
        "The function uses a timestamp-based naming scheme to avoid document name collisions",
        "Error handling is present but minimal - production code should have more robust error handling",
        "The function performs synchronous operations and may block for extended periods during upload and sync",
        "The test specifically validates using hash values as parent identifiers, which may be an edge case or alternative API usage pattern",
        "Ensure the RemarkableTestUpload and RemarkableReplicaBuilder classes are properly initialized with necessary credentials before calling",
        "The function assumes the target folder already exists and will fail if it doesn't",
        "Network connectivity is required throughout execution for authentication, upload, and sync operations"
      ],
      "class_interface": {},
      "complexity": "moderate",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-07 00:37:10",
      "decorators": [],
      "dependencies": [
        "sys",
        "os",
        "time",
        "requests"
      ],
      "description": "A test function that uploads a PDF document to a reMarkable tablet folder using the folder's hash value as the parent identifier instead of its UUID, then verifies the upload through replica synchronization.",
      "docstring": null,
      "id": 2075,
      "imports": [
        "import sys",
        "import os",
        "from upload_manager import UploadManager",
        "import time",
        "import requests"
      ],
      "imports_required": [
        "import sys",
        "import os",
        "from upload_manager import UploadManager",
        "import time",
        "import requests"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 84,
      "line_start": 14,
      "name": "main_v104",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as a test/demonstration script for the reMarkable tablet upload functionality. It specifically tests whether using a folder's hash value (instead of UUID) as the parent identifier works correctly when uploading documents. The function authenticates with the reMarkable cloud service, verifies a target folder exists, uploads a test PDF with a timestamped name, and then builds a local replica to verify the upload succeeded.",
      "return_annotation": null,
      "return_explained": "This function does not return any value (implicitly returns None). It performs side effects including printing status messages to console, uploading a document to reMarkable cloud, and building a local replica directory.",
      "settings_required": [
        "RemarkableTestUpload class must be defined and available in scope",
        "RemarkableReplicaBuilder class must be defined and available in scope",
        "Authentication credentials for reMarkable cloud service (likely stored in config or environment)",
        "Test PDF file must exist at the path specified by uploader.test_pdf_path",
        "Target folder with UUID '65aabcb0-94de-4e73-bb44-5f1e304c45a5' must exist in reMarkable account",
        "Network connectivity to reMarkable cloud services"
      ],
      "source_code": "def main():\n    print(\"\ud83d\udcc1 TESTING UPLOAD WITH HASH AS PARENT\")\n    print(\"=\" * 50)\n    \n    # Initialize uploader\n    uploader = RemarkableTestUpload()\n    \n    # Get folder info\n    myfolder_uuid = \"65aabcb0-94de-4e73-bb44-5f1e304c45a5\"\n    myfolder_hash = \"c9d2450e4584240a6a3e94237637861645a7ad9a3adc4a57a684f05399c75928\"\n    \n    print(f\"\ud83c\udfaf Target folder: Myfolder\")\n    print(f\"   UUID: {myfolder_uuid}\")\n    print(f\"   Hash: {myfolder_hash}\")\n    \n    # Authenticate\n    uploader.authenticate()\n    \n    # Verify folder exists\n    folder_info = uploader.get_node_info(myfolder_uuid)\n    if folder_info:\n        print(f\"\u2705 Found folder: {folder_info['metadata']['visibleName']}\")\n        print(f\"   Type: {folder_info['metadata']['type'].lower()}\")\n        print(f\"   Parent: {folder_info['metadata'].get('parent', 'root')}\")\n    else:\n        print(\"\u274c Folder not found!\")\n        return\n    \n    # Generate unique document name\n    timestamp = int(time.time())\n    doc_name = f\"HashParentTest_{timestamp}\"\n    \n    print(f\"\ud83d\udcc4 Uploading PDF: {doc_name}\")\n    print(f\"   Source file: {uploader.test_pdf_path}\")\n    print(f\"   Target folder: Myfolder (using HASH as parent)\")\n    \n    try:\n        # Upload with HASH as parent instead of UUID\n        result = uploader.upload_pdf_document(\n            pdf_path=uploader.test_pdf_path,\n            visible_name=doc_name,\n            parent_uuid=myfolder_hash  # Using HASH instead of UUID!\n        )\n        \n        if result:\n            print(f\"\u2705 Successfully uploaded PDF document: {doc_name}\")\n            print(f\"\ud83d\udd04 Document should appear in your device shortly after sync\")\n        else:\n            print(f\"\u274c Upload failed!\")\n            return\n            \n    except Exception as e:\n        print(f\"\u274c Upload error: {e}\")\n        return\n    \n    print(f\"\u2705 Upload to folder completed successfully!\")\n    print(f\"\ud83d\udd04 The document should now appear in Myfolder on your device\")\n    \n    # Run replica sync to verify\n    print(f\"\ud83d\udd04 Running replica sync to verify...\")\n    builder = RemarkableReplicaBuilder()\n    builder.build_replica()\n    \n    # Check if document appears in Myfolder\n    print(f\"\ud83d\udcc1 Myfolder contents after upload:\")\n    if os.path.exists(f\"{builder.replica_dir}/content/Myfolder\"):\n        for file in os.listdir(f\"{builder.replica_dir}/content/Myfolder\"):\n            if file.endswith('.pdf'):\n                print(f\"   \ud83d\udcc4 {file} (document)\")\n    else:\n        print(\"   \ud83d\udcc2 Myfolder directory not found\")",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/cloudtest/test_hash_parent_upload.py",
      "tags": [
        "remarkable",
        "tablet",
        "upload",
        "pdf",
        "cloud-sync",
        "testing",
        "file-management",
        "hash-identifier",
        "replica-sync",
        "document-upload"
      ],
      "updated_at": "2025-12-07T01:59:48.519931",
      "usage_example": "# Ensure required classes are imported/defined\n# from remarkable_upload import RemarkableTestUpload\n# from remarkable_replica import RemarkableReplicaBuilder\n\n# Simply call the function - it handles everything internally\nmain()\n\n# Expected output:\n# - Prints upload progress and status messages\n# - Uploads PDF to reMarkable folder 'Myfolder'\n# - Builds local replica to verify upload\n# - Lists contents of Myfolder after upload"
    },
    {
      "best_practices": [
        "This function should be called using asyncio.run(main()) or within an existing async context",
        "Ensure MIXED_AVAILABLE flag is properly set before calling this function",
        "All helper test functions (test_remarkable_auth, test_onedrive_auth, test_remarkable_discovery, test_mixed_mode_dry_run) must be defined and available",
        "Handle KeyboardInterrupt gracefully for user-initiated test cancellation",
        "The function uses sys.exit(1) which will terminate the entire program if MIXED_AVAILABLE is False",
        "Command-line arguments are mutually exclusive in practice (only one test mode should be specified at a time)",
        "Proper authentication credentials must be configured before running tests",
        "The function prints formatted output with emoji indicators for visual clarity in terminal"
      ],
      "class_interface": {},
      "complexity": "moderate",
      "component_type": "function",
      "conditional_imports": [
        {
          "condition": "Required for mixed cloud processing functionality; availability checked via MIXED_AVAILABLE flag",
          "import": "from mixed_cloud_processor import MixedCloudProcessor",
          "optional": false
        },
        {
          "condition": "Required for reMarkable cloud watching functionality",
          "import": "from mixed_cloud_processor import RemarkableCloudWatcher",
          "optional": false
        },
        {
          "condition": "Required for creating mixed processor instances",
          "import": "from mixed_cloud_processor import create_mixed_processor",
          "optional": false
        },
        {
          "condition": "Required for creating reMarkable session objects",
          "import": "from mixed_cloud_processor import create_remarkable_session",
          "optional": false
        },
        {
          "condition": "Required for OneDrive authentication and operations",
          "import": "from onedrive_client import OneDriveClient",
          "optional": false
        }
      ],
      "created_at": "2025-12-06 23:51:24",
      "decorators": [],
      "dependencies": [
        "asyncio",
        "argparse",
        "json",
        "sys",
        "pathlib",
        "logging",
        "traceback"
      ],
      "description": "Asynchronous main entry point for a test suite that validates Mixed Cloud Processor functionality, including authentication, discovery, and dry-run operations for reMarkable and OneDrive integration.",
      "docstring": null,
      "id": 1960,
      "imports": [
        "import asyncio",
        "import argparse",
        "import json",
        "import sys",
        "from pathlib import Path",
        "from mixed_cloud_processor import MixedCloudProcessor",
        "from mixed_cloud_processor import RemarkableCloudWatcher",
        "from mixed_cloud_processor import create_mixed_processor",
        "from mixed_cloud_processor import create_remarkable_session",
        "from onedrive_client import OneDriveClient",
        "import logging",
        "import traceback"
      ],
      "imports_required": [
        "import asyncio",
        "import argparse",
        "import json",
        "import sys",
        "from pathlib import Path",
        "import logging",
        "import traceback"
      ],
      "is_async": 1,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 239,
      "line_start": 176,
      "name": "main_v103",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as the command-line interface for testing a mixed cloud processing system that integrates reMarkable tablet cloud storage with OneDrive. It provides multiple test modes: authentication-only testing, reMarkable folder discovery testing, dry-run mode for mixed processing, and a comprehensive test suite that runs all tests sequentially. The function handles command-line arguments, orchestrates different test scenarios, and provides formatted console output with status indicators.",
      "return_annotation": null,
      "return_explained": "This function does not explicitly return a value (implicitly returns None). It performs side effects by printing test results to console and may exit the program with sys.exit(1) if MIXED_AVAILABLE is False.",
      "settings_required": [
        "MIXED_AVAILABLE global variable must be defined (boolean flag indicating if mixed cloud processor is available)",
        "test_remarkable_auth() async function must be defined in the same module",
        "test_onedrive_auth() async function must be defined in the same module",
        "test_remarkable_discovery(session) async function must be defined in the same module",
        "test_mixed_mode_dry_run() async function must be defined in the same module",
        "create_remarkable_session() function must be available from mixed_cloud_processor module",
        "reMarkable cloud credentials (likely environment variables or config file)",
        "OneDrive authentication credentials (likely environment variables or config file)"
      ],
      "source_code": "async def main():\n    parser = argparse.ArgumentParser(description=\"Test Mixed Cloud Processor\")\n    parser.add_argument('--test-auth', action='store_true', help='Test authentication only')\n    parser.add_argument('--test-discovery', action='store_true', help='Test reMarkable folder discovery')\n    parser.add_argument('--dry-run', action='store_true', help='Test mixed mode without processing')\n    \n    args = parser.parse_args()\n    \n    if not MIXED_AVAILABLE:\n        print(\"\u274c Mixed cloud processor not available\")\n        sys.exit(1)\n    \n    print(\"\ud83e\uddea Mixed Cloud Processor Test Suite\")\n    print(\"=\" * 50)\n    \n    try:\n        if args.test_auth:\n            # Test authentication only\n            auth_success = await test_remarkable_auth()\n            onedrive_success = await test_onedrive_auth()\n            \n            if auth_success and onedrive_success:\n                print(\"\\n\u2705 All authentication tests passed\")\n            else:\n                print(\"\\n\u26a0\ufe0f Some authentication tests failed\")\n        \n        elif args.test_discovery:\n            # Test discovery\n            session = create_remarkable_session()\n            await test_remarkable_discovery(session)\n        \n        elif args.dry_run:\n            # Full dry run test\n            success = await test_mixed_mode_dry_run()\n            \n            if success:\n                print(\"\\n\u2705 Mixed mode dry run successful\")\n            else:\n                print(\"\\n\u274c Mixed mode dry run failed\")\n        \n        else:\n            # Run all tests\n            print(\"\ud83d\udd10 Authentication Tests\")\n            print(\"-\" * 30)\n            auth_success = await test_remarkable_auth()\n            onedrive_success = await test_onedrive_auth()\n            \n            if auth_success:\n                print(\"\\n\ud83d\udd0d Discovery Tests\")\n                print(\"-\" * 30)\n                session = create_remarkable_session()\n                await test_remarkable_discovery(session)\n            \n            if auth_success and onedrive_success:\n                print(\"\\n\ud83d\udd04 Mixed Mode Tests\")\n                print(\"-\" * 30)\n                await test_mixed_mode_dry_run()\n    \n    except KeyboardInterrupt:\n        print(\"\\n\ud83d\udc4b Test interrupted\")\n    except Exception as e:\n        print(f\"\\n\u274c Unexpected error: {e}\")\n        import traceback\n        traceback.print_exc()",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/test_mixed_mode.py",
      "tags": [
        "async",
        "testing",
        "cloud-integration",
        "remarkable",
        "onedrive",
        "authentication",
        "cli",
        "test-suite",
        "command-line",
        "argparse",
        "discovery",
        "dry-run"
      ],
      "updated_at": "2025-12-07T01:59:48.519131",
      "usage_example": "# Run from command line:\n# Test all functionality:\npython script.py\n\n# Test authentication only:\npython script.py --test-auth\n\n# Test reMarkable discovery:\npython script.py --test-discovery\n\n# Run dry-run mode:\npython script.py --dry-run\n\n# Or call programmatically:\nimport asyncio\n\nasync def run_tests():\n    await main()\n\nif __name__ == '__main__':\n    asyncio.run(main())"
    },
    {
      "best_practices": [
        "Ensure all required helper functions (scan_output_folder, scan_wuxi2_folder, compare_documents, save_results, print_summary) are properly implemented before calling main()",
        "Verify that OUTPUT_FOLDER and WUXI2_FOLDER paths exist and are accessible before execution",
        "Ensure sufficient disk space is available for writing RESULTS_FILE and DETAILED_JSON outputs",
        "Consider wrapping the main() call in a try-except block to handle potential file I/O errors, permission issues, or missing dependencies",
        "The function assumes specific folder structures and naming conventions - ensure your directories match the expected format",
        "For large document sets, be aware that this function may take significant time to complete and consume considerable memory",
        "Consider adding logging instead of or in addition to print statements for production use",
        "This function has side effects (file I/O, console output) - it's not idempotent and should be used carefully in automated workflows"
      ],
      "class_interface": {},
      "complexity": "moderate",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-06 22:16:07",
      "decorators": [],
      "dependencies": [
        "PyPDF2"
      ],
      "description": "Main entry point function that orchestrates a document comparison workflow between two folders (mailsearch/output and wuxi2 repository), detecting signatures and generating comparison results.",
      "docstring": null,
      "id": 1843,
      "imports": [
        "import os",
        "import re",
        "import json",
        "import csv",
        "import hashlib",
        "from pathlib import Path",
        "from typing import Dict",
        "from typing import List",
        "from typing import Tuple",
        "from typing import Optional",
        "from difflib import SequenceMatcher",
        "import PyPDF2",
        "from collections import defaultdict"
      ],
      "imports_required": [
        "import os",
        "import re",
        "import json",
        "import csv",
        "import hashlib",
        "from pathlib import Path",
        "from typing import Dict, List, Tuple, Optional",
        "from difflib import SequenceMatcher",
        "import PyPDF2",
        "from collections import defaultdict"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 469,
      "line_start": 448,
      "name": "main_v102",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as the primary orchestrator for an enhanced document comparison tool. It coordinates the entire workflow: scanning two document folders, comparing their contents (including signature detection), saving results to files, and displaying a summary. It's designed to identify similarities, differences, and signatures between documents in the OUTPUT_FOLDER and WUXI2_FOLDER directories.",
      "return_annotation": null,
      "return_explained": "This function does not return any value (implicitly returns None). It performs side effects including printing to console, writing results to files (RESULTS_FILE and DETAILED_JSON), and potentially creating/modifying files in the file system.",
      "settings_required": [
        "OUTPUT_FOLDER constant/variable must be defined in the module scope pointing to the mailsearch/output directory",
        "WUXI2_FOLDER constant/variable must be defined in the module scope pointing to the wuxi2 repository directory",
        "RESULTS_FILE constant/variable must be defined specifying the path for results output",
        "DETAILED_JSON constant/variable must be defined specifying the path for detailed JSON output",
        "scan_output_folder() function must be defined in the same module",
        "scan_wuxi2_folder() function must be defined in the same module",
        "compare_documents() function must be defined in the same module",
        "save_results() function must be defined in the same module",
        "print_summary() function must be defined in the same module",
        "Read permissions required for OUTPUT_FOLDER and WUXI2_FOLDER directories",
        "Write permissions required for the directory where RESULTS_FILE and DETAILED_JSON will be saved"
      ],
      "source_code": "def main():\n    print(\"=\"*80)\n    print(\"Enhanced Document Comparison Tool with Signature Detection\")\n    print(\"Comparing mailsearch/output with wuxi2 repository\")\n    print(\"=\"*80)\n    \n    # Scan folders\n    output_docs = scan_output_folder(OUTPUT_FOLDER)\n    wuxi2_docs = scan_wuxi2_folder(WUXI2_FOLDER)\n    \n    # Compare documents\n    results = compare_documents(output_docs, wuxi2_docs)\n    \n    # Save results\n    save_results(results, RESULTS_FILE, DETAILED_JSON)\n    \n    # Print summary\n    print_summary(results)\n    \n    print(\"\\n\" + \"=\"*80)\n    print(\"Enhanced comparison complete!\")\n    print(\"=\"*80)",
      "source_file": "/tf/active/vicechatdev/mailsearch/enhanced_document_comparison.py",
      "tags": [
        "document-comparison",
        "signature-detection",
        "file-scanning",
        "orchestration",
        "main-entry-point",
        "pdf-processing",
        "batch-processing",
        "reporting",
        "file-analysis"
      ],
      "updated_at": "2025-12-07T01:59:48.510428",
      "usage_example": "# Define required constants and helper functions first\nOUTPUT_FOLDER = './mailsearch/output'\nWUXI2_FOLDER = './wuxi2'\nRESULTS_FILE = './comparison_results.csv'\nDETAILED_JSON = './detailed_results.json'\n\n# Define helper functions (scan_output_folder, scan_wuxi2_folder, etc.)\n# ... (implementation of helper functions)\n\n# Run the main function\nif __name__ == '__main__':\n    main()"
    },
    {
      "best_practices": [
        "Ensure all required environment variables (especially OPENAI_API_KEY) are set before calling this function",
        "Verify that the FixedProjectVictoriaGenerator class is properly defined and all its dependencies are satisfied",
        "Handle the returned output_path appropriately, checking if the file exists and is valid",
        "Consider wrapping this function call in try-except blocks to handle potential errors from the pipeline execution",
        "This function has no parameters, so all configuration must be done through the FixedProjectVictoriaGenerator class initialization or environment variables",
        "Ensure sufficient system resources (memory, disk space) are available as disclosure generation may be resource-intensive",
        "Check that ChromaDB is properly initialized and accessible before running",
        "Verify that all required PDF source documents are available if the pipeline processes PDFs"
      ],
      "class_interface": {},
      "complexity": "simple",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-06 08:11:24",
      "decorators": [],
      "dependencies": [
        "os",
        "re",
        "json",
        "tiktoken",
        "typing",
        "datetime",
        "chromadb",
        "langchain_openai",
        "sentence_transformers",
        "fitz",
        "OneCo_hybrid_RAG"
      ],
      "description": "Entry point function that instantiates a FixedProjectVictoriaGenerator and executes its complete pipeline to generate fixed disclosure documents.",
      "docstring": "Main function to run the fixed disclosure generator.",
      "id": 42,
      "imports": [
        "import os",
        "import re",
        "import json",
        "import tiktoken",
        "from typing import List",
        "from typing import Dict",
        "from typing import Any",
        "from typing import Tuple",
        "from datetime import datetime",
        "import chromadb",
        "from langchain_openai import ChatOpenAI",
        "from sentence_transformers import CrossEncoder",
        "import fitz",
        "from OneCo_hybrid_RAG import MyEmbeddingFunction"
      ],
      "imports_required": [
        "import os",
        "import re",
        "import json",
        "import tiktoken",
        "from typing import List, Dict, Any, Tuple",
        "from datetime import datetime",
        "import chromadb",
        "from langchain_openai import ChatOpenAI",
        "from sentence_transformers import CrossEncoder",
        "import fitz",
        "from OneCo_hybrid_RAG import MyEmbeddingFunction"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 1628,
      "line_start": 1624,
      "name": "main_v101",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as the main entry point for running the fixed disclosure generation pipeline. It creates an instance of FixedProjectVictoriaGenerator, executes the complete pipeline workflow, and returns the path to the generated output. This is typically used as the primary execution function for the disclosure generation system, orchestrating the entire process from initialization to completion.",
      "return_annotation": null,
      "return_explained": "Returns a string representing the file path to the generated output from the disclosure generator pipeline. The exact format and location of this path depends on the FixedProjectVictoriaGenerator's run_complete_pipeline() method implementation. This could be a local file path, a relative path, or an absolute path to the generated disclosure document.",
      "settings_required": [
        "FixedProjectVictoriaGenerator class must be defined and available in the same module or imported",
        "OPENAI_API_KEY environment variable (required by langchain_openai.ChatOpenAI)",
        "ChromaDB database configuration and initialization",
        "Access to PDF files if fitz (PyMuPDF) is used for document processing",
        "Sentence transformers model files for CrossEncoder",
        "Custom embedding function from OneCo_hybrid_RAG module must be accessible",
        "Tiktoken encoding files for token counting"
      ],
      "source_code": "def main():\n    \"\"\"Main function to run the fixed disclosure generator.\"\"\"\n    generator = FixedProjectVictoriaGenerator()\n    output_path = generator.run_complete_pipeline()\n    return output_path",
      "source_file": "/tf/active/vicechatdev/fixed_project_victoria_generator.py",
      "tags": [
        "entry-point",
        "pipeline",
        "disclosure-generation",
        "orchestration",
        "main-function",
        "document-generation",
        "RAG",
        "LLM",
        "chromadb",
        "openai"
      ],
      "updated_at": "2025-12-07T01:59:48.509790",
      "usage_example": "# Ensure all environment variables are set\nimport os\nos.environ['OPENAI_API_KEY'] = 'your-api-key-here'\n\n# Import and run the main function\nfrom your_module import main\n\n# Execute the disclosure generation pipeline\noutput_path = main()\nprint(f'Disclosure generated at: {output_path}')"
    },
    {
      "best_practices": [
        "This function is designed as a standalone test and should not be used in production code",
        "The hardcoded folder UUID ('65aabcb0-94de-4e73-bb44-5f1e304c45a5') is specific to a test environment and should be replaced for different use cases",
        "The function performs side effects including creating files, uploading to cloud services, and saving logs",
        "Ensure proper authentication is configured before running this function",
        "The function uses enable_raw_logging=True which may generate large log files",
        "Consider wrapping the function call in proper error handling when integrating into larger test suites",
        "The function relies on test_uploads.py infrastructure, so that module must be properly configured",
        "Network connectivity to reMarkable cloud services is required for successful execution",
        "The function performs a full replica sync which may take time depending on the device's content"
      ],
      "class_interface": {},
      "complexity": "complex",
      "component_type": "function",
      "conditional_imports": [
        {
          "condition": "always required - imported at function start",
          "import": "from test_uploads import RemarkableUploadTests",
          "optional": false
        },
        {
          "condition": "required for replica sync verification after upload",
          "import": "from local_replica_v2 import RemarkableReplicaBuilder",
          "optional": false
        }
      ],
      "created_at": "2025-12-07 00:28:43",
      "decorators": [],
      "dependencies": [
        "auth",
        "upload_manager",
        "pathlib",
        "time",
        "test_uploads",
        "local_replica_v2"
      ],
      "description": "Tests uploading a PDF document to a specific folder ('Myfolder') on a reMarkable device and verifies the upload by syncing and checking folder contents.",
      "docstring": null,
      "id": 2049,
      "imports": [
        "from auth import RemarkableAuth",
        "from upload_manager import RemarkableUploadManager",
        "from pathlib import Path",
        "import time",
        "from test_uploads import RemarkableUploadTests",
        "from local_replica_v2 import RemarkableReplicaBuilder"
      ],
      "imports_required": [
        "from auth import RemarkableAuth",
        "from upload_manager import RemarkableUploadManager",
        "from pathlib import Path",
        "import time"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 87,
      "line_start": 8,
      "name": "main_v100",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as a comprehensive integration test for the reMarkable upload functionality. It initializes the upload test suite, verifies a target folder exists in the database, creates a test PDF, uploads it to the specified folder, saves HTTP logs, performs a replica sync to verify the upload, and displays the folder contents. It's designed to validate the entire upload-to-folder workflow end-to-end.",
      "return_annotation": null,
      "return_explained": "Returns a boolean value: True if the upload to the folder completed successfully and verification passed, False if the folder UUID was not found, upload failed, or an exception occurred during the process. The function may also return None implicitly if the folder verification fails early.",
      "settings_required": [
        "reMarkable device authentication credentials (handled by RemarkableAuth)",
        "Valid reMarkable API session (created by test_uploads.RemarkableUploadTests)",
        "Access to reMarkable cloud services",
        "Write permissions to save test PDFs and log files",
        "The target folder UUID ('65aabcb0-94de-4e73-bb44-5f1e304c45a5') must exist in the reMarkable device database"
      ],
      "source_code": "def main():\n    print(\"\ud83d\udcc1 TESTING UPLOAD TO MYFOLDER\")\n    print(\"=\" * 50)\n    \n    # Initialize the same way as test_uploads.py\n    from test_uploads import RemarkableUploadTests\n    test_suite = RemarkableUploadTests(enable_raw_logging=True)\n    \n    # Myfolder UUID from the logs\n    myfolder_uuid = \"65aabcb0-94de-4e73-bb44-5f1e304c45a5\"\n    \n    print(f\"\ud83c\udfaf Target folder: Myfolder (UUID: {myfolder_uuid})\")\n    \n    # Verify the folder exists in our database\n    if myfolder_uuid not in test_suite.uploader.database['nodes']:\n        print(f\"\u274c Myfolder UUID not found in database\")\n        return False\n    \n    folder_node = test_suite.uploader.database['nodes'][myfolder_uuid]\n    print(f\"\u2705 Found folder: {folder_node['name']}\")\n    print(f\"   Type: {folder_node['node_type']}\")\n    print(f\"   Parent: {folder_node.get('metadata', {}).get('parent', 'root')}\")\n    \n    # Create a test PDF using the same method as test_uploads.py\n    test_pdf_path = test_suite.test_create_test_pdf()\n    \n    # Generate unique name for this test\n    test_name = f\"FolderTest_{int(time.time())}\"\n    \n    print(f\"\ud83d\udcc4 Uploading PDF: {test_name}\")\n    print(f\"   Source file: {test_pdf_path}\")\n    print(f\"   Target folder: Myfolder\")\n    \n    # Upload to the folder using the same method as test_uploads.py\n    try:\n        success = test_suite.uploader.upload_pdf_document(\n            str(test_pdf_path), \n            test_name, \n            parent_uuid=myfolder_uuid\n        )\n        \n        if success:\n            print(f\"\u2705 Upload to folder completed successfully!\")\n            print(f\"\ud83d\udd04 The document should now appear in Myfolder on your device\")\n            \n            # Save raw logs like test_uploads.py does\n            log_file = test_suite.save_raw_logs()\n            if log_file:\n                print(f\"\ud83d\udcdd Raw HTTP logs saved to: {log_file}\")\n            \n            # Run a quick sync to verify\n            print(f\"\ud83d\udd04 Running replica sync to verify...\")\n            from local_replica_v2 import RemarkableReplicaBuilder\n            replica_builder = RemarkableReplicaBuilder(test_suite.session)\n            replica_builder.build_complete_replica()\n            \n            # Check if it's in the folder\n            test_suite.uploader._load_database()\n            \n            folder_contents = []\n            for uuid, node in test_suite.uploader.database['nodes'].items():\n                if node.get('metadata', {}).get('parent') == myfolder_uuid:\n                    folder_contents.append({\n                        'name': node['name'],\n                        'uuid': uuid,\n                        'type': node['node_type']\n                    })\n            \n            print(f\"\ud83d\udcc1 Myfolder contents after upload:\")\n            for item in folder_contents:\n                print(f\"   \ud83d\udcc4 {item['name']} ({item['type']}) - {item['uuid'][:8]}...\")\n            \n            return True\n        else:\n            print(f\"\u274c Upload failed\")\n            return False\n            \n    except Exception as e:\n        print(f\"\u274c Upload error: {e}\")\n        return False",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/cloudtest/test_folder_upload.py",
      "tags": [
        "testing",
        "remarkable",
        "upload",
        "pdf",
        "folder",
        "integration-test",
        "device-sync",
        "file-management",
        "cloud-storage",
        "verification"
      ],
      "updated_at": "2025-12-07T01:59:48.508976",
      "usage_example": "if __name__ == '__main__':\n    # Run the folder upload test\n    result = main()\n    if result:\n        print('Test passed: Document successfully uploaded to folder')\n    else:\n        print('Test failed: Upload or verification unsuccessful')\n    \n    # The function handles all setup internally:\n    # - Initializes test suite with raw logging\n    # - Creates test PDF\n    # - Uploads to specific folder UUID\n    # - Verifies upload through sync\n    # - Displays folder contents"
    },
    {
      "best_practices": [
        "Ensure all required constants (CLIENT_ID, TENANT_ID, SENDER_EMAIL, KEYWORD, DOWNLOAD_DIR, GRAPH_SCOPE) are defined before calling this function",
        "All helper functions (ensure_download_dir, get_msal_app, get_access_token, search_messages, download_attachments_for_message) must be implemented and available in scope",
        "The Azure AD application must have appropriate permissions granted and admin consent provided for Mail.Read or Mail.ReadWrite",
        "Store sensitive credentials (CLIENT_ID, TENANT_ID, client secrets) in environment variables or secure configuration, not hardcoded",
        "Consider adding error handling around API calls and file operations for production use",
        "The function prints to console - consider using logging module for better control in production environments",
        "Ensure sufficient disk space is available in DOWNLOAD_DIR before running",
        "Be aware of API rate limits when processing large numbers of messages",
        "This function is designed to be called as a script entry point, typically wrapped in 'if __name__ == \"__main__\"' block"
      ],
      "class_interface": {},
      "complexity": "moderate",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-06 22:26:19",
      "decorators": [],
      "dependencies": [
        "os",
        "base64",
        "requests",
        "msal",
        "pathlib"
      ],
      "description": "Main entry point function that authenticates with Microsoft Graph API, searches for emails from a specific sender containing a keyword, and downloads all attachments from matching messages to a local directory.",
      "docstring": null,
      "id": 1874,
      "imports": [
        "import os",
        "import base64",
        "import requests",
        "import msal",
        "from pathlib import Path"
      ],
      "imports_required": [
        "import os",
        "import base64",
        "import requests",
        "import msal",
        "from pathlib import Path"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 158,
      "line_start": 144,
      "name": "main_v99",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function orchestrates an automated email attachment download workflow. It authenticates using MSAL (Microsoft Authentication Library), queries Microsoft Graph API for emails matching specific criteria (sender and keyword), and downloads all attachments from the found messages. This is useful for automated document retrieval, backup systems, or processing incoming attachments from specific sources.",
      "return_annotation": null,
      "return_explained": "This function does not return any value (implicitly returns None). It performs side effects including: printing search results and progress to console, creating directories, and downloading files to the filesystem.",
      "settings_required": [
        "DOWNLOAD_DIR constant/variable - path where attachments will be saved",
        "CLIENT_ID constant/variable - Azure AD application client ID",
        "TENANT_ID constant/variable - Azure AD tenant ID",
        "GRAPH_SCOPE constant/variable - Microsoft Graph API scope (typically 'https://graph.microsoft.com/.default' or 'Mail.Read')",
        "SENDER_EMAIL constant/variable - email address to filter messages by sender",
        "KEYWORD constant/variable - search keyword to filter messages",
        "ensure_download_dir() function must be defined - creates download directory if it doesn't exist",
        "get_msal_app() function must be defined - initializes MSAL application",
        "get_access_token() function must be defined - obtains OAuth access token",
        "search_messages() function must be defined - searches for messages via Graph API",
        "download_attachments_for_message() function must be defined - downloads attachments for a specific message",
        "Azure AD app registration with appropriate Microsoft Graph API permissions (Mail.Read or Mail.ReadWrite)"
      ],
      "source_code": "def main():\n    ensure_download_dir(DOWNLOAD_DIR)\n\n    app = get_msal_app(CLIENT_ID, TENANT_ID)\n    token = get_access_token(app, GRAPH_SCOPE)\n\n    print(f\"Searching for messages from {SENDER_EMAIL} containing '{KEYWORD}'...\")\n    messages = search_messages(token, SENDER_EMAIL, KEYWORD)\n    print(f\"Found {len(messages)} messages.\")\n\n    for msg in messages:\n        subject = msg.get(\"subject\", \"(no subject)\")\n        msg_id = msg.get(\"id\")\n        print(f\"\\nMessage: {subject}\")\n        download_attachments_for_message(token, msg_id, DOWNLOAD_DIR)",
      "source_file": "/tf/active/vicechatdev/mailsearch/example_script.py",
      "tags": [
        "email-automation",
        "microsoft-graph",
        "attachment-download",
        "msal",
        "oauth",
        "azure-ad",
        "email-search",
        "file-download",
        "orchestration",
        "main-entry-point"
      ],
      "updated_at": "2025-12-07T01:59:48.508303",
      "usage_example": "# Define required constants and helper functions first\nDOWNLOAD_DIR = './downloads'\nCLIENT_ID = 'your-client-id-here'\nTENANT_ID = 'your-tenant-id-here'\nGRAPH_SCOPE = ['https://graph.microsoft.com/.default']\nSENDER_EMAIL = 'sender@example.com'\nKEYWORD = 'invoice'\n\n# Helper functions (simplified examples)\ndef ensure_download_dir(path):\n    Path(path).mkdir(parents=True, exist_ok=True)\n\ndef get_msal_app(client_id, tenant_id):\n    authority = f'https://login.microsoftonline.com/{tenant_id}'\n    return msal.ConfidentialClientApplication(client_id, authority=authority, client_credential='secret')\n\ndef get_access_token(app, scopes):\n    result = app.acquire_token_for_client(scopes=scopes)\n    return result['access_token']\n\ndef search_messages(token, sender, keyword):\n    # Implementation to search messages\n    return []\n\ndef download_attachments_for_message(token, msg_id, download_dir):\n    # Implementation to download attachments\n    pass\n\n# Run the main function\nif __name__ == '__main__':\n    main()"
    },
    {
      "best_practices": [
        "SECURITY WARNING: Credentials are hardcoded in the source code. Use environment variables or secure credential management instead.",
        "The function depends on external functions 'has_wuxi_coding()' and 'upload_file_to_filecloud()' which must be defined in the same module.",
        "Use --dry-run flag first to verify which files will be uploaded before performing actual uploads.",
        "Ensure the source directory exists and contains files matching the pattern before running.",
        "The function uses a persistent session object for FileCloud API calls to maintain authentication.",
        "Error handling is implemented per-file, so one failure won't stop the entire batch.",
        "The timezone is set to Europe/Brussels (CET) - adjust if needed for different regions.",
        "Consider implementing retry logic for network failures in production use.",
        "The function prints progress to stdout - redirect or capture if logging to file is needed."
      ],
      "class_interface": {},
      "complexity": "moderate",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-06 22:17:53",
      "decorators": [],
      "dependencies": [
        "argparse",
        "pathlib",
        "requests",
        "xmltodict",
        "datetime",
        "zoneinfo",
        "os",
        "re"
      ],
      "description": "Command-line application that uploads PDF files without WUXI coding from a local directory to a FileCloud server, with support for dry-run mode and customizable file patterns.",
      "docstring": null,
      "id": 1849,
      "imports": [
        "import os",
        "import re",
        "import requests",
        "import xmltodict",
        "from datetime import datetime",
        "from zoneinfo import ZoneInfo",
        "import argparse",
        "from pathlib import Path"
      ],
      "imports_required": [
        "import argparse",
        "from pathlib import Path",
        "import requests",
        "import xmltodict",
        "from datetime import datetime",
        "from zoneinfo import ZoneInfo",
        "import os",
        "import re"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 246,
      "line_start": 157,
      "name": "main_v98",
      "parameters": [],
      "parameters_explained": {
        "No direct parameters": "This function takes no parameters directly. Instead, it uses argparse to parse command-line arguments: --source (source directory path, default './output'), --target (FileCloud target folder path), --dry-run (boolean flag for simulation mode), and --pattern (file glob pattern, default '*.pdf')"
      },
      "parent_class": null,
      "purpose": "This is the main entry point for a file upload utility that filters PDF files based on WUXI coding patterns and uploads them to a specific FileCloud location. It's designed for document management workflows where files need to be categorized and uploaded to a shared cloud storage system. The function handles authentication, file filtering, batch uploading, and provides detailed progress reporting with success/error summaries.",
      "return_annotation": null,
      "return_explained": "Returns None implicitly. The function performs side effects (file uploads, console output) and exits normally. Early returns occur when no files are found or in dry-run mode.",
      "settings_required": [
        "FileCloud server URL: 'https://filecloud.vicebio.com/' (hardcoded)",
        "FileCloud credentials: userid='wim@vicebio.com', password='Studico01!' (hardcoded - security concern)",
        "Access to Europe/Brussels timezone data (system timezone database)",
        "Network access to FileCloud server",
        "Functions 'has_wuxi_coding()' and 'upload_file_to_filecloud()' must be defined in the same module",
        "Read permissions on source directory (default: ./output)",
        "Write permissions on FileCloud target directory"
      ],
      "source_code": "def main():\n    parser = argparse.ArgumentParser(\n        description=\"Upload non-WUXI coded files from output folder to FileCloud\"\n    )\n    \n    parser.add_argument(\n        '--source',\n        default='./output',\n        help='Source directory (default: ./output)'\n    )\n    \n    parser.add_argument(\n        '--target',\n        default='/SHARED/vicebio_shares/03_CMC/e-sign - document to approve/Extract docusign - not Wuxi coded',\n        help='Target folder in FileCloud'\n    )\n    \n    parser.add_argument(\n        '--dry-run',\n        action='store_true',\n        help='Show what would be uploaded without actually uploading'\n    )\n    \n    parser.add_argument(\n        '--pattern',\n        default='*.pdf',\n        help='File pattern to match (default: *.pdf)'\n    )\n    \n    args = parser.parse_args()\n    \n    # Setup timezone\n    cet_timezone = ZoneInfo(\"Europe/Brussels\")\n    \n    # Find all PDF files without WUXI coding\n    source_path = Path(args.source)\n    all_files = list(source_path.glob(args.pattern))\n    non_wuxi_files = [f for f in all_files if not has_wuxi_coding(f.name)]\n    \n    print(f\"Found {len(all_files)} total files\")\n    print(f\"Filtered to {len(non_wuxi_files)} files without WUXI coding\")\n    print(f\"Target folder: {args.target}\")\n    print(\"=\" * 80)\n    \n    if not non_wuxi_files:\n        print(\"No files to upload\")\n        return\n    \n    if args.dry_run:\n        print(\"\\nDRY RUN MODE - No files will be uploaded\")\n        print(\"=\" * 80)\n        for file_path in sorted(non_wuxi_files):\n            print(f\"\\n{file_path.name}\")\n            print(f\"  \u2192 Would upload to: {args.target}/{file_path.name}\")\n        return\n    \n    # Login to FileCloud\n    print(\"\\nLogging in to FileCloud...\")\n    Headers = {'Accept': 'application/json'}\n    Creds = {'userid': 'wim@vicebio.com', 'password': 'Studico01!'}\n    ServerURL = 'https://filecloud.vicebio.com/'\n    LoginEndPoint = 'core/loginguest'\n    \n    s = requests.session()\n    LoginCall = s.post(ServerURL + LoginEndPoint, data=Creds, headers=Headers).json()\n    print(\"\u2713 Logged in successfully\")\n    print(\"=\" * 80)\n    \n    # Upload files\n    success_count = 0\n    error_count = 0\n    \n    for file_path in sorted(non_wuxi_files):\n        try:\n            if upload_file_to_filecloud(str(file_path), args.target, s, cet_timezone, args.dry_run):\n                success_count += 1\n            else:\n                error_count += 1\n        except Exception as e:\n            print(f\"\\n{file_path.name}\")\n            print(f\"  \u2717 Error: {e}\")\n            error_count += 1\n    \n    # Summary\n    print(\"\\n\" + \"=\" * 80)\n    print(\"SUMMARY\")\n    print(\"=\" * 80)\n    print(f\"Total files: {len(non_wuxi_files)}\")\n    print(f\"Successful: {success_count}\")\n    print(f\"Errors: {error_count}\")",
      "source_file": "/tf/active/vicechatdev/mailsearch/upload_non_wuxi_coded.py",
      "tags": [
        "file-upload",
        "filecloud",
        "cli",
        "batch-processing",
        "pdf-management",
        "document-management",
        "file-filtering",
        "cloud-storage",
        "argparse",
        "dry-run",
        "authentication",
        "session-management"
      ],
      "updated_at": "2025-12-07T01:59:48.507523",
      "usage_example": "# Run with default settings\nif __name__ == '__main__':\n    main()\n\n# Command-line usage examples:\n# python script.py\n# python script.py --source ./my_pdfs --pattern '*.pdf'\n# python script.py --dry-run\n# python script.py --target '/SHARED/custom_folder' --source ./docs\n# python script.py --pattern '*.docx' --dry-run"
    },
    {
      "best_practices": [
        "This function must be run from the email-forwarder project root directory containing requirements.txt and src/",
        "Requires helper functions 'run_command' and 'check_file_exists' to be defined in the same module",
        "The function should be run twice: once to create the venv, then again after activation to install dependencies",
        "Always activate the virtual environment before running the second time to ensure dependencies are installed in the correct location",
        "Review and configure the .env file with actual MS365 credentials before starting the application",
        "The function uses interactive prompts (input()) so it's not suitable for automated/non-interactive environments",
        "Uses exec() to test imports which can be a security concern - ensure the test_script content is trusted",
        "The function assumes Unix-like commands (cp) which may not work on Windows without modification",
        "Check return value to determine if setup was successful before proceeding with application startup"
      ],
      "class_interface": {},
      "complexity": "moderate",
      "component_type": "function",
      "conditional_imports": [
        {
          "condition": "only during import testing phase (Step 6), requires virtual environment to be active and dependencies installed",
          "import": "from forwarder.smtp_server import SMTPServer",
          "optional": false
        },
        {
          "condition": "only during import testing phase (Step 6), requires virtual environment to be active and dependencies installed",
          "import": "from forwarder.o365_client import O365Client",
          "optional": false
        },
        {
          "condition": "only during import testing phase (Step 6), requires virtual environment to be active and dependencies installed",
          "import": "from forwarder.email_handler import EmailHandler",
          "optional": false
        },
        {
          "condition": "only during import testing phase (Step 6), requires virtual environment to be active and dependencies installed",
          "import": "from config.settings import Settings",
          "optional": false
        }
      ],
      "created_at": "2025-12-06 17:35:43",
      "decorators": [],
      "dependencies": [
        "pathlib",
        "os",
        "sys",
        "subprocess",
        "shutil"
      ],
      "description": "Interactive setup script that configures a Python virtual environment for an email forwarder application, installs dependencies, and verifies the installation.",
      "docstring": null,
      "id": 1473,
      "imports": [
        "import os",
        "import sys",
        "import subprocess",
        "import shutil",
        "from pathlib import Path"
      ],
      "imports_required": [
        "import os",
        "import sys",
        "import subprocess",
        "import shutil",
        "from pathlib import Path"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 173,
      "line_start": 36,
      "name": "main_v97",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function orchestrates the complete setup process for an email forwarder application. It performs directory validation, checks Python/pip installation, creates or recreates a virtual environment, provides activation instructions, installs dependencies from requirements.txt, verifies configuration files (.env), tests Python imports, and displays final usage instructions. It's designed to be run as a standalone setup script to prepare the development/production environment.",
      "return_annotation": null,
      "return_explained": "Returns a boolean value: True if the setup process completes successfully (all checks pass and dependencies are installed), False if any critical step fails (missing Python, wrong directory, failed dependency installation, or import errors). The return value indicates whether the environment is ready for use.",
      "settings_required": [
        "Must be run from the email-forwarder project root directory",
        "Requires requirements.txt file in the current directory",
        "Requires src/ directory in the current directory",
        "Python 3.8 or higher must be installed and accessible as 'python3'",
        "pip3 must be installed and accessible",
        ".env.example file should exist for configuration template",
        ".env file with MS365 credentials: TENANT_ID, CLIENT_ID, CLIENT_SECRET, FROM_EMAIL",
        "Virtual environment should be activated for full setup (VIRTUAL_ENV environment variable)",
        "Depends on external function 'run_command' for executing shell commands",
        "Depends on external function 'check_file_exists' for file validation"
      ],
      "source_code": "def main():\n    print(\"=\" * 60)\n    print(\"\ud83d\udce7 EMAIL FORWARDER - VIRTUAL ENVIRONMENT SETUP\")\n    print(\"=\" * 60)\n    print()\n    \n    # Check current directory\n    current_dir = Path.cwd()\n    print(f\"\ud83d\udcc1 Current directory: {current_dir}\")\n    \n    # Verify we're in the right directory\n    if not (Path(\"requirements.txt\").exists() and Path(\"src\").exists()):\n        print(\"\u274c This script must be run from the email-forwarder directory\")\n        print(\"   Required files: requirements.txt, src/ directory\")\n        return False\n    \n    print(\"\u2705 Project directory structure verified\")\n    print()\n    \n    # Step 1: Check Python\n    print(\"\ud83d\udc0d STEP 1: Checking Python installation\")\n    if not run_command(\"python3 --version\", \"Checking Python 3\"):\n        print(\"\u274c Python 3 is required. Please install Python 3.8 or higher.\")\n        return False\n    \n    if not run_command(\"pip3 --version\", \"Checking pip3\"):\n        print(\"\u274c pip3 is required. Please install pip3.\")\n        return False\n    \n    print()\n    \n    # Step 2: Virtual Environment Setup\n    print(\"\ud83c\udf10 STEP 2: Setting up virtual environment\")\n    \n    venv_path = Path(\"venv\")\n    if venv_path.exists():\n        print(\"\u26a0\ufe0f  Virtual environment already exists\")\n        response = input(\"Do you want to recreate it? (y/N): \").strip().lower()\n        if response == 'y':\n            shutil.rmtree(venv_path)\n            print(\"\ud83d\uddd1\ufe0f  Removed existing virtual environment\")\n    \n    if not venv_path.exists():\n        if not run_command(\"python3 -m venv venv\", \"Creating virtual environment\"):\n            return False\n    \n    print()\n    \n    # Step 3: Activation Instructions\n    print(\"\ud83d\udd27 STEP 3: Virtual environment activation\")\n    print(\"   To activate the virtual environment, run:\")\n    print(\"   \ud83d\udccb Linux/Mac: source venv/bin/activate\")\n    print(\"   \ud83d\udccb Windows:   venv\\\\Scripts\\\\activate\")\n    print()\n    \n    # Step 4: Check if we can detect activation\n    virtual_env = os.environ.get('VIRTUAL_ENV')\n    if virtual_env:\n        print(f\"\u2705 Virtual environment is active: {virtual_env}\")\n        \n        # Install dependencies\n        print(\"\ud83d\udce6 STEP 4: Installing dependencies\")\n        if run_command(\"pip install --upgrade pip\", \"Upgrading pip\"):\n            if run_command(\"pip install -r requirements.txt\", \"Installing requirements\"):\n                print(\"\u2705 Dependencies installed successfully\")\n            else:\n                print(\"\u274c Failed to install dependencies\")\n                return False\n        else:\n            print(\"\u26a0\ufe0f  pip upgrade failed, but continuing...\")\n            if not run_command(\"pip install -r requirements.txt\", \"Installing requirements\"):\n                print(\"\u274c Failed to install dependencies\")\n                return False\n        \n        print()\n        \n        # Step 5: Configuration check\n        print(\"\u2699\ufe0f  STEP 5: Configuration verification\")\n        \n        if not check_file_exists(\".env\", \".env configuration file\"):\n            if check_file_exists(\".env.example\", \".env.example template\"):\n                print(\"\ud83d\udccb Copy .env.example to .env and configure with your MS365 credentials:\")\n                print(\"   - TENANT_ID=your_tenant_id\")\n             
   print(\"   - CLIENT_ID=your_client_id\") \n                print(\"   - CLIENT_SECRET=your_client_secret\")\n                print(\"   - FROM_EMAIL=your_sender@domain.com\")\n                run_command(\"cp .env.example .env\", \"Copying configuration template\")\n        \n        print()\n        \n        # Step 6: Import test\n        print(\"\ud83e\uddea STEP 6: Testing Python imports\")\n        test_script = \"\"\"\nimport sys\nsys.path.insert(0, 'src')\ntry:\n    from forwarder.smtp_server import SMTPServer\n    from forwarder.o365_client import O365Client\n    from forwarder.email_handler import EmailHandler\n    from config.settings import Settings\n    print(\"\u2705 All imports successful\")\nexcept ImportError as e:\n    print(f\"\u274c Import error: {e}\")\n    sys.exit(1)\n\"\"\"\n        \n        try:\n            exec(test_script)\n        except SystemExit:\n            print(\"\u274c Import test failed\")\n            return False\n        \n        print()\n        \n        # Step 7: Final instructions\n        print(\"\ud83d\ude80 SETUP COMPLETE!\")\n        print(\"=\" * 40)\n        print(\"To start the email forwarder:\")\n        print(\"1. Ensure virtual environment is active:\")\n        print(\"   source venv/bin/activate\")\n        print()\n        print(\"2. Configure .env file with your MS365 credentials\")\n        print()\n        print(\"3. Start the service:\")\n        print(\"   python src/main.py\")\n        print()\n        print(\"4. Test the service:\")\n        print(\"   python send_test_email.py\")\n        print()\n        print(\"5. Stop with Ctrl+C when done\")\n        print(\"=\" * 40)\n        \n    else:\n        print(\"\u26a0\ufe0f  Virtual environment is not currently active\")\n        print(\"   Please activate it first with: source venv/bin/activate\")\n        print(\"   Then run this script again to complete the setup\")\n    \n    return True",
      "source_file": "/tf/active/vicechatdev/email-forwarder/setup_venv.py",
      "tags": [
        "setup",
        "installation",
        "virtual-environment",
        "venv",
        "dependency-management",
        "configuration",
        "email-forwarder",
        "interactive-setup",
        "environment-setup",
        "python-setup",
        "pip",
        "validation",
        "initialization"
      ],
      "updated_at": "2025-12-07T01:59:48.506764",
      "usage_example": "# This function is typically called as the entry point of a setup script\n# Run from the email-forwarder project directory:\n\nif __name__ == '__main__':\n    success = main()\n    if success:\n        print('Setup completed successfully')\n        sys.exit(0)\n    else:\n        print('Setup failed')\n        sys.exit(1)\n\n# Expected workflow:\n# 1. Navigate to email-forwarder directory\n# 2. Run: python setup.py\n# 3. If venv not active, activate it: source venv/bin/activate\n# 4. Run setup.py again to complete dependency installation\n# 5. Configure .env file with MS365 credentials\n# 6. Start the application: python src/main.py"
    },
    {
      "best_practices": [
        "This function requires three helper functions to be defined in the same module: check_service_process(), check_port_listening(), and test_smtp_basic()",
        "The function performs checks sequentially and returns False immediately upon first failure, implementing fail-fast behavior",
        "Ensure the email forwarder service (src/main.py) is running before calling this function",
        "The function modifies sys.path to import configuration, which may affect module resolution in the calling context",
        "Use this function as part of deployment verification or continuous health monitoring",
        "The function provides user-friendly console output with visual indicators (\u2713/\u2717) for easy interpretation",
        "Exit codes should be used when calling from command line: exit(0) for success, exit(1) for failure",
        "Consider wrapping this in a try-except block if using in automated monitoring to handle unexpected exceptions",
        "The function assumes port 2525 is the SMTP listening port; modify if using a different port"
      ],
      "class_interface": {},
      "complexity": "moderate",
      "component_type": "function",
      "conditional_imports": [
        {
          "condition": "imported dynamically during configuration check (Check 4), requires src directory in path",
          "import": "import config.settings as settings",
          "optional": false
        }
      ],
      "created_at": "2025-12-06 17:33:22",
      "decorators": [],
      "dependencies": [
        "subprocess",
        "sys",
        "os",
        "socket",
        "time",
        "smtplib"
      ],
      "description": "Performs a comprehensive status check of an email forwarder service, verifying process status, port availability, SMTP communication, and configuration settings.",
      "docstring": null,
      "id": 1465,
      "imports": [
        "import subprocess",
        "import sys",
        "import os",
        "import socket",
        "import time",
        "import smtplib",
        "import config.settings as settings"
      ],
      "imports_required": [
        "import subprocess",
        "import sys",
        "import os",
        "import socket",
        "import time",
        "import smtplib"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 101,
      "line_start": 44,
      "name": "main_v96",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as a diagnostic and verification tool for an email forwarder service that accepts SMTP connections and forwards emails via Microsoft 365 Graph API. It systematically checks four critical aspects: (1) whether the service process is running, (2) if port 2525 is listening for connections, (3) SMTP protocol communication functionality, and (4) configuration validity. The function provides detailed console output with visual indicators and usage instructions, making it suitable for deployment verification, troubleshooting, and health monitoring.",
      "return_annotation": null,
      "return_explained": "Returns a boolean value: True if all four checks (service process, port listening, SMTP communication, and configuration) pass successfully, indicating the service is fully operational; False if any check fails, indicating the service has issues that need attention.",
      "settings_required": [
        "config.settings module must exist in src/config/ directory with the following attributes: SMTP_LISTEN_HOST, SMTP_LISTEN_PORT, MS365_SENDER_EMAIL, VALIDATE_RECIPIENTS",
        "Helper functions must be defined: check_service_process(), check_port_listening(), test_smtp_basic()",
        "Email forwarder service should be running as a separate process (src/main.py)",
        "SMTP port 2525 must be configured and accessible on 127.0.0.1",
        "Microsoft 365 Graph API credentials must be configured in settings"
      ],
      "source_code": "def main():\n    print(\"=\" * 60)\n    print(\"EMAIL FORWARDER SERVICE - FINAL STATUS CHECK\")\n    print(\"=\" * 60)\n    \n    # Check 1: Process running\n    print(\"\\n1. Checking if service process is running...\")\n    is_running, process_info = check_service_process()\n    if is_running:\n        print(f\"   \u2713 Service is running: {process_info}\")\n    else:\n        print(\"   \u2717 Service process not found\")\n        return False\n    \n    # Check 2: Port listening\n    print(\"\\n2. Checking if SMTP port 2525 is listening...\")\n    if check_port_listening():\n        print(\"   \u2713 Port 2525 is accepting connections\")\n    else:\n        print(\"   \u2717 Port 2525 is not accessible\")\n        return False\n    \n    # Check 3: SMTP communication\n    print(\"\\n3. Testing SMTP protocol communication...\")\n    smtp_result = test_smtp_basic()\n    if smtp_result is True:\n        print(\"   \u2713 SMTP protocol communication successful\")\n    else:\n        print(f\"   \u2717 SMTP communication failed: {smtp_result}\")\n        return False\n    \n    # Check 4: Configuration\n    print(\"\\n4. Checking configuration...\")\n    try:\n        sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'src'))\n        import config.settings as settings\n        print(f\"   \u2713 SMTP Listen: {settings.SMTP_LISTEN_HOST}:{settings.SMTP_LISTEN_PORT}\")\n        print(f\"   \u2713 MS365 Sender: {settings.MS365_SENDER_EMAIL}\")\n        print(f\"   \u2713 Validation: {settings.VALIDATE_RECIPIENTS}\")\n    except Exception as e:\n        print(f\"   \u2717 Configuration error: {e}\")\n        return False\n    \n    print(\"\\n\" + \"=\" * 60)\n    print(\"SERVICE STATUS: \u2713 FULLY OPERATIONAL\")\n    print(\"=\" * 60)\n    print(\"\\nThe email forwarder service is successfully running and ready to:\")\n    print(\"\u2022 Accept SMTP connections on 127.0.0.1:2525\")\n    print(\"\u2022 Parse incoming email messages\")\n    print(\"\u2022 Forward emails via Microsoft 365 Graph API\")\n    print(\"\u2022 Handle rate limiting and retries\")\n    print(\"\u2022 Log all activities\")\n    print(\"\\nTo send test emails, use:\")\n    print(\"  python send_test_email.py --to recipient@domain.com --from sender@domain.com\")\n    print(\"\\nTo stop the service:\")\n    print(\"  pkill -f 'python src/main.py'\")\n    \n    return True",
      "source_file": "/tf/active/vicechatdev/email-forwarder/service_status.py",
      "tags": [
        "service-monitoring",
        "health-check",
        "smtp",
        "email-forwarder",
        "diagnostic",
        "status-check",
        "verification",
        "microsoft-365",
        "port-check",
        "process-check",
        "configuration-validation"
      ],
      "updated_at": "2025-12-07T01:59:48.505999",
      "usage_example": "# Assuming this is in a file called check_status.py with required helper functions\n# and the email forwarder service is running\n\nif __name__ == '__main__':\n    # Run the comprehensive status check\n    success = main()\n    \n    if success:\n        print(\"\\nAll systems operational!\")\n        exit(0)\n    else:\n        print(\"\\nService has issues that need attention.\")\n        exit(1)\n\n# Expected output when successful:\n# ============================================================\n# EMAIL FORWARDER SERVICE - FINAL STATUS CHECK\n# ============================================================\n# \n# 1. Checking if service process is running...\n#    \u2713 Service is running: PID 12345\n# \n# 2. Checking if SMTP port 2525 is listening...\n#    \u2713 Port 2525 is accepting connections\n# \n# 3. Testing SMTP protocol communication...\n#    \u2713 SMTP protocol communication successful\n# \n# 4. Checking configuration...\n#    \u2713 SMTP Listen: 127.0.0.1:2525\n#    \u2713 MS365 Sender: sender@domain.com\n#    \u2713 Validation: True\n# \n# ============================================================\n# SERVICE STATUS: \u2713 FULLY OPERATIONAL\n# ============================================================"
    },
    {
      "best_practices": [
        "Ensure the CSV file path is updated from 'your_dataset.csv' to your actual dataset location before running",
        "The load_dataset() function must be defined or imported before calling main()",
        "Dataset must contain all required columns: 'weight_gain', 'feed_conversion_ratio', 'mortality_rate', 'eimeria_infection', 'treatment', 'challenge_regimen'",
        "The 'eimeria_infection' column should be binary (0 = No, 1 = Yes) for proper visualization",
        "Run in an environment that supports matplotlib plot display (Jupyter notebook, IDE with plot support, or with appropriate backend configured)",
        "Consider adding plt.close() calls after plt.show() to prevent memory issues with multiple plots",
        "For large datasets, consider adding data sampling or limiting the number of visualizations",
        "The function performs multiple statistical tests; consider adjusting for multiple comparisons if using results for publication",
        "Missing values are reported but not automatically handled; consider preprocessing data before analysis"
      ],
      "class_interface": {},
      "complexity": "moderate",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-06 13:11:36",
      "decorators": [],
      "dependencies": [
        "pandas",
        "numpy",
        "seaborn",
        "matplotlib",
        "scipy",
        "os"
      ],
      "description": "Performs comprehensive exploratory data analysis on a broiler chicken performance dataset, analyzing the correlation between Eimeria infection and performance measures (weight gain, feed conversion ratio, mortality rate) across different treatments and challenge regimens.",
      "docstring": null,
      "id": 809,
      "imports": [
        "import pandas as pd",
        "import numpy as np",
        "import seaborn as sns",
        "import matplotlib.pyplot as plt",
        "from scipy.stats import pearsonr",
        "import os"
      ],
      "imports_required": [
        "import pandas as pd",
        "import numpy as np",
        "import seaborn as sns",
        "import matplotlib.pyplot as plt",
        "from scipy.stats import pearsonr",
        "import os"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 103,
      "line_start": 22,
      "name": "main_v95",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as a complete data analysis pipeline for veterinary/agricultural research data. It loads a CSV dataset, validates required columns, generates descriptive statistics, performs correlation analysis between Eimeria infection and performance metrics, and creates multiple visualizations (histograms, boxplots) to explore relationships between infection status, treatments, challenge regimens, and performance outcomes. The function is designed for exploratory data analysis in poultry health research.",
      "return_annotation": null,
      "return_explained": "Returns None. The function performs side effects including printing analysis results to console and displaying multiple matplotlib/seaborn visualizations. It may return early (None) if the dataset fails to load or if required columns are missing.",
      "settings_required": [
        "A CSV file named 'your_dataset.csv' (or custom path) containing columns: 'weight_gain', 'feed_conversion_ratio', 'mortality_rate', 'eimeria_infection', 'treatment', 'challenge_regimen'",
        "The load_dataset() function must be defined in the same module or imported",
        "Display environment capable of showing matplotlib plots (GUI backend or Jupyter notebook)"
      ],
      "source_code": "def main():\n    # Load the dataset\n    file_path = 'your_dataset.csv'  # Replace with the path to your dataset\n    data = load_dataset(file_path)\n    \n    if data is None:\n        return\n\n    # Display the first few rows of the dataset\n    print(\"Dataset Preview:\")\n    print(data.head())\n\n    # Check if required columns exist\n    required_columns = ['weight_gain', 'feed_conversion_ratio', 'mortality_rate', 'eimeria_infection', 'treatment', 'challenge_regimen']\n    missing_columns = [col for col in required_columns if col not in data.columns]\n    if missing_columns:\n        print(f\"Error: Missing columns in the dataset: {missing_columns}\")\n        return\n\n    # Descriptive statistics for performance measures\n    print(\"\\nDescriptive Statistics:\")\n    performance_measures = ['weight_gain', 'feed_conversion_ratio', 'mortality_rate']\n    print(data[performance_measures].describe())\n\n    # Check for missing values\n    print(\"\\nMissing Values:\")\n    print(data.isnull().sum())\n\n    # Visualize the distribution of performance measures\n    for measure in performance_measures:\n        plt.figure(figsize=(8, 4))\n        sns.histplot(data[measure].dropna(), kde=True)\n        plt.title(f'Distribution of {measure}')\n        plt.xlabel(measure)\n        plt.ylabel('Frequency')\n        plt.show()\n\n    # Correlation analysis between Eimeria infection and performance measures\n    correlations = {}\n    for measure in performance_measures:\n        if data['eimeria_infection'].isnull().any() or data[measure].isnull().any():\n            print(f\"Warning: Missing data for correlation analysis with {measure}.\")\n            continue\n        corr, p_value = pearsonr(data['eimeria_infection'], data[measure])\n        correlations[measure] = {'correlation': corr, 'p_value': p_value}\n\n    # Display correlation results\n    print(\"\\nCorrelation Analysis:\")\n    for measure, stats in correlations.items():\n        print(f\"{measure}: Correlation = {stats['correlation']:.2f}, p-value = {stats['p_value']:.4f}\")\n\n    # Visualize the relationship between Eimeria infection and performance measures\n    for measure in performance_measures:\n        plt.figure(figsize=(8, 4))\n        sns.boxplot(x='eimeria_infection', y=measure, data=data)\n        plt.title(f'{measure} by Eimeria Infection Status')\n        plt.xlabel('Eimeria Infection (0 = No, 1 = Yes)')\n        plt.ylabel(measure)\n        plt.show()\n\n    # Grouping by treatment and challenge regimen\n    grouped_data = data.groupby(['treatment', 'challenge_regimen'])\n\n    # Descriptive statistics by group\n    print(\"\\nDescriptive Statistics by Treatment and Challenge Regimen:\")\n    for name, group in grouped_data:\n        print(f\"\\nGroup: {name}\")\n        print(group[performance_measures].describe())\n\n    # Visualize performance measures by treatment and challenge regimen\n    for measure in performance_measures:\n        plt.figure(figsize=(12, 6))\n        sns.boxplot(x='treatment', y=measure, hue='challenge_regimen', data=data)\n        plt.title(f'{measure} by Treatment and Challenge Regimen')\n        plt.xlabel('Treatment')\n        plt.ylabel(measure)\n        plt.legend(title='Challenge Regimen')\n        plt.show()\n\n    # Conclusion\n    print(\"\\nConclusion:\")\n    print(\"The analysis provides descriptive statistics and visualizations to explore the correlation between Eimeria infection and performance measures in broilers. 
Further statistical tests may be required to draw definitive conclusions.\")",
      "source_file": "/tf/active/vicechatdev/vice_ai/smartstat_scripts/343f5578-64e0-4101-84bd-5824b3c15deb/project_1/analysis.py",
      "tags": [
        "data-analysis",
        "exploratory-data-analysis",
        "veterinary-research",
        "poultry-health",
        "correlation-analysis",
        "data-visualization",
        "statistical-analysis",
        "eimeria-infection",
        "broiler-performance",
        "pandas",
        "seaborn",
        "matplotlib",
        "descriptive-statistics"
      ],
      "updated_at": "2025-12-07T01:59:48.505269",
      "usage_example": "# Ensure load_dataset function is defined\ndef load_dataset(file_path):\n    try:\n        return pd.read_csv(file_path)\n    except Exception as e:\n        print(f'Error loading dataset: {e}')\n        return None\n\n# Prepare your dataset CSV with required columns:\n# weight_gain, feed_conversion_ratio, mortality_rate, eimeria_infection, treatment, challenge_regimen\n\n# Update the file_path variable in the function or modify the CSV filename\n# Then call the function\nmain()\n\n# The function will:\n# 1. Load 'your_dataset.csv'\n# 2. Display dataset preview and statistics\n# 3. Show distribution plots for performance measures\n# 4. Perform correlation analysis with Eimeria infection\n# 5. Generate boxplots comparing groups by treatment and challenge regimen"
    },
    {
      "best_practices": [
        "Ensure DocumentComparator class is properly defined and imported before calling this function",
        "This function is designed to be called as a script entry point, typically within an if __name__ == '__main__' block",
        "The function catches all exceptions broadly, so specific error details may be lost - consider logging for production use",
        "Return value can be used for exit code determination in CLI applications",
        "Console output uses emoji characters - ensure terminal supports UTF-8 encoding"
      ],
      "class_interface": {},
      "complexity": "simple",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-07 00:54:40",
      "decorators": [],
      "dependencies": [
        "json",
        "auth"
      ],
      "description": "Entry point function that compares real versus uploaded documents using DocumentComparator and displays the comparison results with formatted output.",
      "docstring": "Compare real vs uploaded documents",
      "id": 2118,
      "imports": [
        "import json",
        "from auth import RemarkableAuth"
      ],
      "imports_required": [
        "import json",
        "from auth import RemarkableAuth"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 256,
      "line_start": 237,
      "name": "main_v94",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as the main execution entry point for a document comparison utility. It instantiates a DocumentComparator object, performs a comparison between real and uploaded documents, and provides user-friendly console output with emojis indicating success or failure. The function is designed to help understand differences in document visibility or move behavior between real and uploaded versions.",
      "return_annotation": null,
      "return_explained": "Returns a boolean value: True if the comparison process completes successfully (regardless of whether differences are found), False if an exception occurs during the comparison process.",
      "settings_required": [
        "DocumentComparator class must be available in the same module or imported",
        "RemarkableAuth module must be accessible for authentication functionality",
        "Appropriate authentication credentials/configuration required by RemarkableAuth",
        "Network access may be required if DocumentComparator fetches remote documents"
      ],
      "source_code": "def main():\n    \"\"\"Compare real vs uploaded documents\"\"\"\n    try:\n        comparator = DocumentComparator()\n        \n        print(f\"\ud83e\uddea Document Structure Comparison\")\n        \n        invoice_data, upload_data = comparator.compare_documents()\n        \n        if invoice_data and upload_data:\n            print(f\"\\n\u2705 Comparison completed!\")\n            print(f\"\ud83d\udca1 Check the differences above to understand why visibility/move behavior differs\")\n        else:\n            print(f\"\\n\u274c Comparison failed\")\n        \n        return True\n        \n    except Exception as e:\n        print(f\"\u274c Comparison failed: {e}\")\n        return False",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/cloudtest/compare_documents.py",
      "tags": [
        "document-comparison",
        "entry-point",
        "main-function",
        "remarkable",
        "document-analysis",
        "error-handling",
        "console-output"
      ],
      "updated_at": "2025-12-07T01:59:48.504603",
      "usage_example": "# Assuming DocumentComparator is defined in the same module\nif __name__ == '__main__':\n    success = main()\n    if success:\n        print('Document comparison completed successfully')\n    else:\n        print('Document comparison encountered errors')"
    },
    {
      "best_practices": [
        "Ensure 'input_data.csv' exists and contains the required columns before calling this function",
        "The function uses comprehensive error handling with try-except blocks for each major operation",
        "All print statements provide progress tracking and error diagnostics",
        "The function follows early return pattern on errors to prevent cascading failures",
        "Output files are automatically named with descriptive prefixes (plot_01, table_01)",
        "Statistical significance is evaluated at the 0.05 level by default",
        "The function closes matplotlib figures after saving to prevent memory leaks",
        "Consider wrapping this function call in a try-except block for production use",
        "Verify write permissions in the working directory before execution",
        "The correlation assumes linear relationship between variables; check data distribution first"
      ],
      "class_interface": {},
      "complexity": "moderate",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-06 18:07:50",
      "decorators": [],
      "dependencies": [
        "pandas",
        "numpy",
        "matplotlib",
        "seaborn",
        "scipy"
      ],
      "description": "Performs statistical analysis to determine the correlation between antibiotic use frequency and vaccination modes (in-ovo vs non-in-ovo), generating visualizations and saving results to files.",
      "docstring": null,
      "id": 1522,
      "imports": [
        "import pandas as pd",
        "import numpy as np",
        "import matplotlib.pyplot as plt",
        "import seaborn as sns",
        "from scipy.stats import pearsonr",
        "import warnings"
      ],
      "imports_required": [
        "import pandas as pd",
        "import numpy as np",
        "import matplotlib.pyplot as plt",
        "import seaborn as sns",
        "from scipy.stats import pearsonr"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 86,
      "line_start": 17,
      "name": "main_v93",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as a complete data analysis pipeline that: (1) loads antibiotic treatment data from a CSV file, (2) validates required columns exist, (3) calculates Pearson correlation between two vaccination modes, (4) creates a scatter plot visualization, (5) saves correlation metrics to a CSV file, and (6) writes statistical conclusions to a text file. It's designed for analyzing the relationship between antibiotic treatment frequencies in different vaccination contexts.",
      "return_annotation": null,
      "return_explained": "This function returns None implicitly. It performs side effects by creating three output files: 'plot_01_correlation_antibiotic_vaccination.png' (scatter plot), 'table_01_correlation_results.csv' (correlation metrics), and 'conclusions.txt' (statistical interpretation). The function may return early (None) if errors occur during data loading, validation, or processing.",
      "settings_required": [
        "Input file 'input_data.csv' must exist in the current working directory",
        "The CSV file must contain columns 'DWTreatmentId_False' and 'DWTreatmentId_True'",
        "Write permissions in the current directory for creating output files",
        "Sufficient disk space for saving PNG plot and CSV/TXT output files"
      ],
      "source_code": "def main():\n    print(\"Starting statistical analysis...\")\n    print(f\"Query: Conclude on the correlation between antibiotic use frequency and vaccination modes (in-ovo true or false). Use a single plot to illustrate this correlation.\")\n    \n    # Load data\n    try:\n        df = pd.read_csv('input_data.csv')\n        print(f\"Data loaded successfully: {df.shape}\")\n    except Exception as e:\n        print(f\"Error loading data: {e}\")\n        return\n    \n    # Data validation\n    required_columns = ['DWTreatmentId_False', 'DWTreatmentId_True']\n    for col in required_columns:\n        if col not in df.columns:\n            print(f\"Error: Missing required column '{col}' in the dataset.\")\n            return\n    \n    # Calculate correlation\n    try:\n        correlation, p_value = pearsonr(df['DWTreatmentId_False'], df['DWTreatmentId_True'])\n        print(f\"Correlation calculated: {correlation}, p-value: {p_value}\")\n    except Exception as e:\n        print(f\"Error calculating correlation: {e}\")\n        return\n    \n    # Plotting\n    try:\n        plt.figure(figsize=(10, 6))\n        sns.scatterplot(x='DWTreatmentId_False', y='DWTreatmentId_True', data=df)\n        plt.title('Correlation between Antibiotic Use Frequency and Vaccination Modes')\n        plt.xlabel('Antibiotic Use Frequency (Not In-Ovo)')\n        plt.ylabel('Antibiotic Use Frequency (In-Ovo)')\n        plt.grid(True)\n        plt.savefig('plot_01_correlation_antibiotic_vaccination.png')\n        plt.close()\n        print(\"Plot saved as 'plot_01_correlation_antibiotic_vaccination.png'\")\n    except Exception as e:\n        print(f\"Error generating plot: {e}\")\n        return\n    \n    # Save correlation result to a CSV file\n    try:\n        correlation_data = pd.DataFrame({\n            'Metric': ['Correlation', 'P-Value'],\n            'Value': [correlation, p_value]\n        })\n        correlation_data.to_csv('table_01_correlation_results.csv', index=False)\n        print(\"Correlation results saved as 'table_01_correlation_results.csv'\")\n    except Exception as e:\n        print(f\"Error saving correlation results: {e}\")\n        return\n    \n    # Write conclusions\n    try:\n        with open('conclusions.txt', 'w') as f:\n            f.write(\"Conclusions on the correlation between antibiotic use frequency and vaccination modes:\\n\")\n            f.write(f\"Pearson correlation coefficient: {correlation:.4f}\\n\")\n            f.write(f\"P-value: {p_value:.4f}\\n\")\n            if p_value < 0.05:\n                f.write(\"The correlation is statistically significant at the 0.05 significance level.\\n\")\n            else:\n                f.write(\"The correlation is not statistically significant at the 0.05 significance level.\\n\")\n        print(\"Conclusions written to 'conclusions.txt'\")\n    except Exception as e:\n        print(f\"Error writing conclusions: {e}\")\n        return\n    \n    print(\"Analysis completed successfully!\")",
      "source_file": "/tf/active/vicechatdev/smartstat/output/b7a013ae-a461-4aca-abae-9ed243119494/analysis_6cdbc6c8/analysis.py",
      "tags": [
        "statistical-analysis",
        "correlation",
        "data-visualization",
        "pearson-correlation",
        "antibiotic-analysis",
        "vaccination",
        "csv-processing",
        "scatter-plot",
        "data-pipeline",
        "file-io",
        "healthcare-analytics"
      ],
      "updated_at": "2025-12-07T01:59:48.503816",
      "usage_example": "import pandas as pd\nimport numpy as np\nimport matplotlib.pyplot as plt\nimport seaborn as sns\nfrom scipy.stats import pearsonr\n\n# Prepare sample input data\ndata = {\n    'DWTreatmentId_False': [10, 15, 20, 25, 30],\n    'DWTreatmentId_True': [12, 18, 22, 28, 35]\n}\ndf = pd.DataFrame(data)\ndf.to_csv('input_data.csv', index=False)\n\n# Run the analysis\nmain()\n\n# Output files created:\n# - plot_01_correlation_antibiotic_vaccination.png\n# - table_01_correlation_results.csv\n# - conclusions.txt"
    },
    {
      "best_practices": [
        "Ensure 'input_data.csv' exists and is properly formatted before calling this function",
        "The function expects specific column names ('Medication_Type', 'DWTreatmentId_False', 'DWTreatmentId_True') - verify data schema compatibility",
        "Check that the working directory has write permissions for output files",
        "The function filters for 'ANTIBIOTICA' medication type - ensure this value exists in your data",
        "Consider wrapping the function call in a try-except block for production use to handle unexpected errors",
        "The function uses early returns on errors - monitor console output for error messages",
        "Output files are overwritten if they already exist - backup important files before running",
        "The histogram uses 20 bins by default - this may need adjustment for different data distributions",
        "The function assumes DWTreatmentId_False and DWTreatmentId_True contain numeric values suitable for addition",
        "For large datasets, consider memory usage as the function loads the entire CSV into memory"
      ],
      "class_interface": {},
      "complexity": "moderate",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-06 18:07:33",
      "decorators": [],
      "dependencies": [
        "pandas",
        "numpy",
        "matplotlib",
        "seaborn",
        "scipy"
      ],
      "description": "Performs statistical analysis on antibiotic usage data, comparing distribution patterns between vaccinated and non-vaccinated groups, and generates visualization plots, summary tables, and written conclusions.",
      "docstring": null,
      "id": 1521,
      "imports": [
        "import pandas as pd",
        "import numpy as np",
        "import matplotlib.pyplot as plt",
        "import seaborn as sns",
        "from scipy import stats",
        "import warnings"
      ],
      "imports_required": [
        "import pandas as pd",
        "import numpy as np",
        "import matplotlib.pyplot as plt",
        "import seaborn as sns",
        "from scipy import stats",
        "import warnings"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 67,
      "line_start": 17,
      "name": "main_v92",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as a complete data analysis pipeline for examining the relationship between antibiotic use and vaccination status. It loads CSV data, filters for antibiotic medications, calculates total usage metrics, creates distribution visualizations with KDE plots, generates summary statistics, and outputs interpretative conclusions. The function is designed for healthcare data analysis workflows where understanding medication patterns relative to vaccination is important.",
      "return_annotation": null,
      "return_explained": "This function returns None. It performs side effects by creating three output files: (1) 'plot_01_antibiotic_use_vs_vaccination.png' - a histogram with KDE showing antibiotic use distribution by vaccination status, (2) 'table_01_summary_antibiotic_use.csv' - descriptive statistics of antibiotic usage, and (3) 'conclusions.txt' - written interpretations of the analysis. The function prints status messages to console throughout execution and may return early (None) if errors occur during data loading or validation.",
      "settings_required": [
        "Input file 'input_data.csv' must exist in the current working directory",
        "The CSV file must contain columns: 'Medication_Type', 'DWTreatmentId_False', 'DWTreatmentId_True'",
        "Write permissions required in the current directory for output files",
        "Sufficient disk space for output files (plot PNG, CSV table, text file)"
      ],
      "source_code": "def main():\n    print(\"Starting statistical analysis...\")\n    print(f\"Query: Revisit the previous analysis and change the plot reported to become a distribution of antibiotic use versus vaccination modus.\")\n    \n    # Load data\n    try:\n        df = pd.read_csv('input_data.csv')\n        print(f\"Data loaded successfully: {df.shape}\")\n    except Exception as e:\n        print(f\"Error loading data: {e}\")\n        return\n    \n    # Validate necessary columns\n    required_columns = ['Medication_Type', 'DWTreatmentId_False', 'DWTreatmentId_True']\n    for col in required_columns:\n        if col not in df.columns:\n            print(f\"Error: Missing required column '{col}' in the data.\")\n            return\n    \n    # Filter data for antibiotics\n    antibiotics_df = df[df['Medication_Type'] == 'ANTIBIOTICA']\n    print(f\"Filtered antibiotics data: {antibiotics_df.shape}\")\n    \n    # Calculate total antibiotic use\n    antibiotics_df['Total_Antibiotic_Use'] = antibiotics_df['DWTreatmentId_False'] + antibiotics_df['DWTreatmentId_True']\n    \n    # Plot distribution of antibiotic use versus vaccination modus\n    plt.figure(figsize=(10, 6))\n    sns.histplot(data=antibiotics_df, x='Total_Antibiotic_Use', hue='DWTreatmentId_True', bins=20, kde=True)\n    plt.title('Distribution of Antibiotic Use vs. Vaccination Modus')\n    plt.xlabel('Total Antibiotic Use')\n    plt.ylabel('Frequency')\n    plt.legend(title='Vaccination Modus', labels=['Without Vaccination', 'With Vaccination'])\n    plt.tight_layout()\n    plt.savefig('plot_01_antibiotic_use_vs_vaccination.png')\n    print(\"Plot saved as 'plot_01_antibiotic_use_vs_vaccination.png'\")\n    \n    # Create summary table\n    summary_table = antibiotics_df[['Medication_Type', 'Total_Antibiotic_Use']].describe()\n    summary_table.to_csv('table_01_summary_antibiotic_use.csv')\n    print(\"Summary table saved as 'table_01_summary_antibiotic_use.csv'\")\n    \n    # Write conclusions\n    with open('conclusions.txt', 'w') as f:\n        f.write(\"Conclusions and Interpretations:\\n\")\n        f.write(\"1. The distribution plot shows the variation in antibiotic use with respect to vaccination modus.\\n\")\n        f.write(\"2. The summary statistics provide insights into the central tendency and dispersion of antibiotic use.\\n\")\n        f.write(\"3. Further analysis could explore the impact of different vaccination strategies on antibiotic consumption.\\n\")\n    print(\"Conclusions written to 'conclusions.txt'\")\n    \n    print(\"Analysis completed successfully!\")",
      "source_file": "/tf/active/vicechatdev/smartstat/output/b7a013ae-a461-4aca-abae-9ed243119494/analysis_70ac0517/analysis.py",
      "tags": [
        "data-analysis",
        "statistical-analysis",
        "healthcare",
        "antibiotics",
        "vaccination",
        "visualization",
        "histogram",
        "kde-plot",
        "pandas",
        "seaborn",
        "csv-processing",
        "medical-data",
        "distribution-analysis",
        "data-pipeline",
        "reporting"
      ],
      "updated_at": "2025-12-07T01:59:48.503042",
      "usage_example": "# Ensure input_data.csv exists with required columns\n# Example CSV structure:\n# Medication_Type,DWTreatmentId_False,DWTreatmentId_True\n# ANTIBIOTICA,150,200\n# ANTIBIOTICA,180,220\n# OTHER,100,120\n\nimport pandas as pd\nimport numpy as np\nimport matplotlib.pyplot as plt\nimport seaborn as sns\nfrom scipy import stats\nimport warnings\n\n# Define the main function (paste the function code here)\n\n# Execute the analysis\nif __name__ == '__main__':\n    main()\n    # Output files will be created:\n    # - plot_01_antibiotic_use_vs_vaccination.png\n    # - table_01_summary_antibiotic_use.csv\n    # - conclusions.txt"
    },
    {
      "best_practices": [
        "Always run this script from the email-forwarder root directory to pass validation checks",
        "Ensure requirements.txt and src directory exist before execution",
        "The function depends on an external start_service() function which must be defined or imported",
        "Check the return value to determine if the service started successfully",
        "The function prints status messages to stdout, so redirect output if running in automated environments"
      ],
      "class_interface": {},
      "complexity": "simple",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-06 17:34:39",
      "decorators": [],
      "dependencies": [
        "pathlib",
        "dotenv"
      ],
      "description": "Entry point function that validates the working directory and starts an email forwarding service.",
      "docstring": "Main function.",
      "id": 1470,
      "imports": [
        "import os",
        "import sys",
        "import subprocess",
        "import time",
        "from pathlib import Path",
        "from dotenv import load_dotenv",
        "from main import main"
      ],
      "imports_required": [
        "from pathlib import Path",
        "from dotenv import load_dotenv"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 129,
      "line_start": 116,
      "name": "main_v91",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as the main entry point for the Email Forwarder application. It performs a directory validation check to ensure the script is being run from the correct location (email-forwarder directory), then delegates to a start_service() function to initialize and run the email forwarding service. It provides user feedback through console output and returns a boolean indicating success or failure.",
      "return_annotation": null,
      "return_explained": "Returns a boolean value: False if the directory validation fails (requirements.txt or src directory not found in current working directory), otherwise returns the boolean result from the start_service() function call (success status of service startup).",
      "settings_required": [
        "Must be run from the email-forwarder directory containing 'requirements.txt' and 'src' folder",
        "Requires a start_service() function to be defined in the same module or imported",
        "May require environment variables loaded via dotenv (based on import, though not explicitly used in this function)"
      ],
      "source_code": "def main():\n    \"\"\"Main function.\"\"\"\n    print(\"\ud83d\udce7 Email Forwarder - Programmatic Startup\")\n    print(\"=========================================\")\n    print()\n    \n    # Check current directory\n    if not (Path('requirements.txt').exists() and Path('src').exists()):\n        print(\"\u274c This script must be run from the email-forwarder directory\")\n        print(f\"   Current directory: {Path.cwd()}\")\n        return False\n    \n    success = start_service()\n    return success",
      "source_file": "/tf/active/vicechatdev/email-forwarder/run_service.py",
      "tags": [
        "entry-point",
        "main-function",
        "email-forwarder",
        "service-startup",
        "directory-validation",
        "initialization",
        "cli"
      ],
      "updated_at": "2025-12-07T01:59:48.502424",
      "usage_example": "# Ensure you are in the email-forwarder directory\n# Directory structure should include:\n#   - requirements.txt\n#   - src/\n\nfrom pathlib import Path\nfrom dotenv import load_dotenv\n\n# Define or import start_service function\ndef start_service():\n    # Service startup logic here\n    return True\n\n# Call main function\nif __name__ == '__main__':\n    success = main()\n    if success:\n        print('Service started successfully')\n    else:\n        print('Service failed to start')\n        sys.exit(1)"
    },
    {
      "best_practices": [
        "This function should be called as the entry point of a script using if __name__ == '__main__': main()",
        "Ensure all helper functions (clear_browser_cache_instructions, check_static_files, check_debug_endpoint, touch_static_files, print_help) are defined before calling main()",
        "Command-line arguments are case-insensitive due to .lower() conversion",
        "The function expects sys.argv[1] to contain the command, so it should be run with at least one argument or will display help",
        "Error handling for unknown commands is built-in and will display help information"
      ],
      "class_interface": {},
      "complexity": "simple",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-06 10:47:46",
      "decorators": [],
      "dependencies": [
        "requests"
      ],
      "description": "A command-line interface (CLI) entry point that parses command-line arguments and dispatches to various development tool functions for managing browser cache, static files, and debug endpoints.",
      "docstring": "Main development tools menu",
      "id": 482,
      "imports": [
        "import os",
        "import time",
        "import requests",
        "import sys",
        "from pathlib import Path"
      ],
      "imports_required": [
        "import sys",
        "import os",
        "import time",
        "import requests",
        "from pathlib import Path"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 102,
      "line_start": 84,
      "name": "main_v90",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as the main entry point for a development tools menu system. It processes command-line arguments to execute different development tasks: clearing browser cache ('clear'), checking static files and debug endpoints ('check'), touching/updating static files ('touch'), or displaying debug endpoint information ('info'). If no valid command is provided, it displays help information.",
      "return_annotation": null,
      "return_explained": "This function does not return any value (implicitly returns None). It performs side effects by calling other functions based on command-line arguments and printing output to the console.",
      "settings_required": [
        "Requires the following helper functions to be defined in the same module: clear_browser_cache_instructions(), check_static_files(), check_debug_endpoint(), touch_static_files(), print_help()",
        "Must be executed as a script with command-line arguments accessible via sys.argv"
      ],
      "source_code": "def main():\n    \"\"\"Main development tools menu\"\"\"\n    if len(sys.argv) > 1:\n        command = sys.argv[1].lower()\n        \n        if command == \"clear\":\n            clear_browser_cache_instructions()\n        elif command == \"check\":\n            check_static_files()\n            check_debug_endpoint()\n        elif command == \"touch\":\n            touch_static_files()\n        elif command == \"info\":\n            check_debug_endpoint()\n        else:\n            print(f\"\u274c Unknown command: {command}\")\n            print_help()\n    else:\n        print_help()",
      "source_file": "/tf/active/vicechatdev/vice_ai/dev_tools.py",
      "tags": [
        "cli",
        "command-line",
        "development-tools",
        "menu",
        "dispatcher",
        "entry-point",
        "static-files",
        "cache-management",
        "debug",
        "developer-utilities"
      ],
      "updated_at": "2025-12-07T01:59:48.501416",
      "usage_example": "# Run from command line:\n# python script.py clear\n# python script.py check\n# python script.py touch\n# python script.py info\n# python script.py  # Shows help\n\n# Or call directly in code:\nif __name__ == '__main__':\n    main()"
    },
    {
      "best_practices": [
        "Always specify the --collection argument as it is required",
        "Ensure ChromaDB server is running before executing this function",
        "Start with default similarity threshold (0.90) and adjust based on results",
        "Use --output-collection to preserve original data during testing",
        "Monitor memory usage when processing large collections as all data is loaded into memory",
        "The clustering/summarization step is currently commented out; uncomment if needed",
        "Consider backing up your ChromaDB collection before running cleanup operations",
        "Review the number of documents before and after cleaning to ensure expected behavior",
        "Lower similarity thresholds (e.g., 0.80) will remove more documents but may lose unique content",
        "Higher similarity thresholds (e.g., 0.95) will be more conservative in removing documents"
      ],
      "class_interface": {},
      "complexity": "moderate",
      "component_type": "function",
      "conditional_imports": [
        {
          "condition": "only if clustering/summarization functionality is uncommented and enabled",
          "import": "from src.clustering.text_clusterer import TextClusterer",
          "optional": true
        },
        {
          "condition": "imported in source file but not used in current implementation",
          "import": "from src.cleaners.combined_cleaner import CombinedCleaner",
          "optional": true
        },
        {
          "condition": "imported in source file but not directly used in main function",
          "import": "from src.utils.hash_utils import hash_text",
          "optional": true
        },
        {
          "condition": "imported in source file but not directly used in main function",
          "import": "from src.utils.similarity_utils import calculate_similarity",
          "optional": true
        },
        {
          "condition": "imported in source file but not used in current implementation",
          "import": "import os",
          "optional": true
        }
      ],
      "created_at": "2025-12-06 10:34:15",
      "decorators": [],
      "dependencies": [
        "argparse",
        "chromadb",
        "typing"
      ],
      "description": "Command-line interface function that orchestrates a ChromaDB collection cleaning pipeline by removing duplicate and similar documents through hashing and similarity screening.",
      "docstring": null,
      "id": 438,
      "imports": [
        "import argparse",
        "import chromadb",
        "from chromadb.config import Settings",
        "from typing import List",
        "from typing import Dict",
        "from typing import Any",
        "from src.cleaners.hash_cleaner import HashCleaner",
        "from src.cleaners.similarity_cleaner import SimilarityCleaner",
        "from src.cleaners.combined_cleaner import CombinedCleaner",
        "from src.utils.hash_utils import hash_text",
        "from src.utils.similarity_utils import calculate_similarity",
        "from src.clustering.text_clusterer import TextClusterer",
        "from src.config import Config",
        "import os"
      ],
      "imports_required": [
        "import argparse",
        "import chromadb",
        "from chromadb.config import Settings",
        "from typing import List, Dict, Any",
        "from src.cleaners.hash_cleaner import HashCleaner",
        "from src.cleaners.similarity_cleaner import SimilarityCleaner",
        "from src.config import Config"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 67,
      "line_start": 18,
      "name": "main_v89",
      "parameters": [],
      "parameters_explained": {
        "none": "This function takes no direct parameters. All configuration is provided via command-line arguments parsed internally using argparse. Command-line arguments include: --collection (required, ChromaDB collection name), --host (ChromaDB host, default 'vice_chroma'), --port (ChromaDB port, default 8000), --similarity-threshold (float, default 0.90), --num-clusters (int, default 10), --skip-summarization (boolean flag), --output-collection (optional output collection name)"
      },
      "parent_class": null,
      "purpose": "This is the main entry point for a ChromaDB data cleaning utility. It parses command-line arguments, initializes cleaning components (HashCleaner and SimilarityCleaner), loads documents from a ChromaDB collection, removes duplicates and near-duplicates based on configurable thresholds, and saves the cleaned data to an output collection. The function supports optional clustering/summarization (currently commented out) and allows users to specify similarity thresholds and output collection names.",
      "return_annotation": null,
      "return_explained": "This function returns None (implicit). It performs side effects by reading from and writing to ChromaDB collections, and prints status messages to stdout indicating the number of documents loaded and saved.",
      "settings_required": [
        "ChromaDB server must be running and accessible at the specified host and port",
        "The specified input collection must exist in ChromaDB",
        "Config class must be properly implemented with attributes: chroma_collection, chroma_host, chroma_port, similarity_threshold, num_clusters, skip_summarization",
        "Functions load_data_from_chromadb() and save_data_to_chromadb() must be defined in the same module or imported",
        "HashCleaner and SimilarityCleaner classes must be properly implemented with clean() methods",
        "Sufficient permissions to read from input collection and write to output collection"
      ],
      "source_code": "def main():\n    # Parse command line arguments\n    parser = argparse.ArgumentParser(description='Clean up ChromaDB collection')\n    parser.add_argument('--collection', type=str, required=True, help='Name of the ChromaDB collection')\n    parser.add_argument('--host', type=str, default='vice_chroma', help='ChromaDB host')\n    parser.add_argument('--port', type=int, default=8000, help='ChromaDB port')\n    parser.add_argument('--similarity-threshold', type=float, default=0.90, \n                        help='Similarity threshold for detecting similar documents')\n    parser.add_argument('--num-clusters', type=int, default=10, \n                        help='Number of clusters for clustering')\n    parser.add_argument('--skip-summarization', action='store_true', \n                        help='Skip the summarization step')\n    parser.add_argument('--output-collection', type=str, default=None,\n                        help='Output collection name (if not specified, will overwrite input collection)')\n    \n    args = parser.parse_args()\n    \n    # Create config object with command line arguments\n    config = Config()\n    config.chroma_collection = args.collection\n    config.chroma_host = args.host\n    config.chroma_port = args.port\n    config.similarity_threshold = args.similarity_threshold\n    config.num_clusters = args.num_clusters\n    config.skip_summarization = args.skip_summarization\n    \n    output_collection = args.output_collection or f\"{config.chroma_collection}_cleaned\"\n    \n    # Initialize cleaners\n    hash_cleaner = HashCleaner(config)\n    similarity_cleaner = SimilarityCleaner(config)\n\n    # Load data from ChromaDB\n    data = load_data_from_chromadb(config)\n    print(f\"Loaded {len(data)} documents from ChromaDB collection '{config.chroma_collection}'\")\n\n    # Step 1: Remove identical text chunks using hashing\n    cleaned_data_hash = hash_cleaner.clean(data)\n\n    # Step 2: Remove nearly similar text chunks using similarity screening\n    cleaned_data_similarity = similarity_cleaner.clean(cleaned_data_hash)\n\n    # Step 3: Cluster and summarize similar text chunks\n    #text_clusterer = TextClusterer(config)\n    #clustered_data = text_clusterer.cluster(cleaned_data_similarity)\n    clustered_data = cleaned_data_similarity\n\n    # Save cleaned and enriched data back to ChromaDB\n    save_data_to_chromadb(clustered_data, config, output_collection)\n    print(f\"Saved {len(clustered_data)} documents to ChromaDB collection '{output_collection}'\")",
      "source_file": "/tf/active/vicechatdev/chromadb-cleanup/main copy.py",
      "tags": [
        "cli",
        "command-line",
        "data-cleaning",
        "deduplication",
        "chromadb",
        "vector-database",
        "similarity-detection",
        "hash-based-deduplication",
        "document-processing",
        "pipeline",
        "orchestration",
        "main-entry-point"
      ],
      "updated_at": "2025-12-07T01:59:48.500655",
      "usage_example": "# Run from command line:\n# python script.py --collection my_documents --host localhost --port 8000 --similarity-threshold 0.85 --output-collection my_documents_clean\n\n# Or call directly in Python (not recommended as it's designed for CLI):\nif __name__ == '__main__':\n    main()\n\n# Example with minimal arguments:\n# python script.py --collection my_collection\n\n# Example with all options:\n# python script.py --collection my_docs --host vice_chroma --port 8000 --similarity-threshold 0.90 --num-clusters 10 --skip-summarization --output-collection cleaned_docs"
    },
    {
      "best_practices": [
        "Ensure all required input files exist before calling this function",
        "Configure logging before calling main() to capture all log messages",
        "The function creates a 'signatures' directory in the script's directory - ensure write permissions",
        "Store the owner password logged by the function in a secure location for administrative access",
        "The function expects specific file paths relative to __file__ - adjust paths if running from different locations",
        "Handle exceptions at the caller level if using this as part of a larger application",
        "The watermark image is optional - the function will continue without it if not found",
        "Review logged verification results (hash, PDF/A compliance, protection status) to ensure document integrity",
        "The finalize=True parameter locks the document - ensure this is desired behavior",
        "The function uses compliance_level='2b' for PDF/A-2b standard - adjust if different compliance is needed"
      ],
      "class_interface": {},
      "complexity": "moderate",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-06 10:26:49",
      "decorators": [],
      "dependencies": [
        "os",
        "logging",
        "json",
        "sys",
        "pikepdf"
      ],
      "description": "Entry point function that demonstrates document processing workflow by creating an audited, watermarked, and protected PDF/A document from a DOCX file with audit trail data.",
      "docstring": null,
      "id": 422,
      "imports": [
        "import os",
        "import logging",
        "import json",
        "import sys",
        "import pikepdf",
        "from src.document_processor import DocumentProcessor"
      ],
      "imports_required": [
        "import os",
        "import logging",
        "import json",
        "import sys",
        "import pikepdf",
        "from src.document_processor import DocumentProcessor"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 114,
      "line_start": 23,
      "name": "main_v88",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as a demonstration and testing entry point for the document processing system. It sets up necessary directories, validates input files (DOCX document, JSON audit data, watermark image), processes the document through the DocumentProcessor pipeline to create a compliant PDF/A output with watermarks and signatures, and performs verification checks on the resulting document including hash verification, PDF/A compliance, and protection status.",
      "return_annotation": null,
      "return_explained": "This function does not return any value (implicitly returns None). It performs side effects including creating directories, processing documents, and logging results. The function may return early (None) if required input files are not found.",
      "settings_required": [
        "Logger must be configured (assumes 'logger' variable exists in module scope)",
        "Directory structure: './signatures/' directory (created if missing)",
        "Input files: './examples/test_document_original.docx' (required)",
        "Input files: './examples/sample_audit_data.json' (required)",
        "Input files: './examples/ViceBio_Logo_dark blue.png' (optional watermark)",
        "DocumentProcessor class must be available from src.document_processor module",
        "DocumentProcessor must have process_document, hash_generator, pdfa_converter, and optionally document_protector attributes"
      ],
      "source_code": "def main():\n    # Create sample directory structure if it doesn't exist\n    signatures_dir = os.path.join(os.path.dirname(__file__), 'signatures')\n    if not os.path.exists(signatures_dir):\n        os.makedirs(signatures_dir)\n        logger.info(f\"Created signatures directory: {signatures_dir}\")\n    \n    # Sample document and audit data\n    sample_doc = os.path.join(os.path.dirname(__file__), './examples/test_document_original.docx')\n    sample_json = os.path.join(os.path.dirname(__file__), './examples/sample_audit_data.json')\n    output_pdf = os.path.join(os.path.dirname(__file__), './examples/audited_document.pdf')\n    watermark_path = os.path.join(os.path.dirname(__file__), './examples/ViceBio_Logo_dark blue.png')\n    \n    # Check if files exist\n    if not os.path.exists(sample_doc):\n        logger.error(f\"Sample document not found: {sample_doc}\")\n        return\n    \n    if not os.path.exists(sample_json):\n        logger.error(f\"Audit data JSON not found: {sample_json}\")\n        return\n    \n    if not os.path.exists(watermark_path):\n        logger.warning(f\"Watermark image not found: {watermark_path}\")\n        watermark_path = None\n    \n    # Initialize document processor\n    processor = DocumentProcessor()\n    \n    # Process document\n    try:\n        output_path = processor.process_document(\n            original_doc_path=sample_doc,\n            json_path=sample_json,\n            output_path=output_pdf,\n            watermark_image=watermark_path,\n            include_signatures=True,\n            convert_to_pdfa=True,\n            compliance_level='2b',\n            finalize=True  # Add this parameter to lock the document\n        )\n        \n        logger.info(f\"Successfully created audited document: {output_path}\")\n        \n        # Verify document hash using processor's stored hash if available\n        if hasattr(processor, '_last_document_hash'):\n            logger.info(\"Using stored document hash for verification\")\n            stored_hash = processor._last_document_hash\n            extracted_hash = None\n            \n            try:\n                with pikepdf.open(output_path) as pdf:\n                    if \"/DocumentHash\" in pdf.docinfo:\n                        hash_json = pdf.docinfo[\"/DocumentHash\"]\n                        hash_metadata = json.loads(str(hash_json))\n                        extracted_hash = hash_metadata.get(\"hash\")\n            except Exception as e:\n                logger.warning(f\"Could not extract hash from PDF metadata: {e}\")\n            \n            hash_verified = stored_hash == extracted_hash\n            if hash_verified:\n                logger.info(f\"Document hash verification: Passed \u2705\")\n            else:\n                logger.warning(f\"Document hash verification: Failed \u274c\")\n        else:\n            # Fall back to standard verification\n            hash_verified = processor.hash_generator.verify_hash(output_path)\n            if hash_verified:\n                logger.info(f\"Document hash verification: Passed \u2705\")\n            else:\n                logger.warning(f\"Document hash verification: Failed \u274c\")\n        \n        # Verify PDF/A compliance\n        pdfa_compliant = processor.pdfa_converter.validate_pdfa(output_path)\n        if pdfa_compliant:\n            logger.info(f\"PDF/A compliance check: Passed \u2705\")\n        else:\n            logger.warning(f\"PDF/A compliance check: Failed \u274c\")\n        \n        
# Check if document is protected\n        is_protected = hasattr(processor, 'document_protector') and hasattr(processor, '_last_owner_password')\n        if is_protected:\n            logger.info(\"\ud83d\udd12 Document is protected from editing\")\n            logger.info(f\"Owner password: {getattr(processor, '_last_owner_password', 'Not available')}\")\n            logger.info(\"Keep this password in a secure location for administrative access\")\n        else:\n            logger.info(\"\u26a0\ufe0f Document is not protected from editing\")\n            \n        logger.info(f\"Document processing complete. Output file: {output_path}\")\n        \n    except Exception as e:\n        logger.error(f\"Error processing document: {e}\", exc_info=True)",
      "source_file": "/tf/active/vicechatdev/document_auditor/main.py",
      "tags": [
        "document-processing",
        "pdf-generation",
        "audit-trail",
        "watermarking",
        "pdf-a-compliance",
        "document-protection",
        "hash-verification",
        "entry-point",
        "demo",
        "file-validation",
        "docx-to-pdf"
      ],
      "updated_at": "2025-12-07T01:59:48.499906",
      "usage_example": "# Ensure required files exist in examples directory:\n# - examples/test_document_original.docx\n# - examples/sample_audit_data.json\n# - examples/ViceBio_Logo_dark blue.png (optional)\n\nimport os\nimport logging\nimport json\nimport sys\nimport pikepdf\nfrom src.document_processor import DocumentProcessor\n\n# Configure logger\nlogger = logging.getLogger(__name__)\nlogging.basicConfig(level=logging.INFO)\n\n# Run the main function\nif __name__ == '__main__':\n    main()\n\n# Output will be created at: ./examples/audited_document.pdf\n# The function will log verification results for hash, PDF/A compliance, and protection status"
    },
    {
      "best_practices": [
        "This function should be called as the main entry point of the diagnostic script, typically within an if __name__ == '__main__': block",
        "Ensure that test_different_scopes() and test_tenant_admin_center_approach() functions are properly defined before calling main()",
        "The function assumes console output is available and appropriate for the execution environment",
        "Consider adding error handling around the test function calls to prevent the entire diagnostic from failing if one test encounters an error",
        "The function provides guidance but does not automatically fix issues - manual intervention by SharePoint administrators may be required"
      ],
      "class_interface": {},
      "complexity": "simple",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-06 09:22:08",
      "decorators": [],
      "dependencies": [
        "requests",
        "json"
      ],
      "description": "Entry point function that runs a SharePoint permission diagnostic tool, testing different authentication scopes and providing troubleshooting guidance.",
      "docstring": "Main diagnostic function.",
      "id": 238,
      "imports": [
        "import requests",
        "import json"
      ],
      "imports_required": [
        "import requests",
        "import json"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 165,
      "line_start": 151,
      "name": "main_v87",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as the main orchestrator for a SharePoint permission diagnostic utility. It executes two test functions (test_different_scopes and test_tenant_admin_center_approach) to diagnose SharePoint app-only authentication issues, then displays a summary of potential solutions for enabling app-only tokens and proper app registration. It's designed to help developers troubleshoot SharePoint API authentication problems.",
      "return_annotation": null,
      "return_explained": "This function returns None (implicitly). It performs side effects by printing diagnostic information and test results to stdout.",
      "settings_required": [
        "Requires test_different_scopes() function to be defined in the same module",
        "Requires test_tenant_admin_center_approach() function to be defined in the same module",
        "May require SharePoint tenant URL and authentication credentials configured in the called test functions",
        "May require Azure AD app registration details (client ID, client secret, tenant ID) for SharePoint API access"
      ],
      "source_code": "def main():\n    \"\"\"Main diagnostic function.\"\"\"\n    print(\"SharePoint Permission Diagnostic Tool\")\n    print(\"=\" * 50)\n    \n    test_different_scopes()\n    print()\n    test_tenant_admin_center_approach()\n    \n    print(\"\\n\" + \"=\" * 50)\n    print(\"\ud83d\udccb Summary of Solutions to Try:\")\n    print(\"1. Use SharePoint Admin Center to enable app-only tokens\")\n    print(\"2. Use API Management to approve permissions\")\n    print(\"3. Use PowerShell with PnP to register the app properly\")\n    print(\"4. Contact your SharePoint admin to enable app-only authentication\")",
      "source_file": "/tf/active/vicechatdev/SPFCsync/diagnose_permissions.py",
      "tags": [
        "sharepoint",
        "diagnostic",
        "authentication",
        "troubleshooting",
        "app-only-tokens",
        "permissions",
        "azure-ad",
        "main-entry-point",
        "testing",
        "admin-tools"
      ],
      "updated_at": "2025-12-07T01:59:48.499260",
      "usage_example": "# Assuming test_different_scopes() and test_tenant_admin_center_approach() are defined\nimport requests\nimport json\n\ndef test_different_scopes():\n    print(\"Testing different authentication scopes...\")\n    # Implementation here\n\ndef test_tenant_admin_center_approach():\n    print(\"Testing tenant admin center approach...\")\n    # Implementation here\n\ndef main():\n    \"\"\"Main diagnostic function.\"\"\"\n    print(\"SharePoint Permission Diagnostic Tool\")\n    print(\"=\" * 50)\n    \n    test_different_scopes()\n    print()\n    test_tenant_admin_center_approach()\n    \n    print(\"\\n\" + \"=\" * 50)\n    print(\"\ud83d\udccb Summary of Solutions to Try:\")\n    print(\"1. Use SharePoint Admin Center to enable app-only tokens\")\n    print(\"2. Use API Management to approve permissions\")\n    print(\"3. Use PowerShell with PnP to register the app properly\")\n    print(\"4. Contact your SharePoint admin to enable app-only authentication\")\n\nif __name__ == \"__main__\":\n    main()"
    },
    {
      "best_practices": [
        "This function is designed for testing purposes only and uses a hardcoded UUID - it should not be used in production code",
        "Ensure proper authentication is configured before running this function",
        "The function provides console output for debugging - consider redirecting or capturing output in automated test environments",
        "The hardcoded UUID should be replaced with a parameterized value for reusable testing",
        "Error handling catches all exceptions broadly - consider more specific exception handling for production use",
        "Verify network connectivity and reMarkable cloud service availability before execution"
      ],
      "class_interface": {},
      "complexity": "simple",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-07 00:56:49",
      "decorators": [],
      "dependencies": [
        "json",
        "time",
        "hashlib",
        "uuid",
        "base64",
        "zlib",
        "pathlib",
        "crc32c"
      ],
      "description": "A test function that attempts to move a specific document (identified by UUID) from trash to a 'gpt_in' folder on a reMarkable device using the DocumentMover class.",
      "docstring": "Test moving a document to gpt_in folder",
      "id": 2124,
      "imports": [
        "import json",
        "import time",
        "import hashlib",
        "import uuid",
        "import base64",
        "import zlib",
        "from pathlib import Path",
        "from auth import RemarkableAuth",
        "import crc32c"
      ],
      "imports_required": [
        "import json",
        "import time",
        "import hashlib",
        "import uuid",
        "import base64",
        "import zlib",
        "from pathlib import Path",
        "import crc32c"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 546,
      "line_start": 523,
      "name": "main_v86",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as a test harness to verify the document moving functionality of the DocumentMover class. It specifically tests moving a hardcoded document (UUID: 206f5df3-07c2-4341-8afd-2b7362aefa91) from trash to the 'gpt_in' folder on a reMarkable device, providing console feedback about the operation's success or failure.",
      "return_annotation": null,
      "return_explained": "Returns a boolean value: True if the document was successfully moved to the gpt_in folder, False if the operation failed or an exception occurred during initialization.",
      "settings_required": [
        "DocumentMover class must be available in the same module or imported",
        "RemarkableAuth class must be available (from auth module)",
        "Valid reMarkable device authentication credentials configured in RemarkableAuth",
        "Network connectivity to reMarkable cloud services",
        "The document with UUID '206f5df3-07c2-4341-8afd-2b7362aefa91' must exist in the reMarkable account",
        "A 'gpt_in' folder must exist or be creatable on the reMarkable device"
      ],
      "source_code": "def main():\n    \"\"\"Test moving a document to gpt_in folder\"\"\"\n    try:\n        mover = DocumentMover()\n        \n        # Use the document we know exists\n        test_doc_uuid = \"206f5df3-07c2-4341-8afd-2b7362aefa91\"\n        \n        print(f\"\ud83e\uddea Testing Document Move to gpt_in Folder\")\n        print(f\"Target document: {test_doc_uuid}\")\n        \n        success = mover.move_document_from_trash(test_doc_uuid)\n        \n        if success:\n            print(f\"\\n\u2705 Test completed successfully!\")\n            print(f\"\ud83d\udca1 Check your reMarkable device - the document should now be visible in the gpt_in folder\")\n        else:\n            print(f\"\\n\u274c Test failed\")\n        \n        return success\n        \n    except Exception as e:\n        print(f\"\u274c Test failed to initialize: {e}\")\n        return False",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/cloudtest/test_move_from_trash.py",
      "tags": [
        "test",
        "remarkable",
        "document-management",
        "file-operations",
        "cloud-sync",
        "integration-test",
        "device-management"
      ],
      "updated_at": "2025-12-07T01:59:48.498609",
      "usage_example": "# Ensure DocumentMover and RemarkableAuth classes are available\n# from document_mover import DocumentMover\n# from auth import RemarkableAuth\n\n# Run the test\nif __name__ == '__main__':\n    result = main()\n    if result:\n        print('Document successfully moved')\n    else:\n        print('Document move failed')"
    },
    {
      "best_practices": [
        "This function is designed for debugging purposes and outputs directly to stdout; it should not be used in production code that requires structured output",
        "The function hardcodes the gpt_in folder UUID ('99c6551f-2855-44cf-a4e4-c9c586558f42'); this should be parameterized for reusability",
        "Ensure proper authentication is established before calling this function to avoid API errors",
        "The function catches all exceptions broadly; consider more specific exception handling for production use",
        "This function depends on the FolderDebugger class which must implement methods: get_root_info(), analyze_gpt_in_folder(), find_documents_in_folder(), and check_web_app_sync_status()",
        "The function is intended as a standalone diagnostic tool and should be run in a context where console output is appropriate"
      ],
      "class_interface": {},
      "complexity": "moderate",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-07 00:54:01",
      "decorators": [],
      "dependencies": [
        "json",
        "auth (custom module containing RemarkableAuth)",
        "FolderDebugger (custom class, not shown in imports but instantiated in function)"
      ],
      "description": "Diagnostic function that debugs visibility issues with the 'gpt_in' folder in a reMarkable tablet's file system by analyzing folder metadata, document contents, and sync status.",
      "docstring": "Debug the gpt_in folder visibility issue",
      "id": 2116,
      "imports": [
        "import json",
        "from auth import RemarkableAuth"
      ],
      "imports_required": [
        "import json",
        "from auth import RemarkableAuth"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 251,
      "line_start": 199,
      "name": "main_v85",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function performs comprehensive debugging of the 'gpt_in' folder visibility issue on reMarkable tablets. It instantiates a FolderDebugger, retrieves root folder information, analyzes the gpt_in folder's metadata (checking for deletion status, parent folder, and other properties), finds documents within the folder, checks web app sync status, and provides a detailed diagnostic report. The function is designed to help troubleshoot why documents might not appear in the reMarkable web application despite existing in the device's file system.",
      "return_annotation": null,
      "return_explained": "Returns a boolean value: True if the debug process completes successfully (regardless of whether issues are found), False if an exception occurs during debugging. The function primarily outputs diagnostic information to stdout rather than returning data.",
      "settings_required": [
        "FolderDebugger class must be defined and available in the same module or imported",
        "RemarkableAuth must be properly configured with valid reMarkable API credentials",
        "Network access to reMarkable cloud services for API calls",
        "Valid authentication tokens for reMarkable API access"
      ],
      "source_code": "def main():\n    \"\"\"Debug the gpt_in folder visibility issue\"\"\"\n    try:\n        debugger = FolderDebugger()\n        \n        print(f\"\ud83d\udd0d Debugging GPT_IN Folder Visibility\")\n        print(\"=\" * 50)\n        \n        # Get root info\n        root_data, root_content = debugger.get_root_info()\n        \n        # Analyze gpt_in folder\n        gpt_in_info, gpt_in_metadata = debugger.analyze_gpt_in_folder(root_content)\n        \n        if gpt_in_info and gpt_in_metadata:\n            print(f\"\\n\ud83d\udcca GPT_IN FOLDER ANALYSIS:\")\n            print(f\"   Folder Name: {gpt_in_metadata.get('visibleName', 'Unknown')}\")\n            print(f\"   Folder Type: {gpt_in_metadata.get('type', 'Unknown')}\")\n            print(f\"   Folder Parent: {gpt_in_metadata.get('parent', 'Unknown')}\")\n            print(f\"   Folder Deleted: {gpt_in_metadata.get('deleted', False)}\")\n            print(f\"   Folder Pinned: {gpt_in_metadata.get('pinned', False)}\")\n            \n            # Check if folder is deleted or has issues\n            if gpt_in_metadata.get('deleted', False):\n                print(f\"\u274c ISSUE FOUND: gpt_in folder is marked as DELETED!\")\n            elif gpt_in_metadata.get('parent') != '':\n                print(f\"\u274c ISSUE FOUND: gpt_in folder parent is '{gpt_in_metadata.get('parent')}', should be '' (root)!\")\n            else:\n                print(f\"\u2705 gpt_in folder appears healthy\")\n        \n        # Find documents in gpt_in folder\n        gpt_in_uuid = \"99c6551f-2855-44cf-a4e4-c9c586558f42\"\n        documents = debugger.find_documents_in_folder(root_content, gpt_in_uuid)\n        \n        # Check sync status\n        sync_status = debugger.check_web_app_sync_status()\n        \n        print(f\"\\n\ud83c\udfaf SUMMARY:\")\n        print(f\"   \ud83d\udcc1 gpt_in folder: {'\u2705 Found' if gpt_in_info else '\u274c Missing'}\")\n        print(f\"   \ud83d\udcc4 Documents in folder: {len(documents) if documents else 0}\")\n        print(f\"   \ud83c\udf10 Sync generation: {sync_status.get('generation') if sync_status else 'Unknown'}\")\n        \n        if gpt_in_info and documents:\n            print(f\"\\n\ud83d\udca1 CONCLUSION:\")\n            print(f\"   The gpt_in folder exists and contains documents.\")\n            print(f\"   If documents don't appear in the web app, this is likely a client caching issue.\")\n            print(f\"   Try refreshing the web app, clearing browser cache, or waiting for sync.\")\n        \n        return True\n        \n    except Exception as e:\n        print(f\"\u274c Debug failed: {e}\")\n        return False",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/cloudtest/debug_gpt_in_folder.py",
      "tags": [
        "debugging",
        "diagnostics",
        "remarkable-tablet",
        "folder-visibility",
        "file-system",
        "sync-status",
        "metadata-analysis",
        "troubleshooting",
        "cloud-sync"
      ],
      "updated_at": "2025-12-07T01:59:48.497864",
      "usage_example": "# Ensure FolderDebugger class is defined or imported\n# Ensure RemarkableAuth is configured with valid credentials\n\nif __name__ == '__main__':\n    success = main()\n    if success:\n        print('Debug completed successfully')\n    else:\n        print('Debug failed with errors')"
    },
    {
      "best_practices": [
        "This function should be called as the main entry point of the script, typically within an 'if __name__ == \"__main__\"' block",
        "Ensure that analyze_rm_filename_patterns() and generate_header_examples() functions are properly defined before calling main()",
        "The function provides user-friendly output with emoji indicators (\u2705, \u274c) for status feedback",
        "Error handling is broad (catches all exceptions), which is appropriate for a top-level main function but may hide specific error details",
        "The function prints next steps for the user, suggesting it's part of a development/debugging workflow",
        "Consider checking the return value when calling this function programmatically to handle failure cases"
      ],
      "class_interface": {},
      "complexity": "simple",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-07 00:48:43",
      "decorators": [],
      "dependencies": [
        "json",
        "pathlib"
      ],
      "description": "Main entry point function that orchestrates an analysis of filename patterns and generates header examples, with error handling and user feedback.",
      "docstring": "Run the analysis",
      "id": 2104,
      "imports": [
        "import json",
        "from pathlib import Path"
      ],
      "imports_required": [
        "import json",
        "from pathlib import Path"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 172,
      "line_start": 157,
      "name": "main_v84",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as the primary execution point for analyzing rm-filename patterns and generating corresponding header examples. It coordinates the execution of two analysis functions, provides status feedback to the user, and offers guidance on next steps for fixing upload functionality related to rm-filename handling. The function is designed to be called as the main entry point of a script that analyzes and documents proper filename pattern usage.",
      "return_annotation": null,
      "return_explained": "Returns False if an exception occurs during execution, otherwise returns None implicitly. The False return value indicates analysis failure, while no explicit return (None) indicates successful completion.",
      "settings_required": [
        "Requires analyze_rm_filename_patterns() function to be defined in the same module or imported",
        "Requires generate_header_examples() function to be defined in the same module or imported",
        "May require access to data files or directories that these analysis functions operate on"
      ],
      "source_code": "def main():\n    \"\"\"Run the analysis\"\"\"\n    try:\n        patterns = analyze_rm_filename_patterns()\n        generate_header_examples()\n        \n        print(f\"\\n\u2705 Analysis complete!\")\n        print(\"Next steps:\")\n        print(\"1. Fix upload_raw_content to ALWAYS include rm-filename\")\n        print(\"2. Add proper rm-filename patterns for root/system files\") \n        print(\"3. Follow the correct upload sequence\")\n        print(\"4. Test with the corrected headers\")\n        \n    except Exception as e:\n        print(f\"\u274c Analysis failed: {e}\")\n        return False",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/cloudtest/analyze_headers.py",
      "tags": [
        "entry-point",
        "orchestration",
        "analysis",
        "filename-patterns",
        "error-handling",
        "user-feedback",
        "main-function",
        "workflow-coordination"
      ],
      "updated_at": "2025-12-07T01:59:48.497220",
      "usage_example": "if __name__ == '__main__':\n    main()"
    },
    {
      "best_practices": [
        "This function has a hardcoded document UUID which limits its reusability - consider parameterizing the UUID for production use",
        "The function includes user-friendly console output with emoji indicators for status updates",
        "Error handling is implemented with a try-except block to catch initialization failures",
        "The function returns a boolean for easy integration into larger scripts or test suites",
        "Consider adding logging instead of print statements for production environments",
        "The DocumentRefresher class must be properly initialized with authentication before use"
      ],
      "class_interface": {},
      "complexity": "simple",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-07 00:44:05",
      "decorators": [],
      "dependencies": [
        "json",
        "time",
        "hashlib",
        "uuid",
        "base64",
        "zlib",
        "pathlib",
        "crc32c"
      ],
      "description": "A standalone function that forces a refresh of document visibility for a specific hardcoded Remarkable document UUID by instantiating a DocumentRefresher and calling its force_refresh_document method.",
      "docstring": "Force refresh the document visibility",
      "id": 2093,
      "imports": [
        "import json",
        "import time",
        "import hashlib",
        "import uuid",
        "import base64",
        "import zlib",
        "from pathlib import Path",
        "from auth import RemarkableAuth",
        "import crc32c"
      ],
      "imports_required": [
        "import json",
        "import time",
        "import hashlib",
        "import uuid",
        "import base64",
        "import zlib",
        "from pathlib import Path",
        "from auth import RemarkableAuth",
        "import crc32c"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 317,
      "line_start": 294,
      "name": "main_v83",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as a test/utility script to manually trigger a document visibility refresh operation for a Remarkable cloud document. It targets a specific document (UUID: 206f5df3-07c2-4341-8afd-2b7362aefa91) and attempts to make it visible in the 'gpt_in' folder of the web application. This is useful for debugging synchronization issues or manually forcing document metadata updates.",
      "return_annotation": null,
      "return_explained": "Returns a boolean value: True if the document refresh operation completed successfully, False if the refresh failed or an exception occurred during initialization. The return value indicates whether the document visibility was successfully updated.",
      "settings_required": [
        "DocumentRefresher class must be defined and available in the same module or imported",
        "RemarkableAuth module must be available with proper authentication configuration",
        "Valid Remarkable cloud API credentials configured in the auth module",
        "Network access to Remarkable cloud services",
        "The target document UUID (206f5df3-07c2-4341-8afd-2b7362aefa91) should exist in the Remarkable cloud account"
      ],
      "source_code": "def main():\n    \"\"\"Force refresh the document visibility\"\"\"\n    try:\n        refresher = DocumentRefresher()\n        \n        # Use the document we know exists\n        test_doc_uuid = \"206f5df3-07c2-4341-8afd-2b7362aefa91\"\n        \n        print(f\"\ud83e\uddea Force Refreshing Document Visibility\")\n        print(f\"Target document: {test_doc_uuid}\")\n        \n        success = refresher.force_refresh_document(test_doc_uuid)\n        \n        if success:\n            print(f\"\\n\u2705 Refresh completed successfully!\")\n            print(f\"\ud83d\udca1 Check the web app - the document should now be visible in gpt_in folder\")\n        else:\n            print(f\"\\n\u274c Refresh failed\")\n        \n        return success\n        \n    except Exception as e:\n        print(f\"\u274c Refresh failed to initialize: {e}\")\n        return False",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/cloudtest/force_web_app_refresh.py",
      "tags": [
        "remarkable",
        "document-sync",
        "cloud-storage",
        "visibility-refresh",
        "utility",
        "test-function",
        "document-management",
        "force-update"
      ],
      "updated_at": "2025-12-07T01:59:48.496569",
      "usage_example": "# Assuming DocumentRefresher class is defined in the same file\n# and all required imports are present\n\nif __name__ == '__main__':\n    result = main()\n    if result:\n        print('Document refresh successful')\n    else:\n        print('Document refresh failed')"
    },
    {
      "best_practices": [
        "This function should be called as the main entry point of the test script, typically within an 'if __name__ == \"__main__\"' block",
        "The function provides comprehensive error handling, so callers should check the boolean return value to determine test success",
        "Stack traces are automatically printed on failure, making debugging easier without additional logging setup",
        "Ensure all required modules (auth, upload_manager, SimplePDFUploadTest) are properly configured before calling this function",
        "Consider using the return value to set appropriate exit codes in command-line scripts",
        "The function catches all exceptions, so specific error types are not propagated to callers"
      ],
      "class_interface": {},
      "complexity": "simple",
      "component_type": "function",
      "conditional_imports": [
        {
          "condition": "imported lazily when an exception occurs to print stack trace",
          "import": "import traceback",
          "optional": false
        }
      ],
      "created_at": "2025-12-07 00:38:13",
      "decorators": [],
      "dependencies": [
        "reportlab",
        "pathlib",
        "typing"
      ],
      "description": "Entry point function that initializes and runs a PDF upload test for reMarkable devices, with comprehensive error handling and traceback reporting.",
      "docstring": "Run the simple PDF upload test",
      "id": 2078,
      "imports": [
        "import os",
        "import json",
        "import time",
        "import uuid",
        "import hashlib",
        "from pathlib import Path",
        "from typing import Dict",
        "from typing import Any",
        "from auth import RemarkableAuth",
        "from upload_manager import RemarkableUploadManager",
        "from reportlab.pdfgen import canvas",
        "from reportlab.lib.pagesizes import letter",
        "import traceback",
        "import traceback",
        "import traceback"
      ],
      "imports_required": [
        "import os",
        "import json",
        "import time",
        "import uuid",
        "import hashlib",
        "from pathlib import Path",
        "from typing import Dict, Any",
        "from auth import RemarkableAuth",
        "from upload_manager import RemarkableUploadManager",
        "from reportlab.pdfgen import canvas",
        "from reportlab.lib.pagesizes import letter",
        "import traceback"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 262,
      "line_start": 252,
      "name": "main_v82",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as the main entry point for testing PDF upload functionality to reMarkable devices. It instantiates a SimplePDFUploadTest object, executes the test, and handles any exceptions that occur during initialization or execution. The function provides detailed error reporting including stack traces to aid in debugging test failures.",
      "return_annotation": null,
      "return_explained": "Returns a boolean value indicating test success (True) or failure (False). Returns True if the test completes successfully, False if any exception occurs during test initialization or execution.",
      "settings_required": [
        "SimplePDFUploadTest class must be defined and importable in the same module or imported",
        "RemarkableAuth module must be available with proper authentication configuration",
        "RemarkableUploadManager module must be available for handling uploads",
        "Appropriate reMarkable API credentials or authentication tokens may be required",
        "Network connectivity to reMarkable cloud services"
      ],
      "source_code": "def main():\n    \"\"\"Run the simple PDF upload test\"\"\"\n    try:\n        test = SimplePDFUploadTest()\n        success = test.run_test()\n        return success\n    except Exception as e:\n        print(f\"\u274c Test initialization failed: {e}\")\n        import traceback\n        traceback.print_exc()\n        return False",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/cloudtest/test_simple_pdf_upload.py",
      "tags": [
        "testing",
        "pdf",
        "upload",
        "remarkable",
        "entry-point",
        "error-handling",
        "integration-test",
        "main-function",
        "test-runner"
      ],
      "updated_at": "2025-12-07T01:59:48.495930",
      "usage_example": "if __name__ == '__main__':\n    # Run the PDF upload test\n    success = main()\n    \n    # Exit with appropriate status code\n    import sys\n    sys.exit(0 if success else 1)\n    \n    # Alternative: Use result for further processing\n    # if success:\n    #     print('Test passed successfully')\n    # else:\n    #     print('Test failed')"
    },
    {
      "best_practices": [
        "This function modifies sys.path at runtime, which should only be done in test/development contexts, not in production code",
        "The function performs lazy imports inside the function body, which is useful for testing but may hide import errors until runtime",
        "Error handling is minimal - only checks if session is None/False, but doesn't catch exceptions from build_complete_replica()",
        "The replica directory name 'remarkable_replica' is hardcoded - consider making it configurable for different test scenarios",
        "This function is designed as a test entry point and should be called from if __name__ == '__main__': block",
        "Ensure proper cleanup of the replica directory between test runs if needed"
      ],
      "class_interface": {},
      "complexity": "moderate",
      "component_type": "function",
      "conditional_imports": [
        {
          "condition": "imported inside the function to modify sys.path for local module imports",
          "import": "import sys",
          "optional": false
        },
        {
          "condition": "imported inside the function after sys.path modification to access the authentication module",
          "import": "from auth import RemarkableAuth",
          "optional": false
        }
      ],
      "created_at": "2025-12-07 00:36:19",
      "decorators": [],
      "dependencies": [
        "sys",
        "pathlib",
        "os",
        "json",
        "zipfile",
        "requests",
        "logging",
        "shutil",
        "enum",
        "typing",
        "dataclasses",
        "datetime",
        "re"
      ],
      "description": "A test function that authenticates with the Remarkable cloud service and builds a complete local replica of the user's Remarkable data.",
      "docstring": "Main function for testing",
      "id": 2072,
      "imports": [
        "import os",
        "import json",
        "import zipfile",
        "import requests",
        "import logging",
        "import shutil",
        "from enum import Enum",
        "from pathlib import Path",
        "from typing import Dict",
        "from typing import List",
        "from typing import Optional",
        "from typing import Any",
        "from typing import Set",
        "from dataclasses import dataclass",
        "from dataclasses import asdict",
        "from datetime import datetime",
        "import sys",
        "from auth import RemarkableAuth",
        "import re"
      ],
      "imports_required": [
        "from pathlib import Path"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 640,
      "line_start": 621,
      "name": "main_v81",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as a testing entry point for the Remarkable synchronization system. It performs authentication using RemarkableAuth, establishes a session, and then creates a local replica of all Remarkable cloud data using the RemarkableLocalReplica class. The function is designed to verify that the authentication and replication pipeline works correctly.",
      "return_annotation": null,
      "return_explained": "Returns a boolean value indicating success or failure. Returns False if authentication fails, otherwise returns the success status from the replica.build_complete_replica() method (expected to be a boolean indicating whether the replica was built successfully).",
      "settings_required": [
        "RemarkableAuth module must be present in the same directory as the script",
        "RemarkableLocalReplica class must be defined in the same module or imported",
        "Valid Remarkable cloud credentials (handled by RemarkableAuth)",
        "Network connectivity to Remarkable cloud services",
        "Write permissions for creating the 'remarkable_replica' directory"
      ],
      "source_code": "def main():\n    \"\"\"Main function for testing\"\"\"\n    import sys\n    sys.path.insert(0, str(Path(__file__).parent))\n    \n    from auth import RemarkableAuth\n    \n    # Authenticate\n    auth = RemarkableAuth()\n    session = auth.authenticate()\n    \n    if not session:\n        print(\"\u274c Authentication failed\")\n        return False\n    \n    # Build replica\n    replica = RemarkableLocalReplica(session, \"remarkable_replica\")\n    success = replica.build_complete_replica()\n    \n    return success",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/cloudtest/local_replica.py",
      "tags": [
        "testing",
        "authentication",
        "remarkable",
        "cloud-sync",
        "replication",
        "entry-point",
        "integration-test",
        "main-function"
      ],
      "updated_at": "2025-12-07T01:59:48.495280",
      "usage_example": "# Assuming this function is in a file called remarkable_sync.py\n# and the auth module and RemarkableLocalReplica class are available\n\nif __name__ == '__main__':\n    success = main()\n    if success:\n        print('\u2705 Replica built successfully')\n    else:\n        print('\u274c Failed to build replica')\n    sys.exit(0 if success else 1)"
    },
    {
      "best_practices": [
        "This function should only be called as the main entry point of the program, typically within an 'if __name__ == \"__main__\"' block",
        "Ensure run_complete_test_suite() is properly defined before calling main()",
        "The function uses sys.exit() which terminates the entire program - do not call this from within other functions unless program termination is intended",
        "Exit code 0 indicates success, exit code 1 indicates failure - this follows Unix/Linux conventions",
        "The function provides three levels of error handling: normal test failure, user interruption (Ctrl+C), and unexpected exceptions",
        "Traceback is printed for unexpected exceptions to aid in debugging"
      ],
      "class_interface": {},
      "complexity": "simple",
      "component_type": "function",
      "conditional_imports": [
        {
          "condition": "only when an unexpected exception occurs to print the full stack trace",
          "import": "import traceback",
          "optional": false
        }
      ],
      "created_at": "2025-12-07 00:32:34",
      "decorators": [],
      "dependencies": [
        "sys",
        "pathlib",
        "time",
        "traceback"
      ],
      "description": "Main entry point function that executes a complete test suite and handles program exit codes based on test results and exceptions.",
      "docstring": "Main entry point",
      "id": 2062,
      "imports": [
        "import sys",
        "from pathlib import Path",
        "import time",
        "from auth import RemarkableAuth",
        "from discovery import RemarkableDiscovery",
        "from local_replica import RemarkableLocalReplica",
        "import traceback",
        "import traceback"
      ],
      "imports_required": [
        "import sys",
        "import traceback",
        "from pathlib import Path",
        "import time",
        "from auth import RemarkableAuth",
        "from discovery import RemarkableDiscovery",
        "from local_replica import RemarkableLocalReplica"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 228,
      "line_start": 214,
      "name": "main_v80",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as the primary entry point for running a test suite. It orchestrates the execution of run_complete_test_suite(), manages the program's exit status based on success/failure, and provides comprehensive error handling for keyboard interrupts and unexpected exceptions. It ensures clean program termination with appropriate exit codes (0 for success, 1 for failure) and user-friendly error messages.",
      "return_annotation": null,
      "return_explained": "This function does not return a value. Instead, it terminates the program using sys.exit() with exit code 0 if tests pass successfully, or exit code 1 if tests fail, user interrupts execution, or an unexpected error occurs.",
      "settings_required": [
        "The function run_complete_test_suite() must be defined and accessible in the same module or imported",
        "Custom modules 'auth', 'discovery', and 'local_replica' must be available in the Python path",
        "Any configuration required by RemarkableAuth, RemarkableDiscovery, and RemarkableLocalReplica classes"
      ],
      "source_code": "def main():\n    \"\"\"Main entry point\"\"\"\n    try:\n        success = run_complete_test_suite()\n        sys.exit(0 if success else 1)\n        \n    except KeyboardInterrupt:\n        print(\"\\n\u23f9\ufe0f Test suite interrupted by user\")\n        sys.exit(1)\n        \n    except Exception as e:\n        print(f\"\\n\ud83d\udca5 Unexpected error: {e}\")\n        import traceback\n        traceback.print_exc()\n        sys.exit(1)",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/cloudtest/test_complete_suite.py",
      "tags": [
        "entry-point",
        "test-suite",
        "error-handling",
        "exit-codes",
        "exception-handling",
        "keyboard-interrupt",
        "testing",
        "remarkable",
        "main-function"
      ],
      "updated_at": "2025-12-07T01:59:48.494643",
      "usage_example": "if __name__ == '__main__':\n    main()"
    },
    {
      "best_practices": [
        "Always run the dry-run preview before applying changes to understand the impact",
        "Ensure proper authentication is configured before running this function",
        "Handle the boolean return value to determine if the operation succeeded",
        "Consider backing up data before running repair operations",
        "The function uses user input, so it should only be run in interactive environments, not in automated scripts",
        "Wrap calls to this function in proper error handling at the application level"
      ],
      "class_interface": {},
      "complexity": "moderate",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-07 00:27:19",
      "decorators": [],
      "dependencies": [
        "requests"
      ],
      "description": "Interactive command-line tool that runs a schema repair process with a dry-run preview before applying changes to the root document schema.",
      "docstring": "Run the corrected repair tool",
      "id": 2044,
      "imports": [
        "import json",
        "import time",
        "import hashlib",
        "from pathlib import Path",
        "from typing import Dict",
        "from typing import List",
        "from typing import Tuple",
        "from typing import Any",
        "import requests",
        "from auth import RemarkableAuth"
      ],
      "imports_required": [
        "from auth import RemarkableAuth"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 299,
      "line_start": 277,
      "name": "main_v79",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as the entry point for a repair tool that fixes issues in a root document schema. It implements a two-phase approach: first showing a preview of changes in dry-run mode, then prompting the user for confirmation before applying the actual corrections. This prevents accidental data modifications and allows users to review changes before committing them.",
      "return_annotation": null,
      "return_explained": "Returns a boolean value indicating success (True) or failure (False) of the repair operation. Returns False if the dry run fails, if the user cancels the operation, or if an exception occurs during execution. Returns the result of the actual repair operation (boolean) if the user confirms the changes.",
      "settings_required": [
        "CorrectedRootDocSchemaRepair class must be defined and available in the same module or imported",
        "Authentication credentials required by RemarkableAuth (likely API keys or tokens for Remarkable service)",
        "Network access to communicate with Remarkable API endpoints"
      ],
      "source_code": "def main():\n    \"\"\"Run the corrected repair tool\"\"\"\n    try:\n        repair = CorrectedRootDocSchemaRepair()\n        \n        # First run dry-run\n        print(\"\ud83d\udd0d Running DRY RUN to preview changes...\")\n        success = repair.preview_changes(dry_run=True)\n        \n        if success:\n            response = input(\"\\n\ud83d\ude80 Apply the corrections? (yes/no): \").strip().lower()\n            if response in ['yes', 'y']:\n                return repair.preview_changes(dry_run=False)\n            else:\n                print(\"\u274c Repair cancelled by user\")\n                return False\n        else:\n            print(\"\u274c Dry run failed\")\n            return False\n            \n    except Exception as e:\n        print(f\"\u274c Repair tool failed: {e}\")\n        return False",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/cloudtest/corrected_repair.py",
      "tags": [
        "cli",
        "interactive",
        "repair-tool",
        "schema-repair",
        "dry-run",
        "user-confirmation",
        "remarkable",
        "document-schema",
        "data-migration",
        "validation"
      ],
      "updated_at": "2025-12-07T01:59:48.493997",
      "usage_example": "if __name__ == '__main__':\n    success = main()\n    if success:\n        print('Schema repair completed successfully')\n    else:\n        print('Schema repair failed or was cancelled')\n    exit(0 if success else 1)"
    },
    {
      "best_practices": [
        "This function should be called as the main entry point of the script, typically within an 'if __name__ == \"__main__\":' block",
        "The function requires run_full_test_suite() to be defined elsewhere in the codebase",
        "Exit codes follow Unix conventions: 0 for success, non-zero (1) for any failure",
        "User interruptions (Ctrl+C) are handled gracefully with a clear message",
        "Full exception tracebacks are printed for debugging unexpected errors",
        "The function does not accept command-line arguments; if needed, they should be parsed before calling main()"
      ],
      "class_interface": {},
      "complexity": "simple",
      "component_type": "function",
      "conditional_imports": [
        {
          "condition": "only when an unexpected exception occurs during test execution",
          "import": "import traceback",
          "optional": false
        }
      ],
      "created_at": "2025-12-07 00:24:51",
      "decorators": [],
      "dependencies": [
        "sys",
        "pathlib",
        "traceback"
      ],
      "description": "Main entry point function that executes a full test suite and handles program exit codes based on test results and exceptions.",
      "docstring": "Main entry point",
      "id": 2039,
      "imports": [
        "import sys",
        "from pathlib import Path",
        "from auth import RemarkableAuth",
        "from auth import authenticate_remarkable",
        "from discovery import RemarkableDiscovery",
        "import traceback"
      ],
      "imports_required": [
        "import sys",
        "from pathlib import Path",
        "from auth import RemarkableAuth",
        "from auth import authenticate_remarkable",
        "from discovery import RemarkableDiscovery",
        "import traceback"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 129,
      "line_start": 115,
      "name": "main_v78",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as the primary entry point for running a test suite, likely for testing reMarkable device authentication and discovery functionality. It orchestrates test execution, handles user interruptions gracefully, catches unexpected errors with full traceback output, and returns appropriate exit codes (0 for success, 1 for failure) to the operating system.",
      "return_annotation": null,
      "return_explained": "This function does not return a value in the traditional sense. Instead, it calls sys.exit() with an integer exit code: 0 if all tests pass successfully, or 1 if tests fail, user interrupts execution (KeyboardInterrupt), or an unexpected exception occurs. The exit code can be captured by the shell or calling process.",
      "settings_required": [
        "The function depends on run_full_test_suite() being defined in the same module or imported",
        "Custom modules 'auth' and 'discovery' must be available in the Python path",
        "Any configuration required by RemarkableAuth, authenticate_remarkable, and RemarkableDiscovery components"
      ],
      "source_code": "def main():\n    \"\"\"Main entry point\"\"\"\n    try:\n        success = run_full_test_suite()\n        sys.exit(0 if success else 1)\n        \n    except KeyboardInterrupt:\n        print(\"\\n\u23f9\ufe0f Test suite interrupted by user\")\n        sys.exit(1)\n        \n    except Exception as e:\n        print(f\"\\n\ud83d\udca5 Unexpected error: {e}\")\n        import traceback\n        traceback.print_exc()\n        sys.exit(1)",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/cloudtest/test_suite.py",
      "tags": [
        "entry-point",
        "test-suite",
        "error-handling",
        "exit-codes",
        "remarkable",
        "testing",
        "exception-handling",
        "keyboard-interrupt"
      ],
      "updated_at": "2025-12-07T01:59:48.493269",
      "usage_example": "if __name__ == '__main__':\n    main()"
    },
    {
      "best_practices": [
        "This function should be run in a test environment before making actual API calls to validate request formatting",
        "Review the generated JSON results file to understand specific differences and recommendations",
        "Ensure the test_results directory has appropriate write permissions",
        "The function returns a boolean that can be used for CI/CD pipeline integration to fail builds on critical issues",
        "Check the console output for a summary of issues before diving into the detailed JSON results",
        "The function handles exceptions gracefully and returns False on failure, making it suitable for automated testing workflows"
      ],
      "class_interface": {},
      "complexity": "moderate",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-07 00:23:33",
      "decorators": [],
      "dependencies": [
        "json",
        "time",
        "pathlib",
        "typing",
        "uuid",
        "hashlib",
        "base64",
        "binascii"
      ],
      "description": "Executes a dry run comparison analysis of PDF upload requests between a simulated implementation and a real application, without making actual API calls.",
      "docstring": "Run dry run comparison analysis",
      "id": 2034,
      "imports": [
        "import json",
        "import time",
        "from pathlib import Path",
        "from typing import Dict",
        "from typing import Any",
        "from typing import List",
        "import uuid",
        "import hashlib",
        "import base64",
        "import binascii",
        "from auth import RemarkableAuth"
      ],
      "imports_required": [
        "import json",
        "import time",
        "from pathlib import Path",
        "from typing import Dict, Any, List",
        "import uuid",
        "import hashlib",
        "import base64",
        "import binascii",
        "from auth import RemarkableAuth"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 478,
      "line_start": 429,
      "name": "main_v77",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function orchestrates a comprehensive dry run testing workflow to validate PDF upload request formatting. It simulates PDF upload requests, compares them against expected real application behavior, identifies differences and critical issues, generates fix recommendations, and saves detailed results to a JSON file. The function is designed for debugging and validation purposes to ensure request compatibility before making actual API calls.",
      "return_annotation": null,
      "return_explained": "Returns a boolean value: True if no critical issues were found during the comparison (len(differences['critical_issues']) == 0), False if critical issues exist or if an exception occurred during execution.",
      "settings_required": [
        "DryRunUploadComparison class must be defined and available in the same module or imported",
        "Write permissions for creating 'test_results' directory in the parent directory of the script",
        "The DryRunUploadComparison class must implement methods: simulate_pdf_upload(), compare_with_real_app(), and generate_fix_recommendations()"
      ],
      "source_code": "def main():\n    \"\"\"Run dry run comparison analysis\"\"\"\n    try:\n        print(\"\ud83e\uddea DRY RUN UPLOAD COMPARISON\")\n        print(\"=\" * 50)\n        print(\"\ud83d\udeab NO API CALLS - ANALYSIS ONLY\")\n        \n        # Initialize comparison tool\n        comparator = DryRunUploadComparison()\n        \n        # Simulate our PDF upload\n        our_requests = comparator.simulate_pdf_upload(\"TestDocument_DryRun\")\n        \n        # Compare with real app\n        differences = comparator.compare_with_real_app(our_requests)\n        \n        # Generate recommendations\n        recommendations = comparator.generate_fix_recommendations(differences)\n        \n        # Save results\n        results = {\n            'timestamp': time.time(),\n            'our_requests': our_requests,\n            'differences': differences,\n            'recommendations': recommendations\n        }\n        \n        results_file = Path(__file__).parent / \"test_results\" / f\"dry_run_comparison_{int(time.time())}.json\"\n        results_file.parent.mkdir(exist_ok=True)\n        \n        with open(results_file, 'w') as f:\n            json.dump(results, f, indent=2, default=str)\n        \n        print(f\"\\n\ud83d\udcbe Dry run results saved to: {results_file}\")\n        \n        # Summary\n        print(f\"\\n\ud83d\udccb SUMMARY:\")\n        print(f\"   Header differences: {len(differences['header_differences'])}\")\n        print(f\"   Critical issues: {len(differences['critical_issues'])}\")\n        print(f\"   Recommendations: {len(recommendations)}\")\n        \n        print(f\"\\n\ud83d\udd27 RECOMMENDATIONS:\")\n        for i, rec in enumerate(recommendations, 1):\n            print(f\"   {i}. {rec}\")\n        \n        return len(differences['critical_issues']) == 0\n        \n    except Exception as e:\n        print(f\"\u274c Dry run comparison failed: {e}\")\n        return False",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/cloudtest/dry_run_comparison.py",
      "tags": [
        "testing",
        "dry-run",
        "comparison",
        "validation",
        "pdf-upload",
        "debugging",
        "analysis",
        "request-simulation",
        "remarkable",
        "api-testing"
      ],
      "updated_at": "2025-12-07T01:59:48.492519",
      "usage_example": "if __name__ == '__main__':\n    # Run the dry run comparison\n    success = main()\n    \n    if success:\n        print('\u2705 All checks passed - no critical issues found')\n        exit(0)\n    else:\n        print('\u274c Critical issues detected - review recommendations')\n        exit(1)"
    },
    {
      "best_practices": [
        "This function assumes three test functions (test_imports, test_basic_functionality, test_placeholder_parsing) are defined in the same scope and return boolean values",
        "The function uses print statements for output, making it suitable for CLI usage but not for programmatic testing frameworks",
        "Consider refactoring to return a boolean or raise exceptions for better integration with automated testing frameworks",
        "The function does not handle exceptions from test functions, so any unhandled errors in tests will propagate",
        "All test functions must return True for success and False for failure for proper result tracking",
        "This is designed as a standalone validation script, typically run before enabling hybrid mode features"
      ],
      "class_interface": {},
      "complexity": "simple",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-07 00:16:29",
      "decorators": [],
      "dependencies": [
        "matplotlib",
        "numpy",
        "networkx"
      ],
      "description": "A test orchestration function that runs a suite of validation tests for hybrid mode functionality, checking imports, basic functionality, and placeholder parsing.",
      "docstring": "Run all tests",
      "id": 2020,
      "imports": [
        "import matplotlib.pyplot as plt",
        "import numpy as np",
        "import networkx as nx",
        "from graphics_generator import GraphicsGenerator",
        "from graphics_generator import GraphicSpec",
        "from graphics_generator import GraphicType",
        "from hybrid_response_handler import HybridResponseHandler",
        "from hybrid_pdf_generator import HybridPDFGenerator",
        "from graphics_generator import GraphicsGenerator",
        "from hybrid_response_handler import HybridResponseHandler",
        "from hybrid_pdf_generator import HybridPDFGenerator",
        "from hybrid_response_handler import HybridResponseHandler"
      ],
      "imports_required": [
        "import matplotlib.pyplot as plt",
        "import numpy as np",
        "import networkx as nx",
        "from graphics_generator import GraphicsGenerator",
        "from graphics_generator import GraphicSpec",
        "from graphics_generator import GraphicType",
        "from hybrid_response_handler import HybridResponseHandler",
        "from hybrid_pdf_generator import HybridPDFGenerator"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 146,
      "line_start": 119,
      "name": "main_v76",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as the main entry point for validating a hybrid mode system that combines graphics generation, PDF generation, and response handling. It executes multiple test functions sequentially, tracks their success/failure status, and provides user-friendly feedback with instructions for enabling hybrid mode or installing dependencies based on test results.",
      "return_annotation": null,
      "return_explained": "This function does not explicitly return a value (implicitly returns None). It communicates test results through console output and prints success/failure messages along with usage instructions.",
      "settings_required": [
        "requirements-hybrid.txt file must be present for dependency installation",
        "test_imports(), test_basic_functionality(), and test_placeholder_parsing() functions must be defined in the same module",
        "graphics_generator module with GraphicsGenerator, GraphicSpec, and GraphicType classes",
        "hybrid_response_handler module with HybridResponseHandler class",
        "hybrid_pdf_generator module with HybridPDFGenerator class",
        "main.py script must exist with --file and --enable-hybrid-mode command-line arguments support"
      ],
      "source_code": "def main():\n    \"\"\"Run all tests\"\"\"\n    print(\"\ud83d\ude80 Hybrid Mode Validation Test\")\n    print(\"=\" * 50)\n    \n    success = True\n    \n    # Test imports\n    if not test_imports():\n        success = False\n    \n    # Test basic functionality\n    if not test_basic_functionality():\n        success = False\n    \n    # Test placeholder parsing\n    if not test_placeholder_parsing():\n        success = False\n    \n    print(\"\\n\" + \"=\" * 50)\n    if success:\n        print(\"\u2705 All tests passed! Hybrid mode is ready to use.\")\n        print(\"\\nTo enable hybrid mode:\")\n        print(\"   python main.py --file input.pdf --enable-hybrid-mode\")\n    else:\n        print(\"\u274c Some tests failed. Please check dependencies and installation.\")\n        print(\"\\nTo install dependencies:\")\n        print(\"   pip install -r requirements-hybrid.txt\")",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/test_hybrid_mode.py",
      "tags": [
        "testing",
        "validation",
        "test-suite",
        "hybrid-mode",
        "orchestration",
        "integration-testing",
        "graphics",
        "pdf-generation",
        "diagnostics"
      ],
      "updated_at": "2025-12-07T01:59:48.491819",
      "usage_example": "if __name__ == '__main__':\n    main()\n\n# Expected output:\n# \ud83d\ude80 Hybrid Mode Validation Test\n# ==================================================\n# [Test results from test_imports()]\n# [Test results from test_basic_functionality()]\n# [Test results from test_placeholder_parsing()]\n# ==================================================\n# \u2705 All tests passed! Hybrid mode is ready to use.\n# \n# To enable hybrid mode:\n#    python main.py --file input.pdf --enable-hybrid-mode"
    },
    {
      "best_practices": [
        "This function should be called using asyncio.run(main()) when executed as a script",
        "Ensure all test functions (test_compact_formatter, test_session_manager, demo_improvement_comparison) are defined before calling this function",
        "The function assumes synchronous test functions; if tests need to be async, they should be awaited",
        "Test folder './test' should exist before running the application with --watch-folder option",
        "Consider adding error handling to catch and report test failures individually rather than stopping on first failure",
        "The function currently runs tests sequentially; consider if any tests could benefit from parallel execution"
      ],
      "class_interface": {},
      "complexity": "simple",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-07 00:04:15",
      "decorators": [],
      "dependencies": [
        "asyncio",
        "pathlib"
      ],
      "description": "Asynchronous test runner function that executes a suite of tests for the E-Ink LLM Assistant application, including tests for compact formatting, session management, and improvement comparisons.",
      "docstring": "Run all tests",
      "id": 1993,
      "imports": [
        "import asyncio",
        "from pathlib import Path",
        "from compact_formatter import CompactResponseFormatter",
        "from session_manager import SessionManager"
      ],
      "imports_required": [
        "import asyncio",
        "from pathlib import Path",
        "from compact_formatter import CompactResponseFormatter",
        "from session_manager import SessionManager"
      ],
      "is_async": 1,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 209,
      "line_start": 195,
      "name": "main_v75",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as the main entry point for running comprehensive tests of the E-Ink LLM Assistant application. It orchestrates the execution of multiple test functions to validate the compact formatter, session manager, and demonstrate improvements. After successful test completion, it provides usage instructions for running the actual application with various command-line options.",
      "return_annotation": null,
      "return_explained": "This function does not return any value (implicitly returns None). It performs side effects by printing test results and usage instructions to stdout.",
      "settings_required": [
        "Requires test folder structure at './test' for watch-folder functionality",
        "Requires example.pdf file for file-based testing examples",
        "Requires compact_formatter.py module with CompactResponseFormatter class",
        "Requires session_manager.py module with SessionManager class",
        "Requires test functions: test_compact_formatter(), test_session_manager(), demo_improvement_comparison() to be defined in the same module or imported"
      ],
      "source_code": "async def main():\n    \"\"\"Run all tests\"\"\"\n    print(\"\ud83e\uddea E-INK LLM ASSISTANT - IMPROVEMENT TESTS\")\n    print(\"=\" * 70)\n    \n    # Run tests\n    test_compact_formatter()\n    test_session_manager()\n    demo_improvement_comparison()\n    \n    print(\"\\n\ud83c\udf89 All tests completed successfully!\")\n    print(\"\\nReady to run the improved E-Ink LLM Assistant:\")\n    print(\"  python main.py --watch-folder ./test\")\n    print(\"  python main.py --list-conversations\") \n    print(\"  python main.py --file example.pdf --verbose-mode\")",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/test_improvements.py",
      "tags": [
        "async",
        "testing",
        "test-runner",
        "e-ink",
        "llm-assistant",
        "integration-test",
        "test-suite",
        "main-entry-point"
      ],
      "updated_at": "2025-12-07T01:59:48.491151",
      "usage_example": "import asyncio\n\nasync def main():\n    \"\"\"Run all tests\"\"\"\n    print(\"\ud83e\uddea E-INK LLM ASSISTANT - IMPROVEMENT TESTS\")\n    print(\"=\" * 70)\n    \n    test_compact_formatter()\n    test_session_manager()\n    demo_improvement_comparison()\n    \n    print(\"\\n\ud83c\udf89 All tests completed successfully!\")\n    print(\"\\nReady to run the improved E-Ink LLM Assistant:\")\n    print(\"  python main.py --watch-folder ./test\")\n    print(\"  python main.py --list-conversations\") \n    print(\"  python main.py --file example.pdf --verbose-mode\")\n\nif __name__ == \"__main__\":\n    asyncio.run(main())"
    },
    {
      "best_practices": [
        "This function should only be called as the main entry point of the script, typically within an 'if __name__ == \"__main__\"' block",
        "Ensure the logger is properly configured before calling this function to capture all test output",
        "The test_setup() and test_single_vendor() functions must be implemented in the same module",
        "Run this test function before executing the full vendor_enrichment.py script to validate configuration",
        "The function exits gracefully on setup failure without raising exceptions, making it suitable for command-line usage",
        "Command-line arguments allow flexible testing of different vendors and collections without code modification"
      ],
      "class_interface": {},
      "complexity": "moderate",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-06 16:24:30",
      "decorators": [],
      "dependencies": [
        "argparse",
        "logging",
        "os",
        "sys",
        "re",
        "hybrid_rag_engine"
      ],
      "description": "Entry point function that orchestrates vendor enrichment testing by parsing command-line arguments, running setup validation, and executing a single vendor test against a ChromaDB collection.",
      "docstring": "Main test function",
      "id": 1257,
      "imports": [
        "import os",
        "import sys",
        "from hybrid_rag_engine import OneCo_hybrid_RAG",
        "import logging",
        "import argparse",
        "import re"
      ],
      "imports_required": [
        "import argparse",
        "import logging",
        "import os",
        "import sys",
        "import re",
        "from hybrid_rag_engine import OneCo_hybrid_RAG"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 163,
      "line_start": 137,
      "name": "main_v74",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as the main test harness for vendor enrichment functionality. It validates the testing environment setup, allows users to specify a vendor name and ChromaDB collection via command-line arguments, executes a test enrichment for a single vendor, and provides clear success/failure feedback with next steps. It's designed to be run before executing the full vendor enrichment pipeline to ensure everything is configured correctly.",
      "return_annotation": null,
      "return_explained": "This function does not return any value (implicitly returns None). It performs side effects including logging output to console/file and executing test functions. The success or failure is communicated through log messages rather than return values.",
      "settings_required": [
        "A 'logger' object must be configured and available in the module scope before calling this function",
        "The 'test_setup()' function must be defined in the same module",
        "The 'test_single_vendor()' function must be defined in the same module and accept vendor name and collection name as parameters",
        "ChromaDB collection '00_company_governance' (or specified collection) must exist and be accessible",
        "hybrid_rag_engine module must be properly installed and configured",
        "Any environment variables or configuration required by hybrid_rag_engine (e.g., API keys, database connections)"
      ],
      "source_code": "def main():\n    \"\"\"Main test function\"\"\"\n    import argparse\n    \n    parser = argparse.ArgumentParser(description='Test vendor enrichment')\n    parser.add_argument('--vendor', type=str, \n                       default='Merck',\n                       help='Vendor name to test')\n    parser.add_argument('--collection', type=str,\n                       default='00_company_governance',\n                       help='ChromaDB collection to search')\n    \n    args = parser.parse_args()\n    \n    if not test_setup():\n        logger.error(\"Setup failed\")\n        return\n    \n    logger.info(\"\\n\u2713 Setup complete\")\n    \n    success = test_single_vendor(args.vendor, args.collection)\n    \n    if success:\n        logger.info(\"\\n\u2705 Test completed successfully!\")\n        logger.info(\"You can now run the full enrichment with: python vendor_enrichment.py\")\n    else:\n        logger.error(\"\\n\u274c Test failed - check logs above\")",
      "source_file": "/tf/active/vicechatdev/find_email/test_enrichment.py",
      "tags": [
        "testing",
        "vendor-enrichment",
        "command-line",
        "argparse",
        "chromadb",
        "rag",
        "validation",
        "entry-point",
        "main-function",
        "test-harness"
      ],
      "updated_at": "2025-12-07T01:59:48.489942",
      "usage_example": "# Run from command line with default arguments (tests 'Merck' vendor):\n# python script_name.py\n\n# Run with custom vendor:\n# python script_name.py --vendor \"Pfizer\"\n\n# Run with custom vendor and collection:\n# python script_name.py --vendor \"Johnson & Johnson\" --collection \"01_vendor_data\"\n\n# In code (if calling programmatically):\nif __name__ == '__main__':\n    main()"
    },
    {
      "best_practices": [
        "Ensure OPENAI_API_KEY is set in environment variables before running",
        "Index documents before executing this function to avoid errors",
        "The full_reading_example() is commented out by default due to performance considerations - uncomment only when needed",
        "This function is designed to be called as the main entry point of an example/demo script",
        "Error messages provide clear guidance on setup requirements if execution fails",
        "Each example function should be independently executable and handle its own errors gracefully"
      ],
      "class_interface": {},
      "complexity": "simple",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-06 09:29:05",
      "decorators": [],
      "dependencies": [
        "pathlib",
        "document_indexer",
        "rag_engine",
        "config"
      ],
      "description": "Orchestrates and executes a series of example demonstrations for the DocChat system, including document indexing, RAG queries, and conversation modes.",
      "docstring": "Run all examples",
      "id": 261,
      "imports": [
        "from pathlib import Path",
        "from document_indexer import DocumentIndexer",
        "from rag_engine import DocChatRAG",
        "import config"
      ],
      "imports_required": [
        "from pathlib import Path",
        "from document_indexer import DocumentIndexer",
        "from rag_engine import DocChatRAG",
        "import config"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 198,
      "line_start": 167,
      "name": "main_v73",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as a comprehensive demonstration runner for the DocChat application. It sequentially executes multiple example functions to showcase different features: document indexing, basic RAG (Retrieval-Augmented Generation), extensive mode querying, full reading mode (commented out by default), and conversation with history. It includes error handling and provides user feedback about setup requirements if failures occur.",
      "return_annotation": null,
      "return_explained": "This function does not return any value (implicitly returns None). It produces side effects by printing output to the console and executing example functions that demonstrate the DocChat system's capabilities.",
      "settings_required": [
        "OPENAI_API_KEY environment variable must be set (typically in .env file)",
        "Documents must be indexed before running examples",
        "All project dependencies must be installed",
        "config.py file must be present with appropriate configuration",
        "The following functions must be defined in the same module or imported: index_documents_example(), basic_rag_example(), extensive_mode_example(), conversation_example()"
      ],
      "source_code": "def main():\n    \"\"\"Run all examples\"\"\"\n    print(\"=\" * 80)\n    print(\"DocChat - Example Usage\")\n    print(\"=\" * 80)\n    \n    try:\n        # 1. Index documents\n        index_documents_example()\n        \n        # 2. Basic RAG\n        basic_rag_example()\n        \n        # 3. Extensive mode\n        extensive_mode_example()\n        \n        # 4. Full reading mode (commented out by default as it's slow)\n        # full_reading_example()\n        \n        # 5. Conversation with history\n        conversation_example()\n        \n        print(\"\\n\" + \"=\" * 80)\n        print(\"Examples completed!\")\n        print(\"=\" * 80)\n        \n    except Exception as e:\n        print(f\"\\n\u274c Error: {e}\")\n        print(\"\\nMake sure:\")\n        print(\"  1. You have set OPENAI_API_KEY in .env\")\n        print(\"  2. You have indexed some documents\")\n        print(\"  3. Dependencies are installed\")",
      "source_file": "/tf/active/vicechatdev/docchat/example_usage.py",
      "tags": [
        "demo",
        "examples",
        "orchestration",
        "RAG",
        "document-chat",
        "testing",
        "showcase",
        "runner",
        "main-entry-point"
      ],
      "updated_at": "2025-12-07T01:59:48.489223",
      "usage_example": "if __name__ == '__main__':\n    main()"
    },
    {
      "best_practices": [
        "This function depends on test_graph_api_access() and provide_admin_instructions() being defined elsewhere in the codebase",
        "The function is designed for interactive CLI use with formatted console output",
        "Should be called as the main entry point of the diagnostic script",
        "Does not handle exceptions - ensure dependent functions have proper error handling",
        "Uses emoji characters which may not display correctly in all terminal environments",
        "Consider redirecting output to a file if running in environments without emoji support"
      ],
      "class_interface": {},
      "complexity": "simple",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-06 09:20:06",
      "decorators": [],
      "dependencies": [
        "requests",
        "json"
      ],
      "description": "Diagnostic function that tests SharePoint tenant configuration by checking Microsoft Graph API access and provides recommendations based on the results.",
      "docstring": "Main diagnostic function.",
      "id": 233,
      "imports": [
        "import requests",
        "import json"
      ],
      "imports_required": [
        "import requests",
        "import json"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 245,
      "line_start": 219,
      "name": "main_v72",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as the main entry point for a diagnostic tool that verifies whether Microsoft Graph API or SharePoint REST API can be accessed with app-only authentication. It tests Graph API connectivity, displays results with visual indicators (emojis), provides admin instructions, and recommends next steps based on whether Graph API access is successful. This is useful for troubleshooting SharePoint sync application authentication issues at the tenant level.",
      "return_annotation": null,
      "return_explained": "This function does not return any value (implicitly returns None). It performs side effects by printing diagnostic information, test results, and recommendations to stdout.",
      "settings_required": [
        "Requires test_graph_api_access() function to be defined in the same module or imported",
        "Requires provide_admin_instructions() function to be defined in the same module or imported",
        "May require Microsoft Graph API credentials (client ID, client secret, tenant ID) configured for test_graph_api_access() to work",
        "May require environment variables or configuration file for authentication (depends on test_graph_api_access() implementation)"
      ],
      "source_code": "def main():\n    \"\"\"Main diagnostic function.\"\"\"\n    print(\"SharePoint Tenant Configuration Checker\")\n    print(\"=\" * 50)\n    \n    # Test if Graph API approach works\n    graph_works = test_graph_api_access()\n    \n    if graph_works:\n        print(\"\\n\ud83c\udf89 SUCCESS: Microsoft Graph API works!\")\n        print(\"This means we can use Graph API instead of SharePoint REST API.\")\n        print(\"I can modify the sync app to use Graph API as a workaround.\")\n    else:\n        print(\"\\n\u274c Microsoft Graph API also has issues.\")\n        print(\"This confirms it's a tenant-level app-only authentication problem.\")\n    \n    provide_admin_instructions()\n    \n    print(\"\\n\" + \"=\" * 60)\n    print(\"\ud83d\udca1 **RECOMMENDATION**\")\n    print(\"=\" * 60)\n    if graph_works:\n        print(\"Since Graph API works, I can create a Graph-based version\")\n        print(\"of the sync application that bypasses SharePoint REST API issues.\")\n    else:\n        print(\"You need SharePoint admin to enable app-only authentication\")\n        print(\"at the tenant level before the sync application will work.\")",
      "source_file": "/tf/active/vicechatdev/SPFCsync/check_tenant_config.py",
      "tags": [
        "diagnostic",
        "sharepoint",
        "microsoft-graph",
        "authentication",
        "tenant-configuration",
        "troubleshooting",
        "app-only-auth",
        "cli",
        "main-entry-point"
      ],
      "updated_at": "2025-12-07T01:59:48.488485",
      "usage_example": "# Assuming test_graph_api_access() and provide_admin_instructions() are defined\n# Example standalone execution:\nif __name__ == '__main__':\n    main()\n\n# Or call directly:\nmain()\n\n# Expected output:\n# SharePoint Tenant Configuration Checker\n# ==================================================\n# [Test results from test_graph_api_access()]\n# \ud83c\udf89 SUCCESS: Microsoft Graph API works!\n# [Additional instructions and recommendations]"
    },
    {
      "best_practices": [
        "This function depends on three external functions: load_config(), test_azure_token(), and test_sharepoint_token() which must be defined in the same module",
        "Use the return value as a system exit code for proper CLI integration",
        "Ensure Azure AD application has proper permissions (Sites.Read.All) before running",
        "The function prints sensitive information (partial client ID), ensure output is not logged in production",
        "Configuration should be stored securely, preferably using environment variables or encrypted config files",
        "The function assumes exactly 2 tests; if adding more tests, update the total_tests variable accordingly"
      ],
      "class_interface": {},
      "complexity": "moderate",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-06 09:18:00",
      "decorators": [],
      "dependencies": [
        "requests"
      ],
      "description": "Orchestrates a comprehensive SharePoint connection diagnostic tool that validates Azure AD authentication and SharePoint access by running multiple tests and reporting results.",
      "docstring": "Run all diagnostics.",
      "id": 227,
      "imports": [
        "import requests",
        "import json",
        "import base64",
        "from urllib.parse import quote",
        "import os",
        "import sys"
      ],
      "imports_required": [
        "import requests",
        "import json",
        "import base64",
        "from urllib.parse import quote",
        "import os",
        "import sys"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 227,
      "line_start": 189,
      "name": "main_v71",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as the entry point for a diagnostic tool that verifies SharePoint connectivity. It loads configuration, displays connection parameters, executes authentication tests (Azure token and SharePoint token), and provides detailed feedback on test results with troubleshooting guidance. Returns 0 on success or 1 on failure, making it suitable for use as a CLI tool exit code.",
      "return_annotation": null,
      "return_explained": "Returns an integer exit code: 0 if all diagnostic tests pass successfully, or 1 if any tests fail or configuration cannot be loaded. This follows standard Unix exit code conventions where 0 indicates success.",
      "settings_required": [
        "SHAREPOINT_SITE_URL - SharePoint site URL to connect to",
        "AZURE_CLIENT_ID - Azure AD application client ID",
        "AZURE_CLIENT_SECRET - Azure AD application client secret",
        "Configuration must be loadable via load_config() function",
        "Azure AD app must have Sites.Read.All permissions with admin consent granted"
      ],
      "source_code": "def main():\n    \"\"\"Run all diagnostics.\"\"\"\n    print(\"SharePoint Connection Diagnostic Tool\")\n    print(\"=\" * 50)\n    \n    config = load_config()\n    if not config:\n        print(\"\u274c Could not load configuration\")\n        return 1\n    \n    print(f\"SharePoint Site: {config.get('SHAREPOINT_SITE_URL', 'Not set')}\")\n    print(f\"Client ID: {config.get('AZURE_CLIENT_ID', 'Not set')[:8]}...\")\n    print(f\"Client Secret: {'Set' if config.get('AZURE_CLIENT_SECRET') else 'Not set'}\")\n    print()\n    \n    # Run tests\n    tests_passed = 0\n    total_tests = 2\n    \n    if test_azure_token():\n        tests_passed += 1\n    \n    if test_sharepoint_token():\n        tests_passed += 1\n    \n    print(\"\\n\" + \"=\" * 50)\n    print(f\"Diagnostic Results: {tests_passed}/{total_tests} tests passed\")\n    \n    if tests_passed == total_tests:\n        print(\"\ud83c\udf89 All diagnostics passed! SharePoint connection should work.\")\n        return 0\n    else:\n        print(\"\u274c Some diagnostics failed. Please check the guidance above.\")\n        print(\"\\n\ud83d\udccb Common Solutions:\")\n        print(\"1. Verify Azure AD app permissions (Sites.Read.All)\")\n        print(\"2. Ensure admin consent is granted\")\n        print(\"3. Check client ID and secret are correct\")\n        print(\"4. Verify SharePoint site URL is accessible\")\n        return 1",
      "source_file": "/tf/active/vicechatdev/SPFCsync/diagnose_sharepoint.py",
      "tags": [
        "diagnostics",
        "sharepoint",
        "azure-ad",
        "authentication",
        "testing",
        "cli-tool",
        "connection-validation",
        "oauth",
        "troubleshooting",
        "entry-point"
      ],
      "updated_at": "2025-12-07T01:59:48.487745",
      "usage_example": "# Assuming load_config(), test_azure_token(), and test_sharepoint_token() are defined\n# and environment variables or config file are set up\n\nif __name__ == '__main__':\n    exit_code = main()\n    sys.exit(exit_code)\n\n# Or simply:\n# python diagnostic_tool.py\n# The function will print diagnostic results and return appropriate exit code"
    },
    {
      "best_practices": [
        "This function should only be called as the script's entry point, typically within an 'if __name__ == \"__main__\"' block",
        "Passwords passed via command-line arguments may be visible in process lists; consider using environment variables or secure input methods for production use",
        "Ensure the test_acl_functions function is properly defined before calling main()",
        "The --path argument should point to a location where the user has appropriate permissions to create and modify ACLs",
        "Handle keyboard interrupts (Ctrl+C) gracefully if adding to production code",
        "Consider adding logging configuration before calling test_acl_functions for better debugging"
      ],
      "class_interface": {},
      "complexity": "simple",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-06 08:36:46",
      "decorators": [],
      "dependencies": [
        "argparse",
        "FC_api"
      ],
      "description": "Entry point function for a FileCloud ACL management test script that parses command-line arguments and initiates ACL testing.",
      "docstring": "Main function for the test script.",
      "id": 113,
      "imports": [
        "import os",
        "import sys",
        "import argparse",
        "import json",
        "import logging",
        "from typing import Dict",
        "from typing import Any",
        "from typing import List",
        "from typing import Optional",
        "from FC_api import FileCloudAPI"
      ],
      "imports_required": [
        "import argparse",
        "from FC_api import FileCloudAPI"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 137,
      "line_start": 127,
      "name": "main_v70",
      "parameters": [],
      "parameters_explained": {
        "none": "This function takes no parameters. All inputs are collected via command-line arguments using argparse."
      },
      "parent_class": null,
      "purpose": "This function serves as the main entry point for a test script that validates FileCloud Access Control List (ACL) management functionality. It sets up an argument parser to collect server connection details and test parameters from the command line, then invokes the test_acl_functions with the provided credentials and path. This is designed to be called when the script is executed directly, providing a CLI interface for testing FileCloud ACL operations.",
      "return_annotation": null,
      "return_explained": "Returns None (implicitly). The function does not return any value; it executes the test suite and exits. Any results or errors are handled by the test_acl_functions that it calls.",
      "settings_required": [
        "FileCloud server URL must be accessible",
        "Valid FileCloud username and password credentials",
        "The test_acl_functions function must be defined in the same module or imported",
        "Network connectivity to the FileCloud server",
        "Appropriate permissions on the FileCloud account to manage ACLs"
      ],
      "source_code": "def main():\n    \"\"\"Main function for the test script.\"\"\"\n    parser = argparse.ArgumentParser(description='Test FileCloud ACL management functions')\n    parser.add_argument('--server', '-s', required=True, help='FileCloud server URL')\n    parser.add_argument('--username', '-u', required=True, help='Username for authentication')\n    parser.add_argument('--password', '-p', required=True, help='Password for authentication')\n    parser.add_argument('--path', default='/test_acl', help='Path to use for ACL testing (default: /test_acl)')\n    \n    args = parser.parse_args()\n    \n    test_acl_functions(args.server, args.username, args.password, args.path)",
      "source_file": "/tf/active/vicechatdev/test_acl_functions.py",
      "tags": [
        "cli",
        "command-line",
        "testing",
        "filecloud",
        "acl",
        "access-control",
        "authentication",
        "entry-point",
        "argparse",
        "test-script"
      ],
      "updated_at": "2025-12-07T01:59:48.487098",
      "usage_example": "# Save the script as test_filecloud_acl.py\n# Run from command line:\n# python test_filecloud_acl.py --server https://filecloud.example.com --username admin --password secret123 --path /test_acl\n\n# Or with short flags:\n# python test_filecloud_acl.py -s https://filecloud.example.com -u admin -p secret123\n\n# Using default path:\n# python test_filecloud_acl.py -s https://filecloud.example.com -u admin -p secret123\n\nif __name__ == '__main__':\n    main()"
    },
    {
      "best_practices": [
        "Always provide required --username and --password arguments when running as script",
        "The function ensures cleanup is called even if processing fails by using try-finally block",
        "Exits with status code 1 if connection to FileCloud fails",
        "Credentials are passed via command-line arguments - consider using environment variables or secure credential storage for production use",
        "The FileCloudEmailProcessor class must be defined in the same module or imported before calling main()",
        "Ensure FC_api module and FileCloudAPI class are available in the Python path"
      ],
      "class_interface": {},
      "complexity": "moderate",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-06 08:45:14",
      "decorators": [],
      "dependencies": [
        "argparse",
        "sys",
        "extract_msg",
        "os",
        "mimetypes",
        "logging",
        "email",
        "traceback",
        "tempfile",
        "base64",
        "shutil",
        "subprocess",
        "pathlib",
        "datetime",
        "FC_api",
        "html",
        "re",
        "reportlab",
        "time",
        "PIL",
        "fitz",
        "PyPDF2"
      ],
      "description": "Entry point function that parses command-line arguments and orchestrates the FileCloud email processing workflow to find, download, and convert .msg files.",
      "docstring": "Main function to run as script",
      "id": 133,
      "imports": [
        "import extract_msg",
        "import os",
        "import mimetypes",
        "import logging",
        "import email",
        "from email.message import EmailMessage",
        "from email.utils import formatdate",
        "from email.utils import formataddr",
        "from email.headerregistry import Address",
        "import email.charset",
        "import traceback",
        "import tempfile",
        "import sys",
        "import base64",
        "import shutil",
        "import subprocess",
        "from pathlib import Path",
        "from datetime import datetime",
        "import argparse",
        "from FC_api import FileCloudAPI",
        "import html",
        "import base64",
        "import re",
        "import re",
        "import re",
        "import html",
        "from reportlab.lib.pagesizes import letter",
        "from reportlab.platypus import SimpleDocTemplate",
        "from reportlab.platypus import Paragraph",
        "from reportlab.platypus import Spacer",
        "from reportlab.lib.styles import getSampleStyleSheet",
        "from reportlab.lib.styles import ParagraphStyle",
        "from reportlab.lib.units import inch",
        "from reportlab.lib import colors",
        "import re",
        "import html",
        "import time",
        "from reportlab.lib.pagesizes import letter",
        "from reportlab.platypus import SimpleDocTemplate",
        "from reportlab.platypus import Image as RLImage",
        "from reportlab.platypus import Paragraph",
        "from reportlab.lib.styles import getSampleStyleSheet",
        "from reportlab.lib.units import inch",
        "from PIL import Image",
        "import fitz",
        "from reportlab.lib.pagesizes import letter",
        "from reportlab.platypus import SimpleDocTemplate",
        "from reportlab.platypus import Paragraph",
        "from reportlab.platypus import Spacer",
        "from reportlab.lib.styles import getSampleStyleSheet",
        "import html",
        "import base64",
        "from PyPDF2 import PdfMerger"
      ],
      "imports_required": [
        "import argparse",
        "import sys",
        "from FC_api import FileCloudAPI"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 1535,
      "line_start": 1518,
      "name": "main_v69",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as the main script entry point for a FileCloud email processor application. It sets up argument parsing for server connection details, authenticates with FileCloud, and initiates the processing of all .msg files found in a specified path. It handles connection lifecycle including cleanup on exit.",
      "return_annotation": null,
      "return_explained": "No explicit return value. The function exits with sys.exit(1) if connection fails, otherwise completes normally after processing.",
      "settings_required": [
        "FileCloud server URL (default: https://filecloud.vicebio.com)",
        "FileCloud username (required command-line argument)",
        "FileCloud password (required command-line argument)",
        "FileCloudEmailProcessor class must be defined in the same module",
        "FC_api module must be available with FileCloudAPI class"
      ],
      "source_code": "def main():\n    \"\"\"Main function to run as script\"\"\"\n    parser = argparse.ArgumentParser(description=\"FileCloud Email Processor - Find, download, and convert .msg files\")\n    parser.add_argument(\"--server\", default=\"https://filecloud.vicebio.com\", help=\"FileCloud server URL\")\n    parser.add_argument(\"--username\", required=True, help=\"FileCloud username\")\n    parser.add_argument(\"--password\", required=True, help=\"FileCloud password\")\n    parser.add_argument(\"--path\", default=\"/\", help=\"Start path in FileCloud to search for .msg files\")\n    \n    args = parser.parse_args()\n    \n    processor = FileCloudEmailProcessor(args.server, args.username, args.password)\n    try:\n        if processor.connect():\n            processor.process_all_msg_files(args.path)\n        else:\n            sys.exit(1)\n    finally:\n        processor.cleanup()",
      "source_file": "/tf/active/vicechatdev/msg_to_eml.py",
      "tags": [
        "cli",
        "command-line",
        "entry-point",
        "filecloud",
        "email-processing",
        "msg-files",
        "file-conversion",
        "authentication",
        "script-runner",
        "argument-parsing"
      ],
      "updated_at": "2025-12-07T01:59:48.486374",
      "usage_example": "# Run from command line:\n# python script.py --username myuser --password mypass --path /emails\n\n# Or call directly in code:\nif __name__ == '__main__':\n    main()\n\n# Command-line arguments:\n# --server: FileCloud server URL (optional, defaults to https://filecloud.vicebio.com)\n# --username: FileCloud username (required)\n# --password: FileCloud password (required)\n# --path: Start path to search for .msg files (optional, defaults to /)"
    },
    {
      "best_practices": [
        "Always set OPENAI_API_KEY environment variable or use --api-key argument before running",
        "Use --verbose flag for debugging and detailed error messages",
        "For production use, configure cloud services via JSON config files rather than command-line arguments",
        "Install appropriate dependencies based on mode: requirements-remarkable.txt for reMarkable, msal/requests for OneDrive",
        "Use --no-existing flag when starting watcher to avoid processing old files",
        "Enable compact mode (default) for optimal e-ink display rendering",
        "Use conversation IDs to maintain context across multiple document exchanges",
        "Set reasonable --max-pages limit to avoid processing extremely large PDFs",
        "For mixed mode, ensure both OneDrive and reMarkable Cloud are properly authenticated",
        "Use --list-conversations to track active sessions before starting new ones",
        "Handle KeyboardInterrupt gracefully - the application is designed for long-running operation",
        "Check for REMARKABLE_AVAILABLE, ONEDRIVE_AVAILABLE, and MIXED_AVAILABLE flags before using respective modes",
        "Use --generate-timeline to create visual summaries of conversation history",
        "Enable hybrid mode (default) for rich responses with both text and graphics"
      ],
      "class_interface": {},
      "complexity": "complex",
      "component_type": "function",
      "conditional_imports": [
        {
          "condition": "only when --generate-timeline argument is used",
          "import": "from conversation_timeline import ConversationTimelineGenerator",
          "optional": true
        },
        {
          "condition": "only when using reMarkable Cloud mode (--mode remarkable/both/mixed or --remarkable-document-id)",
          "import": "from remarkable_processor import RemarkableEInkProcessor",
          "optional": true
        },
        {
          "condition": "only when processing single reMarkable document (--remarkable-document-id)",
          "import": "from remarkable_processor import process_single_remarkable_file",
          "optional": true
        },
        {
          "condition": "only when using OneDrive mode (--mode onedrive/both/mixed)",
          "import": "from onedrive_client import OneDriveClient",
          "optional": true
        },
        {
          "condition": "only when using OneDrive mode (--mode onedrive/both/mixed)",
          "import": "from onedrive_client import OneDriveProcessor",
          "optional": true
        },
        {
          "condition": "only when using mixed cloud mode (--mode mixed)",
          "import": "from mixed_cloud_processor import MixedCloudProcessor",
          "optional": true
        },
        {
          "condition": "only when using mixed cloud mode (--mode mixed)",
          "import": "from mixed_cloud_processor import create_mixed_processor",
          "optional": true
        },
        {
          "condition": "only when using mixed cloud mode (--mode mixed)",
          "import": "from mixed_cloud_processor import create_remarkable_session",
          "optional": true
        },
        {
          "condition": "only when verbose mode is enabled (--verbose)",
          "import": "import traceback",
          "optional": true
        }
      ],
      "created_at": "2025-12-07 00:00:30",
      "decorators": [],
      "dependencies": [
        "asyncio",
        "argparse",
        "sys",
        "os",
        "json",
        "pathlib",
        "dotenv",
        "processor",
        "session_manager",
        "remarkable_processor",
        "onedrive_client",
        "mixed_cloud_processor",
        "conversation_timeline",
        "traceback",
        "msal",
        "requests"
      ],
      "description": "Async entry point for an E-Ink LLM Assistant that processes handwritten/drawn content using AI vision models, supporting local files, reMarkable Cloud, and OneDrive integration.",
      "docstring": null,
      "id": 1981,
      "imports": [
        "import asyncio",
        "import argparse",
        "import sys",
        "import os",
        "import json",
        "from pathlib import Path",
        "from dotenv import load_dotenv",
        "from processor import EInkLLMProcessor",
        "from processor import process_single_file",
        "from session_manager import SessionManager",
        "from remarkable_processor import RemarkableEInkProcessor",
        "from remarkable_processor import process_single_remarkable_file",
        "from onedrive_client import OneDriveClient",
        "from onedrive_client import OneDriveProcessor",
        "from mixed_cloud_processor import MixedCloudProcessor",
        "from mixed_cloud_processor import create_mixed_processor",
        "from conversation_timeline import ConversationTimelineGenerator",
        "import traceback",
        "from mixed_cloud_processor import create_remarkable_session"
      ],
      "imports_required": [
        "import asyncio",
        "import argparse",
        "import sys",
        "import os",
        "import json",
        "from pathlib import Path",
        "from dotenv import load_dotenv",
        "from processor import EInkLLMProcessor",
        "from processor import process_single_file",
        "from session_manager import SessionManager"
      ],
      "is_async": 1,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 643,
      "line_start": 146,
      "name": "main_v68",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This is the main CLI application entry point that orchestrates an AI-powered document processing system designed for e-ink devices. It supports multiple modes: single file processing, file watching, reMarkable Cloud integration, OneDrive integration, and mixed cloud modes. The system processes handwritten notes, drawings, and PDFs using OpenAI's GPT-4 Vision API, maintains conversation history, generates responses optimized for e-ink displays, and can sync with cloud storage services. It includes features like conversation management, timeline generation, multi-page PDF processing, annotation detection, and hybrid text/graphics generation.",
      "return_annotation": null,
      "return_explained": "This function does not return a value. It runs until interrupted by the user (KeyboardInterrupt) or exits with sys.exit() on errors. Side effects include processing files, generating PDFs, updating databases, and syncing with cloud services.",
      "settings_required": [
        "OPENAI_API_KEY environment variable (or --api-key argument) for GPT-4 Vision API access",
        "eink_sessions.db SQLite database file for conversation tracking (auto-created)",
        "eink_llm.log file for activity logging (auto-created)",
        "Optional: remarkable_config.json for reMarkable Cloud settings (--remarkable-config)",
        "Optional: onedrive_config.json for OneDrive settings (--onedrive-config)",
        "Optional: Azure App Registration client ID for OneDrive access (--onedrive-client-id)",
        "Optional: reMarkable one-time authentication code (--remarkable-one-time-code)",
        "Watch folder directory (default: ./watch) for local file monitoring",
        "requirements-remarkable.txt dependencies for reMarkable Cloud integration",
        "requirements-mixed.txt dependencies for mixed cloud mode",
        "msal and requests packages for OneDrive integration"
      ],
      "source_code": "async def main():\n    parser = argparse.ArgumentParser(\n        description=\"E-Ink LLM Assistant - Process handwritten/drawn content with AI\",\n        formatter_class=argparse.RawDescriptionHelpFormatter,\n        epilog=\"\"\"\nExamples:\n  # Start file watcher (default mode)\n  python main.py --watch-folder ./documents\n\n  # Process a single file\n  python main.py --file drawing.pdf\n\n  # Start watcher with custom API key\n  python main.py --api-key sk-... --watch-folder ./input\n\n  # Continue existing conversation\n  python main.py --conversation-id conv_20250731_143022_a8f9c2d1 --file new_question.pdf\n\n  # Use verbose formatting instead of compact\n  python main.py --verbose-mode --file document.pdf\n\n  # List active conversations\n  python main.py --list-conversations\n\nEnvironment Variables:\n  OPENAI_API_KEY    OpenAI API key for GPT-4 Vision models\n\nSupported File Types:\n  PDF, JPG, JPEG, PNG, GIF, BMP, TIFF, WEBP\n\nOutput:\n  - Response PDFs: RESPONSE_[conv_id]_ex[num]_[filename].pdf\n  - Error reports: ERROR_[conv_id]_ex[num]_[filename].pdf\n  - Activity logs: eink_llm.log\n  - Session database: eink_sessions.db\n        \"\"\"\n    )\n    \n    # Mode selection\n    mode_group = parser.add_mutually_exclusive_group()\n    mode_group.add_argument(\n        '--file', '-f',\n        type=str,\n        help='Process a single file instead of watching a folder'\n    )\n    mode_group.add_argument(\n        '--watch-folder', '-w',\n        type=str,\n        help='Folder to watch for new files (default: ./watch)'\n    )\n    mode_group.add_argument(\n        '--remarkable-document-id',\n        type=str,\n        help='Process a single document from reMarkable Cloud by ID'\n    )\n    \n    # Operation mode\n    parser.add_argument(\n        '--mode',\n        choices=['local', 'remarkable', 'onedrive', 'both', 'mixed'],\n        default='local',\n        help='Processing mode: local file watching, reMarkable Cloud, OneDrive, both, or mixed (mixed = monitors both OneDrive and reMarkable for input, outputs to OneDrive) (default: local)'\n    )\n    \n    # Configuration options\n    parser.add_argument(\n        '--api-key',\n        type=str,\n        help='OpenAI API key (can also use OPENAI_API_KEY environment variable)'\n    )\n    parser.add_argument(\n        '--no-existing',\n        action='store_true',\n        help='Skip processing existing files when starting watcher'\n    )\n    parser.add_argument(\n        '--verbose', '-v',\n        action='store_true',\n        help='Enable verbose output'\n    )\n    parser.add_argument(\n        '--conversation-id',\n        type=str,\n        help='Continue existing conversation by ID (default: create new)'\n    )\n    parser.add_argument(\n        '--compact-mode',\n        action='store_true',\n        default=True,\n        help='Use compact response formatting for e-ink optimization (default: enabled)'\n    )\n    parser.add_argument(\n        '--verbose-mode',\n        action='store_true',\n        help='Use verbose response formatting (disables compact mode)'\n    )\n    parser.add_argument(\n        '--no-auto-detect',\n        action='store_true',\n        help='Disable automatic session detection from PDF metadata/content'\n    )\n    parser.add_argument(\n        '--no-multi-page',\n        action='store_true',\n        help='Disable multi-page PDF processing (process only first page)'\n    )\n    parser.add_argument(\n        '--max-pages',\n        type=int,\n        default=50,\n       
 help='Maximum pages to process in multi-page PDFs (default: 50)'\n    )\n    parser.add_argument(\n        '--no-editing-workflow',\n        action='store_true',\n        help='Disable annotation detection and text editing workflow'\n    )\n    parser.add_argument(\n        '--enable-hybrid-mode',\n        action='store_true',\n        default=True,\n        help='Enable hybrid mode with text and graphics generation (default: enabled)'\n    )\n    parser.add_argument(\n        '--no-hybrid-mode',\n        action='store_true',\n        help='Disable hybrid mode, use text-only responses'\n    )\n    parser.add_argument(\n        '--list-conversations',\n        action='store_true',\n        help='List active conversations and exit'\n    )\n    parser.add_argument(\n        '--generate-timeline',\n        type=str,\n        help='Generate conversation timeline PDF for specified conversation ID'\n    )\n    \n    # reMarkable Cloud specific options\n    remarkable_group = parser.add_argument_group('reMarkable Cloud Options')\n    remarkable_group.add_argument(\n        '--remarkable-config',\n        type=str,\n        help='Path to JSON config file for reMarkable Cloud settings'\n    )\n    remarkable_group.add_argument(\n        '--remarkable-watch-folder',\n        type=str,\n        default='/E-Ink LLM Input',\n        help='Folder path in reMarkable Cloud to watch for input files (default: /E-Ink LLM Input)'\n    )\n    remarkable_group.add_argument(\n        '--remarkable-output-folder',\n        type=str,\n        default='/E-Ink LLM Output',\n        help='Folder path in reMarkable Cloud to upload responses (default: /E-Ink LLM Output)'\n    )\n    remarkable_group.add_argument(\n        '--remarkable-one-time-code',\n        type=str,\n        help='One-time code from reMarkable account for initial authentication'\n    )\n    remarkable_group.add_argument(\n        '--remarkable-poll-interval',\n        type=int,\n        default=60,\n        help='Seconds between checks for new files in reMarkable Cloud (default: 60)'\n    )\n    \n    # OneDrive specific options\n    onedrive_group = parser.add_argument_group('OneDrive Options')\n    onedrive_group.add_argument(\n        '--onedrive-config',\n        type=str,\n        help='Path to JSON config file for OneDrive settings'\n    )\n    onedrive_group.add_argument(\n        '--onedrive-watch-folder',\n        type=str,\n        default='/E-Ink LLM Input',\n        help='Folder path in OneDrive to watch for input files (default: /E-Ink LLM Input)'\n    )\n    onedrive_group.add_argument(\n        '--onedrive-output-folder',\n        type=str,\n        default='/E-Ink LLM Output',\n        help='Folder path in OneDrive to upload responses (default: /E-Ink LLM Output)'\n    )\n    onedrive_group.add_argument(\n        '--onedrive-poll-interval',\n        type=int,\n        default=60,\n        help='Seconds between checks for new files in OneDrive (default: 60)'\n    )\n    onedrive_group.add_argument(\n        '--onedrive-client-id',\n        type=str,\n        help='Azure App Registration client ID for OneDrive access'\n    )\n    \n    args = parser.parse_args()\n    \n    # Handle timeline generation\n    if args.generate_timeline:\n        from conversation_timeline import ConversationTimelineGenerator\n        \n        session_manager = SessionManager()\n        timeline_generator = ConversationTimelineGenerator()\n        \n        # Check if conversation exists\n        conversation = 
session_manager.get_conversation(args.generate_timeline)\n        if not conversation:\n            print(f\"\u274c Error: Conversation '{args.generate_timeline}' not found.\")\n            sys.exit(1)\n        \n        print(f\"\ud83d\udcca Generating timeline for conversation: {args.generate_timeline}\")\n        timeline_path = await timeline_generator.generate_timeline_pdf(\n            conversation_id=args.generate_timeline,\n            session_manager=session_manager\n        )\n        \n        if timeline_path:\n            print(f\"\u2705 Timeline generated successfully: {timeline_path}\")\n        else:\n            print(\"\u274c Error: Failed to generate timeline PDF\")\n            sys.exit(1)\n        \n        sys.exit(0)\n    \n    # Handle conversation listing\n    if args.list_conversations:\n        session_manager = SessionManager()\n        conversations = session_manager.list_active_conversations()\n        \n        if conversations:\n            print(\"\ud83d\uddc2\ufe0f  Active Conversations:\")\n            print(\"=\" * 70)\n            for conv in conversations:\n                print(f\"\ud83c\udd94 {conv['conversation_id']}\")\n                print(f\"   \ud83d\udcc5 Created: {conv['created_at']}\")\n                print(f\"   \ud83d\udd50 Last activity: {conv['last_activity']}\")\n                print(f\"   \ud83d\udcac Exchanges: {conv['total_exchanges']}\")\n                if conv['user_id']:\n                    print(f\"   \ud83d\udc64 User: {conv['user_id']}\")\n                print()\n        else:\n            print(\"\ud83d\udcdd No active conversations found.\")\n        \n        sys.exit(0)\n    \n    # Determine compact mode setting\n    compact_mode = args.compact_mode and not args.verbose_mode\n    \n    # Determine hybrid mode setting\n    enable_hybrid_mode = args.enable_hybrid_mode and not args.no_hybrid_mode\n    \n    # Check if remarkable mode is requested but not available\n    if (args.mode in ['remarkable', 'both'] or args.remarkable_document_id) and not REMARKABLE_AVAILABLE:\n        print(\"\u274c Error: reMarkable Cloud integration not available!\")\n        print(\"   Install with: pip install -r requirements-remarkable.txt\")\n        print(\"   Or use local mode: python main.py --mode local\")\n        sys.exit(1)\n    \n    # Check if OneDrive mode is requested but not available  \n    if args.mode in ['onedrive', 'both', 'mixed'] and not ONEDRIVE_AVAILABLE:\n        print(\"\u274c Error: OneDrive integration not available!\")\n        print(\"   Install with: pip install msal requests\")\n        print(\"   Or use local mode: python main.py --mode local\")\n        sys.exit(1)\n    \n    # Check if mixed mode is requested but not available\n    if args.mode == 'mixed' and not MIXED_AVAILABLE:\n        print(\"\u274c Error: Mixed cloud integration not available!\")\n        print(\"   Install dependencies with: pip install -r requirements-mixed.txt\")\n        print(\"   Or use setup script: ./setup_mixed_mode.sh\")\n        print(\"   Or use local mode: python main.py --mode local\")\n        sys.exit(1)\n    \n    # Check if mixed mode is requested but reMarkable not available\n    if args.mode == 'mixed' and not REMARKABLE_AVAILABLE:\n        print(\"\u274c Error: Mixed mode requires reMarkable Cloud integration!\")\n        print(\"   Install with: pip install -r requirements-remarkable.txt\")\n        print(\"   Or use onedrive mode: python main.py --mode onedrive\")\n        sys.exit(1)\n    \n    # Setup 
environment\n    setup_environment()\n    \n    # Validate API key\n    api_key = validate_api_key(args.api_key)\n    \n    # Load reMarkable configuration\n    remarkable_config = load_remarkable_config(args.remarkable_config)\n    \n    # Load OneDrive configuration\n    onedrive_config = load_onedrive_config(args.onedrive_config)\n    \n    # Override config with command line arguments\n    if args.mode in ['remarkable', 'both', 'mixed'] or args.remarkable_document_id:\n        remarkable_config.update({\n            'enabled': True,\n            'watch_folder_path': args.remarkable_watch_folder,\n            'output_folder_path': args.remarkable_output_folder,\n            'poll_interval': args.remarkable_poll_interval,\n        })\n        \n        # For mixed mode, we only watch gpt_out folder in reMarkable, not the regular input folder\n        if args.mode == 'mixed':\n            remarkable_config['watch_folder_path'] = '/gpt_out'  # Force gpt_out folder for mixed mode\n        \n        if args.remarkable_one_time_code:\n            remarkable_config['one_time_code'] = args.remarkable_one_time_code\n    \n    # Override OneDrive config with command line arguments\n    if args.mode in ['onedrive', 'both', 'mixed']:\n        onedrive_config.update({\n            'enabled': True,\n            'watch_folder_path': args.onedrive_watch_folder,\n            'output_folder_path': args.onedrive_output_folder,\n            'poll_interval': args.onedrive_poll_interval,\n        })\n        \n        # For mixed mode, also include reMarkable input folder configuration\n        if args.mode == 'mixed':\n            onedrive_config['remarkable_input_folder'] = args.remarkable_watch_folder\n            onedrive_config['remarkable_poll_interval'] = args.remarkable_poll_interval\n        \n        if args.onedrive_client_id:\n            onedrive_config['client_id'] = args.onedrive_client_id\n    \n    # Print banner\n    print(\"=\" * 70)\n    print(\"\ud83d\udd8b\ufe0f  E-INK LLM ASSISTANT\")\n    print(\"    AI-Powered Handwriting & Drawing Analysis\")\n    if args.mode == 'mixed':\n        print(\"    with Mixed Cloud Integration (OneDrive + reMarkable Input/Output)\")\n    elif remarkable_config.get('enabled'):\n        print(\"    with reMarkable Cloud Integration\")\n    elif onedrive_config.get('enabled'):\n        print(\"    with OneDrive Integration\")\n    print(\"=\" * 70)\n    \n    try:\n        if args.file:\n            # Single file processing mode\n            file_path = Path(args.file)\n            if not file_path.exists():\n                print(f\"\u274c Error: File not found: {file_path}\")\n                sys.exit(1)\n            \n            print(f\"\ud83d\udcc4 Single file mode: {file_path.name}\")\n            result = await process_single_file(\n                str(file_path),\n                api_key,\n                conversation_id=args.conversation_id,\n                compact_mode=compact_mode,\n                auto_detect_session=not args.no_auto_detect,\n                enable_multi_page=not args.no_multi_page,\n                max_pages=args.max_pages,\n                enable_editing_workflow=not args.no_editing_workflow,\n                enable_hybrid_mode=enable_hybrid_mode\n            )\n            \n            if result:\n                print(f\"\u2705 Processing complete!\")\n                print(f\"\ud83d\udcc4 Response saved: {Path(result).name}\")\n            else:\n                print(f\"\u274c Processing failed\")\n                
sys.exit(1)\n        \n        elif args.remarkable_document_id:\n            # Single reMarkable document processing mode\n            print(f\"\ud83c\udf10 Single reMarkable document mode: {args.remarkable_document_id}\")\n            result = await process_single_remarkable_file(\n                args.remarkable_document_id, \n                api_key, \n                remarkable_config\n            )\n            \n            if result:\n                print(f\"\u2705 Processing complete!\")\n                print(f\"\ud83d\udcc4 Response saved: {Path(result).name}\")\n            else:\n                print(f\"\u274c Processing failed\")\n                sys.exit(1)\n        \n        else:\n            # File watcher mode (default)\n            if args.mode == 'mixed':\n                # Mixed mode: OneDrive + reMarkable gpt_out watching\n                if not onedrive_config.get('client_id'):\n                    print(\"\u274c Error: OneDrive client_id required for mixed mode\")\n                    print(\"   Set via --onedrive-client-id or in config file\")\n                    sys.exit(1)\n                \n                # Setup reMarkable session for mixed mode\n                from mixed_cloud_processor import create_remarkable_session\n                \n                print(\"\ud83d\udd10 Authenticating with reMarkable Cloud...\")\n                try:\n                    remarkable_session = create_remarkable_session(remarkable_config)\n                    print(\"\u2705 reMarkable authentication successful\")\n                except Exception as e:\n                    print(f\"\u274c Error: Failed to authenticate with reMarkable Cloud: {e}\")\n                    sys.exit(1)\n                \n                # Create and start mixed processor\n                mixed_processor = create_mixed_processor(\n                    onedrive_config, \n                    remarkable_session, \n                    api_key\n                )\n                await mixed_processor.start_watching()\n                \n            elif args.mode == 'onedrive':\n                # OneDrive only mode\n                if not onedrive_config.get('client_id'):\n                    print(\"\u274c Error: OneDrive client_id required for OneDrive mode\")\n                    print(\"   Set via --onedrive-client-id or in config file\")\n                    sys.exit(1)\n                \n                processor = OneDriveProcessor(onedrive_config, api_key)\n                await processor.start_watching()\n                \n            elif args.mode == 'both':\n                # Both reMarkable and OneDrive (run concurrently)\n                print(\"\ud83d\udd04 Starting both reMarkable and OneDrive watchers...\")\n                \n                tasks = []\n                \n                # Start reMarkable watcher if configured\n                if remarkable_config.get('enabled'):\n                    remarkable_processor = RemarkableEInkProcessor(\n                        api_key=api_key,\n                        watch_folder=args.watch_folder,\n                        remarkable_config=remarkable_config\n                    )\n                    tasks.append(remarkable_processor.start_watching(process_existing=not args.no_existing, mode='remarkable'))\n                \n                # Start OneDrive watcher if configured\n                if onedrive_config.get('enabled'):\n                    if not onedrive_config.get('client_id'):\n                        print(\"\u274c Error: 
OneDrive client_id required for both mode\")\n                        print(\"   Set via --onedrive-client-id or in config file\")\n                        sys.exit(1)\n                    \n                    onedrive_processor = OneDriveProcessor(onedrive_config, api_key)\n                    tasks.append(onedrive_processor.start_watching())\n                \n                if not tasks:\n                    print(\"\u274c Error: No valid configurations for both mode\")\n                    sys.exit(1)\n                \n                # Run both watchers concurrently\n                await asyncio.gather(*tasks)\n                \n            elif remarkable_config.get('enabled'):\n                # Use enhanced processor with reMarkable support\n                processor = RemarkableEInkProcessor(\n                    api_key=api_key, \n                    watch_folder=args.watch_folder,\n                    remarkable_config=remarkable_config\n                )\n            else:\n                # Use original processor for local-only mode\n                watch_folder = args.watch_folder or \"./watch\"\n                processor = EInkLLMProcessor(\n                    api_key=api_key, \n                    watch_folder=watch_folder,\n                    conversation_id=args.conversation_id,\n                    compact_mode=compact_mode,\n                    auto_detect_session=not args.no_auto_detect,\n                    enable_multi_page=not args.no_multi_page,\n                    max_pages=args.max_pages,\n                    enable_editing_workflow=not args.no_editing_workflow,\n                    enable_hybrid_mode=enable_hybrid_mode\n                )\n            \n            # For non-mixed/non-onedrive/non-both modes, start the processor\n            if args.mode not in ['onedrive', 'both', 'mixed']:\n                process_existing = not args.no_existing\n                \n                if hasattr(processor, 'start_watching') and len(processor.start_watching.__code__.co_varnames) > 2:\n                    # Enhanced processor with mode support\n                    await processor.start_watching(process_existing=process_existing, mode=args.mode)\n                else:\n                    # Original processor\n                    await processor.start_watching(process_existing=process_existing)\n    \n    except KeyboardInterrupt:\n        print(f\"\\n\ud83d\udc4b Goodbye!\")\n    except Exception as e:\n        print(f\"\\n\u274c Unexpected error: {e}\")\n        if args.verbose:\n            import traceback\n            traceback.print_exc()\n        sys.exit(1)",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/main.py",
      "tags": [
        "async",
        "cli",
        "entry-point",
        "file-processing",
        "ai-vision",
        "gpt-4",
        "openai",
        "e-ink",
        "handwriting-recognition",
        "pdf-processing",
        "cloud-sync",
        "remarkable",
        "onedrive",
        "conversation-management",
        "file-watcher",
        "document-processing",
        "argparse",
        "multi-mode",
        "session-management",
        "timeline-generation"
      ],
      "updated_at": "2025-12-07T01:59:48.485387",
      "usage_example": "import asyncio\nimport sys\n\n# Example 1: Process a single file\nsys.argv = ['main.py', '--file', 'drawing.pdf', '--api-key', 'sk-...']\nawait main()\n\n# Example 2: Start file watcher in local mode\nsys.argv = ['main.py', '--watch-folder', './documents']\nawait main()\n\n# Example 3: Use reMarkable Cloud mode\nsys.argv = ['main.py', '--mode', 'remarkable', '--remarkable-one-time-code', 'abc123']\nawait main()\n\n# Example 4: Continue existing conversation\nsys.argv = ['main.py', '--file', 'question.pdf', '--conversation-id', 'conv_20250731_143022_a8f9c2d1']\nawait main()\n\n# Example 5: List active conversations\nsys.argv = ['main.py', '--list-conversations']\nawait main()\n\n# Example 6: Generate conversation timeline\nsys.argv = ['main.py', '--generate-timeline', 'conv_20250731_143022_a8f9c2d1']\nawait main()\n\n# Example 7: Mixed cloud mode (OneDrive + reMarkable)\nsys.argv = ['main.py', '--mode', 'mixed', '--onedrive-client-id', 'azure-client-id']\nawait main()\n\n# Run with: python -c \"import asyncio; from main import main; asyncio.run(main())\""
    },
    {
      "best_practices": [
        "Ensure all required configuration constants (TENANT_ID, CLIENT_ID, etc.) are defined before calling this function",
        "The EmailSearchApp class must be properly implemented with all required methods",
        "Verify that the Azure AD application has appropriate Microsoft Graph API permissions (Mail.Read, Mail.ReadWrite)",
        "Ensure OUTPUT_DIR exists or the application has permissions to create it",
        "Handle KeyboardInterrupt gracefully to allow users to cancel long-running operations",
        "The function uses max_results=50 for pagination; adjust based on expected email volume",
        "Monitor console output for progress updates and error messages",
        "Review the generated CSV register file for audit trail of downloaded attachments",
        "Implement proper secret management for CLIENT_SECRET (use environment variables or key vault)",
        "Consider implementing retry logic for network failures in the EmailSearchApp class methods"
      ],
      "class_interface": {},
      "complexity": "complex",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-06 22:21:05",
      "decorators": [],
      "dependencies": [
        "msal",
        "requests",
        "os",
        "base64",
        "csv",
        "typing",
        "datetime",
        "pathlib"
      ],
      "description": "Orchestrates an email search and PDF attachment download workflow using Microsoft Graph API, including authentication, email search, result display, and attachment processing.",
      "docstring": "Main execution function",
      "id": 1858,
      "imports": [
        "import os",
        "import base64",
        "import csv",
        "import msal",
        "import requests",
        "from typing import List",
        "from typing import Dict",
        "from typing import Optional",
        "from datetime import datetime",
        "from pathlib import Path"
      ],
      "imports_required": [
        "import os",
        "import base64",
        "import csv",
        "import msal",
        "import requests",
        "from typing import List, Dict, Optional",
        "from datetime import datetime",
        "from pathlib import Path"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 505,
      "line_start": 436,
      "name": "main_v13",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This is the main entry point for an email search application that connects to Microsoft 365, searches for emails based on sender and keyword criteria, displays results, downloads PDF attachments, and generates a download register. It handles the complete workflow from authentication through file download and metadata tracking.",
      "return_annotation": null,
      "return_explained": "This function does not return any value (implicitly returns None). It performs side effects including console output, file downloads to OUTPUT_DIR, and creation of a CSV register file at REGISTER_FILE location.",
      "settings_required": [
        "TENANT_ID: Azure AD tenant identifier",
        "CLIENT_ID: Azure AD application (client) ID",
        "CLIENT_SECRET: Azure AD application client secret",
        "TARGET_MAILBOX: Email address of the mailbox to search",
        "SCOPES: List of Microsoft Graph API permission scopes (e.g., ['https://graph.microsoft.com/.default'])",
        "SENDER_EMAIL: Email address to filter search results by sender",
        "SEARCH_KEYWORD: Keyword to search for in email content",
        "OUTPUT_DIR: Directory path where PDF attachments will be saved",
        "REGISTER_FILE: File path for the CSV download register",
        "EmailSearchApp class must be defined with methods: authenticate(), search_emails(), display_email_list(), download_pdf_attachments(), save_download_register()"
      ],
      "source_code": "def main():\n    \"\"\"Main execution function\"\"\"\n    \n    # Initialize the application\n    app = EmailSearchApp(\n        tenant_id=TENANT_ID,\n        client_id=CLIENT_ID,\n        client_secret=CLIENT_SECRET,\n        target_mailbox=TARGET_MAILBOX\n    )\n    \n    try:\n        # Step 1: Authenticate user\n        app.authenticate(SCOPES)\n        \n        # Step 2: Search for emails\n        emails = app.search_emails(\n            sender=SENDER_EMAIL,\n            keyword=SEARCH_KEYWORD,\n            max_results=50  # Results per page\n        )\n        \n        # Step 3: Display results\n        app.display_email_list(emails)\n        \n        # Step 4: Download PDF attachments\n        if emails:\n            print(f\"\\n{'='*80}\")\n            print(\"Downloading PDF Attachments\")\n            print(f\"{'='*80}\\n\")\n            \n            all_download_records = []\n            \n            for idx, email in enumerate(emails, 1):\n                subject = email.get(\"subject\", \"(No Subject)\")\n                has_attachments = email.get(\"hasAttachments\", False)\n                \n                print(f\"[{idx}] Processing: {subject[:60]}...\")\n                \n                if not has_attachments:\n                    print(f\"  \u2298 No attachments\")\n                    continue\n                \n                # Download returns metadata for each file\n                download_metadata = app.download_pdf_attachments(\n                    email=email,\n                    output_dir=OUTPUT_DIR\n                )\n                \n                all_download_records.extend(download_metadata)\n            \n            # Save register\n            if all_download_records:\n                app.save_download_register(all_download_records, REGISTER_FILE)\n            \n            print(f\"\\n{'='*80}\")\n            print(f\"\u2713 Download completed!\")\n            print(f\"Total PDF files downloaded: {len(all_download_records)}\")\n            print(f\"Saved to: {os.path.abspath(OUTPUT_DIR)}\")\n            print(f\"Register: {os.path.abspath(REGISTER_FILE)}\")\n            print(f\"{'='*80}\")\n        \n        print(f\"\\n\u2713 Search completed successfully!\")\n        print(f\"Total emails found: {len(emails)}\")\n        \n    except KeyboardInterrupt:\n        print(\"\\n\\n\u2717 Operation cancelled by user\")\n    except Exception as e:\n        print(f\"\\n\u2717 Error occurred: {str(e)}\")\n        raise",
      "source_file": "/tf/active/vicechatdev/mailsearch/email_search_app.py",
      "tags": [
        "email-processing",
        "microsoft-graph",
        "oauth2",
        "attachment-download",
        "pdf-extraction",
        "workflow-orchestration",
        "file-management",
        "authentication",
        "api-integration",
        "batch-processing"
      ],
      "updated_at": "2025-12-07T01:59:48.484432",
      "usage_example": "# Configuration constants\nTENANT_ID = 'your-tenant-id'\nCLIENT_ID = 'your-client-id'\nCLIENT_SECRET = 'your-client-secret'\nTARGET_MAILBOX = 'user@example.com'\nSCOPES = ['https://graph.microsoft.com/.default']\nSENDER_EMAIL = 'sender@example.com'\nSEARCH_KEYWORD = 'invoice'\nOUTPUT_DIR = './downloads'\nREGISTER_FILE = './download_register.csv'\n\n# Ensure EmailSearchApp class is defined\n# from email_search_app import EmailSearchApp\n\nif __name__ == '__main__':\n    main()"
    },
    {
      "best_practices": [
        "This function expects parse_arguments() to be defined elsewhere in the module and return a properly structured args object",
        "Ensure the configuration file exists and contains all required sections (filecloud, logging, document_processing) before calling",
        "The function creates directories automatically but requires write permissions in the working directory",
        "Use --dry-run flag first to verify FileCloud connectivity and document discovery before running full analysis",
        "Monitor the logs directory for detailed execution logs, especially when debugging issues",
        "The function handles KeyboardInterrupt gracefully, allowing users to stop long-running analyses",
        "LLM usage statistics are only displayed if tokens were actually consumed during analysis",
        "Command-line arguments override configuration file settings, allowing flexible runtime customization",
        "The concurrent parameter controls parallelism; adjust based on system resources and API rate limits",
        "Exit codes follow Unix conventions: check return value for automation/scripting purposes"
      ],
      "class_interface": {},
      "complexity": "complex",
      "component_type": "function",
      "conditional_imports": [
        {
          "condition": "only when dry_run mode is enabled (args.dry_run is True)",
          "import": "from utils.filecloud_client import FileCloudClient",
          "optional": true
        }
      ],
      "created_at": "2025-12-06 10:17:24",
      "decorators": [],
      "dependencies": [
        "os",
        "sys",
        "argparse",
        "pathlib"
      ],
      "description": "Main entry point function for the Contract Validity Analyzer application that orchestrates configuration loading, logging setup, FileCloud connection, and contract analysis execution.",
      "docstring": "Main entry point.",
      "id": 396,
      "imports": [
        "import os",
        "import sys",
        "import argparse",
        "from pathlib import Path",
        "from config.config import Config",
        "from core.analyzer import ContractAnalyzer",
        "from utils.logging_utils import setup_logging",
        "from utils.logging_utils import get_logger",
        "from utils.filecloud_client import FileCloudClient"
      ],
      "imports_required": [
        "import os",
        "import sys",
        "import argparse",
        "from pathlib import Path",
        "from config.config import Config",
        "from core.analyzer import ContractAnalyzer",
        "from utils.logging_utils import setup_logging",
        "from utils.logging_utils import get_logger"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 186,
      "line_start": 87,
      "name": "main_v12",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as the primary entry point for a command-line application that analyzes contracts from FileCloud storage. It handles argument parsing, configuration management, logging initialization, optional dry-run mode for document discovery, and executes the full contract analysis pipeline with concurrent processing support. The function manages the complete lifecycle from initialization through execution to summary reporting, including error handling and graceful shutdown.",
      "return_annotation": null,
      "return_explained": "Returns an integer exit code: 0 for successful completion, 1 for errors (including KeyboardInterrupt, connection failures, or fatal exceptions). This follows standard Unix convention for process exit codes.",
      "settings_required": [
        "Configuration file (default or specified via --config argument) with sections: 'filecloud', 'logging', 'document_processing'",
        "parse_arguments() function must be defined to return args object with attributes: config, path, verbose, extensions, output_dir, concurrent, dry_run, max_files",
        "FileCloud credentials and connection settings in configuration file",
        "Write permissions for creating 'logs' directory and optional output directory",
        "ContractAnalyzer class must be available from core.analyzer module",
        "FileCloudClient class must be available from utils.filecloud_client module",
        "Config class must support methods: get_section(), set()",
        "Logging utilities must be available from utils.logging_utils"
      ],
      "source_code": "def main():\n    \"\"\"Main entry point.\"\"\"\n    args = parse_arguments()\n    \n    try:\n        # Load configuration\n        config = Config(args.config)\n        \n        # Override configuration with command line arguments\n        if args.path:\n            config.set('filecloud', {**config.get_section('filecloud'), 'base_path': args.path})\n        \n        if args.verbose:\n            config.set('logging', {**config.get_section('logging'), 'level': 'DEBUG'})\n        \n        if args.extensions:\n            extensions = [ext.strip() for ext in args.extensions.split(',')]\n            config.set('document_processing', {**config.get_section('document_processing'), 'supported_extensions': extensions})\n        \n        # Set up logging\n        log_dir = \"logs\"\n        if args.output_dir:\n            log_dir = os.path.join(args.output_dir, \"logs\")\n        \n        os.makedirs(log_dir, exist_ok=True)\n        setup_logging(config.get_section('logging'), log_dir)\n        \n        logger = get_logger(__name__)\n        logger.info(\"Starting Contract Validity Analyzer\")\n        logger.info(f\"Configuration: {config.config_path}\")\n        logger.info(f\"FileCloud path: {config.get_section('filecloud').get('base_path')}\")\n        logger.info(f\"Concurrent threads: {args.concurrent}\")\n        \n        # Create output directory if specified\n        if args.output_dir:\n            output_dir = os.path.join(args.output_dir, \"output\")\n            os.makedirs(output_dir, exist_ok=True)\n            config.set('output_dir', output_dir)\n        \n        # Dry run mode\n        if args.dry_run:\n            logger.info(\"DRY RUN MODE - Discovering documents without processing\")\n            \n            from utils.filecloud_client import FileCloudClient\n            \n            # Connect to FileCloud and list documents\n            fc_client = FileCloudClient(config.get_section('filecloud'))\n            if not fc_client.connect():\n                logger.error(\"Failed to connect to FileCloud\")\n                return 1\n            \n            documents = fc_client.search_documents()\n            fc_client.disconnect()\n            \n            if documents:\n                logger.info(f\"Found {len(documents)} documents to analyze:\")\n                for doc in documents:\n                    logger.info(f\"  - {doc['filename']} ({doc['size']} bytes)\")\n            else:\n                logger.warning(\"No documents found\")\n            \n            return 0\n        \n        # Initialize and run analyzer\n        analyzer = ContractAnalyzer(config.config)\n        \n        # Set up analysis parameters\n        analysis_kwargs = {'max_concurrent': args.concurrent}\n        if args.max_files:\n            analysis_kwargs['max_files'] = args.max_files\n        \n        results = analyzer.analyze_contracts(**analysis_kwargs)\n        \n        # Print summary\n        stats = analyzer.get_summary_stats()\n        if stats:\n            logger.info(\"=\" * 50)\n            logger.info(\"ANALYSIS SUMMARY\")\n            logger.info(\"=\" * 50)\n            for key, value in stats.items():\n                logger.info(f\"{key.replace('_', ' ').title()}: {value}\")\n            logger.info(\"=\" * 50)\n        \n        # Print LLM usage stats\n        llm_stats = analyzer.llm_client.get_usage_stats()\n        if llm_stats.get('total_tokens', 0) > 0:\n            logger.info(\"LLM Usage Statistics:\")\n            
logger.info(f\"  Total tokens: {llm_stats['total_tokens']:,}\")\n            logger.info(f\"  Prompt tokens: {llm_stats['total_prompt_tokens']:,}\")\n            logger.info(f\"  Completion tokens: {llm_stats['total_completion_tokens']:,}\")\n        \n        logger.info(\"Analysis complete!\")\n        return 0\n        \n    except KeyboardInterrupt:\n        logger.info(\"Analysis interrupted by user\")\n        return 1\n    except Exception as e:\n        logger.error(f\"Fatal error: {e}\")\n        return 1",
      "source_file": "/tf/active/vicechatdev/contract_validity_analyzer/main.py",
      "tags": [
        "entry-point",
        "main-function",
        "cli-application",
        "contract-analysis",
        "filecloud",
        "configuration-management",
        "logging",
        "concurrent-processing",
        "document-processing",
        "dry-run",
        "error-handling",
        "orchestration"
      ],
      "updated_at": "2025-12-07T01:59:48.483430",
      "usage_example": "# This function is designed to be called as the main entry point of the application\n# Typically invoked from a script like:\n\nif __name__ == '__main__':\n    import sys\n    sys.exit(main())\n\n# Command line usage examples:\n# Basic run:\n# python script.py --config config.yaml\n\n# Dry run to discover documents:\n# python script.py --config config.yaml --dry-run\n\n# With custom settings:\n# python script.py --config config.yaml --path /contracts --verbose --concurrent 5 --max-files 100\n\n# With custom extensions:\n# python script.py --config config.yaml --extensions pdf,docx,txt --output-dir ./results"
    },
    {
      "best_practices": [
        "Always specify the --pattern argument as it is required for execution",
        "Use --start-date to filter data to relevant time periods and improve performance",
        "When using --pattern all, be aware that output filenames will be automatically modified with pattern suffixes",
        "Use --sample-size for testing or when working with large datasets to limit processing time",
        "Enable --skip-geocoding if coordinates are not needed to speed up processing",
        "Use --cache-only to avoid API rate limits when geocoding data that may already be cached",
        "Check return code (0 for success, 1 for error) when calling programmatically",
        "Ensure the PatternBasedExtractor class is properly defined and imported before calling main()",
        "The function prints detailed progress information to stdout, so redirect or capture if needed",
        "Handle early exits gracefully - function returns None if no mixed farms or patterns are found"
      ],
      "class_interface": {},
      "complexity": "complex",
      "component_type": "function",
      "conditional_imports": [
        {
          "condition": "only used in exception handling block when errors occur",
          "import": "import traceback",
          "optional": false
        }
      ],
      "created_at": "2025-12-06 08:22:13",
      "decorators": [],
      "dependencies": [
        "argparse",
        "os",
        "sys",
        "pandas",
        "numpy",
        "datetime",
        "typing",
        "traceback",
        "matched_sample_analysis",
        "extractor"
      ],
      "description": "Command-line interface function that orchestrates pattern-based extraction of poultry flock data, including data loading, pattern classification, geocoding, and export functionality.",
      "docstring": "Main function for pattern-based extraction.",
      "id": 70,
      "imports": [
        "import os",
        "import sys",
        "import pandas as pd",
        "import numpy as np",
        "import argparse",
        "from datetime import datetime",
        "from typing import Dict",
        "from typing import List",
        "from typing import Optional",
        "from typing import Tuple",
        "from matched_sample_analysis import MatchedSampleAnalyzer",
        "from extractor import PehestatDataExtractor",
        "import traceback"
      ],
      "imports_required": [
        "import os",
        "import sys",
        "import pandas as pd",
        "import numpy as np",
        "import argparse",
        "from datetime import datetime",
        "from typing import Dict, List, Optional, Tuple",
        "from matched_sample_analysis import MatchedSampleAnalyzer",
        "from extractor import PehestatDataExtractor",
        "import traceback"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 622,
      "line_start": 505,
      "name": "main_v11",
      "parameters": [],
      "parameters_explained": {
        "none": "This function takes no direct parameters. All configuration is handled through command-line arguments parsed via argparse, including: --pattern (required: sequential/concurrent/mixed/all), --output (CSV filename), --sample-size (number of flocks), --geocoded-data (path to geocoded data), --data-dir (Pehestat data directory), --skip-geocoding (flag), --cache-only (flag), --create-map (flag), --map-output (map filename), --use-clustering (flag), --start-date (YYYY-MM-DD format)"
      },
      "parent_class": null,
      "purpose": "This is the main entry point for a pattern-based poultry data extraction tool. It processes command-line arguments to extract flock data based on In-Ovo usage patterns (sequential, concurrent, mixed, or all), filters data by date, optionally performs geocoding and map generation, and exports results to CSV files. The function coordinates multiple extraction steps including data loading, mixed farm identification, pattern classification, data enrichment, and result export.",
      "return_annotation": null,
      "return_explained": "Returns an integer exit code: 0 for successful completion, 1 for error conditions. Returns None implicitly if no mixed farms or patterns are found (early exit scenarios).",
      "settings_required": [
        "Data directory containing Pehestat data files (default: /tf/active/pehestat_data, configurable via --data-dir)",
        "PatternBasedExtractor class must be available in the module scope",
        "Optional: Geocoded data file for coordinate enrichment (via --geocoded-data)",
        "Optional: Geocoding API credentials if not using --skip-geocoding or --cache-only flags",
        "File system write permissions for output CSV and map files"
      ],
      "source_code": "def main():\n    \"\"\"Main function for pattern-based extraction.\"\"\"\n    parser = argparse.ArgumentParser(description='Pattern-Based Poultry Data Extraction')\n    parser.add_argument('--pattern', type=str, required=True, \n                       choices=['sequential', 'concurrent', 'mixed', 'all'],\n                       help='In-Ovo usage pattern to extract')\n    parser.add_argument('--output', type=str, default=None,\n                       help='Output CSV filename (default: auto-generated)')\n    parser.add_argument('--sample-size', type=int, default=None,\n                       help='Number of flocks to sample (default: extract all)')\n    parser.add_argument('--geocoded-data', type=str, default=None,\n                       help='Path to geocoded data file for coordinate enrichment')\n    parser.add_argument('--data-dir', type=str, default='/tf/active/pehestat_data',\n                       help='Directory containing Pehestat data files')\n    parser.add_argument('--skip-geocoding', action='store_true',\n                       help='Skip geocoding and map generation')\n    parser.add_argument('--cache-only', action='store_true',\n                       help='Use geocoding cache only (no API calls)')\n    parser.add_argument('--create-map', action='store_true',\n                       help='Create interactive map (requires geocoding)')\n    parser.add_argument('--map-output', type=str, default=None,\n                       help='Output map filename (default: auto-generated)')\n    parser.add_argument('--use-clustering', action='store_true',\n                       help='Enable marker clustering on the map')\n    parser.add_argument('--start-date', type=str, default='2020-01-01',\n                       help='Start date filter (YYYY-MM-DD, default: 2020-01-01)')\n    \n    args = parser.parse_args()\n    \n    print(\"=\" * 80)\n    print(\"PATTERN-BASED POULTRY DATA EXTRACTION\")\n    print(\"=\" * 80)\n    print(f\"Target pattern: {args.pattern}\")\n    print(f\"Start date filter: {args.start_date}\")\n    print(f\"Sample size: {'All flocks' if args.sample_size is None else f'{args.sample_size:,} flocks'}\")\n    print(f\"Data directory: {args.data_dir}\")\n    if args.geocoded_data:\n        print(f\"Geocoded data: {args.geocoded_data}\")\n    if not args.skip_geocoding:\n        if args.cache_only:\n            print(\"Geocoding: Cache-only mode (no API calls)\")\n        else:\n            print(\"Geocoding: Full mode (includes API calls if needed)\")\n        if args.create_map:\n            print(\"Map generation: Enabled\")\n    else:\n        print(\"Geocoding: Disabled\")\n    print(\"=\" * 80)\n    \n    try:\n        # Initialize extractor\n        extractor = PatternBasedExtractor(\n            data_dir=args.data_dir,\n            geocoded_file=args.geocoded_data\n        )\n        \n        # Load and filter base data\n        flocks_df = extractor.load_and_filter_base_data(start_date=args.start_date)\n        \n        # Identify mixed farms\n        mixed_farms_df = extractor.identify_mixed_farms(flocks_df)\n        \n        if len(mixed_farms_df) == 0:\n            print(\"No mixed farms found! Cannot proceed with pattern extraction.\")\n            return\n        \n        # Classify farm patterns\n        patterns_df = extractor.classify_farm_patterns(flocks_df, mixed_farms_df)\n        \n        if len(patterns_df) == 0:\n            print(\"No farm patterns could be classified! 
Cannot proceed.\")\n            return\n        \n        # Extract flocks by pattern\n        if args.pattern == 'all':\n            # Extract all patterns\n            for pattern in ['sequential', 'concurrent', 'mixed']:\n                pattern_flocks = extractor.extract_flocks_by_pattern(\n                    pattern, flocks_df, patterns_df, args.sample_size\n                )\n                \n                if len(pattern_flocks) > 0:\n                    # Enrich data\n                    enriched_flocks = extractor.enrich_flock_data(pattern_flocks)\n                    \n                    # Export results\n                    output_file = args.output\n                    if output_file and args.pattern == 'all':\n                        # Modify filename for each pattern\n                        base, ext = os.path.splitext(output_file)\n                        output_file = f\"{base}_{pattern}{ext}\"\n                    \n                    extractor.export_results(enriched_flocks, pattern, output_file)\n        else:\n            # Extract specific pattern\n            pattern_flocks = extractor.extract_flocks_by_pattern(\n                args.pattern, flocks_df, patterns_df, args.sample_size\n            )\n            \n            if len(pattern_flocks) == 0:\n                print(f\"No flocks found for pattern '{args.pattern}'!\")\n                return\n            \n            # Enrich data\n            enriched_flocks = extractor.enrich_flock_data(pattern_flocks)\n            \n            # Export results\n            extractor.export_results(enriched_flocks, args.pattern, args.output)\n        \n        print(\"\\n\u2705 Pattern-based extraction completed successfully!\")\n        \n    except Exception as e:\n        print(f\"\\n\u274c Error during pattern-based extraction: {e}\")\n        import traceback\n        traceback.print_exc()\n        return 1\n    \n    return 0",
      "source_file": "/tf/active/vicechatdev/pattern_based_extraction.py",
      "tags": [
        "cli",
        "command-line-interface",
        "data-extraction",
        "poultry-data",
        "pattern-analysis",
        "geocoding",
        "data-processing",
        "csv-export",
        "map-generation",
        "argparse",
        "main-function",
        "entry-point",
        "flock-data",
        "in-ovo-patterns"
      ],
      "updated_at": "2025-12-07T01:59:48.482310",
      "usage_example": "# Run from command line:\n# Extract sequential pattern flocks from 2020 onwards\npython script.py --pattern sequential --start-date 2020-01-01 --output sequential_flocks.csv\n\n# Extract all patterns with sampling and geocoding\npython script.py --pattern all --sample-size 1000 --geocoded-data geocoded.csv --create-map\n\n# Extract concurrent pattern without geocoding\npython script.py --pattern concurrent --skip-geocoding --output concurrent_only.csv\n\n# Extract mixed pattern with cache-only geocoding and clustering map\npython script.py --pattern mixed --cache-only --create-map --use-clustering --map-output mixed_map.html\n\n# Programmatic usage (if called from Python):\nif __name__ == '__main__':\n    sys.exit(main())"
    },
    {
      "best_practices": [
        "This function should be called as the main entry point of the script using if __name__ == '__main__': main()",
        "Ensure DocumentAnalyzer class is properly defined before calling this function",
        "Configure logging before calling main() to capture all log messages",
        "The function expects a CSV register file with specific format - ensure compatibility",
        "Use --limit parameter during testing to avoid processing large document sets",
        "Ensure sufficient disk space in output directory for results",
        "Handle keyboard interrupts gracefully if processing large batches",
        "The function will raise exceptions on fatal errors - wrap in try-except if calling programmatically",
        "Verify all system dependencies (Tesseract, poppler) are installed before running",
        "Set appropriate OpenAI API credentials before execution"
      ],
      "class_interface": {},
      "complexity": "moderate",
      "component_type": "function",
      "conditional_imports": [
        {
          "condition": "imported inside the function body, only when main() is called",
          "import": "import argparse",
          "optional": false
        }
      ],
      "created_at": "2025-12-06 22:16:33",
      "decorators": [],
      "dependencies": [
        "argparse",
        "logging",
        "csv",
        "json",
        "pathlib",
        "datetime",
        "typing",
        "numpy",
        "pdf2image",
        "pytesseract",
        "easyocr",
        "PIL",
        "openai"
      ],
      "description": "Command-line interface function that orchestrates PDF document analysis using OCR and LLM processing, with configurable input/output paths and processing limits.",
      "docstring": "Main execution function",
      "id": 1845,
      "imports": [
        "import os",
        "import sys",
        "import csv",
        "import json",
        "from pathlib import Path",
        "from datetime import datetime",
        "from typing import Dict",
        "from typing import List",
        "from typing import Optional",
        "from typing import Any",
        "import logging",
        "import numpy as np",
        "from pdf2image import convert_from_path",
        "import pytesseract",
        "import easyocr",
        "from PIL import Image",
        "from openai import OpenAI",
        "import argparse",
        "import re"
      ],
      "imports_required": [
        "import argparse",
        "import logging",
        "import csv",
        "import json",
        "from pathlib import Path",
        "from datetime import datetime",
        "from typing import Dict, List, Optional, Any",
        "import numpy as np",
        "from pdf2image import convert_from_path",
        "import pytesseract",
        "import easyocr",
        "from PIL import Image",
        "from openai import OpenAI"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 617,
      "line_start": 563,
      "name": "main_v10",
      "parameters": [],
      "parameters_explained": {
        "none": "This function takes no direct parameters. It uses argparse to parse command-line arguments: --register (path to CSV file containing document registry, default './output/download_register.csv'), --limit (optional integer to limit number of documents processed for testing), and --output-dir (directory for saving results, default './output')"
      },
      "parent_class": null,
      "purpose": "Serves as the main entry point for a document analysis application that reads PDF files from a download register, processes them using a DocumentAnalyzer class (which performs OCR and LLM analysis), and saves structured results. Designed for batch processing of PDF documents with progress tracking and error handling.",
      "return_annotation": null,
      "return_explained": "Returns None. The function performs side effects including printing status messages to stdout, processing documents through DocumentAnalyzer, and saving results to files. May raise exceptions on fatal errors.",
      "settings_required": [
        "DocumentAnalyzer class must be defined and available in the same module or imported",
        "logger object must be configured and available in the module scope",
        "OpenAI API key must be configured (likely via environment variable OPENAI_API_KEY)",
        "Tesseract OCR must be installed on the system for pytesseract",
        "poppler-utils must be installed for pdf2image to work",
        "Input CSV register file must exist at specified path with expected format",
        "Output directory must be writable or creatable",
        "PDF files referenced in the register must be accessible"
      ],
      "source_code": "def main():\n    \"\"\"Main execution function\"\"\"\n    import argparse\n    \n    parser = argparse.ArgumentParser(description=\"Analyze downloaded PDF documents\")\n    parser.add_argument(\n        '--register',\n        default='./output/download_register.csv',\n        help='Path to download register CSV'\n    )\n    parser.add_argument(\n        '--limit',\n        type=int,\n        default=None,\n        help='Limit number of documents to process (for testing)'\n    )\n    parser.add_argument(\n        '--output-dir',\n        default='./output',\n        help='Output directory for results'\n    )\n    \n    args = parser.parse_args()\n    \n    print(f\"\\n{'='*80}\")\n    print(\"Document Analyzer - PDF Analysis with OCR and LLM\")\n    print(f\"{'='*80}\\n\")\n    \n    try:\n        # Initialize analyzer\n        analyzer = DocumentAnalyzer(output_dir=args.output_dir)\n        \n        # Process documents\n        results = analyzer.process_documents_from_register(\n            register_path=args.register,\n            limit=args.limit\n        )\n        \n        # Save results\n        analyzer.save_results(results)\n        \n        # Summary\n        successful = sum(1 for r in results if r['success'])\n        failed = len(results) - successful\n        \n        print(f\"\\n{'='*80}\")\n        print(f\"Processing Complete!\")\n        print(f\"  Total documents: {len(results)}\")\n        print(f\"  Successful: {successful}\")\n        print(f\"  Failed: {failed}\")\n        print(f\"{'='*80}\\n\")\n        \n    except Exception as e:\n        logger.error(f\"Fatal error: {e}\")\n        raise",
      "source_file": "/tf/active/vicechatdev/mailsearch/document_analyzer.py",
      "tags": [
        "cli",
        "command-line",
        "entry-point",
        "pdf-processing",
        "ocr",
        "llm",
        "document-analysis",
        "batch-processing",
        "argparse",
        "main-function",
        "orchestration",
        "error-handling"
      ],
      "updated_at": "2025-12-07T01:59:48.481152",
      "usage_example": "# Run from command line with default settings:\n# python script.py\n\n# Run with custom register path and limit:\n# python script.py --register /path/to/register.csv --limit 10 --output-dir /path/to/output\n\n# In Python code (if calling directly):\nif __name__ == '__main__':\n    main()\n\n# The function expects to be run as a script entry point and will:\n# 1. Parse command-line arguments\n# 2. Initialize DocumentAnalyzer with output directory\n# 3. Process documents from the register CSV\n# 4. Save results and print summary statistics"
    },
    {
      "best_practices": [
        "This function must be run using asyncio.run(main()) or equivalent async event loop",
        "Ensure all required configuration settings are properly set before calling this function",
        "The function registers signal handlers which may interfere with other signal handling in the application - avoid multiple signal handler registrations",
        "The SMTPServer class must implement stop() and run_forever() methods for proper operation",
        "Logging should be configured to handle concurrent async operations if the SMTP server processes multiple connections",
        "The function calls sys.exit() which terminates the entire process - ensure all cleanup is handled in the finally block or signal handlers",
        "On Windows, SIGTERM may not be available - consider platform-specific signal handling if cross-platform support is needed",
        "The print_banner() function is called but not imported in the provided imports list - ensure this function is available in scope",
        "Consider implementing a timeout mechanism for smtp_server.run_forever() to prevent indefinite hanging",
        "The signal_handler function is synchronous but calls smtp_server.stop() - ensure stop() is safe to call from signal handlers"
      ],
      "class_interface": {},
      "complexity": "moderate",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-06 17:40:59",
      "decorators": [],
      "dependencies": [
        "asyncio",
        "logging",
        "sys",
        "signal"
      ],
      "description": "Asynchronous main entry point function that initializes and runs an email forwarding SMTP server with logging, configuration validation, and graceful shutdown handling.",
      "docstring": "Main application entry point.",
      "id": 1491,
      "imports": [
        "import asyncio",
        "import logging",
        "import sys",
        "import signal",
        "from datetime import datetime",
        "from config import settings",
        "from utils.logger import setup_logging",
        "from forwarder.smtp_server import SMTPServer"
      ],
      "imports_required": [
        "import asyncio",
        "import logging",
        "import sys",
        "import signal",
        "from datetime import datetime",
        "from config import settings",
        "from utils.logger import setup_logging",
        "from forwarder.smtp_server import SMTPServer"
      ],
      "is_async": 1,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 72,
      "line_start": 36,
      "name": "main_v9",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as the primary application entry point for an email forwarding service. It orchestrates the complete lifecycle of the application including: setting up logging infrastructure, validating configuration settings, creating and starting an SMTP server instance, registering signal handlers for graceful shutdown on SIGINT/SIGTERM signals, and handling fatal errors with appropriate cleanup. The function is designed to run indefinitely until interrupted by signals or fatal errors.",
      "return_annotation": null,
      "return_explained": "This function does not return any value (implicitly returns None). It runs indefinitely until interrupted by a signal (SIGINT/SIGTERM) or a fatal exception occurs, at which point it exits the process with status code 0 (graceful shutdown) or 1 (error condition).",
      "settings_required": [
        "config module with settings object that has validate_config() method",
        "utils.logger module with setup_logging() function",
        "forwarder.smtp_server module with SMTPServer class",
        "Configuration settings accessible via settings object (specific settings depend on validate_config() implementation)",
        "Appropriate permissions to bind to SMTP ports (typically port 25, 587, or custom port)",
        "Signal handling support (SIGINT and SIGTERM) - standard on Unix-like systems"
      ],
      "source_code": "async def main():\n    \"\"\"Main application entry point.\"\"\"\n    \n    # Set up logging\n    setup_logging()\n    \n    # Print banner\n    print_banner()\n    \n    try:\n        # Validate configuration\n        settings.validate_config()\n        logger.info(\"Configuration validated successfully\")\n        \n        # Create and start SMTP server\n        smtp_server = SMTPServer()\n        \n        # Set up graceful shutdown\n        def signal_handler(signum, frame):\n            logger.info(f\"Received signal {signum}, initiating shutdown...\")\n            smtp_server.stop()\n            sys.exit(0)\n            \n        signal.signal(signal.SIGINT, signal_handler)\n        signal.signal(signal.SIGTERM, signal_handler)\n        \n        # Start server\n        logger.info(\"Starting Email Forwarder service...\")\n        await smtp_server.run_forever()\n        \n    except KeyboardInterrupt:\n        logger.info(\"Received keyboard interrupt, shutting down...\")\n    except Exception as e:\n        logger.error(f\"Fatal error: {e}\")\n        sys.exit(1)\n    finally:\n        logger.info(\"Email Forwarder service stopped\")",
      "source_file": "/tf/active/vicechatdev/email-forwarder/src/main.py",
      "tags": [
        "async",
        "smtp",
        "email",
        "server",
        "entry-point",
        "signal-handling",
        "graceful-shutdown",
        "logging",
        "configuration",
        "daemon",
        "service",
        "forwarder",
        "main-function"
      ],
      "updated_at": "2025-12-07T01:59:48.480032",
      "usage_example": "import asyncio\nimport logging\nimport sys\nimport signal\nfrom datetime import datetime\nfrom config import settings\nfrom utils.logger import setup_logging\nfrom forwarder.smtp_server import SMTPServer\n\nasync def main():\n    \"\"\"Main application entry point.\"\"\"\n    setup_logging()\n    print_banner()\n    try:\n        settings.validate_config()\n        logger.info(\"Configuration validated successfully\")\n        smtp_server = SMTPServer()\n        def signal_handler(signum, frame):\n            logger.info(f\"Received signal {signum}, initiating shutdown...\")\n            smtp_server.stop()\n            sys.exit(0)\n        signal.signal(signal.SIGINT, signal_handler)\n        signal.signal(signal.SIGTERM, signal_handler)\n        logger.info(\"Starting Email Forwarder service...\")\n        await smtp_server.run_forever()\n    except KeyboardInterrupt:\n        logger.info(\"Received keyboard interrupt, shutting down...\")\n    except Exception as e:\n        logger.error(f\"Fatal error: {e}\")\n        sys.exit(1)\n    finally:\n        logger.info(\"Email Forwarder service stopped\")\n\nif __name__ == \"__main__\":\n    asyncio.run(main())"
    },
    {
      "best_practices": [
        "Ensure proper logging configuration before calling this function to capture all import activities",
        "Verify that an admin user exists in the system before running the import",
        "The function performs duplicate checking using both cdoc_uid (preferred) and doc_number (fallback) to prevent duplicate imports",
        "Documents with cdoc_uid metadata take precedence over doc_number for duplicate detection",
        "Monitor the import summary logs to track success rates and identify issues",
        "Handle FileCloud connection errors gracefully - the function will log errors but continue processing remaining documents",
        "Ensure sufficient disk space and memory for processing large document sets",
        "Run this function in a controlled environment as it performs database writes and file operations",
        "Consider implementing rate limiting if importing large numbers of documents to avoid overwhelming FileCloud API",
        "Review failed_count in the summary to identify and address systematic import issues"
      ],
      "class_interface": {},
      "complexity": "complex",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-06 13:37:15",
      "decorators": [],
      "dependencies": [
        "os",
        "sys",
        "logging",
        "tempfile",
        "uuid",
        "io",
        "typing",
        "datetime",
        "CDocs.db.db_operations",
        "CDocs.models.document",
        "CDocs.models.user_extensions",
        "CDocs.controllers.filecloud_controller",
        "CDocs.controllers.document_controller",
        "CDocs.config",
        "FC_api",
        "metadata_catalog",
        "traceback"
      ],
      "description": "Main execution function that orchestrates the import of controlled documents from FileCloud into a Neo4j database, checking for duplicates and managing document metadata.",
      "docstring": "Main execution function",
      "id": 852,
      "imports": [
        "import os",
        "import sys",
        "import logging",
        "import tempfile",
        "import uuid",
        "import io",
        "from typing import Dict",
        "from typing import List",
        "from typing import Any",
        "from typing import Optional",
        "from datetime import datetime",
        "from CDocs.db import db_operations as db",
        "from CDocs.models.document import ControlledDocument",
        "from CDocs.models.document import DocumentVersion",
        "from CDocs.models.user_extensions import DocUser",
        "from CDocs.controllers.filecloud_controller import get_filecloud_client",
        "from CDocs.controllers.filecloud_controller import upload_document_to_filecloud",
        "from CDocs.controllers.filecloud_controller import get_filecloud_document_path",
        "from CDocs.controllers.filecloud_controller import ensure_document_folders",
        "from CDocs.controllers.filecloud_controller import FileCloudError",
        "from CDocs.controllers.document_controller import create_document_version",
        "from CDocs.config import settings",
        "from FC_api import FileCloudAPI",
        "from CDocs.controllers.document_controller import set_current_version",
        "from CDocs.controllers.filecloud_controller import upload_document_to_filecloud",
        "from metadata_catalog import MetadataCatalog",
        "import traceback",
        "import traceback",
        "import traceback",
        "import traceback"
      ],
      "imports_required": [
        "import os",
        "import sys",
        "import logging",
        "import tempfile",
        "import uuid",
        "import io",
        "from typing import Dict, List, Any, Optional",
        "from datetime import datetime",
        "from CDocs.db import db_operations as db",
        "from CDocs.models.document import ControlledDocument, DocumentVersion",
        "from CDocs.models.user_extensions import DocUser",
        "from CDocs.controllers.filecloud_controller import get_filecloud_client, upload_document_to_filecloud, get_filecloud_document_path, ensure_document_folders, FileCloudError",
        "from CDocs.controllers.document_controller import create_document_version, set_current_version",
        "from CDocs.config import settings",
        "from FC_api import FileCloudAPI",
        "from metadata_catalog import MetadataCatalog",
        "import traceback"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 565,
      "line_start": 483,
      "name": "main_v8",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as the entry point for a document import script that searches FileCloud for controlled documents, validates them against existing records in Neo4j (by UID or document number), and imports new documents while tracking success/failure statistics. It handles duplicate detection, error logging, and provides a comprehensive summary of the import operation.",
      "return_annotation": null,
      "return_explained": "Returns None. The function performs side effects (importing documents, logging results) rather than returning a value. Success/failure information is logged and tracked internally through counters (imported_count, skipped_count, failed_count).",
      "settings_required": [
        "Logger instance named 'logger' must be configured before calling this function",
        "get_admin_user() function must be available and return a DocUser object",
        "search_filecloud_for_documents() function must be available to search FileCloud",
        "check_document_exists_by_uid() function must be available to check Neo4j by UID",
        "check_document_exists_by_doc_number() function must be available to check Neo4j by document number",
        "import_document_from_filecloud() function must be available to perform the actual import",
        "FileCloud API credentials and connection settings must be configured in CDocs.config.settings",
        "Neo4j database connection must be configured and accessible through CDocs.db.db_operations",
        "Admin user must exist in the system for document creation"
      ],
      "source_code": "def main():\n    \"\"\"Main execution function\"\"\"\n    try:\n        logger.info(\"Starting FileCloud document import script\")\n        \n        # Get admin user for document creation\n        admin_user = get_admin_user()\n        if not admin_user:\n            logger.error(\"Cannot proceed without an admin user\")\n            return\n        \n        # Search for controlled documents in FileCloud\n        documents = search_filecloud_for_documents()\n        if not documents:\n            logger.info(\"No documents found to import\")\n            return\n            \n        # Process found documents\n        imported_count = 0\n        skipped_count = 0\n        failed_count = 0\n        \n        for doc in documents:\n            file_path = doc.get('file_path')\n            metadata = doc.get('metadata', {})\n            \n            # First check if document has a cdoc_uid in metadata\n            cdoc_uid = metadata.get('cdoc_uid')\n            if cdoc_uid:\n                logger.info(f\"Found document with cdoc_uid: {cdoc_uid}\")\n                # Check if this document exists in Neo4j by UID\n                existing_doc = check_document_exists_by_uid(cdoc_uid)\n                if existing_doc:\n                    logger.info(f\"Document with UID {cdoc_uid} is already managed in Neo4j - skipping\")\n                    skipped_count += 1\n                    continue\n                    \n                # If we have a cdoc_uid but it's not in Neo4j, this means the document \n                # was meant to be managed but isn't - import it with that ID\n                logger.info(f\"Document with UID {cdoc_uid} not found in Neo4j but has cdoc_uid - will import\")\n            \n            # Then check by document number as fallback\n            doc_number = metadata.get('doc_number')\n            if doc_number and not file_path:\n                logger.warning(f\"Skipping document with missing file_path: {doc_number}\")\n                skipped_count += 1\n                continue\n            \n            if doc_number and not cdoc_uid:\n                # Skip if document already exists in database by number\n                existing_doc = check_document_exists_by_doc_number(doc_number)\n                if existing_doc:\n                    logger.info(f\"Document {doc_number} already exists in database by number - skipping\")\n                    skipped_count += 1\n                    continue\n            \n            # Import document\n            logger.info(\"file path: \" + file_path)\n            logger.info(\"metadata: \" + str(metadata))\n            logger.info(\"admin_user: \" + str(admin_user.name))\n            result = import_document_from_filecloud(file_path, metadata, admin_user)\n            #result=None\n            \n            if result and result.get('success', False):\n                imported_count += 1\n                logger.info(f\"Successfully imported document: {result.get('doc_number')}\")\n            else:\n                failed_count += 1\n                error_msg = result.get('message') if result else \"Unknown error\"\n                logger.error(f\"Failed to import document: {error_msg}\")\n        \n        # Report summary\n        logger.info(\"===== Import Summary =====\")\n        logger.info(f\"Total documents found in FileCloud: {len(documents)}\")\n        logger.info(f\"Documents imported: {imported_count}\")\n        logger.info(f\"Documents skipped (already exist): {skipped_count}\")\n      
  logger.info(f\"Documents failed to import: {failed_count}\")\n        logger.info(\"=========================\")\n        \n    except Exception as e:\n        logger.error(f\"Error in main execution: {e}\")\n        import traceback\n        logger.error(traceback.format_exc())",
      "source_file": "/tf/active/vicechatdev/CDocs/FC_sync.py",
      "tags": [
        "document-management",
        "filecloud",
        "neo4j",
        "import",
        "batch-processing",
        "controlled-documents",
        "duplicate-detection",
        "orchestration",
        "logging",
        "error-handling",
        "database-sync"
      ],
      "updated_at": "2025-12-07T01:59:48.478577",
      "usage_example": "# Ensure logger is configured\nimport logging\nlogger = logging.getLogger(__name__)\nlogger.setLevel(logging.INFO)\nhandler = logging.StreamHandler()\nformatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')\nhandler.setFormatter(formatter)\nlogger.addHandler(handler)\n\n# Ensure all required helper functions are defined:\n# - get_admin_user()\n# - search_filecloud_for_documents()\n# - check_document_exists_by_uid(cdoc_uid)\n# - check_document_exists_by_doc_number(doc_number)\n# - import_document_from_filecloud(file_path, metadata, admin_user)\n\n# Execute the main import process\nif __name__ == '__main__':\n    main()\n    # Output will be logged showing:\n    # - Documents found in FileCloud\n    # - Import progress for each document\n    # - Final summary with counts of imported/skipped/failed documents"
    },
    {
      "best_practices": [
        "Ensure the Config class is properly configured before calling this function",
        "Set up all required environment variables (API keys, credentials) before execution",
        "Verify that the logging configuration section exists in the config file to avoid setup_logging errors",
        "The function uses sys.exit(1) on errors, so it should only be called from the main execution context, not from library code",
        "Monitor the logger output for analysis progress and any error messages",
        "Ensure ContractAnalyzer class is imported and available in the module scope",
        "The function expects logger to be available globally after setup_logging is called",
        "Consider wrapping the call to main() in a if __name__ == '__main__' block for proper module execution",
        "Fatal errors are logged before exit, so ensure logging is configured to capture these messages",
        "The function does not accept command-line arguments; all configuration must be in the config file"
      ],
      "class_interface": {},
      "complexity": "moderate",
      "component_type": "function",
      "conditional_imports": [
        {
          "condition": "Required by ContractAnalyzer if it needs to access cloud storage for contract documents",
          "import": "from utils.filecloud_client import FileCloudClient",
          "optional": false
        },
        {
          "condition": "Required by ContractAnalyzer for processing contract documents",
          "import": "from utils.document_processor import DocumentProcessor",
          "optional": false
        },
        {
          "condition": "Required by ContractAnalyzer for LLM-based contract analysis",
          "import": "from utils.llm_client import LLMClient",
          "optional": false
        }
      ],
      "created_at": "2025-12-06 10:21:25",
      "decorators": [],
      "dependencies": [
        "logging",
        "sys",
        "csv",
        "json",
        "pandas",
        "typing",
        "datetime",
        "time",
        "concurrent.futures",
        "pathlib"
      ],
      "description": "Main entry point function that orchestrates the contract validity analysis workflow by loading configuration, setting up logging, initializing the analyzer, running analysis, and reporting results.",
      "docstring": "Main entry point for the analyzer.",
      "id": 409,
      "imports": [
        "import logging",
        "import os",
        "import sys",
        "import csv",
        "import json",
        "import pandas as pd",
        "from typing import Dict",
        "from typing import List",
        "from typing import Any",
        "from typing import Optional",
        "from datetime import datetime",
        "import time",
        "import concurrent.futures",
        "from pathlib import Path",
        "from config.config import Config",
        "from utils.filecloud_client import FileCloudClient",
        "from utils.document_processor import DocumentProcessor",
        "from utils.llm_client import LLMClient",
        "from utils.logging_utils import setup_logging",
        "from utils.logging_utils import get_logger",
        "from utils.logging_utils import PerformanceLogger",
        "from utils.logging_utils import ProgressLogger",
        "from datetime import datetime",
        "from datetime import date"
      ],
      "imports_required": [
        "import logging",
        "import sys",
        "from config.config import Config",
        "from utils.logging_utils import setup_logging",
        "from utils.logging_utils import get_logger"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 595,
      "line_start": 568,
      "name": "main_v7",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as the primary execution entry point for a contract validity analysis application. It coordinates the entire analysis pipeline: loads application configuration, initializes logging infrastructure, creates and runs a ContractAnalyzer instance to process contracts, retrieves and logs summary statistics, and handles fatal errors with appropriate exit codes. It's designed to be called when the application starts, typically from a if __name__ == '__main__' block.",
      "return_annotation": null,
      "return_explained": "This function does not return any value (implicitly returns None). It either completes successfully with logged output or exits the program with sys.exit(1) on fatal errors.",
      "settings_required": [
        "Configuration file accessible by Config class (typically config.yaml or config.json)",
        "Logging configuration section in the config file with appropriate settings",
        "ContractAnalyzer configuration parameters in the config file",
        "Environment variables or API keys required by LLMClient (e.g., OPENAI_API_KEY, ANTHROPIC_API_KEY)",
        "File system access permissions for reading contracts and writing logs",
        "Network access if FileCloudClient needs to connect to cloud storage"
      ],
      "source_code": "def main():\n    \"\"\"Main entry point for the analyzer.\"\"\"\n    try:\n        # Load configuration\n        config_manager = Config()\n        config = config_manager.config\n        \n        # Set up logging\n        setup_logging(config.get_section('logging'))\n        \n        logger.info(\"Starting Contract Validity Analyzer\")\n        \n        # Initialize and run analyzer\n        analyzer = ContractAnalyzer(config)\n        results = analyzer.analyze_contracts()\n        \n        # Print summary\n        stats = analyzer.get_summary_stats()\n        if stats:\n            logger.info(\"Analysis Summary:\")\n            for key, value in stats.items():\n                logger.info(f\"  {key}: {value}\")\n        \n        logger.info(\"Analysis complete!\")\n        \n    except Exception as e:\n        logger.error(f\"Fatal error: {e}\")\n        sys.exit(1)",
      "source_file": "/tf/active/vicechatdev/contract_validity_analyzer/core/analyzer.py",
      "tags": [
        "entry-point",
        "orchestration",
        "contract-analysis",
        "workflow",
        "configuration",
        "logging",
        "error-handling",
        "main-function",
        "analyzer",
        "batch-processing"
      ],
      "updated_at": "2025-12-07T01:59:48.476989",
      "usage_example": "if __name__ == '__main__':\n    # Ensure config file exists at expected location\n    # e.g., config/config.yaml with sections for 'logging' and analyzer settings\n    # Set required environment variables:\n    # export OPENAI_API_KEY='your-api-key'\n    \n    # Import the ContractAnalyzer class\n    from analyzer.contract_analyzer import ContractAnalyzer\n    \n    # Call main function\n    main()\n    \n    # The function will:\n    # 1. Load configuration from config file\n    # 2. Set up logging based on config\n    # 3. Initialize ContractAnalyzer\n    # 4. Run contract analysis\n    # 5. Log summary statistics\n    # 6. Exit with code 0 on success or 1 on error"
    },
    {
      "best_practices": [],
      "class_interface": {
        "attributes": [],
        "methods": [
          {
            "is_property": false,
            "is_static": false,
            "name": "__init__",
            "parameters": {
              "replica_database_path": "Type: str",
              "session": "Type: requests.Session"
            },
            "purpose": "Internal method:   init  ",
            "returns": "None",
            "signature": "__init__(self, session, replica_database_path)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_clear_document_context",
            "parameters": {},
            "purpose": "Clear the current document UUID context for new uploads",
            "returns": "None",
            "signature": "_clear_document_context(self)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_load_database",
            "parameters": {},
            "purpose": "Load the replica database",
            "returns": "Returns Dict[str, Any]",
            "signature": "_load_database(self) -> Dict[str, Any]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_save_database",
            "parameters": {},
            "purpose": "Save the updated database",
            "returns": "None",
            "signature": "_save_database(self)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_compute_hash",
            "parameters": {
              "content": "Type: bytes"
            },
            "purpose": "Compute SHA256 hash of content",
            "returns": "Returns str",
            "signature": "_compute_hash(self, content) -> str"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_compute_crc32c_header",
            "parameters": {
              "content": "Type: bytes"
            },
            "purpose": "Compute CRC32C checksum and return as x-goog-hash header value",
            "returns": "Returns str",
            "signature": "_compute_crc32c_header(self, content) -> str"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_generate_timestamp",
            "parameters": {},
            "purpose": "Generate reMarkable timestamp",
            "returns": "Returns str",
            "signature": "_generate_timestamp(self) -> str"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_generate_generation",
            "parameters": {},
            "purpose": "Generate reMarkable generation number",
            "returns": "Returns int",
            "signature": "_generate_generation(self) -> int"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_capture_server_generation",
            "parameters": {},
            "purpose": "Capture the current server generation for use in final root update",
            "returns": "Returns bool",
            "signature": "_capture_server_generation(self) -> bool"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "upload_raw_content",
            "parameters": {
              "content": "Type: bytes",
              "content_hash": "Type: str",
              "content_type": "Type: str",
              "filename": "Type: str",
              "system_filename": "Type: str"
            },
            "purpose": "Upload raw content and return its hash",
            "returns": "Returns Optional[str]",
            "signature": "upload_raw_content(self, content, content_hash, filename, content_type, system_filename) -> Optional[str]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "upload_system_file",
            "parameters": {
              "content": "Type: bytes",
              "content_type": "Type: str",
              "system_filename": "Type: str"
            },
            "purpose": "Upload system files like roothash, root.docSchema with fixed filenames",
            "returns": "Returns Optional[str]",
            "signature": "upload_system_file(self, content, system_filename, content_type) -> Optional[str]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "upload_document_file",
            "parameters": {
              "content": "Type: bytes",
              "content_type": "Type: str",
              "filename": "Type: str"
            },
            "purpose": "Upload document files with UUID.extension pattern",
            "returns": "Returns Optional[str]",
            "signature": "upload_document_file(self, content, filename, content_type) -> Optional[str]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "create_metadata_json",
            "parameters": {
              "document_type": "Type: str",
              "name": "Type: str",
              "parent_uuid": "Type: str"
            },
            "purpose": "Create metadata JSON for a document",
            "returns": "Returns Tuple[bytes, str]",
            "signature": "create_metadata_json(self, name, parent_uuid, document_type) -> Tuple[bytes, str]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "create_pdf_content_json",
            "parameters": {
              "document_name": "Type: str",
              "pdf_content": "Type: bytes"
            },
            "purpose": "Create content JSON for a PDF document based on real app patterns",
            "returns": "Returns Tuple[bytes, str]",
            "signature": "create_pdf_content_json(self, pdf_content, document_name) -> Tuple[bytes, str]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "create_content_json",
            "parameters": {
              "pages": "Type: List[str]",
              "template": "Type: str"
            },
            "purpose": "Create content JSON for a notebook with pages",
            "returns": "Returns Tuple[bytes, str]",
            "signature": "create_content_json(self, pages, template) -> Tuple[bytes, str]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "create_doc_schema",
            "parameters": {
              "content_hash": "Type: str",
              "content_size": "Type: int",
              "document_uuid": "Type: str",
              "metadata_hash": "Type: str",
              "metadata_size": "Type: int",
              "pagedata_hash": "Type: str",
              "pagedata_size": "Type: int",
              "pdf_hash": "Type: str",
              "pdf_size": "Type: int"
            },
            "purpose": "Create document schema content in the exact format expected by reMarkable",
            "returns": "Returns Tuple[bytes, str]",
            "signature": "create_doc_schema(self, document_uuid, metadata_hash, pagedata_hash, pdf_hash, content_hash, metadata_size, pagedata_size, pdf_size, content_size) -> Tuple[bytes, str]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "create_directory_listing",
            "parameters": {
              "child_objects": "Type: List[Dict]",
              "data_components": "Type: List[Dict]"
            },
            "purpose": "Create directory listing content",
            "returns": "Returns Tuple[bytes, str]",
            "signature": "create_directory_listing(self, child_objects, data_components) -> Tuple[bytes, str]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "update_root_directory",
            "parameters": {},
            "purpose": "Update the root directory listing by adding the new document to existing entries",
            "returns": "Returns bool",
            "signature": "update_root_directory(self) -> bool"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_get_current_root_entries",
            "parameters": {},
            "purpose": "Get current root.docSchema entries from server to preserve existing data",
            "returns": "Returns Optional[List[str]]",
            "signature": "_get_current_root_entries(self) -> Optional[List[str]]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_add_new_document_to_root_entries",
            "parameters": {
              "existing_entries": "Type: List[str]"
            },
            "purpose": "Add the current document being uploaded to the root entries list",
            "returns": "Returns bool",
            "signature": "_add_new_document_to_root_entries(self, existing_entries) -> bool"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_create_root_directory_from_entries",
            "parameters": {
              "entries": "Type: List[str]"
            },
            "purpose": "Create root.docSchema content from list of entries",
            "returns": "Returns bytes",
            "signature": "_create_root_directory_from_entries(self, entries) -> bytes"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "create_root_directory_listing",
            "parameters": {
              "root_entries": "Type: List[Dict]"
            },
            "purpose": "Create root directory listing with version header '3' (matching /sync/v3/ API version)",
            "returns": "Returns bytes",
            "signature": "create_root_directory_listing(self, root_entries) -> bytes"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "update_root_hash",
            "parameters": {
              "new_root_hash": "Type: str"
            },
            "purpose": "Update the root hash in the cloud - send as text body with proper headers like other files",
            "returns": "Returns bool",
            "signature": "update_root_hash(self, new_root_hash) -> bool"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "edit_document_metadata",
            "parameters": {
              "document_uuid": "Type: str",
              "new_name": "Type: str",
              "new_parent": "Type: str"
            },
            "purpose": "Edit an existing document's metadata",
            "returns": "Returns bool",
            "signature": "edit_document_metadata(self, document_uuid, new_name, new_parent) -> bool"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "upload_pdf_document",
            "parameters": {
              "name": "Type: str",
              "parent_uuid": "Type: str",
              "pdf_path": "Type: str"
            },
            "purpose": "Upload a new PDF document to reMarkable following the correct sequence from app logs",
            "returns": "Returns bool",
            "signature": "upload_pdf_document(self, pdf_path, name, parent_uuid) -> bool"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "create_notebook",
            "parameters": {
              "name": "Type: str",
              "parent_uuid": "Type: str",
              "template": "Type: str"
            },
            "purpose": "Create a new empty notebook",
            "returns": "Returns bool",
            "signature": "create_notebook(self, name, parent_uuid, template) -> bool"
          }
        ]
      },
      "complexity": "moderate",
      "component_type": "class",
      "conditional_imports": [],
      "created_at": "2025-12-07 00:59:32",
      "decorators": [],
      "dependencies": [],
      "description": "Manages uploads to reMarkable cloud",
      "docstring": "Manages uploads to reMarkable cloud",
      "id": 2134,
      "imports": [
        "import os",
        "import json",
        "import hashlib",
        "import requests",
        "import uuid",
        "import base64",
        "import binascii",
        "import zlib",
        "from pathlib import Path",
        "from typing import Dict",
        "from typing import List",
        "from typing import Optional",
        "from typing import Any",
        "from typing import Tuple",
        "from datetime import datetime",
        "import time",
        "import crc32c",
        "import sys",
        "from auth import RemarkableAuth",
        "import re",
        "from local_replica_v2 import RemarkableReplicaBuilder",
        "from local_replica_v2 import RemarkableReplicaBuilder"
      ],
      "imports_required": [
        "import os",
        "import json",
        "import hashlib",
        "import requests",
        "import uuid"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 1125,
      "line_start": 32,
      "name": "RemarkableUploadManager",
      "parameters": [
        {
          "annotation": "",
          "default": null,
          "name": "bases"
        }
      ],
      "parameters_explained": {
        "bases": "Parameter of type "
      },
      "parent_class": null,
      "purpose": "Manages uploads to reMarkable cloud",
      "return_annotation": null,
      "return_explained": "Returns unspecified type",
      "settings_required": [],
      "source_code": "class RemarkableUploadManager:\n    \"\"\"Manages uploads to reMarkable cloud\"\"\"\n    \n    def __init__(self, session: requests.Session, replica_database_path: str):\n        self.session = session\n        self.base_url = \"https://eu.tectonic.remarkable.com\"\n        \n        # Load replica database\n        self.database_path = Path(replica_database_path)\n        self.database = self._load_database()\n        \n        # Track uploads\n        self.upload_queue: List[Dict[str, Any]] = []\n        self.uploaded_hashes: Dict[str, str] = {}  # hash -> upload_status\n        self._current_document_uuid: Optional[str] = None  # UUID for consistent rm-filename headers\n        self._server_generation: Optional[int] = None  # Store generation from server for final root update\n        \n    def _clear_document_context(self):\n        \"\"\"Clear the current document UUID context for new uploads\"\"\"\n        self._current_document_uuid = None\n        \n    def _load_database(self) -> Dict[str, Any]:\n        \"\"\"Load the replica database\"\"\"\n        if not self.database_path.exists():\n            raise FileNotFoundError(f\"Database not found: {self.database_path}\")\n            \n        with open(self.database_path, 'r', encoding='utf-8') as f:\n            return json.load(f)\n    \n    def _save_database(self):\n        \"\"\"Save the updated database\"\"\"\n        with open(self.database_path, 'w', encoding='utf-8') as f:\n            json.dump(self.database, f, indent=2, ensure_ascii=False)\n    \n    def _compute_hash(self, content: bytes) -> str:\n        \"\"\"Compute SHA256 hash of content\"\"\"\n        return hashlib.sha256(content).hexdigest()\n    \n    def _compute_crc32c_header(self, content: bytes) -> str:\n        \"\"\"Compute CRC32C checksum and return as x-goog-hash header value\"\"\"\n        try:\n            # Use proper crc32c library if available\n            if HAS_CRC32C:\n                checksum = crc32c.crc32c(content)\n            else:\n                # Fallback to standard CRC32 (not ideal but better than nothing)\n                checksum = zlib.crc32(content) & 0xffffffff\n            \n            # Convert to bytes and base64 encode\n            checksum_bytes = checksum.to_bytes(4, byteorder='big')\n            checksum_b64 = base64.b64encode(checksum_bytes).decode('ascii')\n            \n            return f\"crc32c={checksum_b64}\"\n        except Exception as e:\n            print(f\"\u26a0\ufe0f Warning: Failed to compute CRC32C checksum: {e}\")\n            # Return empty string to skip the header if computation fails\n            return \"\"\n    \n    def _generate_timestamp(self) -> str:\n        \"\"\"Generate reMarkable timestamp\"\"\"\n        return str(int(time.time() * 1000))\n    \n    def _generate_generation(self) -> int:\n        \"\"\"Generate reMarkable generation number\"\"\"\n        return int(time.time() * 1000000)\n    \n    def _capture_server_generation(self) -> bool:\n        \"\"\"Capture the current server generation for use in final root update\"\"\"\n        try:\n            print(f\"\ud83d\udce1 Capturing server generation for upload sequence...\")\n            root_url = f\"{self.base_url}/sync/v4/root\"\n            root_response = self.session.get(root_url)\n            root_response.raise_for_status()\n            \n            current_root = root_response.json()\n            self._server_generation = current_root.get('generation')\n            \n            print(f\"\ud83d\udd0d 
Captured server generation: {self._server_generation}\")\n            return True\n            \n        except Exception as e:\n            print(f\"\u274c Failed to capture server generation: {e}\")\n            self._server_generation = None\n            return False\n    \n    def upload_raw_content(self, content: bytes, content_hash: str = None, filename: str = None, \n                          content_type: str = \"application/octet-stream\", system_filename: str = None) -> Optional[str]:\n        \"\"\"Upload raw content and return its hash\"\"\"\n        if content_hash is None:\n            content_hash = self._compute_hash(content)\n        \n        # Check if already uploaded\n        if content_hash in self.uploaded_hashes:\n            print(f\"\u2705 Content already uploaded: {content_hash[:16]}...\")\n            return content_hash\n        \n        try:\n            url = f\"{self.base_url}/sync/v3/files/{content_hash}\"\n            \n            # Prepare headers like the reMarkable app\n            headers = {\n                'Content-Type': content_type,\n                'rm-batch-number': '1',\n                'rm-sync-id': str(uuid.uuid4()),\n                'User-Agent': 'desktop/3.20.0.922 (macos 15.4)',  # \u2705 FIXED: Match real app\n                'Accept-Encoding': 'gzip, deflate',\n                'Accept-Language': 'en-BE,*',\n                'Connection': 'Keep-Alive'\n            }\n            \n            # Add rm-filename header - REQUIRED for all PUT requests\n            # Handle different patterns: UUID-based files vs system files\n            if system_filename:\n                # System files like \"roothash\", \"root.docSchema\" (no UUID)\n                rm_filename = system_filename\n                print(f\"\ud83c\udff7\ufe0f rm-filename (system): {rm_filename}\")\n            elif filename:\n                # Document files with UUID pattern\n                if hasattr(self, '_current_document_uuid') and self._current_document_uuid:\n                    doc_uuid = self._current_document_uuid\n                else:\n                    # Generate and store new UUID for this document\n                    doc_uuid = str(uuid.uuid4())\n                    self._current_document_uuid = doc_uuid\n                    print(f\"\ud83d\udcca Generated new document UUID: {doc_uuid}\")\n                \n                # Use the filename as provided or construct UUID.extension format\n                if '.' 
in filename and len(filename.split('.')[0]) == 36:  # Already UUID.extension\n                    rm_filename = filename\n                else:\n                    # Determine extension and construct UUID.extension\n                    if content_type == 'application/pdf' or filename.lower().endswith('.pdf'):\n                        rm_filename = f\"{doc_uuid}.pdf\"\n                    elif 'metadata' in filename.lower():\n                        rm_filename = f\"{doc_uuid}.metadata\"\n                    elif filename.lower().endswith('.content'):\n                        rm_filename = f\"{doc_uuid}.content\"\n                    elif filename.lower().endswith('.rm'):\n                        # Page data keeps original filename for .rm files\n                        rm_filename = filename\n                    elif filename.lower().endswith('.docschema') or 'docschema' in filename.lower():\n                        rm_filename = f\"{doc_uuid}.docSchema\"\n                    elif filename.lower().endswith('.pagedata'):\n                        rm_filename = f\"{doc_uuid}.pagedata\"\n                    else:\n                        # Default construction\n                        rm_filename = f\"{doc_uuid}.{filename}\"\n                \n                print(f\"\ud83c\udff7\ufe0f rm-filename (document): {rm_filename}\")\n            else:\n                # Fallback - generate basic filename\n                if hasattr(self, '_current_document_uuid') and self._current_document_uuid:\n                    doc_uuid = self._current_document_uuid\n                else:\n                    doc_uuid = str(uuid.uuid4())\n                    self._current_document_uuid = doc_uuid\n                \n                if content_type == 'application/pdf':\n                    rm_filename = f\"{doc_uuid}.pdf\"\n                elif content_type == 'application/octet-stream':\n                    rm_filename = f\"{doc_uuid}.metadata\"\n                else:\n                    rm_filename = f\"{doc_uuid}.content\"\n                \n                print(f\"\ud83c\udff7\ufe0f rm-filename (fallback): {rm_filename}\")\n            \n            headers['rm-filename'] = rm_filename\n            \n            # Add CRC32C checksum (this is the missing piece!)\n            crc32c_header = self._compute_crc32c_header(content)\n            if crc32c_header:\n                headers['x-goog-hash'] = crc32c_header\n            \n            print(f\"\ud83d\udd0d Debug: Upload headers for {content_hash[:16]}...\")\n            for key, value in headers.items():\n                print(f\"    {key}: {value}\")\n            \n            # Make the PUT request\n            response = self.session.put(url, data=content, headers=headers)\n            \n            print(f\"\ud83d\udd0d Debug: Response status: {response.status_code}\")\n            print(f\"\ud83d\udd0d Debug: Response text: {response.text}\")\n            \n            response.raise_for_status()\n            \n            self.uploaded_hashes[content_hash] = \"uploaded\"\n            print(f\"\u2705 Uploaded content: {content_hash[:16]}... 
({len(content)} bytes)\")\n            return content_hash\n            \n        except Exception as e:\n            print(f\"\u274c Failed to upload content {content_hash[:16]}...: {e}\")\n            if hasattr(e, 'response') and e.response is not None:\n                print(f\"    Response: {e.response.text}\")\n            return None\n    \n    def upload_system_file(self, content: bytes, system_filename: str, content_type: str = \"application/octet-stream\") -> Optional[str]:\n        \"\"\"Upload system files like roothash, root.docSchema with fixed filenames\"\"\"\n        print(f\"\ud83d\udcc1 Uploading system file: {system_filename}\")\n        return self.upload_raw_content(content, system_filename=system_filename, content_type=content_type)\n    \n    def upload_document_file(self, content: bytes, filename: str, content_type: str = \"application/octet-stream\") -> Optional[str]:\n        \"\"\"Upload document files with UUID.extension pattern\"\"\"\n        print(f\"\ud83d\udcc4 Uploading document file: {filename}\")\n        return self.upload_raw_content(content, filename=filename, content_type=content_type)\n\n    def create_metadata_json(self, name: str, parent_uuid: str = \"\", document_type: str = \"DocumentType\") -> Tuple[bytes, str]:\n        \"\"\"Create metadata JSON for a document\"\"\"\n        timestamp = self._generate_timestamp()\n        \n        metadata = {\n            \"createdTime\": timestamp,\n            \"lastModified\": timestamp,\n            \"lastOpened\": \"0\",  # Real app sets this to \"0\" for never-opened documents\n            \"lastOpenedPage\": 0,\n            \"new\": False,\n            \"parent\": parent_uuid,\n            \"pinned\": False,\n            \"source\": \"com.remarkable.macos\",  # \u2705 FIXED: Match real app behavior\n            \"type\": document_type,\n            \"visibleName\": name\n        }\n        \n        content = json.dumps(metadata, indent=4).encode('utf-8')\n        content_hash = self._compute_hash(content)\n        \n        return content, content_hash\n    \n    def create_pdf_content_json(self, pdf_content: bytes, document_name: str = \"\") -> Tuple[bytes, str]:\n        \"\"\"Create content JSON for a PDF document based on real app patterns\"\"\"\n        \n        # Basic PDF content structure based on real app analysis\n        content_data = {\n            \"coverPageNumber\": 0,\n            \"customZoomCenterX\": 0,\n            \"customZoomCenterY\": 936,\n            \"customZoomOrientation\": \"portrait\",\n            \"customZoomPageHeight\": 1872,\n            \"customZoomPageWidth\": 1404,\n            \"customZoomScale\": 1,\n            \"documentMetadata\": {\n                \"title\": document_name if document_name else \"Untitled\"\n            },\n            \"extraMetadata\": {},\n            \"fileType\": \"pdf\",\n            \"fontName\": \"\",\n            \"formatVersion\": 1,\n            \"lineHeight\": -1,\n            \"orientation\": \"portrait\",\n            \"originalPageCount\": 1,  # Will be updated based on actual PDF\n            \"pageCount\": 1,  # Will be updated based on actual PDF\n            \"pageTags\": [],\n            \"pages\": [\"6a22f0dc-5606-4d40-946f-ccbc14f777ff\"],  # Default page UUID\n            \"redirectionPageMap\": [0],\n            \"sizeInBytes\": len(pdf_content),\n            \"tags\": [],\n            \"textAlignment\": \"justify\",\n            \"textScale\": 1,\n            \"zoomMode\": \"bestFit\"\n        }\n        \n        
content = json.dumps(content_data, indent=4).encode('utf-8')\n        content_hash = self._compute_hash(content)\n        \n        return content, content_hash\n\n    def create_content_json(self, pages: List[str], template: str = \"Blank\") -> Tuple[bytes, str]:\n        \"\"\"Create content JSON for a notebook with pages\"\"\"\n        timestamp_base = f\"2:{len(pages)}\"\n        \n        # Create pages structure\n        pages_list = []\n        for i, page_id in enumerate(pages):\n            pages_list.append({\n                \"id\": page_id,\n                \"idx\": {\n                    \"timestamp\": f\"2:{i+2}\",\n                    \"value\": chr(ord('a') + i) if i < 26 else f\"page_{i}\"\n                },\n                \"template\": {\n                    \"timestamp\": \"2:1\",\n                    \"value\": template\n                }\n            })\n        \n        content_data = {\n            \"cPages\": {\n                \"lastOpened\": {\n                    \"timestamp\": \"2:1\",\n                    \"value\": pages[0] if pages else \"\"\n                },\n                \"original\": {\n                    \"timestamp\": \"0:0\",\n                    \"value\": -1\n                },\n                \"pages\": pages_list\n            },\n            \"extraMetadata\": {},\n            \"fileType\": \"notebook\",\n            \"fontName\": \"\",\n            \"lineHeight\": -1,\n            \"margins\": 180,\n            \"pageCount\": len(pages),\n            \"textScale\": 1,\n            \"transform\": {}\n        }\n        \n        content = json.dumps(content_data, indent=4).encode('utf-8')\n        content_hash = self._compute_hash(content)\n        \n        return content, content_hash\n    \n    def create_doc_schema(self, document_uuid: str, metadata_hash: str, pagedata_hash: str, \n                         pdf_hash: str, content_hash: str, metadata_size: int, pagedata_size: int,\n                         pdf_size: int, content_size: int) -> Tuple[bytes, str]:\n        \"\"\"Create document schema content in the exact format expected by reMarkable\"\"\"\n        # Based on raw logs: 4 components for a document\n        lines = ['4']\n        \n        # Add components in specific order (content, metadata, pagedata, pdf)\n        lines.append(f\"{content_hash}:0:{document_uuid}.content:0:{content_size}\")\n        lines.append(f\"{metadata_hash}:0:{document_uuid}.metadata:0:{metadata_size}\")\n        lines.append(f\"{pagedata_hash}:0:{document_uuid}.pagedata:0:{pagedata_size}\")\n        lines.append(f\"{pdf_hash}:0:{document_uuid}.pdf:0:{pdf_size}\")\n        \n        content = '\\n'.join(lines).encode('utf-8')\n        content_hash = self._compute_hash(content)\n        \n        return content, content_hash\n\n    def create_directory_listing(self, child_objects: List[Dict], data_components: List[Dict]) -> Tuple[bytes, str]:\n        \"\"\"Create directory listing content\"\"\"\n        lines = [str(len(child_objects) + len(data_components))]\n        \n        # Add child objects (folders/documents)\n        for obj in child_objects:\n            line = f\"{obj['hash']}:80000000:{obj['uuid']}:{obj['type']}:{obj['size']}\"\n            lines.append(line)\n        \n        # Add data components (.content, .metadata, .rm files, etc.)\n        for comp in data_components:\n            line = f\"{comp['hash']}:0:{comp['component']}:0:{comp['size']}\"\n            lines.append(line)\n        \n        content = 
'\\n'.join(lines).encode('utf-8')\n        content_hash = self._compute_hash(content)\n        \n        return content, content_hash\n    \n    def update_root_directory(self) -> bool:\n        \"\"\"Update the root directory listing by adding the new document to existing entries\"\"\"\n        try:\n            print(\"\ud83d\udcc1 Updating root directory listing...\")\n            \n            # Get the current root.docSchema from the server to preserve existing entries\n            current_root_entries = self._get_current_root_entries()\n            if current_root_entries is None:\n                print(\"\u274c Failed to get current root entries\")\n                return False\n            \n            # Add the new document entry if it doesn't already exist\n            new_doc_added = self._add_new_document_to_root_entries(current_root_entries)\n            \n            if not new_doc_added:\n                print(\"\ud83d\udcc4 No new document to add to root directory\")\n                return True\n            \n            # Create the updated root directory listing\n            root_dir_content = self._create_root_directory_from_entries(current_root_entries)\n            root_dir_hash = self._compute_hash(root_dir_content)\n            \n            print(f\"\ud83d\udcc2 Updated root directory hash: {root_dir_hash}\")\n            \n            # Upload the updated root directory listing\n            uploaded_hash = self.upload_system_file(root_dir_content, \"root.docSchema\")\n            if not uploaded_hash:\n                return False\n            \n            # Update the root hash in the cloud\n            return self.update_root_hash(root_dir_hash)\n            \n        except Exception as e:\n            print(f\"\u274c Failed to update root directory: {e}\")\n            return False\n    \n    def _get_current_root_entries(self) -> Optional[List[str]]:\n        \"\"\"Get current root.docSchema entries from server to preserve existing data\"\"\"\n        try:\n            # Get current root hash\n            root_url = f\"{self.base_url}/sync/v4/root\"\n            root_response = self.session.get(root_url)\n            root_response.raise_for_status()\n            \n            current_root = root_response.json()\n            current_root_hash = current_root.get('hash')\n            \n            if not current_root_hash:\n                print(\"\u274c No current root hash found\")\n                return None\n            \n            # Fetch the current root.docSchema content\n            root_content_url = f\"{self.base_url}/sync/v3/files/{current_root_hash}\"\n            root_content_response = self.session.get(root_content_url)\n            root_content_response.raise_for_status()\n            \n            # Parse the content to extract existing entries\n            content_lines = root_content_response.text.strip().split('\\n')\n            \n            # First line should be version header \"3\"\n            if not content_lines or content_lines[0] != '3':\n                print(f\"\u274c Unexpected root.docSchema format: {content_lines[0] if content_lines else 'empty'}\")\n                return None\n            \n            # Return all entries (excluding the version header)\n            existing_entries = content_lines[1:] if len(content_lines) > 1 else []\n            \n            print(f\"\ud83d\udccb Found {len(existing_entries)} existing root entries\")\n            for entry in existing_entries[:5]:  # Show first 5 for debugging\n       
         parts = entry.split(':')\n                if len(parts) >= 3:\n                    uuid = parts[2]\n                    size = parts[-1] if len(parts) > 4 else 'unknown'\n                    print(f\"   - {uuid}: size={size}\")\n            \n            return existing_entries\n            \n        except Exception as e:\n            print(f\"\u274c Failed to get current root entries: {e}\")\n            return None\n    \n    def _add_new_document_to_root_entries(self, existing_entries: List[str]) -> bool:\n        \"\"\"Add the current document being uploaded to the root entries list\"\"\"\n        if not self._current_document_uuid:\n            print(\"\u26a0\ufe0f No current document UUID to add\")\n            return False\n        \n        # Check if this document is already in the entries\n        doc_uuid = self._current_document_uuid\n        for entry in existing_entries:\n            if doc_uuid in entry:\n                print(f\"\ufffd Document {doc_uuid} already exists in root entries\")\n                return False\n        \n        # Find the document in our database to get its info\n        document_node = None\n        for node_uuid, node in self.database['nodes'].items():\n            if node_uuid == doc_uuid:\n                document_node = node\n                break\n        \n        if not document_node:\n            print(f\"\u274c Document {doc_uuid} not found in database\")\n            return False\n        \n        # Get the document's hash and size\n        doc_hash = document_node.get('hash')\n        if not doc_hash:\n            print(f\"\u274c No hash found for document {doc_uuid}\")\n            return False\n        \n        # Determine the correct node type code based on the document type\n        # From analysis: Type 1/2 = folders, Type 3 = notebook documents, Type 4 = PDF documents\n        doc_metadata = document_node.get('metadata', {})\n        doc_type = doc_metadata.get('type', 'DocumentType')\n        \n        if doc_type == 'DocumentType' and 'fileType' in doc_metadata.get('content_data', ''):\n            # Check if it's a PDF or notebook\n            content_data_str = doc_metadata.get('content_data', '')\n            if '\"fileType\": \"pdf\"' in content_data_str:\n                node_type_code = 4  # PDF document\n            elif '\"fileType\": \"notebook\"' in content_data_str:\n                node_type_code = 3  # Notebook document\n            else:\n                node_type_code = 4  # Default to PDF for documents\n        else:\n            node_type_code = 4  # Default to PDF for documents\n        \n        # The size in root.docSchema is the actual document content size (PDF size for PDFs)\n        # Get the actual PDF file size or document content size\n        doc_size = 0\n        \n        # First try to get the PDF file size from metadata\n        doc_metadata = document_node.get('metadata', {})\n        content_data_str = doc_metadata.get('content_data', '')\n        if '\"sizeInBytes\"' in content_data_str:\n            # Extract sizeInBytes from the content_data JSON string\n            import re\n            size_match = re.search(r'\"sizeInBytes\":\\s*\"(\\d+)\"', content_data_str)\n            if size_match:\n                doc_size = int(size_match.group(1))\n        \n        # Fallback: try to get size from the node itself\n        if doc_size == 0:\n            doc_size = document_node.get('size', 0)\n        \n        # If still no size, use a reasonable default for new documents\n        if 
doc_size == 0:\n            doc_size = 50000  # Reasonable default for a new PDF\n        \n        # Create the new entry in the same format as existing ones\n        # Format: hash:80000000:uuid:type:actual_document_size\n        new_entry = f\"{doc_hash}:80000000:{doc_uuid}:{node_type_code}:{doc_size}\"\n        existing_entries.append(new_entry)\n        \n        print(f\"\u2705 Added new document entry: {doc_uuid} (size={doc_size})\")\n        return True\n    \n    def _create_root_directory_from_entries(self, entries: List[str]) -> bytes:\n        \"\"\"Create root.docSchema content from list of entries\"\"\"\n        # Always start with version header \"3\"\n        lines = [\"3\"] + entries\n        \n        # Sort entries by UUID for consistency (skip the version header)\n        if len(lines) > 1:\n            entry_lines = lines[1:]\n            # Sort by UUID (3rd field after splitting by ':')\n            entry_lines.sort(key=lambda x: x.split(':')[2] if ':' in x else x)\n            lines = [\"3\"] + entry_lines\n        \n        # Create content with newline separator\n        content = '\\n'.join(lines) + '\\n'\n        \n        print(f\"\ud83d\udd0d Debug: Updated root directory content:\")\n        print(f\"   Version header: 3\")\n        print(f\"   Entry count: {len(entries)}\")\n        print(f\"   Total lines: {len(lines)}\")\n        print(f\"   Content length: {len(content.encode('utf-8'))} bytes\")\n        print(f\"   Preview: {content[:100]}...\")\n        \n        return content.encode('utf-8')\n    \n    def create_root_directory_listing(self, root_entries: List[Dict]) -> bytes:\n        \"\"\"Create root directory listing with version header '3' (matching /sync/v3/ API version)\"\"\"\n        # Always use \"3\" as version header (not count) - this matches the /sync/v3/ API version\n        lines = [\"3\"]\n        \n        # Add each entry in the format: hash:80000000:uuid:node_type:size\n        # Sort by UUID for consistent ordering (like document components)\n        sorted_entries = sorted(root_entries, key=lambda x: x['uuid'])\n        \n        for entry in sorted_entries:\n            line = f\"{entry['hash']}:80000000:{entry['uuid']}:{entry['node_type']}:{entry['size']}\"\n            lines.append(line)\n        \n        # Use the same approach as document uploads - with newline\n        content = '\\n'.join(lines) + '\\n'\n        \n        print(f\"\ud83d\udd0d Debug: Root directory content:\")\n        print(f\"   Version header: 3 (API version, not count)\")\n        print(f\"   Entry count: {len(root_entries)}\")\n        print(f\"   Total lines: {len(lines)}\")\n        print(f\"   Content length: {len(content.encode('utf-8'))} bytes\")\n        print(f\"   Preview: {content[:100]}...\")\n        \n        return content.encode('utf-8')\n\n    def update_root_hash(self, new_root_hash: str) -> bool:\n        \"\"\"Update the root hash in the cloud - send as text body with proper headers like other files\"\"\"\n        try:\n            # Use the server generation captured at the start of upload sequence\n            if self._server_generation is None:\n                print(f\"\u26a0\ufe0f Warning: No server generation captured, capturing now...\")\n                if not self._capture_server_generation():\n                    print(f\"\u274c Failed to get server generation, aborting root hash update\")\n                    return False\n            \n            generation = self._server_generation\n            \n            
print(f\"\ud83d\udd0d Using server generation: {generation}\")\n            print(f\"\ud83d\udd0d New root hash: {new_root_hash}\")\n            \n            # Create the root data exactly like the real app\n            root_data = {\n                \"broadcast\": True,\n                \"generation\": generation,\n                \"hash\": new_root_hash\n            }\n            \n            # Convert to JSON text with same formatting as real app (pretty-printed with 2-space indent)\n            root_content = json.dumps(root_data, indent=2).encode('utf-8')\n            \n            # Set up headers exactly like the real app (case-sensitive and ordered correctly)\n            headers = {\n                'Content-Type': 'application/json',\n                'rm-batch-number': '1',\n                'rm-filename': 'roothash',\n                'rm-sync-id': str(uuid.uuid4()),\n                'User-Agent': 'desktop/3.20.0.922 (macos 15.4)',  # \u2705 FIXED: Match real app\n                'Accept-Encoding': 'gzip, deflate',\n                'Accept-Language': 'en-BE,*',\n                'Connection': 'Keep-Alive',\n            }\n            \n            # Add CRC32C checksum\n            crc32c_header = self._compute_crc32c_header(root_content)\n            if crc32c_header:\n                headers['x-goog-hash'] = crc32c_header\n            \n            print(f\"\ud83d\udd0d Debug: Root hash update headers:\")\n            for key, value in headers.items():\n                print(f\"    {key}: {value}\")\n            print(f\"\ud83d\udd0d Debug: Root hash content: {root_content.decode('utf-8')}\")\n            \n            url = f\"{self.base_url}/sync/v3/root\"\n            response = self.session.put(url, data=root_content, headers=headers)\n            \n            print(f\"\ud83d\udd0d Debug: Root hash response status: {response.status_code}\")\n            print(f\"\ud83d\udd0d Debug: Root hash response text: {response.text}\")\n            \n            response.raise_for_status()\n            \n            print(f\"\u2705 Updated root hash: {new_root_hash}\")\n            return True\n            \n        except Exception as e:\n            print(f\"\u274c Failed to update root hash: {e}\")\n            if hasattr(e, 'response') and e.response is not None:\n                print(f\"    Response: {e.response.text}\")\n            return False\n    \n    def edit_document_metadata(self, document_uuid: str, new_name: str = None, new_parent: str = None) -> bool:\n        \"\"\"Edit an existing document's metadata\"\"\"\n        try:\n            # Find the document in database\n            if document_uuid not in self.database['nodes']:\n                raise ValueError(f\"Document {document_uuid} not found in database\")\n            \n            node = self.database['nodes'][document_uuid]\n            print(f\"\ud83d\udcdd Editing document: {node['name']}\")\n            \n            # Get current metadata\n            current_metadata = node['metadata'].copy()\n            \n            # Update metadata\n            if new_name:\n                current_metadata['visibleName'] = new_name\n            if new_parent is not None:\n                current_metadata['parent'] = new_parent\n            \n            current_metadata['lastModified'] = self._generate_timestamp()\n            \n            # Create new metadata content\n            metadata_content = json.dumps(current_metadata, indent=4).encode('utf-8')\n            metadata_hash = 
self._compute_hash(metadata_content)\n            \n            # Upload metadata\n            self.upload_raw_content(metadata_content, metadata_hash)\n            \n            # Update component hashes\n            old_metadata_hash = node['component_hashes']['metadata']\n            node['component_hashes']['metadata'] = metadata_hash\n            \n            # Get parent node to update its directory listing\n            parent_uuid = current_metadata.get('parent', '')\n            if parent_uuid and parent_uuid in self.database['nodes']:\n                parent_node = self.database['nodes'][parent_uuid]\n                \n                # Rebuild parent's directory listing\n                child_objects = []\n                data_components = []\n                \n                # Find all children of this parent\n                for uuid, child_node in self.database['nodes'].items():\n                    if child_node.get('parent_uuid') == parent_uuid:\n                        if child_node['node_type'] == 'folder':\n                            type_val = '1'\n                        else:\n                            type_val = '3'\n                        \n                        child_objects.append({\n                            'hash': child_node['hash'],\n                            'uuid': uuid,\n                            'type': type_val,\n                            'size': len(str(child_node).encode('utf-8'))  # Approximate\n                        })\n                \n                # Add metadata components for this updated document\n                comp_hashes = node['component_hashes']\n                for comp_type, comp_hash in comp_hashes.items():\n                    if comp_hash:\n                        if comp_type == 'rm_files':\n                            for i, rm_hash in enumerate(comp_hash):\n                                data_components.append({\n                                    'hash': rm_hash,\n                                    'component': f\"{document_uuid}/{uuid.uuid4()}.rm\",\n                                    'size': 14661  # Typical RM file size\n                                })\n                        else:\n                            data_components.append({\n                                'hash': comp_hash,\n                                'component': f\"{document_uuid}.{comp_type}\",\n                                'size': len(metadata_content) if comp_type == 'metadata' else 2209\n                            })\n                \n                # Create and upload new directory listing\n                dir_content, dir_hash = self.create_directory_listing(child_objects, data_components)\n                self.upload_raw_content(dir_content, dir_hash)\n                \n                # Update parent node hash\n                parent_node['hash'] = dir_hash\n                self.database['hash_registry'][dir_hash] = {\n                    'uuid': parent_uuid,\n                    'type': 'node',\n                    'last_seen': datetime.now().isoformat()\n                }\n                \n                # Always update root directory after any upload to trigger sync\n                # This ensures the generation increments for both root and folder uploads\n                print(\"\ud83d\udd04 Updating root directory to trigger server generation increment...\")\n                self.update_root_directory()\n            \n            # Update database\n            node['metadata'] = current_metadata\n            
node['last_modified'] = current_metadata['lastModified']\n            node['sync_status'] = 'updated'\n            node['last_synced'] = datetime.now().isoformat()\n            \n            # Update hash registry\n            self.database['hash_registry'][metadata_hash] = {\n                'uuid': document_uuid,\n                'type': 'metadata',\n                'last_seen': datetime.now().isoformat()\n            }\n            \n            self._save_database()\n            print(f\"\u2705 Successfully updated document metadata\")\n            return True\n            \n        except Exception as e:\n            print(f\"\u274c Failed to edit document metadata: {e}\")\n            return False\n    \n    def upload_pdf_document(self, pdf_path: str, name: str, parent_uuid: str = \"\") -> bool:\n        \"\"\"Upload a new PDF document to reMarkable following the correct sequence from app logs\"\"\"\n        try:\n            # Clear any previous document context\n            self._clear_document_context()\n            \n            # FIRST: Capture server generation (like real app does with /sync/v4/root call)\n            if not self._capture_server_generation():\n                print(f\"\u274c Failed to capture server generation, aborting upload\")\n                return False\n            \n            pdf_file = Path(pdf_path)\n            if not pdf_file.exists():\n                raise FileNotFoundError(f\"PDF file not found: {pdf_path}\")\n            \n            print(f\"\ud83d\udcc4 Uploading PDF: {name}\")\n            \n            # Generate UUID for new document and set it for consistent rm-filename headers\n            document_uuid = str(uuid.uuid4())\n            self._current_document_uuid = document_uuid\n            print(f\"\ud83d\udcca Document UUID: {document_uuid}\")\n            \n            # Read PDF content\n            with open(pdf_file, 'rb') as f:\n                pdf_content = f.read()\n            \n            # EXACT SEQUENCE FROM APP LOGS:\n            # 1. cf2a3833-4a8f-4004-ab8d-8dc3c5f561bc.metadata\n            # 2. cf2a3833-4a8f-4004-ab8d-8dc3c5f561bc.pagedata  \n            # 3. cf2a3833-4a8f-4004-ab8d-8dc3c5f561bc.pdf\n            # 4. cf2a3833-4a8f-4004-ab8d-8dc3c5f561bc.content\n            # 5. cf2a3833-4a8f-4004-ab8d-8dc3c5f561bc.docSchema\n            # 6. root.docSchema\n            # 7. 
roothash\n            \n            print(\"\ud83d\udcdd Step 1: Creating and uploading metadata...\")\n            # Create metadata FIRST (as per app logs)\n            metadata_content, metadata_hash = self.create_metadata_json(name, parent_uuid)\n            metadata_upload_hash = self.upload_raw_content(\n                content=metadata_content,\n                content_type='application/octet-stream',\n                filename=f\"{document_uuid}.metadata\"\n            )\n            if not metadata_upload_hash:\n                raise Exception(\"Failed to upload metadata\")\n            \n            print(\"\ud83d\udcdd Step 2: Creating and uploading pagedata...\")\n            # For PDFs, create minimal pagedata (single newline like real app)\n            pagedata_content = b'\\n'  # \u2705 FIXED: Real app uses newline, not empty string\n            pagedata_upload_hash = self.upload_raw_content(\n                content=pagedata_content,\n                content_type='application/octet-stream',\n                filename=f\"{document_uuid}.pagedata\"\n            )\n            if not pagedata_upload_hash:\n                raise Exception(\"Failed to upload pagedata\")\n            \n            print(\"\ud83d\udcdd Step 3: Uploading PDF content...\")\n            pdf_upload_hash = self.upload_raw_content(\n                content=pdf_content,\n                content_type='application/pdf',\n                filename=f\"{document_uuid}.pdf\"\n            )\n            if not pdf_upload_hash:\n                raise Exception(\"Failed to upload PDF content\")\n            \n            print(\"\ud83d\udcdd Step 4: Creating and uploading content...\")\n            # Create proper PDF content structure based on real app patterns\n            content_data, content_hash = self.create_pdf_content_json(pdf_content, name)\n            content_upload_hash = self.upload_raw_content(\n                content=content_data,\n                content_type='application/octet-stream',\n                filename=f\"{document_uuid}.content\"\n            )\n            if not content_upload_hash:\n                raise Exception(\"Failed to upload content\")\n            \n            print(\"\ud83d\udcdd Step 5: Creating and uploading document schema...\")\n            # Create document schema in exact format from raw logs (4 components but count is 3)\n            doc_schema_entries = [\n                f\"{content_hash}:0:{document_uuid}.content:0:{len(content_data)}\",\n                f\"{metadata_hash}:0:{document_uuid}.metadata:0:{len(metadata_content)}\",\n                f\"{pagedata_upload_hash}:0:{document_uuid}.pagedata:0:{len(pagedata_content)}\",\n                f\"{pdf_upload_hash}:0:{document_uuid}.pdf:0:{len(pdf_content)}\"\n            ]\n            # Note: count is 3 even though there are 4 entries (PDF doesn't count)\n            doc_schema_content = f\"3\\n\" + \"\\n\".join(doc_schema_entries)\n            doc_schema_bytes = doc_schema_content.encode('utf-8')\n            doc_schema_hash = self._compute_hash(doc_schema_bytes)\n            \n            doc_schema_upload_hash = self.upload_raw_content(\n                content=doc_schema_bytes,\n                content_type='text/plain; charset=UTF-8',\n                filename=f\"{document_uuid}.docSchema\"\n            )\n            if not doc_schema_upload_hash:\n                raise Exception(\"Failed to upload document schema\")\n            \n            # Create document directory listing\n            data_components = 
[\n                {\n                    'hash': metadata_hash,\n                    'component': f\"{document_uuid}.metadata\",\n                    'size': len(metadata_content)\n                },\n                {\n                    'hash': pagedata_upload_hash,\n                    'component': f\"{document_uuid}.pagedata\",\n                    'size': len(pagedata_content)\n                },\n                {\n                    'hash': pdf_upload_hash,\n                    'component': f\"{document_uuid}.pdf\",\n                    'size': len(pdf_content)\n                },\n                {\n                    'hash': content_hash,\n                    'component': f\"{document_uuid}.content\",\n                    'size': len(content_data)\n                }\n            ]\n            \n            # Add to database\n            new_node = {\n                'uuid': document_uuid,\n                'hash': doc_schema_hash,  # Document hash is the docSchema hash\n                'name': name,\n                'node_type': 'document',\n                'parent_uuid': parent_uuid,\n                'local_path': f\"content/{name}\",\n                'extracted_files': [str(pdf_file)],\n                'component_hashes': {\n                    'content': content_hash,\n                    'metadata': metadata_hash,\n                    'pdf': pdf_upload_hash,\n                    'pagedata': pagedata_upload_hash,\n                    'docSchema': doc_schema_hash,\n                    'rm_files': []\n                },\n                'metadata': json.loads(metadata_content.decode('utf-8')),\n                'last_modified': self._generate_timestamp(),\n                'version': 1,\n                'sync_status': 'uploaded',\n                'last_synced': datetime.now().isoformat(),\n                'size': len(pdf_content)  # Store the actual PDF file size\n            }\n            \n            # Update the metadata to include content_data with sizeInBytes for proper root.docSchema sizing\n            content_data = {\n                \"fileType\": \"pdf\",\n                \"sizeInBytes\": str(len(pdf_content)),\n                \"pageCount\": 1,\n                \"formatVersion\": 1,\n                \"orientation\": \"portrait\"\n            }\n            new_node['metadata']['content_data'] = json.dumps(content_data)\n            \n\n            # \ud83d\udeab REMOVED: Direct database manipulation for final state\n            # Do NOT add to database permanently - let replica sync handle final state\n            \n            # \ud83d\udeab REMOVED: Hash registry updates \n            # Let replica sync discover and register all hashes properly\n            \n            # CRITICAL: Complete the proper upload sequence from real app logs\n            print(\"\ud83d\udcdd Step 6: Updating root.docSchema with new document...\")\n            \n            # Temporarily add document to database for root.docSchema update\n            temp_node = {\n                'uuid': document_uuid,\n                'hash': doc_schema_hash,  # Document hash is the docSchema hash\n                'name': name,\n                'node_type': 'document',\n                'parent_uuid': parent_uuid,\n                'metadata': json.loads(metadata_content.decode('utf-8')),\n                'component_hashes': {\n                    'docSchema': doc_schema_hash\n                },\n                'size': len(doc_schema_bytes)  # Use docSchema size for root.docSchema\n            }\n     
       \n            # Add temporarily for root update\n            self.database['nodes'][document_uuid] = temp_node\n            \n            root_update_success = self.update_root_directory()\n            if not root_update_success:\n                print(\"\u26a0\ufe0f Warning: Root directory update failed - document may not appear in real app\")\n                # Remove temporary entry if root update failed\n                del self.database['nodes'][document_uuid]\n                # Don't fail the upload completely, but warn user\n            else:\n                print(\"\u2705 Root directory updated successfully\")\n                # Remove temporary entry - let replica sync handle final database state\n                del self.database['nodes'][document_uuid]\n            \n            # Always trigger replica sync after any upload (root or folder)\n            # This ensures the new document is properly downloaded and cataloged with final state\n            print(\"\ud83d\udcdd Step 7: Running final replica sync to verify upload...\")\n            \n            try:\n                from local_replica_v2 import RemarkableReplicaBuilder\n                replica_builder = RemarkableReplicaBuilder(self.session)\n                \n                print(\"\ud83d\udd04 Running replica sync to discover new document...\")\n                replica_builder.build_complete_replica()\n                \n                # Reload our database to get the freshly synced data\n                print(\"\ud83d\udd04 Reloading database with fresh sync data...\")\n                self.database = self._load_database()\n                \n                # Verify the document was properly synced\n                if document_uuid in self.database['nodes']:\n                    synced_node = self.database['nodes'][document_uuid]\n                    print(f\"\u2705 Document synced successfully: {synced_node['name']}\")\n                    print(f\"   UUID: {document_uuid}\")\n                    print(f\"   Hash: {synced_node['hash']}\")\n                    print(f\"   Local path: {synced_node.get('local_path', 'Not set')}\")\n                    print(f\"   Extracted files: {synced_node.get('extracted_files', [])}\")\n                else:\n                    print(f\"\u26a0\ufe0f Document {document_uuid} not found in synced database - may need more time to propagate\")\n                    \n            except Exception as sync_e:\n                print(f\"\u26a0\ufe0f Replica sync failed, but upload may have succeeded: {sync_e}\")\n                # Don't fail the entire upload if sync fails\n                pass\n            \n            # \ud83d\udeab REMOVED: Final database save - let replica sync handle database updates\n            # self._save_database()\n            print(f\"\u2705 Successfully uploaded PDF document: {name}\")\n            print(f\"\ud83d\udd04 Document should appear in your device shortly after sync\")\n            return True\n            \n        except Exception as e:\n            print(f\"\u274c Failed to upload PDF document: {e}\")\n            return False\n    \n    def create_notebook(self, name: str, parent_uuid: str = \"\", template: str = \"Blank\") -> bool:\n        \"\"\"Create a new empty notebook\"\"\"\n        try:\n            # Clear any previous document context\n            self._clear_document_context()\n            \n            print(f\"\ud83d\udcd3 Creating notebook: {name}\")\n            \n            # Generate UUIDs and set current document UUID 
for consistent rm-filename headers\n            document_uuid = str(uuid.uuid4())\n            self._current_document_uuid = document_uuid\n            page_uuid = str(uuid.uuid4())\n            print(f\"\ud83d\udcca Document UUID: {document_uuid}\")\n            \n            # Create empty .rm content for first page\n            rm_content = b'\\x00' * 1000  # Minimal empty page content\n            rm_hash = self.upload_raw_content(\n                content=rm_content,\n                content_type='application/octet-stream',\n                filename=f\"{page_uuid}.rm\"\n            )\n            \n            # Create content.json\n            content_data, content_hash = self.create_content_json([page_uuid], template)\n            self.upload_raw_content(\n                content=content_data,\n                content_type='application/octet-stream',\n                filename=f\"{document_uuid}.content\"\n            )\n            \n            # Create metadata\n            metadata_content, metadata_hash = self.create_metadata_json(name, parent_uuid)\n            self.upload_raw_content(\n                content=metadata_content,\n                content_type='application/octet-stream',\n                filename=f\"{document_uuid}.metadata\"\n            )\n            \n            # Create document directory listing\n            data_components = [\n                {\n                    'hash': content_hash,\n                    'component': f\"{document_uuid}.content\",\n                    'size': len(content_data)\n                },\n                {\n                    'hash': metadata_hash,\n                    'component': f\"{document_uuid}.metadata\",\n                    'size': len(metadata_content)\n                },\n                {\n                    'hash': rm_hash,\n                    'component': f\"{document_uuid}/{page_uuid}.rm\",\n                    'size': len(rm_content)\n                }\n            ]\n            \n            doc_dir_content, doc_dir_hash = self.create_directory_listing([], data_components)\n            self.upload_raw_content(doc_dir_content, doc_dir_hash)\n            \n            # Add to database\n            new_node = {\n                'uuid': document_uuid,\n                'hash': doc_dir_hash,\n                'name': name,\n                'node_type': 'document',\n                'parent_uuid': parent_uuid,\n                'local_path': f\"content/{name}\",\n                'extracted_files': [],\n                'component_hashes': {\n                    'content': content_hash,\n                    'metadata': metadata_hash,\n                    'pdf': None,\n                    'pagedata': None,\n                    'rm_files': [rm_hash]\n                },\n                'metadata': json.loads(metadata_content.decode('utf-8')),\n                'last_modified': self._generate_timestamp(),\n                'version': 1,\n                'sync_status': 'created',\n                'last_synced': datetime.now().isoformat()\n            }\n            \n            # \ud83d\udeab REMOVED: Direct database manipulation \n            # Do NOT add to database directly - let replica sync handle it properly\n            # self.database['nodes'][document_uuid] = new_node\n            \n            # \ud83d\udeab REMOVED: Hash registry updates \n            # Let replica sync discover and register all hashes properly\n            # Hash registry should only be populated from actual cloud downloads\n            \n      
      # \ud83d\udd04 CRITICAL FIX: Instead of manually updating database, trigger fresh replica sync\n            # This ensures the new notebook is properly downloaded and cataloged\n            try:\n                from local_replica_v2 import RemarkableReplicaBuilder\n                replica_builder = RemarkableReplicaBuilder(self.session)\n                \n                print(\"\ud83d\udd04 Running replica sync to discover new notebook...\")\n                replica_builder.build_complete_replica()\n                \n                # Reload our database to get the freshly synced data\n                print(\"\ud83d\udd04 Reloading database with fresh sync data...\")\n                self.database = self._load_database()\n                \n                # Verify the notebook was properly synced\n                if document_uuid in self.database['nodes']:\n                    synced_node = self.database['nodes'][document_uuid]\n                    print(f\"\u2705 Notebook synced successfully: {synced_node['name']}\")\n                    print(f\"   UUID: {document_uuid}\")\n                    print(f\"   Hash: {synced_node['hash']}\")\n                    print(f\"   Local path: {synced_node.get('local_path', 'Not set')}\")\n                    print(f\"   Extracted files: {synced_node.get('extracted_files', [])}\")\n                else:\n                    print(f\"\u26a0\ufe0f Notebook {document_uuid} not found in synced database - may need more time to propagate\")\n                    \n            except Exception as sync_e:\n                print(f\"\u26a0\ufe0f Replica sync failed, but upload may have succeeded: {sync_e}\")\n                # Don't fail the entire upload if sync fails\n                pass\n            \n            print(f\"\u2705 Successfully created notebook: {name}\")\n            print(f\"\ud83d\udd04 Notebook should appear in your device shortly after sync\")\n            return True\n            \n        except Exception as e:\n            print(f\"\u274c Failed to create notebook: {e}\")\n            return False",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/cloudtest/upload_manager.py",
      "tags": [
        "class",
        "remarkableuploadmanager"
      ],
      "updated_at": "2025-12-07T01:59:32.407270",
      "usage_example": "# Example usage:\n# result = RemarkableUploadManager(bases)"
    },
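Illustrative note (not part of the catalog data): the source_code above serializes document listings as plain-text ".docSchema" files whose lines follow a hash:flags:name:subfile_count:size pattern. A minimal sketch of that formatting, assuming the manager's _compute_hash helper is a SHA-256 hex digest (its implementation is not shown in this entry):

import hashlib
import uuid

def sha256_hex(content: bytes) -> str:
    # Stand-in for RemarkableUploadManager._compute_hash (assumed SHA-256 hex digest).
    return hashlib.sha256(content).hexdigest()

def build_doc_schema(doc_uuid: str, components: dict) -> bytes:
    # Mirrors create_doc_schema: a count header, then one "hash:0:<uuid>.<ext>:0:<size>"
    # line per component; alphabetical order matches content, metadata, pagedata, pdf.
    lines = [str(len(components))]
    for ext in sorted(components):
        data = components[ext]
        lines.append(f"{sha256_hex(data)}:0:{doc_uuid}.{ext}:0:{len(data)}")
    return "\n".join(lines).encode("utf-8")

# Placeholder payloads purely for demonstration.
doc_uuid = str(uuid.uuid4())
print(build_doc_schema(doc_uuid, {
    "content": b"{}",
    "metadata": b"{}",
    "pagedata": b"\n",
    "pdf": b"%PDF-1.4",
}).decode("utf-8"))

Note that the class itself is inconsistent about the count header: create_doc_schema writes the full component count ("4"), while upload_pdf_document hard-codes "3" for four entries; the sketch follows create_doc_schema.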
    {
      "best_practices": [
        "This function uses __file__ to locate the database file, so it must be run as part of a Python script file, not in an interactive interpreter",
        "The function expects a specific directory structure relative to the script location; ensure 'remarkable_replica_v2/replica_database.json' exists",
        "The function prints extensive diagnostic output; redirect stdout if you need to capture this information programmatically",
        "The root-level detection logic checks for empty strings, None values, and empty string literals - this redundancy ensures compatibility with different data formats",
        "Node types are converted to numeric codes (1 for folders, 4 for other types) which appears to be a specific encoding scheme for the reMarkable system",
        "Consider wrapping this function in error handling for production use, as it assumes the JSON file exists and has the expected structure"
      ],
      "class_interface": {},
      "complexity": "moderate",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-07 00:59:32",
      "decorators": [],
      "dependencies": [
        "json",
        "pathlib"
      ],
      "description": "A test function that analyzes a reMarkable tablet replica database JSON file to identify and list all root-level entries (folders and documents without parent nodes).",
      "docstring": "Test finding root-level entries",
      "id": 2133,
      "imports": [
        "import json",
        "from pathlib import Path"
      ],
      "imports_required": [
        "import json",
        "from pathlib import Path"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 49,
      "line_start": 6,
      "name": "test_root_finding",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function is designed to test and debug the identification of root-level nodes in a reMarkable tablet's file system structure. It reads a JSON database file, iterates through all nodes, checks both 'parent_uuid' and 'metadata.parent' fields to determine if a node is at the root level, and prints detailed diagnostic information about each node. It's useful for understanding the structure of reMarkable's file system and verifying root-level entry detection logic.",
      "return_annotation": null,
      "return_explained": "This function does not return any value (implicitly returns None). It produces side effects by printing diagnostic information to stdout, including details about each node analyzed and a summary of root-level entries found.",
      "settings_required": [
        "Requires a specific file structure: a 'remarkable_replica_v2' directory in the same directory as the script, containing a 'replica_database.json' file",
        "The JSON database file must follow the expected schema with 'nodes' as a top-level key, where each node contains fields like 'parent_uuid', 'metadata', 'name', 'node_type', 'hash', and 'size'"
      ],
      "source_code": "def test_root_finding():\n    \"\"\"Test finding root-level entries\"\"\"\n    \n    database_path = Path(__file__).parent / \"remarkable_replica_v2\" / \"replica_database.json\"\n    \n    with open(database_path, 'r') as f:\n        database = json.load(f)\n    \n    print(\"\ud83d\udd0d Analyzing root-level nodes...\")\n    print(f\"\ud83d\udcca Total nodes: {len(database['nodes'])}\")\n    \n    root_entries = []\n    for uuid, node in database['nodes'].items():\n        # Check both parent_uuid field and metadata.parent field\n        parent_uuid = node.get('parent_uuid')\n        metadata_parent = node.get('metadata', {}).get('parent', '')\n        \n        print(f\"\\n\ud83d\udd39 Node: {uuid}\")\n        print(f\"   Name: {node.get('name', 'Unknown')}\")\n        print(f\"   Type: {node.get('node_type', 'Unknown')}\")\n        print(f\"   parent_uuid: {repr(parent_uuid)}\")\n        print(f\"   metadata.parent: {repr(metadata_parent)}\")\n        \n        # A node is root-level if both parent fields indicate no parent\n        is_root_level = (\n            (parent_uuid is None or parent_uuid == '' or parent_uuid == \"\") and\n            (metadata_parent == '' or metadata_parent is None)\n        )\n        \n        print(f\"   Is root-level: {is_root_level}\")\n        \n        if is_root_level:\n            node_type = 1 if node['node_type'] == 'folder' else 4\n            root_entries.append({\n                'hash': node['hash'],\n                'uuid': uuid,\n                'node_type': node_type,\n                'size': node.get('size', 0),\n                'name': node.get('name', 'Unknown')\n            })\n    \n    print(f\"\\n\ud83d\udcca Found {len(root_entries)} root-level items:\")\n    for entry in root_entries:\n        print(f\"   - {entry['name']} ({entry['uuid'][:8]}...): type={entry['node_type']}, size={entry['size']}\")",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/cloudtest/debug_root.py",
      "tags": [
        "testing",
        "debugging",
        "file-system",
        "remarkable-tablet",
        "json-parsing",
        "tree-structure",
        "root-detection",
        "diagnostic",
        "data-analysis"
      ],
      "updated_at": "2025-12-07T01:59:32.125959",
      "usage_example": "# Ensure the required directory structure exists:\n# ./remarkable_replica_v2/replica_database.json\n\nimport json\nfrom pathlib import Path\n\ndef test_root_finding():\n    \"\"\"Test finding root-level entries\"\"\"\n    database_path = Path(__file__).parent / \"remarkable_replica_v2\" / \"replica_database.json\"\n    with open(database_path, 'r') as f:\n        database = json.load(f)\n    print(\"\ud83d\udd0d Analyzing root-level nodes...\")\n    print(f\"\ud83d\udcca Total nodes: {len(database['nodes'])}\")\n    root_entries = []\n    for uuid, node in database['nodes'].items():\n        parent_uuid = node.get('parent_uuid')\n        metadata_parent = node.get('metadata', {}).get('parent', '')\n        print(f\"\\n\ud83d\udd39 Node: {uuid}\")\n        print(f\"   Name: {node.get('name', 'Unknown')}\")\n        print(f\"   Type: {node.get('node_type', 'Unknown')}\")\n        print(f\"   parent_uuid: {repr(parent_uuid)}\")\n        print(f\"   metadata.parent: {repr(metadata_parent)}\")\n        is_root_level = (\n            (parent_uuid is None or parent_uuid == '' or parent_uuid == \"\") and\n            (metadata_parent == '' or metadata_parent is None)\n        )\n        print(f\"   Is root-level: {is_root_level}\")\n        if is_root_level:\n            node_type = 1 if node['node_type'] == 'folder' else 4\n            root_entries.append({\n                'hash': node['hash'],\n                'uuid': uuid,\n                'node_type': node_type,\n                'size': node.get('size', 0),\n                'name': node.get('name', 'Unknown')\n            })\n    print(f\"\\n\ud83d\udcca Found {len(root_entries)} root-level items:\")\n    for entry in root_entries:\n        print(f\"   - {entry['name']} ({entry['uuid'][:8]}...): type={entry['node_type']}, size={entry['size']}\")\n\n# Run the test\ntest_root_finding()"
    },
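Illustrative note (not part of the catalog data): the root-level check exercised by test_root_finding above can be factored into a reusable helper. A sketch under the replica_database.json schema described in this entry; the path below is a placeholder:

import json
from pathlib import Path

def is_root_level(node: dict) -> bool:
    # A node is root-level when neither parent_uuid nor metadata.parent names a parent.
    parent_uuid = node.get('parent_uuid')
    metadata_parent = node.get('metadata', {}).get('parent', '')
    return parent_uuid in (None, '') and metadata_parent in (None, '')

def find_root_entries(database: dict) -> list:
    # Encode folders as node type 1 and everything else as 4, as in test_root_finding.
    return [
        {
            'hash': node['hash'],
            'uuid': node_uuid,
            'node_type': 1 if node.get('node_type') == 'folder' else 4,
            'size': node.get('size', 0),
            'name': node.get('name', 'Unknown'),
        }
        for node_uuid, node in database['nodes'].items()
        if is_root_level(node)
    ]

# Placeholder path; point this at the actual replica database.
database = json.loads(Path('remarkable_replica_v2/replica_database.json').read_text(encoding='utf-8'))
for entry in find_root_entries(database):
    print(f"{entry['name']} ({entry['uuid'][:8]}...): type={entry['node_type']}, size={entry['size']}")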
    {
      "best_practices": [
        "This function expects to be called as a script entry point with sys.argv available",
        "Requires companion functions (load_database, print_database_analysis, print_sync_info) to be defined in the same module",
        "Uses Path objects for cross-platform file path handling",
        "Provides user-friendly error messages with emoji indicators for better CLI experience",
        "Returns boolean for exit code handling - use sys.exit(0 if main() else 1) pattern",
        "Command-line argument at index 1 should be a valid directory path to a reMarkable replica",
        "The function performs validation before processing to fail fast on invalid inputs"
      ],
      "class_interface": {},
      "complexity": "simple",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-07 00:59:09",
      "decorators": [],
      "dependencies": [],
      "description": "Main entry point function that analyzes a reMarkable tablet replica directory by loading its database, printing analysis results, and displaying sync information.",
      "docstring": "Main entry point",
      "id": 2132,
      "imports": [
        "import json",
        "import sys",
        "from pathlib import Path",
        "from datetime import datetime",
        "from typing import Dict",
        "from typing import Any",
        "from typing import List"
      ],
      "imports_required": [
        "import json",
        "import sys",
        "from pathlib import Path",
        "from datetime import datetime",
        "from typing import Dict",
        "from typing import Any",
        "from typing import List"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 283,
      "line_start": 260,
      "name": "main_v66",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as the command-line interface entry point for analyzing a reMarkable tablet's local replica directory. It accepts an optional directory path as a command-line argument, validates the directory exists, loads the database from it, and prints comprehensive analysis including database contents and sync information. It's designed to be called when the script is executed directly.",
      "return_annotation": null,
      "return_explained": "Returns a boolean value: True if the analysis completed successfully (directory exists, database loaded, and analysis printed), False if the replica directory was not found or the database failed to load.",
      "settings_required": [
        "Requires a reMarkable tablet replica directory structure to exist on the filesystem",
        "Expects helper functions to be defined in the same module: load_database(), print_database_analysis(), and print_sync_info()",
        "Default replica directory path is './remarkable_complete_replica' if not specified via command-line argument"
      ],
      "source_code": "def main():\n    \"\"\"Main entry point\"\"\"\n    if len(sys.argv) > 1:\n        replica_dir = Path(sys.argv[1])\n    else:\n        replica_dir = Path.cwd() / \"remarkable_complete_replica\"\n    \n    if not replica_dir.exists():\n        print(f\"\u274c Replica directory not found: {replica_dir}\")\n        print(f\"\ud83d\udca1 Usage: python {sys.argv[0]} [replica_directory]\")\n        return False\n    \n    print(f\"\ud83d\udcc1 Analyzing replica: {replica_dir}\")\n    \n    # Load and analyze database\n    database = load_database(replica_dir)\n    if not database:\n        return False\n    \n    print_database_analysis(database)\n    print_sync_info(replica_dir)\n    \n    print(f\"\\n\u2705 Analysis complete!\")\n    return True",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/cloudtest/analyze_replica.py",
      "tags": [
        "cli",
        "entry-point",
        "file-system",
        "analysis",
        "remarkable-tablet",
        "database",
        "command-line",
        "validation",
        "main-function"
      ],
      "updated_at": "2025-12-07T01:59:09.437397",
      "usage_example": "# Run from command line with default directory:\n# python script.py\n\n# Run from command line with custom directory:\n# python script.py /path/to/replica\n\n# Call programmatically:\nif __name__ == '__main__':\n    success = main()\n    sys.exit(0 if success else 1)"
    },
    {
      "best_practices": [
        "Ensure the replica_dir Path object points to a valid directory before calling this function",
        "The sync_log.json file should follow the expected schema with 'last_sync', 'root_hash', and 'nodes_synced' keys",
        "This function is designed for console output and uses emoji characters - ensure your terminal supports UTF-8 encoding",
        "The function gracefully handles missing files and JSON parsing errors, making it safe to call even if the sync log doesn't exist",
        "The root hash is truncated to 16 characters for display purposes - if you need the full hash, consider modifying the function or reading the file directly",
        "This is a display-only function with no return value - use it for logging/monitoring purposes, not for programmatic access to sync data"
      ],
      "class_interface": {},
      "complexity": "simple",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-07 00:58:55",
      "decorators": [],
      "dependencies": [
        "json",
        "pathlib"
      ],
      "description": "Reads and displays synchronization log information from a JSON file in a replica directory, including last sync time, root hash, and number of nodes synced.",
      "docstring": "Print sync log information",
      "id": 2131,
      "imports": [
        "import json",
        "import sys",
        "from pathlib import Path",
        "from datetime import datetime",
        "from typing import Dict",
        "from typing import Any",
        "from typing import List"
      ],
      "imports_required": [
        "import json",
        "from pathlib import Path"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 257,
      "line_start": 239,
      "name": "print_sync_info",
      "parameters": [
        {
          "annotation": "Path",
          "default": null,
          "kind": "positional_or_keyword",
          "name": "replica_dir"
        }
      ],
      "parameters_explained": {
        "replica_dir": "A Path object pointing to the replica directory that contains the 'sync_log.json' file. This directory should be the root of a replica structure where synchronization logs are stored. The function will look for a file named 'sync_log.json' directly within this directory."
      },
      "parent_class": null,
      "purpose": "This function provides a user-friendly display of synchronization status for a replica directory. It reads a 'sync_log.json' file and prints formatted information about the last synchronization operation, including timestamp, root hash (truncated for readability), and the count of synced nodes. It handles missing files and read errors gracefully with warning messages.",
      "return_annotation": null,
      "return_explained": "This function returns None. It performs side effects by printing formatted synchronization information to stdout using print() statements. Output includes emoji-prefixed status messages showing last sync time, root hash (first 16 characters), and number of nodes synced, or warning messages if the log file is missing or cannot be read.",
      "settings_required": [
        "The replica_dir must contain a 'sync_log.json' file with the expected structure containing 'last_sync', 'root_hash', and 'nodes_synced' keys",
        "The sync_log.json file must be valid JSON and UTF-8 encoded"
      ],
      "source_code": "def print_sync_info(replica_dir: Path):\n    \"\"\"Print sync log information\"\"\"\n    sync_log_file = replica_dir / \"sync_log.json\"\n    \n    if not sync_log_file.exists():\n        print(f\"\u26a0\ufe0f No sync log found\")\n        return\n    \n    try:\n        with open(sync_log_file, 'r', encoding='utf-8') as f:\n            sync_log = json.load(f)\n        \n        print(f\"\\n\ud83d\udccb SYNC INFORMATION:\")\n        print(f\"   \ud83d\udd50 Last sync: {sync_log.get('last_sync', 'unknown')}\")\n        print(f\"   \ud83d\udd11 Root hash: {sync_log.get('root_hash', 'unknown')[:16]}...\")\n        print(f\"   \ud83d\udcca Nodes synced: {sync_log.get('nodes_synced', 0)}\")\n        \n    except Exception as e:\n        print(f\"\u26a0\ufe0f Error reading sync log: {e}\")",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/cloudtest/analyze_replica.py",
      "tags": [
        "logging",
        "synchronization",
        "file-io",
        "json",
        "display",
        "status",
        "replica",
        "monitoring",
        "error-handling"
      ],
      "updated_at": "2025-12-07T01:58:55.347749",
      "usage_example": "from pathlib import Path\nimport json\n\n# Setup: Create a sample replica directory with sync log\nreplica_path = Path('./my_replica')\nreplica_path.mkdir(exist_ok=True)\n\n# Create a sample sync_log.json file\nsync_data = {\n    'last_sync': '2024-01-15 14:30:00',\n    'root_hash': 'abc123def456ghi789jkl012mno345pqr678',\n    'nodes_synced': 42\n}\n\nwith open(replica_path / 'sync_log.json', 'w', encoding='utf-8') as f:\n    json.dump(sync_data, f)\n\n# Use the function\nprint_sync_info(replica_path)\n\n# Output:\n# \ud83d\udccb SYNC INFORMATION:\n#    \ud83d\udd50 Last sync: 2024-01-15 14:30:00\n#    \ud83d\udd11 Root hash: abc123def456ghi7...\n#    \ud83d\udcca Nodes synced: 42"
    },
    {
      "best_practices": [
        "Ensure the database dictionary is properly structured with all expected keys before calling this function to avoid KeyError exceptions",
        "The function depends on three helper functions (analyze_hierarchy, analyze_file_types, print_node_tree) that must be available in scope",
        "This function is designed for console output and debugging; for programmatic access to analysis data, consider using the helper functions directly",
        "The content tree display is limited to the first 3 root nodes with max depth of 3 to prevent overwhelming output; adjust these limits if needed",
        "Consider redirecting stdout if you need to capture the output for logging or further processing"
      ],
      "class_interface": {},
      "complexity": "moderate",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-07 00:58:37",
      "decorators": [],
      "dependencies": [],
      "description": "Prints a comprehensive, formatted analysis of a reMarkable tablet replica database, including statistics, hierarchy information, file types, and a content tree visualization.",
      "docstring": "Print comprehensive database analysis",
      "id": 2130,
      "imports": [
        "import json",
        "import sys",
        "from pathlib import Path",
        "from datetime import datetime",
        "from typing import Dict",
        "from typing import Any",
        "from typing import List"
      ],
      "imports_required": [
        "from typing import Dict, Any"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 236,
      "line_start": 171,
      "name": "print_database_analysis",
      "parameters": [
        {
          "annotation": "Dict[str, Any]",
          "default": null,
          "kind": "positional_or_keyword",
          "name": "database"
        }
      ],
      "parameters_explained": {
        "database": "A dictionary containing the complete reMarkable replica database structure. Expected keys include: 'replica_info' (dict with 'created', 'replica_dir', 'statistics' subdicts), 'nodes' (dict mapping UUIDs to node objects), and 'hierarchy' (dict representing parent-child relationships). The 'statistics' subdict should contain counts like 'folders', 'documents', 'notebooks', 'pdfs_extracted', 'notebooks_extracted', and 'total_files'."
      },
      "parent_class": null,
      "purpose": "This function provides a detailed console output report for analyzing the structure and contents of a reMarkable tablet database replica. It displays metadata about the replica creation, node counts, file statistics, hierarchy depth analysis, file type distributions, and a visual tree representation of the content structure. This is useful for debugging, auditing, or understanding the organization of a reMarkable tablet's content after it has been replicated to a local database format.",
      "return_annotation": null,
      "return_explained": "This function returns None. It produces side effects by printing formatted analysis output directly to stdout using print() statements. The output includes emoji-decorated sections for replica info, build statistics, hierarchy analysis, file type analysis, and a content tree visualization.",
      "settings_required": [
        "Requires helper functions: analyze_hierarchy(database), analyze_file_types(database), and print_node_tree(nodes, hierarchy, root_uuid, max_depth) to be defined in the same module or imported",
        "The database parameter must be properly structured with expected keys and nested dictionaries as described in parameters_explained"
      ],
      "source_code": "def print_database_analysis(database: Dict[str, Any]):\n    \"\"\"Print comprehensive database analysis\"\"\"\n    replica_info = database.get('replica_info', {})\n    nodes = database.get('nodes', {})\n    \n    print(\"\ud83d\udd0d REMARKABLE REPLICA DATABASE ANALYSIS\")\n    print(\"=\" * 60)\n    \n    # Basic info\n    print(f\"\ud83d\udcc5 Created: {replica_info.get('created', 'unknown')}\")\n    print(f\"\ud83d\udcc1 Replica directory: {replica_info.get('replica_dir', 'unknown')}\")\n    print(f\"\ud83d\udcca Total nodes: {len(nodes)}\")\n    \n    # Statistics\n    stats = replica_info.get('statistics', {})\n    if stats:\n        print(f\"\\n\ud83d\udcc8 BUILD STATISTICS:\")\n        print(f\"   \ud83d\udcc2 Folders: {stats.get('folders', 0)}\")\n        print(f\"   \ud83d\udcc4 Documents: {stats.get('documents', 0)}\")\n        print(f\"   \ud83d\udcd4 Notebooks: {stats.get('notebooks', 0)}\")\n        print(f\"   \ud83d\udcc4 PDFs extracted: {stats.get('pdfs_extracted', 0)}\")\n        print(f\"   \ud83d\udcdd Notebooks extracted: {stats.get('notebooks_extracted', 0)}\")\n        print(f\"   \ud83d\udcce Total files: {stats.get('total_files', 0)}\")\n    \n    # Hierarchy analysis\n    hierarchy_info = analyze_hierarchy(database)\n    print(f\"\\n\ud83c\udf33 HIERARCHY ANALYSIS:\")\n    print(f\"   \ud83d\udccd Root nodes: {len(hierarchy_info['root_nodes'])}\")\n    print(f\"   \ud83d\udccf Maximum depth: {hierarchy_info['max_depth']}\")\n    print(f\"   \ud83d\udc65 Parent-child relationships: {hierarchy_info['total_parent_relationships']}\")\n    \n    # Node type distribution\n    print(f\"   \ud83d\udcca Node type distribution:\")\n    for node_type, count in hierarchy_info['type_counts'].items():\n        print(f\"     \u2022 {node_type}: {count}\")\n    \n    # Depth distribution\n    print(f\"   \ud83d\udccf Depth distribution:\")\n    for depth in sorted(hierarchy_info['depth_counts'].keys()):\n        count = hierarchy_info['depth_counts'][depth]\n        print(f\"     \u2022 Depth {depth}: {count} nodes\")\n    \n    # File type analysis\n    file_stats = analyze_file_types(database)\n    print(f\"\\n\ud83d\udcce FILE TYPE ANALYSIS:\")\n    print(f\"   \ud83d\udcc4 PDF files: {file_stats['pdf_files']}\")\n    print(f\"   \ud83d\udcdd Notebook components: {file_stats['notebook_files']}\")\n    print(f\"   \ud83d\udd8a\ufe0f reMarkable (.rm) files: {file_stats['rm_files']}\")\n    print(f\"   \ud83d\udcc4 Content files: {file_stats['content_files']}\")\n    print(f\"   \ud83d\udcce Total extracted files: {file_stats['total_extracted_files']}\")\n    \n    if file_stats['file_extensions']:\n        print(f\"   \ud83d\udcca File extensions:\")\n        for ext, count in sorted(file_stats['file_extensions'].items(), key=lambda x: x[1], reverse=True):\n            print(f\"     \u2022 {ext}: {count}\")\n    \n    # Tree view of content\n    print(f\"\\n\ud83c\udf33 CONTENT TREE:\")\n    hierarchy = database.get('hierarchy', {})\n    \n    for root_uuid in hierarchy_info['root_nodes'][:3]:  # Show first 3 root nodes\n        print_node_tree(nodes, hierarchy, root_uuid, max_depth=3)\n        print()\n    \n    if len(hierarchy_info['root_nodes']) > 3:\n        print(f\"   ... ({len(hierarchy_info['root_nodes']) - 3} more root nodes)\")",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/cloudtest/analyze_replica.py",
      "tags": [
        "remarkable",
        "database-analysis",
        "reporting",
        "visualization",
        "console-output",
        "statistics",
        "hierarchy",
        "file-analysis",
        "tree-view",
        "metadata"
      ],
      "updated_at": "2025-12-07T01:58:37.944224",
      "usage_example": "# Assuming you have a database dict from a reMarkable replica\n# and the required helper functions are available\n\ndatabase = {\n    'replica_info': {\n        'created': '2024-01-15T10:30:00',\n        'replica_dir': '/path/to/replica',\n        'statistics': {\n            'folders': 10,\n            'documents': 25,\n            'notebooks': 15,\n            'pdfs_extracted': 20,\n            'notebooks_extracted': 15,\n            'total_files': 50\n        }\n    },\n    'nodes': {\n        'uuid-1': {'type': 'CollectionType', 'name': 'My Folder'},\n        'uuid-2': {'type': 'DocumentType', 'name': 'My Document'}\n    },\n    'hierarchy': {\n        'uuid-1': ['uuid-2']\n    }\n}\n\n# Print comprehensive analysis\nprint_database_analysis(database)"
    },
    {
      "best_practices": [
        "Ensure the database parameter contains a 'nodes' key with properly structured node data to avoid empty results",
        "File paths in 'extracted_files' should be valid path strings that can be processed by pathlib.Path",
        "The function safely handles missing 'nodes' or 'extracted_files' keys by using .get() with default values",
        "File extension matching is case-insensitive (uses .lower())",
        "Files without extensions are tracked under the '[no extension]' key in file_extensions dictionary",
        "The function does not validate if file paths actually exist on the filesystem, it only analyzes the paths stored in the database"
      ],
      "class_interface": {},
      "complexity": "simple",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-07 00:58:20",
      "decorators": [],
      "dependencies": [],
      "description": "Analyzes file types within a replica database structure, counting different file categories and tracking file extensions.",
      "docstring": "Analyze file types in the replica",
      "id": 2129,
      "imports": [
        "import json",
        "import sys",
        "from pathlib import Path",
        "from datetime import datetime",
        "from typing import Dict",
        "from typing import Any",
        "from typing import List"
      ],
      "imports_required": [
        "from pathlib import Path",
        "from typing import Dict, Any"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 168,
      "line_start": 132,
      "name": "analyze_file_types",
      "parameters": [
        {
          "annotation": "Dict[str, Any]",
          "default": null,
          "kind": "positional_or_keyword",
          "name": "database"
        }
      ],
      "parameters_explained": {
        "database": "A dictionary containing replica data with a 'nodes' key. Each node should have a 'uuid' as key and contain an 'extracted_files' list with file paths. Expected structure: {'nodes': {uuid: {'extracted_files': [file_paths]}}}"
      },
      "parent_class": null,
      "purpose": "This function processes a database dictionary containing nodes with extracted files, categorizing them by type (PDF, notebook, RM, content files) and counting occurrences of each file extension. It's designed for analyzing file distribution in a replica system, likely for a note-taking or document management application (possibly reMarkable tablet data).",
      "return_annotation": "Dict[str, Any]",
      "return_explained": "Returns a dictionary with file statistics containing: 'pdf_files' (int: count of PDF files), 'notebook_files' (int: files in notebook directories), 'rm_files' (int: .rm format files), 'content_files' (int: files named 'content'), 'total_extracted_files' (int: total file count), and 'file_extensions' (dict: mapping of extensions to their counts, with '[no extension]' for files without extensions)",
      "settings_required": [],
      "source_code": "def analyze_file_types(database: Dict[str, Any]) -> Dict[str, Any]:\n    \"\"\"Analyze file types in the replica\"\"\"\n    nodes = database.get('nodes', {})\n    \n    file_stats = {\n        'pdf_files': 0,\n        'notebook_files': 0,\n        'rm_files': 0,\n        'content_files': 0,\n        'total_extracted_files': 0,\n        'file_extensions': {}\n    }\n    \n    for uuid, node in nodes.items():\n        extracted_files = node.get('extracted_files', [])\n        file_stats['total_extracted_files'] += len(extracted_files)\n        \n        for file_path in extracted_files:\n            file_path_obj = Path(file_path)\n            ext = file_path_obj.suffix.lower()\n            \n            if ext == '.pdf':\n                file_stats['pdf_files'] += 1\n            elif ext == '.rm':\n                file_stats['rm_files'] += 1\n            elif file_path_obj.name == 'content':\n                file_stats['content_files'] += 1\n            elif '_notebook' in str(file_path_obj.parent):\n                file_stats['notebook_files'] += 1\n            \n            # Count extensions\n            if ext:\n                file_stats['file_extensions'][ext] = file_stats['file_extensions'].get(ext, 0) + 1\n            else:\n                file_stats['file_extensions']['[no extension]'] = file_stats['file_extensions'].get('[no extension]', 0) + 1\n    \n    return file_stats",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/cloudtest/analyze_replica.py",
      "tags": [
        "file-analysis",
        "data-processing",
        "statistics",
        "file-types",
        "replica",
        "document-management",
        "remarkable",
        "file-counting",
        "extension-analysis"
      ],
      "updated_at": "2025-12-07T01:58:20.121775",
      "usage_example": "from pathlib import Path\nfrom typing import Dict, Any\n\ndef analyze_file_types(database: Dict[str, Any]) -> Dict[str, Any]:\n    # ... function code ...\n    pass\n\n# Example usage\ndatabase = {\n    'nodes': {\n        'uuid-1': {\n            'extracted_files': [\n                '/path/to/document.pdf',\n                '/path/to/notes_notebook/page1.rm',\n                '/path/to/content'\n            ]\n        },\n        'uuid-2': {\n            'extracted_files': [\n                '/path/to/another.pdf',\n                '/path/to/file.txt'\n            ]\n        }\n    }\n}\n\nresults = analyze_file_types(database)\nprint(f\"Total files: {results['total_extracted_files']}\")\nprint(f\"PDF files: {results['pdf_files']}\")\nprint(f\"Extensions: {results['file_extensions']}\")"
    },
    {
      "best_practices": [
        "Ensure the 'uuid' parameter corresponds to a valid key in the 'nodes' dictionary to avoid returning early without output",
        "Set an appropriate 'max_depth' value to prevent excessive output or stack overflow with deeply nested hierarchies",
        "The 'last_modified' field in nodes should be a timestamp in milliseconds (Unix epoch * 1000) for proper date formatting",
        "The function silently handles missing nodes and invalid timestamps using try-except blocks, so validate your data beforehand for debugging",
        "Children are sorted alphabetically by name for consistent output; ensure node names are present to avoid sorting issues",
        "The function uses Unicode characters (\ud83d\udcc1, \ud83d\udcc4, \ud83d\udcdd, \u2502) which may not display correctly in all terminal environments",
        "This function has side effects (prints to stdout); consider redirecting output or capturing it if you need to process the tree structure programmatically"
      ],
      "class_interface": {},
      "complexity": "moderate",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-07 00:58:03",
      "decorators": [],
      "dependencies": [
        "datetime",
        "typing"
      ],
      "description": "Recursively prints a hierarchical tree visualization of nodes with icons, names, file counts, and modification dates to the console.",
      "docstring": "Print a tree view of nodes",
      "id": 2128,
      "imports": [
        "import json",
        "import sys",
        "from pathlib import Path",
        "from datetime import datetime",
        "from typing import Dict",
        "from typing import Any",
        "from typing import List"
      ],
      "imports_required": [
        "from datetime import datetime",
        "from typing import Dict, Any, List"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 129,
      "line_start": 82,
      "name": "print_node_tree",
      "parameters": [
        {
          "annotation": "Dict[str, Any]",
          "default": null,
          "kind": "positional_or_keyword",
          "name": "nodes"
        },
        {
          "annotation": "Dict[str, List[str]]",
          "default": null,
          "kind": "positional_or_keyword",
          "name": "hierarchy"
        },
        {
          "annotation": "str",
          "default": null,
          "kind": "positional_or_keyword",
          "name": "uuid"
        },
        {
          "annotation": "int",
          "default": "0",
          "kind": "positional_or_keyword",
          "name": "depth"
        },
        {
          "annotation": "int",
          "default": "3",
          "kind": "positional_or_keyword",
          "name": "max_depth"
        },
        {
          "annotation": "str",
          "default": "''",
          "kind": "positional_or_keyword",
          "name": "prefix"
        }
      ],
      "parameters_explained": {
        "depth": "Integer representing the current depth level in the tree traversal. Starts at 0 for the root node and increments with each recursive call. Used to control indentation and enforce max_depth limits.",
        "hierarchy": "Dictionary mapping parent node UUIDs (strings) to lists of child node UUIDs (strings). Defines the parent-child relationships between nodes. Keys are parent UUIDs, values are lists of their children's UUIDs.",
        "max_depth": "Integer specifying the maximum depth to traverse in the tree. Prevents infinite recursion and limits output size. Default is 3 levels deep.",
        "nodes": "Dictionary mapping node UUIDs (strings) to node data dictionaries. Each node dictionary should contain keys like 'name', 'node_type', 'extracted_files' (list), and 'last_modified' (timestamp in milliseconds). This is the complete collection of all nodes in the hierarchy.",
        "prefix": "String used for indentation and tree structure visualization. Contains characters like spaces, '\u2502', and '\u2514' to create the tree lines. Builds up with each recursive level.",
        "uuid": "String identifier of the current node to print. This is the starting point for the tree traversal and should be a valid key in the 'nodes' dictionary."
      },
      "parent_class": null,
      "purpose": "This function provides a visual representation of a node hierarchy (like a file system or document structure) by recursively traversing parent-child relationships and displaying them in a tree format with appropriate indentation, icons based on node type, and metadata such as file counts and last modified dates. It's useful for debugging, visualizing data structures, or providing user-friendly output of hierarchical data.",
      "return_annotation": null,
      "return_explained": "This function returns None (no explicit return value). It produces side effects by printing the tree structure directly to stdout using the print() function.",
      "settings_required": [],
      "source_code": "def print_node_tree(nodes: Dict[str, Any], hierarchy: Dict[str, List[str]], \n                   uuid: str, depth: int = 0, max_depth: int = 3, prefix: str = \"\"):\n    \"\"\"Print a tree view of nodes\"\"\"\n    if depth > max_depth:\n        return\n    \n    node = nodes.get(uuid)\n    if not node:\n        return\n    \n    # Format node info\n    name = node.get('name', 'unnamed')\n    node_type = node.get('node_type', 'unknown')\n    extracted_files = node.get('extracted_files', [])\n    \n    # Icon based on type\n    if node_type == 'folder':\n        icon = \"\ud83d\udcc1\"\n    elif extracted_files and any(f.endswith('.pdf') for f in extracted_files):\n        icon = \"\ud83d\udcc4\"\n    elif extracted_files:\n        icon = \"\ud83d\udcdd\"\n    else:\n        icon = \"\ud83d\udcc4\"\n    \n    # Additional info\n    info_parts = []\n    if extracted_files:\n        info_parts.append(f\"{len(extracted_files)} files\")\n    \n    if node.get('last_modified'):\n        try:\n            mod_time = datetime.fromtimestamp(int(node['last_modified']) / 1000)\n            info_parts.append(f\"modified {mod_time.strftime('%Y-%m-%d')}\")\n        except:\n            pass\n    \n    info_str = f\" ({', '.join(info_parts)})\" if info_parts else \"\"\n    \n    print(f\"{prefix}{icon} {name}{info_str}\")\n    \n    # Show children\n    children = hierarchy.get(uuid, [])\n    if children and depth < max_depth:\n        for i, child_uuid in enumerate(sorted(children, key=lambda x: nodes.get(x, {}).get('name', ''))):\n            is_last = i == len(children) - 1\n            child_prefix = prefix + (\"    \" if is_last else \"\u2502   \")\n            print_node_tree(nodes, hierarchy, child_uuid, depth + 1, max_depth, child_prefix)",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/cloudtest/analyze_replica.py",
      "tags": [
        "tree-visualization",
        "hierarchical-data",
        "console-output",
        "recursive",
        "file-system",
        "pretty-print",
        "node-traversal",
        "data-visualization",
        "formatting"
      ],
      "updated_at": "2025-12-07T01:58:03.636917",
      "usage_example": "from datetime import datetime\nfrom typing import Dict, Any, List\n\ndef print_node_tree(nodes: Dict[str, Any], hierarchy: Dict[str, List[str]], \n                   uuid: str, depth: int = 0, max_depth: int = 3, prefix: str = \"\"):\n    # ... function code ...\n    pass\n\n# Example data structure\nnodes = {\n    'root-123': {\n        'name': 'Documents',\n        'node_type': 'folder',\n        'extracted_files': [],\n        'last_modified': 1704067200000\n    },\n    'child-456': {\n        'name': 'Report.pdf',\n        'node_type': 'file',\n        'extracted_files': ['report.pdf'],\n        'last_modified': 1704153600000\n    },\n    'child-789': {\n        'name': 'Notes.txt',\n        'node_type': 'file',\n        'extracted_files': ['notes.txt'],\n        'last_modified': 1704240000000\n    }\n}\n\nhierarchy = {\n    'root-123': ['child-456', 'child-789']\n}\n\n# Print the tree starting from root\nprint_node_tree(nodes, hierarchy, 'root-123', max_depth=2)"
    },
    {
      "best_practices": [
        "This function uses binary units (1024-based) rather than decimal units (1000-based). Be aware of the difference between KB (kibibyte, 1024 bytes) and kB (kilobyte, 1000 bytes) when displaying to users.",
        "The function assumes non-negative input values. Consider adding input validation if negative values might be passed.",
        "For very large files (terabytes or larger), the function will display them in GB, which may result in large numbers. Consider extending the function to support TB and PB units if needed.",
        "The function returns one decimal place for all units except bytes. This provides a good balance between precision and readability for most use cases.",
        "Consider caching the result if the same file size needs to be formatted multiple times to avoid redundant calculations."
      ],
      "class_interface": {},
      "complexity": "simple",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-07 00:57:42",
      "decorators": [],
      "dependencies": [],
      "description": "Converts a file size in bytes to a human-readable string format with appropriate units (B, KB, MB, or GB).",
      "docstring": "Format file size in human readable format",
      "id": 2127,
      "imports": [
        "import json",
        "import sys",
        "from pathlib import Path",
        "from datetime import datetime",
        "from typing import Dict",
        "from typing import Any",
        "from typing import List"
      ],
      "imports_required": [],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 79,
      "line_start": 70,
      "name": "format_file_size",
      "parameters": [
        {
          "annotation": "int",
          "default": null,
          "kind": "positional_or_keyword",
          "name": "size_bytes"
        }
      ],
      "parameters_explained": {
        "size_bytes": "An integer representing the file size in bytes. Expected to be a non-negative integer value. The function will convert this value to the most appropriate unit (B, KB, MB, or GB) based on its magnitude. Values less than 1024 are displayed in bytes, 1024-1048575 in KB, 1048576-1073741823 in MB, and 1073741824 or greater in GB."
      },
      "parent_class": null,
      "purpose": "This function takes a file size expressed in bytes and formats it into a more readable representation by automatically selecting the most appropriate unit (bytes, kilobytes, megabytes, or gigabytes). It uses binary units (1024-based) and formats decimal values to one decimal place for units larger than bytes. This is commonly used in file managers, download progress indicators, storage displays, and any application that needs to present file sizes to users in an intuitive format.",
      "return_annotation": "str",
      "return_explained": "Returns a string containing the formatted file size with one decimal place precision (for KB, MB, GB) and the appropriate unit suffix. For sizes less than 1024 bytes, returns an integer value with 'B' suffix. Examples: '512 B', '1.5 KB', '2.3 MB', '1.2 GB'. The format uses a space between the numeric value and the unit.",
      "settings_required": [],
      "source_code": "def format_file_size(size_bytes: int) -> str:\n    \"\"\"Format file size in human readable format\"\"\"\n    if size_bytes < 1024:\n        return f\"{size_bytes} B\"\n    elif size_bytes < 1024 * 1024:\n        return f\"{size_bytes / 1024:.1f} KB\"\n    elif size_bytes < 1024 * 1024 * 1024:\n        return f\"{size_bytes / (1024 * 1024):.1f} MB\"\n    else:\n        return f\"{size_bytes / (1024 * 1024 * 1024):.1f} GB\"",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/cloudtest/analyze_replica.py",
      "tags": [
        "file-size",
        "formatting",
        "human-readable",
        "bytes-conversion",
        "utility",
        "display",
        "storage",
        "file-management",
        "string-formatting",
        "unit-conversion"
      ],
      "updated_at": "2025-12-07T01:57:42.275134",
      "usage_example": "# Basic usage examples\nsize1 = format_file_size(500)\nprint(size1)  # Output: '500 B'\n\nsize2 = format_file_size(2048)\nprint(size2)  # Output: '2.0 KB'\n\nsize3 = format_file_size(5242880)\nprint(size3)  # Output: '5.0 MB'\n\nsize4 = format_file_size(1073741824)\nprint(size4)  # Output: '1.0 GB'\n\n# Practical example with file operations\nimport os\nfile_path = 'example.txt'\nif os.path.exists(file_path):\n    file_size = os.path.getsize(file_path)\n    readable_size = format_file_size(file_size)\n    print(f'File size: {readable_size}')"
    },
    {
      "best_practices": [
        "This function uses 1024 as the conversion factor (binary units), which is standard for file systems. Be aware that some contexts use 1000 (decimal units) instead.",
        "The function does not validate that size_bytes is non-negative. Consider adding validation if negative values should be rejected.",
        "The function stops at TB (terabytes). For extremely large values (petabytes and beyond), it will still display in TB units.",
        "The output is always formatted to one decimal place, which provides a good balance between precision and readability for most use cases.",
        "When size_bytes is exactly 0, the function returns '0.0 B', which is correct and expected behavior.",
        "This function is purely for display purposes and should not be used for calculations or comparisons of file sizes."
      ],
      "class_interface": {},
      "complexity": "simple",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-06 09:23:01",
      "decorators": [],
      "dependencies": [],
      "description": "Converts a file size in bytes to a human-readable string format with appropriate units (B, KB, MB, GB, TB).",
      "docstring": "Convert bytes to human readable format",
      "id": 241,
      "imports": [
        "import os",
        "import sys",
        "from datetime import datetime",
        "from sharepoint_graph_client import SharePointGraphClient",
        "from filecloud_client import FileCloudClient",
        "from config import Config"
      ],
      "imports_required": [],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 28,
      "line_start": 19,
      "name": "format_file_size_v1",
      "parameters": [
        {
          "annotation": null,
          "default": null,
          "kind": "positional_or_keyword",
          "name": "size_bytes"
        }
      ],
      "parameters_explained": {
        "size_bytes": "The file size in bytes as a numeric value (int or float). Can be None, in which case the function returns 'Unknown'. Expected to be non-negative for meaningful results, though negative values will be processed (resulting in negative formatted output)."
      },
      "parent_class": null,
      "purpose": "This utility function formats raw byte values into user-friendly file size representations by automatically selecting the most appropriate unit (bytes, kilobytes, megabytes, gigabytes, or terabytes) and formatting the output to one decimal place. It handles None values gracefully by returning 'Unknown'. Commonly used in file management systems, storage displays, and data transfer interfaces where file sizes need to be presented to end users.",
      "return_annotation": null,
      "return_explained": "Returns a string representing the formatted file size. Format is '{value:.1f} {unit}' where value is rounded to one decimal place and unit is one of 'B', 'KB', 'MB', 'GB', or 'TB'. Returns 'Unknown' if size_bytes is None. Examples: '1.5 KB', '250.0 MB', '1.2 GB', 'Unknown'.",
      "settings_required": [],
      "source_code": "def format_file_size(size_bytes):\n    \"\"\"Convert bytes to human readable format\"\"\"\n    if size_bytes is None:\n        return \"Unknown\"\n    \n    for unit in ['B', 'KB', 'MB', 'GB']:\n        if size_bytes < 1024.0:\n            return f\"{size_bytes:.1f} {unit}\"\n        size_bytes /= 1024.0\n    return f\"{size_bytes:.1f} TB\"",
      "source_file": "/tf/active/vicechatdev/SPFCsync/dry_run_test.py",
      "tags": [
        "file-size",
        "formatting",
        "utility",
        "human-readable",
        "bytes-conversion",
        "storage",
        "data-display",
        "string-formatting"
      ],
      "updated_at": "2025-12-07T01:57:42.272802",
      "usage_example": "# Basic usage examples\nsize1 = format_file_size(1024)\nprint(size1)  # Output: '1.0 KB'\n\nsize2 = format_file_size(1536000)\nprint(size2)  # Output: '1.5 MB'\n\nsize3 = format_file_size(5368709120)\nprint(size3)  # Output: '5.0 GB'\n\nsize4 = format_file_size(None)\nprint(size4)  # Output: 'Unknown'\n\nsize5 = format_file_size(500)\nprint(size5)  # Output: '500.0 B'\n\n# Use in file listing context\nimport os\nfile_path = 'example.txt'\nif os.path.exists(file_path):\n    file_size = os.path.getsize(file_path)\n    readable_size = format_file_size(file_size)\n    print(f'{file_path}: {readable_size}')"
    },
    {
      "best_practices": [
        "Ensure the database dictionary contains both 'nodes' and 'hierarchy' keys for complete analysis",
        "Node dictionaries should include 'parent_uuid', 'depth', and 'node_type' fields for accurate statistics",
        "The function handles missing keys gracefully using .get() with defaults, but providing complete data yields better results",
        "Root nodes are identified by the absence of 'parent_uuid' or a falsy value for that field",
        "The function assumes depth values are pre-calculated in the node data; it does not compute depth from hierarchy",
        "For large hierarchies, consider the memory implications of storing all statistics in the returned dictionary"
      ],
      "class_interface": {},
      "complexity": "simple",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-07 00:57:24",
      "decorators": [],
      "dependencies": [],
      "description": "Analyzes a hierarchical database structure to extract statistics about nodes, their relationships, depths, and types.",
      "docstring": "Analyze the hierarchy structure",
      "id": 2126,
      "imports": [
        "import json",
        "import sys",
        "from pathlib import Path",
        "from datetime import datetime",
        "from typing import Dict",
        "from typing import Any",
        "from typing import List"
      ],
      "imports_required": [
        "from typing import Dict, Any"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 67,
      "line_start": 35,
      "name": "analyze_hierarchy",
      "parameters": [
        {
          "annotation": "Dict[str, Any]",
          "default": null,
          "kind": "positional_or_keyword",
          "name": "database"
        }
      ],
      "parameters_explained": {
        "database": "A dictionary containing the hierarchical database structure. Expected to have two keys: 'nodes' (dict mapping UUIDs to node dictionaries with properties like 'parent_uuid', 'depth', and 'node_type') and 'hierarchy' (dict representing parent-child relationships). The 'nodes' dict should contain node objects with optional fields: 'parent_uuid' (string or None), 'depth' (integer), and 'node_type' (string)."
      },
      "parent_class": null,
      "purpose": "This function processes a database dictionary containing nodes and hierarchy information to provide comprehensive statistics including root nodes identification, depth distribution analysis, node type categorization, and parent-child relationship counts. It's useful for understanding the structure and composition of hierarchical data systems, tree structures, or organizational charts stored in dictionary format.",
      "return_annotation": "Dict[str, Any]",
      "return_explained": "Returns a dictionary with five keys: 'root_nodes' (list of UUIDs for nodes without parents), 'depth_counts' (dict mapping depth levels to count of nodes at that depth), 'max_depth' (integer representing the maximum depth in the hierarchy), 'type_counts' (dict mapping node types to their counts), and 'total_parent_relationships' (integer count of entries in the hierarchy dict).",
      "settings_required": [],
      "source_code": "def analyze_hierarchy(database: Dict[str, Any]) -> Dict[str, Any]:\n    \"\"\"Analyze the hierarchy structure\"\"\"\n    nodes = database.get('nodes', {})\n    hierarchy = database.get('hierarchy', {})\n    \n    # Find root nodes (nodes with no parent)\n    root_nodes = []\n    for uuid, node in nodes.items():\n        if not node.get('parent_uuid'):\n            root_nodes.append(uuid)\n    \n    # Calculate depth statistics\n    depth_counts = {}\n    max_depth = 0\n    \n    for uuid, node in nodes.items():\n        depth = node.get('depth', 0)\n        depth_counts[depth] = depth_counts.get(depth, 0) + 1\n        max_depth = max(max_depth, depth)\n    \n    # Count node types\n    type_counts = {}\n    for uuid, node in nodes.items():\n        node_type = node.get('node_type', 'unknown')\n        type_counts[node_type] = type_counts.get(node_type, 0) + 1\n    \n    return {\n        'root_nodes': root_nodes,\n        'depth_counts': depth_counts,\n        'max_depth': max_depth,\n        'type_counts': type_counts,\n        'total_parent_relationships': len(hierarchy)\n    }",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/cloudtest/analyze_replica.py",
      "tags": [
        "hierarchy",
        "tree-structure",
        "data-analysis",
        "statistics",
        "graph-analysis",
        "node-analysis",
        "depth-calculation",
        "parent-child-relationships",
        "data-processing",
        "structural-analysis"
      ],
      "updated_at": "2025-12-07T01:57:24.658174",
      "usage_example": "from typing import Dict, Any\n\ndef analyze_hierarchy(database: Dict[str, Any]) -> Dict[str, Any]:\n    nodes = database.get('nodes', {})\n    hierarchy = database.get('hierarchy', {})\n    root_nodes = []\n    for uuid, node in nodes.items():\n        if not node.get('parent_uuid'):\n            root_nodes.append(uuid)\n    depth_counts = {}\n    max_depth = 0\n    for uuid, node in nodes.items():\n        depth = node.get('depth', 0)\n        depth_counts[depth] = depth_counts.get(depth, 0) + 1\n        max_depth = max(max_depth, depth)\n    type_counts = {}\n    for uuid, node in nodes.items():\n        node_type = node.get('node_type', 'unknown')\n        type_counts[node_type] = type_counts.get(node_type, 0) + 1\n    return {\n        'root_nodes': root_nodes,\n        'depth_counts': depth_counts,\n        'max_depth': max_depth,\n        'type_counts': type_counts,\n        'total_parent_relationships': len(hierarchy)\n    }\n\n# Example usage\ndatabase = {\n    'nodes': {\n        'uuid1': {'parent_uuid': None, 'depth': 0, 'node_type': 'root'},\n        'uuid2': {'parent_uuid': 'uuid1', 'depth': 1, 'node_type': 'branch'},\n        'uuid3': {'parent_uuid': 'uuid1', 'depth': 1, 'node_type': 'branch'},\n        'uuid4': {'parent_uuid': 'uuid2', 'depth': 2, 'node_type': 'leaf'}\n    },\n    'hierarchy': {\n        'uuid1': ['uuid2', 'uuid3'],\n        'uuid2': ['uuid4']\n    }\n}\n\nresult = analyze_hierarchy(database)\nprint(result)\n# Output: {'root_nodes': ['uuid1'], 'depth_counts': {0: 1, 1: 2, 2: 1}, 'max_depth': 2, 'type_counts': {'root': 1, 'branch': 2, 'leaf': 1}, 'total_parent_relationships': 2}"
    },
    {
      "best_practices": [
        "Always check if the returned value is None before using it to avoid AttributeError",
        "Ensure the replica_dir parameter is a valid Path object, not a string",
        "The function expects a file named exactly 'replica_database.json' in the provided directory",
        "Error messages are printed to stdout; consider logging for production use",
        "The function uses UTF-8 encoding by default; ensure your JSON files are UTF-8 encoded",
        "Broad exception catching may hide specific errors; consider more granular error handling for production",
        "The function returns None on both file-not-found and parsing errors; check error messages to distinguish between failure types"
      ],
      "class_interface": {},
      "complexity": "simple",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-07 00:57:05",
      "decorators": [],
      "dependencies": [
        "json",
        "pathlib"
      ],
      "description": "Loads a JSON database file from a replica directory and returns its contents as a dictionary, with error handling for missing files or parsing failures.",
      "docstring": "Load the replica database",
      "id": 2125,
      "imports": [
        "import json",
        "import sys",
        "from pathlib import Path",
        "from datetime import datetime",
        "from typing import Dict",
        "from typing import Any",
        "from typing import List"
      ],
      "imports_required": [
        "import json",
        "from pathlib import Path",
        "from typing import Dict, Any"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 32,
      "line_start": 19,
      "name": "load_database",
      "parameters": [
        {
          "annotation": "Path",
          "default": null,
          "kind": "positional_or_keyword",
          "name": "replica_dir"
        }
      ],
      "parameters_explained": {
        "replica_dir": "A Path object representing the directory containing the replica database. This directory should contain a file named 'replica_database.json'. The Path object should be from pathlib.Path and point to a valid directory location."
      },
      "parent_class": null,
      "purpose": "This function is designed to safely load a replica database stored as a JSON file. It checks for file existence, handles encoding properly (UTF-8), and provides user-friendly error messages. Returns None if the file doesn't exist or if loading fails, making it suitable for applications that need to work with replica/backup database files.",
      "return_annotation": "Dict[str, Any]",
      "return_explained": "Returns a dictionary (Dict[str, Any]) containing the parsed JSON data from the replica database file. Returns None if the database file doesn't exist or if any exception occurs during loading (e.g., invalid JSON, permission errors, encoding issues). The dictionary structure depends on the content of the JSON file.",
      "settings_required": [
        "A valid replica directory path containing a 'replica_database.json' file",
        "Read permissions for the replica_database.json file",
        "Valid UTF-8 encoded JSON file"
      ],
      "source_code": "def load_database(replica_dir: Path) -> Dict[str, Any]:\n    \"\"\"Load the replica database\"\"\"\n    database_file = replica_dir / \"replica_database.json\"\n    \n    if not database_file.exists():\n        print(f\"\u274c Database file not found: {database_file}\")\n        return None\n    \n    try:\n        with open(database_file, 'r', encoding='utf-8') as f:\n            return json.load(f)\n    except Exception as e:\n        print(f\"\u274c Failed to load database: {e}\")\n        return None",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/cloudtest/analyze_replica.py",
      "tags": [
        "database",
        "json",
        "file-loading",
        "error-handling",
        "replica",
        "data-persistence",
        "file-io",
        "pathlib"
      ],
      "updated_at": "2025-12-07T01:57:05.204234",
      "usage_example": "from pathlib import Path\nimport json\nfrom typing import Dict, Any\n\ndef load_database(replica_dir: Path) -> Dict[str, Any]:\n    \"\"\"Load the replica database\"\"\"\n    database_file = replica_dir / \"replica_database.json\"\n    \n    if not database_file.exists():\n        print(f\"\u274c Database file not found: {database_file}\")\n        return None\n    \n    try:\n        with open(database_file, 'r', encoding='utf-8') as f:\n            return json.load(f)\n    except Exception as e:\n        print(f\"\u274c Failed to load database: {e}\")\n        return None\n\n# Usage\nreplica_path = Path('/path/to/replica')\ndb_data = load_database(replica_path)\n\nif db_data is not None:\n    print(f\"Database loaded successfully with {len(db_data)} entries\")\n    # Process database data\nelse:\n    print(\"Failed to load database\")"
    },
    {
      "best_practices": [],
      "class_interface": {
        "attributes": [],
        "methods": [
          {
            "is_property": false,
            "is_static": false,
            "name": "__init__",
            "parameters": {},
            "purpose": "Internal method:   init  ",
            "returns": "None",
            "signature": "__init__(self)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "get_current_root_info",
            "parameters": {},
            "purpose": "Get current root.docSchema info using working method",
            "returns": "None",
            "signature": "get_current_root_info(self)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "get_document_info",
            "parameters": {
              "doc_uuid": "Type: str",
              "root_content": "Type: str"
            },
            "purpose": "Find document entry in root.docSchema",
            "returns": "None",
            "signature": "get_document_info(self, doc_uuid, root_content)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "get_document_schema",
            "parameters": {
              "doc_hash": "Type: str"
            },
            "purpose": "Retrieve document's docSchema",
            "returns": "None",
            "signature": "get_document_schema(self, doc_hash)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "get_current_metadata",
            "parameters": {
              "doc_lines": "Type: list"
            },
            "purpose": "Extract and fetch current metadata",
            "returns": "None",
            "signature": "get_current_metadata(self, doc_lines)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "create_updated_metadata",
            "parameters": {
              "current_metadata": "Type: dict",
              "new_parent": "Type: str"
            },
            "purpose": "Create updated metadata with new parent",
            "returns": "None",
            "signature": "create_updated_metadata(self, current_metadata, new_parent)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "upload_new_metadata",
            "parameters": {
              "doc_uuid": "Type: str",
              "metadata_json": "Type: str"
            },
            "purpose": "Upload new metadata and return hash",
            "returns": "See docstring for return details",
            "signature": "upload_new_metadata(self, metadata_json, doc_uuid)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "upload_real_pagedata",
            "parameters": {
              "doc_uuid": "Type: str"
            },
            "purpose": "Upload real pagedata (newline) to match real app documents",
            "returns": "None",
            "signature": "upload_real_pagedata(self, doc_uuid)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "create_new_document_schema",
            "parameters": {
              "doc_lines": "Type: list",
              "metadata_line": "Type: str",
              "new_metadata_hash": "Type: str",
              "new_pagedata_hash": "Type: str"
            },
            "purpose": "Create new document schema with updated metadata hash and pagedata",
            "returns": "None",
            "signature": "create_new_document_schema(self, doc_lines, new_metadata_hash, metadata_line, new_pagedata_hash)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "upload_new_document_schema",
            "parameters": {
              "doc_content": "Type: str",
              "doc_uuid": "Type: str"
            },
            "purpose": "Upload new document schema",
            "returns": "None",
            "signature": "upload_new_document_schema(self, doc_content, doc_uuid)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "update_root_docschema",
            "parameters": {
              "doc_info": "Type: dict",
              "new_doc_hash": "Type: str",
              "root_content": "Type: str"
            },
            "purpose": "Update root.docSchema with new document hash",
            "returns": "None",
            "signature": "update_root_docschema(self, root_content, doc_info, new_doc_hash)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "upload_new_root",
            "parameters": {
              "generation": "Type: int",
              "root_content": "Type: str"
            },
            "purpose": "Upload new root.docSchema and update roothash",
            "returns": "None",
            "signature": "upload_new_root(self, root_content, generation)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "move_document_from_trash",
            "parameters": {
              "doc_uuid": "Type: str"
            },
            "purpose": "Complete process to move document from trash to root",
            "returns": "None",
            "signature": "move_document_from_trash(self, doc_uuid)"
          }
        ]
      },
      "complexity": "moderate",
      "component_type": "class",
      "conditional_imports": [],
      "created_at": "2025-12-07 00:56:35",
      "decorators": [],
      "dependencies": [],
      "description": "Moves documents between folders using the working upload mechanism",
      "docstring": "Moves documents between folders using the working upload mechanism",
      "id": 2123,
      "imports": [
        "import json",
        "import time",
        "import hashlib",
        "import uuid",
        "import base64",
        "import zlib",
        "from pathlib import Path",
        "from auth import RemarkableAuth",
        "import crc32c"
      ],
      "imports_required": [
        "import json",
        "import time",
        "import hashlib",
        "import uuid",
        "import base64"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 520,
      "line_start": 52,
      "name": "DocumentMover",
      "parameters": [
        {
          "annotation": "",
          "default": null,
          "name": "bases"
        }
      ],
      "parameters_explained": {
        "bases": "Parameter of type "
      },
      "parent_class": null,
      "purpose": "Moves documents between folders using the working upload mechanism",
      "return_annotation": null,
      "return_explained": "Returns unspecified type",
      "settings_required": [],
      "source_code": "class DocumentMover:\n    \"\"\"Moves documents between folders using the working upload mechanism\"\"\"\n    \n    def __init__(self):\n        # Load auth session\n        auth = RemarkableAuth()\n        self.session = auth.get_authenticated_session()\n        \n        if not self.session:\n            raise RuntimeError(\"Failed to authenticate with reMarkable\")\n        \n        print(\"\ud83d\udd04 Document Mover Initialized\")\n    \n    def get_current_root_info(self):\n        \"\"\"Get current root.docSchema info using working method\"\"\"\n        print(\"\\n\ud83d\udccb Step 1: Getting current root.docSchema...\")\n        \n        # Get root info\n        root_response = self.session.get(\"https://eu.tectonic.remarkable.com/sync/v4/root\")\n        root_response.raise_for_status()\n        root_data = root_response.json()\n        \n        # Get root content\n        root_content_response = self.session.get(f\"https://eu.tectonic.remarkable.com/sync/v3/files/{root_data['hash']}\")\n        root_content_response.raise_for_status()\n        root_content = root_content_response.text\n        \n        print(f\"\u2705 Current root hash: {root_data['hash']}\")\n        print(f\"\u2705 Current generation: {root_data.get('generation')}\")\n        print(f\"\u2705 Root content size: {len(root_content)} bytes\")\n        \n        return root_data, root_content\n    \n    def get_document_info(self, doc_uuid: str, root_content: str):\n        \"\"\"Find document entry in root.docSchema\"\"\"\n        print(f\"\\n\ud83d\udcc4 Step 2: Finding document {doc_uuid[:8]}... in root.docSchema\")\n        \n        lines = root_content.strip().split('\\n')\n        for line in lines[1:]:  # Skip version header\n            if doc_uuid in line:\n                parts = line.split(':')\n                if len(parts) >= 5:\n                    doc_info = {\n                        'hash': parts[0],\n                        'uuid': parts[2],\n                        'type': parts[3],\n                        'size': parts[4],\n                        'full_line': line\n                    }\n                    print(f\"\u2705 Found document entry:\")\n                    print(f\"   Hash: {doc_info['hash']}\")\n                    print(f\"   Type: {doc_info['type']}\")\n                    print(f\"   Size: {doc_info['size']}\")\n                    print(f\"   Full line: {doc_info['full_line']}\")\n                    return doc_info\n        \n        raise ValueError(f\"Document {doc_uuid} not found in root.docSchema\")\n    \n    def get_document_schema(self, doc_hash: str):\n        \"\"\"Retrieve document's docSchema\"\"\"\n        print(f\"\\n\ud83d\udcc4 Step 3: Retrieving document docSchema...\")\n        \n        doc_response = self.session.get(f\"https://eu.tectonic.remarkable.com/sync/v3/files/{doc_hash}\")\n        doc_response.raise_for_status()\n        doc_content = doc_response.text\n        \n        print(f\"\u2705 Document docSchema size: {len(doc_content)} bytes\")\n        print(f\"\ud83d\udcc4 Document docSchema content:\")\n        \n        lines = doc_content.strip().split('\\n')\n        for i, line in enumerate(lines):\n            print(f\"   Line {i}: {line}\")\n        \n        return doc_content, lines\n    \n    def get_current_metadata(self, doc_lines: list):\n        \"\"\"Extract and fetch current metadata\"\"\"\n        print(f\"\\n\ud83d\udcdd Step 4: Getting current metadata...\")\n        \n        metadata_hash = None\n        
metadata_line = None\n        \n        # Find metadata component\n        for line in doc_lines[1:]:  # Skip version\n            if ':' in line and '.metadata' in line:\n                parts = line.split(':')\n                if len(parts) >= 5:\n                    metadata_hash = parts[0]\n                    metadata_line = line\n                    break\n        \n        if not metadata_hash:\n            raise ValueError(\"Metadata component not found in document schema\")\n        \n        print(f\"\u2705 Metadata hash: {metadata_hash}\")\n        \n        # Fetch current metadata\n        metadata_response = self.session.get(f\"https://eu.tectonic.remarkable.com/sync/v3/files/{metadata_hash}\")\n        metadata_response.raise_for_status()\n        current_metadata = json.loads(metadata_response.text)\n        \n        print(f\"\u2705 Current metadata:\")\n        for key, value in current_metadata.items():\n            print(f\"   {key}: {value}\")\n        \n        return current_metadata, metadata_line\n    \n    def create_updated_metadata(self, current_metadata: dict, new_parent: str = \"\"):\n        \"\"\"Create updated metadata with new parent\"\"\"\n        print(f\"\\n\ud83d\udd04 Step 5: Creating updated metadata...\")\n        \n        # Copy current metadata and update parent\n        updated_metadata = current_metadata.copy()\n        old_parent = updated_metadata.get('parent', '')\n        updated_metadata['parent'] = new_parent\n        \n        print(f\"\u2705 Updating parent: '{old_parent}' \u2192 '{new_parent}'\")\n        \n        # Add/update source field to match real app documents (use macOS like real invoice)\n        updated_metadata['source'] = 'com.remarkable.macos'  # Always set to match real invoice\n        print(f\"\u2705 Setting 'source' field: com.remarkable.macos\")\n        \n        # Fix lastOpened to match real app behavior (use 0 for unopened)\n        if 'lastOpened' in updated_metadata and updated_metadata['lastOpened'] != 0:\n            updated_metadata['lastOpened'] = 0  # Real app uses 0 for unopened documents\n            print(f\"\u2705 Setting lastOpened to 0 (real app behavior)\")\n        \n        # Make metadata match real app behavior (don't mark as modified for moves)\n        updated_metadata['lastModified'] = int(time.time() * 1000)\n        updated_metadata['metadatamodified'] = False  # Real app doesn't mark as modified\n        updated_metadata['modified'] = False  # Real app doesn't mark as modified\n        \n        # Convert to JSON\n        updated_metadata_json = json.dumps(updated_metadata, separators=(',', ':'))\n        \n        print(f\"\u2705 Updated metadata ({len(updated_metadata_json)} bytes):\")\n        print(f\"   {updated_metadata_json[:100]}...\")\n        \n        return updated_metadata_json\n    \n    def upload_new_metadata(self, metadata_json: str, doc_uuid: str):\n        \"\"\"Upload new metadata and return hash\"\"\"\n        print(f\"\\n\u2b06\ufe0f Step 6: Uploading new metadata...\")\n        \n        # Calculate hash\n        metadata_hash = hashlib.sha256(metadata_json.encode()).hexdigest()\n        print(f\"\u2705 New metadata hash: {metadata_hash}\")\n        \n        # Upload using working method from upload_manager.py\n        headers = {\n            'Content-Type': 'application/octet-stream',\n            'rm-batch-number': '1',\n            'rm-filename': f'{doc_uuid}.metadata',  # Required: UUID.metadata format\n            'rm-sync-id': str(uuid.uuid4()),\n            
'User-Agent': 'reMarkable-desktop-win/3.11.1.1951',  # Use Windows UA\n            'Accept-Encoding': 'gzip, deflate',\n            'Accept-Language': 'en-BE,*',\n            'Connection': 'Keep-Alive'\n        }\n        \n        # Add CRC32C checksum\n        crc32c_header = compute_crc32c_header(metadata_json.encode())\n        if crc32c_header:\n            headers['x-goog-hash'] = crc32c_header\n        \n        upload_response = self.session.put(\n            f\"https://eu.tectonic.remarkable.com/sync/v3/files/{metadata_hash}\",\n            data=metadata_json.encode(),\n            headers=headers \n        )\n        \n        print(f\"\u2705 Metadata upload response: {upload_response.status_code}\")\n        if upload_response.status_code not in [200, 202]:\n            print(f\"\u274c Upload failed: {upload_response.text}\")\n            raise RuntimeError(f\"Metadata upload failed: {upload_response.status_code}\")\n        \n        return metadata_hash\n    \n    def upload_real_pagedata(self, doc_uuid: str):\n        \"\"\"Upload real pagedata (newline) to match real app documents\"\"\"\n        print(f\"\\n\u2b06\ufe0f Step 6.5: Uploading real pagedata...\")\n        \n        # Real app pagedata is just a newline character\n        pagedata_content = \"\\n\"\n        pagedata_hash = hashlib.sha256(pagedata_content.encode()).hexdigest()\n        \n        print(f\"\u2705 Real pagedata hash: {pagedata_hash}\")\n        print(f\"\u2705 Real pagedata content: {repr(pagedata_content)} ({len(pagedata_content)} bytes)\")\n        \n        # Upload pagedata using working method\n        headers = {\n            'Content-Type': 'application/octet-stream',\n            'rm-batch-number': '1',\n            'rm-filename': f'{doc_uuid}.pagedata',  # Required: UUID.pagedata format\n            'rm-sync-id': str(uuid.uuid4()),\n            'User-Agent': 'reMarkable-desktop-win/3.11.1.1951',\n            'Accept-Encoding': 'gzip, deflate',\n            'Accept-Language': 'en-BE,*',\n            'Connection': 'Keep-Alive'\n        }\n        \n        # Add CRC32C checksum\n        crc32c_header = compute_crc32c_header(pagedata_content.encode())\n        if crc32c_header:\n            headers['x-goog-hash'] = crc32c_header\n        \n        upload_response = self.session.put(\n            f\"https://eu.tectonic.remarkable.com/sync/v3/files/{pagedata_hash}\",\n            data=pagedata_content.encode(),\n            headers=headers \n        )\n        \n        print(f\"\u2705 Pagedata upload response: {upload_response.status_code}\")\n        if upload_response.status_code not in [200, 202]:\n            print(f\"\u274c Upload failed: {upload_response.text}\")\n            raise RuntimeError(f\"Pagedata upload failed: {upload_response.status_code}\")\n        \n        return pagedata_hash\n    \n    def create_new_document_schema(self, doc_lines: list, new_metadata_hash: str, metadata_line: str, new_pagedata_hash: str = None):\n        \"\"\"Create new document schema with updated metadata hash and pagedata\"\"\"\n        print(f\"\\n\ud83c\udfd7\ufe0f Step 7: Creating new document schema...\")\n        \n        # Replace metadata line and pagedata line with new hashes\n        new_lines = []\n        pagedata_line = None\n        \n        # Find pagedata line\n        for line in doc_lines[1:]:  # Skip version\n            if ':' in line and '.pagedata' in line:\n                pagedata_line = line\n                break\n        \n        for line in doc_lines:\n            if 
line == metadata_line:\n                # Replace metadata hash but keep size\n                parts = line.split(':')\n                parts[0] = new_metadata_hash  # Update hash\n                new_line = ':'.join(parts)\n                new_lines.append(new_line)\n                print(f\"\u2705 Updated metadata line:\")\n                print(f\"   Old: {line}\")\n                print(f\"   New: {new_line}\")\n            elif new_pagedata_hash and line == pagedata_line:\n                # Replace pagedata hash and update size to 1 byte\n                parts = line.split(':')\n                parts[0] = new_pagedata_hash  # Update hash\n                parts[4] = '1'  # Update size to 1 byte (newline)\n                new_line = ':'.join(parts)\n                new_lines.append(new_line)\n                print(f\"\u2705 Updated pagedata line:\")\n                print(f\"   Old: {line}\")\n                print(f\"   New: {new_line}\")\n            else:\n                new_lines.append(line)\n        \n        new_doc_content = '\\n'.join(new_lines)\n        \n        print(f\"\u2705 New document schema ({len(new_doc_content)} bytes):\")\n        for i, line in enumerate(new_lines):\n            print(f\"   Line {i}: {line}\")\n        \n        return new_doc_content\n    \n    def upload_new_document_schema(self, doc_content: str, doc_uuid: str):\n        \"\"\"Upload new document schema\"\"\"\n        print(f\"\\n\u2b06\ufe0f Step 8: Uploading new document schema...\")\n        \n        # Calculate hash\n        doc_hash = hashlib.sha256(doc_content.encode()).hexdigest()\n        print(f\"\u2705 New document schema hash: {doc_hash}\")\n        \n        # Upload using working method\n        headers = {\n            'Content-Type': 'application/octet-stream',\n            'rm-batch-number': '1',\n            'rm-filename': f'{doc_uuid}.docSchema',  # Required: UUID.docSchema format\n            'rm-sync-id': str(uuid.uuid4()),\n            'User-Agent': 'reMarkable-desktop-win/3.11.1.1951',\n            'Accept-Encoding': 'gzip, deflate',\n            'Accept-Language': 'en-BE,*',\n            'Connection': 'Keep-Alive'\n        }\n        \n        # Add CRC32C checksum\n        crc32c_header = compute_crc32c_header(doc_content.encode())\n        if crc32c_header:\n            headers['x-goog-hash'] = crc32c_header\n        \n        upload_response = self.session.put(\n            f\"https://eu.tectonic.remarkable.com/sync/v3/files/{doc_hash}\",\n            data=doc_content.encode(),\n            headers=headers\n        )\n        \n        print(f\"\u2705 Document schema upload response: {upload_response.status_code}\")\n        if upload_response.status_code not in [200, 202]:\n            print(f\"\u274c Upload failed: {upload_response.text}\")\n            raise RuntimeError(f\"Document schema upload failed: {upload_response.status_code}\")\n        \n        return doc_hash\n    \n    def update_root_docschema(self, root_content: str, doc_info: dict, new_doc_hash: str):\n        \"\"\"Update root.docSchema with new document hash\"\"\"\n        print(f\"\\n\ud83d\udd04 Step 9: Updating root.docSchema...\")\n        \n        # Replace old document line with new hash\n        old_line = doc_info['full_line']\n        parts = old_line.split(':')\n        parts[0] = new_doc_hash  # Update document hash\n        new_line = ':'.join(parts)\n        \n        print(f\"\u2705 Updating root.docSchema entry:\")\n        print(f\"   Old: {old_line}\")\n        print(f\"   New: 
{new_line}\")\n        \n        # Replace in root content\n        new_root_content = root_content.replace(old_line, new_line)\n        \n        print(f\"\u2705 New root.docSchema size: {len(new_root_content)} bytes\")\n        \n        return new_root_content\n    \n    def upload_new_root(self, root_content: str, generation: int):\n        \"\"\"Upload new root.docSchema and update roothash\"\"\"\n        print(f\"\\n\u2b06\ufe0f Step 10: Uploading new root.docSchema...\")\n        \n        # Calculate hash\n        root_hash = hashlib.sha256(root_content.encode()).hexdigest()\n        print(f\"\u2705 New root hash: {root_hash}\")\n        \n        # Upload root content using working method\n        headers = {\n            'Content-Type': 'text/plain',\n            'rm-batch-number': '1',\n            'rm-filename': 'root.docSchema',  # System filename for root.docSchema\n            'rm-sync-id': str(uuid.uuid4()),\n            'User-Agent': 'reMarkable-desktop-win/3.11.1.1951',\n            'Accept-Encoding': 'gzip, deflate',\n            'Accept-Language': 'en-BE,*',\n            'Connection': 'Keep-Alive'\n        }\n        \n        # Add CRC32C checksum (from test_uploads.py method)\n        crc32c_header = compute_crc32c_header(root_content.encode())\n        if crc32c_header:\n            headers['x-goog-hash'] = crc32c_header\n        \n        upload_response = self.session.put(\n            f\"https://eu.tectonic.remarkable.com/sync/v3/files/{root_hash}\",\n            data=root_content.encode(),\n            headers=headers\n        )\n        \n        print(f\"\u2705 Root content upload response: {upload_response.status_code}\")\n        if upload_response.status_code not in [200, 202]:\n            print(f\"\u274c Upload failed: {upload_response.text}\")\n            raise RuntimeError(f\"Root content upload failed: {upload_response.status_code}\")\n        \n        # Update root hash pointer using working method\n        print(f\"\\n\ud83d\udd04 Step 11: Updating root hash pointer...\")\n        \n        # Create root data exactly like working upload_manager.py\n        root_update_data = {\n            \"broadcast\": True,\n            \"generation\": generation,  # Use generation parameter\n            \"hash\": root_hash\n        }\n        \n        # Convert to JSON with 2-space indent like real app\n        root_content_body = json.dumps(root_update_data, indent=2).encode('utf-8')\n        \n        # Headers exactly like working upload_manager.py\n        headers = {\n            'Content-Type': 'application/json',\n            'rm-batch-number': '1',\n            'rm-filename': 'roothash',\n            'rm-sync-id': str(uuid.uuid4()),\n            'User-Agent': 'reMarkable-desktop-win/3.11.1.1951',\n            'Accept-Encoding': 'gzip, deflate',\n            'Accept-Language': 'en-BE,*',\n            'Connection': 'Keep-Alive'\n        }\n        \n        # Add CRC32C checksum\n        crc32c_header = compute_crc32c_header(root_content_body)\n        if crc32c_header:\n            headers['x-goog-hash'] = crc32c_header\n        \n        # Use /sync/v3/root endpoint like working code\n        root_update_response = self.session.put(\n            \"https://eu.tectonic.remarkable.com/sync/v3/root\",\n            data=root_content_body,\n            headers=headers\n        )\n        \n        print(f\"\u2705 Root update response: {root_update_response.status_code}\")\n        if root_update_response.status_code not in [200, 202]:\n            
print(f\"\u274c Root update failed: {root_update_response.text}\")\n            raise RuntimeError(f\"Root update failed: {root_update_response.status_code}\")\n        \n        return root_hash\n    \n    def move_document_from_trash(self, doc_uuid: str):\n        \"\"\"Complete process to move document from trash to root\"\"\"\n        print(f\"\ud83d\udd04 Moving Document from Trash to Root\")\n        print(f\"Document UUID: {doc_uuid}\")\n        print(\"=\" * 60)\n        \n        try:\n            # Step 1: Get current root info\n            root_data, root_content = self.get_current_root_info()\n            \n            # Step 2: Find document in root\n            doc_info = self.get_document_info(doc_uuid, root_content)\n            \n            # Step 3: Get document schema\n            doc_content, doc_lines = self.get_document_schema(doc_info['hash'])\n            \n            # Step 4: Get current metadata\n            current_metadata, metadata_line = self.get_current_metadata(doc_lines)\n            \n            # Check current parent and determine move action\n            current_parent = current_metadata.get('parent', '')\n            if current_parent == 'trash':\n                print(f\"\ud83d\udcc1 Document is in trash, moving to gpt_in folder...\")\n                target_parent = \"99c6551f-2855-44cf-a4e4-c9c586558f42\"  # gpt_in folder\n                move_description = \"from trash to gpt_in folder\"\n            elif current_parent == '':\n                print(f\"\ud83d\udcc1 Document is in root, moving to gpt_in folder...\")\n                target_parent = \"99c6551f-2855-44cf-a4e4-c9c586558f42\"  # gpt_in folder  \n                move_description = \"from root to gpt_in folder\"\n            else:\n                print(f\"\ud83d\udcc1 Document is in folder '{current_parent}', moving to gpt_in folder...\")\n                target_parent = \"99c6551f-2855-44cf-a4e4-c9c586558f42\"  # gpt_in folder\n                move_description = f\"from folder '{current_parent}' to gpt_in folder\"\n            \n            # Step 5: Create updated metadata (move to gpt_in folder)\n            updated_metadata_json = self.create_updated_metadata(current_metadata, new_parent=target_parent)\n            \n            # Step 6: Upload new metadata\n            new_metadata_hash = self.upload_new_metadata(updated_metadata_json, doc_uuid)\n            \n            # Step 6.5: Upload real pagedata to match real app\n            new_pagedata_hash = self.upload_real_pagedata(doc_uuid)\n            \n            # Step 7: Create new document schema\n            new_doc_content = self.create_new_document_schema(doc_lines, new_metadata_hash, metadata_line, new_pagedata_hash)\n            \n            # Step 8: Upload new document schema\n            new_doc_hash = self.upload_new_document_schema(new_doc_content, doc_uuid)\n            \n            # Step 9: Update root.docSchema\n            new_root_content = self.update_root_docschema(root_content, doc_info, new_doc_hash)\n            \n            # Step 10-11: Upload new root and update pointer\n            new_root_hash = self.upload_new_root(new_root_content, root_data['generation'])\n            \n            print(f\"\\n\ud83c\udf89 SUCCESS! 
Document moved {move_description}\")\n            print(f\"   Document: {current_metadata.get('visibleName')}\")\n            print(f\"   Old parent: {current_parent or '(root)'}\")\n            print(f\"   New parent: gpt_in ({target_parent})\")\n            print(f\"   New root hash: {new_root_hash}\")\n            \n            return True\n            \n        except Exception as e:\n            print(f\"\\n\u274c Move operation failed: {e}\")\n            return False",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/cloudtest/test_move_from_trash.py",
      "tags": [
        "class",
        "documentmover"
      ],
      "updated_at": "2025-12-07T01:56:35.177857",
      "usage_example": "# Example usage:\n# result = DocumentMover(bases)"
    },
    {
      "best_practices": [
        "Review the console output carefully to understand which fixes were applied",
        "Run the generated 'fixed_upload_test.py' to verify the fixes before using in production",
        "Ensure the ImplementationFixer class is properly implemented with all required fix methods",
        "Check that you have write permissions in the directory where this function is executed",
        "Consider running this in a test environment first before applying to production code",
        "The function prints next steps - follow them sequentially for best results",
        "Manually verify JWT device description changes as noted in the output"
      ],
      "class_interface": {},
      "complexity": "moderate",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-07 00:56:14",
      "decorators": [],
      "dependencies": [
        "json",
        "os",
        "time",
        "pathlib",
        "typing"
      ],
      "description": "Orchestrates the application of multiple critical fixes to align test code with real application behavior, including user agent, metadata, page data, JWT, and field corrections.",
      "docstring": "Apply all critical fixes to match real app behavior",
      "id": 2122,
      "imports": [
        "import json",
        "import os",
        "import time",
        "from pathlib import Path",
        "from typing import Dict",
        "from typing import Any"
      ],
      "imports_required": [
        "import json",
        "import os",
        "import time",
        "from pathlib import Path",
        "from typing import Dict",
        "from typing import Any"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 393,
      "line_start": 368,
      "name": "main_v46",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as the main entry point for an implementation fixing tool that applies a series of corrections to test code to match the behavior of a real application. It instantiates an ImplementationFixer object, applies six different types of fixes (user agent, metadata source, page data content, last opened field, JWT device description, and creates a fixed upload test), generates a summary of applied fixes, and provides next steps for verification. This is typically used in development/testing scenarios where test code needs to be synchronized with production application behavior.",
      "return_annotation": null,
      "return_explained": "Returns a boolean value indicating whether any fixes were successfully applied. Returns True if at least one fix was applied (len(fixer.fixes_applied) > 0), False otherwise. This allows calling code to determine if the fixing process made any changes.",
      "settings_required": [
        "Requires ImplementationFixer class to be defined in the same module or imported",
        "Requires write permissions to create 'fixed_upload_test.py' file in the current directory",
        "May require specific file paths or configuration files that ImplementationFixer depends on"
      ],
      "source_code": "def main():\n    \"\"\"Apply all critical fixes to match real app behavior\"\"\"\n    print(\"\ud83d\udd27 IMPLEMENTATION FIXER\")\n    print(\"=\" * 50)\n    print(\"Applying fixes identified by dry run analysis...\")\n    \n    fixer = ImplementationFixer()\n    \n    # Apply all fixes\n    fixer.fix_user_agent()\n    fixer.fix_metadata_source()\n    fixer.fix_pagedata_content()\n    fixer.fix_last_opened_field()\n    fixer.fix_jwt_device_description()\n    fixer.create_fixed_upload_test()\n    \n    # Generate summary\n    fixer.generate_fix_summary()\n    \n    print(\"\\n\ud83c\udfaf NEXT STEPS:\")\n    print(\"1. Review the fixes applied above\")\n    print(\"2. Run 'python3 fixed_upload_test.py' to verify fixed structure\")\n    print(\"3. Test with actual upload once satisfied with the fixes\")\n    print(\"4. Manually update JWT device description in auth process\")\n    \n    return len(fixer.fixes_applied) > 0",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/cloudtest/implementation_fixer.py",
      "tags": [
        "testing",
        "fixing",
        "automation",
        "code-generation",
        "test-synchronization",
        "implementation-fixer",
        "orchestration",
        "main-entry-point",
        "user-agent",
        "metadata",
        "jwt",
        "upload-test"
      ],
      "updated_at": "2025-12-07T01:56:14.726443",
      "usage_example": "if __name__ == '__main__':\n    success = main()\n    if success:\n        print('Fixes applied successfully')\n    else:\n        print('No fixes were applied')\n    exit(0 if success else 1)"
    },
    {
      "best_practices": [
        "This function should be called as the entry point when running the script standalone",
        "The function handles KeyboardInterrupt gracefully, allowing users to cancel the sync operation",
        "Returns proper exit codes (0 for success, 1 for failure) suitable for shell scripting",
        "Provides visual feedback with emoji indicators for better user experience",
        "Catches all exceptions to prevent unhandled crashes and provides error messages",
        "Displays a summary of synchronized content after successful completion",
        "Should be wrapped in if __name__ == '__main__': block for proper module behavior"
      ],
      "class_interface": {},
      "complexity": "simple",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-07 00:55:28",
      "decorators": [],
      "dependencies": [
        "requests"
      ],
      "description": "Entry point function that orchestrates a complete synchronization of a reMarkable tablet's content, displaying progress and summary statistics.",
      "docstring": "Main function for standalone execution",
      "id": 2120,
      "imports": [
        "import os",
        "import sys",
        "import json",
        "import time",
        "import hashlib",
        "import requests",
        "import logging",
        "import re",
        "import shutil",
        "import subprocess",
        "from pathlib import Path",
        "from datetime import datetime",
        "from typing import Dict",
        "from typing import Any",
        "from typing import Optional",
        "from typing import List",
        "from typing import Set",
        "from dataclasses import dataclass",
        "import re",
        "import shutil"
      ],
      "imports_required": [
        "from pathlib import Path",
        "from datetime import datetime",
        "from typing import Dict, Any, Optional, List, Set",
        "from dataclasses import dataclass",
        "import os",
        "import sys",
        "import json",
        "import time",
        "import hashlib",
        "import requests",
        "import logging",
        "import re",
        "import shutil",
        "import subprocess"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 860,
      "line_start": 826,
      "name": "main_v65",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as the main entry point for a standalone reMarkable tablet synchronization tool. It initializes a RemarkableReplicaSync instance, performs a complete replica sync, handles errors gracefully, and displays a formatted summary of the synchronized library including folder and document counts. It's designed for command-line execution with user-friendly console output including emoji indicators for status.",
      "return_annotation": null,
      "return_explained": "Returns an integer exit code: 0 for successful completion, 1 for any failure (sync failure, keyboard interrupt, or exception). This follows standard Unix convention for process exit codes.",
      "settings_required": [
        "RemarkableReplicaSync class must be defined and available in the same module or imported",
        "RemarkableReplicaSync must have methods: sync_complete_replica(), get_folders(), get_root_documents(), get_documents_in_folder(uuid)",
        "Appropriate reMarkable API credentials or authentication configured for RemarkableReplicaSync",
        "Network connectivity to reMarkable cloud services"
      ],
      "source_code": "def main():\n    \"\"\"Main function for standalone execution\"\"\"\n    print(\"\ud83d\udd04 reMarkable Replica Sync - Standalone Tool\")\n    print(\"=\" * 50)\n    \n    try:\n        sync = RemarkableReplicaSync()\n        success = sync.sync_complete_replica()\n        \n        if success:\n            print(\"\\n\u2705 Sync completed successfully!\")\n            \n            # Show summary\n            folders = sync.get_folders()\n            root_docs = sync.get_root_documents()\n            \n            print(f\"\\n\ud83d\udcca Current Library:\")\n            print(f\"   \ud83d\udcc2 Folders: {len(folders)}\")\n            print(f\"   \ud83d\udcc4 Root Documents: {len(root_docs)}\")\n            \n            for folder in folders:\n                folder_docs = sync.get_documents_in_folder(folder['uuid'])\n                print(f\"   \ud83d\udcc2 {folder['name']}: {len(folder_docs)} documents\")\n        else:\n            print(\"\\n\u274c Sync failed!\")\n            return 1\n            \n    except KeyboardInterrupt:\n        print(\"\\n\u26a0\ufe0f Sync interrupted by user\")\n        return 1\n    except Exception as e:\n        print(f\"\\n\u274c Sync error: {e}\")\n        return 1\n    \n    return 0",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/cloudtest/sync_replica.py",
      "tags": [
        "entry-point",
        "synchronization",
        "remarkable-tablet",
        "cli-tool",
        "error-handling",
        "standalone",
        "main-function",
        "document-management",
        "cloud-sync"
      ],
      "updated_at": "2025-12-07T01:56:14.689217",
      "usage_example": "if __name__ == '__main__':\n    exit_code = main()\n    sys.exit(exit_code)"
    },
    {
      "best_practices": [
        "Always run the dry-run phase first to preview changes before applying them - this is built into the function flow",
        "Ensure proper authentication is configured before calling this function",
        "The function requires user interaction via stdin, so it should only be called in interactive CLI contexts, not in automated scripts",
        "Handle the boolean return value to determine if the repair was successful for proper exit codes or logging",
        "Ensure network connectivity to reMarkable cloud services before running",
        "Consider backing up the original root.docSchema before running this repair tool",
        "The function catches broad exceptions during initialization but not during the repair process itself - ensure RootDocSchemaRepair.run_repair() has proper error handling"
      ],
      "class_interface": {},
      "complexity": "moderate",
      "component_type": "function",
      "conditional_imports": [
        {
          "condition": "Required by RootDocSchemaRepair class for parsing and manipulating docSchema data",
          "import": "import json",
          "optional": false
        },
        {
          "condition": "Required by RootDocSchemaRepair class for timing operations or delays",
          "import": "import time",
          "optional": false
        },
        {
          "condition": "Required by RootDocSchemaRepair class for calculating file hashes",
          "import": "import hashlib",
          "optional": false
        },
        {
          "condition": "Required by RootDocSchemaRepair class for file path operations",
          "import": "from pathlib import Path",
          "optional": false
        },
        {
          "condition": "Required by RootDocSchemaRepair class for type annotations",
          "import": "from typing import Dict, List, Tuple, Any",
          "optional": false
        }
      ],
      "created_at": "2025-12-07 00:51:13",
      "decorators": [],
      "dependencies": [
        "requests"
      ],
      "description": "Entry point function that orchestrates a repair process for a corrupted reMarkable root.docSchema file by running a dry-run analysis first, then optionally applying the repair based on user confirmation.",
      "docstring": "Run the root.docSchema repair",
      "id": 2109,
      "imports": [
        "import json",
        "import time",
        "import hashlib",
        "from pathlib import Path",
        "from typing import Dict",
        "from typing import List",
        "from typing import Tuple",
        "from typing import Any",
        "import requests",
        "from auth import RemarkableAuth"
      ],
      "imports_required": [
        "from auth import RemarkableAuth"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 438,
      "line_start": 405,
      "name": "main_v64",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as the main CLI interface for the RootDocSchemaRepair tool. It guides users through a two-phase repair process: first performing a dry-run to preview changes (preserving folders and invoice PDFs, recalculating document sizes), then prompting for confirmation before applying the actual repair. It handles initialization errors and provides user-friendly console output with emoji indicators for status updates.",
      "return_annotation": null,
      "return_explained": "Returns a boolean value indicating the success or failure of the repair operation. Returns True if the repair was successfully applied (when user confirms), False if the dry run failed, user cancelled the operation, or an exception occurred during initialization. The return value reflects the final state of the repair process.",
      "settings_required": [
        "RootDocSchemaRepair class must be defined and importable in the same module or imported from another module",
        "RemarkableAuth authentication credentials must be configured (likely API tokens or device tokens for reMarkable cloud access)",
        "Network access to reMarkable cloud services for uploading repaired docSchema",
        "Valid reMarkable account with existing root.docSchema file to repair"
      ],
      "source_code": "def main():\n    \"\"\"Run the root.docSchema repair\"\"\"\n    try:\n        repair_tool = RootDocSchemaRepair()\n        \n        print(\"\ud83d\udd27 reMarkable Root DocSchema Repair Tool\")\n        print(\"=\" * 50)\n        print(\"This tool will fix the corrupted root.docSchema by:\")\n        print(\"  \u2705 Preserving working entries (folders + invoice PDFs)\")\n        print(\"  \ud83d\udd27 Recalculating correct sizes for broken documents\")\n        print(\"  \u2b06\ufe0f  Uploading the corrected root.docSchema\")\n        print()\n        \n        # First run in dry-run mode to show what will be done\n        print(\"\ud83d\udd0d Running DRY RUN first to analyze the repair plan...\")\n        dry_run_success = repair_tool.run_repair(dry_run=True)\n        \n        if dry_run_success:\n            print(\"\\n\" + \"=\" * 50)\n            response = input(\"\ud83d\ude80 Dry run successful! Apply the repair? (yes/no): \").strip().lower()\n            \n            if response in ['yes', 'y']:\n                print(\"\ud83d\ude80 Applying the repair...\")\n                return repair_tool.run_repair(dry_run=False)\n            else:\n                print(\"\u274c Repair cancelled by user\")\n                return False\n        else:\n            print(\"\u274c Dry run failed - cannot proceed with repair\")\n            return False\n            \n    except Exception as e:\n        print(f\"\u274c Repair tool failed to initialize: {e}\")\n        return False",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/cloudtest/fix_root_docschema.py",
      "tags": [
        "cli",
        "repair-tool",
        "remarkable",
        "docschema",
        "interactive",
        "dry-run",
        "file-repair",
        "user-confirmation",
        "error-handling",
        "cloud-sync"
      ],
      "updated_at": "2025-12-07T01:56:14.688486",
      "usage_example": "if __name__ == '__main__':\n    # Run the repair tool interactively\n    success = main()\n    \n    if success:\n        print('Repair completed successfully!')\n        exit(0)\n    else:\n        print('Repair failed or was cancelled')\n        exit(1)"
    },
    {
      "best_practices": [
        "This function should only be used for testing and simulation purposes, not for actual production uploads",
        "Ensure the FixedUploadTest class is properly initialized and available before calling this function",
        "Review the generated JSON results file to understand what fixes were applied and verified",
        "The function creates a timestamped results file to avoid overwriting previous test runs",
        "Always check the return value to determine if the system is ready for real uploads",
        "The function handles exceptions gracefully and returns False on failure, making it safe to use in automated testing pipelines",
        "Ensure sufficient disk space is available for the test_results directory before running"
      ],
      "class_interface": {},
      "complexity": "moderate",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-07 00:34:45",
      "decorators": [],
      "dependencies": [
        "pathlib",
        "json",
        "time"
      ],
      "description": "Executes a simulation-only test of a fixed upload process for reMarkable documents, verifying that all critical fixes are correctly applied without making actual API calls.",
      "docstring": "Run the fixed upload test - SIMULATION ONLY",
      "id": 2067,
      "imports": [
        "import os",
        "import json",
        "import time",
        "import uuid",
        "import hashlib",
        "import base64",
        "import binascii",
        "from pathlib import Path",
        "from typing import Dict",
        "from typing import Any",
        "from auth import RemarkableAuth",
        "import crc32c"
      ],
      "imports_required": [
        "import json",
        "import time",
        "from pathlib import Path"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 393,
      "line_start": 349,
      "name": "main_v63",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as a comprehensive test harness for validating document upload fixes in a reMarkable integration. It simulates the upload process, verifies that all fixes are properly applied, saves detailed results to a JSON file, and provides a summary report. The function is designed to ensure that the upload logic is correct before attempting real API calls, reducing the risk of errors and failed uploads.",
      "return_annotation": null,
      "return_explained": "Returns a boolean value indicating whether all fixes were successfully verified (True) or if some fixes need review (False). This return value can be used to determine if the system is ready for real uploads.",
      "settings_required": [
        "Requires a FixedUploadTest class to be defined in the same module or imported",
        "Requires write permissions to create a 'test_results' directory in the parent directory of the script",
        "The FixedUploadTest class must implement simulate_fixed_upload() and verify_fixes_applied() methods"
      ],
      "source_code": "def main():\n    \"\"\"Run the fixed upload test - SIMULATION ONLY\"\"\"\n    \n    try:\n        print(\"\ud83e\uddea FIXED UPLOAD TEST - SIMULATION ONLY\")\n        print(\"=\" * 60)\n        print(\"\ud83d\udeab NO ACTUAL API CALLS - TESTING FIXES ONLY\")\n        \n        # Initialize test\n        test = FixedUploadTest()\n        \n        # Simulate fixed upload\n        results = test.simulate_fixed_upload(\"Real_App_Behavior_Test\")\n        \n        # Verify fixes\n        fixes_verified = test.verify_fixes_applied(results)\n        \n        # Save results\n        results_file = Path(__file__).parent / \"test_results\" / f\"fixed_upload_simulation_{int(time.time())}.json\"\n        results_file.parent.mkdir(exist_ok=True)\n        \n        with open(results_file, 'w') as f:\n            json.dump(results, f, indent=2, default=str)\n        \n        print(f\"\\n\ud83d\udcbe Simulation results saved to: {results_file}\")\n        \n        # Summary\n        print(f\"\\n\ud83d\udccb SUMMARY:\")\n        print(f\"   All fixes applied: {'\u2705 YES' if fixes_verified else '\u274c NO'}\")\n        print(f\"   Components created: {len(results['upload_requests'])}\")\n        print(f\"   Ready for real upload: {'\u2705 YES' if fixes_verified else '\u274c NO'}\")\n        \n        if fixes_verified:\n            print(f\"\\n\ud83c\udfaf READY FOR REAL UPLOAD!\")\n            print(f\"   The simulated upload shows all critical fixes are correctly applied.\")\n            print(f\"   This should produce visible documents that match real app behavior.\")\n        else:\n            print(f\"\\n\u26a0\ufe0f FIXES NEED REVIEW\")\n            print(f\"   Some fixes were not applied correctly.\")\n        \n        return fixes_verified\n        \n    except Exception as e:\n        print(f\"\u274c Fixed upload test failed: {e}\")\n        return False",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/cloudtest/fixed_upload_test.py",
      "tags": [
        "testing",
        "simulation",
        "upload",
        "remarkable",
        "validation",
        "document-processing",
        "integration-testing",
        "fix-verification",
        "file-io",
        "json"
      ],
      "updated_at": "2025-12-07T01:56:14.687726",
      "usage_example": "if __name__ == '__main__':\n    # Run the fixed upload simulation test\n    success = main()\n    \n    if success:\n        print('All tests passed, ready for production upload')\n    else:\n        print('Tests failed, review fixes before proceeding')\n    \n    # Exit with appropriate status code\n    import sys\n    sys.exit(0 if success else 1)"
    },
    {
      "best_practices": [
        "This function is designed to be called as the main entry point of the script",
        "Ensure RealAppUploadAnalyzer class is properly implemented before calling this function",
        "The function handles exceptions gracefully and provides user-friendly console output",
        "Results are automatically saved to files, ensure proper file system permissions",
        "The hardcoded document name 'Pylontech force H3 datasheet' suggests this is for a specific test case",
        "Consider parameterizing the document name if this function needs to be reused for different documents",
        "The function returns a boolean for easy integration with exit codes in scripts"
      ],
      "class_interface": {},
      "complexity": "moderate",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-07 00:33:42",
      "decorators": [],
      "dependencies": [
        "requests",
        "pathlib"
      ],
      "description": "Entry point function that orchestrates the analysis of a document uploaded through a reMarkable app, saves results and logs, and reports success or failure.",
      "docstring": "Run the real app upload analysis",
      "id": 2065,
      "imports": [
        "import os",
        "import json",
        "import time",
        "from pathlib import Path",
        "import requests",
        "from auth import RemarkableAuth",
        "import re"
      ],
      "imports_required": [
        "import os",
        "import json",
        "import time",
        "from pathlib import Path",
        "import requests",
        "from auth import RemarkableAuth",
        "import re"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 407,
      "line_start": 384,
      "name": "main_v62",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as the main execution entry point for analyzing real app document uploads to the reMarkable cloud. It instantiates a RealAppUploadAnalyzer, analyzes a specific document ('Pylontech force H3 datasheet'), saves the analysis results and raw logs, and provides user feedback through console output. The primary use case is to understand the correct pattern for root.docSchema sizes by analyzing actual app upload behavior.",
      "return_annotation": null,
      "return_explained": "Returns a boolean value indicating success (True) or failure (False) of the analysis operation. Returns True if the analysis completed successfully and results['success'] is True, otherwise returns False. Also returns False if any exception occurs during execution.",
      "settings_required": [
        "RealAppUploadAnalyzer class must be defined and available in the same module or imported",
        "auth module with RemarkableAuth class must be available",
        "reMarkable cloud authentication credentials (likely required by RemarkableAuth)",
        "Network access to reMarkable cloud services",
        "Write permissions for saving analysis results and logs"
      ],
      "source_code": "def main():\n    \"\"\"Run the real app upload analysis\"\"\"\n    try:\n        analyzer = RealAppUploadAnalyzer()\n        \n        # Analyze the document uploaded by the real app\n        results = analyzer.analyze_real_app_document(\"Pylontech force H3 datasheet\")\n        \n        # Save results\n        analyzer.save_analysis_results(results)\n        analyzer.save_raw_logs()\n        \n        if results['success']:\n            print(f\"\\n\ud83c\udf89 Analysis Complete!\")\n            print(f\"\u2705 Successfully analyzed '{results['document_name']}'\")\n            print(f\"\ud83d\udcca This shows us the correct pattern for root.docSchema sizes\")\n        else:\n            print(f\"\\n\u274c Analysis failed: {results.get('error', 'Unknown error')}\")\n        \n        return results['success']\n        \n    except Exception as e:\n        print(f\"\u274c Failed to run analysis: {e}\")\n        return False",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/cloudtest/test_real_app_upload.py",
      "tags": [
        "entry-point",
        "orchestration",
        "remarkable",
        "document-analysis",
        "cloud-upload",
        "logging",
        "error-handling",
        "main-function"
      ],
      "updated_at": "2025-12-07T01:56:14.687083",
      "usage_example": "if __name__ == '__main__':\n    success = main()\n    if success:\n        print('Analysis completed successfully')\n    else:\n        print('Analysis failed')\n    exit(0 if success else 1)"
    },
    {
      "best_practices": [
        "This function modifies sys.path to enable local imports; ensure the parent directory structure is as expected",
        "The function is designed for testing purposes and may not be suitable for production use without additional error handling",
        "Ensure proper cleanup of sys.path modifications if using this in a larger application context",
        "The hardcoded output directory name 'remarkable_replica_v2' should be parameterized for production use",
        "Consider wrapping the function call in a try-except block to handle unexpected exceptions",
        "The debug print statement for session type should be removed or converted to proper logging in production",
        "Ensure RemarkableReplicaBuilder is properly imported or defined before calling this function"
      ],
      "class_interface": {},
      "complexity": "moderate",
      "component_type": "function",
      "conditional_imports": [
        {
          "condition": "always needed for sys.path manipulation to enable local imports",
          "import": "import sys",
          "optional": false
        },
        {
          "condition": "imported dynamically after path modification; requires auth.py module in parent directory",
          "import": "from auth import RemarkableAuth",
          "optional": false
        }
      ],
      "created_at": "2025-12-07 00:28:07",
      "decorators": [],
      "dependencies": [
        "requests",
        "pathlib",
        "sys",
        "PyPDF2",
        "shutil",
        "subprocess",
        "re",
        "json",
        "os",
        "logging",
        "typing",
        "dataclasses",
        "datetime"
      ],
      "description": "Entry point function that authenticates with Remarkable cloud service and builds a complete local replica of the user's Remarkable documents and notebooks.",
      "docstring": "Main function for testing",
      "id": 2047,
      "imports": [
        "import os",
        "import json",
        "import requests",
        "import logging",
        "from pathlib import Path",
        "from typing import Dict",
        "from typing import List",
        "from typing import Optional",
        "from typing import Any",
        "from typing import Set",
        "from dataclasses import dataclass",
        "from dataclasses import asdict",
        "from datetime import datetime",
        "import sys",
        "from auth import RemarkableAuth",
        "import re",
        "import shutil",
        "import subprocess",
        "import PyPDF2",
        "import shutil"
      ],
      "imports_required": [
        "from pathlib import Path"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 908,
      "line_start": 887,
      "name": "main_v61",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as a test harness and main execution point for the Remarkable replica building system. It handles authentication through RemarkableAuth, obtains an authenticated session, and uses RemarkableReplicaBuilder to create a local copy of all Remarkable cloud content. The function includes error handling for authentication failures and debug output for session verification.",
      "return_annotation": null,
      "return_explained": "Returns a boolean value indicating success or failure of the replica building process. Returns False explicitly if authentication fails, otherwise returns the success status from builder.build_complete_replica(). Type: bool",
      "settings_required": [
        "auth.py module must exist in the parent directory containing RemarkableAuth class",
        "RemarkableReplicaBuilder class must be defined in the same module or imported",
        "Remarkable cloud service credentials (handled by RemarkableAuth)",
        "Network connectivity to Remarkable cloud services",
        "Write permissions for creating 'remarkable_replica_v2' directory"
      ],
      "source_code": "def main():\n    \"\"\"Main function for testing\"\"\"\n    import sys\n    sys.path.insert(0, str(Path(__file__).parent))\n    \n    from auth import RemarkableAuth\n    \n    # Authenticate and get session\n    auth = RemarkableAuth()\n    session = auth.get_authenticated_session()\n    \n    if not session:\n        print(\"\u274c Authentication failed\")\n        return False\n    \n    print(f\"Session type: {type(session)}\")  # Debug line\n    \n    # Build replica\n    builder = RemarkableReplicaBuilder(session, \"remarkable_replica_v2\")\n    success = builder.build_complete_replica()\n    \n    return success",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/cloudtest/local_replica_v2.py",
      "tags": [
        "testing",
        "authentication",
        "remarkable",
        "cloud-sync",
        "replica",
        "entry-point",
        "main-function",
        "document-management",
        "e-ink",
        "notebook-sync"
      ],
      "updated_at": "2025-12-07T01:56:14.686406",
      "usage_example": "if __name__ == '__main__':\n    success = main()\n    if success:\n        print('\u2705 Replica built successfully')\n    else:\n        print('\u274c Replica building failed')\n    sys.exit(0 if success else 1)"
    },
    {
      "best_practices": [
        "This function is designed to be called as a script entry point, typically from if __name__ == '__main__' block",
        "The function returns proper exit codes (0 for success, 1 for failure) suitable for shell scripting and CI/CD integration",
        "Handles KeyboardInterrupt separately to allow graceful user cancellation",
        "Uses try-except blocks to catch all exceptions and prevent unhandled errors",
        "Provides clear console feedback with emoji indicators for better user experience",
        "Should be used in conjunction with proper logging configuration for production environments",
        "The RemarkableReplicaSync class must be properly initialized with required configuration before calling this function"
      ],
      "class_interface": {},
      "complexity": "simple",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-07 00:26:27",
      "decorators": [],
      "dependencies": [
        "requests"
      ],
      "description": "Main entry point function that orchestrates a standalone synchronization process for reMarkable Replica, handling initialization, execution, and error reporting.",
      "docstring": "Main entry point for standalone sync",
      "id": 2042,
      "imports": [
        "import os",
        "import sys",
        "import json",
        "import time",
        "import hashlib",
        "import requests",
        "import logging",
        "import re",
        "import shutil",
        "import subprocess",
        "from pathlib import Path",
        "from datetime import datetime",
        "from typing import Dict",
        "from typing import Any",
        "from typing import Optional",
        "from typing import List",
        "from typing import Set",
        "from dataclasses import dataclass"
      ],
      "imports_required": [
        "from pathlib import Path",
        "from datetime import datetime",
        "from typing import Dict, Any, Optional, List, Set",
        "from dataclasses import dataclass",
        "import os",
        "import sys",
        "import json",
        "import time",
        "import hashlib",
        "import requests",
        "import logging",
        "import re",
        "import shutil",
        "import subprocess"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 471,
      "line_start": 448,
      "name": "main_v60",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as the command-line entry point for the reMarkable Replica sync tool. It initializes the RemarkableReplicaSync class, performs the synchronization operation, and provides user-friendly console feedback with emoji indicators. It handles graceful shutdown on keyboard interrupts and catches all exceptions to provide appropriate exit codes for shell integration.",
      "return_annotation": null,
      "return_explained": "Returns an integer exit code: 0 for successful sync completion, 1 for failures (including user interruption, sync failures, or exceptions). This follows Unix convention where 0 indicates success and non-zero indicates failure.",
      "settings_required": [
        "RemarkableReplicaSync class must be defined and importable in the same module or imported from another module",
        "RemarkableReplicaSync may require configuration such as API credentials, file paths, or connection settings (depends on class implementation)",
        "Appropriate file system permissions for sync operations",
        "Network connectivity if syncing with remote reMarkable device or cloud service"
      ],
      "source_code": "def main():\n    \"\"\"Main entry point for standalone sync\"\"\"\n    try:\n        print(\"\ud83d\udd04 Starting reMarkable Replica Sync\")\n        \n        # Initialize sync tool\n        sync = RemarkableReplicaSync()\n        \n        # Perform sync\n        success = sync.sync_replica()\n        \n        if success:\n            print(\"\u2705 Sync completed successfully!\")\n            return 0\n        else:\n            print(\"\u274c Sync failed!\")\n            return 1\n            \n    except KeyboardInterrupt:\n        print(\"\\n\u26a0\ufe0f Sync interrupted by user\")\n        return 1\n    except Exception as e:\n        print(f\"\u274c Sync failed with error: {e}\")\n        return 1",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/cloudtest/sync_replica_new.py",
      "tags": [
        "entry-point",
        "sync",
        "remarkable",
        "cli",
        "error-handling",
        "standalone",
        "orchestration",
        "exit-code"
      ],
      "updated_at": "2025-12-07T01:56:14.685231",
      "usage_example": "if __name__ == '__main__':\n    exit_code = main()\n    sys.exit(exit_code)"
    },
    {
      "best_practices": [
        "Always run this function using asyncio.run(main()) or await it from another async context",
        "Ensure all three demo functions (demo_graphics_generation, demo_placeholder_parsing, demo_hybrid_response) are properly defined before calling main()",
        "The function includes comprehensive error handling with traceback printing for debugging",
        "Console output uses Unicode emoji characters; ensure terminal supports UTF-8 encoding",
        "This is a demonstration function and should not be used in production code without modification",
        "The function expects specific module dependencies to be available; verify all imports resolve correctly",
        "Consider wrapping the asyncio.run(main()) call in a if __name__ == '__main__': block when using as a script entry point"
      ],
      "class_interface": {},
      "complexity": "moderate",
      "component_type": "function",
      "conditional_imports": [
        {
          "condition": "only when an exception occurs during demo execution",
          "import": "import traceback",
          "optional": false
        }
      ],
      "created_at": "2025-12-07 00:03:16",
      "decorators": [],
      "dependencies": [
        "asyncio",
        "tempfile",
        "pathlib",
        "json",
        "traceback"
      ],
      "description": "Orchestrates a comprehensive demonstration of E-Ink LLM hybrid mode capabilities, running three sequential demos showcasing graphics generation, placeholder parsing, and complete hybrid response processing.",
      "docstring": "Run all demos",
      "id": 1989,
      "imports": [
        "import asyncio",
        "import tempfile",
        "from pathlib import Path",
        "import json",
        "from graphics_generator import GraphicsGenerator",
        "from graphics_generator import GraphicSpec",
        "from graphics_generator import GraphicType",
        "from hybrid_response_handler import HybridResponseHandler",
        "from hybrid_pdf_generator import HybridPDFGenerator",
        "from hybrid_response_handler import HybridResponseHandler",
        "import traceback"
      ],
      "imports_required": [
        "import asyncio",
        "import tempfile",
        "from pathlib import Path",
        "import json",
        "from graphics_generator import GraphicsGenerator, GraphicSpec, GraphicType",
        "from hybrid_response_handler import HybridResponseHandler",
        "from hybrid_pdf_generator import HybridPDFGenerator"
      ],
      "is_async": 1,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 233,
      "line_start": 207,
      "name": "main_v59",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This async function serves as the main entry point for demonstrating the E-Ink LLM Assistant's hybrid text+graphics mode. It sequentially executes three demos: individual graphics generation, placeholder parsing, and complete hybrid response processing. The function provides user-friendly console output with progress indicators and error handling, and concludes with usage instructions for implementing hybrid mode in production.",
      "return_annotation": null,
      "return_explained": "Returns None (implicitly). The function is designed for side effects (console output and demonstration execution) rather than returning values. On success, it prints completion messages and usage instructions. On failure, it prints error messages and stack traces.",
      "settings_required": [
        "Requires demo_graphics_generation() async function to be defined in the same module",
        "Requires demo_placeholder_parsing() function to be defined in the same module",
        "Requires demo_hybrid_response() async function to be defined in the same module",
        "Requires graphics_generator module with GraphicsGenerator, GraphicSpec, and GraphicType classes",
        "Requires hybrid_response_handler module with HybridResponseHandler class",
        "Requires hybrid_pdf_generator module with HybridPDFGenerator class",
        "May require additional configuration for graphics generation (fonts, image libraries, etc.)"
      ],
      "source_code": "async def main():\n    \"\"\"Run all demos\"\"\"\n    print(\"\ud83d\ude80 E-Ink LLM Hybrid Mode Demonstration\")\n    print(\"=\" * 60)\n    print(\"This demo showcases the new hybrid text+graphics capabilities\")\n    print()\n    \n    try:\n        # Demo 1: Individual graphics generation\n        graphics = await demo_graphics_generation()\n        \n        # Demo 2: Placeholder parsing\n        demo_placeholder_parsing()\n        \n        # Demo 3: Complete hybrid response processing\n        await demo_hybrid_response()\n        \n        print(\"\\n\" + \"=\" * 60)\n        print(\"\u2705 Demo completed successfully!\")\n        print(\"\\nTo use hybrid mode in your E-Ink LLM Assistant:\")\n        print(\"   python main.py --file input.pdf --enable-hybrid-mode\")\n        print(\"\\nFor more information, see HYBRID_MODE_GUIDE.md\")\n        \n    except Exception as e:\n        print(f\"\\n\u274c Demo failed with error: {e}\")\n        import traceback\n        traceback.print_exc()",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/demo_hybrid_mode.py",
      "tags": [
        "async",
        "demo",
        "orchestration",
        "e-ink",
        "hybrid-mode",
        "graphics",
        "llm",
        "presentation",
        "error-handling",
        "main-entry-point"
      ],
      "updated_at": "2025-12-07T01:56:14.684565",
      "usage_example": "import asyncio\n\nasync def demo_graphics_generation():\n    # Your graphics demo implementation\n    return []\n\ndef demo_placeholder_parsing():\n    # Your placeholder parsing demo\n    pass\n\nasync def demo_hybrid_response():\n    # Your hybrid response demo\n    pass\n\nasync def main():\n    \"\"\"Run all demos\"\"\"\n    print(\"\ud83d\ude80 E-Ink LLM Hybrid Mode Demonstration\")\n    print(\"=\" * 60)\n    print(\"This demo showcases the new hybrid text+graphics capabilities\")\n    print()\n    \n    try:\n        graphics = await demo_graphics_generation()\n        demo_placeholder_parsing()\n        await demo_hybrid_response()\n        \n        print(\"\\n\" + \"=\" * 60)\n        print(\"\u2705 Demo completed successfully!\")\n        print(\"\\nTo use hybrid mode in your E-Ink LLM Assistant:\")\n        print(\"   python main.py --file input.pdf --enable-hybrid-mode\")\n        print(\"\\nFor more information, see HYBRID_MODE_GUIDE.md\")\n        \n    except Exception as e:\n        print(f\"\\n\u274c Demo failed with error: {e}\")\n        import traceback\n        traceback.print_exc()\n\nif __name__ == \"__main__\":\n    asyncio.run(main())"
    },
    {
      "best_practices": [
        "This function must be called using asyncio.run(main()) or within an existing async context",
        "Ensure test_remarkable_with_code() and test_remarkable_authentication() functions are defined before calling main()",
        "Command-line arguments must follow the exact format: --code <one_time_code>",
        "The function uses sys.exit(1) on failure, which will terminate the entire program",
        "Both test functions should handle their own exceptions and return boolean success status",
        "The function expects emoji characters in output - ensure terminal supports UTF-8 encoding",
        "This is designed as a test/validation script, not for production use in larger applications"
      ],
      "class_interface": {},
      "complexity": "simple",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-06 23:52:16",
      "decorators": [],
      "dependencies": [
        "asyncio",
        "sys",
        "pathlib",
        "remarkable_cloud",
        "rmcl"
      ],
      "description": "Asynchronous main test function that validates reMarkable Cloud integration by either testing with a one-time authentication code or existing authentication credentials.",
      "docstring": "Main test function",
      "id": 1963,
      "imports": [
        "import asyncio",
        "import sys",
        "from pathlib import Path",
        "from remarkable_cloud import RemarkableCloudManager",
        "import rmcl"
      ],
      "imports_required": [
        "import asyncio",
        "import sys",
        "from pathlib import Path",
        "from remarkable_cloud import RemarkableCloudManager",
        "import rmcl"
      ],
      "is_async": 1,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 114,
      "line_start": 98,
      "name": "main_v58",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as the entry point for testing reMarkable Cloud connectivity and authentication. It provides two testing modes: (1) authentication using a one-time code passed as a command-line argument, or (2) testing with existing stored authentication. The function validates the integration and provides user-friendly feedback on success or failure, exiting with appropriate status codes.",
      "return_annotation": null,
      "return_explained": "No explicit return value (returns None implicitly). The function communicates results through console output and system exit codes. Exits with code 1 on failure, continues normally (exit code 0) on success.",
      "settings_required": [
        "reMarkable Cloud authentication credentials (either existing stored credentials or a one-time authentication code)",
        "test_remarkable_with_code() function must be defined in the same module",
        "test_remarkable_authentication() function must be defined in the same module",
        "Both test functions must be async and return boolean success status"
      ],
      "source_code": "async def main():\n    \"\"\"Main test function\"\"\"\n    if len(sys.argv) > 1 and sys.argv[1] == \"--code\" and len(sys.argv) > 2:\n        # Test with one-time code\n        one_time_code = sys.argv[2]\n        success = await test_remarkable_with_code(one_time_code)\n    else:\n        # Test with existing authentication\n        success = await test_remarkable_authentication()\n    \n    if success:\n        print(\"\\n\ud83c\udf89 reMarkable Cloud integration test passed!\")\n        print(\"   You can now use the E-Ink LLM Assistant with reMarkable Cloud.\")\n    else:\n        print(\"\\n\u274c reMarkable Cloud integration test failed.\")\n        print(\"   Please check the error messages above.\")\n        sys.exit(1)",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/test_remarkable.py",
      "tags": [
        "async",
        "testing",
        "authentication",
        "remarkable",
        "cloud-integration",
        "cli",
        "command-line",
        "entry-point",
        "validation",
        "e-ink"
      ],
      "updated_at": "2025-12-07T01:56:14.683899",
      "usage_example": "# Run with existing authentication:\n# python script.py\n\n# Run with one-time code:\n# python script.py --code YOUR_ONE_TIME_CODE\n\n# In code:\nimport asyncio\nimport sys\nfrom pathlib import Path\nfrom remarkable_cloud import RemarkableCloudManager\nimport rmcl\n\n# Define required test functions first\nasync def test_remarkable_with_code(code):\n    # Implementation here\n    return True\n\nasync def test_remarkable_authentication():\n    # Implementation here\n    return True\n\nasync def main():\n    if len(sys.argv) > 1 and sys.argv[1] == \"--code\" and len(sys.argv) > 2:\n        one_time_code = sys.argv[2]\n        success = await test_remarkable_with_code(one_time_code)\n    else:\n        success = await test_remarkable_authentication()\n    \n    if success:\n        print(\"\\n\ud83c\udf89 reMarkable Cloud integration test passed!\")\n        print(\"   You can now use the E-Ink LLM Assistant with reMarkable Cloud.\")\n    else:\n        print(\"\\n\u274c reMarkable Cloud integration test failed.\")\n        print(\"   Please check the error messages above.\")\n        sys.exit(1)\n\nif __name__ == \"__main__\":\n    asyncio.run(main())"
    },
    {
      "best_practices": [
        "Ensure all required constants (OUTPUT_FOLDER, WUXI2_FOLDER, RESULTS_FILE, DETAILED_JSON) are properly defined before calling this function",
        "Verify that all helper functions (scan_output_folder, scan_wuxi2_folder, compare_documents, save_results, print_summary) are implemented and available",
        "Ensure the directories specified in OUTPUT_FOLDER and WUXI2_FOLDER exist and are accessible",
        "Verify write permissions for the output file paths (RESULTS_FILE and DETAILED_JSON)",
        "The function exits early if no coded documents are found in the output folder, so ensure the output folder contains expected documents",
        "Consider wrapping the main() call in a try-except block to handle potential file system errors or missing dependencies",
        "This function is designed to be called as the entry point, typically within an if __name__ == '__main__': block"
      ],
      "class_interface": {},
      "complexity": "moderate",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-06 22:24:17",
      "decorators": [],
      "dependencies": [
        "os",
        "re",
        "hashlib",
        "pathlib",
        "typing",
        "csv",
        "datetime",
        "collections",
        "json"
      ],
      "description": "Main execution function that orchestrates a document comparison workflow between two directories (mailsearch/output and wuxi2 repository), scanning for coded documents, comparing them, and generating results.",
      "docstring": "Main execution function",
      "id": 1868,
      "imports": [
        "import os",
        "import re",
        "import hashlib",
        "from pathlib import Path",
        "from typing import Dict",
        "from typing import List",
        "from typing import Tuple",
        "from typing import Optional",
        "import csv",
        "from datetime import datetime",
        "from collections import defaultdict",
        "import json"
      ],
      "imports_required": [
        "import os",
        "import re",
        "import hashlib",
        "from pathlib import Path",
        "from typing import Dict, List, Tuple, Optional",
        "import csv",
        "from datetime import datetime",
        "from collections import defaultdict",
        "import json"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 440,
      "line_start": 412,
      "name": "main_v57",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as the entry point for a document comparison tool. It coordinates the entire workflow: scanning the output folder for documents, scanning the wuxi2 repository, comparing documents between the two locations, saving comparison results to files, and printing a summary. It's designed to identify differences, matches, or discrepancies between document sets in different locations.",
      "return_annotation": null,
      "return_explained": "Returns None (implicit). The function performs side effects including printing to console, writing results to files (RESULTS_FILE and DETAILED_JSON), and potentially creating output directories.",
      "settings_required": [
        "OUTPUT_FOLDER constant must be defined in the module scope pointing to the mailsearch/output directory",
        "WUXI2_FOLDER constant must be defined in the module scope pointing to the wuxi2 repository directory",
        "RESULTS_FILE constant must be defined specifying the path for CSV results output",
        "DETAILED_JSON constant must be defined specifying the path for JSON results output",
        "scan_output_folder() function must be defined and available in the same module",
        "scan_wuxi2_folder() function must be defined and available in the same module",
        "compare_documents() function must be defined and available in the same module",
        "save_results() function must be defined and available in the same module",
        "print_summary() function must be defined and available in the same module",
        "Read/write permissions for OUTPUT_FOLDER, WUXI2_FOLDER, and output file paths"
      ],
      "source_code": "def main():\n    \"\"\"Main execution function\"\"\"\n    print(f\"\\n{'='*80}\")\n    print(\"Document Comparison Tool\")\n    print(\"Comparing mailsearch/output with wuxi2 repository\")\n    print(f\"{'='*80}\")\n    \n    # Scan output folder\n    output_docs = scan_output_folder(OUTPUT_FOLDER)\n    \n    if not output_docs:\n        print(\"\\n\u2717 No coded documents found in output folder!\")\n        return\n    \n    # Scan wuxi2 repository\n    wuxi2_docs = scan_wuxi2_folder(WUXI2_FOLDER)\n    \n    # Compare documents\n    results = compare_documents(output_docs, wuxi2_docs)\n    \n    # Save results\n    save_results(results, RESULTS_FILE, DETAILED_JSON)\n    \n    # Print summary\n    print_summary(results)\n    \n    print(f\"{'='*80}\")\n    print(\"Comparison complete!\")\n    print(f\"{'='*80}\\n\")",
      "source_file": "/tf/active/vicechatdev/mailsearch/compare_documents.py",
      "tags": [
        "document-comparison",
        "file-scanning",
        "workflow-orchestration",
        "main-entry-point",
        "batch-processing",
        "reporting",
        "file-system",
        "comparison-tool"
      ],
      "updated_at": "2025-12-07T01:56:14.683179",
      "usage_example": "# Define required constants and helper functions first\nOUTPUT_FOLDER = './mailsearch/output'\nWUXI2_FOLDER = './wuxi2'\nRESULTS_FILE = './comparison_results.csv'\nDETAILED_JSON = './comparison_results.json'\n\n# Define required helper functions (scan_output_folder, scan_wuxi2_folder, etc.)\n# ... (implementation of helper functions)\n\n# Execute the main function\nif __name__ == '__main__':\n    main()"
    },
    {
      "best_practices": [
        "This function expects test functions to be defined in the same module scope before calling main()",
        "Each test function should return a boolean (True for pass, False for fail) or raise an exception",
        "The function uses print statements for output; consider redirecting stdout if capturing test results programmatically",
        "Exit codes follow Unix convention: use sys.exit(main()) to properly terminate the process with the correct code",
        "Test functions are executed sequentially; a failed test does not stop subsequent tests from running",
        "Exceptions in test functions are caught and counted as failures, allowing the test suite to continue",
        "The setup_logging() function is called before tests run; ensure logging configuration is appropriate for test environment"
      ],
      "class_interface": {},
      "complexity": "simple",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-06 17:37:01",
      "decorators": [],
      "dependencies": [
        "logging",
        "sys",
        "os",
        "pathlib"
      ],
      "description": "Orchestrates and executes a test suite for an email forwarder service, running multiple test functions sequentially and reporting results.",
      "docstring": "Run all tests.",
      "id": 1478,
      "imports": [
        "import sys",
        "import os",
        "import logging",
        "from pathlib import Path",
        "from config import settings",
        "from utils.logger import setup_logging",
        "from forwarder.o365_client import O365Client",
        "from forwarder.email_handler import EmailHandler"
      ],
      "imports_required": [
        "import sys",
        "import os",
        "import logging",
        "from pathlib import Path",
        "from config import settings",
        "from utils.logger import setup_logging",
        "from forwarder.o365_client import O365Client",
        "from forwarder.email_handler import EmailHandler"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 130,
      "line_start": 96,
      "name": "main_v56",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as the main entry point for testing an email forwarding service. It sets up logging, executes a predefined list of test functions (configuration, O365 connection, email handler, and send email tests), tracks pass/fail status, and provides a summary report. Returns 0 for success (all tests passed) or 1 for failure (some tests failed).",
      "return_annotation": null,
      "return_explained": "Returns an integer exit code: 0 if all tests passed successfully, 1 if any tests failed. This follows standard Unix convention for process exit codes where 0 indicates success.",
      "settings_required": [
        "config.py module with settings object must be available",
        "utils.logger module with setup_logging function must be available",
        "forwarder.o365_client module with O365Client class must be available",
        "forwarder.email_handler module with EmailHandler class must be available",
        "Test functions must be defined in the same module: test_configuration, test_o365_connection, test_email_handler, test_send_email",
        "Each test function should return True for pass, False for fail, or raise an exception on error"
      ],
      "source_code": "def main():\n    \"\"\"Run all tests.\"\"\"\n    print(\"Email Forwarder Service Test Suite\")\n    print(\"=\" * 40)\n    \n    # Setup logging\n    setup_logging()\n    \n    tests = [\n        test_configuration,\n        test_o365_connection,\n        test_email_handler,\n        test_send_email\n    ]\n    \n    passed = 0\n    total = len(tests)\n    \n    for test in tests:\n        try:\n            if test():\n                passed += 1\n        except Exception as e:\n            print(f\"\u2717 Test failed with exception: {e}\")\n        print()\n    \n    print(\"=\" * 40)\n    print(f\"Tests passed: {passed}/{total}\")\n    \n    if passed == total:\n        print(\"All tests passed! \u2713\")\n        return 0\n    else:\n        print(\"Some tests failed! \u2717\")\n        return 1",
      "source_file": "/tf/active/vicechatdev/email-forwarder/test_service.py",
      "tags": [
        "testing",
        "test-suite",
        "email",
        "forwarder",
        "integration-tests",
        "test-runner",
        "o365",
        "logging",
        "exit-code"
      ],
      "updated_at": "2025-12-07T01:56:14.682531",
      "usage_example": "# Assuming all required modules and test functions are defined\n# in the same file or imported\n\ndef test_configuration():\n    \"\"\"Example test function.\"\"\"\n    return True\n\ndef test_o365_connection():\n    \"\"\"Example test function.\"\"\"\n    return True\n\ndef test_email_handler():\n    \"\"\"Example test function.\"\"\"\n    return True\n\ndef test_send_email():\n    \"\"\"Example test function.\"\"\"\n    return True\n\nif __name__ == '__main__':\n    exit_code = main()\n    sys.exit(exit_code)"
    },
    {
      "best_practices": [
        "This function expects three test functions to be defined in the same module: test_pyodbc_import, test_odbc_driver, and test_connection_string",
        "Each test function should return a boolean indicating success (True) or failure (False)",
        "Test functions should handle their own exceptions and print appropriate messages",
        "The function is designed to be used as a standalone diagnostic tool, typically called from if __name__ == '__main__' block",
        "Return value can be used to set exit codes for CI/CD pipelines (0 for success, 1 for failure)",
        "All test exceptions are caught and treated as test failures, ensuring the suite completes even if individual tests crash",
        "The function provides user-friendly output with emojis and formatting for better readability in terminal environments"
      ],
      "class_interface": {},
      "complexity": "simple",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-06 16:23:14",
      "decorators": [],
      "dependencies": [
        "pyodbc",
        "sqlalchemy"
      ],
      "description": "Orchestrates and executes a suite of ODBC connectivity tests for SQL Server, providing formatted output and a summary of test results.",
      "docstring": "Run all tests",
      "id": 1252,
      "imports": [
        "import pyodbc",
        "import sqlalchemy",
        "from sqlalchemy import create_engine",
        "from sqlalchemy import text",
        "import pyodbc"
      ],
      "imports_required": [
        "import pyodbc",
        "import sqlalchemy",
        "from sqlalchemy import create_engine",
        "from sqlalchemy import text"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 104,
      "line_start": 69,
      "name": "main_v55",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as the main entry point for testing SQL Server ODBC connectivity. It runs a predefined set of tests (pyodbc import, ODBC driver availability, and connection string validation), collects results, handles exceptions, and provides a comprehensive summary with helpful connection tips. It's designed to validate that the environment is properly configured for SQL Server database connections.",
      "return_annotation": null,
      "return_explained": "Returns a boolean value indicating whether all tests passed. Returns True if all tests in the suite completed successfully, False if any test failed or raised an exception.",
      "settings_required": [
        "ODBC Driver 18 for SQL Server must be installed on the system",
        "SQL Server instance must be accessible (local or remote)",
        "Requires three test functions to be defined in the same module: test_pyodbc_import, test_odbc_driver, and test_connection_string"
      ],
      "source_code": "def main():\n    \"\"\"Run all tests\"\"\"\n    print(\"\ud83e\uddea SmartStat ODBC Connectivity Test\")\n    print(\"=\" * 40)\n    \n    tests = [\n        test_pyodbc_import,\n        test_odbc_driver,\n        test_connection_string\n    ]\n    \n    results = []\n    for test in tests:\n        try:\n            result = test()\n            results.append(result)\n        except Exception as e:\n            print(f\"\u274c Test failed with exception: {e}\")\n            results.append(False)\n    \n    print(\"\\n\" + \"=\" * 40)\n    print(\"\ud83d\udcca Test Summary:\")\n    print(f\"\u2705 Passed: {sum(results)}/{len(results)} tests\")\n    \n    if all(results):\n        print(\"\ud83c\udf89 All tests passed! SQL Server connectivity is ready.\")\n        print(\"\\n\ud83d\udca1 Connection Tips:\")\n        print(\"   - Use 'ODBC Driver 18 for SQL Server' as driver name\")\n        print(\"   - For local connections: server='localhost' or server='127.0.0.1'\")\n        print(\"   - For trusted connections: trusted_connection=yes\")\n        print(\"   - For SQL Auth: provide username and password\")\n        print(\"   - You may need TrustServerCertificate=yes for SSL issues\")\n    else:\n        print(\"\u26a0\ufe0f  Some tests failed. Check the errors above.\")\n    \n    return all(results)",
      "source_file": "/tf/active/vicechatdev/full_smartstat/test_odbc.py",
      "tags": [
        "testing",
        "odbc",
        "sql-server",
        "connectivity",
        "database",
        "validation",
        "diagnostics",
        "test-suite",
        "pyodbc",
        "sqlalchemy"
      ],
      "updated_at": "2025-12-07T01:56:14.681804",
      "usage_example": "# Ensure test functions are defined in the same module\ndef test_pyodbc_import():\n    try:\n        import pyodbc\n        print(\"\u2705 pyodbc imported successfully\")\n        return True\n    except ImportError:\n        print(\"\u274c pyodbc import failed\")\n        return False\n\ndef test_odbc_driver():\n    drivers = pyodbc.drivers()\n    if 'ODBC Driver 18 for SQL Server' in drivers:\n        print(\"\u2705 ODBC Driver 18 found\")\n        return True\n    print(\"\u274c ODBC Driver 18 not found\")\n    return False\n\ndef test_connection_string():\n    print(\"\u2705 Connection string test passed\")\n    return True\n\n# Run the main test suite\nif __name__ == '__main__':\n    success = main()\n    exit(0 if success else 1)"
    },
    {
      "best_practices": [
        "This function should be called as the main entry point of a test script, typically with if __name__ == '__main__'",
        "The return value should be used with sys.exit() to properly signal test success/failure to the operating system",
        "All test functions called by main() must be defined before calling main()",
        "Test functions should raise AssertionError for test failures to be properly caught and reported",
        "Ensure all required modules (config, rag_engine, app) are available in the Python path before execution",
        "The function provides detailed console output, so redirect stdout/stderr appropriately in automated environments",
        "Consider running this in a test environment separate from production to avoid side effects"
      ],
      "class_interface": {},
      "complexity": "moderate",
      "component_type": "function",
      "conditional_imports": [
        {
          "condition": "only used when an unexpected exception occurs during test execution",
          "import": "import traceback",
          "optional": false
        }
      ],
      "created_at": "2025-12-06 09:57:50",
      "decorators": [],
      "dependencies": [
        "sys",
        "json",
        "config",
        "rag_engine",
        "os",
        "app",
        "traceback"
      ],
      "description": "Test orchestration function that executes a comprehensive test suite for DocChat's multi-LLM model selection feature and reports results.",
      "docstring": "Run all tests",
      "id": 346,
      "imports": [
        "import sys",
        "import json",
        "import config",
        "from rag_engine import get_llm_instance",
        "import config",
        "import config",
        "import os",
        "import app as flask_app",
        "import traceback"
      ],
      "imports_required": [
        "import sys",
        "import json",
        "import config",
        "from rag_engine import get_llm_instance",
        "import os",
        "import app as flask_app",
        "import traceback"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 213,
      "line_start": 172,
      "name": "main_v54",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as the main entry point for running integration tests for the DocChat application. It sequentially executes tests for configuration, RAG engine, Flask routes, API endpoints, and frontend files. It provides formatted console output showing test progress and results, and returns an exit code indicating success (0) or failure (1) for use in CI/CD pipelines or test automation.",
      "return_annotation": null,
      "return_explained": "Returns an integer exit code: 0 if all tests pass successfully, 1 if any test fails (either through AssertionError or unexpected Exception). This follows Unix convention for process exit codes where 0 indicates success.",
      "settings_required": [
        "config.py module must be present with multi-LLM configuration settings",
        "rag_engine.py module must be available with get_llm_instance function",
        "app.py Flask application module must exist",
        "test_config(), test_rag_engine(), test_flask_routes(), test_api_models_endpoint(), and test_frontend_files() functions must be defined in the same module or imported",
        "Frontend files and API endpoints must be properly configured for testing"
      ],
      "source_code": "def main():\n    \"\"\"Run all tests\"\"\"\n    print(\"\\n\" + \"=\" * 60)\n    print(\"DocChat Multi-LLM Model Selection Tests\")\n    print(\"=\" * 60)\n    print()\n    \n    try:\n        test_config()\n        test_rag_engine()\n        test_flask_routes()\n        test_api_models_endpoint()\n        test_frontend_files()\n        \n        print(\"=\" * 60)\n        print(\"\u2705 ALL TESTS PASSED!\")\n        print(\"=\" * 60)\n        print()\n        print(\"Model selection is fully implemented and working:\")\n        print(\"  - Backend: \u2713 Multi-LLM support configured\")\n        print(\"  - API: \u2713 Model selection endpoint ready\")\n        print(\"  - Frontend: \u2713 UI with model dropdown\")\n        print(\"  - Integration: \u2713 Model parameter sent and used\")\n        print()\n        return 0\n        \n    except AssertionError as e:\n        print(\"=\" * 60)\n        print(\"\u274c TEST FAILED!\")\n        print(\"=\" * 60)\n        print(f\"Error: {e}\")\n        print()\n        return 1\n    except Exception as e:\n        print(\"=\" * 60)\n        print(\"\u274c UNEXPECTED ERROR!\")\n        print(\"=\" * 60)\n        print(f\"Error: {type(e).__name__}: {e}\")\n        import traceback\n        traceback.print_exc()\n        print()\n        return 1",
      "source_file": "/tf/active/vicechatdev/docchat/test_model_selection.py",
      "tags": [
        "testing",
        "integration-tests",
        "test-orchestration",
        "multi-llm",
        "test-runner",
        "ci-cd",
        "validation",
        "docchat",
        "model-selection"
      ],
      "updated_at": "2025-12-07T01:56:14.681151",
      "usage_example": "if __name__ == '__main__':\n    exit_code = main()\n    sys.exit(exit_code)"
    },
    {
      "best_practices": [
        "This function should be called as the main entry point for testing the sync application before production use",
        "Ensure all configuration files and credentials are properly set up before running",
        "The function returns exit codes suitable for use with sys.exit() for proper process termination",
        "Review console output carefully as it provides detailed status and next steps",
        "This is a test function and should not be used for production synchronization - use main.py with appropriate flags instead",
        "The function depends on test_graph_client() and test_filecloud_integration() helper functions being defined in the same module",
        "Exception handling includes full stack trace printing for debugging purposes"
      ],
      "class_interface": {},
      "complexity": "moderate",
      "component_type": "function",
      "conditional_imports": [
        {
          "condition": "imported inside try block during configuration loading phase",
          "import": "from config import Config",
          "optional": false
        },
        {
          "condition": "imported inside except block for error handling and stack trace printing",
          "import": "import traceback",
          "optional": false
        }
      ],
      "created_at": "2025-12-06 09:13:32",
      "decorators": [],
      "dependencies": [
        "datetime",
        "os",
        "sys",
        "traceback",
        "sharepoint_graph_client",
        "config",
        "sync_service"
      ],
      "description": "Main test function that validates SharePoint Graph API integration, tests the Graph client connection, and verifies FileCloud sync functionality.",
      "docstring": "Main test function.",
      "id": 214,
      "imports": [
        "import os",
        "import sys",
        "from datetime import datetime",
        "from sharepoint_graph_client import SharePointGraphClient",
        "from config import Config",
        "from sync_service import SharePointFileCloudSync",
        "from config import Config",
        "import traceback",
        "import traceback",
        "import traceback"
      ],
      "imports_required": [
        "from datetime import datetime"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 145,
      "line_start": 99,
      "name": "main_v53",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as the primary entry point for testing the SharePoint to FileCloud synchronization application. It validates configuration, tests the Graph API client connection to SharePoint, verifies document retrieval, and tests the full sync integration with FileCloud. It provides detailed console output with status indicators and next steps for deployment.",
      "return_annotation": null,
      "return_explained": "Returns an integer exit code: 0 for successful completion of all tests (Graph API client test and sync integration test both passed), 1 for any failure (configuration error, Graph API test failure, sync integration failure, or exception). This follows standard Unix exit code conventions.",
      "settings_required": [
        "config.py module with Config class containing SHAREPOINT_SITE_URL",
        "config.py module with Config class containing FILECLOUD_SERVER_URL",
        "Config.validate_config() must pass validation",
        "Config.setup_logging() must be available",
        "SharePoint Graph API credentials configured",
        "FileCloud server credentials configured",
        "test_graph_client() function must be defined in the same module",
        "test_filecloud_integration() function must be defined in the same module"
      ],
      "source_code": "def main():\n    \"\"\"Main test function.\"\"\"\n    print(\"SharePoint Graph API Integration Test\")\n    print(\"=\" * 60)\n    print(f\"Test time: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\")\n    print()\n    \n    try:\n        # Load configuration\n        from config import Config\n        Config.validate_config()\n        Config.setup_logging()\n        \n        print(\"\u2705 Configuration loaded successfully\")\n        print(f\"SharePoint Site: {Config.SHAREPOINT_SITE_URL}\")\n        print(f\"FileCloud Server: {Config.FILECLOUD_SERVER_URL}\")\n        print()\n        \n        # Test Graph client\n        graph_success, doc_count = test_graph_client()\n        \n        if graph_success:\n            print(f\"\\n\ud83c\udf89 Graph API client test PASSED! Found {doc_count} documents.\")\n            \n            # Test full sync integration\n            sync_success = test_filecloud_integration()\n            \n            if sync_success:\n                print(\"\\n\ud83c\udf89 Full sync integration test PASSED!\")\n                print(\"\\n\u2705 The sync application is ready to use with Graph API!\")\n                print(\"\\nNext steps:\")\n                print(\"1. Run a test sync: python main.py --once\")\n                print(\"2. Monitor the logs for any issues\")\n                print(\"3. Set up continuous sync: python main.py\")\n                return 0\n            else:\n                print(\"\\n\u274c Sync integration test failed.\")\n                return 1\n        else:\n            print(\"\\n\u274c Graph API client test failed.\")\n            return 1\n            \n    except Exception as e:\n        print(f\"\u274c Test failed with exception: {e}\")\n        import traceback\n        traceback.print_exc()\n        return 1",
      "source_file": "/tf/active/vicechatdev/SPFCsync/test_graph_client.py",
      "tags": [
        "testing",
        "integration-test",
        "sharepoint",
        "graph-api",
        "filecloud",
        "sync",
        "validation",
        "configuration",
        "main-entry-point",
        "cli"
      ],
      "updated_at": "2025-12-07T01:56:14.680464",
      "usage_example": "if __name__ == '__main__':\n    exit_code = main()\n    sys.exit(exit_code)"
    },
    {
      "best_practices": [
        "This function should be called as the main entry point for SharePoint connectivity testing",
        "Ensure all required configuration values are set in config.py before running",
        "The function returns exit codes suitable for use with sys.exit() for proper process termination",
        "Check that test_rest_client() function is properly defined before calling main()",
        "Review console output for detailed test results and error messages",
        "The function handles exceptions gracefully and provides informative error messages",
        "Use the return code to determine if the SharePoint sync service is ready for deployment"
      ],
      "class_interface": {},
      "complexity": "moderate",
      "component_type": "function",
      "conditional_imports": [
        {
          "condition": "imported inside try block for configuration loading and validation",
          "import": "from config import Config",
          "optional": false
        }
      ],
      "created_at": "2025-12-06 09:12:44",
      "decorators": [],
      "dependencies": [
        "datetime",
        "config",
        "sharepoint_rest_client"
      ],
      "description": "Main test function that validates SharePoint REST API connectivity by loading configuration, setting up logging, and executing REST client tests.",
      "docstring": "Main test function.",
      "id": 211,
      "imports": [
        "import os",
        "import sys",
        "from datetime import datetime",
        "from sharepoint_rest_client import SharePointRestClient",
        "from config import Config",
        "from config import Config"
      ],
      "imports_required": [
        "import os",
        "import sys",
        "from datetime import datetime",
        "from sharepoint_rest_client import SharePointRestClient",
        "from config import Config"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 90,
      "line_start": 61,
      "name": "main_v52",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as the entry point for testing SharePoint REST API integration. It validates the configuration, displays connection details, executes REST client tests, and reports success or failure. It's designed to verify that the SharePoint sync service can successfully connect and communicate with SharePoint before deployment.",
      "return_annotation": null,
      "return_explained": "Returns an integer exit code: 0 for successful test completion (all tests passed), 1 for test failure (either exception occurred or REST client test failed). This follows standard Unix exit code conventions where 0 indicates success.",
      "settings_required": [
        "config.py module with Config class containing SHAREPOINT_SITE_URL attribute",
        "config.py module with Config class containing SHAREPOINT_DOCUMENTS_PATH attribute",
        "Config.validate_config() method must be implemented and functional",
        "Config.setup_logging() method must be implemented and functional",
        "test_rest_client() function must be defined in the same module or imported",
        "SharePoint authentication credentials configured in Config class",
        "Network access to SharePoint site specified in configuration"
      ],
      "source_code": "def main():\n    \"\"\"Main test function.\"\"\"\n    print(\"SharePoint REST API Connection Test\")\n    print(\"=\" * 50)\n    print(f\"Test time: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\")\n    print()\n    \n    try:\n        # Load configuration\n        from config import Config\n        Config.validate_config()\n        Config.setup_logging()\n        \n        print(\"\u2705 Configuration loaded successfully\")\n        print(f\"SharePoint Site: {Config.SHAREPOINT_SITE_URL}\")\n        print(f\"Documents Path: {Config.SHAREPOINT_DOCUMENTS_PATH}\")\n        print()\n        \n        # Test REST client\n        if test_rest_client():\n            print(\"\\n\ud83c\udf89 SharePoint REST API test passed!\")\n            print(\"The sync service should work with this approach.\")\n            return 0\n        else:\n            print(\"\\n\u274c SharePoint REST API test failed.\")\n            return 1\n            \n    except Exception as e:\n        print(f\"\u274c Test failed with exception: {e}\")\n        return 1",
      "source_file": "/tf/active/vicechatdev/SPFCsync/test_rest_client.py",
      "tags": [
        "testing",
        "sharepoint",
        "rest-api",
        "integration-test",
        "configuration",
        "validation",
        "entry-point",
        "connectivity-test",
        "main-function"
      ],
      "updated_at": "2025-12-07T01:56:14.679734",
      "usage_example": "if __name__ == '__main__':\n    exit_code = main()\n    sys.exit(exit_code)"
    },
    {
      "best_practices": [
        "This function should be called as the main entry point of a diagnostic script",
        "Ensure config.py is properly configured with SharePoint credentials before running",
        "The function depends on explore_site_structure() which must be defined elsewhere in the codebase",
        "Review the diagnostic output carefully to understand folder visibility issues",
        "Use the exit code for integration with shell scripts or CI/CD pipelines",
        "The function provides user-friendly output with emojis and formatting for better readability",
        "Consider checking multiple document libraries if folders are not found in the default location"
      ],
      "class_interface": {},
      "complexity": "moderate",
      "component_type": "function",
      "conditional_imports": [
        {
          "condition": "imported inside try block for configuration validation",
          "import": "from config import Config",
          "optional": false
        },
        {
          "condition": "imported inside except block for error reporting",
          "import": "import traceback",
          "optional": false
        }
      ],
      "created_at": "2025-12-06 09:10:44",
      "decorators": [],
      "dependencies": [
        "requests",
        "config",
        "sharepoint_graph_client",
        "traceback"
      ],
      "description": "A diagnostic function that explores SharePoint site structure to investigate why only 2 folders are visible when more are expected in the web interface.",
      "docstring": "Main diagnostic function.",
      "id": 205,
      "imports": [
        "import requests",
        "import json",
        "import os",
        "import sys",
        "from sharepoint_graph_client import SharePointGraphClient",
        "from config import Config",
        "from config import Config",
        "import traceback",
        "import traceback"
      ],
      "imports_required": [
        "from config import Config",
        "import traceback"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 288,
      "line_start": 248,
      "name": "main_v51",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as the main entry point for a SharePoint diagnostic tool. It validates configuration, explores the SharePoint site structure using the Graph API, and provides detailed feedback about potential reasons for folder visibility discrepancies. It helps troubleshoot issues where the SharePoint web interface shows more folders than are accessible via the API.",
      "return_annotation": null,
      "return_explained": "Returns an integer exit code: 0 if the diagnostic completed successfully, 1 if any errors occurred during execution. This follows standard Unix exit code conventions for command-line tools.",
      "settings_required": [
        "config.py module with Config class containing SHAREPOINT_SITE_URL and validate_config() method",
        "SharePoint authentication credentials configured in Config class",
        "explore_site_structure() function must be defined in the same module or imported",
        "SharePointGraphClient class must be available for SharePoint API access"
      ],
      "source_code": "def main():\n    \"\"\"Main diagnostic function.\"\"\"\n    print(\"SharePoint Structure Diagnostic\")\n    print(\"=\" * 60)\n    print(\"This diagnostic will explore why we're only seeing 2 folders\")\n    print(\"when the SharePoint web interface shows many more.\")\n    print()\n    \n    try:\n        # Load configuration\n        from config import Config\n        Config.validate_config()\n        \n        print(\"\u2705 Configuration loaded successfully\")\n        print(f\"SharePoint Site: {Config.SHAREPOINT_SITE_URL}\")\n        print()\n        \n        # Run comprehensive exploration\n        success = explore_site_structure()\n        \n        if success:\n            print(\"\\n\ud83d\udccb DIAGNOSTIC SUMMARY:\")\n            print(\"-\" * 30)\n            print(\"The diagnostic has explored multiple ways to access your SharePoint content.\")\n            print(\"If we still only see 2 folders, it could mean:\")\n            print(\"1. The other folders are in a different document library\")\n            print(\"2. There are permission restrictions on those folders\")\n            print(\"3. The folders might be in a different site or subsite\")\n            print(\"4. The web interface shows a filtered or aggregated view\")\n            print()\n            print(\"\ud83d\udca1 RECOMMENDATION:\")\n            print(\"Check if there are multiple document libraries in your SharePoint site,\")\n            print(\"or if the folders are organized differently than expected.\")\n            \n        return 0 if success else 1\n        \n    except Exception as e:\n        print(f\"\u274c Diagnostic failed with exception: {e}\")\n        import traceback\n        traceback.print_exc()\n        return 1",
      "source_file": "/tf/active/vicechatdev/SPFCsync/diagnostic_comprehensive.py",
      "tags": [
        "diagnostic",
        "sharepoint",
        "debugging",
        "folder-exploration",
        "configuration-validation",
        "graph-api",
        "troubleshooting",
        "cli-tool",
        "main-entry-point"
      ],
      "updated_at": "2025-12-07T01:56:14.678987",
      "usage_example": "if __name__ == '__main__':\n    exit_code = main()\n    sys.exit(exit_code)"
    },
    {
      "best_practices": [
        "Ensure load_config() function is implemented before calling main()",
        "The SharePoint site URL must follow the exact format: https://{tenant}.sharepoint.com/sites/{sitename}",
        "User must have SharePoint administrator privileges to grant app permissions",
        "The function is designed for interactive terminal use and prints directly to stdout",
        "Should be called as the entry point of a script (if __name__ == '__main__')",
        "The generated permission XML grants Read access at site collection scope - modify if different permissions are needed",
        "After running this helper, users should execute test_connections.py to verify permissions were granted successfully"
      ],
      "class_interface": {},
      "complexity": "moderate",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-06 09:08:03",
      "decorators": [],
      "dependencies": [],
      "description": "Interactive CLI helper function that generates and displays instructions for granting SharePoint app permissions to an Azure AD application.",
      "docstring": "Generate the app permission grant URL.",
      "id": 196,
      "imports": [
        "import os"
      ],
      "imports_required": [
        "import os"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 85,
      "line_start": 24,
      "name": "main_v50",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as a guided wizard to help administrators grant SharePoint permissions to an Azure AD app. It loads configuration, parses SharePoint site URLs, extracts tenant information, and provides step-by-step instructions with the necessary URLs and XML configuration for granting app-only permissions at the site collection level. It offers both the direct appinv.aspx method and an alternative Admin Center approach.",
      "return_annotation": null,
      "return_explained": "Returns an integer exit code: 1 if configuration loading fails, missing required settings, or URL parsing fails; implicitly returns None (0) on successful execution. The return value indicates whether the helper completed successfully.",
      "settings_required": [
        "Requires a load_config() function to be defined in the same module that returns a dictionary",
        "SHAREPOINT_SITE_URL configuration value in format 'https://{tenant}.sharepoint.com/sites/{sitename}'",
        "AZURE_CLIENT_ID configuration value containing the Azure AD application client ID"
      ],
      "source_code": "def main():\n    \"\"\"Generate the app permission grant URL.\"\"\"\n    print(\"SharePoint App Permission Grant Helper\")\n    print(\"=\" * 50)\n    \n    config = load_config()\n    if not config:\n        print(\"\u274c Could not load configuration\")\n        return 1\n    \n    site_url = config.get('SHAREPOINT_SITE_URL', '')\n    client_id = config.get('AZURE_CLIENT_ID', '')\n    \n    if not site_url or not client_id:\n        print(\"\u274c Missing SHAREPOINT_SITE_URL or AZURE_CLIENT_ID in configuration\")\n        return 1\n    \n    # Extract site components\n    if '.sharepoint.com/sites/' in site_url:\n        base_url = site_url.split('/sites/')[0]\n        site_name = site_url.split('/sites/')[-1]\n        tenant = base_url.split('https://')[-1].split('.sharepoint.com')[0]\n    else:\n        print(\"\u274c Cannot parse SharePoint site URL\")\n        return 1\n    \n    print(f\"Site URL: {site_url}\")\n    print(f\"Tenant: {tenant}\")\n    print(f\"Client ID: {client_id}\")\n    print()\n    \n    # Generate the app permission grant URL\n    app_grant_url = f\"{base_url}/_layouts/15/appinv.aspx\"\n    \n    print(\"\ud83d\udd17 SharePoint App Permission Grant\")\n    print(\"-\" * 30)\n    print(f\"1. Open this URL in your browser: {app_grant_url}\")\n    print()\n    print(\"2. Fill in the form with these values:\")\n    print(f\"   App Id: {client_id}\")\n    print(\"   App Domain: (leave blank)\")\n    print(\"   App Redirect URL: (leave blank)\")\n    print()\n    print(\"3. Click 'Generate' to auto-fill the Title and App Domain\")\n    print()\n    print(\"4. In the Permission Request XML field, paste this:\")\n    print()\n    print(\"\"\"<AppPermissionRequests AllowAppOnlyPolicy=\"true\">\n  <AppPermissionRequest Scope=\"http://sharepoint/content/sitecollection\" Right=\"Read\" />\n</AppPermissionRequests>\"\"\")\n    print()\n    print(\"5. Click 'Create' and then 'Trust It' when prompted\")\n    print()\n    print(\"\ud83d\udccb Alternative Method: Admin Center\")\n    print(\"-\" * 30)\n    print(\"If the above doesn't work, try this:\")\n    print(f\"1. Go to SharePoint Admin Center\")\n    print(f\"2. Navigate to More features \u2192 Apps \u2192 App Catalog\")\n    print(f\"3. Add your app with Client ID: {client_id}\")\n    print()\n    print(\"After granting permissions, test again with:\")\n    print(\"python test_connections.py\")",
      "source_file": "/tf/active/vicechatdev/SPFCsync/grant_sharepoint_access.py",
      "tags": [
        "sharepoint",
        "azure-ad",
        "permissions",
        "cli-helper",
        "configuration",
        "app-registration",
        "authentication",
        "admin-tool",
        "interactive",
        "setup-wizard"
      ],
      "updated_at": "2025-12-07T01:56:14.678203",
      "usage_example": "# Assuming load_config() function exists and returns proper config\n# Example config.py or .env should contain:\n# SHAREPOINT_SITE_URL=https://contoso.sharepoint.com/sites/mysite\n# AZURE_CLIENT_ID=12345678-1234-1234-1234-123456789abc\n\nif __name__ == '__main__':\n    exit_code = main()\n    if exit_code:\n        print('Failed to generate permission grant instructions')\n    else:\n        print('Instructions displayed successfully')"
    },
    {
      "best_practices": [
        "Run this function before attempting any SharePoint connections to ensure configuration is valid",
        "Use the return code to determine if setup is complete (0 = success, 1 = failure)",
        "Follow the printed next steps after successful validation",
        "Ensure all helper functions (load_env_file, validate_sharepoint_url, validate_azure_client_id, validate_azure_client_secret) are properly implemented",
        "The function expects validation functions to return tuples of (bool, str) where bool indicates validity and str contains the message",
        "This function is designed for interactive use with console output and should be run from command line",
        "Keep the .env file secure as it contains sensitive Azure credentials"
      ],
      "class_interface": {},
      "complexity": "simple",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-06 09:07:32",
      "decorators": [],
      "dependencies": [],
      "description": "A validation function that checks SharePoint configuration settings from environment variables and provides diagnostic feedback on their validity.",
      "docstring": "Main validation function.",
      "id": 194,
      "imports": [
        "import os",
        "import sys",
        "import re",
        "from urllib.parse import urlparse"
      ],
      "imports_required": [
        "import os",
        "import sys",
        "import re",
        "from urllib.parse import urlparse"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 128,
      "line_start": 79,
      "name": "main_v49",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as a configuration validator for SharePoint integration. It loads environment variables from a .env file, validates the SharePoint URL, Azure Client ID, and Azure Client Secret, then provides comprehensive feedback on configuration status with actionable next steps. It's designed to be run as a standalone diagnostic tool before attempting SharePoint connections.",
      "return_annotation": null,
      "return_explained": "Returns an integer exit code: 0 if all validations pass (SharePoint URL, Azure Client ID, and Azure Client Secret are all valid), or 1 if any validation fails or if the .env file cannot be loaded. This follows standard Unix exit code conventions where 0 indicates success.",
      "settings_required": [
        ".env file in the working directory containing SHAREPOINT_SITE_URL, AZURE_CLIENT_ID, and AZURE_CLIENT_SECRET",
        "load_env_file() function must be defined in the same module",
        "validate_sharepoint_url() function must be defined in the same module",
        "validate_azure_client_id() function must be defined in the same module",
        "validate_azure_client_secret() function must be defined in the same module"
      ],
      "source_code": "def main():\n    \"\"\"Main validation function.\"\"\"\n    print(\"SharePoint Configuration Validator\")\n    print(\"=\" * 40)\n    \n    # Load environment variables\n    env_vars = load_env_file()\n    if env_vars is None:\n        return 1\n    \n    print(\"\u2705 .env file loaded successfully\")\n    print()\n    \n    # Validate SharePoint URL\n    sharepoint_url = env_vars.get('SHAREPOINT_SITE_URL', '')\n    url_valid, url_message = validate_sharepoint_url(sharepoint_url)\n    print(f\"SharePoint URL: {'\u2705' if url_valid else '\u274c'} {url_message}\")\n    if sharepoint_url and sharepoint_url != \"https://your-tenant.sharepoint.com/sites/your-site\":\n        print(f\"  Current value: {sharepoint_url}\")\n    \n    # Validate Azure Client ID\n    client_id = env_vars.get('AZURE_CLIENT_ID', '')\n    id_valid, id_message = validate_azure_client_id(client_id)\n    print(f\"Azure Client ID: {'\u2705' if id_valid else '\u274c'} {id_message}\")\n    \n    # Validate Azure Client Secret\n    client_secret = env_vars.get('AZURE_CLIENT_SECRET', '')\n    secret_valid, secret_message = validate_azure_client_secret(client_secret)\n    print(f\"Azure Client Secret: {'\u2705' if secret_valid else '\u274c'} {secret_message}\")\n    \n    print()\n    \n    # Overall status\n    all_valid = url_valid and id_valid and secret_valid\n    \n    if all_valid:\n        print(\"\ud83c\udf89 All SharePoint configuration looks good!\")\n        print()\n        print(\"Next steps:\")\n        print(\"1. Run connection test: python test_connections.py\")\n        print(\"2. If test passes, try one-time sync: python main.py --once\")\n        return 0\n    else:\n        print(\"\u274c Configuration issues found. Please fix the above issues.\")\n        print()\n        print(\"Setup help:\")\n        print(\"1. See SHAREPOINT_SETUP.md for detailed instructions\")\n        print(\"2. Update your .env file with correct values\")\n        print(\"3. Run this validator again\")\n        return 1",
      "source_file": "/tf/active/vicechatdev/SPFCsync/validate_config.py",
      "tags": [
        "validation",
        "configuration",
        "sharepoint",
        "azure",
        "environment-variables",
        "diagnostic",
        "setup",
        "cli",
        "configuration-checker"
      ],
      "updated_at": "2025-12-07T01:56:14.677511",
      "usage_example": "# Assuming all required validation functions are defined in the same module\n# and a .env file exists with SharePoint configuration\n\nif __name__ == '__main__':\n    exit_code = main()\n    sys.exit(exit_code)\n\n# Or simply call directly:\n# main()\n\n# Expected .env file format:\n# SHAREPOINT_SITE_URL=https://your-tenant.sharepoint.com/sites/your-site\n# AZURE_CLIENT_ID=your-client-id-guid\n# AZURE_CLIENT_SECRET=your-client-secret"
    },
    {
      "best_practices": [
        "This function should be called as the main entry point, typically within an 'if __name__ == \"__main__\":' block",
        "Ensure all helper functions (analyze_logs, print_status, tail_logs, watch_logs) are defined before calling main()",
        "The log file path should be accessible and readable by the process",
        "Default command is 'status' if no command is specified",
        "Use appropriate error handling in helper functions as main() does not catch exceptions",
        "Consider adding signal handlers for graceful shutdown when using the 'watch' command",
        "The function relies on argparse's built-in help system; users can run with --help for usage information"
      ],
      "class_interface": {},
      "complexity": "moderate",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-06 09:03:48",
      "decorators": [],
      "dependencies": [
        "argparse",
        "os",
        "sys",
        "time",
        "datetime",
        "re"
      ],
      "description": "Command-line interface entry point for monitoring SharePoint to FileCloud synchronization logs, providing status analysis, log tailing, and real-time watching capabilities.",
      "docstring": "Main entry point.",
      "id": 183,
      "imports": [
        "import os",
        "import sys",
        "import argparse",
        "import time",
        "from datetime import datetime",
        "from datetime import timedelta",
        "import re"
      ],
      "imports_required": [
        "import os",
        "import sys",
        "import argparse",
        "import time",
        "from datetime import datetime",
        "from datetime import timedelta",
        "import re"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 247,
      "line_start": 202,
      "name": "main_v48",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as the main entry point for a CLI tool that monitors and analyzes SharePoint to FileCloud sync operations. It parses command-line arguments to provide three main functionalities: displaying sync status with statistics over a specified time period, showing recent log entries (tail), and watching the log file in real-time. The function delegates to helper functions (analyze_logs, print_status, tail_logs, watch_logs) based on the selected command.",
      "return_annotation": null,
      "return_explained": "This function does not return any value (implicitly returns None). It performs side effects by printing output to stdout based on the selected command and may exit the program if argument parsing fails.",
      "settings_required": [
        "Log file must exist at the specified path (default: 'spfc_sync.log')",
        "Helper functions must be defined in the same module: analyze_logs(log_file, hours), print_status(stats), tail_logs(log_file, lines), watch_logs(log_file)",
        "Log file should contain SharePoint to FileCloud sync operation logs in a parseable format"
      ],
      "source_code": "def main():\n    \"\"\"Main entry point.\"\"\"\n    parser = argparse.ArgumentParser(description=\"Monitor SharePoint to FileCloud Sync\")\n    \n    parser.add_argument(\n        '--log-file',\n        default='spfc_sync.log',\n        help='Path to log file (default: spfc_sync.log)'\n    )\n    \n    parser.add_argument(\n        '--hours',\n        type=int,\n        default=24,\n        help='Hours of history to analyze (default: 24)'\n    )\n    \n    subparsers = parser.add_subparsers(dest='command', help='Commands')\n    \n    # Status command\n    status_parser = subparsers.add_parser('status', help='Show sync status')\n    \n    # Tail command\n    tail_parser = subparsers.add_parser('tail', help='Show recent log entries')\n    tail_parser.add_argument(\n        '--lines',\n        type=int,\n        default=50,\n        help='Number of lines to show (default: 50)'\n    )\n    \n    # Watch command\n    watch_parser = subparsers.add_parser('watch', help='Watch log file in real-time')\n    \n    args = parser.parse_args()\n    \n    if not args.command:\n        args.command = 'status'  # Default command\n    \n    if args.command == 'status':\n        stats = analyze_logs(args.log_file, args.hours)\n        print_status(stats)\n    elif args.command == 'tail':\n        tail_logs(args.log_file, args.lines)\n    elif args.command == 'watch':\n        watch_logs(args.log_file)",
      "source_file": "/tf/active/vicechatdev/SPFCsync/monitor.py",
      "tags": [
        "cli",
        "command-line-interface",
        "log-monitoring",
        "sharepoint",
        "filecloud",
        "sync-monitoring",
        "argparse",
        "log-analysis",
        "real-time-monitoring",
        "entry-point"
      ],
      "updated_at": "2025-12-07T01:56:14.676825",
      "usage_example": "# Run from command line:\n# Show status for last 24 hours (default)\npython script.py status\n\n# Show status for last 48 hours with custom log file\npython script.py --log-file /path/to/sync.log --hours 48 status\n\n# Show last 100 log lines\npython script.py tail --lines 100\n\n# Watch log file in real-time\npython script.py watch\n\n# If calling from Python code:\nif __name__ == '__main__':\n    main()"
    },
    {
      "best_practices": [
        "This function should be called as the entry point of a debug/test script, typically within an 'if __name__ == \"__main__\":' block",
        "Ensure Chroma DB server is running before executing this function to avoid connection errors",
        "The function depends on 'test_chroma_collections()' and 'test_collection_creation()' being defined in the same module",
        "Review the printed troubleshooting suggestions if tests fail",
        "This function is designed for development/debugging purposes and should not be used in production code",
        "Consider wrapping the test function calls in try-except blocks for better error handling",
        "The function mentions a specific collection '99_EDR' which suggests it's part of a larger application context"
      ],
      "class_interface": {},
      "complexity": "simple",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-06 08:47:48",
      "decorators": [],
      "dependencies": [
        "chromadb"
      ],
      "description": "Entry point function that executes a comprehensive test suite for Chroma DB collections, including collection listing and creation tests, followed by troubleshooting suggestions.",
      "docstring": "Main function to run all Chroma DB tests.",
      "id": 142,
      "imports": [
        "import chromadb",
        "import sys",
        "from typing import List",
        "from typing import Dict",
        "from typing import Any"
      ],
      "imports_required": [
        "import chromadb"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 204,
      "line_start": 186,
      "name": "main_v47",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as the main orchestrator for debugging and testing Chroma DB functionality. It runs multiple test functions to verify Chroma DB connectivity, collection operations, and provides diagnostic output with troubleshooting steps. Primarily used for development, debugging, and validation of Chroma DB setup.",
      "return_annotation": null,
      "return_explained": "This function does not return any value (implicitly returns None). It produces side effects by printing test results and troubleshooting information to stdout.",
      "settings_required": [
        "Chroma DB server must be running and accessible",
        "Correct Chroma DB host and port configuration (typically localhost:8000 or as configured in the application)",
        "Network connectivity to Chroma DB server if running remotely",
        "Appropriate permissions to create and list collections in Chroma DB",
        "The functions 'test_chroma_collections()' and 'test_collection_creation()' must be defined in the same module or imported"
      ],
      "source_code": "def main():\n    \"\"\"Main function to run all Chroma DB tests.\"\"\"\n    \n    print(\"Chroma DB Collections Debug Script\")\n    print(\"==================================\")\n    \n    # Test collections\n    test_chroma_collections()\n    \n    # Test collection creation\n    test_collection_creation()\n    \n    print(f\"\\n\ud83d\udccb TROUBLESHOOTING SUGGESTIONS:\")\n    print(\"1. Check if Chroma DB server is running\")\n    print(\"2. Verify the correct host and port\")\n    print(\"3. Check if 99_EDR collection was created with a different name\")\n    print(\"4. Verify Chroma DB version compatibility\")\n    print(\"5. Check Docker container status if using Docker\")\n    print(\"6. Review Chroma DB logs for any errors\")",
      "source_file": "/tf/active/vicechatdev/test_chroma_collections.py",
      "tags": [
        "testing",
        "debugging",
        "chroma-db",
        "database",
        "collections",
        "diagnostics",
        "troubleshooting",
        "entry-point",
        "main-function",
        "vector-database"
      ],
      "updated_at": "2025-12-07T01:56:14.676119",
      "usage_example": "# Ensure Chroma DB server is running\n# Ensure test_chroma_collections() and test_collection_creation() are defined\n\nif __name__ == '__main__':\n    main()\n\n# Expected output:\n# Chroma DB Collections Debug Script\n# ==================================\n# [Output from test_chroma_collections()]\n# [Output from test_collection_creation()]\n# \n# \ud83d\udccb TROUBLESHOOTING SUGGESTIONS:\n# 1. Check if Chroma DB server is running\n# 2. Verify the correct host and port\n# 3. Check if 99_EDR collection was created with a different name\n# 4. Verify Chroma DB version compatibility\n# 5. Check Docker container status if using Docker\n# 6. Review Chroma DB logs for any errors"
    },
    {
      "best_practices": [
        "Always backup your source files before running the fixer as it modifies files in place",
        "Run the fixer in a version-controlled environment so changes can be reviewed and reverted if needed",
        "Call methods in sequence: fix methods first, then create_fixed_upload_test(), then generate_fix_summary()",
        "Review the fixes_applied list after execution to verify all intended changes were made",
        "The fix_jwt_device_description method only flags issues for manual review rather than automatically fixing them",
        "Check console output for error messages (\u274c) indicating files that couldn't be updated",
        "The class assumes specific file names and patterns exist in the codebase; verify these exist before running",
        "The fixer is idempotent for most operations but may create duplicate entries if run multiple times on already-fixed code",
        "Ensure the base_dir path is correct; it defaults to the parent directory of the script file"
      ],
      "class_interface": {
        "attributes": [
          {
            "description": "The base directory path where source files are located (parent directory of the script)",
            "is_class_variable": false,
            "name": "base_dir",
            "type": "Path"
          },
          {
            "description": "Accumulates descriptions of all fixes successfully applied during the session",
            "is_class_variable": false,
            "name": "fixes_applied",
            "type": "list[str]"
          }
        ],
        "methods": [
          {
            "is_property": false,
            "is_static": false,
            "name": "__init__",
            "parameters": {},
            "purpose": "Initialize the ImplementationFixer with base directory and empty fixes tracking list",
            "returns": "None",
            "signature": "__init__(self)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "fix_user_agent",
            "parameters": {},
            "purpose": "Replace old user-agent string with real app user-agent in upload_manager.py, auth.py, and test_uploads.py",
            "returns": "None (modifies files in place and updates fixes_applied list)",
            "signature": "fix_user_agent(self)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "fix_metadata_source",
            "parameters": {},
            "purpose": "Change metadata source field from 'com.remarkable.windows' to 'com.remarkable.macos' in upload_manager.py",
            "returns": "None (modifies files in place and updates fixes_applied list)",
            "signature": "fix_metadata_source(self)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "fix_pagedata_content",
            "parameters": {},
            "purpose": "Replace empty string pagedata with newline character ('\\n') in upload_manager.py",
            "returns": "None (modifies files in place and updates fixes_applied list)",
            "signature": "fix_pagedata_content(self)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "fix_last_opened_field",
            "parameters": {},
            "purpose": "Ensure lastOpened field is consistently set to '0' in metadata structures in upload_manager.py",
            "returns": "None (modifies files in place and updates fixes_applied list)",
            "signature": "fix_last_opened_field(self)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "fix_jwt_device_description",
            "parameters": {},
            "purpose": "Analyze auth.py for device description patterns and flag for manual review (does not auto-fix)",
            "returns": "None (prints warnings and updates fixes_applied list with manual action items)",
            "signature": "fix_jwt_device_description(self)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "create_fixed_upload_test",
            "parameters": {},
            "purpose": "Generate a new test script (fixed_upload_test.py) that incorporates all identified fixes",
            "returns": "None (creates new executable Python file and updates fixes_applied list)",
            "signature": "create_fixed_upload_test(self)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "generate_fix_summary",
            "parameters": {},
            "purpose": "Print and save a JSON summary of all fixes applied during the session",
            "returns": "None (prints summary to console and saves JSON file to test_results directory)",
            "signature": "generate_fix_summary(self)"
          }
        ]
      },
      "complexity": "moderate",
      "component_type": "class",
      "conditional_imports": [],
      "created_at": "2025-12-07 00:56:00",
      "decorators": [],
      "dependencies": [
        "pathlib",
        "json",
        "os",
        "time",
        "uuid"
      ],
      "description": "A utility class that automatically fixes implementation discrepancies between a custom reMarkable tablet upload implementation and the real reMarkable app behavior by modifying source files.",
      "docstring": "Fix our implementation to match real app behavior",
      "id": 2121,
      "imports": [
        "import json",
        "import os",
        "import time",
        "from pathlib import Path",
        "from typing import Dict",
        "from typing import Any"
      ],
      "imports_required": [
        "from pathlib import Path",
        "import json",
        "import os",
        "import time",
        "import uuid"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 365,
      "line_start": 14,
      "name": "ImplementationFixer",
      "parameters": [
        {
          "annotation": "",
          "default": null,
          "name": "bases"
        }
      ],
      "parameters_explained": {
        "__init__": "No parameters required. The constructor automatically initializes the base directory (parent directory of the current file) and an empty list to track applied fixes."
      },
      "parent_class": null,
      "purpose": "This class identifies and applies fixes to source code files to ensure the custom implementation matches the real reMarkable app's behavior. It updates user-agent strings, metadata source fields, pagedata content, lastOpened fields, and JWT device descriptions across multiple Python files. It also generates a fixed test script and provides a summary of all applied fixes. The class is designed to be run as a one-time fixer or as part of a maintenance workflow to align custom code with observed real app behavior.",
      "return_annotation": null,
      "return_explained": "Instantiation returns an ImplementationFixer object. Methods do not return values but modify files in place and print status messages. The fixes_applied attribute accumulates a list of strings describing each fix applied. The generate_fix_summary method creates a JSON file with the summary but does not return it.",
      "settings_required": [
        "Write permissions to the directory containing the script and target files (upload_manager.py, auth.py, test_uploads.py)",
        "A 'test_results' directory will be created if it doesn't exist for storing summaries and test files"
      ],
      "source_code": "class ImplementationFixer:\n    \"\"\"Fix our implementation to match real app behavior\"\"\"\n    \n    def __init__(self):\n        self.base_dir = Path(__file__).parent\n        self.fixes_applied = []\n    \n    def fix_user_agent(self):\n        \"\"\"Fix user-agent to match real app\"\"\"\n        print(\"\ud83d\udd27 Fixing User-Agent...\")\n        \n        # Find files that contain user-agent strings\n        files_to_fix = [\n            self.base_dir / \"upload_manager.py\",\n            self.base_dir / \"auth.py\",\n            self.base_dir / \"test_uploads.py\"\n        ]\n        \n        old_ua = \"reMarkable-desktop-win/3.11.1.1951\"\n        new_ua = \"desktop/3.20.0.922 (macos 15.4)\"\n        \n        for file_path in files_to_fix:\n            if file_path.exists():\n                try:\n                    with open(file_path, 'r') as f:\n                        content = f.read()\n                    \n                    if old_ua in content:\n                        updated_content = content.replace(old_ua, new_ua)\n                        with open(file_path, 'w') as f:\n                            f.write(updated_content)\n                        print(f\"   \u2705 Updated {file_path.name}\")\n                        self.fixes_applied.append(f\"Updated user-agent in {file_path.name}\")\n                \n                except Exception as e:\n                    print(f\"   \u274c Failed to update {file_path.name}: {e}\")\n    \n    def fix_metadata_source(self):\n        \"\"\"Fix metadata source field to match real app\"\"\"\n        print(\"\ud83d\udd27 Fixing Metadata Source Field...\")\n        \n        files_to_fix = [\n            self.base_dir / \"upload_manager.py\"\n        ]\n        \n        old_source = '\"source\": \"com.remarkable.windows\"'\n        new_source = '\"source\": \"com.remarkable.macos\"'\n        \n        # Also fix the alternative format\n        old_source_alt = \"'source': 'com.remarkable.windows'\"\n        new_source_alt = \"'source': 'com.remarkable.macos'\"\n        \n        for file_path in files_to_fix:\n            if file_path.exists():\n                try:\n                    with open(file_path, 'r') as f:\n                        content = f.read()\n                    \n                    updated = False\n                    if old_source in content:\n                        content = content.replace(old_source, new_source)\n                        updated = True\n                    \n                    if old_source_alt in content:\n                        content = content.replace(old_source_alt, new_source_alt)\n                        updated = True\n                    \n                    if updated:\n                        with open(file_path, 'w') as f:\n                            f.write(content)\n                        print(f\"   \u2705 Updated source field in {file_path.name}\")\n                        self.fixes_applied.append(f\"Updated metadata source in {file_path.name}\")\n                \n                except Exception as e:\n                    print(f\"   \u274c Failed to update {file_path.name}: {e}\")\n    \n    def fix_pagedata_content(self):\n        \"\"\"Fix pagedata to use newline character instead of empty string\"\"\"\n        print(\"\ud83d\udd27 Fixing Pagedata Content...\")\n        \n        files_to_fix = [\n            self.base_dir / \"upload_manager.py\"\n        ]\n        \n        # Look for pagedata creation patterns\n        
old_patterns = [\n            'pagedata = \"\"',\n            \"pagedata = ''\",\n            'pagedata_content = \"\"',\n            \"pagedata_content = ''\"\n        ]\n        \n        new_pattern = 'pagedata = \"\\\\n\"'\n        \n        for file_path in files_to_fix:\n            if file_path.exists():\n                try:\n                    with open(file_path, 'r') as f:\n                        content = f.read()\n                    \n                    updated = False\n                    for old_pattern in old_patterns:\n                        if old_pattern in content:\n                            content = content.replace(old_pattern, new_pattern)\n                            updated = True\n                    \n                    if updated:\n                        with open(file_path, 'w') as f:\n                            f.write(content)\n                        print(f\"   \u2705 Updated pagedata content in {file_path.name}\")\n                        self.fixes_applied.append(f\"Updated pagedata content in {file_path.name}\")\n                \n                except Exception as e:\n                    print(f\"   \u274c Failed to update {file_path.name}: {e}\")\n    \n    def fix_last_opened_field(self):\n        \"\"\"Ensure lastOpened is consistently set to '0'\"\"\"\n        print(\"\ud83d\udd27 Fixing LastOpened Field...\")\n        \n        files_to_fix = [\n            self.base_dir / \"upload_manager.py\"\n        ]\n        \n        # Look for lastOpened patterns that might not be \"0\"\n        patterns_to_check = [\n            '\"lastOpened\":',\n            \"'lastOpened':\"\n        ]\n        \n        for file_path in files_to_fix:\n            if file_path.exists():\n                try:\n                    with open(file_path, 'r') as f:\n                        lines = f.readlines()\n                    \n                    updated = False\n                    for i, line in enumerate(lines):\n                        for pattern in patterns_to_check:\n                            if pattern in line and '\"0\"' not in line and \"'0'\" not in line:\n                                # Fix the line to use \"0\"\n                                if '\"lastOpened\":' in line:\n                                    lines[i] = line.split('\"lastOpened\":')[0] + '\"lastOpened\": \"0\",' + line.split(':')[1].split(',', 1)[1] if ',' in line.split(':')[1] else '\\n'\n                                updated = True\n                    \n                    if updated:\n                        with open(file_path, 'w') as f:\n                            f.writelines(lines)\n                        print(f\"   \u2705 Updated lastOpened field in {file_path.name}\")\n                        self.fixes_applied.append(f\"Fixed lastOpened field in {file_path.name}\")\n                \n                except Exception as e:\n                    print(f\"   \u274c Failed to update {file_path.name}: {e}\")\n    \n    def fix_jwt_device_description(self):\n        \"\"\"Update JWT generation to use macOS device description\"\"\"\n        print(\"\ud83d\udd27 Fixing JWT Device Description...\")\n        \n        # This requires updating the authentication process\n        auth_file = self.base_dir / \"auth.py\"\n        \n        if auth_file.exists():\n            try:\n                with open(auth_file, 'r') as f:\n                    content = f.read()\n                \n                # Look for device description patterns\n                old_patterns = 
[\n                    'desktop-windows',\n                    'desktop-win',\n                    'windows'\n                ]\n                \n                new_replacement = 'desktop-macos'\n                \n                updated = False\n                for old_pattern in old_patterns:\n                    if old_pattern in content.lower():\n                        # This is more complex - we need to identify the specific context\n                        print(f\"   \u26a0\ufe0f Found '{old_pattern}' in auth.py - manual review needed\")\n                        print(f\"   \ud83d\udcdd Action: Update device registration to use 'desktop-macos'\")\n                        self.fixes_applied.append(f\"JWT device description needs manual update in auth.py\")\n                        updated = True\n                \n                if not updated:\n                    print(\"   \u2139\ufe0f No obvious device description patterns found in auth.py\")\n                    print(\"   \ud83d\udcdd Note: JWT device description may be set during token generation\")\n            \n            except Exception as e:\n                print(f\"   \u274c Failed to analyze auth.py: {e}\")\n    \n    def create_fixed_upload_test(self):\n        \"\"\"Create a test script with all fixes applied\"\"\"\n        print(\"\ud83d\udd27 Creating Fixed Upload Test...\")\n        \n        fixed_test_content = '''#!/usr/bin/env python3\n\"\"\"\nFixed Upload Test - Matches Real App Behavior\n\nThis test script incorporates all the fixes identified by dry run analysis.\n\"\"\"\n\nimport os\nimport json\nimport time\nfrom pathlib import Path\nimport uuid\n\ndef create_test_document_with_fixes():\n    \"\"\"Create a test document with all real app fixes applied\"\"\"\n    \n    # Generate document UUID\n    doc_uuid = str(uuid.uuid4())\n    \n    # Fixed metadata (matches real app)\n    metadata = {\n        \"createdTime\": str(int(time.time() * 1000)),\n        \"lastModified\": str(int(time.time() * 1000)),\n        \"lastOpened\": \"0\",  # \u2705 Fixed: Always \"0\"\n        \"lastOpenedPage\": 0,\n        \"metadatamodified\": False,\n        \"modified\": False,\n        \"parent\": \"\",\n        \"pinned\": False,\n        \"source\": \"com.remarkable.macos\",  # \u2705 Fixed: Changed from windows to macos\n        \"type\": \"DocumentType\",\n        \"visibleName\": \"Fixed_Test_Document\",\n        \"version\": 1\n    }\n    \n    # Fixed content structure\n    content = {\n        \"coverPageNumber\": 0,\n        \"customZoomCenterX\": 0,\n        \"customZoomCenterY\": 936,\n        \"customZoomOrientation\": \"portrait\",\n        \"customZoomPageHeight\": 1872,\n        \"customZoomPageWidth\": 1404,\n        \"customZoomScale\": 1,\n        \"documentMetadata\": {},\n        \"extraMetadata\": {},\n        \"fileType\": \"pdf\",\n        \"fontName\": \"\",\n        \"formatVersion\": 1,\n        \"lineHeight\": -1,\n        \"orientation\": \"portrait\",\n        \"originalPageCount\": 1,\n        \"pageCount\": 1,\n        \"pageTags\": [],\n        \"pages\": [str(uuid.uuid4())],\n        \"redirectionPageMap\": [0],\n        \"sizeInBytes\": \"1000\",\n        \"tags\": [],\n        \"textAlignment\": \"justify\",\n        \"textScale\": 1,\n        \"zoomMode\": \"bestFit\"\n    }\n    \n    # Fixed pagedata content\n    pagedata = \"\\\\n\"  # \u2705 Fixed: Changed from empty string to newline\n    \n    # Fixed headers (for reference)\n    headers_template = {\n        'host': 
'eu.tectonic.remarkable.com',\n        'authorization': 'Bearer YOUR_TOKEN_HERE',\n        'content-type': 'application/octet-stream',\n        'rm-batch-number': '1',\n        'rm-sync-id': str(uuid.uuid4()),\n        'user-agent': 'desktop/3.20.0.922 (macos 15.4)',  # \u2705 Fixed: Matches real app\n        'connection': 'Keep-Alive',\n        'accept-encoding': 'gzip, deflate',\n        'accept-language': 'en-BE,*'  # \u2705 Fixed: Matches real app locale\n    }\n    \n    print(\"\u2705 Test document created with all real app fixes applied:\")\n    print(f\"   \ud83d\udcdd Document UUID: {doc_uuid}\")\n    print(f\"   \ud83d\udd27 Metadata source: {metadata['source']}\")\n    print(f\"   \ud83d\udd27 LastOpened: {metadata['lastOpened']}\")\n    print(f\"   \ud83d\udd27 Pagedata: {repr(pagedata)}\")\n    print(f\"   \ud83d\udd27 User-Agent: {headers_template['user-agent']}\")\n    \n    return {\n        'uuid': doc_uuid,\n        'metadata': metadata,\n        'content': content,\n        'pagedata': pagedata,\n        'headers_template': headers_template\n    }\n\nif __name__ == \"__main__\":\n    print(\"\ud83e\uddea FIXED UPLOAD TEST - REAL APP BEHAVIOR\")\n    print(\"=\" * 50)\n    \n    test_doc = create_test_document_with_fixes()\n    \n    # Save test data for analysis\n    output_file = Path(__file__).parent / \"test_results\" / \"fixed_document_structure.json\"\n    output_file.parent.mkdir(exist_ok=True)\n    \n    with open(output_file, 'w') as f:\n        json.dump(test_doc, f, indent=2, default=str)\n    \n    print(f\"\\\\n\ud83d\udcbe Fixed document structure saved to: {output_file}\")\n    print(\"\\\\n\ud83c\udfaf Ready for real app behavior testing!\")\n'''\n        \n        fixed_test_file = self.base_dir / \"fixed_upload_test.py\"\n        \n        try:\n            with open(fixed_test_file, 'w') as f:\n                f.write(fixed_test_content)\n            \n            # Make executable\n            os.chmod(fixed_test_file, 0o755)\n            \n            print(f\"   \u2705 Created {fixed_test_file}\")\n            self.fixes_applied.append(f\"Created fixed upload test script\")\n        \n        except Exception as e:\n            print(f\"   \u274c Failed to create fixed test: {e}\")\n    \n    def generate_fix_summary(self):\n        \"\"\"Generate a summary of all fixes applied\"\"\"\n        print(\"\\n\ud83d\udccb FIX SUMMARY\")\n        print(\"=\" * 50)\n        \n        if self.fixes_applied:\n            print(\"\u2705 Fixes Applied:\")\n            for i, fix in enumerate(self.fixes_applied, 1):\n                print(f\"   {i}. 
{fix}\")\n        else:\n            print(\"\u274c No fixes were applied\")\n        \n        # Save summary\n        summary = {\n            'timestamp': time.time(),\n            'fixes_applied': self.fixes_applied,\n            'critical_fixes': [\n                'User-Agent changed to: desktop/3.20.0.922 (macos 15.4)',\n                'Metadata source changed to: com.remarkable.macos',\n                'Pagedata content changed to: newline character',\n                'LastOpened field standardized to: \"0\"',\n                'JWT device description flagged for manual update'\n            ]\n        }\n        \n        summary_file = self.base_dir / \"test_results\" / f\"implementation_fixes_{int(time.time())}.json\"\n        summary_file.parent.mkdir(exist_ok=True)\n        \n        with open(summary_file, 'w') as f:\n            json.dump(summary, f, indent=2, default=str)\n        \n        print(f\"\\n\ud83d\udcbe Fix summary saved to: {summary_file}\")",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/cloudtest/implementation_fixer.py",
      "tags": [
        "code-fixer",
        "file-modification",
        "remarkable-tablet",
        "implementation-alignment",
        "automation",
        "refactoring",
        "string-replacement",
        "metadata-correction",
        "testing-utilities"
      ],
      "updated_at": "2025-12-07T01:56:00.111096",
      "usage_example": "from implementation_fixer import ImplementationFixer\n\n# Instantiate the fixer\nfixer = ImplementationFixer()\n\n# Apply individual fixes\nfixer.fix_user_agent()\nfixer.fix_metadata_source()\nfixer.fix_pagedata_content()\nfixer.fix_last_opened_field()\nfixer.fix_jwt_device_description()\n\n# Create a fixed test script\nfixer.create_fixed_upload_test()\n\n# Generate and save summary\nfixer.generate_fix_summary()\n\n# Check what was fixed\nprint(f\"Applied {len(fixer.fixes_applied)} fixes\")\nfor fix in fixer.fixes_applied:\n    print(f\"  - {fix}\")"
    },
    {
      "best_practices": [],
      "class_interface": {
        "attributes": [],
        "methods": [
          {
            "is_property": false,
            "is_static": false,
            "name": "__init__",
            "parameters": {
              "workspace_dir": "Type: str"
            },
            "purpose": "Internal method:   init  ",
            "returns": "None",
            "signature": "__init__(self, workspace_dir)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "setup_logging",
            "parameters": {},
            "purpose": "Setup logging to file",
            "returns": "None",
            "signature": "setup_logging(self)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_authenticate",
            "parameters": {},
            "purpose": "Authenticate with the reMarkable cloud service using token-based approach",
            "returns": "Returns Optional[requests.Session]",
            "signature": "_authenticate(self) -> Optional[requests.Session]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_get_new_token",
            "parameters": {},
            "purpose": "Get a new authentication token",
            "returns": "Returns Optional[requests.Session]",
            "signature": "_get_new_token(self) -> Optional[requests.Session]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "sync_replica",
            "parameters": {},
            "purpose": "Perform replica synchronization using the proven 3-step process:\n1. Discovery - Get all nodes from cloud\n2. Hierarchy - Build proper folder structure\n3. Extraction - Download content to correct locations",
            "returns": "Returns bool",
            "signature": "sync_replica(self) -> bool"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_load_database",
            "parameters": {},
            "purpose": "Load or create replica database",
            "returns": "Returns Dict[str, Any]",
            "signature": "_load_database(self) -> Dict[str, Any]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_save_database",
            "parameters": {},
            "purpose": "Save database to disk",
            "returns": "None",
            "signature": "_save_database(self)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_save_file_content",
            "parameters": {
              "content": "Type: bytes",
              "content_hash": "Type: str",
              "filename": "Type: str"
            },
            "purpose": "Save file content to local content directory",
            "returns": "Returns bool",
            "signature": "_save_file_content(self, content_hash, content, filename) -> bool"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_get_file_type",
            "parameters": {
              "filename": "Type: str"
            },
            "purpose": "Determine file type from filename",
            "returns": "Returns str",
            "signature": "_get_file_type(self, filename) -> str"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_compute_hash",
            "parameters": {
              "content": "Type: bytes"
            },
            "purpose": "Compute SHA256 hash of content",
            "returns": "Returns str",
            "signature": "_compute_hash(self, content) -> str"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "sync_complete_replica",
            "parameters": {},
            "purpose": "Perform complete replica synchronization",
            "returns": "Returns bool",
            "signature": "sync_complete_replica(self) -> bool"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_fetch_node_details",
            "parameters": {
              "node_hash": "Type: str",
              "node_size": "Type: str",
              "node_type": "Type: str",
              "node_uuid": "Type: str"
            },
            "purpose": "Fetch detailed information about a node",
            "returns": "Returns Optional[Dict[str, Any]]",
            "signature": "_fetch_node_details(self, node_uuid, node_hash, node_type, node_size) -> Optional[Dict[str, Any]]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_parse_folder_node",
            "parameters": {
              "folder_content": "Type: str",
              "node_hash": "Type: str",
              "node_uuid": "Type: str"
            },
            "purpose": "Parse folder node content",
            "returns": "Returns Dict[str, Any]",
            "signature": "_parse_folder_node(self, node_uuid, node_hash, folder_content) -> Dict[str, Any]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_parse_document_node",
            "parameters": {
              "doc_content": "Type: str",
              "node_hash": "Type: str",
              "node_type": "Type: str",
              "node_uuid": "Type: str"
            },
            "purpose": "Parse document node content (docSchema)",
            "returns": "Returns Dict[str, Any]",
            "signature": "_parse_document_node(self, node_uuid, node_hash, doc_content, node_type) -> Dict[str, Any]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_fetch_metadata",
            "parameters": {
              "metadata_hash": "Type: str"
            },
            "purpose": "Fetch and parse document metadata",
            "returns": "Returns Optional[Dict[str, Any]]",
            "signature": "_fetch_metadata(self, metadata_hash) -> Optional[Dict[str, Any]]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_save_summary",
            "parameters": {},
            "purpose": "Save human-readable summary",
            "returns": "None",
            "signature": "_save_summary(self)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_create_content_index",
            "parameters": {},
            "purpose": "Create an index of all downloaded content files",
            "returns": "None",
            "signature": "_create_content_index(self)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "get_content_file_path",
            "parameters": {
              "content_hash": "Type: str"
            },
            "purpose": "Get the local path for a content file",
            "returns": "Returns Path",
            "signature": "get_content_file_path(self, content_hash) -> Path"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_create_folder_structure",
            "parameters": {},
            "purpose": "Create readable folder structure with documents in their proper folders",
            "returns": "None",
            "signature": "_create_folder_structure(self)"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "_sanitize_filename",
            "parameters": {
              "filename": "Type: str"
            },
            "purpose": "Sanitize filename for filesystem use",
            "returns": "Returns str",
            "signature": "_sanitize_filename(self, filename) -> str"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "get_node_by_uuid",
            "parameters": {
              "uuid": "Type: str"
            },
            "purpose": "Get a specific node by UUID",
            "returns": "Returns Optional[Dict[str, Any]]",
            "signature": "get_node_by_uuid(self, uuid) -> Optional[Dict[str, Any]]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "get_documents_in_folder",
            "parameters": {
              "folder_uuid": "Type: str"
            },
            "purpose": "Get all documents in a specific folder",
            "returns": "Returns List[Dict[str, Any]]",
            "signature": "get_documents_in_folder(self, folder_uuid) -> List[Dict[str, Any]]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "get_folders",
            "parameters": {},
            "purpose": "Get all folders",
            "returns": "Returns List[Dict[str, Any]]",
            "signature": "get_folders(self) -> List[Dict[str, Any]]"
          },
          {
            "is_property": false,
            "is_static": false,
            "name": "get_root_documents",
            "parameters": {},
            "purpose": "Get all documents in root (no parent)",
            "returns": "Returns List[Dict[str, Any]]",
            "signature": "get_root_documents(self) -> List[Dict[str, Any]]"
          }
        ]
      },
      "complexity": "moderate",
      "component_type": "class",
      "conditional_imports": [],
      "created_at": "2025-12-07 00:55:13",
      "decorators": [],
      "dependencies": [],
      "description": "Standalone replica synchronization using proven local_replica_v2 approach",
      "docstring": "Standalone replica synchronization using proven local_replica_v2 approach",
      "id": 2119,
      "imports": [
        "import os",
        "import sys",
        "import json",
        "import time",
        "import hashlib",
        "import requests",
        "import logging",
        "import re",
        "import shutil",
        "import subprocess",
        "from pathlib import Path",
        "from datetime import datetime",
        "from typing import Dict",
        "from typing import Any",
        "from typing import Optional",
        "from typing import List",
        "from typing import Set",
        "from dataclasses import dataclass",
        "import re",
        "import shutil"
      ],
      "imports_required": [
        "import os",
        "import sys",
        "import json",
        "import time",
        "import hashlib"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 824,
      "line_start": 59,
      "name": "RemarkableReplicaSync_v1",
      "parameters": [
        {
          "annotation": "",
          "default": null,
          "name": "bases"
        }
      ],
      "parameters_explained": {
        "bases": "Parameter of type "
      },
      "parent_class": null,
      "purpose": "Standalone replica synchronization using proven local_replica_v2 approach",
      "return_annotation": null,
      "return_explained": "Returns unspecified type",
      "settings_required": [],
      "source_code": "class RemarkableReplicaSync:\n    \"\"\"Standalone replica synchronization using proven local_replica_v2 approach\"\"\"\n    \n    def __init__(self, workspace_dir: str = None):\n        self.workspace_dir = Path(workspace_dir) if workspace_dir else Path(__file__).parent\n        self.replica_dir = self.workspace_dir / \"remarkable_replica_v2\"\n        self.content_dir = self.replica_dir / \"content\"\n        \n        # Create directories\n        for directory in [self.replica_dir, self.content_dir]:\n            directory.mkdir(parents=True, exist_ok=True)\n        \n        # Setup logging\n        self.log_file = self.replica_dir / \"build.log\"\n        self.setup_logging()\n        \n        # Initialize authentication\n        self.session = self._authenticate()\n        if not self.session:\n            raise RuntimeError(\"Failed to authenticate with reMarkable\")\n        \n        # State matching local_replica_v2.py\n        self.nodes: Dict[str, RemarkableNode] = {}\n        self.all_hashes: Set[str] = set()\n        self.failed_downloads: Set[str] = set()\n        \n        # Statistics\n        self.stats = {\n            'total_nodes': 0,\n            'folders': 0,\n            'documents': 0,\n            'pdfs_extracted': 0,\n            'rm_files_extracted': 0,\n            'rm_pdfs_converted': 0,\n            'nodes_added': 0\n        }\n        \n    def setup_logging(self):\n        \"\"\"Setup logging to file\"\"\"\n        logging.basicConfig(\n            level=logging.INFO,\n            format='%(asctime)s - %(levelname)s - %(message)s',\n            handlers=[\n                logging.FileHandler(self.log_file, mode='w'),\n                logging.StreamHandler()\n            ]\n        )\n        self.logger = logging.getLogger(__name__)\n    \n    def _authenticate(self) -> Optional[requests.Session]:\n        \"\"\"Authenticate with the reMarkable cloud service using token-based approach\"\"\"\n        token_file = self.workspace_dir / '.remarkable_token'\n        \n        if token_file.exists():\n            print(\"\ufffd Using existing reMarkable token...\")\n            try:\n                with open(token_file, 'r') as f:\n                    token_data = json.load(f)\n                \n                session = requests.Session()\n                session.headers.update({\n                    'Authorization': f'Bearer {token_data[\"access_token\"]}',\n                    'User-Agent': 'remarkable-replica-sync/1.0'\n                })\n                \n                # Quick test - try to get document root\n                test_url = f'{token_data[\"service_manager_url\"]}/document-storage/json/2/docs'\n                response = session.get(test_url)\n                \n                if response.status_code == 200:\n                    print(\"\u2705 Authentication successful\")\n                    return session\n                else:\n                    print(f\"\u274c Token test failed with status {response.status_code}\")\n                    \n            except Exception as e:\n                print(f\"\u274c Token authentication failed: {e}\")\n        \n        # Need new token\n        print(\"\ud83d\udd10 No valid token found. 
Getting new token...\")\n        return self._get_new_token()\n    \n    def _get_new_token(self) -> Optional[requests.Session]:\n        \"\"\"Get a new authentication token\"\"\"\n        device_token = '9c4e7c2b-c6c7-4831-8b2a-3f5a2e8f9c3d'\n        \n        try:\n            # Step 1: Register device\n            register_url = 'https://webapp-production-dot-remarkable-production.appspot.com/token/json/2/device/new'\n            register_data = {\n                'code': device_token,\n                'deviceDesc': 'desktop-linux',\n                'deviceID': hashlib.sha256(f\"replica-sync-{int(time.time())}\".encode()).hexdigest()[:8]\n            }\n            \n            response = requests.post(register_url, json=register_data)\n            if response.status_code != 200:\n                print(f\"\u274c Device registration failed: {response.status_code}\")\n                return None\n                \n            device_bearer = response.text.strip('\"')\n            \n            # Step 2: Get user token\n            user_url = 'https://webapp-production-dot-remarkable-production.appspot.com/token/json/2/user/new'\n            user_response = requests.post(\n                user_url,\n                headers={'Authorization': f'Bearer {device_bearer}'}\n            )\n            \n            if user_response.status_code != 200:\n                print(f\"\u274c User token failed: {user_response.status_code}\")\n                return None\n                \n            user_token = user_response.text.strip('\"')\n            \n            # Step 3: Get service discovery\n            discovery_url = 'https://service-manager-production-dot-remarkable-production.appspot.com/service/json/1/document-storage?environment=production&group=auth0%7C5a68dc51cb30df3877a1d7c4&apiVer=2'\n            discovery_response = requests.get(\n                discovery_url,\n                headers={'Authorization': f'Bearer {user_token}'}\n            )\n            \n            if discovery_response.status_code != 200:\n                print(f\"\u274c Service discovery failed: {discovery_response.status_code}\")\n                return None\n                \n            service_info = discovery_response.json()\n            service_url = service_info.get('Host')\n            \n            if not service_url:\n                print(\"\u274c No service URL in discovery response\")\n                return None\n            \n            # Save token info\n            token_data = {\n                'access_token': user_token,\n                'service_manager_url': service_url,\n                'created_at': datetime.now().isoformat()\n            }\n            \n            token_file = self.workspace_dir / '.remarkable_token'\n            with open(token_file, 'w') as f:\n                json.dump(token_data, f, indent=2)\n            \n            # Create session\n            session = requests.Session()\n            session.headers.update({\n                'Authorization': f'Bearer {user_token}',\n                'User-Agent': 'remarkable-replica-sync/1.0'\n            })\n            \n            print(\"\u2705 New authentication token obtained and saved\")\n            return session\n            \n        except Exception as e:\n            print(f\"\u274c Authentication failed: {e}\")\n            return None\n            \n            return None\n    \n    def sync_replica(self) -> bool:\n        \"\"\"\n        Perform replica synchronization using the proven 3-step process:\n    
    1. Discovery - Get all nodes from cloud\n        2. Hierarchy - Build proper folder structure\n        3. Extraction - Download content to correct locations\n        \"\"\"\n        try:\n            self.logger.info(\"\ud83d\ude80 Starting reMarkable replica sync\")\n            \n            # Phase 1: Discovery\n            if not self._discover_all_nodes():\n                self.logger.error(\"\u274c Discovery phase failed\")\n                return False\n            \n            # Phase 2: Build hierarchy \n            if not self._build_folder_hierarchy():\n                self.logger.error(\"\u274c Hierarchy phase failed\")\n                return False\n            \n            # Phase 3: Extract content\n            if not self._extract_content():\n                self.logger.error(\"\u274c Content extraction phase failed\")\n                return False\n            \n            # Generate summary\n            self._generate_summary()\n            \n            self.logger.info(\"\u2705 Replica sync completed successfully\")\n            return True\n            \n        except Exception as e:\n            self.logger.error(f\"\u274c Sync failed: {e}\")\n            return False\n    \n    def _load_database(self) -> Dict[str, Any]:\n        \"\"\"Load or create replica database\"\"\"\n        if self.database_path.exists():\n            try:\n                with open(self.database_path, 'r') as f:\n                    db = json.load(f)\n                print(f\"\ud83d\udcc2 Loaded existing database with {len(db.get('nodes', {}))} nodes\")\n                return db\n            except Exception as e:\n                print(f\"\u26a0\ufe0f Database corrupted, creating new: {e}\")\n        \n        # Create new database\n        db = {\n            'nodes': {},\n            'hash_registry': {},\n            'metadata': {\n                'last_sync': None,\n                'sync_count': 0,\n                'created': datetime.now().isoformat()\n            }\n        }\n        print(\"\ud83d\udcc2 Created new replica database\")\n        return db\n    \n    def _save_database(self):\n        \"\"\"Save database to disk\"\"\"\n        try:\n            with open(self.database_path, 'w') as f:\n                json.dump(self.database, f, indent=2, default=str)\n            \n            # Update metadata\n            self.database['metadata']['last_sync'] = datetime.now().isoformat()\n            self.database['metadata']['sync_count'] += 1\n            \n        except Exception as e:\n            print(f\"\u274c Failed to save database: {e}\")\n    \n    def _save_file_content(self, content_hash: str, content: bytes, filename: str) -> bool:\n        \"\"\"Save file content to local content directory\"\"\"\n        try:\n            # Create file path using hash (first 2 chars as subdirectory)\n            subdir = content_hash[:2]\n            file_dir = self.content_dir / subdir\n            file_dir.mkdir(exist_ok=True)\n            \n            file_path = file_dir / content_hash\n            \n            # Only save if file doesn't exist (avoid re-downloading)\n            if not file_path.exists():\n                with open(file_path, 'wb') as f:\n                    f.write(content)\n                print(f\"   \ud83d\udcbe Saved {filename} ({len(content)} bytes)\")\n            \n            # Register in hash registry\n            if content_hash not in self.database.get('hash_registry', {}):\n                if 'hash_registry' not in self.database:\n       
             self.database['hash_registry'] = {}\n                \n                self.database['hash_registry'][content_hash] = {\n                    'filename': filename,\n                    'size': len(content),\n                    'type': self._get_file_type(filename),\n                    'downloaded': datetime.now().isoformat()\n                }\n            \n            return True\n            \n        except Exception as e:\n            print(f\"\u274c Failed to save {filename}: {e}\")\n            return False\n    \n    def _get_file_type(self, filename: str) -> str:\n        \"\"\"Determine file type from filename\"\"\"\n        if filename.endswith('.pdf'):\n            return 'pdf'\n        elif filename.endswith('.metadata'):\n            return 'metadata'\n        elif filename.endswith('.content'):\n            return 'content'\n        elif filename.endswith('.pagedata'):\n            return 'pagedata'\n        elif filename.endswith('.rm'):\n            return 'notebook_page'\n        elif filename.endswith('.docSchema'):\n            return 'docschema'\n        else:\n            return 'unknown'\n\n    def _compute_hash(self, content: bytes) -> str:\n        \"\"\"Compute SHA256 hash of content\"\"\"\n        return hashlib.sha256(content).hexdigest()\n    \n    def sync_complete_replica(self) -> bool:\n        \"\"\"Perform complete replica synchronization\"\"\"\n        try:\n            print(\"\\n\ud83d\ude80 STARTING COMPLETE REPLICA SYNC\")\n            print(\"=\" * 50)\n            \n            # Step 1: Get current root state\n            print(\"\ud83d\udccb Step 1: Getting root state from server...\")\n            root_response = self.session.get(\"https://eu.tectonic.remarkable.com/sync/v4/root\")\n            root_response.raise_for_status()\n            root_data = root_response.json()\n            \n            current_root_hash = root_data['hash']\n            current_generation = root_data['generation']\n            \n            print(f\"\ud83c\udf31 Root hash: {current_root_hash}\")\n            print(f\"\ud83d\udd22 Generation: {current_generation}\")\n            \n            # Step 2: Fetch root.docSchema\n            print(\"\ud83d\udccb Step 2: Fetching root.docSchema...\")\n            root_content_response = self.session.get(f\"https://eu.tectonic.remarkable.com/sync/v3/files/{current_root_hash}\")\n            root_content_response.raise_for_status()\n            root_content = root_content_response.text\n            \n            print(f\"\ud83d\udcc4 Root.docSchema size: {len(root_content)} bytes\")\n            \n            # Step 3: Parse and discover all nodes\n            print(\"\ud83d\udccb Step 3: Discovering all nodes...\")\n            discovered_nodes = {}\n            \n            lines = root_content.strip().split('\\n')\n            if len(lines) < 1:\n                print(\"\u274c Empty root.docSchema\")\n                return False\n            \n            version = lines[0]\n            print(f\"\ud83d\udcca Schema version: {version}\")\n            \n            # Process each entry in root.docSchema\n            for line_num, line in enumerate(lines[1:], 1):\n                if ':' in line:\n                    parts = line.split(':')\n                    if len(parts) >= 5:\n                        node_hash = parts[0]\n                        node_uuid = parts[2]\n                        node_type = parts[3]\n                        node_size = parts[4]\n                        \n                        # 
Fetch node details\n                        node_info = self._fetch_node_details(node_uuid, node_hash, node_type, node_size)\n                        if node_info:\n                            discovered_nodes[node_uuid] = node_info\n                            \n                        # Progress indicator\n                        if line_num % 5 == 0:\n                            print(f\"   \ud83d\udcca Processed {line_num}/{len(lines)-1} entries...\")\n            \n            print(f\"\u2705 Discovered {len(discovered_nodes)} nodes\")\n            \n            # Step 4: Update database\n            print(\"\ud83d\udccb Step 4: Updating database...\")\n            \n            # Count changes\n            new_nodes = 0\n            updated_nodes = 0\n            \n            for uuid, node_info in discovered_nodes.items():\n                if uuid not in self.database['nodes']:\n                    new_nodes += 1\n                else:\n                    # Check if updated\n                    existing_node = self.database['nodes'][uuid]\n                    if existing_node.get('hash') != node_info.get('hash'):\n                        updated_nodes += 1\n                \n                self.database['nodes'][uuid] = node_info\n            \n            # Remove nodes no longer in cloud\n            cloud_uuids = set(discovered_nodes.keys())\n            local_uuids = set(self.database['nodes'].keys())\n            removed_uuids = local_uuids - cloud_uuids\n            \n            for uuid in removed_uuids:\n                del self.database['nodes'][uuid]\n            \n            print(f\"\ud83d\udcca Database changes:\")\n            print(f\"   \ud83c\udd95 New nodes: {new_nodes}\")\n            print(f\"   \ud83d\udd04 Updated nodes: {updated_nodes}\")\n            print(f\"   \ud83d\uddd1\ufe0f Removed nodes: {len(removed_uuids)}\")\n            \n            # Step 5: Save database and summary\n            print(\"\ud83d\udccb Step 5: Saving database and summary...\")\n            self._save_database()\n            self._save_summary()\n            \n            # Step 6: Create content index\n            print(\"\ud83d\udccb Step 6: Creating content index...\")\n            self._create_content_index()\n            \n            # Step 7: Create folder structure with files\n            print(\"\ud83d\udccb Step 7: Creating folder structure...\")\n            self._create_folder_structure()\n            \n            print(f\"\\n\ud83c\udf89 REPLICA SYNC COMPLETED!\")\n            print(f\"\ud83d\udcca Total nodes: {len(self.database['nodes'])}\")\n            print(f\"\ud83d\udcc1 Database: {self.database_path}\")\n            print(f\"\ud83d\udcc4 Summary: {self.summary_path}\")\n            print(f\"\ud83d\udcbe Content files: {self.content_dir}\")\n            print(f\"\ud83d\udcc2 Folder structure: {self.replica_dir / 'documents'}\")\n            \n            return True\n            \n        except Exception as e:\n            print(f\"\u274c Replica sync failed: {e}\")\n            return False\n    \n    def _fetch_node_details(self, node_uuid: str, node_hash: str, node_type: str, node_size: str) -> Optional[Dict[str, Any]]:\n        \"\"\"Fetch detailed information about a node\"\"\"\n        try:\n            # Fetch node content (docSchema or metadata)\n            node_response = self.session.get(f\"https://eu.tectonic.remarkable.com/sync/v3/files/{node_hash}\")\n            node_response.raise_for_status()\n            node_content = 
node_response.text\n            node_content_bytes = node_response.content\n            \n            # Save the node content locally\n            if node_type in ['1', '2']:  # Folder\n                self._save_file_content(node_hash, node_content_bytes, f\"{node_uuid}.metadata\")\n            else:  # Document\n                self._save_file_content(node_hash, node_content_bytes, f\"{node_uuid}.docSchema\")\n            \n            # Determine node type and parse\n            if node_type in ['1', '2']:  # Folder\n                return self._parse_folder_node(node_uuid, node_hash, node_content)\n            elif node_type in ['3', '4']:  # Document\n                return self._parse_document_node(node_uuid, node_hash, node_content, node_type)\n            else:\n                print(f\"\u26a0\ufe0f Unknown node type {node_type} for {node_uuid[:8]}...\")\n                return None\n                \n        except Exception as e:\n            print(f\"\u274c Failed to fetch node {node_uuid[:8]}...: {e}\")\n            return None\n    \n    def _parse_folder_node(self, node_uuid: str, node_hash: str, folder_content: str) -> Dict[str, Any]:\n        \"\"\"Parse folder node content\"\"\"\n        try:\n            # For folders, the content is the metadata JSON\n            metadata = json.loads(folder_content)\n            \n            return {\n                'uuid': node_uuid,\n                'hash': node_hash,\n                'name': metadata.get('visibleName', 'Unknown Folder'),\n                'node_type': 'folder',\n                'metadata': metadata,\n                'last_modified': metadata.get('lastModified', '0'),\n                'parent_uuid': metadata.get('parent', ''),\n                'sync_status': 'synced',\n                'last_synced': datetime.now().isoformat()\n            }\n            \n        except Exception as e:\n            print(f\"\u274c Failed to parse folder {node_uuid[:8]}...: {e}\")\n            return None\n    \n    def _parse_document_node(self, node_uuid: str, node_hash: str, doc_content: str, node_type: str) -> Dict[str, Any]:\n        \"\"\"Parse document node content (docSchema)\"\"\"\n        try:\n            # Parse docSchema to get components\n            lines = doc_content.strip().split('\\n')\n            if len(lines) < 2:\n                print(f\"\u26a0\ufe0f Invalid docSchema for {node_uuid[:8]}...\")\n                return None\n            \n            version = lines[0]\n            component_hashes = {}\n            metadata = None\n            \n            # Extract component hashes and download components\n            for line in lines[1:]:\n                if ':' in line:\n                    parts = line.split(':')\n                    if len(parts) >= 3:\n                        comp_hash = parts[0]\n                        comp_name = parts[2]\n                        \n                        # Download the component\n                        try:\n                            comp_response = self.session.get(f\"https://eu.tectonic.remarkable.com/sync/v3/files/{comp_hash}\")\n                            comp_response.raise_for_status()\n                            \n                            # Save component content locally\n                            self._save_file_content(comp_hash, comp_response.content, comp_name)\n                            \n                            # Store component hash\n                            if comp_name.endswith('.metadata'):\n                                
component_hashes['metadata'] = comp_hash\n                                # Parse metadata\n                                try:\n                                    metadata = json.loads(comp_response.text)\n                                except:\n                                    print(f\"\u26a0\ufe0f Failed to parse metadata for {node_uuid[:8]}...\")\n                            elif comp_name.endswith('.content'):\n                                component_hashes['content'] = comp_hash\n                            elif comp_name.endswith('.pdf'):\n                                component_hashes['pdf'] = comp_hash\n                            elif comp_name.endswith('.pagedata'):\n                                component_hashes['pagedata'] = comp_hash\n                            elif comp_name.endswith('.rm'):\n                                if 'rm_files' not in component_hashes:\n                                    component_hashes['rm_files'] = []\n                                component_hashes['rm_files'].append(comp_hash)\n                                \n                        except Exception as e:\n                            print(f\"\u26a0\ufe0f Failed to download component {comp_name}: {e}\")\n                        \n                        component_hashes['docSchema'] = node_hash\n            \n            # Determine document name\n            doc_name = \"Unknown Document\"\n            if metadata:\n                doc_name = metadata.get('visibleName', doc_name)\n            \n            return {\n                'uuid': node_uuid,\n                'hash': node_hash,\n                'name': doc_name,\n                'node_type': 'document',\n                'metadata': metadata or {},\n                'component_hashes': component_hashes,\n                'last_modified': metadata.get('lastModified', '0') if metadata else '0',\n                'parent_uuid': metadata.get('parent', '') if metadata else '',\n                'version': int(version) if version.isdigit() else 1,\n                'sync_status': 'synced',\n                'last_synced': datetime.now().isoformat()\n            }\n            \n        except Exception as e:\n            print(f\"\u274c Failed to parse document {node_uuid[:8]}...: {e}\")\n            return None\n    \n    def _fetch_metadata(self, metadata_hash: str) -> Optional[Dict[str, Any]]:\n        \"\"\"Fetch and parse document metadata\"\"\"\n        try:\n            metadata_response = self.session.get(f\"https://eu.tectonic.remarkable.com/sync/v3/files/{metadata_hash}\")\n            metadata_response.raise_for_status()\n            return json.loads(metadata_response.text)\n        except Exception as e:\n            print(f\"\u26a0\ufe0f Failed to fetch metadata {metadata_hash[:8]}...: {e}\")\n            return None\n    \n    def _save_summary(self):\n        \"\"\"Save human-readable summary\"\"\"\n        try:\n            with open(self.summary_path, 'w') as f:\n                f.write(\"reMarkable Replica Summary\\n\")\n                f.write(\"=\" * 50 + \"\\n\\n\")\n                \n                f.write(f\"Last sync: {self.database['metadata'].get('last_sync', 'Never')}\\n\")\n                f.write(f\"Total syncs: {self.database['metadata'].get('sync_count', 0)}\\n\")\n                f.write(f\"Total nodes: {len(self.database['nodes'])}\\n\\n\")\n                \n                # Count by type\n                folders = [n for n in self.database['nodes'].values() if n.get('node_type') == 'folder']\n 
               documents = [n for n in self.database['nodes'].values() if n.get('node_type') == 'document']\n                \n                f.write(f\"\ud83d\udcc2 Folders: {len(folders)}\\n\")\n                f.write(f\"\ud83d\udcc4 Documents: {len(documents)}\\n\\n\")\n                \n                # List folders\n                if folders:\n                    f.write(\"Folders:\\n\")\n                    f.write(\"-\" * 20 + \"\\n\")\n                    for folder in sorted(folders, key=lambda x: x.get('name', '')):\n                        f.write(f\"  \ud83d\udcc2 {folder['name']} ({folder['uuid'][:8]}...)\\n\")\n                    f.write(\"\\n\")\n                \n                # List documents\n                if documents:\n                    f.write(\"Documents:\\n\")\n                    f.write(\"-\" * 20 + \"\\n\")\n                    for doc in sorted(documents, key=lambda x: x.get('name', '')):\n                        parent_info = \"\"\n                        if doc.get('parent_uuid'):\n                            parent_name = \"Unknown Folder\"\n                            for folder in folders:\n                                if folder['uuid'] == doc['parent_uuid']:\n                                    parent_name = folder['name']\n                                    break\n                            parent_info = f\" [in {parent_name}]\"\n                        \n                        f.write(f\"  \ud83d\udcc4 {doc['name']} ({doc['uuid'][:8]}...){parent_info}\\n\")\n                \n        except Exception as e:\n            print(f\"\u26a0\ufe0f Failed to save summary: {e}\")\n    \n    def _create_content_index(self):\n        \"\"\"Create an index of all downloaded content files\"\"\"\n        try:\n            index_path = self.replica_dir / \"content_index.txt\"\n            \n            with open(index_path, 'w') as f:\n                f.write(\"reMarkable Content Index\\n\")\n                f.write(\"=\" * 50 + \"\\n\\n\")\n                \n                f.write(f\"Generated: {datetime.now().isoformat()}\\n\")\n                f.write(f\"Total files: {len(self.database.get('hash_registry', {}))}\\n\\n\")\n                \n                # Group by file type\n                by_type = {}\n                for hash_val, info in self.database.get('hash_registry', {}).items():\n                    file_type = info.get('type', 'unknown')\n                    if file_type not in by_type:\n                        by_type[file_type] = []\n                    by_type[file_type].append((hash_val, info))\n                \n                for file_type, files in sorted(by_type.items()):\n                    f.write(f\"{file_type.upper()} Files ({len(files)}):\\n\")\n                    f.write(\"-\" * 30 + \"\\n\")\n                    \n                    for hash_val, info in sorted(files, key=lambda x: x[1].get('filename', '')):\n                        filename = info.get('filename', 'unknown')\n                        size = info.get('size', 0)\n                        subdir = hash_val[:2]\n                        f.write(f\"  {filename} ({size} bytes)\\n\")\n                        f.write(f\"    Hash: {hash_val}\\n\")\n                        f.write(f\"    Path: content/{subdir}/{hash_val}\\n\\n\")\n                    \n                    f.write(\"\\n\")\n            \n            print(f\"\ud83d\udccb Content index saved: {index_path}\")\n            \n        except Exception as e:\n            print(f\"\u26a0\ufe0f Failed to 
create content index: {e}\")\n    \n    def get_content_file_path(self, content_hash: str) -> Path:\n        \"\"\"Get the local path for a content file\"\"\"\n        subdir = content_hash[:2]\n        return self.content_dir / subdir / content_hash\n    \n    def _create_folder_structure(self):\n        \"\"\"Create readable folder structure with documents in their proper folders\"\"\"\n        try:\n            # Create documents directory\n            documents_dir = self.replica_dir / \"documents\"\n            documents_dir.mkdir(exist_ok=True)\n            \n            print(f\"\ud83d\udcc1 Creating folder structure in: {documents_dir}\")\n            \n            # Build folder hierarchy\n            folders = self.get_folders()\n            documents = [n for n in self.database['nodes'].values() if n.get('node_type') == 'document']\n            \n            # Create folder directories\n            folder_paths = {}\n            \n            # Process root folders first\n            for folder in folders:\n                if not folder.get('parent_uuid'):\n                    folder_path = documents_dir / self._sanitize_filename(folder['name'])\n                    folder_path.mkdir(exist_ok=True)\n                    folder_paths[folder['uuid']] = folder_path\n                    print(f\"   \ud83d\udcc2 Created root folder: {folder['name']}\")\n            \n            # Process nested folders\n            remaining_folders = [f for f in folders if f.get('parent_uuid')]\n            max_iterations = 10  # Prevent infinite loops\n            \n            while remaining_folders and max_iterations > 0:\n                processed_this_round = []\n                \n                for folder in remaining_folders:\n                    parent_uuid = folder.get('parent_uuid')\n                    if parent_uuid in folder_paths:\n                        # Parent folder exists, create this folder\n                        parent_path = folder_paths[parent_uuid]\n                        folder_path = parent_path / self._sanitize_filename(folder['name'])\n                        folder_path.mkdir(exist_ok=True)\n                        folder_paths[folder['uuid']] = folder_path\n                        processed_this_round.append(folder)\n                        print(f\"   \ud83d\udcc2 Created nested folder: {folder['name']}\")\n                \n                # Remove processed folders\n                for folder in processed_this_round:\n                    remaining_folders.remove(folder)\n                \n                max_iterations -= 1\n            \n            # Extract documents to their folders\n            for doc in documents:\n                doc_name = self._sanitize_filename(doc['name'])\n                parent_uuid = doc.get('parent_uuid')\n                \n                # Determine target directory\n                if parent_uuid and parent_uuid in folder_paths:\n                    target_dir = folder_paths[parent_uuid]\n                else:\n                    target_dir = documents_dir\n                \n                # Extract PDF if available\n                pdf_hash = doc.get('component_hashes', {}).get('pdf')\n                if pdf_hash:\n                    pdf_path = target_dir / f\"{doc_name}.pdf\"\n                    source_path = self.get_content_file_path(pdf_hash)\n                    \n                    if source_path.exists():\n                        try:\n                            # Copy PDF to folder structure\n                   
         import shutil\n                            shutil.copy2(source_path, pdf_path)\n                            print(f\"   \ud83d\udcc4 Extracted PDF: {doc_name}.pdf\")\n                        except Exception as e:\n                            print(f\"   \u274c Failed to copy PDF {doc_name}: {e}\")\n                    else:\n                        print(f\"   \u26a0\ufe0f PDF source not found: {pdf_hash[:16]}...\")\n                \n                # For notebooks (with .rm files), create a note that it's a notebook\n                rm_files = doc.get('component_hashes', {}).get('rm_files', [])\n                if rm_files and not pdf_hash:\n                    notebook_info_path = target_dir / f\"{doc_name}_notebook_info.txt\"\n                    try:\n                        with open(notebook_info_path, 'w') as f:\n                            f.write(f\"reMarkable Notebook: {doc['name']}\\n\")\n                            f.write(f\"UUID: {doc['uuid']}\\n\")\n                            f.write(f\"Created: {doc.get('metadata', {}).get('lastModified', 'Unknown')}\\n\")\n                            f.write(f\"Pages: {len(rm_files)}\\n\\n\")\n                            f.write(\"This is a reMarkable notebook with handwritten content.\\n\")\n                            f.write(\"The original .rm files are stored in the content directory.\\n\")\n                        print(f\"   \ud83d\udcd3 Created notebook info: {doc_name}_notebook_info.txt\")\n                    except Exception as e:\n                        print(f\"   \u274c Failed to create notebook info: {e}\")\n            \n            print(f\"\u2705 Folder structure created successfully\")\n            \n        except Exception as e:\n            print(f\"\u274c Failed to create folder structure: {e}\")\n    \n    def _sanitize_filename(self, filename: str) -> str:\n        \"\"\"Sanitize filename for filesystem use\"\"\"\n        # Remove or replace invalid characters\n        import re\n        sanitized = re.sub(r'[<>:\"/\\\\|?*]', '_', filename)\n        sanitized = sanitized.strip('. ')\n        \n        # Ensure it's not empty\n        if not sanitized:\n            sanitized = \"unnamed\"\n        \n        # Limit length\n        if len(sanitized) > 200:\n            sanitized = sanitized[:200]\n        \n        return sanitized\n    \n    def get_node_by_uuid(self, uuid: str) -> Optional[Dict[str, Any]]:\n        \"\"\"Get a specific node by UUID\"\"\"\n        return self.database['nodes'].get(uuid)\n    \n    def get_documents_in_folder(self, folder_uuid: str) -> List[Dict[str, Any]]:\n        \"\"\"Get all documents in a specific folder\"\"\"\n        return [\n            node for node in self.database['nodes'].values()\n            if node.get('node_type') == 'document' and node.get('parent_uuid') == folder_uuid\n        ]\n    \n    def get_folders(self) -> List[Dict[str, Any]]:\n        \"\"\"Get all folders\"\"\"\n        return [\n            node for node in self.database['nodes'].values()\n            if node.get('node_type') == 'folder'\n        ]\n    \n    def get_root_documents(self) -> List[Dict[str, Any]]:\n        \"\"\"Get all documents in root (no parent)\"\"\"\n        return [\n            node for node in self.database['nodes'].values()\n            if node.get('node_type') == 'document' and not node.get('parent_uuid')\n        ]",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/cloudtest/sync_replica.py",
      "tags": [
        "class",
        "remarkablereplicasync"
      ],
      "updated_at": "2025-12-07T01:55:13.688284",
      "usage_example": "# Example usage:\n# result = RemarkableReplicaSync(bases)"
    }
  ],
  "count": 100
}
